diff --git a/docs/2_38/cmdstan-guide/bib.html b/docs/2_38/cmdstan-guide/bib.html index cda8b7cf5..c17d804f1 100644 --- a/docs/2_38/cmdstan-guide/bib.html +++ b/docs/2_38/cmdstan-guide/bib.html @@ -1,6 +1,6 @@ - + @@ -215,6 +215,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/command_line_options.html b/docs/2_38/cmdstan-guide/command_line_options.html index 81addd6f9..d9d9bc2de 100644 --- a/docs/2_38/cmdstan-guide/command_line_options.html +++ b/docs/2_38/cmdstan-guide/command_line_options.html @@ -1,6 +1,6 @@ - + @@ -453,6 +453,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/compiling_stan_programs.html b/docs/2_38/cmdstan-guide/compiling_stan_programs.html index d4b8592ce..3057aa25e 100644 --- a/docs/2_38/cmdstan-guide/compiling_stan_programs.html +++ b/docs/2_38/cmdstan-guide/compiling_stan_programs.html @@ -1,6 +1,6 @@ - + @@ -488,6 +488,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/diagnose_config.html b/docs/2_38/cmdstan-guide/diagnose_config.html index 7fb35fc53..d46f113ee 100644 --- a/docs/2_38/cmdstan-guide/diagnose_config.html +++ b/docs/2_38/cmdstan-guide/diagnose_config.html @@ -1,6 +1,6 @@ - + @@ -444,6 +444,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/diagnose_utility.html b/docs/2_38/cmdstan-guide/diagnose_utility.html index b23b61ea8..1b4c84a96 100644 --- a/docs/2_38/cmdstan-guide/diagnose_utility.html +++ b/docs/2_38/cmdstan-guide/diagnose_utility.html @@ -1,6 +1,6 @@ - + @@ -490,6 +490,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/example_model_data.html b/docs/2_38/cmdstan-guide/example_model_data.html index 6e9039a47..de05b64dc 100644 --- a/docs/2_38/cmdstan-guide/example_model_data.html +++ b/docs/2_38/cmdstan-guide/example_model_data.html @@ -1,6 +1,6 @@ - + @@ -449,6 +449,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/external_code.html b/docs/2_38/cmdstan-guide/external_code.html index 0fd42c59f..9df8c2332 100644 --- a/docs/2_38/cmdstan-guide/external_code.html +++ b/docs/2_38/cmdstan-guide/external_code.html @@ -1,6 +1,6 @@ - + @@ -452,6 +452,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/generate_quantities_config.html b/docs/2_38/cmdstan-guide/generate_quantities_config.html index 9f631fb8f..5c7a2eebf 100644 --- a/docs/2_38/cmdstan-guide/generate_quantities_config.html +++ b/docs/2_38/cmdstan-guide/generate_quantities_config.html @@ -1,6 +1,6 @@ - + @@ -453,6 +453,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/index.html b/docs/2_38/cmdstan-guide/index.html index fe5f177f6..9529afc1c 100644 --- a/docs/2_38/cmdstan-guide/index.html +++ b/docs/2_38/cmdstan-guide/index.html @@ -1,6 +1,6 @@ - + diff --git a/docs/2_38/cmdstan-guide/installation.html b/docs/2_38/cmdstan-guide/installation.html index 74fa8aa25..86f1f3d08 100644 --- a/docs/2_38/cmdstan-guide/installation.html +++ b/docs/2_38/cmdstan-guide/installation.html @@ -1,6 +1,6 @@ - + @@ -462,6 +462,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/json_apdx.html b/docs/2_38/cmdstan-guide/json_apdx.html index 5b2b06ab7..c9bfe4cf3 100644 --- a/docs/2_38/cmdstan-guide/json_apdx.html +++ b/docs/2_38/cmdstan-guide/json_apdx.html @@ -1,6 +1,6 @@ - + @@ -486,6 +486,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/laplace_sample_config.html b/docs/2_38/cmdstan-guide/laplace_sample_config.html index 6d639198d..6b461ca4e 100644 --- a/docs/2_38/cmdstan-guide/laplace_sample_config.html +++ b/docs/2_38/cmdstan-guide/laplace_sample_config.html @@ -1,6 +1,6 @@ - + @@ -450,6 +450,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/log_prob_config.html b/docs/2_38/cmdstan-guide/log_prob_config.html index 01ea7fdcc..5112cfc84 100644 --- a/docs/2_38/cmdstan-guide/log_prob_config.html +++ b/docs/2_38/cmdstan-guide/log_prob_config.html @@ -1,6 +1,6 @@ - + @@ -453,6 +453,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/mcmc_config.html b/docs/2_38/cmdstan-guide/mcmc_config.html index dd4f21874..0981094f6 100644 --- a/docs/2_38/cmdstan-guide/mcmc_config.html +++ b/docs/2_38/cmdstan-guide/mcmc_config.html @@ -1,6 +1,6 @@ - + @@ -498,6 +498,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/optimize_config.html b/docs/2_38/cmdstan-guide/optimize_config.html index a16b09626..84ba9183e 100644 --- a/docs/2_38/cmdstan-guide/optimize_config.html +++ b/docs/2_38/cmdstan-guide/optimize_config.html @@ -1,6 +1,6 @@ - + @@ -450,6 +450,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/parallelization.html b/docs/2_38/cmdstan-guide/parallelization.html index ba2d09864..bf56ac662 100644 --- a/docs/2_38/cmdstan-guide/parallelization.html +++ b/docs/2_38/cmdstan-guide/parallelization.html @@ -1,6 +1,6 @@ - + @@ -434,6 +434,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/pathfinder_config.html b/docs/2_38/cmdstan-guide/pathfinder_config.html index 247e91e8b..de14d9417 100644 --- a/docs/2_38/cmdstan-guide/pathfinder_config.html +++ b/docs/2_38/cmdstan-guide/pathfinder_config.html @@ -1,6 +1,6 @@ - + @@ -470,6 +470,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/print.html b/docs/2_38/cmdstan-guide/print.html index 1a33ecfea..1b1850ff2 100644 --- a/docs/2_38/cmdstan-guide/print.html +++ b/docs/2_38/cmdstan-guide/print.html @@ -1,6 +1,6 @@ - + @@ -415,6 +415,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/rdump_apdx.html b/docs/2_38/cmdstan-guide/rdump_apdx.html index fd5461561..5e7fc4c20 100644 --- a/docs/2_38/cmdstan-guide/rdump_apdx.html +++ b/docs/2_38/cmdstan-guide/rdump_apdx.html @@ -1,6 +1,6 @@ - + @@ -465,6 +465,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/stan_csv_apdx.html b/docs/2_38/cmdstan-guide/stan_csv_apdx.html index f764c5ff8..cd03fe40f 100644 --- a/docs/2_38/cmdstan-guide/stan_csv_apdx.html +++ b/docs/2_38/cmdstan-guide/stan_csv_apdx.html @@ -1,6 +1,6 @@ - + @@ -457,6 +457,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/stanc.html b/docs/2_38/cmdstan-guide/stanc.html index 7ed425e4a..a11a17e70 100644 --- a/docs/2_38/cmdstan-guide/stanc.html +++ b/docs/2_38/cmdstan-guide/stanc.html @@ -1,6 +1,6 @@ - + @@ -419,6 +419,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/stansummary.html b/docs/2_38/cmdstan-guide/stansummary.html index 4611817d4..1eb154019 100644 --- a/docs/2_38/cmdstan-guide/stansummary.html +++ b/docs/2_38/cmdstan-guide/stansummary.html @@ -1,6 +1,6 @@ - + @@ -473,6 +473,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/cmdstan-guide/variational_config.html b/docs/2_38/cmdstan-guide/variational_config.html index e0ca7d9f0..d6b583d1f 100644 --- a/docs/2_38/cmdstan-guide/variational_config.html +++ b/docs/2_38/cmdstan-guide/variational_config.html @@ -1,6 +1,6 @@ - + @@ -469,6 +469,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/array_operations.html b/docs/2_38/functions-reference/array_operations.html index a61e53b3a..a489b29eb 100644 --- a/docs/2_38/functions-reference/array_operations.html +++ b/docs/2_38/functions-reference/array_operations.html @@ -1,6 +1,6 @@ - + @@ -570,6 +570,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/binary_distributions.html b/docs/2_38/functions-reference/binary_distributions.html index 6db376df1..341ebee29 100644 --- a/docs/2_38/functions-reference/binary_distributions.html +++ b/docs/2_38/functions-reference/binary_distributions.html @@ -1,6 +1,6 @@ - + @@ -541,6 +541,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/bounded_continuous_distributions.html b/docs/2_38/functions-reference/bounded_continuous_distributions.html index f69007d73..a1330a390 100644 --- a/docs/2_38/functions-reference/bounded_continuous_distributions.html +++ b/docs/2_38/functions-reference/bounded_continuous_distributions.html @@ -1,6 +1,6 @@ - + @@ -529,6 +529,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/bounded_discrete_distributions.html b/docs/2_38/functions-reference/bounded_discrete_distributions.html index 5b142af7e..47cedad56 100644 --- a/docs/2_38/functions-reference/bounded_discrete_distributions.html +++ b/docs/2_38/functions-reference/bounded_discrete_distributions.html @@ -1,6 +1,6 @@ - + @@ -594,6 +594,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/circular_distributions.html b/docs/2_38/functions-reference/circular_distributions.html index 7e81ff5cb..bb6b00d58 100644 --- a/docs/2_38/functions-reference/circular_distributions.html +++ b/docs/2_38/functions-reference/circular_distributions.html @@ -1,6 +1,6 @@ - + @@ -564,6 +564,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/complex-valued_basic_functions.html b/docs/2_38/functions-reference/complex-valued_basic_functions.html index 80143c22c..db600e3ba 100644 --- a/docs/2_38/functions-reference/complex-valued_basic_functions.html +++ b/docs/2_38/functions-reference/complex-valued_basic_functions.html @@ -1,6 +1,6 @@ - + @@ -577,6 +577,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/complex_matrix_operations.html b/docs/2_38/functions-reference/complex_matrix_operations.html index 03bb5f7e1..864630ec4 100644 --- a/docs/2_38/functions-reference/complex_matrix_operations.html +++ b/docs/2_38/functions-reference/complex_matrix_operations.html @@ -1,6 +1,6 @@ - + @@ -611,6 +611,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/compound_arithmetic_and_assignment.html b/docs/2_38/functions-reference/compound_arithmetic_and_assignment.html index 8d34e2cbc..dfca90c6d 100644 --- a/docs/2_38/functions-reference/compound_arithmetic_and_assignment.html +++ b/docs/2_38/functions-reference/compound_arithmetic_and_assignment.html @@ -1,6 +1,6 @@ - + @@ -500,6 +500,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/continuous_distributions_on_0_1.html b/docs/2_38/functions-reference/continuous_distributions_on_0_1.html index 252cce468..6d653285c 100644 --- a/docs/2_38/functions-reference/continuous_distributions_on_0_1.html +++ b/docs/2_38/functions-reference/continuous_distributions_on_0_1.html @@ -1,6 +1,6 @@ - + @@ -535,6 +535,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/conventions_for_probability_functions.html b/docs/2_38/functions-reference/conventions_for_probability_functions.html index 838febad9..a74286267 100644 --- a/docs/2_38/functions-reference/conventions_for_probability_functions.html +++ b/docs/2_38/functions-reference/conventions_for_probability_functions.html @@ -1,6 +1,6 @@ - + @@ -574,6 +574,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/correlation_matrix_distributions.html b/docs/2_38/functions-reference/correlation_matrix_distributions.html index b1b16ce4c..e74f96d7d 100644 --- a/docs/2_38/functions-reference/correlation_matrix_distributions.html +++ b/docs/2_38/functions-reference/correlation_matrix_distributions.html @@ -1,6 +1,6 @@ - + @@ -589,6 +589,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/covariance_matrix_distributions.html b/docs/2_38/functions-reference/covariance_matrix_distributions.html index 77e397def..67bfa00b5 100644 --- a/docs/2_38/functions-reference/covariance_matrix_distributions.html +++ b/docs/2_38/functions-reference/covariance_matrix_distributions.html @@ -1,6 +1,6 @@ - + @@ -545,6 +545,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/deprecated_functions.html b/docs/2_38/functions-reference/deprecated_functions.html index 2b5f328d0..24e6b568c 100644 --- a/docs/2_38/functions-reference/deprecated_functions.html +++ b/docs/2_38/functions-reference/deprecated_functions.html @@ -1,6 +1,6 @@ - + @@ -570,6 +570,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/distributions_over_unbounded_vectors.html b/docs/2_38/functions-reference/distributions_over_unbounded_vectors.html index dc0eee2d7..4ee9b089d 100644 --- a/docs/2_38/functions-reference/distributions_over_unbounded_vectors.html +++ b/docs/2_38/functions-reference/distributions_over_unbounded_vectors.html @@ -1,6 +1,6 @@ - + @@ -590,6 +590,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/functions_index.html b/docs/2_38/functions-reference/functions_index.html index b89c87236..1a442781d 100644 --- a/docs/2_38/functions-reference/functions_index.html +++ b/docs/2_38/functions-reference/functions_index.html @@ -1,6 +1,6 @@ - + @@ -514,6 +514,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/hidden_markov_models.html b/docs/2_38/functions-reference/hidden_markov_models.html index 639e59fb4..50ef7d251 100644 --- a/docs/2_38/functions-reference/hidden_markov_models.html +++ b/docs/2_38/functions-reference/hidden_markov_models.html @@ -1,6 +1,6 @@ - + @@ -524,6 +524,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/higher-order_functions.html b/docs/2_38/functions-reference/higher-order_functions.html index d37d0bdb1..82220b73f 100644 --- a/docs/2_38/functions-reference/higher-order_functions.html +++ b/docs/2_38/functions-reference/higher-order_functions.html @@ -1,6 +1,6 @@ - + @@ -612,6 +612,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/index.html b/docs/2_38/functions-reference/index.html index 273f5b5b9..e5b300e0b 100644 --- a/docs/2_38/functions-reference/index.html +++ b/docs/2_38/functions-reference/index.html @@ -1,6 +1,6 @@ - + diff --git a/docs/2_38/functions-reference/integer-valued_basic_functions.html b/docs/2_38/functions-reference/integer-valued_basic_functions.html index 61f8975e8..73077daba 100644 --- a/docs/2_38/functions-reference/integer-valued_basic_functions.html +++ b/docs/2_38/functions-reference/integer-valued_basic_functions.html @@ -1,6 +1,6 @@ - + @@ -532,6 +532,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/mathematical_functions.html b/docs/2_38/functions-reference/mathematical_functions.html index 47259bf12..cad19a557 100644 --- a/docs/2_38/functions-reference/mathematical_functions.html +++ b/docs/2_38/functions-reference/mathematical_functions.html @@ -1,6 +1,6 @@ - + @@ -527,6 +527,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/matrix_operations.html b/docs/2_38/functions-reference/matrix_operations.html index c4aba8c04..c09abbf5f 100644 --- a/docs/2_38/functions-reference/matrix_operations.html +++ b/docs/2_38/functions-reference/matrix_operations.html @@ -1,6 +1,6 @@ - + @@ -633,6 +633,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/mixed_operations.html b/docs/2_38/functions-reference/mixed_operations.html index 8101005d9..e7bfb8235 100644 --- a/docs/2_38/functions-reference/mixed_operations.html +++ b/docs/2_38/functions-reference/mixed_operations.html @@ -1,6 +1,6 @@ - + @@ -521,6 +521,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/multivariate_discrete_distributions.html b/docs/2_38/functions-reference/multivariate_discrete_distributions.html index 348a6c101..417c70760 100644 --- a/docs/2_38/functions-reference/multivariate_discrete_distributions.html +++ b/docs/2_38/functions-reference/multivariate_discrete_distributions.html @@ -1,6 +1,6 @@ - + @@ -541,6 +541,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/positive_continuous_distributions.html b/docs/2_38/functions-reference/positive_continuous_distributions.html index c29d8e03e..7a68c0426 100644 --- a/docs/2_38/functions-reference/positive_continuous_distributions.html +++ b/docs/2_38/functions-reference/positive_continuous_distributions.html @@ -1,6 +1,6 @@ - + @@ -589,6 +589,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/positive_lower-bounded_distributions.html b/docs/2_38/functions-reference/positive_lower-bounded_distributions.html index c658d57df..137031d3a 100644 --- a/docs/2_38/functions-reference/positive_lower-bounded_distributions.html +++ b/docs/2_38/functions-reference/positive_lower-bounded_distributions.html @@ -1,6 +1,6 @@ - + @@ -565,6 +565,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/real-valued_basic_functions.html b/docs/2_38/functions-reference/real-valued_basic_functions.html index b46001086..4f7a96719 100644 --- a/docs/2_38/functions-reference/real-valued_basic_functions.html +++ b/docs/2_38/functions-reference/real-valued_basic_functions.html @@ -1,6 +1,6 @@ - + @@ -616,6 +616,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/references.html b/docs/2_38/functions-reference/references.html index 4458bfda0..a65d071b8 100644 --- a/docs/2_38/functions-reference/references.html +++ b/docs/2_38/functions-reference/references.html @@ -1,6 +1,6 @@ - + @@ -215,6 +215,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/removed_functions.html b/docs/2_38/functions-reference/removed_functions.html index 041785972..26b8a6d22 100644 --- a/docs/2_38/functions-reference/removed_functions.html +++ b/docs/2_38/functions-reference/removed_functions.html @@ -1,6 +1,6 @@ - + @@ -528,6 +528,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/simplex_distributions.html b/docs/2_38/functions-reference/simplex_distributions.html index bd3e01b16..4a51774c9 100644 --- a/docs/2_38/functions-reference/simplex_distributions.html +++ b/docs/2_38/functions-reference/simplex_distributions.html @@ -1,6 +1,6 @@ - + @@ -564,6 +564,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/sparse_matrix_operations.html b/docs/2_38/functions-reference/sparse_matrix_operations.html index 25534a49a..9f68ccada 100644 --- a/docs/2_38/functions-reference/sparse_matrix_operations.html +++ b/docs/2_38/functions-reference/sparse_matrix_operations.html @@ -1,6 +1,6 @@ - + @@ -533,6 +533,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/transform_functions.html b/docs/2_38/functions-reference/transform_functions.html index 43ab4cbc9..e0ba6b791 100644 --- a/docs/2_38/functions-reference/transform_functions.html +++ b/docs/2_38/functions-reference/transform_functions.html @@ -1,6 +1,6 @@ - + @@ -548,6 +548,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/unbounded_continuous_distributions.html b/docs/2_38/functions-reference/unbounded_continuous_distributions.html index b08557e47..eb5a37ad9 100644 --- a/docs/2_38/functions-reference/unbounded_continuous_distributions.html +++ b/docs/2_38/functions-reference/unbounded_continuous_distributions.html @@ -1,6 +1,6 @@ - + @@ -606,6 +606,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/unbounded_discrete_distributions.html b/docs/2_38/functions-reference/unbounded_discrete_distributions.html index a8a99e58b..8d1121eb9 100644 --- a/docs/2_38/functions-reference/unbounded_discrete_distributions.html +++ b/docs/2_38/functions-reference/unbounded_discrete_distributions.html @@ -1,6 +1,6 @@ - + @@ -590,6 +590,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/functions-reference/void_functions.html b/docs/2_38/functions-reference/void_functions.html index 5752c5b39..af45c6e4f 100644 --- a/docs/2_38/functions-reference/void_functions.html +++ b/docs/2_38/functions-reference/void_functions.html @@ -1,6 +1,6 @@ - + @@ -497,6 +497,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/analysis.html b/docs/2_38/reference-manual/analysis.html index 2831fdcf7..136b4103b 100644 --- a/docs/2_38/reference-manual/analysis.html +++ b/docs/2_38/reference-manual/analysis.html @@ -1,6 +1,6 @@ - + @@ -483,6 +483,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/blocks.html b/docs/2_38/reference-manual/blocks.html index cab4af7f5..d3767571e 100644 --- a/docs/2_38/reference-manual/blocks.html +++ b/docs/2_38/reference-manual/blocks.html @@ -1,6 +1,6 @@ - + @@ -536,6 +536,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/comments.html b/docs/2_38/reference-manual/comments.html index 9cb0493df..bd5915c10 100644 --- a/docs/2_38/reference-manual/comments.html +++ b/docs/2_38/reference-manual/comments.html @@ -1,6 +1,6 @@ - + @@ -454,6 +454,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/deprecations.html b/docs/2_38/reference-manual/deprecations.html index 9ba0faea4..1ebc12003 100644 --- a/docs/2_38/reference-manual/deprecations.html +++ b/docs/2_38/reference-manual/deprecations.html @@ -1,6 +1,6 @@ - + @@ -455,6 +455,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/diagnostics.html b/docs/2_38/reference-manual/diagnostics.html index 3feadead2..ee4750e0b 100644 --- a/docs/2_38/reference-manual/diagnostics.html +++ b/docs/2_38/reference-manual/diagnostics.html @@ -1,6 +1,6 @@ - + @@ -454,6 +454,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/encoding.html b/docs/2_38/reference-manual/encoding.html index 4c2d756d6..6f16c051b 100644 --- a/docs/2_38/reference-manual/encoding.html +++ b/docs/2_38/reference-manual/encoding.html @@ -1,6 +1,6 @@ - + @@ -423,6 +423,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/execution.html b/docs/2_38/reference-manual/execution.html index 4f0598b44..6138cb4ba 100644 --- a/docs/2_38/reference-manual/execution.html +++ b/docs/2_38/reference-manual/execution.html @@ -1,6 +1,6 @@ - + @@ -526,6 +526,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/expressions.html b/docs/2_38/reference-manual/expressions.html index 273c729f1..77b4a0315 100644 --- a/docs/2_38/reference-manual/expressions.html +++ b/docs/2_38/reference-manual/expressions.html @@ -1,6 +1,6 @@ - + @@ -568,6 +568,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/includes.html b/docs/2_38/reference-manual/includes.html index 2b2fd3e95..509dba21d 100644 --- a/docs/2_38/reference-manual/includes.html +++ b/docs/2_38/reference-manual/includes.html @@ -1,6 +1,6 @@ - + @@ -459,6 +459,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/index.html b/docs/2_38/reference-manual/index.html index 3697e8e63..432c5c578 100644 --- a/docs/2_38/reference-manual/index.html +++ b/docs/2_38/reference-manual/index.html @@ -1,6 +1,6 @@ - + diff --git a/docs/2_38/reference-manual/laplace.html b/docs/2_38/reference-manual/laplace.html index 533cc32b2..2a02354ea 100644 --- a/docs/2_38/reference-manual/laplace.html +++ b/docs/2_38/reference-manual/laplace.html @@ -1,6 +1,6 @@ - + @@ -445,6 +445,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/licenses.html b/docs/2_38/reference-manual/licenses.html index e6ff6813a..d3fd619f2 100644 --- a/docs/2_38/reference-manual/licenses.html +++ b/docs/2_38/reference-manual/licenses.html @@ -1,6 +1,6 @@ - + @@ -423,6 +423,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/mcmc.html b/docs/2_38/reference-manual/mcmc.html index 768f46a75..6de90a2f0 100644 --- a/docs/2_38/reference-manual/mcmc.html +++ b/docs/2_38/reference-manual/mcmc.html @@ -1,6 +1,6 @@ - + @@ -530,6 +530,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/optimization.html b/docs/2_38/reference-manual/optimization.html index d0542e3ed..73f21e6ef 100644 --- a/docs/2_38/reference-manual/optimization.html +++ b/docs/2_38/reference-manual/optimization.html @@ -1,6 +1,6 @@ - + @@ -512,6 +512,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/pathfinder.html b/docs/2_38/reference-manual/pathfinder.html index 077f145f0..6d87c0785 100644 --- a/docs/2_38/reference-manual/pathfinder.html +++ b/docs/2_38/reference-manual/pathfinder.html @@ -1,6 +1,6 @@ - + @@ -469,6 +469,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/references.html b/docs/2_38/reference-manual/references.html index 1c0ed4dae..6a0b1651a 100644 --- a/docs/2_38/reference-manual/references.html +++ b/docs/2_38/reference-manual/references.html @@ -1,6 +1,6 @@ - + @@ -215,6 +215,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/removals.html b/docs/2_38/reference-manual/removals.html index 82d0e1fc1..b35cacb95 100644 --- a/docs/2_38/reference-manual/removals.html +++ b/docs/2_38/reference-manual/removals.html @@ -1,6 +1,6 @@ - + @@ -464,6 +464,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/reproducibility.html b/docs/2_38/reference-manual/reproducibility.html index 980cc8215..f1bd3bd29 100644 --- a/docs/2_38/reference-manual/reproducibility.html +++ b/docs/2_38/reference-manual/reproducibility.html @@ -1,6 +1,6 @@ - + @@ -439,6 +439,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/statements.html b/docs/2_38/reference-manual/statements.html index b7678fe74..2a729ef5b 100644 --- a/docs/2_38/reference-manual/statements.html +++ b/docs/2_38/reference-manual/statements.html @@ -1,6 +1,6 @@ - + @@ -536,6 +536,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/syntax.html b/docs/2_38/reference-manual/syntax.html index 45bf930b2..eb1a253a8 100644 --- a/docs/2_38/reference-manual/syntax.html +++ b/docs/2_38/reference-manual/syntax.html @@ -1,6 +1,6 @@ - + @@ -472,6 +472,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/transforms.html b/docs/2_38/reference-manual/transforms.html index ec54cce49..e13178c82 100644 --- a/docs/2_38/reference-manual/transforms.html +++ b/docs/2_38/reference-manual/transforms.html @@ -1,6 +1,6 @@ - + @@ -588,6 +588,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/types.html b/docs/2_38/reference-manual/types.html index 9a11f4252..03f2e002a 100644 --- a/docs/2_38/reference-manual/types.html +++ b/docs/2_38/reference-manual/types.html @@ -1,6 +1,6 @@ - + @@ -574,6 +574,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/user-functions.html b/docs/2_38/reference-manual/user-functions.html index a9b5cb0f0..c2fa8d664 100644 --- a/docs/2_38/reference-manual/user-functions.html +++ b/docs/2_38/reference-manual/user-functions.html @@ -1,6 +1,6 @@ - + @@ -491,6 +491,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/variational.html b/docs/2_38/reference-manual/variational.html index e979ed016..a41df9856 100644 --- a/docs/2_38/reference-manual/variational.html +++ b/docs/2_38/reference-manual/variational.html @@ -1,6 +1,6 @@ - + @@ -474,6 +474,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/reference-manual/whitespace.html b/docs/2_38/reference-manual/whitespace.html index f7df00591..c2969ea8a 100644 --- a/docs/2_38/reference-manual/whitespace.html +++ b/docs/2_38/reference-manual/whitespace.html @@ -1,6 +1,6 @@ - + @@ -420,6 +420,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/algebraic-equations.html b/docs/2_38/stan-users-guide/algebraic-equations.html index 85d905234..1ed515448 100644 --- a/docs/2_38/stan-users-guide/algebraic-equations.html +++ b/docs/2_38/stan-users-guide/algebraic-equations.html @@ -1,6 +1,6 @@ - + @@ -615,6 +615,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/bootstrap.html b/docs/2_38/stan-users-guide/bootstrap.html index 1140f65ac..299c32214 100644 --- a/docs/2_38/stan-users-guide/bootstrap.html +++ b/docs/2_38/stan-users-guide/bootstrap.html @@ -1,6 +1,6 @@ - + @@ -615,6 +615,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/clustering.html b/docs/2_38/stan-users-guide/clustering.html index bb3a43bfb..d7ba47c64 100644 --- a/docs/2_38/stan-users-guide/clustering.html +++ b/docs/2_38/stan-users-guide/clustering.html @@ -1,6 +1,6 @@ - + @@ -630,6 +630,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/complex-numbers.html b/docs/2_38/stan-users-guide/complex-numbers.html index 5ab464f1a..e2068d39e 100644 --- a/docs/2_38/stan-users-guide/complex-numbers.html +++ b/docs/2_38/stan-users-guide/complex-numbers.html @@ -1,6 +1,6 @@ - + @@ -596,6 +596,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/copulas.html b/docs/2_38/stan-users-guide/copulas.html index bbd72e525..e6e9b063a 100644 --- a/docs/2_38/stan-users-guide/copulas.html +++ b/docs/2_38/stan-users-guide/copulas.html @@ -1,6 +1,6 @@ - + @@ -609,6 +609,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/cross-validation.html b/docs/2_38/stan-users-guide/cross-validation.html index 78393efbe..e8fe2d025 100644 --- a/docs/2_38/stan-users-guide/cross-validation.html +++ b/docs/2_38/stan-users-guide/cross-validation.html @@ -1,6 +1,6 @@ - + @@ -620,6 +620,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/custom-probability.html b/docs/2_38/stan-users-guide/custom-probability.html index 8e724a256..0e791c57d 100644 --- a/docs/2_38/stan-users-guide/custom-probability.html +++ b/docs/2_38/stan-users-guide/custom-probability.html @@ -1,6 +1,6 @@ - + @@ -588,6 +588,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/dae.html b/docs/2_38/stan-users-guide/dae.html index 665b22ee7..40b27c2dd 100644 --- a/docs/2_38/stan-users-guide/dae.html +++ b/docs/2_38/stan-users-guide/dae.html @@ -1,6 +1,6 @@ - + @@ -614,6 +614,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/decision-analysis.html b/docs/2_38/stan-users-guide/decision-analysis.html index 918b21040..2d7598b1e 100644 --- a/docs/2_38/stan-users-guide/decision-analysis.html +++ b/docs/2_38/stan-users-guide/decision-analysis.html @@ -1,6 +1,6 @@ - + @@ -611,6 +611,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/efficiency-tuning.html b/docs/2_38/stan-users-guide/efficiency-tuning.html index df3e37a7e..263d19cdc 100644 --- a/docs/2_38/stan-users-guide/efficiency-tuning.html +++ b/docs/2_38/stan-users-guide/efficiency-tuning.html @@ -1,6 +1,6 @@ - + @@ -644,6 +644,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/finite-mixtures.html b/docs/2_38/stan-users-guide/finite-mixtures.html index ea76dd5bb..f70019097 100644 --- a/docs/2_38/stan-users-guide/finite-mixtures.html +++ b/docs/2_38/stan-users-guide/finite-mixtures.html @@ -1,6 +1,6 @@ - + @@ -626,6 +626,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/floating-point.html b/docs/2_38/stan-users-guide/floating-point.html index 4261cedd6..a3d47d045 100644 --- a/docs/2_38/stan-users-guide/floating-point.html +++ b/docs/2_38/stan-users-guide/floating-point.html @@ -1,6 +1,6 @@ - + @@ -574,6 +574,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/for-bugs-users.html b/docs/2_38/stan-users-guide/for-bugs-users.html index ff54338ba..e4799bc41 100644 --- a/docs/2_38/stan-users-guide/for-bugs-users.html +++ b/docs/2_38/stan-users-guide/for-bugs-users.html @@ -1,6 +1,6 @@ - + @@ -618,6 +618,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/gaussian-processes.html b/docs/2_38/stan-users-guide/gaussian-processes.html index 7b794b927..eec146a84 100644 --- a/docs/2_38/stan-users-guide/gaussian-processes.html +++ b/docs/2_38/stan-users-guide/gaussian-processes.html @@ -1,6 +1,6 @@ - + @@ -617,6 +617,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/hyperspherical-models.html b/docs/2_38/stan-users-guide/hyperspherical-models.html index e0d154ccf..42f788dbd 100644 --- a/docs/2_38/stan-users-guide/hyperspherical-models.html +++ b/docs/2_38/stan-users-guide/hyperspherical-models.html @@ -1,6 +1,6 @@ - + @@ -610,6 +610,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/index.html b/docs/2_38/stan-users-guide/index.html index 8f1c647cd..f70af5a3c 100644 --- a/docs/2_38/stan-users-guide/index.html +++ b/docs/2_38/stan-users-guide/index.html @@ -1,6 +1,6 @@ - + diff --git a/docs/2_38/stan-users-guide/latent-discrete.html b/docs/2_38/stan-users-guide/latent-discrete.html index 148e5785d..9a0fcff74 100644 --- a/docs/2_38/stan-users-guide/latent-discrete.html +++ b/docs/2_38/stan-users-guide/latent-discrete.html @@ -1,6 +1,6 @@ - + @@ -636,6 +636,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/matrices-arrays.html b/docs/2_38/stan-users-guide/matrices-arrays.html index a68551950..bf685d9f3 100644 --- a/docs/2_38/stan-users-guide/matrices-arrays.html +++ b/docs/2_38/stan-users-guide/matrices-arrays.html @@ -1,6 +1,6 @@ - + @@ -602,6 +602,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/measurement-error.html b/docs/2_38/stan-users-guide/measurement-error.html index 063fd2045..260bd57e4 100644 --- a/docs/2_38/stan-users-guide/measurement-error.html +++ b/docs/2_38/stan-users-guide/measurement-error.html @@ -1,6 +1,6 @@ - + @@ -611,6 +611,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/missing-data.html b/docs/2_38/stan-users-guide/missing-data.html index edfdaf45f..2764f483d 100644 --- a/docs/2_38/stan-users-guide/missing-data.html +++ b/docs/2_38/stan-users-guide/missing-data.html @@ -1,6 +1,6 @@ - + @@ -607,6 +607,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/multi-indexing.html b/docs/2_38/stan-users-guide/multi-indexing.html index 70ceb95af..385db967b 100644 --- a/docs/2_38/stan-users-guide/multi-indexing.html +++ b/docs/2_38/stan-users-guide/multi-indexing.html @@ -1,6 +1,6 @@ - + @@ -603,6 +603,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/odes.html b/docs/2_38/stan-users-guide/odes.html index 6b50a7f03..26fa7bbe5 100644 --- a/docs/2_38/stan-users-guide/odes.html +++ b/docs/2_38/stan-users-guide/odes.html @@ -1,6 +1,6 @@ - + @@ -622,6 +622,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/one-dimensional-integrals.html b/docs/2_38/stan-users-guide/one-dimensional-integrals.html index 19a3910ee..0398aca3a 100644 --- a/docs/2_38/stan-users-guide/one-dimensional-integrals.html +++ b/docs/2_38/stan-users-guide/one-dimensional-integrals.html @@ -1,6 +1,6 @@ - + @@ -612,6 +612,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/parallelization.html b/docs/2_38/stan-users-guide/parallelization.html index 992015a2e..94f81f806 100644 --- a/docs/2_38/stan-users-guide/parallelization.html +++ b/docs/2_38/stan-users-guide/parallelization.html @@ -1,6 +1,6 @@ - + @@ -616,6 +616,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/posterior-prediction.html b/docs/2_38/stan-users-guide/posterior-prediction.html index 1c53ea2a6..52a391595 100644 --- a/docs/2_38/stan-users-guide/posterior-prediction.html +++ b/docs/2_38/stan-users-guide/posterior-prediction.html @@ -1,6 +1,6 @@ - + @@ -598,6 +598,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/posterior-predictive-checks.html b/docs/2_38/stan-users-guide/posterior-predictive-checks.html index 36e0a391b..aaeb1764a 100644 --- a/docs/2_38/stan-users-guide/posterior-predictive-checks.html +++ b/docs/2_38/stan-users-guide/posterior-predictive-checks.html @@ -1,6 +1,6 @@ - + @@ -624,6 +624,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/poststratification.html b/docs/2_38/stan-users-guide/poststratification.html index f410ffbe1..e2aea641f 100644 --- a/docs/2_38/stan-users-guide/poststratification.html +++ b/docs/2_38/stan-users-guide/poststratification.html @@ -1,6 +1,6 @@ - + @@ -620,6 +620,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/problematic-posteriors.html b/docs/2_38/stan-users-guide/problematic-posteriors.html index 19076b1b9..40671ebab 100644 --- a/docs/2_38/stan-users-guide/problematic-posteriors.html +++ b/docs/2_38/stan-users-guide/problematic-posteriors.html @@ -1,6 +1,6 @@ - + @@ -633,6 +633,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/proportionality-constants.html b/docs/2_38/stan-users-guide/proportionality-constants.html index af4998be0..409044254 100644 --- a/docs/2_38/stan-users-guide/proportionality-constants.html +++ b/docs/2_38/stan-users-guide/proportionality-constants.html @@ -1,6 +1,6 @@ - + @@ -586,6 +586,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/references.html b/docs/2_38/stan-users-guide/references.html index 9c8f97f9b..e1059738e 100644 --- a/docs/2_38/stan-users-guide/references.html +++ b/docs/2_38/stan-users-guide/references.html @@ -1,6 +1,6 @@ - + @@ -215,6 +215,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/regression.html b/docs/2_38/stan-users-guide/regression.html index 226edeb9d..ffc6748ac 100644 --- a/docs/2_38/stan-users-guide/regression.html +++ b/docs/2_38/stan-users-guide/regression.html @@ -1,6 +1,6 @@ - + @@ -659,6 +659,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/reparameterization.html b/docs/2_38/stan-users-guide/reparameterization.html index ad073e45d..a0dfcd3bb 100644 --- a/docs/2_38/stan-users-guide/reparameterization.html +++ b/docs/2_38/stan-users-guide/reparameterization.html @@ -1,6 +1,6 @@ - + @@ -618,6 +618,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/simulation-based-calibration.html b/docs/2_38/stan-users-guide/simulation-based-calibration.html index 2f0477d4d..435be6b27 100644 --- a/docs/2_38/stan-users-guide/simulation-based-calibration.html +++ b/docs/2_38/stan-users-guide/simulation-based-calibration.html @@ -1,6 +1,6 @@ - + @@ -621,6 +621,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/sparse-ragged.html b/docs/2_38/stan-users-guide/sparse-ragged.html index 70f1f99ea..567cb0970 100644 --- a/docs/2_38/stan-users-guide/sparse-ragged.html +++ b/docs/2_38/stan-users-guide/sparse-ragged.html @@ -1,6 +1,6 @@ - + @@ -604,6 +604,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/style-guide.html b/docs/2_38/stan-users-guide/style-guide.html index ca18559fc..572389e67 100644 --- a/docs/2_38/stan-users-guide/style-guide.html +++ b/docs/2_38/stan-users-guide/style-guide.html @@ -1,6 +1,6 @@ - + @@ -608,6 +608,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/survival.html b/docs/2_38/stan-users-guide/survival.html index e7b04bf70..b205ad88c 100644 --- a/docs/2_38/stan-users-guide/survival.html +++ b/docs/2_38/stan-users-guide/survival.html @@ -1,6 +1,6 @@ - + @@ -618,6 +618,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/time-series.html b/docs/2_38/stan-users-guide/time-series.html index 5a6c93f76..cad04948c 100644 --- a/docs/2_38/stan-users-guide/time-series.html +++ b/docs/2_38/stan-users-guide/time-series.html @@ -1,6 +1,6 @@ - + @@ -625,6 +625,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/truncation-censoring.html b/docs/2_38/stan-users-guide/truncation-censoring.html index 03906c6ae..c0499c9a8 100644 --- a/docs/2_38/stan-users-guide/truncation-censoring.html +++ b/docs/2_38/stan-users-guide/truncation-censoring.html @@ -1,6 +1,6 @@ - + @@ -593,6 +593,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/user-functions.html b/docs/2_38/stan-users-guide/user-functions.html index 769fbe52d..f17f58e15 100644 --- a/docs/2_38/stan-users-guide/user-functions.html +++ b/docs/2_38/stan-users-guide/user-functions.html @@ -1,6 +1,6 @@ - + @@ -614,6 +614,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_38/stan-users-guide/using-stanc.html b/docs/2_38/stan-users-guide/using-stanc.html index a509cde5f..5ce26b061 100644 --- a/docs/2_38/stan-users-guide/using-stanc.html +++ b/docs/2_38/stan-users-guide/using-stanc.html @@ -1,6 +1,6 @@ - + @@ -585,6 +585,10 @@

On this page

+
+This is an old version, view current version. +
+ diff --git a/docs/2_39/404.html b/docs/2_39/404.html new file mode 100644 index 000000000..7f4b1223b --- /dev/null +++ b/docs/2_39/404.html @@ -0,0 +1,871 @@ + + + + + + + + + +Page Not Found + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ +
+
+

Page Not Found

+
+ + + +
+ + + + +
+ + + +
+ + +

Ulam

+

The page you requested cannot be found (perhaps it was moved or renamed).

+

You may want to try searching to find the page’s new location.

+
+

+        reject("This page cannot be found");
+
+ + + + Back to top
+ +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide-2_39.pdf b/docs/2_39/cmdstan-guide-2_39.pdf new file mode 100644 index 000000000..0df8dd16a Binary files /dev/null and b/docs/2_39/cmdstan-guide-2_39.pdf differ diff --git a/docs/2_39/cmdstan-guide/bib.html b/docs/2_39/cmdstan-guide/bib.html new file mode 100644 index 000000000..5bed838fb --- /dev/null +++ b/docs/2_39/cmdstan-guide/bib.html @@ -0,0 +1,821 @@ + + + + + + + + + +bib + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ + + + +
+

References

+ + +
+ + Back to top
+ +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/command_line_options.html b/docs/2_39/cmdstan-guide/command_line_options.html new file mode 100644 index 000000000..e821bbec5 --- /dev/null +++ b/docs/2_39/cmdstan-guide/command_line_options.html @@ -0,0 +1,1151 @@ + + + + + + + + + +Command-Line Interface Overview + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Command-Line Interface Overview

+

A CmdStan executable is built from the Stan model concept and the CmdStan command line parser. The command line argument syntax consists of sets of keywords and keyword-value pairs. Arguments are grouped by the following keywords:

+
    +
  • method - specifies the kind of inference done on the model.
    Each kind of inference requires further configuration via sub-arguments. The method argument is required. It can be specified overtly as the keyword-value pair method=<inference> or implicitly as one of the following:

    +
      +
    • sample - obtain a sample (set of draws) from the posterior using HMC
    • +
    • optimize - penalized maximum likelihood estimation
    • +
    • pathfinder - quasi-Newton variational inference
    • +
    • variational - automatic differentiation variational inference (ADVI)
    • +
    • laplace - sample from a normal approximation centered at the mode
    • +
    • generate_quantities - run model’s generated quantities block on existing sample to obtain new quantities of interest.
    • +
    • log_prob - compute the log probability and gradient of the model for one set of parameters.
    • +
    • diagnose - compute and compare sampler gradient calculations to finite differences.
    • +
  • +
  • data - specifies the input data file, if any.

  • +
  • output - specifies program outputs, both disk files and terminal window outputs.

  • +
  • init - specifies initial values for the model parameters, if any.

  • +
  • random - specifies the seed for the pseudo-random number generator.

  • +
+

The remainder of this chapter covers the general configuration options used for all processing. The following chapters cover the per-inference configuration options.

+
+

Input data argument

+

The values for all variables declared in the data block of the model are read in from an input data file in either JSON or Rdump format. The syntax for the input data argument is:

+
data file=<filepath>
+

The keyword data must be followed directly by the keyword-value pair file=<filepath>. If the model doesn’t declare any data variables, this argument is ignored.

+

The input data file must contain definitions for all data variables declared in the data block. If one or more data block variables are missing from the input data file, the program prints an error message to stderr and returns a non-zero return code. For example, the model bernoulli.stan defines two data variables N and y. If the input data file doesn’t include both variables, or if the data variable doesn’t match the declared type and dimensions, the program will exit with an error message at the point where it first encounters missing data.

+

For example if the input data file doesn’t include the definition for variable y, the executable exits with the following message:

+
Exception: variable does not exist; processing stage=data initialization; variable name=y; base type=int (in 'examples/bernoulli/bernoulli.stan', line 3, column 2 to column 28)
+
+
+

Output control arguments

+

The output keyword is used to specify non-default options for output files and messages written to the terminal window. The output keyword takes several keyword-value pair sub-arguments.

+

The keyword value pair file=<filepath> specifies the location of the Stan CSV output file. If unspecified, the output file is written to a file named output.csv in the current working directory.

+

The keyword value pair diagnostic_file=<filepath> specifies the location of the auxiliary output file. By default, no auxiliary output file is produced. This option is only valid for the iterative algorithms sample and variational.

+

The keyword value pair refresh=<int> specifies the number of iterations between progress messages written to the terminal window. The default value is 100 iterations.

+

The keyword value pair sig_figs=<int> specifies the number of significant digits for all numerical values in the output files. Allowable values are between 1 and 18, which is the maximum amount of precision available for 64-bit floating point arithmetic. The default value is 8.   Note: increasing sig_figs above the default will increase the size of the output CSV files accordingly.

+

The keyword value pair profile_file=<filepath> specifies the location of the output file for profiling data. If the model uses no profiling, the output profile file is not produced. If the model uses profiling and profile_file is unspecified, the profiling data is written to a file named profile.csv in the current working directory.

+

The keyword value pair save_cmdstan_config=<boolean> specifies whether to save the configuration options used to run the program to a file named <output file>_config.json alongside the other output files. The default value is false, which means the configuration file is not saved. The contents of this file are similar to the comments in the Stan CSV file, but should be more portable across versions and easier to parse.

+
+
+

Initialize model parameters argument

+

Initialization is only applied to parameters defined in the parameters block. By default, all parameters are initialized to random draws from a uniform distribution over the range \([-2, 2]\). These values are on the unconstrained scale, so must be inverse transformed back to satisfy the constraints declared for parameters. Because zero is chosen to be a reasonable default initial value for most parameters, the interval around zero provides a fairly diffuse starting point. For instance, unconstrained variables are initialized randomly in \((-2, 2)\), variables constrained to be positive are initialized roughly in \((0.14, 7.4)\), variables constrained to fall between 0 and 1 are initialized with values roughly in \((0.12, 0.88)\).

+

The initialization argument is specified as keyword-value pair with keyword init. The value can be one of the following:

+
    +
  • positive real number \(x\). All parameters will be initialized to random draws from a uniform distribution over the range \([-x, x]\).

  • +
  • \(0\) - All parameters will be initialized to zero values on the unconstrained scale. The transforms are arranged in such a way that zero initialization provides reasonable variable initializations: \(0\) for unconstrained parameters; \(1\) for parameters constrained to be positive; \(0.5\) for variables constrained to lie between \(0\) and \(1\); a symmetric (uniform) vector for simplexes; unit matrices for both correlation and covariance matrices; and so on.

  • +
  • filepath - A data file in JSON or Rdump format containing initial parameters values for some or all of the model parameters. User specified initial values must satisfy the constraints declared in the model (i.e., they are on the constrained scale). Parameters which aren’t explicitly initialized will be initialized randomly over the range \([-2, 2]\).

  • +
+
+
+

Random number generator arguments

+

The random-number generator’s behavior is determined by the unsigned seed (positive integer) it is started with. If a seed is not specified, or a seed of 0 or less is specified, the system time is used to generate a seed. The seed is recorded and included with Stan’s output regardless of whether it was specified or generated randomly from the system time.

+

The syntax for the random seed argument is:

+
random seed=<int>
+

The keyword random must be followed directly by the keyword-value pair seed=<int>.

+
+
+

Chain identifier argument: id

+

The chain identifier argument is used in conjunction with the random seed argument when running multiple Markov chains for sampling. The chain identifier is used to advance the random number generator a very large number of random variates so that two chains with the same seed and different identifiers draw from non-overlapping subsequences of the random-number sequence determined by the seed. Together, the seed and chain identifier determine the behavior of the random number generator.

+

The syntax for the chain identifier argument is:

+
id=<int>
+

The default value is 1.

+

When running a set of chains from the command line with a specified seed, this argument should be set to the chain index. E.g., when running 4 chains, the value should be 1,..,4, successively. When running multiple chains from a single command, Stan’s interfaces manage the chain identifier arguments automatically.

+

For complete reproducibility, every aspect of the environment needs to be locked down from the OS and version to the C++ compiler and version to the version of Stan and all dependent libraries. See the Stan Reference Manual Reproducibility chapter for further details.

+
+
+

Command line help

+

CmdStan provides a help and help-all mechanism that displays either the available top-level or keyword-specific key-value argument pairs. To display top-level help, call the CmdStan executable with keyword help:

+
./bernoulli help
+
+
+

Error messages and return codes

+

CmdStan executables and utility programs use streams standard output (stdout) and standard error (stderr) to report information and error messages, respectively. Some methods also generate warning messages when the algorithm detects potential problems with the inference. Depending on the method, these messages are sent to either standard out or standard error.

+

All program executables provide a return code between 0 and 255:

+
    +
  • 0 - Program ran to termination as expected.

  • +
  • value in range [1 : 125] - Method invoked could not run due to problems with model or data.

  • +
  • value > 128 - Fatal error during execution, process terminated by signal. To determine the signal number, subtract 128 from the return value, e.g. return code 139 results from termination signal 11 (segmentation violation).

  • +
+

A non-zero return code or outputs sent to stderr indicate problems with the inference. However, a return code of zero and absence of error messages doesn’t necessarily mean that the inference is valid; it is still necessary to validate the inferences using all available summary and diagnostic techniques.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/compiling_stan_programs.html b/docs/2_39/cmdstan-guide/compiling_stan_programs.html new file mode 100644 index 000000000..2b151827e --- /dev/null +++ b/docs/2_39/cmdstan-guide/compiling_stan_programs.html @@ -0,0 +1,1191 @@ + + + + + + + + + +Compiling a Stan Program + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Compiling a Stan Program

+

A Stan program must be in a file with extension .stan. The CmdStan makefile rules specify all necessary steps to translate files with suffix .stan to a CmdStan executable program. This is a two-stage process:

+
    +
  • first the Stan program is translated to C++ by the stanc compiler
  • +
  • then the C++ compiler compiles all C++ sources and links them together with the CmdStan interface program and the Stan and math libraries.
  • +
+
+

Invoking the Make utility

+

To compile Stan programs, you must invoke the Make program from the <cmdstan-home> directory. The Stan program can be in a different directory, but the directory path names cannot contain spaces - this limitation is imposed by Make.

+
> cd <cmdstan_home>
+

In the call to the Make program, the target is the name of the CmdStan executable corresponding to the Stan program file. On Mac and Linux, this is the name of the Stan program with the .stan omitted. On Windows, replace .stan with .exe, and make sure that the path is given with slashes and not backslashes. To build the Bernoulli example, on Mac and Linux:

+
> make examples/bernoulli/bernoulli
+

On Windows, the command is the same with the addition of .exe at the end of the target (note the use of forward slashes):

+
> make examples/bernoulli/bernoulli.exe
+

The generated C++ code (bernoulli.hpp), object file (bernoulli.o) and the compiled executable will be placed in the same directory as the Stan program.

+

The compiled executable consists of the Stan model and the CmdStan command line interface which provides inference algorithms to do MCMC sampling, optimization, and variational inference. The following sections provide examples of doing inference using each method on the example model and data file.

+
+
+

Dependencies

+

When executing a Make target, all its dependencies are checked to see if they are up to date, and if they are not, they are rebuilt. If you call Make with target bernoulli twice in a row, without editing bernoulli.stan or otherwise changing the system, on the second invocation, Make will determine that the executable is already newer than the Stan source file and will not recompile the program:

+
> make examples/bernoulli/bernoulli
+make: `examples/bernoulli/bernoulli' is up to date.
+

If the file containing the Stan program is updated, the next call to make will rebuild the CmdStan executable.

+
+
+

Compiler errors

+

The Stan probabilistic programming language is a programming language with a rich syntax; as such, it is often the case that even a carefully written program contains errors.

+

The simplest class of errors are simple syntax errors such as forgetting the semi-colon statement termination marker at the end of a line, or typos such as a misspelled variable name. For example, if in the bernoulli.stan program, we introduce a typo on line \(9\) by writing thata instead of theta, the Make command fails with the following

+
--- Translating Stan model to C++ code ---
+bin/stanc  --o=bernoulli.hpp bernoulli.stan
+
+Semantic error in 'bernoulli.stan', line 9, column 2 to column 7:
+   -------------------------------------------------
+     7:  }
+     8:  model {
+     9:    thata ~ beta(1, 1);  // uniform prior on interval 0, 1
+           ^
+    10:    y ~ bernoulli(theta);
+    11:  }
+   -------------------------------------------------
+
+Identifier 'thata' not in scope.
+
+make: *** [bernoulli.hpp] Error 1
+

Stan is a strongly-typed language; and the compiler will throw an error if statements or expressions violate the type rules. The following trivial program foo.stan contains an illegal assignment statement:

+
data {
+  real x;
+}
+transformed data {
+  int y = x;
+}
+

The Make command fails with the following:

+
Semantic error in 'foo.stan', line 5, column 2 to column 12:
+   -------------------------------------------------
+     3:  }
+     4:  transformed data {
+     5:    int y = x;
+           ^
+     6:  }
+   -------------------------------------------------
+
+Ill-typed arguments supplied to assignment operator =:
+ lhs has type int and rhs has type real
+

The Stan Reference Manual provides a complete specification of the Stan programming language. The Stan User’s Guide also contains a full description of the errors and warnings stanc can emit.

+
+
+

Troubleshooting C++ compiler or linker errors

+

If the stanc compiler successfully translates a Stan program to C++, the resulting C++ code should be valid C++ which can be compiled into an executable. The stanc compiler is also a program, and while it has been extensively tested, it may still contain errors such that the generated C++ code fails to compile.

+

The Make command prints the following message to the terminal at the point when it compiles and links the C++ file:

+
--- Compiling, linking C++ code ---
+

If the program fails to compile for any reason, the C++ compiler and linker will most likely print a long series of error messages to the console.

+

If this happens, please report the error, together with the Stan program on either the Stan Forums or on the Stan compiler GitHub issues tracker.

+
+
+

C++ compilation and linking flags

+

Users can set flags for the C++ compiler and linker to optimize their executables. We advise users to only do this once they are sure their basic setup of CmdStan without flags works.

+

The CXXFLAGS and LDFLAGS makefile variables can be used to set compiler and linker flags respectively. We recommend setting these in the make/local file.

+

For example:

+
CXXFLAGS = -O2
+

A recommended set of CXXFLAGS and LDFLAGS flags can be turned on by setting STAN_CPP_OPTIMS=true in the make/local file. These are tested compiler and link-time optimizations that can speed up execution of certain models. We have observed speedups up to 15 percent, but this depends on the model, operating system and hardware used. The use of these flags does considerably slow down compilation, so they are not used by default.

+
+

Optimizing by ignoring range checks

+

When assigning to or reading from vectors, row_vectors, matrices or arrays using indexing, Stan checks that a supplied index is valid (not out of range), which avoids segmentation faults and other difficult-to-debug runtime errors.

+

For some models these checks can represent a significant part of the model’s execution time. By setting the STAN_NO_RANGE_CHECKS=true makefile flag in the make/local file, the range checks can be removed. Use this flag with caution (only once the indexing has been validated). In case of any unexpected behavior, remove the flag for easier debugging.

+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/diagnose_config.html b/docs/2_39/cmdstan-guide/diagnose_config.html new file mode 100644 index 000000000..e590ceb56 --- /dev/null +++ b/docs/2_39/cmdstan-guide/diagnose_config.html @@ -0,0 +1,1102 @@ + + + + + + + + + +Diagnosing HMC by Comparison of Gradients + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Diagnosing HMC by Comparison of Gradients

+

CmdStan has a basic diagnostic feature that will calculate the gradients of the initial state and compare them with gradients calculated by finite differences. Discrepancies between the two indicate that there is a problem with the model or initial states or else there is a bug in Stan.

+

To allow for the possibility of adding other kinds of diagnostic tests, the diagnose method argument configuration has subargument test which currently only takes value gradient. There are two available gradient test configuration arguments:

+
    +
  • epsilon - The finite difference step size. Must be a positive real number. Default value is \(10^{-6}\)

  • +
  • error - The error threshold. Must be a positive real number. Default value is \(10^{-6}\)

  • +
+

To run on the different platforms with the default configuration, use one of the following.

+

Mac OS and Linux

+
> ./my_model diagnose data file=my_data
+

Windows

+
> my_model diagnose data file=my_data
+

To relax the test threshold, specify the error argument as follows:

+
> ./my_model diagnose test=gradient error=0.0001 data file=my_data
+

To see how this works, we run diagnostics on the example bernoulli model:

+
> ./bernoulli diagnose data file=bernoulli.data.json
+

Executing this command prints output to the console and as a series of comment lines to the output csv file. The console output is:

+
method = diagnose
+  diagnose
+    test = gradient (Default)
+      gradient
+        epsilon = 9.9999999999999995e-07 (Default)
+        error = 9.9999999999999995e-07 (Default)
+id = 0 (Default)
+data
+  file = bernoulli.data.json
+init = 2 (Default)
+random
+  seed = 2152196153 (Default)
+output
+  file = output.csv (Default)
+  diagnostic_file =  (Default)
+  refresh = 100 (Default)
+
+TEST GRADIENT MODE
+
+ Log probability=-8.42814
+
+ param idx           value           model     finite diff           error
+         0       0.0361376         -3.1084         -3.1084    -2.37554e-10
+

The same information is printed to the output file as csv comments, i.e., each line is prefixed with a pound sign #.

+ + +
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/diagnose_utility.html b/docs/2_39/cmdstan-guide/diagnose_utility.html new file mode 100644 index 000000000..804b2d293 --- /dev/null +++ b/docs/2_39/cmdstan-guide/diagnose_utility.html @@ -0,0 +1,1226 @@ + + + + + + + + + +Diagnosing Biased Hamiltonian Monte Carlo Inferences + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

diagnose: Diagnosing Biased Hamiltonian Monte Carlo Inferences

+

CmdStan is distributed with a utility that is able to read in and analyze the output of one or more Markov chains to check for the following potential problems:

+
    +
  • Divergent transitions
  • +
  • Transitions that hit the maximum treedepth
  • +
  • Low E-BFMI values
  • +
  • Low effective sample sizes
  • +
  • High \(\hat{R}\) values
  • +
+

The meanings of several of these problems are discussed in https://arxiv.org/abs/1701.02434.

+
+

Building the diagnose command

+

The CmdStan makefile task build compiles the diagnose utility into the bin directory. It can be compiled directly using the makefile as follows:

+
> cd <cmdstan-home>
+> make bin/diagnose
+
+
+

Running the diagnose command

+

The diagnose command is executed on one or more output files, which are provided as command-line arguments separated by spaces. If there are no apparent problems with the output files passed to diagnose, it outputs a message that all transitions are within treedepth limit and that no divergent transitions were found. If problems are detected, it outputs a summary of the problem along with possible ways to mitigate it.

+

To fully exercise the diagnose command, we run 4 chains to sample from the Neal’s funnel distribution, discussed in the Stan User’s Guide reparameterization section. This program defines a distribution which exemplifies the difficulties of sampling from some hierarchical models:

+
parameters {
+  real y;
+  vector[9] x;
+}
+model {
+  y ~ normal(0, 3);
+  x ~ normal(0, exp(y / 2));
+}
+

This program is available on GitHub: https://github.com/stan-dev/example-models/blob/master/misc/funnel/funnel.stan

+

Stan has trouble sampling from the region where y is small and thus x is constrained to be near 0. This is due to the fact that the density’s scale changes with y, so that a step size that works well when y is large is inefficient when y is small and vice-versa.

+

Running 4 chains produces output files output_1.csv, …, output_4.csv. We run the diagnose command on this fileset:

+
> bin/diagnose output_*.csv
+

The output is printed to the terminal window:

+
Checking sampler transitions treedepth.
+18 of 4000 (0.45%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps.
+Trajectories that are prematurely terminated due to this limit will result in slow exploration.
+For optimal performance, increase this limit.
+
+Checking sampler transitions for divergences.
+11 of 4000 (0.28%) transitions ended with a divergence.
+These divergent transitions indicate that HMC is not fully able to explore the posterior distribution.
+Try increasing adapt delta closer to 1.
+If this doesn't remove all divergences, try to reparameterize the model.
+
+Checking E-BFMI - sampler transitions HMC potential energy.
+The E-BFMI, 0.06, is below the nominal threshold of 0.30 which suggests that HMC may have trouble exploring the target distribution.
+If possible, try to reparameterize the model.
+
+Rank-normalized split effective sample size satisfactory for all parameters.
+
+The following parameters had rank-normalized split R-hat greater than 1.01:
+  y, x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9]
+Such high values indicate incomplete mixing and biased estimation.
+You should consider regularizing your model with additional prior information or a more effective parameterization.
+
+Processing complete.
+

In this example, changing the model to use a non-centered parameterization is the only way to correct these problems. In this second model, the parameters x_raw and y_raw are sampled as independent standard normals, which is easy for Stan.

+
parameters {
+  real y_raw;
+  vector[9] x_raw;
+}
+transformed parameters {
+  real y;
+  vector[9] x;
+
+  y = 3.0 * y_raw;
+  x = exp(y / 2) * x_raw;
+}
+model {
+  y_raw ~ std_normal(); // implies y ~ normal(0, 3)
+  x_raw ~ std_normal(); // implies x ~ normal(0, exp(y / 2))
+}
+

This program is available on GitHub: https://github.com/stan-dev/example-models/blob/master/misc/funnel/funnel_reparam.stan

+

We compile the program and run 4 chains, as before. Now the diagnose command doesn’t detect any problems:

+
Checking sampler transitions treedepth.
+Treedepth satisfactory for all transitions.
+
+Checking sampler transitions for divergences.
+No divergent transitions found.
+
+Checking E-BFMI - sampler transitions HMC potential energy.
+E-BFMI satisfactory.
+
+Rank-normalized split effective sample size satisfactory for all parameters.
+
+Rank-normalized split R-hat values satisfactory for all parameters.
+
+Processing complete, no problems detected.
+
+
+

diagnose warnings and recommendations

+
+

Divergent transitions after warmup

+

Stan uses Hamiltonian Monte Carlo (HMC) to explore the target distribution — the posterior defined by a Stan program + data — by simulating the evolution of a Hamiltonian system. In order to approximate the exact solution of the Hamiltonian dynamics we need to choose a step size governing how far we move each time we evolve the system forward. That is, the step size controls the resolution of the sampler.

+

Unfortunately, for particularly hard problems there are features of the target distribution that are too small for this resolution. Consequently the sampler misses those features and returns biased estimates. Fortunately, this mismatch of scales manifests as divergences which provide a practical diagnostic. If there are any divergences after warmup, then the sample based estimates may be biased.

+

If the divergent transitions cannot be eliminated by increasing the adapt_delta parameter, we have to find a different way to write the model that is logically equivalent but simplifies the geometry of the posterior distribution. This problem occurs frequently with hierarchical models and one of the simplest examples is Neal’s Funnel, which is discussed in the reparameterization section of the Stan User’s Guide.

+
+
+

Maximum treedepth exceeded

+

Warnings about hitting the maximum treedepth are not as serious as warnings about divergent transitions. While divergent transitions are a validity concern, hitting the maximum treedepth is an efficiency concern. Configuring the No-U-Turn-Sampler (the variant of HMC used by Stan) requires putting a cap on the depth of the trees that it evaluates during each iteration (for details on this see the Hamiltonian Monte Carlo Sampling chapter in the Stan Reference Manual). When the maximum allowed tree depth is reached it indicates that NUTS is terminating prematurely to avoid excessively long execution time.

+

This is controlled through the max_depth argument. If the number of transitions which exceed maximum treedepth is low, increasing max_depth may correct this problem.

+
+
+

Low E-BFMI values - sampler transitions HMC potential energy.

+

The sampler csv output column energy__ is used to diagnose the accuracy of any Hamiltonian Monte Carlo sampler. If the standard deviation of energy is much larger than \(\sqrt{D / 2}\), where \(D\) is the number of unconstrained parameters, then the sampler is unlikely to be able to explore the posterior adequately. This is usually due to heavy-tailed posteriors and can sometimes be remedied by reparameterizing the model.

+

The warning that some number of chains had an estimated Bayesian Fraction of Missing Information (BFMI) that was too low implies that the adaptation phase of the Markov Chains did not turn out well and those chains likely did not explore the posterior distribution efficiently. For more details on this diagnostic, see https://arxiv.org/abs/1604.00695. Should this occur, you can either run the sampler for more iterations, or consider reparameterizing your model.

+
+
+

Low effective sample sizes

+

Roughly speaking, the effective sample size (ESS) of a quantity of interest captures how many independent draws contain the same amount of information as the dependent sample obtained by the MCMC algorithm. Clearly, the higher the ESS the better. Stan uses \(\hat{R}\) adjustment to use the between-chain information in computing the ESS. For example, in case of multimodal distributions with well-separated modes, this leads to an ESS estimate that is close to the number of distinct modes that are found.

+

Bulk-ESS refers to the effective sample size based on the rank normalized draws. This does not directly compute the ESS relevant for computing the mean of the parameter, but instead computes a quantity that is well defined even if the chains do not have finite mean or variance. Overall bulk-ESS estimates the sampling efficiency for the location of the distribution (e.g. mean and median).

+

Often a much smaller ESS would be sufficient for the desired estimation accuracy, but the estimation of ESS and convergence diagnostics themselves require higher ESS. We recommend requiring that the bulk-ESS is greater than 100 times the number of chains. For example, when running four chains, this corresponds to having a rank-normalized effective sample size of at least 400.

+
+
+

High \(\hat{R}\)

+

The \(\hat{R}\) (R-hat) convergence diagnostic compares the between- and within-chain estimates for model parameters and other univariate quantities of interest. If chains have not mixed well (i.e., the between- and within-chain estimates don’t agree), \(\hat{R}\) is larger than 1. We recommend running at least four chains by default and only using the sample if \(\hat{R}\) is less than 1.01. Stan reports \(\hat{R}\) which is the maximum of rank normalized split-R-hat and rank normalized folded-split-R-hat, which works for thick tailed distributions and is sensitive also to differences in scale. For more details on this diagnostic, see https://arxiv.org/abs/1903.08008.

+

There is further discussion in https://arxiv.org/abs/1701.02434; however the correct resolution is necessarily model specific, hence all suggestions are general guidelines only.

+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/example_model_data.html b/docs/2_39/cmdstan-guide/example_model_data.html new file mode 100644 index 000000000..7bd1e760e --- /dev/null +++ b/docs/2_39/cmdstan-guide/example_model_data.html @@ -0,0 +1,1091 @@ + + + + + + + + + +Example Model and Data + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Example Model and Data

+

The following is a simple, complete Stan program for a Bernoulli model of binary data.1 The model assumes the binary observed data y[1],...,y[N] are i.i.d. with Bernoulli chance-of-success theta.

+
data { 
+  int<lower=0> N; 
+  array[N] int<lower=0, upper=1> y;
+} 
+parameters {
+  real<lower=0, upper=1> theta;
+} 
+model {
+  theta ~ beta(1, 1);  // uniform prior on interval 0,1
+  y ~ bernoulli(theta);
+}
+

The input data file contains definitions for the two variables N and y which are specified in the data block of program bernoulli.stan (above).

+

A data set of N=10 observations is included in the example Bernoulli model directory in both JSON notation and Rdump data format where 8 out of 10 trials had outcome 0 (failure) and 2 trials had outcome 1 (success). In JSON, this data is:

+
{
+    "N" : 10,
+    "y" : [0,1,0,0,0,0,0,0,0,1]
+}
+ + +
+ + + Back to top

Footnotes

+ +
    +
  1. The model is available with the CmdStan distribution at the path <cmdstan-home>/examples/bernoulli/bernoulli.stan.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/external_code.html b/docs/2_39/cmdstan-guide/external_code.html new file mode 100644 index 000000000..591f51b58 --- /dev/null +++ b/docs/2_39/cmdstan-guide/external_code.html @@ -0,0 +1,1197 @@ + + + + + + + + + +Using external C++ code + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Using external C++ code

+

The --allow-undefined flag can be passed to the call to stanc, which will allow undefined functions in the Stan language to be parsed without an error. We can then include a definition of the function in a C++ header file.

+

This requires specifying two makefile variables:

+
    +
  • STANCFLAGS=--allow-undefined
  • +
  • USER_HEADER=<header_file.hpp>, where <header_file.hpp> is the name of a header file that defines a function with the same name and a compatible signature. This function can appear in the global namespace or in the model namespace, which is defined as the name of the model (either the file name, or the --name argument to stanc) followed by _namespace.
  • +
+

This is an advanced feature which is only recommended to users familiar with the internals of Stan’s Math library. Most existing C++ code will need to be modified to work with Stan, to varying degrees.

+

As an example, consider the following variant of the Bernoulli example

+
functions {
+  real make_odds(data real theta);
+}
+data {
+  int<lower=0> N;
+  array[N] int<lower=0, upper=1> y;
+}
+parameters {
+  real<lower=0, upper=1> theta;
+}
+model {
+  theta ~ beta(1, 1); // uniform prior on interval 0, 1
+  y ~ bernoulli(theta);
+}
+generated quantities {
+  real odds;
+  odds = make_odds(theta);
+}
+

Here the make_odds function is declared but not defined, which would ordinarily result in a parser error. However, if you put STANCFLAGS = --allow-undefined into the make/local file or into the stanc call, then the stanc compiler will translate this program to C++, but the generated C++ code will not compile unless you write a file such as examples/bernoulli/make_odds.hpp with the following lines

+
#include <ostream>
+
+double make_odds(const double& theta, std::ostream *pstream__) {
+  return theta / (1 - theta);
+}
+

The signature for this function needs to fulfill all the usages in the C++ class emitted by stanc. The pstream__ argument is mandatory in the signature but need not be used if your function does not print any output. Because make_odds was declared with a data argument and only used in generated quantities, a signature which accepts and returns double is acceptable. Functions which will have parameters passed as input in the transformed parameters or model blocks will require the ability to accept Stan’s autodiff types. If you wish to autodiff through this function, the simplest option is to make it a template, like

+
template <typename T>
+T make_odds(const T &theta, std::ostream *pstream__)
+{
+    return theta / (1 - theta);
+}
+

Given the above, the following make invocation should work

+
> make STANCFLAGS=--allow-undefined USER_HEADER=examples/bernoulli/make_odds.hpp examples/bernoulli/bernoulli # on Windows add .exe
+

Alternatively, you could put STANCFLAGS and USER_HEADER into the make/local file instead of specifying them on the command-line.

+

If the function were more complicated and involved functions in the Stan Math Library, then you would need to add #include <stan/model/model_header.hpp> and prefix the function calls with stan::math::.

+
+

Derivative specializations

+

External C++ functions are currently the only way to encode a function with a known analytic gradient outside the Stan Math Library. This is done very similarly to how a function would be added to the Math library with a reverse-mode specialization. The following code is adapted from the Stan Math documentation.

+

Suppose you have the following (nonsensical) model which relies on a function called my_dot_self. We will implement this as a copy of the built-in dot_self function.

+
functions {
+  // both overloads end up using the same C++ template
+  real my_dot_self(vector theta);
+  real my_dot_self(row_vector theta);
+}
+data {
+  int<lower=0> N;
+  vector[N] input_data;
+}
+transformed data {
+  // no autodiff for data - will call using doubles
+  real ds = my_dot_self(input_data);
+}
+parameters {
+  row_vector[N] thetas;
+}
+model {
+  thetas ~ normal(0,1);
+  // autodiff - will call using stan::math::var types
+  input_data ~ normal(thetas, my_dot_self(thetas));
+}
+

If you wanted to autodiff through this function, the following header would suffice1:

+
#include <stan/model/model_header.hpp>
+#include <ostream>
+
+template <typename EigVec, stan::require_eigen_vector_t<EigVec> * = nullptr>
+inline stan::value_type_t<EigVec> my_dot_self(const EigVec &x, std::ostream *pstream__)
+{
+    const auto &x_ref = stan::math::to_ref(x);
+    stan::value_type_t<EigVec> sum_x = 0.0;
+    for (int i = 0; i < x.size(); ++i)
+    {
+        sum_x += x_ref.coeff(i) * x_ref.coeff(i);
+    }
+    return sum_x;
+}
+

However, we know the derivative of this function directly. To leverage this, we could use a more complicated form which has two function templates that differentiate themselves based on whether or not derivatives are required:

+
#include <stan/model/model_header.hpp>
+#include <ostream>
+
+template <typename EigVec, stan::require_eigen_vector_t<EigVec> * = nullptr,
+          stan::require_not_st_var<EigVec> * = nullptr>
+inline double my_dot_self(const EigVec &x, std::ostream *pstream__)
+{
+    auto x_ref = stan::math::to_ref(x);
+    double sum = 0.0;
+    for (int i = 0; i < x.size(); ++i)
+    {
+        sum += x_ref.coeff(i) * x_ref.coeff(i);
+    }
+    return sum;
+}
+
+template <typename EigVec, stan::require_eigen_vt<stan::is_var, EigVec> * = nullptr>
+inline stan::math::var my_dot_self(const EigVec &v, std::ostream *pstream__)
+{
+    // (1) put v into our memory arena
+    stan::arena_t<EigVec> arena_v(v);
+    // (2) calculate forward pass using
+    // (3) the .val() method for matrices of var types
+    stan::math::var res = my_dot_self(arena_v.val(), pstream__);
+    // (4) Place a callback for the reverse pass on the callback stack.
+    stan::math::reverse_pass_callback(
+        [res, arena_v]() mutable
+        { arena_v.adj() += 2.0 * res.adj() * arena_v.val(); });
+    return res;
+}
+

For more details about how to write C++ code using the Stan Math Library, see the Math library documentation at https://mc-stan.org/math/ or the paper at https://arxiv.org/abs/1509.07164.

+
+
+

Special functions: RNGs, distributions, editing target

+

Some functions have special meanings in Stan and place additional requirements on their signatures if used in external C++.

+
    +
  • RNGs must end with _rng. They will be passed a “base RNG object” as the second to last argument, before the pointer to the ostream. We recommend making this a template, since it may change. This is currently a stan::rng_t object (a type alias to boost::random::mixmax).
  • +
  • Functions which edit the target directly must end with _lp and will be passed a reference to lp__ and a reference to a stan::math::accumulator object as the final parameters before the ostream pointer. They are also expected to have a boolean template parameter propto__ which controls whether or not constant terms can be dropped.
  • +
  • Probability distributions must end with _lpdf or _lpmf and will be passed a boolean template parameter propto__ which controls whether or not constant terms can be dropped.
  • +
+ + +
+
+ + + Back to top

Footnotes

+ +
    +
  1. Details of programming in the Stan Math style are omitted from this section; it is presented only as an example.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/generate_quantities_config.html b/docs/2_39/cmdstan-guide/generate_quantities_config.html new file mode 100644 index 000000000..d2c731c4a --- /dev/null +++ b/docs/2_39/cmdstan-guide/generate_quantities_config.html @@ -0,0 +1,1159 @@ + + + + + + + + + +Generating Quantities of Interest from a Fitted Model + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Generating Quantities of Interest from a Fitted Model

+

The generate_quantities method allows you to generate additional quantities of interest from a fitted model without re-running the sampler. Instead, you write a modified version of the original Stan program and add a generated quantities block or modify the existing one which specifies how to compute the new quantities of interest. Running the generate_quantities method on the new program together with sampler outputs (i.e., a set of draws) from the fitted model runs the generated quantities block of the new program using the existing sample by plugging in the per-draw parameter estimates for the computations in the generated quantities block.

+

This method requires sub-argument fitted_params which takes as its value an existing Stan CSV file that contains parameter values from an equivalent model, i.e., a model with the same parameters block, conditioned on the same data.

+

The generated quantities block computes quantities of interest (QOIs) based on the data, transformed data, parameters, and transformed parameters. It can be used to:

+
    +
  • generate simulated data for model testing by forward sampling
  • +
  • generate predictions for new data
  • +
  • calculate posterior event probabilities, including multiple comparisons, sign tests, etc.
  • +
  • calculate posterior expectations
  • +
  • transform parameters for reporting
  • +
  • apply full Bayesian decision theory
  • +
  • calculate log likelihoods, deviances, etc. for model comparison
  • +
+

For an overview of the uses of this feature, see the Stan User’s Guide section on Stand-alone generated quantities and ongoing prediction.

+
+

Example

+

To illustrate how this works we use the generate_quantities method to do posterior predictive checks using the estimate of theta given the example bernoulli model and data, following the posterior predictive simulation procedure in the Stan User’s Guide.

+

We write a program bernoulli_ppc.stan which contains the following generated quantities block, with comments to explain the procedure:

+
generated quantities {
+  array[N] int y_sim;
+  // use current estimate of theta to generate new sample
+  for (n in 1:N) {
+    y_sim[n] = bernoulli_rng(theta);
+  }
+  // estimate theta_rep from new sample
+  real<lower=0, upper=1> theta_rep = sum(y_sim) * 1.0 / N;
+}
+

The rest of the program is the same as in bernoulli.stan.

+

The generate_quantities method requires the sub-argument fitted_params which takes as its value the name of a Stan CSV file. The per-draw parameter values from the fitted_params file will be used to run the generated quantities block.

+

If we run the bernoulli.stan program for a single chain to generate a sample in file bernoulli_fit.csv:

+
> ./bernoulli sample data file=bernoulli.data.json output file=bernoulli_fit.csv
+

Then we can run the bernoulli_ppc.stan to carry out the posterior predictive checks:

+
> ./bernoulli_ppc generate_quantities fitted_params=bernoulli_fit.csv \
+                  data file=bernoulli.data.json \
+                  output file=bernoulli_ppc.csv
+

The output file bernoulli_ppc.csv contains only the values for the variables declared in the generated quantities block, i.e., theta_rep and the elements of y_sim:

+
# model = bernoulli_ppc_model
+# method = generate_quantities
+#   generate_quantities
+#     fitted_params = bernoulli_fit.csv
+# id = 1 (Default)
+# data
+#   file = bernoulli.data.json
+# init = 2 (Default)
+# random
+#   seed = 2983956445 (Default)
+# output
+#   file = output.csv (Default)
+y_sim.1,y_sim.2,y_sim.3,y_sim.4,y_sim.5,y_sim.6,y_sim.7,y_sim.8,y_sim.9,y_sim.10,theta_rep
+1,1,1,0,0,0,1,1,0,1,0.6
+1,1,0,1,0,0,1,0,1,0,0.5
+1,0,1,1,1,1,1,1,0,1,0.8
+0,1,0,1,0,1,0,1,0,0,0.4
+1,0,0,0,0,0,0,0,0,0,0.1
+0,0,0,0,0,1,1,1,0,0,0.3
+0,0,1,0,1,0,0,0,0,0,0.2
+1,0,1,0,1,1,0,1,1,0,0.6
+...
+

Given the current implementation, to see the fitted parameter values for each draw, create a copy variable in the generated quantities block, e.g.:

+
generated quantities {
+  array[N] int y_sim;
+  // use current estimate of theta to generate new sample
+  for (n in 1:N) {
+    y_sim[n] = bernoulli_rng(theta);
+  }
+  real<lower=0, upper=1> theta_cp = theta;
+  // estimate theta_rep from new sample
+  real<lower=0, upper=1> theta_rep = sum(y_sim) * 1.0 / N;
+}
+

Now the output is slightly more interpretable: theta_cp is the same as the theta used to generate the values y_sim[1] through y_sim[N]. Comparing columns theta_cp and theta_rep allows us to see how the uncertainty in our estimate of theta is carried forward into our predictions:

+
y_sim.1,y_sim.2,y_sim.3,y_sim.4,y_sim.5,y_sim.6,y_sim.7,y_sim.8,y_sim.9,y_sim.10,theta_cp,theta_rep
+0,1,1,0,1,0,0,1,1,0,0.545679,0.5
+1,1,1,1,1,1,0,1,1,0,0.527164,0.8
+1,1,1,1,0,1,1,1,1,0,0.529116,0.8
+1,0,1,1,1,1,0,0,1,0,0.478844,0.6
+0,1,0,0,0,0,1,0,1,0,0.238793,0.3
+0,0,0,0,0,1,1,0,0,0,0.258294,0.2
+1,1,1,0,0,0,0,0,0,0,0.258465,0.3
+
+
+

Errors

+

The fitted_params file must be a Stan CSV file; attempts to use a regular CSV file will result in an error message of the form:

+
Error reading fitted param names from sample csv file <filename.csv>
+

The fitted_params file must contain columns corresponding to legal values for all parameters defined in the model. If any parameters are missing, the program will exit with an error message of the form:

+
Error reading fitted param names from sample csv file <filename.csv>
+

The parameter values of the fitted_params are on the constrained scale and must obey all constraints. For example, if we modify the contents of the first reported draw in bernoulli_fit.csv so that the value of theta is outside the declared bounds real<lower=0, upper=1>, the program will return the following error message:

+
Exception: lub_free: Bounded variable is 1.21397, but must be in the interval [0, 1] \
+(in 'bernoulli_ppc.stan', line 5, column 2 to column 30)
+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/img/logo_tm.png b/docs/2_39/cmdstan-guide/img/logo_tm.png new file mode 100644 index 000000000..48c9769c7 Binary files /dev/null and b/docs/2_39/cmdstan-guide/img/logo_tm.png differ diff --git a/docs/2_39/cmdstan-guide/img/warmup-epochs.png b/docs/2_39/cmdstan-guide/img/warmup-epochs.png new file mode 100644 index 000000000..7ccf22e05 Binary files /dev/null and b/docs/2_39/cmdstan-guide/img/warmup-epochs.png differ diff --git a/docs/2_39/cmdstan-guide/index.html b/docs/2_39/cmdstan-guide/index.html new file mode 100644 index 000000000..db95069e3 --- /dev/null +++ b/docs/2_39/cmdstan-guide/index.html @@ -0,0 +1,1077 @@ + + + + + + + + + +CmdStan User’s Guide + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ +
+
+

CmdStan User’s Guide

+

Version 2.39

+
+ + + +
+ + + + +
+ + + +
+ + +

+

This document is a user’s guide for CmdStan, the command-line interface to the Stan statistical modeling language. CmdStan provides the programs and tools to compile Stan programs into C++ executables that can be run directly from the command line, together with a few utilities to check and summarize the resulting outputs.

+

In CmdStan, statistical models written in the Stan probabilistic programming language are translated into a C++ program which is then compiled together with the CmdStan routines that provide the logic needed to manage all user inputs and program outputs and the Stan inference algorithms and math library. The resulting command line executable program can be used to

+
    +
  • do inference on data, producing an exact or approximate estimate of the posterior;

  • +
  • generate new quantities of interest from an existing estimate;

  • +
  • generate data from the model according to a given set of parameters.

  • +
+

The packages CmdStanR and CmdStanPy provide interfaces to CmdStan from R and Python, respectively; similarly, JuliaStan also interfaces with CmdStan.

+

Download the pdf version of this manual.

+
+

Benefits of CmdStan

+
    +
  • With every new Stan release, there is a corresponding CmdStan release, therefore CmdStan provides access to the latest version of Stan, and can be used to run the development version of Stan as well.

  • +
  • Of the Stan interfaces, CmdStan has the lightest memory footprint, therefore it can fit larger and more complex models. It also has the fewest dependencies, which makes it easier to run in limited environments such as clusters.

  • +
  • The output generated is in CSV format and can be post-processed using other Stan interfaces or general tools.

  • +
+
+
+

Stan documentation

+
    +
  • Stan User’s Guide The Stan user’s guide provides example models and programming techniques for coding statistical models in Stan. It also serves as an example-driven introduction to Bayesian modeling and inference.

  • +
  • Stan Reference Manual Stan’s modeling language is shared across all of its interfaces. The Stan Language Reference Manual provides a concise definition of the language syntax for all elements in the language together with an overview of the inference algorithms and posterior inference tools.

  • +
  • Stan Functions Reference The Stan Functions Reference provides definitions and examples for all the functions defined in the Stan math library and available in the Stan programming language, including all probability distributions.

  • +
+
+ +
+

Licensing

+ + + +
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/installation.html b/docs/2_39/cmdstan-guide/installation.html new file mode 100644 index 000000000..abdc6b9c4 --- /dev/null +++ b/docs/2_39/cmdstan-guide/installation.html @@ -0,0 +1,1357 @@ + + + + + + + + + +CmdStan Installation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

CmdStan Installation

+

There are a few ways that you can install CmdStan. Depending on your operating system and your level of expertise, you can either:

+
    +
  • Use the conda package management system to install a pre-built version of CmdStan along with the required dependencies. Recommended for Windows users.

  • +
  • Install the source code from GitHub CmdStan repository. This requires a modern C++ compiler and toolchain. See the C++ Toolchain section for further details.

  • +
+
+

Installation via conda

+

With conda, you can install CmdStan from the conda-forge channel. This will install a pre-built version of CmdStan along with the required dependencies (i.e. a C++ compiler, a version of Make, and required libraries). The conda installation is designed so one can use the R or Python bindings to CmdStan seamlessly. Additionally, it provides the command cmdstan_model to activate the CmdStan makefile from anywhere.

+

Note: This requires that conda has been installed already on your machine. We recommend using the miniforge distribution.

+

We recommend installing CmdStan in a new conda environment:

+
 conda create -n stan -c conda-forge cmdstan
+

This command creates a new conda environment named stan and downloads and installs the cmdstan package as well as CmdStan and the required C++ toolchain.

+

To install into an existing conda environment, use the conda install command instead of create:

+
 conda install -c conda-forge cmdstan
+

Whichever installation method you use, afterwards you must activate the new environment or deactivate/activate the existing one. For example, if you installed cmdstan into a new environment stan, run the command

+
 conda activate stan
+

By default, the latest release of CmdStan is installed. If you require a specific release of CmdStan, CmdStan versions 2.26.1 and newer can be installed by specifying cmdstan==VERSION in the install command. For example, to install an earlier version of CmdStan into your current conda environment, run the following command, then re-activate the environment:

+
conda install -c conda-forge cmdstan=2.27.0
+
+

CmdStan install location under conda

+

A Conda environment is a directory that contains a specific collection of Conda packages. To see the locations of your conda environments, use the command

+
 conda info -e
+

The shell environment variable CONDA_PREFIX points to the active conda environment (if any). Both CmdStan and the C++ toolchain are installed into the bin subdirectory of the conda environment directory, i.e., $CONDA_PREFIX/bin/cmdstan (Linux, MacOS), %CONDA_PREFIX%\bin\cmdstan (Windows).

+

Please report conda-specific install problems directly to the conda-forge issue tracker, here.

+
+
+
+

Installation from GitHub

+

Installation from GitHub consists of the following steps:

+
    +
  • Verify that you have a modern C++ toolchain. See the C++ Toolchain section for details.

  • +
  • Download the CmdStan source code from GitHub

  • +
  • Build the CmdStan libraries and executables

  • +
  • Check the installation by compiling and running the CmdStan example model bernoulli.stan.

  • +
+
+

Downloading the source code

+

The GitHub source code is divided into sub-modules, each in its own repository. The CmdStan repo contains just the cmdstan module; the Stan inference engine algorithms and Stan math library functions are specified as submodules and stored in the GitHub repositories stan and math, respectively.

+

A CmdStan release is a compressed tarfile which contains CmdStan and the Stan and math library submodules. The most recent CmdStan release is always available as https://github.com/stan-dev/cmdstan/releases/latest. A CmdStan release is versioned by major, minor, patch numbers, e.g., “2.29.2”. Please ensure you download a tarfile which is named “cmdstan-<version-number>” rather than using the “Source Code” links at the bottom of the release. These are automatically generated by GitHub and do not contain the required submodules. The release tarfile unpacks into a directory named “cmdstan-<version-number>”, e.g. “cmdstan-2.29.2”.

+

By cloning the CmdStan repository with argument --recursive, Git automatically initializes and updates each submodule in the repository, including nested submodules if any of the submodules in the repository have submodules themselves. The following command will download the source code from the current development branch of CmdStan into a directory named cmdstan:

+
> git clone https://github.com/stan-dev/cmdstan.git --recursive
+

Throughout this manual, we refer to this top-level CmdStan source directory as <cmdstan-home>. This directory contains the following subdirectories:

+
    +
  • directory cmdstan/stan contains the sub-module stan (https://github.com/stan-dev/stan)
  • +
  • directory cmdstan/stan/lib/stan_math contains the sub-module math (https://github.com/stan-dev/math)
  • +
+
+
+

Building CmdStan

+

Building CmdStan involves preparing a set of executable programs and compiling the command line interface and supporting libraries. The CmdStan tools are:

+
    +
  • stanc: the Stan compiler (translates Stan language to C++).

  • +
  • stansummary: a basic posterior analysis tool. The stansummary utility processes one or more output files from a run or set of runs of Stan’s HMC sampler. For all parameters and quantities of interest in the Stan program, stansummary reports a set of statistics including mean, standard deviation, percentiles, effective sample size, and \(\hat{R}\) values.

  • +
  • diagnose: a basic sampler diagnostic tool which checks for indications that the HMC sampler was unable to sample from the full posterior.

  • +
+

CmdStan releases include pre-built binaries of the Stan language compiler (https://github.com/stan-dev/stanc3): bin/linux-stanc, bin/mac-stanc and bin/windows-stanc. The CmdStan makefile build task copies the appropriate binary to bin/stanc. For CmdStan installations which have been cloned or downloaded from the CmdStan GitHub repository, the makefile task will download the appropriate OS-specific binary from the stanc3 repository’s nightly release.

+

Steps to build CmdStan:

+
    +
  • Open a command-line terminal window and change directories to the CmdStan home directory.

  • +
  • Run the makefile target build which instantiates the CmdStan utilities and compiles all necessary C++ libraries.

  • +
+
> cd <cmdstan-home>
+> make build
+

If your computer has multiple cores and sufficient RAM, the build process can be parallelized by providing the -j option. For example, to build on 4 cores, type:

+
> make -j4 build
+

When make build is successful, the directory <cmdstan-home>/bin/ will contain the executables stanc, stansummary, and diagnose (on Windows, corresponding .exe files) and the final lines of console output will show the version of CmdStan that has just been built, e.g.:

+
--- CmdStan v2.29.2 built ---
+

Warning: The Make program may take 10+ minutes and consume 2+ GB of memory to build CmdStan.

+

Windows only: CmdStan requires that the Intel TBB library, which is built by the above command, can be found by the Windows system. This requires that the directory <cmdstan-home>/stan/lib/stan_math/lib/tbb is part of the PATH environment variable. See these instructions for details on changing the PATH. To permanently make this setting for the current user, you may execute:

+
> make install-tbb
+

After changing the PATH environment variable, you must open a new shell in order for the new environment variable settings to take effect. (This is not necessary on Mac and Linux systems because they can use the absolute path to the Intel TBB library when linking into Stan programs.)

+
+
+
+

Checking the Stan compiler

+

To check that the CmdStan installation is complete and in working order, run the following series of commands from the folder in which CmdStan was installed.

+

On Linux and macOS:

+
# compile the example
+> make examples/bernoulli/bernoulli
+
+# fit to provided data (results of 10 trials, 2 out of 10 successes)
+> ./examples/bernoulli/bernoulli sample\
+  data file=examples/bernoulli/bernoulli.data.json
+
+# default output written to file `output.csv`,
+# default num_samples is 1000, output file should have approx. 1050 lines
+> wc -l output.csv
+
+# run the `bin/stansummary` utility to summarize parameter estimates
+> bin/stansummary output.csv
+

On Windows:

+
# compile the example
+> make examples/bernoulli/bernoulli.exe
+
+# fit to provided data (results of 10 trials, 2 out of 10 successes)
+> ./examples/bernoulli/bernoulli.exe sample data file=examples/bernoulli/bernoulli.data.json
+
+# run the `bin/stansummary.exe` utility to summarize parameter estimates
+> bin/stansummary.exe output.csv
+

The sample data in file bernoulli.data.json specifies 2 out of 10 successes, therefore the range mean(theta)\(\pm\)sd(theta) should include 0.2.

+
+
+

Troubleshooting the installation

+

Updates to CmdStan, changes in compiler options, or updates to the C++ toolchain may result in errors when trying to compile a Stan program. Often, these problems can be resolved by removing the existing CmdStan binaries and recompiling. To do this, you must run the makefile commands from the <cmdstan-home> directory:

+
> cd <cmdstan-home>
+> make clean-all
+> make build
+
+

Common problems

+

This section contains solutions to problems reported on https://discourse.mc-stan.org

+

Compiler error message about PCH file

+

To speed up compilation, the Stan makefile pre-compiles parts of the core Stan library. If these pre-compiled files are out of sync with the compiled model, the compiler will complain, e.g.:

+
error: PCH file uses an older PCH format that is no longer supported
+

In this case, clean and rebuild CmdStan, as shown in the previous section.

+

Windows: ‘g++’, ‘make’, or ‘cut’ is not recognized

+

The CmdStan makefile uses a few shell utilities which might not be present in Windows, resulting in the error message:

+
'cut' is not recognized as an internal or external command,
+operable program or batch file.
+

To fix this, ensure you have followed the steps for adding the toolchain to your PATH and installing the additional utilities covered in the configuration instructions.

+

Spaces in paths to CmdStan or model

+

make can fail when dealing with files in folders with a space somewhere in their file path. Particularly on Windows, this can be an issue when CmdStan, or the models you are trying to build, are placed in the OneDrive folder.

+

Unfortunately, the errors created by this situation are not always informative. Some errors you may see are:

+
make: *** INTERNAL: readdir: Invalid argument
+
make: *** [make/program:50: x.hpp] Error 2
+

If the (fully-expanded) folder path to CmdStan or the model you are trying to build contains a space, we recommend trying a different location if you encounter any issues during building.

+
+
+
+

C++ Toolchain

+

Compiling a Stan program requires a modern C++ compiler and the GNU Make build utility (a.k.a. “gmake”). These vary by operating system.

+
+

Linux

+

The required C++ compiler is g++ 4.9.3 or later. On most systems the GNU Make utility is pre-installed and is the default make utility. There is usually a pre-installed C++ compiler as well; however, it may not be new enough. To check, run commands:

+
g++ --version
+make --version
+

If these are at least g++ version 4.9.3 and make version 3.81, no additional installations are necessary. It may still be desirable to update the C++ compiler g++, because later versions are faster.

+

To install the latest version of these tools (or upgrade an older version), use the following commands or their equivalent for your distribution:

+
sudo apt install g++
+sudo apt install make
+

If you can’t run sudo, you will need to ask your sysadmin or cluster administrator to install these tools for you.

+
+
+

MacOS

+

To check if you already have an appropriate toolchain installed, open the Terminal application and enter:

+
clang++ --version
+make --version
+

If either of these commands prints the message command not found, you will need to install Xcode’s command line tools.

+

Open the Terminal application and enter:

+
xcode-select --install
+

Select “Install” in the window that opens.

+

After the installation completes, you can double check that installation was successful by reopening the Terminal and running:

+
clang++ --version
+make --version
+

You can read more about Xcode on its site: https://developer.apple.com/xcode/

+

We don’t recommend trying to use the GNU C++ compiler, available via Homebrew, based on the number of reports of installation difficulties from Mac users on GitHub as well as the Stan forums.

+
+
+

Windows

+

The Windows toolchain consists of programs g++, the C++ compiler, and make, the GNU Make utility. To check if these are present, open a command shell and type:

+
g++ --version
+make --version
+

CmdStan is known to be compatible with the RTools45 toolchain. The toolchain will require updating your PATH variable; see these instructions for details on changing the PATH if you are unfamiliar. The following instructions will assume that the default installation directory was used, so be sure to update the paths accordingly if you have chosen a different directory.

+
+
RTools45
+

All required utilities (e.g., make, g++) for compiling and running CmdStan models on Windows are provided by the RTools45 toolchain from the R Project. Installation steps are provided below, and for more technical details on the toolchain refer to the R Project documentation.

+

The R Project provides RTools45 for both Intel/AMD 64-bit (x86_64) and ARM 64-bit (aarch64) systems. If you are unsure which to use, then you can check by going to the Windows Settings, selecting the ‘System’ menu and then the ‘About’ option. If the ‘System Type’ field lists ‘ARM-based processor’, then you should follow the ARM64 instructions below.

+

Note that the toolchain is only available for 64-bit systems, and uses the new Universal C Runtime (UCRT). UCRT is only natively supported on Windows 10 and newer; older systems will require a Microsoft update.

+
+
Installation - Intel/AMD 64-bit (x86_64)
+

Download the installer and complete the prompts for installation:

+ +

Next, you need to add the toolchain directory to your PATH variable:

+
C:\rtools45\usr\bin
+C:\rtools45\x86_64-w64-mingw32.static.posix\bin
+
+
+
Installation - ARM 64-bit (arm64/aarch64)
+

Download the installer and complete the prompts for installation:

+ +

Next, you need to add the toolchain directory to your PATH variable:

+
C:\rtools45-aarch64\usr\bin
+C:\rtools45-aarch64\aarch64-w64-mingw32.static.posix\bin
+
+
+
+
+
+

Using GNU Make

+

CmdStan relies on the GNU Make utility to build both the Stan model executables and the CmdStan tools.

+

GNU Make builds executable programs and libraries from source code by reading files called Makefiles which specify how to derive the target program. A Makefile consists of a set of recursive rules where each rule specifies a target, its dependencies, and the specific operations required to build the target. Specifying dependencies for a target provides a way to control the build process so that targets which depend on other files will be updated as needed only when there are changes to those other files. Thus Make provides an efficient way to manage complex software.

+

The CmdStan Makefile is in the <cmdstan-home> directory and is named makefile. This is one of the default GNU Makefile names, which allows you to omit the -f makefile argument to the Make command. Because the CmdStan Makefile includes several other Makefiles, Make only works properly when invoked from the <cmdstan-home> directory; attempts to use this Makefile from another directory by specifying the full path to the file makefile won’t work. For example, trying to call Make from another directory by specifying the full path to the makefile results in the following set of error messages:

+
make -f ~/github/stan-dev/cmdstan/makefile
+/Users/mitzi/github/stan-dev/cmdstan/makefile:58: make/stanc: No such file or directory
+/Users/mitzi/github/stan-dev/cmdstan/makefile:59: make/program: No such file or directory
+/Users/mitzi/github/stan-dev/cmdstan/makefile:60: make/tests: No such file or directory
+/Users/mitzi/github/stan-dev/cmdstan/makefile:61: make/command: No such file or directory
+make: *** No rule to make target `make/command'.  Stop.
+

The conda-forge cmdstan package provides a solution to this problem via the cmdstan_model command, which lets you run the CmdStan makefile from anywhere to compile a Stan model.

+

Makefile syntax allows general pattern rules based on file suffixes. Stan programs must be stored in files with suffix .stan; the CmdStan makefile rules specify how to transform the Stan source code into a binary executable. For example, to compile the Stan program my_program.stan in directory ../my_dir/, the make target is ../my_dir/my_program or ../my_dir/my_program.exe (on Windows).

+

To call Make, you invoke the utility name, make, followed by, in order:

+
    +
  • zero or more Make program options

  • +
  • zero or more Make variables, described below

  • +
  • zero or more target names; the set of names is determined by the Makefile rules.

  • +
+
make <flags> <variables> <targets>
+

Makefile Variables

+

Make targets can be preceded by any number of Makefile variable name=value pairs. For example, to compile ../my_dir/my_program.stan for an OpenCL (GPU) machine, set the makefile variable STAN_OPENCL to TRUE:

+
> make STAN_OPENCL=TRUE ../my_dir/my_program
+

Makefile variables can also be set by creating a file named local in the CmdStan make subdirectory which contains a list of <VARIABLE>=<VALUE> pairs, one per line. For example, to get the same effect as the above command every time, you would put the line STAN_OPENCL=TRUE into the file <cmdstan-home>/make/local.

+

The complete set of Makefile variables can be found in file <cmdstan-home>/stan/lib/stan_math/make/compiler_flags.

+

Make Targets

+

When invoked without any arguments at all, Make prints a help message:

+
> make
+--------------------------------------------------------------------------------
+CmdStan v2.33.1 help
+
+  Build CmdStan utilities:
+    > make build
+
+    This target will:
+    1. Install the Stan compiler bin/stanc from stanc3 binaries.
+    2. Build the print utility bin/print (deprecated; will be removed in v3.0)
+    3. Build the stansummary utility bin/stansummary
+    4. Build the diagnose utility bin/diagnose
+    5. Build all libraries and object files compile and link an executable Stan program
+
+    Note: to build using multiple cores, use the -j option to make, e.g.,
+    for 4 cores:
+    > make build -j4
+
+
+  Build a Stan program:
+
+    Given a Stan program at foo/bar.stan, build an executable by typing:
+    > make foo/bar
+
+    This target will:
+    1. Install the Stan compiler (bin/stanc), as needed.
+    2. Use the Stan compiler to generate C++ code, foo/bar.hpp.
+    3. Compile the C++ code using cc . to generate foo/bar
+
+  Additional make options:
+    STANCFLAGS: defaults to "". These are extra options passed to bin/stanc
+      when generating C++ code. If you want to allow undefined functions in the
+      Stan program, either add this to make/local or the command line:
+          STANCFLAGS = --allow_undefined
+    USER_HEADER: when STANCFLAGS has --allow_undefined, this is the name of the
+      header file that is included. This defaults to "user_header.hpp" in the
+      directory of the Stan program.
+    STANC3_VERSION: When set, uses that tagged version specified; otherwise, downloads
+      the nightly version.
+    STAN_CPP_OPTIMS: Turns on additonal compiler flags for performance.
+    STAN_NO_RANGE_CHECKS: Removes the range checks from the model for performance.
+
+
+  Example - bernoulli model: examples/bernoulli/bernoulli.stan
+
+    1. Build the model:
+       > make examples/bernoulli/bernoulli
+    2. Run the sampling algorithm given the model and data:
+       > examples/bernoulli/bernoulli sample data file=examples/bernoulli/bernoulli.data.R
+    3. Look at the posterior sample:
+       > bin/stansummary output.csv
+
+
+  Clean CmdStan:
+
+    Remove the built CmdStan tools:
+    > make clean-all
+
+--------------------------------------------------------------------------------
+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/json_apdx.html b/docs/2_39/cmdstan-guide/json_apdx.html new file mode 100644 index 000000000..f958afc7a --- /dev/null +++ b/docs/2_39/cmdstan-guide/json_apdx.html @@ -0,0 +1,1273 @@ + + + + + + + + + +JSON Format for CmdStan + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

JSON Format for CmdStan

+

CmdStan can use JSON format for input data for both model data and parameters. Model data is read in by the model constructor. Model parameters are used to initialize the sampler and optimizer.

+
+

Creating JSON files

+

You can create the JSON file yourself using the guidelines below, but a more convenient way to create a JSON file for use with CmdStan is to use the write_stan_json() function provided by the CmdStanR interface.

+
+
+

JSON syntax summary

+

JSON is a data interchange notation, defined by an ECMA standard. JSON data files must be in Unicode. JSON data is a series of structural tokens, literal tokens, and values:

+
    +
  • Structural tokens are the left and right curly bracket {}, left and right square bracket [], the colon :, and the comma ,.

  • +
  • Literal tokens must always be in lowercase. There are three literal tokens: true, false, null.

  • +
  • A primitive value is a single token which is either a literal, a string, or a number.

  • +
  • A string consists of zero or more Unicode characters enclosed in double quotes, e.g. "foo". A backslash is used to escape the double quote character as well as the backslash itself. JSON allows the use of Unicode character escapes, e.g. "\\uHHHH" where HHHH is the Unicode code point in hex.

  • +
  • Numbers are represented using either decimal notation or scientific notation. The following are examples of numbers: 17, 17.2, -17.2, -17.2e8, 17.2e-8.
    There is no distinction between integer and real numbers in the JSON format other than whether they have periods or scientific notation.

  • +
  • The special floating point values for positive infinity, negative infinity, and not-a-number can be represented in multiple ways. Positive infinity can be represented as the string "Inf", the string "Infinity", or the atom Infinity. Negative infinity can be represented as the string "-Inf", the string "-Infinity", or the atom -Infinity. Not-a-number can be represented as the string "NaN" or the atom NaN. These values may be mixed with other numerical types.

  • +
  • A complex scalar is represented as a two-element array consisting of its real component followed by its imaginary component. For example, the complex number \(2.3 - 1.83i\) would be represented in JSON as the two-element array [2.3, -1.83].

  • +
  • A JSON array is an ordered, comma-separated list of zero or more JSON values enclosed in square brackets. The elements of an array can be of any type. The following are examples of arrays: [], [1], [0.2, "-inf", true].

  • +
  • Vectors and row vectors in JSON are represented as arrays of their elements. For example, both the vector \([1 \quad 2]^{\top}\) and the row vector \([1 \quad 2]\) are represented by the JSON array [1, 2].

  • +
  • Complex vectors are represented as arrays of two-element arrays. For example, the complex vector \([2.3 - 1.83i \quad -4.8 + +2i]^{\top}\) is represented as [[2.3, -1.83], [-4.8, 2]] in JSON. A complex row vector has the same representation as its transpose (the vector with the same elements).

  • +
  • Matrices are represented as arrays of their row vectors. For example, the \(2 \times 3\) matrix \[\begin{equation*} +\begin{bmatrix} +1 & 2.7 & -9.8 \\ +4.2 & 1.8 & -7.3 +\end{bmatrix} +\end{equation*}\] is represented in JSON as [[1, 2.7, -9.8], [4.2, 1.8, -7.3]].

  • +
  • Complex matrices are also represented as arrays of their row vectors. For example, the \(2 \times 3\) complex matrix \[\begin{equation*} +\begin{bmatrix} +1 + 2i & 3 - 4.2i & 13.1 + 2.7i \\ +3.1 & -5i & 0 +\end{bmatrix} +\end{equation*}\] would be represented in JSON as [[[1, 2], [3, -4.2], [13.1, 2.7]], [[3.1, 0], [0, -5], [0, 0]]].

  • +
  • Tuples are written as nested JSON objects where the keys are strings for the numbered slots in the tuple. For example, the tuple (1.5, 3.4) is represented in JSON as {"1": 1.5, "2": 3.4}.

  • +
  • A name-value pair consists of a string followed by a colon followed by a value, either primitive or compound.

  • +
  • A JSON object is a comma-separated series of zero or more name-value pairs enclosed in curly brackets. Each name-value pair is a member of the object. Membership is unordered. Member names are not required to be unique. The following are examples of objects: { }, {"foo": null}, {"bar" : 17, "baz" : [14,15,16.6] }.

  • +
+
+
+

Stan data types in JSON notation

+

Stan follows the JSON standard. A Stan input file in JSON notation consists of a single JSON object which contains zero or more name-value pairs. This structure corresponds to a Python data dictionary object. The following is an example of JSON data for the simple Bernoulli example model:

+
{ "N" : 10, "y" : [0,1,0,0,0,0,0,0,0,1] }
+

Matrix data and multi-dimensional arrays are indexed in row-major order. For a Stan program which has data block:

+
data {
+  int d1;
+  int d2;
+  int d3;
+  array[d1, d2, d3] int ar;
+}
+

the following JSON input would be valid:

+
{ "d1" : 2,
+  "d2" : 3,
+  "d3" : 4,
+  "ar" : [[[0,1,2,3], [4,5,6,7], [8,9,10,11]],
+          [[12,13,14,15], [16,17,18,19], [20,21,22,23]]]
+}
+

JSON ignores whitespace. In the above examples, the spaces and newlines are only used to improve readability and can be omitted.

+

All data inputs are encoded as name-value pairs. The following table provides more examples of JSON data. The left column contains a Stan data variable declaration and the right column contains valid JSON data inputs.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Stan declarationJSON encoding
int i"i": 17
real a"a" : 17
"a" : 17.2
"a" : "NaN"
"a" : "+inf"
"a" : "-inf"
complex z"z": [1, -2.3]
array[5] int a"a" : [1, 2, 3, 4, 5]
array[5] real a"a" : [ 1, 2, 3.3, "NaN", 5 ]
array[2] complex b"b" : [[1, -2.3], [4.9, 0]]
vector[5] a"a" : [1, 2, 3.3, "NaN", 5]
row_vector[5] a"a" : [1, 2, 3.3, "NaN", 5]
matrix[2, 3] a"a" : [[ 1, 2, 3 ], [ 4, 5, 6]]
complex_vector[3] c"c" : [[-1.2, 3.3], [4.8, 1.9], [2.3, 0]]
complex_row_vector[3] c"c" : [[-1.2, 3.3], [4.8, 1.9], [2.3, 0]]
complex_matrix[2, 3] d"d" : [[[1, 1], [2, 2], [3, 3]], [[4, 4], [5, 5], [6, 6]]]
tuple(real, array[2] int) t"t" : { "1": 1.4, "2": [1, 2]}
+
+

Empty arrays in JSON

+

JSON notation is not able to distinguish between multi-dimensional arrays where any dimension is \(0\), e.g., a 2-D array with dimensions \((1,0)\), i.e., an array which contains a single array which is empty, has JSON representation [ ]. To see how this works, consider the following Stan program data block:

+
data {
+  int d;
+  array[d] int ar_d1;
+  array[d, d] int ar_d2;
+  array[d, d, d] int ar_d3;
+}
+

In the case where variable d is 1, all arrays will contain a single value. If array variable ar_d1 contains value 7, 2-D array variable ar_d2 contains (an array which contains) value 8, and 3-D array variable ar_d3 contains (an array which contains an array which contains) value 9, the JSON representation is:

+
{ "ar_d1" : [7],
+  "ar_d2" : [[8]],
+  "ar_d3" : [[[9]]]
+}
+

However, in the case where variable d is 0, ar_d1 is empty, i.e., it contains no values, as is ar_d2, ar_d3, and the JSON representation is

+
{ "d" : 0,
+  "ar_d1" : [ ],
+  "ar_d2" : [ ],
+  "ar_d3" : [ ]
+}
+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/laplace_sample_config.html b/docs/2_39/cmdstan-guide/laplace_sample_config.html new file mode 100644 index 000000000..65f2b4133 --- /dev/null +++ b/docs/2_39/cmdstan-guide/laplace_sample_config.html @@ -0,0 +1,1134 @@ + + + + + + + + + +Laplace sampling + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Laplace sampling

+

The laplace method produces a sample from a normal approximation centered at the mode of a distribution in the unconstrained space. If the mode is a maximum a posteriori (MAP) estimate, the sample provides an estimate of the mean and standard deviation of the posterior distribution. If the mode is a maximum likelihood estimate (MLE), the sample provides an estimate of the standard error of the likelihood. In general, the posterior mode in the unconstrained space doesn’t correspond to the mean (nor mode) in the constrained space, and thus the sample is needed to infer the mean as well as the standard deviation. (See this case study for a visual illustration.)

+

This is computationally inexpensive compared to exact Bayesian inference with MCMC. The goodness of this estimate depends on both the estimate of the mode and how much the true posterior in the unconstrained space resembles a Gaussian.

+
+

Configuration

+

This method takes several arguments:

+
    +
  • mode - Input file of parameter values on the constrained scale. When Stan’s optimize method is used to estimate the modal values, the value of boolean argument jacobian should be false if optimize was run with default settings, i.e., the input is the MLE estimate; if optimize was run with argument jacobian=true, then the laplace method default setting, jacobian=true, should be used.

  • +
  • jacobian - Whether or not the Jacobian adjustment should be included in the gradient. The default value is true (include adjustment). (Note: in optimization, the default value is false, for historical reasons.)

  • +
  • draws - How many total draws to return. The default is \(1000\).

  • +
  • calculate_lp - Whether to calculate the log probability of the model at each draw. If this is false, the log_p__ column of the output will be entirely nan. The default value is true.

  • +
+
+
+

CSV output

+

The output file consists of the following pieces of information:

+
    +
  • The full set of configuration options available for the laplace method is reported at the beginning of the output file as CSV comments.

  • +
  • Output columns log_p__ and log_q__, the unnormalized log density of the model and the unnormalized log density of the Laplace approximation, respectively. These can be used for diagnostics and importance sampling.

  • +
  • Output columns for all model parameters on the constrained scale.

  • +
+
+
+

Diagnostic file outputs

+

If requested with output diagnostic_file=, a JSON file will be created which contains the log density, the gradient, and the Hessian of the log density evaluated at the mode.

+
+
+

Example

+

To get an approximate estimate of the mean and standard deviation of the example Bernoulli model given the example dataset:

+
    +
  • find the MAP estimate by running optimization with argument jacobian=true

  • +
  • run the Laplace estimator using the MAP estimate as the mode argument.

  • +
+

Because the default output file name from all methods is output.csv, a more informative name is used for the output of optimization. We run the commands from the CmdStan home directory. This results in a sample with mean 0.27 and standard deviation 0.12. In comparison, running the NUTS-HMC sampler results in mean 0.26 and standard deviation 0.12.

+
./examples/bernoulli/bernoulli optimize jacobian=1 \
+  data file=examples/bernoulli/bernoulli.data.json \
+  output file=bernoulli_optimize_lbfgs.csv random seed=1234
+
+
+./examples/bernoulli/bernoulli laplace mode=bernoulli_optimize_lbfgs.csv \
+ data file=examples/bernoulli/bernoulli.data.json random seed=1234
+

The header and first few data rows of the output sample are shown below.

+
# method = laplace
+#   laplace
+#     mode = bernoulli_lbfgs.csv
+#     jacobian = true (Default)
+#     draws = 1000 (Default)
+#     calculate_lp = true (default)
+# id = 1 (Default)
+# data
+#   file = examples/bernoulli/bernoulli.data.json
+# init = 2 (Default)
+# random
+#   seed = 875960551 (Default)
+# output
+#   file = output.csv (Default)
+#   diagnostic_file =  (Default)
+#   refresh = 100 (Default)
+#   sig_figs = 8 (Default)
+#   profile_file = profile.csv (Default)
+# num_threads = 1 (Default)
+log_p__,log_q__,theta
+-9.4562,-2.33997,0.0498545
+-6.9144,-0.0117349,0.182898
+-7.18171,-0.746034,0.376428
+...
+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/log_prob_config.html b/docs/2_39/cmdstan-guide/log_prob_config.html new file mode 100644 index 000000000..b60d63e64 --- /dev/null +++ b/docs/2_39/cmdstan-guide/log_prob_config.html @@ -0,0 +1,1121 @@ + + + + + + + + + +Extracting log probabilities and gradients for diagnostics + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Extracting log probabilities and gradients for diagnostics

+

CmdStan can return the computed log probability and the gradient with respect to a set of parameters.

+

This is similar to the diagnose subcommand, but the output format differs and the results here are not compared with those from finite differences.

+

Note: Startup and data initialization costs mean that this method is not an efficient way to calculate these quantities. It is provided only for convenience and should not be used for serious computation.

+
+

Configuration

+

This method takes 3 arguments:

+
    +
  • jacobian - Whether or not the Jacobian adjustment for constrained parameters should be included in the gradient. Default value is true (include adjustment).

  • +
  • constrained_params - Input file of parameter values on the constrained scale. A single set of constrained parameters can be specified using JSON format. Alternatively, the input file can be a set of draws in Stan CSV format.

  • +
  • unconstrained_params - Input file (JSON or R dump) of parameter values on the unconstrained scale. These files should contain a single variable, called params_r, which is a flattened vector of all unconstrained parameters. If this object is two dimensional, each entry should be a vector of the same form and the output will feature multiple rows.

  • +
+

Only one of constrained_params and unconstrained_params can be specified.

+

For more on the differences between constrained and unconstrained parameters, see the Stan reference manual section on variable transforms.

+
+
+

CSV output

+

The output file consists of the following pieces of information:

+
    +
  • The full set of configuration options available for the log_prob method is reported at the beginning of the output file as CSV comments.

  • +
  • Column headers, the first column is labelled lp__, and the rest are named after parameters. These will be the unconstrained parameters, regardless of whether constrained or unconstrained parameters were supplied as input.

  • +
  • Values which correspond to the value of the log density (column 1) and the gradient with respect to each parameter (remaining columns).

  • +
+

For example, if we have a file called params.json:

+
{
+    "theta" : 0.1
+}
+

We can run the example model:

+
./bernoulli log_prob constrained_params=params.json data file=bernoulli.data.json
+

This yields

+
# method = log_prob
+#   log_prob
+#     unconstrained_params =  (Default)
+#     constrained_params = params.json
+#     jacobian = true (Default)
+# id = 1 (Default)
+# data
+#   file = bernoulli.data.json
+# init = 2 (Default)
+# random
+#   seed = 2390820139 (Default)
+# output
+#   file = output.csv (Default)
+#   diagnostic_file =  (Default)
+#   refresh = 100 (Default)
+#   sig_figs = 8 (Default)
+#   profile_file = profile.csv (Default)
+# num_threads = 1 (Default)
+lp_,theta
+-7.856,1.8
+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/mcmc_config.html b/docs/2_39/cmdstan-guide/mcmc_config.html new file mode 100644 index 000000000..d418b1d8a --- /dev/null +++ b/docs/2_39/cmdstan-guide/mcmc_config.html @@ -0,0 +1,1597 @@ + + + + + + + + + +MCMC Sampling + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

MCMC Sampling using Hamiltonian Monte Carlo

+

The sample method provides Bayesian inference over the model conditioned on data using Hamiltonian Monte Carlo (HMC) sampling. By default, the inference engine used is the No-U-Turn sampler (NUTS), an adaptive form of Hamiltonian Monte Carlo sampling. For details on HMC and NUTS, see the Stan Reference Manual chapter on MCMC Sampling.

+
+

Running the sampler

+

To generate a sample from the posterior distribution of the model conditioned on the data, we run the executable program with the argument sample or method=sample together with the input data. The executable can be run from any directory.

+

The full set of configuration options available for the sample method is available by using the sample help-all subcommand. The arguments with their requested values or defaults are also reported at the beginning of the sampler console output and in the output CSV file’s comments.

+

Here, we run it in the directory which contains the Stan program and input data, <cmdstan-home>/examples/bernoulli:

+
> cd examples/bernoulli
+> ls
+  bernoulli  bernoulli.data.json  bernoulli.data.R  bernoulli.stan
+

To execute sampling of the model under Linux or Mac, use:

+
> ./bernoulli sample data file=bernoulli.data.json
+

In Windows, the ./ prefix is not needed:

+
> bernoulli.exe sample data file=bernoulli.data.json
+

The output is the same across all supported platforms. First, the configuration of the program is echoed to the standard output:

+
method = sample (Default)
+  sample
+    num_samples = 1000 (Default)
+    num_warmup = 1000 (Default)
+    save_warmup = false (Default)
+    thin = 1 (Default)
+    adapt
+      engaged = true (Default)
+      gamma = 0.050000000000000003 (Default)
+      delta = 0.80000000000000004 (Default)
+      kappa = 0.75 (Default)
+      t0 = 10 (Default)
+      init_buffer = 75 (Default)
+      term_buffer = 50 (Default)
+      window = 25 (Default)
+      save_metric = false (Default)
+    algorithm = hmc (Default)
+      hmc
+        engine = nuts (Default)
+          nuts
+            max_depth = 10 (Default)
+        metric = diag_e (Default)
+        metric_file =  (Default)
+        stepsize = 1 (Default)
+        stepsize_jitter = 0 (Default)
+    num_chains = 1 (Default)
+id = 0 (Default)
+data
+  file = bernoulli.data.json
+init = 2 (Default)
+random
+  seed = 3252652196 (Default)
+output
+  file = output.csv (Default)
+  diagnostic_file =  (Default)
+  refresh = 100 (Default)
+

After the configuration has been displayed, a short timing message is given.

+
Gradient evaluation took 1.2e-05 seconds
+1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
+Adjust your expectations accordingly!
+

Next, the sampler reports the iteration number, reporting the percentage complete.

+
Iteration:    1 / 2000 [  0%]  (Warmup)
+...
+Iteration: 2000 / 2000 [100%]  (Sampling)
+

Finally, the sampler reports timing information:

+
 Elapsed Time: 0.007 seconds (Warm-up)
+               0.017 seconds (Sampling)
+               0.024 seconds (Total)
+
+
+

Stan CSV output file

+

Each execution of the model results in draws from a single Markov chain being written to a file in comma-separated value (CSV) format. The default name of the output file is output.csv.

+

The first part of the output file records the version of the underlying Stan library and the configuration as comments (i.e., lines beginning with the pound sign (#)).

+

When the example model bernoulli.stan is run via the command line with all default arguments, the following configuration is displayed:

+
# stan_version_major = 2
+# stan_version_minor = 23
+# stan_version_patch = 0
+# model = bernoulli_model
+# method = sample (Default)
+#   sample
+#     num_samples = 1000 (Default)
+#     num_warmup = 1000 (Default)
+#     save_warmup = false (Default)
+#     thin = 1 (Default)
+#     adapt
+#       engaged = 1 (Default)
+#       gamma = 0.050000 (Default)
+#       delta = 0.800000 (Default)
+#       kappa = 0.750000 (Default)
+#       t0 = 10.000000 (Default)
+#       init_buffer = 75 (Default)
+#       term_buffer = 50 (Default)
+#       window = 25 (Default)
+#       save_metric = false (Default)
+#     algorithm = hmc (Default)
+#       hmc
+#         engine = nuts (Default)
+#           nuts
+#             max_depth = 10 (Default)
+#         metric = diag_e (Default)
+#         metric_file =  (Default)
+#         stepsize = 1.000000 (Default)
+#         stepsize_jitter = 0.000000 (Default)
+#     num_chains = 1 (Default)
+# output
+#   file = output.csv (Default)
+#   diagnostic_file =  (Default)
+#   refresh = 100 (Default)
+

This is followed by a CSV header indicating the names of the values sampled.

+
lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta
+

The first output columns report the HMC sampler information:

+
    +
  • lp__ - the total log probability density (up to an additive constant) at each sample
  • +
  • accept_stat__ - the average Metropolis acceptance probability over each simulated Hamiltonian trajectory
  • +
  • stepsize__ - integrator step size
  • +
  • treedepth__ - depth of tree used by NUTS (NUTS sampler)
  • +
  • n_leapfrog__ - number of leapfrog calculations (NUTS sampler)
  • +
  • divergent__ - has value 1 if trajectory diverged, otherwise 0. (NUTS sampler)
  • +
  • energy__ - value of the Hamiltonian
  • +
  • int_time__ - total integration time (static HMC sampler)
  • +
+

Because the above header is from the NUTS sampler, it has columns treedepth__, n_leapfrog__, and divergent__ and doesn’t have column int_time__. The remaining columns correspond to model parameters. For the Bernoulli model, it is just the final column, theta.

+

The header line is written to the output file before warmup begins. If option save_warmup is set to true, the warmup draws are output directly after the header. The total number of warmup draws saved is num_warmup divided by thin, rounded up (i.e., ceiling).

+

Following the warmup draws (if any) are comments which record the results of adaptation: the step size and the inverse mass matrix used during sampling:

+
# Adaptation terminated
+# Step size = 0.884484
+# Diagonal elements of inverse mass matrix:
+# 0.535006
+

The default sampler is NUTS with an adapted step size and a diagonal inverse mass matrix. For this example, the step size is 0.884484, and the inverse mass contains the single entry 0.535006 corresponding to the parameter theta.

+

Draws from the posterior distribution are printed out next, each line containing a single draw with the columns corresponding to the header.

+
-6.84097,0.974135,0.884484,1,3,0,6.89299,0.198853
+-6.91767,0.985167,0.884484,1,1,0,6.92236,0.182295
+-7.04879,0.976609,0.884484,1,1,0,7.05641,0.162299
+-6.88712,1,0.884484,1,1,0,7.02101,0.188229
+-7.22917,0.899446,0.884484,1,3,0,7.73663,0.383596
+...
+

The output ends with timing details:

+
#  Elapsed Time: 0.007 seconds (Warm-up)
+#                0.017 seconds (Sampling)
+#                0.024 seconds (Total)
+
+
+

Iterations

+

At every sampler iteration, the sampler returns a set of estimates for all parameters and quantities of interest in the model. During warmup, the NUTS algorithm adjusts the HMC algorithm parameters metric and stepsize in order to efficiently sample from the typical set, the neighborhood of substantial posterior probability mass through which the Markov chain will travel in equilibrium. After warmup, the fixed metric and stepsize are used to produce a set of draws.

+

The following keyword-value arguments control the total number of iterations:

+
    +
  • num_samples
  • +
  • num_warmup
  • +
  • save_warmup
  • +
  • thin
  • +
+

The values for arguments num_samples and num_warmup must be non-negative integers. The default value for both is \(1000\).

+

For well-specified models and data, the sampler may converge faster and this many warmup iterations may be overkill. Conversely, complex models which have difficult posterior geometries may require more warmup iterations in order to arrive at good values for the step size and metric.

+

The number of sampling iterations to run depends on the effective sample size (EFF) reported for each parameter and the desired precision of your estimates. An EFF of at least 100 is required to make a viable estimate. The precision of your estimate is \(\sqrt{N}\); therefore every additional decimal place of accuracy increases this by a factor of 10.

+

Argument save_warmup takes values false or true. The default value is false, i.e., warmup draws are not saved to the output file. When the value is true, the warmup draws are written to the CSV output file directly after the CSV header line.

+

Argument thin controls the number of draws from the posterior written to the output file. Some users familiar with older approaches to MCMC sampling might be used to thinning to eliminate an expected autocorrelation in the draws. HMC is not nearly as susceptible to this autocorrelation problem and thus thinning is generally not required nor advised, as HMC can produce anticorrelated draws, which increase the effective sample size beyond the number of draws from the posterior. Thinning should only be used in circumstances where storage of the draws is limited and/or RAM for later processing the draws is limited.

+

The value of argument thin must be a positive integer. When thin is set to value \(N\), every \(N^{th}\) iteration is written to the output file. Should the value of thin exceed the specified number of iterations, the first iteration is saved to the output. This is because the iteration counter starts from zero and whenever the counter modulo the value of thin equals zero, the iteration is saved to the output file. Since zero modulo any positive integer is zero, the first iteration is always saved. When num_samples=M and thin=N, the number of iterations written to the output CSV file will be ceiling(M/N). If save_warmup=true, thinning is applied to the warmup iterations as well.

+
+
+

Adaptation

+

The adapt keyword is used to specify non-default options for the sampler adaptation schedule and settings.

+

Adaptation can be turned off by setting sub-argument engaged to value false. If engaged=false, no adaptation will be done, and all other adaptation sub-arguments will be ignored. Since the default argument is engaged=1, this keyword-value pair can be omitted from the command.

+

There are two sets of adaptation sub-arguments: step size optimization parameters and the warmup schedule. These are described in detail in the Reference Manual section Automatic Parameter Tuning.

+

The boolean sub-argument save_metric was added in Stan version 2.34. When save_metric=true, the adapted stepsize and metric are output as JSON at the end of adaptation. The saved metric file name is the output file basename with the suffix _metric.json, e.g., if using the default output filename output.csv, the saved metric file will be output_metric.json. This metric file can be reused in subsequent sampler runs as the initial metric, via sampler argument metric_file.

+
+

Step size optimization configuration

+

The Stan User’s Guide section on model conditioning and curvature provides a discussion of adaptation and stepsize issues. The Stan Reference Manual section on HMC algorithm parameters explains the NUTS-HMC adaptation schedule and the tuning parameters for setting the step size.

+

The following keyword-value arguments control the settings used to optimize the step size:

+
    +
  • delta - The target Metropolis acceptance rate. The default value is \(0.8\). Its value must be strictly between \(0\) and \(1\). Increasing the default value forces the algorithm to use smaller step sizes. This can improve sampling efficiency (effective sample size per iteration) at the cost of increased iteration times. Raising the value of delta will also allow some models that would otherwise get stuck to overcome their blockages.
    Models with difficult posterior geometries may require increasing the delta argument closer to \(1\); we recommend first trying to raise it to \(0.9\) or at most \(0.95\). Values above \(0.95\) are a strong indication of bad geometry; the better solution is to change the model geometry through reparameterization, which could yield both more efficient and faster sampling.

  • +
  • gamma - Adaptation regularization scale. Must be a positive real number, default value is \(0.05\). This is a parameter of the Nesterov dual-averaging algorithm. We recommend always using the default value.

  • +
  • kappa - Adaptation relaxation exponent. Must be a positive real number, default value is \(0.75\). This is a parameter of the Nesterov dual-averaging algorithm. We recommend always using the default value.

  • +
  • t0 - Adaptation iteration offset. Must be a positive real number, default value is \(10\). This is a parameter of the Nesterov dual-averaging algorithm. We recommend always using the default value.

  • +
+
+
+

Warmup schedule configuration

+

When adaptation is engaged, the warmup schedule is specified by sub-arguments, all of which take positive integers as values:

+
    +
  • init_buffer - The number of iterations spent tuning the step size at the outset of adaptation.
  • +
  • window - The initial number of iterations devoted to tuning the metric; this number is doubled successively.
  • +
  • term_buffer - The number of iterations used to re-tune the step size once the metric has been tuned.
  • +
+

The specified values may be modified slightly in order to ensure alignment between the warmup schedule and total number of warmup iterations.

+

The following figure is taken from the Stan Reference Manual, where label “I” corresponds to init_buffer, the initial “II” corresponds to window, and the final “III” corresponds to term_buffer:

+

Warmup Epochs Figure. Adaptation during warmup occurs in three stages: an initial fast adaptation interval (I), a series of expanding slow adaptation intervals (II), and a final fast adaptation interval (III). For HMC, both the fast and slow intervals are used for adapting the step size, while the slow intervals are used for learning the (co)variance necessitated by the metric. Iteration numbering starts at 1 on the left side of the figure and increases to the right.

+

+
+
+
+

Algorithm

+

The algorithm keyword-value pair specifies the algorithm used to generate the sample. There are two possible values: hmc, which generates from an HMC-driven Markov chain; and fixed_param which generates a new sample without changing the state of the Markov chain. The default argument is algorithm=hmc.

+
+

Samples from a set of fixed parameters

+

If a model doesn’t specify any parameters, then argument algorithm=fixed_param is mandatory.

+

The fixed parameter sampler generates a new sample without changing the current state of the Markov chain. This can be used to write models which generate pseudo-data via calls to RNG functions in the transformed data and generated quantities blocks.

+
+
+

HMC samplers

+

All HMC algorithms have three parameters:

+
    +
  • step size
  • +
  • metric
  • +
  • integration time - the number of steps taken along the Hamiltonian trajectory
  • +
+

See the Stan Reference Manual section on HMC algorithm parameters for further details.

+
+

Step size

+

The HMC algorithm simulates the evolution of a Hamiltonian system. The step size parameter controls the resolution of the sampler. Low step sizes can get HMC samplers unstuck that would otherwise get stuck with higher step sizes.

+

The following keyword-value arguments control the step size:

+
    +
  • stepsize - How far to move each time the Hamiltonian system evolves forward. Must be a positive real number, default value is \(1\).

  • +
  • stepsize_jitter - Allows step size to be “jittered” randomly during sampling to avoid any poor interactions with a fixed step size and regions of high curvature. Must be a real value between \(0\) and \(1\). The default value is \(0\). Setting stepsize_jitter to \(1\) causes step sizes to be selected in the range of \(0\) to twice the adapted step size. Jittering below the adapted value will increase the number of steps required and will slow down sampling, while jittering above the adapted value can cause premature rejection due to simulation error in the Hamiltonian dynamics calculation. We strongly recommend always using the default value.

  • +
+
+
+

Metric

+

All HMC implementations in Stan utilize quadratic kinetic energy functions which are specified up to the choice of a symmetric, positive-definite matrix known as a mass matrix or, more formally, a metric Betancourt (2017).

+

The metric argument specifies the choice of Euclidean HMC implementations:

+
    +
  • metric=unit specifies unit metric (diagonal matrix of ones).
  • +
  • metric=diag_e specifies a diagonal metric (diagonal matrix with positive diagonal entries). This is the default value.
  • +
  • metric=dense_e specifies a dense metric (a dense, symmetric positive definite matrix).
  • +
+

By default, the metric is estimated during warmup. However, when metric=diag_e or metric=dense_e, an initial guess for the metric can be specified with the metric_file argument whose value is the filepath to a JSON or Rdump file which contains a single variable inv_metric. For a diag_e metric the inv_metric value must be a vector of positive values, one for each parameter in the system. For a dense_e metric, inv_metric value must be a positive-definite square matrix with number of rows and columns equal to the number of parameters in the model.

+

The metric_file option can be used with and without adaptation enabled. If adaptation is enabled, the provided metric will be used as the initial guess in the adaptation process. If the initial guess is good, then adaptation should not change it much. If the metric is no good, then the adaptation will override the initial guess.

+

If adaptation is disabled, both the metric_file and stepsize arguments should be specified.

+
+
+

Integration time

+

The total integration time is determined by the argument engine, which takes the following possible values:

+
    +
  • nuts - the No-U-Turn Sampler which dynamically determines the optimal integration time.
  • +
  • static - an HMC sampler which uses a user-specified integration time.
  • +
+

The default argument is engine=nuts.

+

The NUTS sampler generates a proposal by starting at an initial position determined by the parameters drawn in the last iteration. It then evolves the initial system both forwards and backwards in time to form a balanced binary tree. The algorithm is iterative; at each iteration the tree depth is increased by one, doubling the number of leapfrog steps thus effectively doubling the computation time. The algorithm terminates in one of two ways: either the NUTS criterion (i.e., a U-turn in Euclidean space on a subtree) is satisfied for a new subtree or the completed tree; or the depth of the completed tree hits the maximum depth allowed.

+

When engine=nuts, the subargument max_depth can be used to control the depth of the tree. The default argument is max_depth=10. In the case where a model has a difficult posterior from which to sample, max_depth should be increased to ensure that the NUTS tree can grow as large as necessary.

+

When the argument engine=static is specified, the user must specify the integration time via keyword int_time which takes as a value a positive number. The default value is \(2\pi\).

+
+
+
+
+

Sampler diagnostic file

+

The output keyword sub-argument diagnostic_file=<filepath> specifies the location of the auxiliary output file which contains sampler information for each draw, and the gradients on the unconstrained scale and log probabilities for all parameters in the model. By default, no auxiliary output file is produced.

+
+
+

Running multiple chains

+

A Markov chain generates draws from the target distribution only after it has converged to equilibrium. In theory, convergence is only guaranteed asymptotically as the number of draws grows without bound. In practice, diagnostics must be applied to monitor convergence for the finite number of draws actually available. One way to monitor whether a chain has approximately converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. For robust diagnostics, we recommend running 4 chains.

+

The preferred way of using multiple chains is to run them all from the same executable using the num_chains argument. There is also the option to use the Unix or DOS shell to run multiple executables.

+
+

Using the num_chains argument to run multiple chains

+

The num_chains argument can be used for all of Stan’s samplers with the exception of the static HMC engine. This will run multiple chains of MCMC from the same executable, which can save on memory usage due to only needing one copy of the model and data. Depending on whether the model was compiled with STAN_THREADS=true, these will either run in parallel or one after the other.

+

When num_chains is greater than 1 (the default), arguments related to filenames (e.g. output file=, init=) can accept a comma separated list of values, one per chain.

+

For example, the following sample command specifies the names of the three chains’ output files.

+
./bernoulli sample num_chains=3 data file=bernoulli.data.json output file=output_1.csv,output_2.csv,output_3.csv
+

This will write the output in output_1.csv, output_2.csv, output_3.csv.

+

If the model was not compiled with STAN_THREADS=true, the above command will run 3 chains sequentially.

+

If the model was compiled with STAN_THREADS=true, the chains can run in parallel, with the num_threads argument defining the maximum number of threads used to run the chains. If the model uses no within-chain parallelization (map_rect or reduce_sum calls), the below command will run 4 chains in parallel, provided there are cores available:

+
./bernoulli sample num_chains=4 data file=bernoulli.data.json num_threads=4
+

If the model uses within-chain parallelization (map_rect or reduce_sum calls), the threads are automatically scheduled to run the parallel parts of a single chain or run the sequential parts of other chains. The below call starts 4 chains that can use 16 threads. At a given moment a single chain may use all 16 threads, 1 thread, anything in between, or can wait for a thread to be available. The scheduling is left to the Threading Building Blocks scheduler.

+
./bernoulli_par sample num_chains=4 data file=bernoulli.data.json num_threads=16
+
+

Legacy filename behavior

+

If a comma separated list is not used, the num_chains argument changes the normal meanings of filename arguments when it is greater than 1 (the default). They are now interpreted as a “template” which is used for each chain.

+

For example, when num_chains=2, the argument output file=foo.csv no longer produces a file foo.csv, but instead produces two files, foo_1.csv and foo_2.csv. If you also supply id=5, the files produced will be foo_5.csv and foo_6.csv; id=5 gives the id of the first chain, and the remaining chains are sequential from there.

+

This also applies to input files, like those used for initialization. For example, if num_chains=3 and init=bar.json, CmdStan will first look for bar_1.json. If it exists, it will use bar_1.json for the first chain, bar_2.json for the second, and so on. If bar_1.json does not exist, it falls back to looking for bar.json, and if it exists, uses the same initial values for each chain. The numbers in these filenames are also based on the id argument, which defaults to 1.

+

For example, this shorthand is equivalent to the example given above:

+
./bernoulli sample num_chains=3 data file=bernoulli.data.json output file=output.csv
+

A suffix with the chain id is appended to the provided output filename (output.csv in the above command), so this will also produce files output_1.csv, output_2.csv, output_3.csv.

+
+
+
+
+

Summarizing sampler output(s) with stansummary

+

The stansummary utility processes one or more output files from a run or set of runs of Stan’s HMC sampler given a model and data. For all columns in the Stan CSV output file stansummary reports a set of statistics including mean, standard deviation, percentiles, effective sample size, and \(\hat{R}\) values.

+

To run stansummary on the output files generated by the above runs of the bernoulli model (or by the shell for loop shown later on this page) on Mac or Linux:

+
<cmdstan-home>/bin/stansummary output_*.csv
+

On Windows, use backslashes to call the stansummary.exe.

+
<cmdstan-home>\bin\stansummary.exe output_*.csv
+

The stansummary output consists of one row of statistics per column in the Stan CSV output file. Therefore, the first rows in the stansummary report statistics over the sampler state. The final row of output summarizes the estimates of the model variable theta:

+
Inference for Stan model: bernoulli_model
+4 chains: each with iter=1000; warmup=1000; thin=1; 1000 iterations saved.
+
+Warmup took (0.0060, 0.0040, 0.0050, 0.0050) seconds, 0.020 seconds total
+Sampling took (0.0080, 0.010, 0.010, 0.010) seconds, 0.038 seconds total
+
+                Mean     MCSE  StdDev    MAD     5%   50%   95%  ESS_bulk  ESS_tail  ESS_bulk/s  R_hat
+
+lp__            -7.3  1.9e-02    0.72   0.34   -8.7  -7.0  -6.8      1731      1610       45546    1.0
+accept_stat__   0.93  2.7e-03    0.12  0.041   0.68  0.97   1.0      5078      3437     1.3e+05    1.0
+stepsize__      0.90      nan    0.10  0.046   0.82  0.86   1.1       nan       nan         nan    nan
+treedepth__      1.4  9.3e-03    0.51   0.00    1.0   1.0   2.0      3167      3441     8.3e+04    1.0
+n_leapfrog__     2.7  1.7e-01     1.6   0.00    1.0   3.0   7.0       494      2000     1.3e+04    1.0
+divergent__     0.00      nan    0.00   0.00   0.00  0.00  0.00       nan       nan         nan    nan
+energy__         7.8  2.6e-02     1.0   0.70    6.8   7.4   9.8      1598      2069     4.2e+04    1.0
+
+theta           0.26  2.9e-03    0.12   0.12  0.084  0.24  0.47      1658      1490       43629    1.0
+
+Samples were drawn using hmc with nuts.
+For each parameter, ESS_bulk and ESS_tail measure the effective sample size for the entire sample (bulk)
+and for the .05 and .95 tails (tail), and R_hat measures the potential scale reduction on split chains.
+At convergence R_hat will be very close to 1.00.
+

In this example, we conditioned the model on data consisting of the outcomes of 10 Bernoulli trials, where only 2 trials reported success. The 5%, 50%, and 95% percentile values for theta reflect the uncertainty in our estimate, due to the small amount of data, given the prior of beta(1, 1).

+
+
+

Examples - older parallelism

+

Note: Many of these examples can be simplified by using the num_chains argument.

+

When the num_chains argument is not available or is undesirable for whatever reason, built-in tools in the system shell can be used.

+

To run multiple chains given a model and data, either sequentially or in parallel, we can also use the Unix or DOS shell for loop to set up index variables needed to identify each chain and its outputs.

+

On MacOS or Linux, the for-loop syntax for both the bash and zsh interpreters is:

+
for NAME [in LIST]; do COMMANDS; done
+

The list can be a simple sequence of numbers, or you can use the shell expansion syntax {1..N} which expands to the sequence from \(1\) to \(N\), e.g. {1..4} expands to 1 2 3 4. Note that the expression {1..N} cannot contain spaces.

+

To run 4 chains for the example bernoulli model on MacOS or Linux:

+
> for i in {1..4}
+    do
+      ./bernoulli sample data file=bernoulli.data.json \
+      output file=output_${i}.csv
+    done
+

The backslash (\) indicates a line continuation in Unix. The expression ${i} substitutes in the value of loop index variable i. To run chains in parallel, put an ampersand (&) at the end of the nested sampler command:

+
> for i in {1..4}
+    do
+      ./bernoulli sample data file=bernoulli.data.json \
+      output file=output_${i}.csv &
+    done
+

This pushes each process into the background which allows the loop to continue without waiting for the current chain to finish.

+

On Windows, the DOS for-loop syntax is one of:

+
for %i in (SET) do COMMAND COMMAND-ARGUMENTS
+for /l %i in (START, STEP, END) do COMMAND COMMAND-ARGUMENTS
+

To run 4 chains in parallel on Windows:

+
>for /l %i in (1, 1, 4) do start /b bernoulli.exe sample ^
+                                    data file=bernoulli.data.json my_data ^
+                                    output file=output_%i.csv
+

The caret (^) indicates a line continuation in DOS. The expression %i is the loop index.

+

In the following extended examples, we focus on just the nested sampler command for Unix.

+
+

Running multiple chains with a specified RNG seed

+

For reproducibility, we specify the same RNG seed across all chains and use the chain id argument to specify the RNG offset.

+

The RNG seed is specified by random seed=<int> and the offset is specified by id=<loop index>, so the call to the sampler is:

+
./my_model sample data file=my_model.data.json \
+            output file=output_${i}.csv \
+            random seed=12345 id=${i}
+
+
+

Changing the default warmup and sampling iterations

+

The warmup and sampling iteration keyword-value arguments must follow the sample keyword. The call to the sampler which overrides the default warmup and sampling iterations is:

+
./my_model sample num_warmup=500 num_samples=500 \
+            data file=my_model.data.json \
+            output file=output_${i}.csv
+
+
+

Saving warmup draws

+

To save warmup draws as part of the Stan CSV output file, use the keyword-value argument save_warmup=true. This must be grouped with the other sample keyword sub-arguments.

+
./my_model sample num_warmup=500 num_samples=500 save_warmup=true \
+            data file=my_model.data.json \
+            output file=output_${i}.csv
+
+
+

Initializing parameters

+

By default, all parameters are initialized on an unconstrained scale to random draws from a uniform distribution over the range \([{-2}, 2]\). To initialize some or all parameters to good starting points on the constrained scale from a data file in JSON or Rdump format, use the keyword-value argument init=<filepath>:

+
./my_model sample init=my_param_inits.json data file=my_model.data.json \
+           output file=output_${i}.csv
+

To verify that the specified values will be used by the sampler, you can run the sampler with option algorithm=fixed_param, so that the initial values are used to generate the sample. Since this generates a set of identical draws, setting num_warmup=0 and num_samples=1 saves unnecessary iterations. As the output values are also on the constrained scale, the set of reported values will match the set of specified initial values.

+

For example, if we run the example Bernoulli model with specified initial value for parameter “theta”:

+
{ "theta" : 0.5 }
+

via command:

+
./bernoulli sample algorithm=fixed_param num_warmup=0 num_samples=1 \
+            init=bernoulli.init.json data file=bernoulli.data.json
+

The resulting output CSV file contains a single draw:

+
lp__,accept_stat__,theta
+0,0,0.5
+#
+#  Elapsed Time: 0 seconds (Warm-up)
+#                0 seconds (Sampling)
+#                0 seconds (Total)
+#
+
+
+

Specifying the metric and stepsize

+

An initial estimate for the metric can be specified with the metric_file argument whose value is the filepath to a JSON or Rdump file which contains a variable inv_metric. The metric_file option can be used with and without adaptation enabled.

+

By default, the metric is estimated during warmup adaptation. If the initial guess is good, then adaptation should not change it much. If the metric is no good, then the adaptation will override the initial guess. For example, the JSON file bernoulli.diag_e.json, contents

+
{ "inv_metric" : [0.296291] }
+

can be used as the initial metric as follows:

+
../my_model sample algorithm=hmc metric_file=bernoulli.diag_e.json \
+            data file=my_model.data.json \
+            output file=output_${i}.csv
+

If adaptation is disabled, both the metric_file and stepsize arguments should be specified.

+
../my_model sample adapt engaged=false \
+            algorithm=hmc stepsize=0.9 \
+            metric_file=bernoulli.diag_e.json \
+            data file=my_model.data.json \
+            output file=output_${i}.csv
+

The resulting output CSV file will contain the following set of comment lines:

+
# Adaptation terminated
+# Step size = 0.9
+# Diagonal elements of inverse mass matrix:
+# 0.296291
+

As of Stan version 2.34, the adapted metric can be saved in JSON format, via sub-argument save_metric, described above. This allows for no or minimal adaptation starting from this file. It is still necessary to specify the stepsize argument as well as the metric_file argument; the former is the value of the stepsize element in the saved metric file, and the latter is the metric file path.

+
+
+

Changing the NUTS-HMC adaptation parameters

+

The keyword-value arguments for these settings are grouped together under the adapt keyword which itself is a sub-argument of the sample keyword.

+

Models with difficult posterior geometries may require increasing the delta argument closer to \(1\).

+
./my_model sample adapt delta=0.95 \
+            data file=my_model.data.json \
+            output file=output_${i}.csv
+

To skip adaptation altogether, use the keyword-value argument engaged=false. Disabling adaptation disables both metric and stepsize adaptation, so a stepsize should be provided along with a metric to enable efficient sampling.

+
../my_model sample adapt engaged=false \
+            algorithm=hmc stepsize=0.9 \
+            metric_file=bernoulli.diag_e.json \
+            data file=my_model.data.json \
+            output file=output_${i}.csv
+

Even with adaptation disabled, it is still advisable to run warmup iterations in order to allow the initial parameter values to be adjusted to estimates which fall within the typical set.

+

To skip warmup altogether requires specifying both num_warmup=0 and adapt engaged=false.

+
../my_model sample num_warmup=0 adapt engaged=false \
+            algorithm=hmc stepsize=0.9 \
+            metric_file=bernoulli.diag_e.json \
+            data file=my_model.data.json \
+            output file=output_${i}.csv
+
+
+

Increasing the tree-depth

+

Models with difficult posterior geometries may require increasing the max_depth argument from its default value \(10\). This requires specifying a series of keyword-argument pairs:

+
./my_model sample adapt delta=0.95 \
+            algorithm=hmc engine=nuts max_depth=15 \
+            data file=my_model.data.json \
+            output file=output_${i}.csv
+
+
+

Capturing Hamiltonian diagnostics and gradients

+

The output keyword sub-argument diagnostic_file=<filepath> writes the sampler parameters and gradients of all model parameters for each draw to a CSV file:

+
./my_model sample data file=my_model.data.json \
+            output file=output_${i}.csv \
+            diagnostic_file=diagnostics_${i}.csv
+
+
+

Suppressing progress updates to the console

+

The output keyword sub-argument refresh=<int> specifies the number of iterations between progress messages written to the terminal window. The default value is \(100\) iterations. The progress updates look like:

+
Iteration:    1 / 2000 [  0%]  (Warmup)
+Iteration:  100 / 2000 [  5%]  (Warmup)
+Iteration:  200 / 2000 [ 10%]  (Warmup)
+Iteration:  300 / 2000 [ 15%]  (Warmup)
+

For simple models which fit quickly, such updates can be annoying; to suppress them altogether, set refresh=0. This only turns off the Iteration: messages; the configuration and timing information are still written to the terminal.

+
./my_model sample data file=my_model.data.json \
+            output file=output_${i}.csv \
+            refresh=0
+

For complicated models which take a long time to fit, setting the refresh rate to a low number, e.g. \(10\) or even \(1\), provides a way to more closely monitor the sampler.

+
+
+

Everything example

+

The CmdStan argument parser requires keeping sampler config sub-arguments together; interleaving sampler config with the inputs, outputs, inits, RNG seed and chain id config results in an error message such as the following:

+
./bernoulli sample data file=bernoulli.data.json adapt delta=0.95
+adapt is either mistyped or misplaced.
+Perhaps you meant one of the following valid configurations?
+  method=sample sample adapt
+  method=variational variational adapt
+Failed to parse arguments, terminating Stan
+

The following example provides a template for a call to the sampler which specifies input data, initial parameters, initial step-size and metric, adaptation, output, and RNG initialization.

+
./my_model sample num_warmup=2000 \
+           init=my_param_inits.json \
+           adapt delta=0.95 init_buffer=100 \
+           window=50 term_buffer=100 \
+           algorithm=hmc engine=nuts max_depth=15 \
+           metric=dense_e metric_file=my_metric.json \
+           stepsize=0.6555 \
+           data file=my_model.data.json \
+           output file=output_${i}.csv refresh=10 \
+           random seed=12345 id=${i}
+

The keywords sample, data, output, and random are the top-level argument groups. Within the sample config arguments, the keyword adapt groups the adaptation algorithm parameters and the keyword-value algorithm=hmc groups the NUTS-HMC parameters.

+

The top-level groups can be freely ordered with respect to one another. The following is also a valid command:

+
./my_model random seed=12345 id=${i} \
+           data file=my_model.data.json \
+           output file=output_${i}.csv refresh=10 \
+           sample num_warmup=2000 \
+           init=my_param_inits.json \
+           algorithm=hmc engine=nuts max_depth=15 \
+           metric=dense_e metric_file=my_metric.json \
+           stepsize=0.6555 \
+           adapt delta=0.95 init_buffer=100 \
+           window=50 term_buffer=100
+ + + +
+
+
+ + Back to top

References

+
+Betancourt, Michael. 2017. “A Conceptual Introduction to Hamiltonian Monte Carlo.” arXiv 1701.02434. https://arxiv.org/abs/1701.02434. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/optimize_config.html b/docs/2_39/cmdstan-guide/optimize_config.html new file mode 100644 index 000000000..56a348a61 --- /dev/null +++ b/docs/2_39/cmdstan-guide/optimize_config.html @@ -0,0 +1,1234 @@ + + + + + + + + + +Optimization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Optimization

+

The CmdStan executable can run Stan’s optimization algorithms, which provide a deterministic method to find the posterior mode. If the posterior is not convex, there is no guarantee Stan will be able to find the global optimum as opposed to a local optimum of log probability.

+

The full set of configuration options available for the optimize method is available by using the optimize help-all subcommand. The arguments with their requested values or defaults are also reported at the beginning of the optimizer console output and in the output CSV file’s comments.

+

The executable does not need to be recompiled in order to switch from sampling to optimization, and the data input format is the same. The following is a minimal call to Stan’s optimizer using defaults for everything but the location of the data file.

+
> ./bernoulli optimize data file=bernoulli.data.json
+

Executing this command prints output both to the console and to a CSV file.

+

The first part of the console output reports on the configuration used. The above command uses all default configurations, therefore the optimizer used is the L-BFGS optimizer and its default initial stepsize and tolerances for monitoring convergence:

+
 ./bernoulli optimize data file=bernoulli.data.json
+method = optimize
+  optimize
+    algorithm = lbfgs (Default)
+      lbfgs
+        init_alpha = 0.001 (Default)
+        tol_obj = 1e-12 (Default)
+        tol_rel_obj = 10000 (Default)
+        tol_grad = 1e-08 (Default)
+        tol_rel_grad = 1e+07 (Default)
+        tol_param = 1e-08 (Default)
+        history_size = 5 (Default)
+    jacobian = false (Default)
+    iter = 2000 (Default)
+    save_iterations = false (Default)
+id = 1 (Default)
+data
+  file = bernoulli.data.json
+init = 2 (Default)
+random
+  seed = 87122538 (Default)
+output
+  file = output.csv (Default)
+  diagnostic_file =  (Default)
+  refresh = 100 (Default)
+  sig_figs = 8 (Default)
+  profile_file = profile.csv (Default)
+  save_cmdstan_config = false (Default)
+num_threads = 1 (Default)
+

The second part of the output indicates how well the algorithm fared, here converging and terminating normally. The numbers reported indicate that it took 5 iterations and 8 gradient evaluations. This is, not surprisingly, far fewer iterations than required for sampling; even fewer iterations would be used with less stringent user-specified convergence tolerances. The alpha value is the step size used. In the final state the change in parameters was roughly \(0.002\) and the length of the gradient roughly 3e-05 (\(0.00003\)).

+
Initial log joint probability = -6.85653
+    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes
+       5      -5.00402    0.00184936   3.35074e-05           1           1        8
+Optimization terminated normally:
+  Convergence detected: relative gradient magnitude is below tolerance
+

The output from optimization is written into the file output.csv by default. The output follows the same pattern as the output for sampling, first dumping the entire set of parameters used as comment lines:

+
# stan_version_major = 2
+# stan_version_minor = 23
+# stan_version_patch = 0
+# model = bernoulli_model
+# method = optimize
+#   optimize
+#     algorithm = lbfgs (Default)
+#       lbfgs
+#         init_alpha = 0.001 (Default)
+#         tol_obj = 1e-12 (Default)
+#         tol_rel_obj = 10000 (Default)
+#         tol_grad = 1e-08 (Default)
+#         tol_rel_grad = 1e+07 (Default)
+#         tol_param = 1e-08 (Default)
+#         history_size = 5 (Default)
+#     jacobian = false (Default)
+#     iter = 2000 (Default)
+#     save_iterations = false (Default)
+

Following the config information are two lines of output, the CSV headers and the recorded values:

+
lp__,converged__,theta
+-5.00402,31,0.200003
+

Note that everything is a comment other than a line for the header, and a line for the values. Here, the header indicates the unnormalized log probability with lp__, algorithm status in converged__, and the model parameter theta. The maximum log probability is -5.0 and the posterior mode for theta is 0.20. The mode exactly matches what we would expect from the data. Because the prior was uniform, the result 0.20 represents the maximum likelihood estimate (MLE) for the very simple Bernoulli model. Note that no uncertainty is reported.

+

All of the optimizers stream per-iteration intermediate approximations to the command line console. The sub-argument save_iterations specifies whether or not to save the intermediate iterations to the output file. Allowed values are true or false. The default value is false, i.e., intermediate iterations are not saved to the output file. Running the optimizer with save_iterations=true writes both the initial log joint probability and values for all iterations to the output CSV file.

+

Running the example model with option save_iterations=true, i.e., the command

+
> ./bernoulli optimize save_iterations=1 data file=bernoulli.data.json
+

produces CSV file output rows:

+
lp__,converged__,theta
+-6.85653,0,0.493689
+-6.10128,0,0.420936
+-5.02953,0,0.22956
+-5.00517,0,0.206107
+-5.00403,0,0.200299
+-5.00402,31,0.200003
+
+

Meaning of the converged__ column

+

The converged__ column is used to indicate the status of the algorithm. It can take on the following values.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Numeric valueMeaning
-1Line search failed to achieve a sufficient decrease, no more progress can be made
0Successful step completed
10Convergence detected: absolute parameter change was below tolerance
20Convergence detected: absolute change in objective function was below tolerance
21Convergence detected: relative change in objective function was below tolerance
30Convergence detected: gradient norm is below tolerance
31Convergence detected: relative gradient magnitude is below tolerance
40Maximum number of iterations hit, may not be at an optima
+

Note that not all algorithms can produce all codes, and some, such as 0, will only be observed if save_iterations is used.

+

A comment explaining the final value (with the same contents as the above table) is added at the end of the CSV file.

+
+
+

Jacobian adjustments

+

If there are constrained parameters, Stan makes a transformation to an unconstrained space and runs the optimization algorithm in the unconstrained space.

+

The jacobian argument specifies whether or not the call to the model’s log probability function should include the log absolute Jacobian determinant of inverse parameter transforms.

+

If the Jacobian adjustment is not included (the default), the optimization returns parameter values that correspond to a mode of the target in the constrained space (if such mode exists). Thus this option is useful for any optimization where we want to find the mode in the original constrained parameter space.

+

If the Jacobian adjustment is included, the optimization returns parameter values that correspond to a mode in the unconstrained space. This is useful, for example, if we want to make a distributional approximation of the posterior at the mode (see Laplace sampling), as then the Jacobian adjustment needs to be included for correct results.

+
+
+

Optimization algorithms

+

The algorithm argument specifies the optimization algorithm. This argument takes one of the following three values:

+
    +
  • lbfgs A quasi-Newton optimizer. This is the default optimizer and also much faster than the other optimizers.

  • +
  • bfgs A quasi-Newton optimizer.

  • +
  • newton A Newton optimizer. This is the least efficient optimization algorithm, but has the advantage of setting its own stepsize.

  • +
+

See the Stan Reference Manual’s Optimization chapter for a description of these algorithms.

+

All of the optimizers stream per-iteration intermediate approximations to the command line console. The sub-argument save_iterations specifies whether or not to save the intermediate iterations to the output file. Allowed values are true or false. The default value is false, i.e., intermediate iterations are not saved to the output file.

+
+
+

The quasi-Newton optimizers

+

For both BFGS and L-BFGS optimizers, convergence monitoring is controlled by a number of tolerance values, any one of which being satisfied causes the algorithm to terminate with a solution. See the BFGS and L-BFGS configuration section for details on the convergence tests.

+

Both BFGS and L-BFGS have the following configuration arguments:

+
    +
  • init_alpha - The initial step size parameter. Must be a positive real number. Default value is \(0.001\).

  • +
  • tol_obj - Convergence tolerance on changes in objective function value. Must be a positive real number. Default value is \(10^{-12}\).

  • +
  • tol_rel_obj - Convergence tolerance on relative changes in objective function value. Must be a positive real number. Default value is \(10^{4}\).

  • +
  • tol_grad - Convergence tolerance on the norm of the gradient. Must be a positive real number. Default value is \(10^{-8}\).

  • +
  • tol_rel_grad - Convergence tolerance on the relative norm of the gradient. Must be a positive real number. Default value is \(10^{7}\).

  • +
  • tol_param - Convergence tolerance on changes in parameter value. Must be a positive real number. Default value is \(10^{-8}\).

  • +
+

The init_alpha argument specifies the first step size to try on the initial iteration. If the first iteration takes a long time (and requires a lot of function evaluations), set init_alpha to be roughly equal to the alpha used in that first iteration. The default value is very small, which is reasonable for many problems but might be too large or too small depending on the objective function and initialization. Being too big or too small just means that the first iteration will take longer (i.e., require more gradient evaluations) before the line search finds a good step length.

+

In addition to the above, the L-BFGS algorithm has argument history_size which controls the size of the history it uses to approximate the Hessian. The value should be less than the dimensionality of the parameter space and, in general, relatively small values (\(5\)-\(10\)) are sufficient; the default value is \(5\).

+

If L-BFGS performs poorly but BFGS performs well, consider increasing the history size. Increasing history size will increase the memory usage, although this is unlikely to be an issue for typical Stan models.

+
+
+

The Newton optimizer

+

There are no configuration parameters for the Newton optimizer. It is not recommended because of the slow Hessian calculation involving finite differences.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/parallelization.html b/docs/2_39/cmdstan-guide/parallelization.html new file mode 100644 index 000000000..a6c4315fc --- /dev/null +++ b/docs/2_39/cmdstan-guide/parallelization.html @@ -0,0 +1,1162 @@ + + + + + + + + + +Parallelization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Parallelization

+

Stan provides three ways of parallelizing execution of a Stan model:

+
    +
  • multi-threading with Intel Threading Building Blocks (TBB),
  • +
  • multi-processing with Message Passing Interface (MPI) and
  • +
  • manycore processing with OpenCL.
  • +
+
+

Multi-threading with TBB

+

In order to exploit multi-threading in a Stan model, the models must be rewritten to use the reduce_sum and map_rect functions. For instructions on how to rewrite Stan models to use these functions see Stan’s User guide chapter on parallelization, the reduce_sum case study or the Multithreading and Map-Reduce tutorial.

+
+

Compiling

+

Once a model is rewritten to use the above-mentioned functions, the model must be compiled with the STAN_THREADS makefile flag. The flag can be supplied in the make call but we recommend writing the flag to the make/local file. If the STAN_THREADS flag is defined/non-empty, threads will be enabled.

+

An example of the contents of make/local to enable threading with TBB:

+
STAN_THREADS=true
+

The model is then compiled as normal:

+
make path/to/model
+
+
+

Running

+

Before running a multi-threaded model, we need to specify the maximum number of threads the program can run (total threads for all chains). This is done by setting the num_threads argument. Valid values for num_threads are positive integers and -1. If num_threads is set to -1, all available cores will be used.

+

Generally, this number should not exceed the number of available cores for best performance.

+

Example:

+
./model sample data file=data.json num_threads=4 ...
+

When the model is compiled with STAN_THREADS we can sample with multiple chains with a single executable (see section running multiple chains for cases when this is available). When running multiple chains num_threads is the maximum number of threads that can be used by all the chains combined. The exact number of threads that will be used for each chain at a given point in time is determined by the TBB scheduler. The following example starts 2 chains with 8 total threads available:

+
./model sample num_chains=2 data file=data.json num_threads=8 ...
+
+
+
+

Multi-processing with MPI

+

In order to use multi-processing with MPI in a Stan model, the models must be rewritten to use the map_rect function. By using MPI, the model can be parallelized across multiple cores or a cluster. MPI with Stan is supported on MacOS and Linux.

+
+

Dependencies

+

Compiling and running Stan models with MPI requires that the system has an MPI implementation installed. For Unix systems the most commonly used implementations are MPICH and OpenMPI.

+
+
+

Compiling

+

Once a model is rewritten to use map_rect, additional makefile flags must be written to the make/local. These are:

+
    +
  • STAN_MPI: Enables the use of MPI with Stan if defined.
  • +
  • CXX: The name of the MPI C++ compiler wrapper. Typically mpicxx.
  • +
  • TBB_CXX_TYPE: The C++ compiler the MPI wrapper wraps. Typically gcc on Linux and clang on macOS.
  • +
+

An example of make/local on Linux:

+
STAN_MPI=true
+CXX=mpicxx
+TBB_CXX_TYPE=gcc
+

The model is then compiled as normal:

+
make path/to/model
+
+
+

Running

+

The Stan model compiled with STAN_MPI is run using an MPI launcher. The MPI standard suggests using mpiexec, but a vendor wrapper for the launcher like mpirun can also be used. The launcher is supplied the path to the built executable and the number of processes to start: -n X for mpiexec or -np X for mpirun where X is replaced by the integer representing the number of processes.

+

Example for running a model with six processes:

+
mpiexec -n 6 path/to/model sample data file=data.json ...
+
+
+
+

OpenCL

+
+

Dependencies

+

OpenCL is supported on most modern CPUs and GPUs. In order to run OpenCL-enabled Stan models, an OpenCL runtime for the target device must be installed. This subsection lists installation instructions for OpenCL runtimes of the commonly-found devices.

+

In order to check if any OpenCL-enabled device and its runtime is already present use the clinfo tool. On Linux, clinfo can typically be installed with the default package manager (for example sudo apt-get install clinfo on Ubuntu). For Windows, a pre-built clinfo binary can be found here.

+

Also use clinfo to verify successful installation of OpenCL runtimes.

+
+

NVIDIA GPU

+
    +
  • Linux:

    +

    Install the NVIDIA GPU driver and the NVIDIA CUDA Toolkit. On Ubuntu the commands to install both are:

    +
    sudo apt update
    +sudo apt install nvidia-driver-460 nvidia-cuda-toolkit
    +

    Replace the driver version (460 in the above case) with the latest number at the time of installation.

  • +
  • Windows:

    +

    Install the NVIDIA GPU Driver and CUDA Toolkit.

  • +
+
+
+

AMD GPU

+
    +
  • Linux:

    +

    Install Radeon Software for Linux available here.

  • +
  • Windows:

    +

    We recommend installing the open source OCL-SDK.

  • +
+
+
+

AMD CPU

+

Install the open source PoCL.

+
+
+

Intel CPU/GPU

+

Follow Intel’s install instructions given here (requires registration).

+
+
+
+

Compiling

+

In order to enable the OpenCL backend the model must be compiled with the STAN_OPENCL makefile flag defined/non-empty. The flag can be supplied in the make call but we recommend writing the flag to the make/local file.

+

An example of the contents of make/local to enable parallelization with OpenCL:

+
STAN_OPENCL=true
+

If you are using OpenCL with an integrated GPU you also need to define the INTEGRATED_OPENCL flag, as the sharing of memory between CPU and GPU is slightly different with integrated graphics:

+
INTEGRATED_OPENCL=true
+

The model is then compiled as normal:

+
make path/to/model
+
+
+

Running

+

The Stan model compiled with STAN_OPENCL can also be supplied the OpenCL platform and device IDs of the target device. These IDs determine the device on which to run the OpenCL-supported functions. You can list the devices on your system using the clinfo program. If the system has one GPU and no OpenCL CPU runtime, the platform and device IDs of the GPU are typically 0. In that case you can also omit the OpenCL IDs, as the default IDs of 0 are used.

+

We supply these IDs when starting the executable as shown below:

+
path/to/model sample data file=data.json opencl platform=0 device=1
+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/pathfinder_config.html b/docs/2_39/cmdstan-guide/pathfinder_config.html new file mode 100644 index 000000000..3ad17a27a --- /dev/null +++ b/docs/2_39/cmdstan-guide/pathfinder_config.html @@ -0,0 +1,1269 @@ + + + + + + + + + +Pathfinder Method for Approximate Bayesian Inference + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Pathfinder Method for Approximate Bayesian Inference

+

The CmdStan method pathfinder uses the Pathfinder algorithm of Zhang et al. (2022), which is further described in the Stan Reference Manual.

+

A single run of the Pathfinder algorithm generates a set of approximate draws. Inference is improved by running multiple Pathfinder instances and using Pareto-smoothed importance resampling (PSIS) of the resulting sets of draws. This better matches non-normal target densities and also eliminates minor modes.

+

The pathfinder method runs multi-path Pathfinder by default, which returns a PSIS sample over the draws from several individual (“single-path”) Pathfinder runs. Argument num_paths specifies the number of single-path Pathfinders, the default is \(4\). If num_paths is set to 1, then only one individual Pathfinder is run without the PSIS reweighting of the sample.

+

The full set of configuration options available for the pathfinder method is available by using the pathfinder help-all subcommand. The arguments with their requested values or defaults are also reported at the beginning of the algorithm’s console output and in the output CSV file’s comments.

+

The following is a minimal call to the Pathfinder algorithm using defaults for everything but the location of the data file.

+
> ./bernoulli pathfinder data file=bernoulli.data.R
+

Executing this command prints output both to the console and to csv files.

+

The first part of the console output reports on the configuration used.

+
method = pathfinder
+  pathfinder
+    init_alpha = 0.001 (Default)
+    tol_obj = 1e-12 (Default)
+    tol_rel_obj = 10000 (Default)
+    tol_grad = 1e-08 (Default)
+    tol_rel_grad = 1e+07 (Default)
+    tol_param = 1e-08 (Default)
+    history_size = 5 (Default)
+    num_psis_draws = 1000 (Default)
+    num_paths = 4 (Default)
+    save_single_paths = false (Default)
+    psis_resample = true (Default)
+    calculate_lp = true (Default)
+    max_lbfgs_iters = 1000 (Default)
+    num_draws = 1000 (Default)
+    num_elbo_draws = 25 (Default)
+id = 1 (Default)
+data
+  file = bernoulli.data.json
+init = 2 (Default)
+random
+  seed = 2790476610 (Default)
+output
+  file = output.csv (Default)
+  diagnostic_file =  (Default)
+  refresh = 100 (Default)
+  sig_figs = 8 (Default)
+  profile_file = profile.csv (Default)
+  save_cmdstan_config = false (Default)
+num_threads = 1 (Default)
+

The rest of the output describes the progression of the algorithm.

+

By default, the Pathfinder algorithm runs 4 single-path Pathfinders in parallel, then uses importance resampling on the set of returned draws to produce the specified number of draws.

+
Path [1] :Initial log joint density = -11.543343
+Path [1] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes
+              5      -6.748e+00      1.070e-03   1.707e-05    1.000e+00  1.000e+00       126 -6.220e+00 -6.220e+00
+Path [1] :Best Iter: [5] ELBO (-6.219833) evaluations: (126)
+Path [2] :Initial log joint density = -7.443345
+Path [2] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes
+              5      -6.748e+00      9.936e-05   3.738e-07    1.000e+00  1.000e+00       126 -6.164e+00 -6.164e+00
+Path [2] :Best Iter: [5] ELBO (-6.164015) evaluations: (126)
+Path [3] :Initial log joint density = -18.986308
+Path [3] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes
+              5      -6.748e+00      2.996e-04   4.018e-06    1.000e+00  1.000e+00       126 -6.201e+00 -6.201e+00
+Path [3] :Best Iter: [5] ELBO (-6.200559) evaluations: (126)
+Path [4] :Initial log joint density = -8.304453
+Path [4] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes
+              5      -6.748e+00      2.814e-04   2.034e-06    1.000e+00  1.000e+00       126 -6.221e+00 -6.221e+00
+Path [4] :Best Iter: [3] ELBO (-6.161276) evaluations: (126)
+Total log probability function evaluations:8404
+
+

Pathfinder Configuration

+
    +
  • num_psis_draws - Final number of draws from multi-path pathfinder. Must be a positive integer. Default value is \(1000\).

  • +
  • num_paths - Number of single pathfinders. Must be a positive integer. Default value is \(4\).

  • +
  • save_single_paths - When true, save outputs from single pathfinders. Valid values: [true, false]. Default is false.

  • +
  • max_lbfgs_iters - Maximum number of L-BFGS iterations. Must be a positive integer. Default value is \(1000\).

  • +
  • num_draws - Number of approximate posterior draws for each single pathfinder. Must be a positive integer. Default value is \(1000\). Can differ from num_psis_draws.

  • +
  • num_elbo_draws - Number of Monte Carlo draws to evaluate ELBO. Must be a positive integer. Default value is \(25\).

  • +
  • psis_resample - If true, perform PSIS resampling on draws returned from individual pathfinders. If false, returns all num_paths * num_draws draws from the individual pathfinders. Valid values: [true, false]. Default is true.

  • +
  • calculate_lp - If true, log probabilities of the approximate draws are calculated and returned with the output. If false, each pathfinder will only calculate the lp values needed for the ELBO calculation. If false, PSIS resampling cannot be performed and the algorithm returns num_paths * num_draws samples. The output will still contain any lp values used when calculating ELBO scores within L-BFGS iterations. Valid values: [true, false]. Default is true.

  • +
+
+
+

L-BFGS Configuration

+

Arguments init_alpha through history_size are the full set of arguments to the L-BFGS optimizer and have the same defaults as for optimization.

+
+
+

Multi-path Pathfinder CSV files

+

By default, the pathfinder method uses 4 independent Pathfinder runs, each of which produces 1000 approximate draws, which are then importance resampled down to 1000 final draws. The importance resampled draws are output as a StanCSV file.

+

The CSV files have the following structure:

+

The initial CSV comment rows contain the complete set of CmdStan configuration options.

+
...
+# method = pathfinder
+#   pathfinder
+#     init_alpha = 0.001 (Default)
+#     tol_obj = 9.9999999999999998e-13 (Default)
+#     tol_rel_obj = 10000 (Default)
+#     tol_grad = 1e-08 (Default)
+#     tol_rel_grad = 10000000 (Default)
+#     tol_param = 1e-08 (Default)
+#     history_size = 5 (Default)
+#     num_psis_draws = 1000 (Default)
+#     num_paths = 4 (Default)
+#     psis_resample = 1 (Default)
+#     calculate_lp = 1 (Default)
+#     save_single_paths = 0 (Default)
+#     max_lbfgs_iters = 1000 (Default)
+#     num_draws = 1000 (Default)
+#     num_elbo_draws = 25 (Default)
+...
+

Next is the column header line, followed by the set of approximate draws. The Pathfinder algorithm first outputs lp_approx__, the log density in the approximating distribution, and lp__, the log density in the target distribution, followed by estimates of the model parameters, transformed parameters, and generated quantities.

+
lp_approx__,lp__,theta
+-2.4973, -8.2951, 0.0811852
+-0.87445, -7.06526, 0.160207
+-0.812285, -7.07124, 0.35819
+...
+

The final lines are comment lines which give timing information.

+
# Elapsed Time: 0.016000 seconds (Pathfinders)
+#               0.003000 seconds (PSIS)
+#               0.019000 seconds (Total)
+

Pathfinder provides option save_single_paths which will save output from the single-path Pathfinder runs.

+
+
+

Single-path Pathfinder Outputs

+

The boolean option save_single_paths is used to save both the draws and the ELBO iterations from the individual Pathfinder runs. When save_single_paths is true, the draws from each are saved to StanCSV files with the same format as the PSIS sample and the ELBO evaluations along the L-BFGS trajectory for each are saved as JSON. Given an output file name, CmdStan adds suffixes to the base filename to distinguish between the output files. For the default output file name output.csv and default number of runs (4), the resulting output files are

+
output.csv
+output_path_1.csv
+output_path_1.json
+output_path_2.csv
+output_path_2.json
+output_path_3.csv
+output_path_3.json
+output_path_4.csv
+output_path_4.json
+

The individual sample CSV files have the same structure as the PSIS sample CSV file. The JSON files contain information from each ELBO iteration.

+

To see how this works, we run Pathfinder on the centered-parameterization of the eight-schools model, where the posterior distribution has a funnel shape:

+
> ./eight_schools pathfinder save_single_paths=true data file=eight_schools.data.json
+

Each JSON file records the approximations to the target density at each point along the trajectory of the L-BFGS optimization algorithm.

+
{
+  "0": {
+    "iter": 0,
+    "unconstrained_parameters": [1.00595, -0.503687, 1.79367, 0.99083, 0.498077, -0.65816, 1.49176, -1.22647, 1.62911, 0.767445],
+    "grads": [-0.868919, 0.45198, -0.107675, -0.0123304, 0.163172, 0.354362, -0.108746, 0.673306, -0.102268, -4.51445]
+  },
+  "1": {
+    "iter": 1,
+    "unconstrained_parameters": [1.00595, -0.503687, 1.79367, 0.99083, 0.498077, -0.65816, 1.49176, -1.22647, 1.62911, 0.767445],
+    "grads": [-0.868919, 0.45198, -0.107675, -0.0123304, 0.163172, 0.354362, -0.108746, 0.673306, -0.102268, -4.51445],
+    "history_size": 1,
+    "lbfgs_success": true,
+    "pathfinder_success": true,
+    "x_center": [0.126047, -0.065048, 1.55708, 0.958509, 0.628075, -0.217041, 1.32032, -0.561338, 1.42988, 1.23213],
+    "logDetCholHk": -2.6839,
+    "L_approx": [[-0.0630456, -0.0187959], [0, 1.08328]],
+    "Qk": [[-0.361073, 0.5624], [0.183922, -0.279474], [-0.0708175, 0.15715], [-0.00917823, 0.0215802], [0.0606019, -0.0814513], [0.164071, -0.285769], [-0.057723, 0.112428], [0.276376, -0.424348], [-0.0620524, 0.131786], [-0.846488, -0.531094]],
+    "alpha": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+    "full": false,
+    "lbfgs_note": ""
+  },
+  ...,
+  "171": {
+    "iter": 171,
+    "unconstrained_parameters": [1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, -35.7821],
+    "grads": [2.66927e+15, -0.117312, -0.0639521, -2.66927e+15, -0.0445885, 0.0321579, 0.00499827, -0.163952, -0.032084, 6.4073],
+    "history_size": 5,
+    "lbfgs_success": true,
+    "pathfinder_success": true,
+    "x_center": [5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, -2.02979e+17],
+    "logDetCholHk": 299.023,
+    "L_approx": [[4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, -1.70162e+08], [0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 0, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 0, 0, 2.89552e+16]],
+    "Qk": [],
+    "alpha": [1.11027e-12, 2.24669e-12, 2.05603e-12, 3.71177e-12, 5.7855e-12, 1.80169e-12, 3.40291e-12, 2.29699e-12, 3.43423e-12, 1.25815e-08],
+    "full": true,
+    "lbfgs_note": ""
+  },
+  "172": {
+    "iter": 172,
+    "unconstrained_parameters": [1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, -35.801],
+    "grads": [-0, -0.11731, -0.0639469, 0.0179895, -0.0445842, 0.0321643, 0.00500256, -0.163947, -0.0320824, 7],
+    "history_size": 5,
+    "lbfgs_success": false,
+    "pathfinder_success": false,
+    "lbfgs_note": ""
+  }
+}
+

Option num_paths=1 runs one single-path Pathfinder and the output CSV file contains the draws from that run without PSIS reweighting. The combination of arguments num_paths=1 save_single_paths=true creates just two output files, the CSV sample and the set of ELBO iterations. In this case, the default output file name is “output.csv” and the default diagnostic file name is “output.json”.

+ + + +
+
+ + Back to top

References

+
+Zhang, Lu, Bob Carpenter, Andrew Gelman, and Aki Vehtari. 2022. “Pathfinder: Parallel Quasi-Newton Variational Inference.” Journal of Machine Learning Research 23 (306): 1–49. http://jmlr.org/papers/v23/21-0889.html. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/print.html b/docs/2_39/cmdstan-guide/print.html new file mode 100644 index 000000000..38a6da455 --- /dev/null +++ b/docs/2_39/cmdstan-guide/print.html @@ -0,0 +1,1034 @@ + + + + + + + + + +Utility print (deprecated) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + + + + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/rdump_apdx.html b/docs/2_39/cmdstan-guide/rdump_apdx.html new file mode 100644 index 000000000..aea8c2afe --- /dev/null +++ b/docs/2_39/cmdstan-guide/rdump_apdx.html @@ -0,0 +1,1290 @@ + + + + + + + + + +RDump Format for CmdStan + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

RDump Format for CmdStan

+

NOTE: Although the RDump format is still supported, I/O with JSON is faster and recommended. See the chapter on JSON for more details.

+

RDump format can be used to represent values for Stan variables. This format was introduced in SPLUS and is used in R, JAGS, and in BUGS (but with a different ordering).

+

A dump file is structured as a sequence of variable definitions. Each variable is defined in terms of its dimensionality and its values. There are three kinds of variable declarations: scalars, sequences, and general arrays.

+
+

Creating dump files

+

Dump files can be created from R using RStan, via the rstan package function stan_rdump. Stan RDump files must be created via stan_rdump and not by R’s native dump function because R’s dump function uses a richer syntax than is supported by the underlying Stan i/o libraries.

+
+
+

Scalar variables

+

A simple scalar value can be thought of as having an empty list of dimensions. Its declaration in the dump format follows the SPLUS assignment syntax. For example, the following would constitute a valid dump file defining a single scalar variable y with value \(17.2\):

+
y <- 17.2
+
+
+

Sequence variables

+

One-dimensional arrays may be specified directly using the SPLUS sequence notation. The following example defines an integer-valued and a real-valued sequence.

+
n <- c(1,2,3)
+y <- c(2.0,3.0,9.7)
+

Arrays are provided without a declaration of dimensionality because the reader just counts the number of entries to determine the size of the array.

+

Sequence variables may alternatively be represented with R’s colon-based notation. For instance, the first example above could equivalently be written as

+
n <- 1:3
+

The sequence denoted by 1:3 is of length \(3\), running from \(1\) to \(3\) inclusive. The colon notation allows sequences going from high to low. The following are equivalent:

+
n <- 2:-2
+n <- c(2,1,0,-1,-2)
+

As a special case, a sequence of zeros can also be represented in the dump format by integer(x) and double(x), for type int and double, respectively. Here x is a non-negative integer to specify the length. If x is \(0\), it can be omitted. The following are some examples.

+
x1 <- integer()
+x2 <- integer(0)
+x3 <- integer(2)
+y1 <- double()
+y2 <- double(0)
+y3 <- double(2)
+
+
+

Array variables

+

For more than one dimension, the dump format uses a dimensionality specification. For example, the following defines a \(2 \times 3\) array:

+
y <- structure(c(1,2,3,4,5,6), .Dim = c(2,3))
+

Data is stored column-major, thus the values for y will be:

+
y[1, 1] = 1
+y[1, 2] = 3
+y[1, 3] = 5
+y[2, 1] = 2
+y[2, 2] = 4
+y[2, 3] = 6
+

The structure keyword just wraps a sequence of values and a dimensionality declaration, which is itself just a sequence of non-negative integer values. The product of the dimensions must equal the length of the array.

+

If the values happen to form a contiguous sequence of integers, they may be written with colon notation. Thus the example above is equivalent to the following.

+
y <- structure(1:6, .Dim = c(2,3))
+

Sequence notation can be used within any call to the generic c() function in R. In the above example, c(2,3) could be written as c(2:3).

+

The generalization of column-major indexing is last-index major indexing. Arrays of more than two dimensions are written in a last-index major form. For example,

+
z <- structure(1:24, .Dim = c(2,3,4))
+

produces a three-dimensional int (assignable to real) array z with values:

+
z[1, 1, 1] = 1
+z[2, 1, 1] = 2
+z[1, 2, 1] = 3
+z[2, 2, 1] = 4
+z[1, 3, 1] = 5
+z[2, 3, 1] = 6
+z[1, 1, 2] = 7
+z[2, 1, 2] = 8
+z[1, 2, 2] = 9
+z[2, 2, 2] = 10
+z[1, 3, 2] = 11
+z[2, 3, 2] = 12
+z[1, 1, 3] = 13
+z[2, 1, 3] = 14
+z[1, 2, 3] = 15
+z[2, 2, 3] = 16
+z[1, 3, 3] = 17
+z[2, 3, 3] = 18
+z[1, 1, 4] = 19
+z[2, 1, 4] = 20
+z[1, 2, 4] = 21
+z[2, 2, 4] = 22
+z[1, 3, 4] = 23
+z[2, 3, 4] = 24
+

If the underlying 3-D array is stored as a 1-D array in last-index major format, the innermost array elements will be contiguous.

+

The sequence of values inside structure can also be integer(x) or double(x). In particular, if one or more dimensions is zero, integer() can be put inside structure. For instance, the following example is supported by the dump format.

+
y <- structure(integer(), .Dim = c(2,0))
+
+
+

Matrix- and vector-valued variables

+

The dump format for matrices and vectors, including arrays of matrices and vectors, is the same as that for arrays of the same shape.

+
+

Vector dump format

+

The following three declarations have the same dump format for their data.

+
array[K] real a;
+vector[K] b;
+row_vector[K] c;
+
+
+

Matrix dump format

+

The following declarations have the same dump format.

+
array[M, N] real a;
+matrix[M, N] b;
+
+
+

Arrays of vectors and matrices

+

The key to understanding arrays is that the array indexing comes before any of the container indexing. That is, an array of vectors is just that: each array element is a vector. See the chapter on array and matrix types in the user’s guide section of the language manual for more information.

+

For the dump data format, the following declarations have the same arrangement.

+
array[M, N] real a;
+matrix[M, N] b;
+array[M] vector[N] c;
+array[M] row_vector[N] d;
+

Similarly, the following also have the same dump format.

+
array[P, M, N] real a;
+array[P] matrix[M, N] b;
+array[P, M] vector[N] c;
+array[P, M] row_vector[N] d;
+
+
+
+

Complex-valued variables

+

At this time, there is no support for complex number input through the R dump format. As an alternative, the JSON input format supports complex numbers.

+
+
+

Integer- and real-valued variables

+

There is no declaration in a dump file that distinguishes integer versus continuous values. If a value in a dump file’s definition of a variable contains a decimal point (e.g., \(132.3\)) or uses scientific notation (e.g., \(1.323e2\)), Stan assumes that the values are real.

+

For a single value, if there is no decimal point, it may be assigned to an int or real variable in Stan. An array value may only be assigned to an int array if there is no decimal point or scientific notation in any of the values. This convention is compatible with the way R writes data.

+

The following dump file declares an integer value for y.

+
y <- 2
+

This definition can be used for a Stan variable y declared as real or as int. Assigning an integer value to a real variable automatically promotes the integer value to a real value.

+

Integer values may optionally be followed by L or l, denoting long integer values. The following example, where the type is explicit, is equivalent to the above.

+
y <- 2L
+

The following dump file provides a real value for y.

+
y <- 2.0
+

Even though this is a round value, the occurrence of the decimal point in the value, \(2.0\), causes Stan to infer that y is real valued. This dump file may only be used for variables y declared as real in Stan.

+
+

Scientific notation

+

Numbers written in scientific notation may only be used for real values in Stan. R will write out the integer one million as \(1e+06\).

+
+
+

Infinite and not-a-number values

+

Stan’s reader supports infinite and not-a-number values for scalar quantities (see the reference manual section of the language manual for more information on Stan’s numerical data types). Both infinite and not-a-number values are supported by Stan’s dump-format readers.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Value | Preferred Form | Alternative Forms
positive infinity | Inf | Infinity, infinity
negative infinity | -Inf | -Infinity, -infinity
not a number | NaN |
+

These strings are not case sensitive, so inf may also be used for positive infinity, or NAN for not-a-number.

+
+
+
+

Quoted variable names

+

In order to support JAGS data files, variables may be double quoted. For instance, the following definition is legal in a dump file.

+
"y" <- c(1,2,3)
+
+
+

Line breaks

+

The line breaks in a dump file are required to be consistent with the way R reads in data. Both of the following declarations are legal.

+
y <- 2
+y <-
+3
+

Also following R, breaking before the assignment arrow is not allowed, so the following is invalid.

+
y
+<- 2 # Syntax Error
+

Lines may also be broken in the middle of sequences declared using the c(...) notation, as well as between the comma following a sequence definition and the dimensionality declaration. For example, the following declaration of a \(2 \times 2 \times 3\) array is valid.

+
y <-
+structure(c(1,2,3,
+4,5,6,7,8,9,10,11,
+12), .Dim = c(2,2,
+3))
+

Because there are no decimal points in the values, the resulting dump file may be used for three-dimensional array variables declared as int or real.

+
+
+

BNF grammar for dump data

+

A more precise definition of the dump data format is provided by the following (mildly templated) Backus-Naur form grammar.

+
definition ::= name <- value optional_semicolon
+
+name ::= char*     | ''' char* '''     | '"' char* '"'
+
+value ::= value<int> | value<real>
+
+value<T> ::= T       | seq<T>       | zero_array<T>       |
+'structure' '(' seq<T> ',' ".Dim" '=' seq<int> ')'       | 'structure'
+'(' zero_array<T> ',' ".Dim" '=' seq<int> ')'
+
+seq<int> ::= int ':' int       | cseq<int>
+
+zero_array<int> ::= "integer" '(' <non-negative int>? ')'
+
+zero_array<real> ::= "double" '(' <non-negative int>? ')'
+
+seq<real> ::= cseq<real>
+
+cseq<T> ::= 'c' '(' vseq<T> ')'
+
+vseq<T> ::= T      | T ',' vseq<T>
+

The template parameter T will be set to either int or real. Because Stan allows promotion of integer values to real values, an integer sequence specification in the dump data format may be assigned to either an integer- or real-based variable in Stan.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/stan_csv_apdx.html b/docs/2_39/cmdstan-guide/stan_csv_apdx.html new file mode 100644 index 000000000..0c202f475 --- /dev/null +++ b/docs/2_39/cmdstan-guide/stan_csv_apdx.html @@ -0,0 +1,1279 @@ + + + + + + + + + +Stan CSV File Format + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Stan CSV File Format

+

The output from all CmdStan methods is in CSV format. A Stan CSV file is a data table where the columns are the method and model parameters and quantities of interest. Each row contains one record’s worth of data in plain-text format using the comma character (‘,’) as the field delimiter (hence the name).

+

For the Stan CSV files, data is strictly numerical; however, possible values include both positive and negative infinity and “Not-a-Number”, which are represented as the strings NaN, inf, +inf, -inf. All other values are written in decimal notation by default with at most 8 digits of precision. The number of significant digits written can be controlled with argument sig_figs=<int>. See the Output control arguments section for more details.

+

Stan CSV files have a header row containing the column names. They also make extensive use of CSV comments, i.e., lines which begin with the # character. In addition to initial and final comment rows, some methods also put comment rows in the middle of the data table, which makes it difficult to use many of the commonly used CSV parser packages.

+
+

CSV column names and order

+

The data table is laid out with zero or more method-specific columns followed by the Stan program variables declared in the parameters block, then the variables in the transformed parameters block, and finally the variables declared in the generated quantities block, in declaration order.

+

Stan provides three types of container objects: arrays, vectors, and matrices. In order to output all elements of a container object, it is necessary to choose an indexing notation and a serialization order. The Stan CSV file indexing notation is

+
    +
  • The column name consists of the variable name followed by the element indices.
  • +
  • Indices are delimited by periods (‘.’).
  • +
  • Indexing is 1-based, i.e., given a dimension of size \(N\), the first element index is \(1\) and the last element index is \(N\).
  • +
  • Tuples are laid out element-by-element, with each tuple slot being delimited by a colon (‘:’).
  • +
+

Container variables are serialized in column-major order, a.k.a. “Fortran” order. In column-major order for a 2-D container, all elements of column 1 are listed in ascending order, followed by all elements of column 2, thus the column index changes the slowest and the row index changes the fastest. For higher dimensions, this generalizes to the last index changing the slowest and first index changing the fastest.

+

To see how this works, consider a 3-dimensional variable with dimension sizes 2, 3, and 4, e.g., an array of matrices, a 2-D array of vectors or row_vectors, or a 3-D array of scalars. Given a Stan program with model parameter variable:

+
 array[2, 3, 4] real foo;
+

The Stan CSV file will require 24 columns to output the elements of foo. The first 6 columns will be labeled:

+
foo.1.1.1,foo.2.1.1,foo.1.2.1,foo.2.2.1,foo.1.3.1,foo.2.3.1
+

The final 6 columns will be labeled:

+
foo.1.1.4,foo.2.1.4,foo.1.2.4,foo.2.2.4,foo.1.3.4,foo.2.3.4
+

To see how a tuple would be laid out, consider the following variable:

+
tuple(real, array[3] real) bar;
+

This will correspond to 4 columns in the CSV file, which are labeled

+
bar:1,bar:2.1,bar:2.2,bar:2.3
+
+
+

MCMC sampler CSV output

+

The sample method produces both a Stan CSV output file and a diagnostic file which contains the sampler parameters together with the gradients on the unconstrained scale and log probabilities for all parameters in the model.

+

To see how this works, we show snippets of the output file resulting from the following command:

+
./bernoulli sample save_warmup=1 num_warmup=200 num_samples=100 \
+            data file=bernoulli.data.json \
+            output file=bernoulli_samples.csv
+
+

Sampler Stan CSV output file

+

The sampler output file contains the following:

+
    +
  • Initial comment rows listing full CmdStan argument configuration.
  • +
  • Header row
  • +
  • Data rows containing warmup draws, if run with option save_warmup=1
  • +
  • Comment rows for adaptation listing step size and metric used for sampling
  • +
  • Sampling draws
  • +
  • Comment rows giving timing information
  • +
+

Initial comments rows: argument configuration

+

All configuration arguments are listed, one per line, indented according to CmdStan’s hierarchy of arguments and sub-arguments. Arguments not overtly specified on the command line are annotated as (Default).

+

In the above example the num_samples, num_warmup, and save_warmup arguments were specified, whereas subargument thin is left at its default value, as seen in the initial comment rows:

+
# stan_version_major = 2
+# stan_version_minor = 24
+# stan_version_patch = 0
+# model = bernoulli_model
+# method = sample (Default)
+#   sample
+#     num_samples = 100
+#     num_warmup = 200
+#     save_warmup = 1
+#     thin = 1 (Default)
+#     adapt
+#       engaged = 1 (Default)
+#       gamma = 0.050000000000000003 (Default)
+#       delta = 0.80000000000000004 (Default)
+#       kappa = 0.75 (Default)
+#       t0 = 10 (Default)
+#       init_buffer = 75 (Default)
+#       term_buffer = 50 (Default)
+#       window = 25 (Default)
+#     algorithm = hmc (Default)
+#       hmc
+#         engine = nuts (Default)
+#           nuts
+#             max_depth = 10 (Default)
+#         metric = diag_e (Default)
+#         metric_file =  (Default)
+#         stepsize = 1 (Default)
+#         stepsize_jitter = 0 (Default)
+# id = 0 (Default)
+# data
+#   file = bernoulli.data.json
+# init = 2 (Default)
+# random
+#   seed = 2991989946 (Default)
+# output
+#   file = bernoulli_samples.csv
+#   diagnostic_file = bernoulli_diagnostics.csv
+#   refresh = 100 (Default)
+

Note that when running multi-threaded programs which use reduce_sum for high-level parallelization, the number of threads used will also be included in this initial comment header.

+

Column headers

+

The CSV header row lists all sampler parameters, model parameters, transformed parameters, and quantities of interest. The sampler parameters are described in detail in the output file section of the chapter on MCMC Sampling. The example model bernoulli.stan only contains one parameter theta, therefore the CSV file data table consists of 7 sampler parameter columns and one column for the model parameter:

+
lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta
+

As a second example, we show the output of the eight_schools.stan model when run on an example dataset. This model has 3 parameters: mu, theta a vector whose length is dependent on the input data, here N = 8, and tau. The initial columns are for the 7 sampler parameters, as before. The column headers for the model parameters are:

+
mu,theta.1,theta.2,theta.3,theta.4,theta.5,theta.6,theta.7,theta.8,tau
+

Data rows containing warmup draws

+

When run with option save_warmup=1, the thinned warmup draws are written to the CSV output file directly after the CSV header line. Since the default option is save_warmup=0, this section is usually not present in the output file.

+

Here we specified num_warmup=200 and left thin at the default value \(1\), therefore the next 200 lines are data rows containing the sampler and model parameter values for each warmup draw.

+
-6.74827,1,1,1,1,0,6.75348,0.247195
+-6.74827,4.1311e-103,14.3855,1,1,0,6.95087,0.247195
+-6.74827,1.74545e-21,2.43117,1,1,0,7.67546,0.247195
+-6.77655,0.99873,0.239791,2,7,0,6.81982,0.280619
+-6.7552,0.999392,0.323158,1,3,0,6.79175,0.26517
+

Comment rows for adaptation

+

During warmup, the sampler adjusts the stepsize and the metric. At the end of warmup, the sampler outputs this information as comments.

+
# Adaptation terminated
+# Step size = 0.813694
+# Diagonal elements of inverse mass matrix:
+# 0.592879
+

As the example bernoulli model only contains a single parameter, and as the default metric is diag_e, the inverse mass matrix is a \(1 \times 1\) matrix, and the length of the diagonal vector is also \(1\).

+

In contrast, if we run the eight schools example model with metric dense_e, the adaptation comments section lists both the stepsize and the full \(10 \times 10\) inverse mass matrix:

+
# Adaptation terminated
+# Step size = 0.211252
+# Elements of inverse mass matrix:
+# 25.6389, 17.3379, 13.9455, 15.9036, 15.1953, 8.73729, 16.9486, 14.4231, 17.4969, 0.518757
+# 17.3379, 79.8719, 12.2989, -1.28006, 9.92895, -3.51622, 10.073, 22.0196, 19.8151, 4.71028
+# 13.9455, 12.2989, 36.1572, 12.8734, 11.9446, 9.09582, 9.74519, 10.9539, 12.1204, 0.211353
+# 15.9036, -1.28006, 12.8734, 59.9998, 10.245, 8.03461, 16.9754, 3.13443, 9.68292, -1.36097
+# 15.1953, 9.92895, 11.9446, 10.245, 43.548, 15.3403, 13.0537, 7.69818, 10.1093, 0.155245
+# 8.73729, -3.51622, 9.09582, 8.03461, 15.3403, 39.981, 12.7695, 1.16248, 6.13749, -2.08507
+# 16.9486, 10.073, 9.74519, 16.9754, 13.0537, 12.7695, 45.8884, 11.6074, 8.96413, -1.15946
+# 14.4231, 22.0196, 10.9539, 3.13443, 7.69818, 1.16248, 11.6074, 49.4083, 18.9169, 3.15661
+# 17.4969, 19.8151, 12.1204, 9.68292, 10.1093, 6.13749, 8.96413, 18.9169, 68.0228, 1.74104
+# 0.518757, 4.71028, 0.211353, -1.36097, 0.155245, -2.08507, -1.15946, 3.15661, 1.74104, 1.50433
+

Note that when the sampler is run with argument algorithm=fixed_param, this section will be missing.

+

Data rows containing sampling draws

+

The output file contains the values for the thinned set of draws during sampling. Here we specified num_samples=100 and left thin at the default value \(1\), therefore the next 100 lines are data rows containing the sampler and model parameter values for each sampling iteration.

+
-8.76921,0.796814,0.813694,1,1,0,9.75854,0.535093
+-6.79143,0.979604,0.813694,1,3,0,9.13092,0.214431
+-6.79451,0.955359,0.813694,2,3,0,7.19149,0.289341
+

Timing information

+

Upon successful completion, the sampler writes timing information to the output CSV file as a series of final comment lines:

+
#
+#  Elapsed Time: 0.005 seconds (Warm-up)
+#                0.002 seconds (Sampling)
+#                0.007 seconds (Total)
+#
+
+
+

Diagnostic CSV output file

+

The diagnostic file contains the following:

+
    +
  • Initial comment rows listing full CmdStan argument configuration.
  • +
  • Header row
  • +
  • Data rows containing warmup draws, if run with option save_warmup=1
  • +
  • Sampling draws
  • +
  • Comment rows giving timing information
  • +
+

The columns in this file contain, in order:

+
    +
  • all sampler parameters
  • +
  • all model parameter estimates (on the unconstrained scale)
  • +
  • the latent Hamiltonian for each parameter
  • +
  • the gradient for each parameter
  • +
+

The labels for the latent Hamiltonian columns are the parameter column label with prefix p_ and the labels for the gradient columns are the parameter column label with prefix g_.

+

These are the column labels from the file bernoulli_diagnostic.csv:

+
lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta,p_theta,g_theta
+
+
+

Profiling CSV output file

+

The profiling information is stored in a plain CSV format with no meta information in the comments.

+

Each row represents timing information collected in a profile statement for a given thread. It is possible that some profile statements have only one entry (if they were only executed by one thread) and others have multiple entries (if they were executed by multiple threads).

+

The columns are as follows:

+
    +
  • name, The name of the profile statement that is being timed
  • +
  • thread_id, The thread that executed the profile statement
  • +
  • total_time, The combined time spent executing statements inside the profile which includes calculation with and without automatic differentiation
  • +
  • forward_time, The time spent in the profile statement during the forward pass of a reverse mode automatic differentiation calculation or during a calculation without automatic differentiation
  • +
  • reverse_time, The time spent in the profile statement during the reverse (backward) pass of reverse mode automatic differentiation
  • +
  • chain_stack, The number of objects allocated on the chaining automatic differentiation stack. There is a function call for each of these objects in the reverse pass
  • +
  • no_chain_stack, The number of objects allocated on the non-chaining automatic differentiation stack
  • +
  • autodiff_calls, The total number of times the profile statement was executed with automatic differentiation
  • +
  • no_autodiff_calls, The total number of times the profile statement was executed without automatic differentiation
  • +
+
+
+
+

Optimization output

+
    +
  • Config as comments
  • +
  • Header row
  • +
  • Penalized maximum likelihood estimate
  • +
+
+
+

Variational inference output

+
    +
  • Config as comments
  • +
  • Header row
  • +
  • Adaptation as comments
  • +
  • Variational estimate
  • +
  • Sample draws from estimate of the posterior
  • +
+
+
+

Generate quantities outputs

+
    +
  • Header row
  • +
  • Quantities of interest
  • +
+
+
+

Diagnose method outputs

+
    +
  • Header row
  • +
  • Gradients
  • +
+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/stanc.html b/docs/2_39/cmdstan-guide/stanc.html new file mode 100644 index 000000000..0a8d4f5ff --- /dev/null +++ b/docs/2_39/cmdstan-guide/stanc.html @@ -0,0 +1,1064 @@ + + + + + + + + + +Translating Stan to C++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

stanc: Translating Stan to C++

+

CmdStan translates Stan programs to C++ using the Stan compiler program which is included in the CmdStan release bin directory as program stanc. One can view the complete stanc documentation in the Stan User’s Guide.

+

As of release 2.22, the CmdStan Stan to C++ compiler is written in OCaml. This compiler is called “stanc3” and has its own repository https://github.com/stan-dev/stanc3, from which pre-built binaries for Linux, Mac, and Windows can be downloaded.

+
+

Instantiating the stanc binary

+

Before the Stan compiler can be used, the binary stanc must be created. This can be done using the makefile as follows. For Mac and Linux:

+
make bin/stanc
+

For Windows:

+
make bin/stanc.exe
+

This is also done as part of the make build command.

+
+
+

The Stan compiler program

+

The Stan compiler program stanc converts Stan programs to C++ concepts. If the compiler encounters syntax errors in the program, it will provide an error message indicating the location in the input where the failure occurred and reason for the failure. The following example illustrates a fully qualified call to stanc to generate the C++ translation of the example model bernoulli.stan. For Linux and Mac:

+
> cd <cmdstan-home>
+> bin/stanc --o=bernoulli.hpp examples/bernoulli/bernoulli.stan
+

For Windows:

+
> cd <cmdstan-home>
+> bin/stanc.exe --o=bernoulli.hpp examples/bernoulli/bernoulli.stan
+

The base name of the Stan program file determines the name of the C++ model class. Because this name is the name of a C++ class, it must start with an alphabetic character (a--z or A--Z) and contain only alphanumeric characters (a--z, A--Z, and 0--9) and underscores (_) and should not conflict with any C++ reserved keyword.

+

The C++ code implementing the class is written to the file bernoulli.hpp in the current directory. The final argument, bernoulli.stan, is the file from which to read the Stan program.

+

In practice, stanc is invoked indirectly, via the GNU Make utility, which contains rules that compile a Stan program to its corresponding executable. To build the simple Bernoulli model via make, we specify the name of the target executable file. On Mac and Linux, this is the name of the Stan program with the .stan omitted. On Windows, replace .stan with .exe, and make sure that the path is given with slashes and not backslashes. For Linux and Mac:

+
> make examples/bernoulli/bernoulli
+

For Windows:

+
> make examples/bernoulli/bernoulli.exe
+

The makefile rules first invoke the stanc compiler to translate the Stan model to C++, then compile and link the C++ code to a binary executable. The makefile variable STANCFLAGS can be used to override the default arguments to stanc, e.g.,

+
> make STANCFLAGS="--include-paths=~/foo" examples/bernoulli/bernoulli
+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/stansummary.html b/docs/2_39/cmdstan-guide/stansummary.html new file mode 100644 index 000000000..e9070745a --- /dev/null +++ b/docs/2_39/cmdstan-guide/stansummary.html @@ -0,0 +1,1212 @@ + + + + + + + + + +Summarizing MCMC Output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

stansummary: MCMC Output Analysis

+

The CmdStan stansummary program reports statistics for one or more sampler chains over all sampler and model parameters and quantities of interest. The statistics reported include both summary statistics of the estimates and diagnostic statistics on the sampler chains, reported in the following order:

+
    +
  • Mean - sample mean
  • +
  • MCSE - Monte Carlo Standard Error, a measure of the amount of noise in the sample
  • +
  • StdDev - sample standard deviation - the standard deviation around the sample mean.
  • +
  • MAD - Median Absolute Deviation - the median absolute deviation around the sample median.
  • +
  • Quantiles - default 5%, 50%, 95%
  • +
  • ESS_bulk
  • +
  • ESS_tail
  • +
  • ESS_bulk/s - Bulk ESS per second
  • +
  • R_hat - \(\hat{R}\) statistic, a MCMC convergence diagnostic
  • +
+

When reviewing the stansummary output, it is important to check the final three output columns first - these are the diagnostic statistics on MCMC convergence and effective sample size. A \(\hat{R}\) statistic of greater than \(1\) indicates potential convergence problems and that the sample is not representative of the target posterior, thus the estimates of the mean and all other summary statistics are likely to be invalid. A value of \(1.01\) can be used as a generic threshold to decide whether more iterations or further convergence analysis is needed, but other thresholds can be used depending on the specific use case.

+

Estimation by sampling produces an approximate value for the model parameters; the MCSE statistic indicates the amount of uncertainty in the estimate. Therefore the MCSE column is placed next to the sample mean column, in order to make it easy to compare this sample with others.

+

For more information, see the Posterior Analysis chapter of the Stan Reference Manual which describes both the theory and practice of MCMC estimation techniques.

+

The statistics - Mean, StdDev, MAD, and Quantiles - are computed directly from all draws across all chains. The diagnostic statistics - ESS_bulk, ESS_tail, and R_hat - are computed from the rank-normalized, folded, and split chains according to the definitions by Vehtari et al. (2021). The MCSE statistic is computed using split chain R_hat and autocorrelations. The summary statistics and the algorithms used to compute them are described in sections Notation for draws and Effective sample size.

+
+

Building the stansummary command

+

The CmdStan makefile task build compiles the stansummary utility into the bin directory. It can be compiled directly using the makefile as follows:

+
> cd <cmdstan-home>
+> make bin/stansummary
+
+
+

Running the stansummary program

+

The stansummary utility processes one or more output files from a set of chains from one run of the HMC sampler. To run stansummary on the output file or files generated by a run of the sampler, on Mac or Linux:

+
<cmdstan-home>/bin/stansummary <file_1.csv> ... <file_N.csv>
+

On Windows, use backslashes to call the stansummary.exe.

+
<cmdstan-home>\bin\stansummary.exe <file_1.csv> ... <file_N.csv>
+

For example, after running 4 chains to fit the example model eight_schools.stan to the supplied example data file, we run stansummary on the resulting Stan CSV output files to get the following report:

+
> bin/stansummary eight_*.csv
+Inference for Stan model: eight_schools_model
+4 chains: each with iter=1000; warmup=1000; thin=1; 1000 iterations saved.
+
+Warmup took (0.065, 0.078, 0.080, 0.086) seconds, 0.31 seconds total
+Sampling took (0.047, 0.044, 0.045, 0.053) seconds, 0.19 seconds total
+
+                 Mean   MCSE  StdDev    MAD       5%   50%   95%  ESS_bulk  ESS_tail  ESS_bulk/s  R_hat
+
+lp__              -19   0.31     4.9    5.0      -27   -19   -11       264       275        1396    1.0
+accept_stat__    0.77  0.024    0.31  0.096  6.5e-03  0.93  1.00       243       273        1287    1.0
+stepsize__       0.25    nan   0.016  0.016  2.2e-01  0.25  0.26       nan       nan         nan    nan
+treedepth__       3.4  0.048    0.76   0.00  2.0e+00   4.0   4.0       285       295        1507    1.0
+n_leapfrog__       13   0.80     7.1   0.00  3.0e+00    15    31       220       274        1165    1.0
+divergent__     0.015    nan    0.12   0.00  0.0e+00  0.00  0.00       nan       nan         nan    nan
+energy__           24   0.32     5.4    5.5  1.5e+01    24    33       289       488        1527    1.0
+
+mu                7.8   0.20     5.5    4.9     -1.3   7.7    17       688       915        3641    1.0
+theta[1]           12   0.28     8.7    7.4    -0.36    11    28       908       763        4802    1.0
+theta[2]          7.7   0.19     6.8    6.1     -3.4   7.8    19      1194      2011        6320    1.0
+theta[3]          5.6   0.23     8.5    7.0     -9.1   6.2    18      1260      1723        6669    1.0
+theta[4]          7.5   0.20     7.0    6.5     -4.1   7.6    19      1171      1744        6197    1.0
+theta[5]          4.6   0.21     6.7    6.3     -7.0   4.9    15      1045      1513        5530    1.0
+theta[6]          5.7   0.23     7.2    6.4     -6.8   6.0    17      1012      1626        5354    1.0
+theta[7]           11   0.24     7.1    6.6    0.025    11    24       885       473        4682    1.0
+theta[8]          8.4   0.23     8.5    7.3     -4.8   8.1    23      1280      1848        6773    1.0
+tau               7.8   0.26     5.9    4.5      1.8   6.3    18       248       178        1310    1.0
+
+Samples were drawn using hmc with nuts.
+For each parameter, ESS_bulk and ESS_tail measure the effective sample size for the entire sample (bulk)
+and for the .05 and .95 tails (tail), and R_hat measures the potential scale reduction on split chains.
+At convergence R_hat will be very close to 1.00.
+

The console output information consists of

+
    +
  • Model, chains, and timing summaries
  • +
  • Sampler parameter statistics
  • +
  • Model parameter statistics
  • +
  • Sampling algorithm - either nuts (shown here) or static HMC.
  • +
+

There is one row per parameter and the row order in the summary report corresponds to the column order in the Stan CSV output file. NaN values for some columns are expected if the value doesn’t change, e.g. if there are no divergent transitions.

+
+

Sampler parameters

+

The initial Stan CSV columns provide information on the sampler state for each draw:

+
    +
  • lp__ - the total log probability density (up to an additive constant) at each sample
  • +
  • accept_stat__ - the average Metropolis acceptance probability over each simulated Hamiltonian trajectory
  • +
  • stepsize__ - integrator step size
  • +
  • treedepth__ - depth of tree used by NUTS (NUTS sampler)
  • +
  • n_leapfrog__ - number of leapfrog calculations (NUTS sampler)
  • +
  • divergent__ - has value 1 if trajectory diverged, otherwise 0. (NUTS sampler)
  • +
  • energy__ - value of the Hamiltonian
  • +
  • int_time__ - total integration time (static HMC sampler)
  • +
+

Because we ran the NUTS sampler, the above summary reports sampler parameters treedepth__, n_leapfrog__, and divergent__; the static HMC sampler would report int_time__ instead.

+
+
+

Model parameters and quantities of interest

+

The remaining Stan CSV columns report the values of all parameters, transformed parameters, and generated quantities in the order in which these variables are declared in the Stan program. For container variables, i.e., vector, row_vector, matrix, and array variables, the statistics for each element are reported separately, in row-major order. The eight_schools.stan program parameters block contains the following parameter variable declarations:

+
  real mu;
+  array[J] real theta;
+  real<lower=0> tau;
+

In the example data, J is \(8\); therefore the stansummary listing reports on theta[1] through theta[8].

+
+
+
+

Command-line options

+

The stansummary command syntax provides a set of flags to customize the output which must precede the list of filenames. When invoked with no arguments or with the -h or --help option, the program prints the usage message to the console and exits.

+
Report statistics for one or more Stan CSV files from a HMC sampler run.
+Example:  stansummary model_chain_1.csv model_chain_2.csv
+Options:
+  -a, --autocorr [n]          Display the chain autocorrelation for the n-th
+                              input file, in addition to statistics.
+  -c, --csv_filename [file]   Write statistics to a CSV file.
+  -h, --help                  Produce help message, then exit.
+  -p, --percentiles [values]  Percentiles to report as ordered set of
+                              comma-separated numbers from (0.1,99.9), inclusive.
+                              Default is 5,50,95.
+  -s, --sig_figs [n]          Significant figures reported. Default is 2.
+                              Must be an integer from (1, 18), inclusive.
+  -i, --include_param [name]  Include the named parameter in the summary output.
+                              By default, all parameters in the file are summarized,
+                              passing this argument one or more times will filter
+                              the output down to just the requested arguments.
+

Both short and long option names are allowed. Short names are specified as -<o> <value>; long option names can be specified either as --<option>=<value> or --<option> <value>.

+

The --percentiles argument can also be passed an empty string "", which results in no percentiles being displayed in the output of the command.

+

The amount of precision in the sampler output limits the amount of real precision in the summary report. CmdStan’s command line interface also has output argument sig_figs. The default sampler output precision is 8. The --sig_figs argument to the stansummary program should not exceed the sig_figs argument to the sampler.

+ + + +
+
+ + Back to top

References

+
+Vehtari, Aki, Andrew Gelman, Daniel Simpson, Bob Carpenter, and Paul-Christian Bürkner. 2021. “Rank-Normalization, Folding, and Localization: An Improved \(\widehat{R}\) for Assessing Convergence of MCMC.” Bayesian Analysis 16: 667–718. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/cmdstan-guide/variational_config.html b/docs/2_39/cmdstan-guide/variational_config.html new file mode 100644 index 000000000..96e9ed10f --- /dev/null +++ b/docs/2_39/cmdstan-guide/variational_config.html @@ -0,0 +1,1240 @@ + + + + + + + + + +ADVI for Variational Inference + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Variational Inference using ADVI

+

Stan implements an automatic variational inference algorithm, called Automatic Differentiation Variational Inference (ADVI) Kucukelbir et al. (2017). ADVI uses Monte Carlo integration to approximate the variational objective function, the ELBO (evidence lower bound). ADVI optimizes the ELBO in the real-coordinate space using stochastic gradient ascent. The measures of convergence are similar to the relative tolerance scheme of Stan’s optimization algorithms.

+

The algorithm progression consists of an adaptation phase followed by a sampling phase. The adaptation phase finds a good value for the step size scaling parameter eta. The evidence lower bound (ELBO) is the variational objective function and is evaluated based on a Monte Carlo estimate. The variational inference algorithm in Stan is stochastic, which makes it challenging to assess convergence. The algorithm runs until the mean change in ELBO drops below the specified tolerance.

+

The full set of configuration options available for the variational method is available by using the variational help-all subcommand. The arguments with their requested values or defaults are also reported at the beginning of the algorithm’s console output and in the output CSV file’s comments.

+

The following is a minimal call to Stan’s variational inference algorithm using defaults for everything but the location of the data file.

+
> ./bernoulli variational data file=bernoulli.data.R
+

Executing this command prints output both to the console and to a CSV file.

+

The first part of the console output reports on the configuration used: the default option algorithm=meanfield and the default tolerances for monitoring the algorithm’s convergence.

+
method = variational
+  variational
+    algorithm = meanfield (Default)
+      meanfield
+    iter = 10000 (Default)
+    grad_samples = 1 (Default)
+    elbo_samples = 100 (Default)
+    eta = 1 (Default)
+    adapt
+      engaged = true (Default)
+      iter = 50 (Default)
+    tol_rel_obj = 0.01 (Default)
+    eval_elbo = 100 (Default)
+    output_samples = 1000 (Default)
+id = 1 (Default)
+data
+  file = bernoulli.data.json
+init = 2 (Default)
+random
+  seed = 2790599354 (Default)
+output
+  file = output.csv (Default)
+  diagnostic_file =  (Default)
+  refresh = 100 (Default)
+  sig_figs = 8 (Default)
+  profile_file = profile.csv (Default)
+  save_cmdstan_config = false (Default)
+num_threads = 1 (Default)
+

After the configuration has been displayed, informational and timing messages are output:

+
------------------------------------------------------------
+EXPERIMENTAL ALGORITHM:
+  This procedure has not been thoroughly tested and may be unstable
+  or buggy. The interface is subject to change.
+------------------------------------------------------------
+
+Gradient evaluation took 2.1e-05 seconds
+1000 transitions using 10 leapfrog steps per transition would take 0.21 seconds.
+Adjust your expectations accordingly!
+

The rest of the output describes the progression of the algorithm. An adaptation phase finds a good value for the step size scaling parameter eta. The evidence lower bound (ELBO) is the variational objective function and is evaluated based on a Monte Carlo estimate. The variational inference algorithm in Stan is stochastic, which makes it challenging to assess convergence. That is, while the algorithm appears to have converged in \(\sim\) 250 iterations, the algorithm runs for another few thousand iterations until mean change in ELBO drops below the default tolerance of 0.01.

+
Begin eta adaptation.
+Iteration:   1 / 250 [  0%]  (Adaptation)
+Iteration:  50 / 250 [ 20%]  (Adaptation)
+Iteration: 100 / 250 [ 40%]  (Adaptation)
+Iteration: 150 / 250 [ 60%]  (Adaptation)
+Iteration: 200 / 250 [ 80%]  (Adaptation)
+Success! Found best value [eta = 1] earlier than expected.
+
+Begin stochastic gradient ascent.
+  iter             ELBO   delta_ELBO_mean   delta_ELBO_med   notes
+   100           -6.131             1.000            1.000
+   200           -6.458             0.525            1.000
+   300           -6.300             0.359            0.051
+   400           -6.137             0.276            0.051
+   500           -6.243             0.224            0.027
+   600           -6.305             0.188            0.027
+   700           -6.289             0.162            0.025
+   800           -6.402             0.144            0.025
+   900           -6.103             0.133            0.025
+  1000           -6.314             0.123            0.027
+  1100           -6.348             0.024            0.025
+  1200           -6.244             0.020            0.018
+  1300           -6.293             0.019            0.017
+  1400           -6.250             0.017            0.017
+  1500           -6.241             0.015            0.010   MEDIAN ELBO CONVERGED
+
+Drawing a sample of size 1000 from the approximate posterior...
+COMPLETED.
+
+

Variational algorithms

+

Stan implements two variational algorithms. They differ in the approximating distribution used in the unconstrained variable space. By default, ADVI uses option algorithm=meanfield. The algorithm argument specifies the variational algorithm.

+
    +
  • algorithm=meanfield - Use a fully factorized Gaussian for the approximation. This is the default algorithm.

  • +
  • algorithm=fullrank Use a Gaussian with a full-rank covariance matrix for the approximation.

  • +
+
+
+

Configuration

+
    +
  • iter=<int> Maximum number of iterations. Must be \(> 0\). Default is \(10000\).

  • +
  • grad_samples=<int> Number of samples for Monte Carlo estimate of gradients. Must be \(> 0\). Default is \(1\).

  • +
  • elbo_samples=<int> Number of samples for Monte Carlo estimate of ELBO (objective function). Must be \(> 0\). Default is \(100\).

  • +
  • eta=<double> Stepsize weighting parameter for adaptive stepsize sequence. Must be \(> 0\). Default is \(1.0\).

  • +
  • adapt Warmup Adaptation keyword, takes sub-arguments:

    +
      +
    • engaged=<boolean> Adaptation engaged? Valid values: [true, false]. Default is true.

    • +
    • iter=<int> Maximum number of adaptation iterations. Must be \(> 0\). Default is \(50\).

    • +
  • +
  • tol_rel_obj=<double> Convergence tolerance on the relative norm of the objective. Must be \(> 0\). Default is \(0.01\).

  • +
  • eval_elbo=<int> Evaluate ELBO every Nth iteration. Must be \(> 0\). Default is 100.

  • +
  • output_samples=<int> Number of posterior samples to draw and save. Must be \(> 0\). Default is 1000.

  • +
+
+
+

CSV output

+

The output file consists of the following pieces of information:

+
    +
  • The full set of configuration options available for the variational method is reported at the beginning of the sampler output file as CSV comments.

  • +
  • The first three output columns are labelled lp__, log_p__, log_g__, the rest are the model parameters.

  • +
  • The stepsize adaptation information is output as CSV comments following the column header row.

  • +
  • The following line contains the mean of the variational approximation.

  • +
  • The rest of the output contains output_samples number of draws sampled from the variational approximation.

  • +
+

To illustrate, we call Stan’s variational inference on the example model and data:

+
> ./bernoulli variational data file=bernoulli.data.R
+

By default, the output file is output.csv.

+

The output follows the same pattern as the output for sampling, first dumping the entire set of parameters used as CSV comments:

+
# stan_version_major = 2
+# stan_version_minor = 23
+# stan_version_patch = 0
+# model = bernoulli_model
+# method = variational
+#   variational
+#     algorithm = meanfield (Default)
+#       meanfield
+#     iter = 10000 (Default)
+#     grad_samples = 1 (Default)
+#     elbo_samples = 100 (Default)
+#     eta = 1 (Default)
+#     adapt
+#       engaged = true (Default)
+#       iter = 50 (Default)
+#     tol_rel_obj = 0.01 (Default)
+#     eval_elbo = 100 (Default)
+#     output_samples = 1000 (Default)
+...
+

Next, the column header row:

+
lp__,log_p__,log_g__,theta
+

Additional comments provide stepsize adaptation information:

+
# Stepsize adaptation complete.
+# eta = 1
+

Followed by the data rows. The first line is special — it is the mean of the variational approximation.

+
0,0,0,0.214911
+

That is, the estimate for theta given the data is 0.2.

+

The rest of the output contains output_samples number of draws sampled from the variational approximation.

+

The following is a sample based on this approximation:

+
0,-14.0252,-5.21718,0.770397
+0,-7.05063,-0.10025,0.162061
+0,-6.75031,-0.0191099,0.241606
+...
+

The header indicates the unnormalized log probability with lp__. This is a legacy feature that we do not use for variational inference. The ELBO is not stored unless a diagnostic option is given.

+ + + +
+
+ + Back to top

References

+
+Kucukelbir, Alp, Dustin Tran, Rajesh Ranganath, Andrew Gelman, and David M Blei. 2017. “Automatic Differentiation Variational Inference.” Journal of Machine Learning Research. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference-2_39.pdf b/docs/2_39/functions-reference-2_39.pdf new file mode 100644 index 000000000..4a0ef0f1a Binary files /dev/null and b/docs/2_39/functions-reference-2_39.pdf differ diff --git a/docs/2_39/functions-reference/array_operations.html b/docs/2_39/functions-reference/array_operations.html new file mode 100644 index 000000000..dc0b99ca2 --- /dev/null +++ b/docs/2_39/functions-reference/array_operations.html @@ -0,0 +1,1507 @@ + + + + + + + + + +Array Operations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Array Operations

+
+

Reductions

+

The following operations take arrays as input and produce single output values. The boundary values for size 0 arrays are the unit with respect to the combination operation (min, max, sum, or product).

+
+

Minimum and maximum

+ +

+

real min(array[] real x)
The minimum value in x, or \(+\infty\) if x is size 0.

+Available since 2.0 + +

+

int min(array[] int x)
The minimum value in x, or error if x is size 0.

+Available since 2.0 + +

+

real max(array[] real x)
The maximum value in x, or \(-\infty\) if x is size 0.

+Available since 2.0 + +

+

int max(array[] int x)
The maximum value in x, or error if x is size 0.

+Available since 2.0 +
+
+

Sum, product, and log sum of exp

+ +

+

int sum(array[] int x)
The sum of the elements in x, or 0 if the array is empty.

+Available since 2.1 + +

+

real sum(array[] real x)
The sum of the elements in x; see definition above.

+Available since 2.0 + +

+

complex sum(array[] complex x)
The sum of the elements in x; see definition above.

+Available since 2.30 + +

+

real prod(array[] real x)
The product of the elements in x, or 1 if x is size 0.

+Available since 2.0 + +

+

real prod(array[] int x)
The product of the elements in x, \[\begin{equation*} +\text{product}(x) = \begin{cases} +\prod_{n=1}^N x_n & \text{if } N > 0 \\[4pt] 1 & \text{if } N = 0 +\end{cases} +\end{equation*}\]

+Available since 2.0 + +

+

real log_sum_exp(array[] real x)
The natural logarithm of the sum of the exponentials of the elements in x, or \(-\infty\) if the array is empty.

+Available since 2.0 +
+
+

Sample mean, variance, and standard deviation

+

The sample mean, variance, and standard deviation are calculated in the usual way. For i.i.d. draws from a distribution of finite mean, the sample mean is an unbiased estimate of the mean of the distribution. Similarly, for i.i.d. draws from a distribution of finite variance, the sample variance is an unbiased estimate of the variance.1 The sample standard deviation is defined as the square root of the sample variance, but is not unbiased.

+ +

+

real mean(array[] real x)
The sample mean of the elements in x. For an array \(x\) of size \(N > +0\), \[\begin{equation*} +\text{mean}(x) \ = \ \bar{x} \ = \ \frac{1}{N} \sum_{n=1}^N +x_n. +\end{equation*}\] It is an error to call the mean function with an array of size \(0\).

+Available since 2.0 + +

+

real variance(array[] real x)
The sample variance of the elements in x. For \(N > 0\), \[\begin{equation*} +\text{variance}(x) \ = \ \begin{cases} \frac{1}{N-1} \sum_{n=1}^N (x_n +- \bar{x})^2 & \text{if } N > 1 \\[4pt] 0 & \text{if } N = 1 +\end{cases} +\end{equation*}\] It is an error to call the variance function with an array of size 0.

+Available since 2.0 + +

+

real sd(array[] real x)
The sample standard deviation of elements in x. \[\begin{equation*} +\text{sd}(x) = +\begin{cases} \sqrt{\, \text{variance}(x)} & \text{if } N > 1 \\[4pt] +0 & \text{if } N = 1 \end{cases} +\end{equation*}\] It is an error to call the sd function with an array of size 0.

+Available since 2.0 +
+
+

Norms

+ +

+

real norm1(vector x)
The L1 norm of x, defined by \[\begin{equation*} +\text{norm1}(x) \ = \ \textstyle \sum_{n=1}^N (|x_n|) +\end{equation*}\] where N is the size of x.

+Available since 2.30 + +

+

real norm1(row_vector x)
The L1 norm of x

+Available since 2.30 + +

+

real norm1(array[] real x)
The L1 norm of x

+Available since 2.30 + +

+

real norm2(vector x)
The L2 norm of x, defined by \[\begin{equation*} +\text{norm2}(x) \ = \ \sqrt{\textstyle \sum_{n=1}^N (x_n)^2} +\end{equation*}\] where N is the size of x

+Available since 2.30 + +

+

real norm2(row_vector x)
The L2 norm of x

+Available since 2.30 + +

+

real norm2(array[] real x)
The L2 norm of x

+Available since 2.30 +
+
+

Euclidean distance and squared distance

+ +

+

real distance(vector x, vector y)
The Euclidean distance between x and y, defined by \[\begin{equation*} +\text{distance}(x,y) \ = \ \sqrt{\textstyle \sum_{n=1}^N (x_n - y_n)^2} +\end{equation*}\] where N is the size of x and y. It is an error to call distance with arguments of unequal size.

+Available since 2.2 + +

+

real distance(vector x, row_vector y)
The Euclidean distance between x and y

+Available since 2.2 + +

+

real distance(row_vector x, vector y)
The Euclidean distance between x and y

+Available since 2.2 + +

+

real distance(row_vector x, row_vector y)
The Euclidean distance between x and y

+Available since 2.2 + +

+

real squared_distance(vector x, vector y)
The squared Euclidean distance between x and y, defined by \[\begin{equation*} +\mathrm{squared\_distance}(x,y) \ = \ \text{distance}(x,y)^2 \ = \ \textstyle \sum_{n=1}^N (x_n - y_n)^2, +\end{equation*}\] where N is the size of x and y. It is an error to call squared_distance with arguments of unequal size.

+Available since 2.7 + +

+

real squared_distance(vector x, row_vector y)
The squared Euclidean distance between x and y

+Available since 2.26 + +

+

real squared_distance(row_vector x, vector y)
The squared Euclidean distance between x and y

+Available since 2.26 + +

+

real squared_distance(row_vector x, row_vector y)
The squared Euclidean distance between x and y

+Available since 2.26 +
+
+

Quantile

+

Produces sample quantiles corresponding to the given probabilities. The smallest observation corresponds to a probability of 0 and the largest to a probability of 1.

+

Implements algorithm 7 from Hyndman, R. J. and Fan, Y., Sample quantiles in Statistical Packages (R’s default quantile function).

+ +

+

real quantile(data array[] real x, data real p)
The p-th quantile of x

+Available since 2.27 + +

+

array[] real quantile(data array[] real x, data array[] real p)
An array containing the quantiles of x given by the array of probabilities p

+Available since 2.27 +
+
+
+

Array size and dimension function

+

The size of an array or matrix can be obtained using the dims() function. The dims() function is defined to take an argument consisting of any variable with up to 8 array dimensions (and up to 2 additional matrix dimensions) and returns an array of integers with the dimensions. For example, if two variables are declared as follows,

+
 array[7, 8, 9] real x;
+ array[7] matrix[8, 9] y;
+

then calling dims(x) or dims(y) returns an integer array of size 3 containing the elements 7, 8, and 9 in that order.

+

The size() function extracts the number of elements in an array. This is just the top-level elements, so if the array is declared as

+
 array[M, N] real a;
+

the size of a is M.

+

The function num_elements, on the other hand, measures all of the elements, so that the array a above has \(M \times N\) elements.

+

The specialized functions rows() and cols() should be used to extract the dimensions of vectors and matrices.

+ +

+

array[] int dims(T x)
Return an integer array containing the dimensions of x; the type of the argument T can be any Stan type with up to 8 array dimensions.

+Available since 2.0 + +

+

int num_elements(array[] T x)
Return the total number of elements in the array x including all elements in contained arrays, vectors, and matrices. T can be any array type. For example, if x is of type array[4, 3] real then num_elements(x) is 12, and if y is declared as array[5] matrix[3, 4] y, then num_elements(y) evaluates to 60.

+Available since 2.5 + +

+

int size(array[] T x)
Return the number of elements in the array x; the type of the array T can be any type, but the size is just the size of the top level array, not the total number of elements contained. For example, if x is of type array[4, 3] real then size(x) is 4.

+Available since 2.0 +
+
+

Array broadcasting

+

The following operations create arrays by repeating elements to fill an array of a specified size. These operations work for all input types T, including reals, integers, vectors, row vectors, matrices, or arrays.

+ +

+

array[] T rep_array(T x, int n)
Return the size n array with every entry assigned to x.

+Available since 2.0 + +

+

array[,] T rep_array(T x, int m, int n)
Return the m by n array with every entry assigned to x.

+Available since 2.0 + +

+

array[,,] T rep_array(T x, int k, int m, int n)
Return the k by m by n array with every entry assigned to x.

+Available since 2.0 +

For example, rep_array(1.0,5) produces a real array (type array[] real) of size 5 with all values set to 1.0. On the other hand, rep_array(1,5) produces an integer array (type array[] int) of size 5 with all values set to 1. This distinction is important because it is not possible to assign an integer array to a real array. The following code contrasts legal with illegal array creation and assignment.

+
 array[5] real y;
+ array[5] int x;
+
+ x = rep_array(1, 5);     // ok
+ y = rep_array(1.0, 5);   // ok
+
+ x = rep_array(1.0, 5);   // illegal
+ y = rep_array(1, 5);     // illegal
+
+ x = y;                  // illegal
+ y = x;                  // illegal
+

If the value being repeated v is a vector (i.e., T is vector), then rep_array(v, 27) is a size 27 array consisting of 27 copies of the vector v.

+
 vector[5] v;
+ array[3] vector[5] a;
+
+ a = rep_array(v, 3);  // fill a with copies of v
+ a[2, 4] = 9.0;        // v[4], a[1, 4], a[3, 4] unchanged
+

If the type T of x is itself an array type, then the result will be an array with one, two, or three added dimensions, depending on which of the rep_array functions is called. For instance, consider the following legal code snippet.

+
 array[5, 6] real a;
+ array[3, 4, 5, 6] real b;
+
+ b = rep_array(a, 3, 4); //  make (3 x 4) copies of a
+ b[1, 1, 1, 1] = 27.9;    //  a[1, 1] unchanged
+

After the assignment to b, the value for b[j, k, m, n] is equal to a[m, n] where it is defined, for j in 1:3, k in 1:4, m in 1:5, and n in 1:6.

+
+
+

Array concatenation

+ +

+

T append_array(T x, T y)
Return the concatenation of two arrays in the order of the arguments. T must be an N-dimensional array of any Stan type (with a maximum N of 7). All dimensions but the first must match.

+Available since 2.18 +

For example, the following code appends two three dimensional arrays of matrices together. Note that all dimensions except the first match. Any mismatches will cause an error to be thrown.

+
 array[2, 1, 7] matrix[4, 6] x1;
+ array[3, 1, 7] matrix[4, 6] x2;
+ array[5, 1, 7] matrix[4, 6] x3;
+
+ x3 = append_array(x1, x2);
+
+
+

Sorting functions

+

Sorting can be used to sort values or the indices of those values in either ascending or descending order. For example, if v is declared as a real array of size 3, with values \[\begin{equation*} +\text{v} = (1, -10.3, +20.987), +\end{equation*}\] then the various sort routines produce \[\begin{eqnarray*} +\mathrm{sort\_asc(v)} & = & (-10.3,1,20.987) \\[4pt] +\mathrm{sort\_desc(v)} & = & (20.987,1,-10.3) \\[4pt] +\mathrm{sort\_indices\_asc(v)} & = & (2,1,3) \\[4pt] +\mathrm{sort\_indices\_desc(v)} & = & (3,1,2) +\end{eqnarray*}\]

+ +

+

array[] real sort_asc(array[] real v)
Sort the elements of v in ascending order

+Available since 2.0 + +

+

array[] int sort_asc(array[] int v)
Sort the elements of v in ascending order

+Available since 2.0 + +

+

array[] real sort_desc(array[] real v)
Sort the elements of v in descending order

+Available since 2.0 + +

+

array[] int sort_desc(array[] int v)
Sort the elements of v in descending order

+Available since 2.0 + +

+

array[] int sort_indices_asc(array[] real v)
Return an array of indices between 1 and the size of v, sorted to index v in ascending order.

+Available since 2.3 + +

+

array[] int sort_indices_asc(array[] int v)
Return an array of indices between 1 and the size of v, sorted to index v in ascending order.

+Available since 2.3 + +

+

array[] int sort_indices_desc(array[] real v)
Return an array of indices between 1 and the size of v, sorted to index v in descending order.

+Available since 2.3 + +

+

array[] int sort_indices_desc(array[] int v)
Return an array of indices between 1 and the size of v, sorted to index v in descending order.

+Available since 2.3 + +

+

int rank(array[] real v, int s)
Number of components of v less than v[s]

+Available since 2.0 + +

+

int rank(array[] int v, int s)
Number of components of v less than v[s]

+Available since 2.0 +
+
+

Reversing functions

+

Stan provides functions to create a new array by reversing the order of elements in an existing array. For example, if v is declared as a real array of size 3, with values \[\begin{equation*} +\text{v} = (1,\, -10.3,\, 20.987), +\end{equation*}\] then \[\begin{equation*} +\mathrm{reverse(v)} = (20.987,\, -10.3,\, 1). +\end{equation*}\]

+ +

+

array[] T reverse(array[] T v)
Return a new array containing the elements of the argument in reverse order.

+Available since 2.23 + + +
+
+ + + Back to top

Footnotes

+ +
    +
  1. Dividing by \(N\) rather than \((N-1)\) produces a maximum likelihood estimate of variance, which is biased to underestimate variance.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/binary_distributions.html b/docs/2_39/functions-reference/binary_distributions.html new file mode 100644 index 000000000..18a7b19d6 --- /dev/null +++ b/docs/2_39/functions-reference/binary_distributions.html @@ -0,0 +1,1331 @@ + + + + + + + + + +Binary Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Binary Distributions

+

Binary probability distributions have support on \(\{0,1\}\), where 1 represents the value true and 0 the value false.

+
+

Bernoulli distribution

+
+

Probability mass function

+

If \(\theta \in [0,1]\), then for \(y \in \{0,1\}\), \[\begin{equation*} +\text{Bernoulli}(y~|~\theta) = \left\{ \begin{array}{ll} \theta & +\text{if } y = 1, \text{ and} \\ 1 - \theta & \text{if } y = 0. +\end{array} \right. +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ bernoulli(theta)

+

Increment target log probability density with bernoulli_lupmf(y | theta).

+Available since 2.0 + +

+
+
+

Stan Functions

+ +

+

real bernoulli_lpmf(ints y | reals theta)
The log Bernoulli probability mass of y given chance of success theta

+Available since 2.12 + +

+

real bernoulli_lupmf(ints y | reals theta)
The log Bernoulli probability mass of y given chance of success theta dropping constant additive terms

+Available since 2.25 + +

+

real bernoulli_cdf(ints y | reals theta)
The Bernoulli cumulative distribution function of y given chance of success theta

+Available since 2.0 + +

+

real bernoulli_lcdf(ints y | reals theta)
The log of the Bernoulli cumulative distribution function of y given chance of success theta

+Available since 2.12 + +

+

real bernoulli_lccdf(ints y | reals theta)
The log of the Bernoulli complementary cumulative distribution function of y given chance of success theta

+Available since 2.12 + +

+

ints bernoulli_rng(reals theta)
Generate a Bernoulli variate with chance of success theta or an array of Bernoulli variates given an array of thetas of the same dimensions; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Bernoulli distribution, logit parameterization

+

Stan also supplies a direct parameterization in terms of a logit-transformed chance-of-success parameter. This parameterization is more numerically stable if the chance-of-success parameter is on the logit scale, as with the linear predictor in a logistic regression.

+
+

Probability mass function

+

If \(\alpha \in \mathbb{R}\), then for \(y \in \{0,1\}\), \[\begin{equation*} +\text{BernoulliLogit}(y~|~\alpha) = \text{Bernoulli}(y | +\text{logit}^{-1}(\alpha)) = \left\{ \begin{array}{ll} +\text{logit}^{-1}(\alpha) & \text{if } y = 1, \text{ and} \\ 1 - +\text{logit}^{-1}(\alpha) & \text{if } y = 0. \end{array} \right. +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ bernoulli_logit(alpha)

+

Increment target log probability density with bernoulli_logit_lupmf(y | alpha).

+Available since 2.0 + +

+
+
+

Stan Functions

+ +

+

real bernoulli_logit_lpmf(ints y | reals alpha)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha)

+Available since 2.12 + +

+

real bernoulli_logit_lupmf(ints y | reals alpha)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha) dropping constant additive terms

+Available since 2.25 + +

+

R bernoulli_logit_rng(reals alpha)
Generate a Bernoulli variate with chance of success \(\text{logit}^{-1}(\alpha)\); may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Bernoulli-logit generalized linear model (Logistic Regression)

+

Stan also supplies a single function for a generalized linear model with Bernoulli distribution and logit link function, i.e. a function for a logistic regression. This provides a more efficient implementation of logistic regression than a manually written regression in terms of a Bernoulli distribution and matrix multiplication.

+
+

Probability mass function

+

If \(x\in \mathbb{R}^{n\times m}, \alpha \in \mathbb{R}^n, \beta\in +\mathbb{R}^m\), then for \(y \in {\{0,1\}}^n\), \[\begin{align*} +&\text{BernoulliLogitGLM}(y~|~x, \alpha, \beta) = \prod_{1\leq i \leq +n}\text{Bernoulli}(y_i~|~\text{logit}^{-1}(\alpha_i + x_i\cdot +\beta))\\ &= \prod_{1\leq i \leq n} \left\{ \begin{array}{ll} +\text{logit}^{-1}(\alpha_i + \sum_{1\leq j\leq m}x_{ij}\cdot \beta_j) +& \text{if } y_i = 1, \text{ and} \\ 1 - \text{logit}^{-1}(\alpha_i + +\sum_{1\leq j\leq m}x_{ij}\cdot \beta_j) & \text{if } y_i = 0. +\end{array} \right. \end{align*}\]

+
+
+

Distribution statement

+

y ~ bernoulli_logit_glm(x, alpha, beta)

+

Increment target log probability density with bernoulli_logit_glm_lupmf(y | x, alpha, beta).

+Available since 2.25 + +

+
+
+

Stan Functions

+ +

+

real bernoulli_logit_glm_lpmf(int y | matrix x, real alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).

+Available since 2.23 + +

+

real bernoulli_logit_glm_lupmf(int y | matrix x, real alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.25 + +

+

real bernoulli_logit_glm_lpmf(int y | matrix x, vector alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).

+Available since 2.23 + +

+

real bernoulli_logit_glm_lupmf(int y | matrix x, vector alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.25 + +

+

real bernoulli_logit_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).

+Available since 2.23 + +

+

real bernoulli_logit_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.25 + +

+

real bernoulli_logit_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).

+Available since 2.23 + +

+

real bernoulli_logit_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.25 + +

+

real bernoulli_logit_glm_lpmf(array[] int y | matrix x, real alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).

+Available since 2.18 + +

+

real bernoulli_logit_glm_lupmf(array[] int y | matrix x, real alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.25 + +

+

real bernoulli_logit_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).

+Available since 2.18 + +

+

real bernoulli_logit_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta)
The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.25 + +

+

array[] int bernoulli_logit_glm_rng(matrix x, vector alpha, vector beta)
Generate an array of Bernoulli variates with chances of success inv_logit(alpha + x * beta); may only be used in transformed data and generated quantities blocks.

+Available since 2.29 + +

+

array[] int bernoulli_logit_glm_rng(row_vector x, vector alpha, vector beta)
Generate an array of Bernoulli variates with chances of success inv_logit(alpha + x * beta); may only be used in transformed data and generated quantities blocks.

+Available since 2.29 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/bounded_continuous_distributions.html b/docs/2_39/functions-reference/bounded_continuous_distributions.html new file mode 100644 index 000000000..eda015ead --- /dev/null +++ b/docs/2_39/functions-reference/bounded_continuous_distributions.html @@ -0,0 +1,1197 @@ + + + + + + + + + +Bounded Continuous Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Bounded Continuous Distributions

+

The bounded continuous probability distributions have support on a finite interval of real numbers.

+
+

Uniform distribution

+
+

Probability density function

+

If \(\alpha \in \mathbb{R}\) and \(\beta \in (\alpha,\infty)\), then for \(y \in [\alpha,\beta]\), \[\begin{equation*} \text{Uniform}(y|\alpha,\beta) = +\frac{1}{\beta - \alpha} . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ uniform(alpha, beta)

+

Increment target log probability density with uniform_lupdf(y | alpha, beta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real uniform_lpdf(reals y | reals alpha, reals beta)
The log of the uniform density of y given lower bound alpha and upper bound beta

+Available since 2.12 + +

+

real uniform_lupdf(reals y | reals alpha, reals beta)
The log of the uniform density of y given lower bound alpha and upper bound beta dropping constant additive terms

+Available since 2.25 + +

+

real uniform_cdf(reals y | reals alpha, reals beta)
The uniform cumulative distribution function of y given lower bound alpha and upper bound beta

+Available since 2.0 + +

+

real uniform_lcdf(reals y | reals alpha, reals beta)
The log of the uniform cumulative distribution function of y given lower bound alpha and upper bound beta

+Available since 2.12 + +

+

real uniform_lccdf(reals y | reals alpha, reals beta)
The log of the uniform complementary cumulative distribution function of y given lower bound alpha and upper bound beta

+Available since 2.12 + +

+

R uniform_rng(reals alpha, reals beta)
Generate a uniform variate with lower bound alpha and upper bound beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/bounded_discrete_distributions.html b/docs/2_39/functions-reference/bounded_discrete_distributions.html new file mode 100644 index 000000000..36da4d4c7 --- /dev/null +++ b/docs/2_39/functions-reference/bounded_discrete_distributions.html @@ -0,0 +1,1768 @@ + + + + + + + + + +Bounded Discrete Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Bounded Discrete Distributions

+

Bounded discrete probability functions have support on \(\{ 0, \ldots, +N \}\) for some upper bound \(N\).

+
+

Binomial distribution

+
+

Probability mass function

+

Suppose \(N \in \mathbb{N}\) and \(\theta \in [0,1]\), and \(n \in +\{0,\ldots,N\}\). \[\begin{equation*} \text{Binomial}(n~|~N,\theta) = \binom{N}{n} +\theta^n (1 - \theta)^{N - n}. \end{equation*}\]

+
+
+

Log probability mass function

+

\[\begin{eqnarray*} \log \text{Binomial}(n~|~N,\theta) & = & \log +\Gamma(N+1) - \log \Gamma(n + 1) - \log \Gamma(N- n + 1) \\[4pt] & & { +} + n \log \theta + (N - n) \log (1 - \theta), \end{eqnarray*}\]

+
+
+

Gradient of log probability mass function

+

\[\begin{equation*} \frac{\partial}{\partial \theta} \log \text{Binomial}(n~|~N,\theta) += \frac{n}{\theta} - \frac{N - n}{1 - \theta} \end{equation*}\]

+
+
+

Distribution statement

+

n ~ binomial(N, theta)

+

Increment target log probability density with binomial_lupmf(n | N, theta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real binomial_lpmf(ints n | ints N, reals theta)
The log binomial probability mass of n successes in N trials given chance of success theta

+Available since 2.12 + +

+

real binomial_lupmf(ints n | ints N, reals theta)
The log binomial probability mass of n successes in N trials given chance of success theta dropping constant additive terms

+Available since 2.25 + +

+

real binomial_cdf(ints n | ints N, reals theta)
The binomial cumulative distribution function of n successes in N trials given chance of success theta

+Available since 2.0 + +

+

real binomial_lcdf(ints n | ints N, reals theta)
The log of the binomial cumulative distribution function of n successes in N trials given chance of success theta

+Available since 2.12 + +

+

real binomial_lccdf(ints n | ints N, reals theta)
The log of the binomial complementary cumulative distribution function of n successes in N trials given chance of success theta

+Available since 2.12 + +

+

R binomial_rng(ints N, reals theta)
Generate a binomial variate with N trials and chance of success theta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Binomial distribution, logit parameterization

+

Stan also provides a version of the binomial distribution with the chance of success parameterized on the unconstrained logit scale.

+
+

Probability mass function

+

Suppose \(N \in \mathbb{N}\), \(\alpha \in \mathbb{R}\), and \(n \in +\{0,\ldots,N\}\). Then \[\begin{eqnarray*} +\text{BinomialLogit}(n~|~N,\alpha) & = & +\text{Binomial}(n~|~N,\text{logit}^{-1}(\alpha)) \\[6pt] & = & +\binom{N}{n} \left( \text{logit}^{-1}(\alpha) \right)^{n} \left( 1 - +\text{logit}^{-1}(\alpha) \right)^{N - n}. \end{eqnarray*}\]

+
+
+

Log probability mass function

+

\[\begin{eqnarray*} \log \text{BinomialLogit}(n~|~N,\alpha) & = & \log +\Gamma(N+1) - \log \Gamma(n + 1) - \log \Gamma(N- n + 1) \\[4pt] & & +{ } + n \log \text{logit}^{-1}(\alpha) + (N - n) \log \left( 1 - +\text{logit}^{-1}(\alpha) \right), \end{eqnarray*}\]

+
+
+

Gradient of log probability mass function

+

\[\begin{equation*} \frac{\partial}{\partial \alpha} \log +\text{BinomialLogit}(n~|~N,\alpha) = +n \, \text{logit}^{-1}(-\alpha) - (N - +n) \, \text{logit}^{-1}(\alpha) \end{equation*}\]

+
+
+

Distribution statement

+

n ~ binomial_logit(N, alpha)

+

Increment target log probability density with binomial_logit_lupmf(n | N, alpha).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real binomial_logit_lpmf(ints n | ints N, reals alpha)
The log binomial probability mass of n successes in N trials given logit-scaled chance of success alpha

+Available since 2.12 + +

+

real binomial_logit_lupmf(ints n | ints N, reals alpha)
The log binomial probability mass of n successes in N trials given logit-scaled chance of success alpha dropping constant additive terms

+Available since 2.25 +
+
+
+

Binomial-logit generalized linear model (Logistic Regression)

+

Stan also supplies a single function for a generalized linear model with binomial distribution and logit link function, i.e., a function for logistic regression with aggregated outcomes. This provides a more efficient implementation of logistic regression than a manually written regression in terms of a binomial distribution and matrix multiplication.

+
+

Probability mass function

+

Suppose \(N \in \mathbb{N}\), \(x\in \mathbb{R}^{n\cdot m}, \alpha \in \mathbb{R}^n, \beta \in \mathbb{R}^m\), and \(n \in +\{0,\ldots,N\}\). Then \[\begin{align*} + &\text{BinomialLogitGLM}(n~|~N, x, \alpha, \beta) = \text{Binomial}(n~|~N,\text{logit}^{-1}(\alpha_i + x_i \cdot \beta)) \\ + &= \binom{N}{n} \left( \text{logit}^{-1}(\alpha_i + \sum_{1\leq j\leq m}x_{ij}\cdot \beta_j) \right)^{n} \left( 1 - \text{logit}^{-1}(\alpha_i + \sum_{1\leq j\leq m}x_{ij}\cdot \beta_j) \right)^{N - n}. +\end{align*}\]

+
+
+

Distribution statement

+

n ~ binomial_logit_glm(N, x, alpha, beta)

+

Increment target log probability density with binomial_logit_glm_lupmf(n | N, x, alpha, beta).

+Available since 2.34 + +

+
+
+

Stan Functions

+ +

+

real binomial_logit_glm_lpmf(int n | int N, matrix x, real alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).

+Available since 2.34 + +

+

real binomial_logit_glm_lupmf(int n | int N, matrix x, real alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.34 + +

+

real binomial_logit_glm_lpmf(int n | int N, matrix x, vector alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).

+Available since 2.34 + +

+

real binomial_logit_glm_lupmf(int n | int N, matrix x, vector alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.34 + +

+

real binomial_logit_glm_lpmf(array[] int n | array[] int N, row_vector x, real alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).

+Available since 2.34 + +

+

real binomial_logit_glm_lupmf(array[] int n | array[] int N, row_vector x, real alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.34 + +

+

real binomial_logit_glm_lpmf(array[] int n | array[] int N, row_vector x, vector alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).

+Available since 2.34 + +

+

real binomial_logit_glm_lupmf(array[] int n | array[] int N, row_vector x, vector alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.34 + +

+

real binomial_logit_glm_lpmf(array[] int n | array[] int N, matrix x, real alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).

+Available since 2.34 + +

+

real binomial_logit_glm_lupmf(array[] int n | array[] int N, matrix x, real alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.34 + +

+

real binomial_logit_glm_lpmf(array[] int n | array[] int N, matrix x, vector alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).

+Available since 2.34 + +

+

real binomial_logit_glm_lupmf(array[] int n | array[] int N, matrix x, vector alpha, vector beta)
The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.

+Available since 2.34 +
+
+
+

Beta-binomial distribution

+
+

Probability mass function

+

If \(N \in \mathbb{N}\), \(\alpha \in \mathbb{R}^+\), and \(\beta \in +\mathbb{R}^+\), then for \(n \in \{0,\ldots,N\}\), \[\begin{equation*} +\text{BetaBinomial}(n~|~N,\alpha,\beta) = \binom{N}{n} +\frac{\mathrm{B}(n+\alpha, N -n + \beta)}{\mathrm{B}(\alpha,\beta)}, +\end{equation*}\] where the beta function \(\mathrm{B}(u,v)\) is defined for \(u \in +\mathbb{R}^+\) and \(v \in \mathbb{R}^+\) by \[\begin{equation*} \mathrm{B}(u,v) = +\frac{\Gamma(u) \ \Gamma(v)}{\Gamma(u + v)}. \end{equation*}\]

+
+
+

Distribution statement

+

n ~ beta_binomial(N, alpha, beta)

+

Increment target log probability density with beta_binomial_lupmf(n | N, alpha, beta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real beta_binomial_lpmf(ints n | ints N, reals alpha, reals beta)
The log beta-binomial probability mass of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta

+Available since 2.12 + +

+

real beta_binomial_lupmf(ints n | ints N, reals alpha, reals beta)
The log beta-binomial probability mass of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta dropping constant additive terms

+Available since 2.25 + +

+

real beta_binomial_cdf(ints n | ints N, reals alpha, reals beta)
The beta-binomial cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta

+Available since 2.0 + +

+

real beta_binomial_lcdf(ints n | ints N, reals alpha, reals beta)
The log of the beta-binomial cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta

+Available since 2.12 + +

+

real beta_binomial_lccdf(ints n | ints N, reals alpha, reals beta)
The log of the beta-binomial complementary cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta

+Available since 2.12 + +

+

R beta_binomial_rng(ints N, reals alpha, reals beta)
Generate a beta-binomial variate with N trials, prior success count (plus one) of alpha, and prior failure count (plus one) of beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Hypergeometric distribution

+
+

Probability mass function

+

If \(a \in \mathbb{N}\), \(b \in \mathbb{N}\), and \(N \in +\{0,\ldots,a+b\}\), then for \(n \in \{\max(0,N-b),\ldots,\min(a,N)\}\), \[\begin{equation*} \text{Hypergeometric}(n~|~N,a,b) = \frac{\normalsize{\binom{a}{n} +\binom{b}{N - n}}} {\normalsize{\binom{a + b}{N}}}. \end{equation*}\]

+
+
+

Distribution statement

+

n ~ hypergeometric(N, a, b)

+

Increment target log probability density with hypergeometric_lupmf(n | N, a, b).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real hypergeometric_lpmf(int n | int N, int a, int b)
The log hypergeometric probability mass of n successes in N trials given total success count of a and total failure count of b

+Available since 2.12 + +

+

real hypergeometric_lupmf(int n | int N, int a, int b)
The log hypergeometric probability mass of n successes in N trials given total success count of a and total failure count of b dropping constant additive terms

+Available since 2.25 + +

+

int hypergeometric_rng(int N, int a, int b)
Generate a hypergeometric variate with N trials, total success count of a, and total failure count of b; may only be used in transformed data and generated quantities blocks

+Available since 2.18 +
+
+
+

Categorical distribution

+
+

Probability mass functions

+

If \(N \in \mathbb{N}\), \(N > 0\), and if \(\theta \in \mathbb{R}^N\) forms an \(N\)-simplex (i.e., has nonnegative entries summing to one), then for \(y \in \{1,\ldots,N\}\), \[\begin{equation*} \text{Categorical}(y~|~\theta) = +\theta_y. \end{equation*}\] In addition, Stan provides a log-odds scaled categorical distribution, \[\begin{equation*} \text{CategoricalLogit}(y~|~\beta) = +\text{Categorical}(y~|~\text{softmax}(\beta)). \end{equation*}\] See the definition of softmax for the definition of the softmax function.

+
+
+

Distribution statement

+

y ~ categorical(theta)

+

Increment target log probability density with categorical_lupmf(y | theta) dropping constant additive terms.

+Available since 2.0 + +

+
+
+

Distribution statement

+

y ~ categorical_logit(beta)

+

Increment target log probability density with categorical_logit_lupmf(y | beta).

+Available since 2.4 + +

+
+
+

Stan functions

+

All of the categorical distributions are vectorized so that the outcome y can be a single integer (type int) or an array of integers (type array[] int).

+ +

+

real categorical_lpmf(ints y | vector theta)
The log categorical probability mass function with outcome(s) y in \(1:N\) given \(N\)-vector of outcome probabilities theta. The parameter theta must have non-negative entries that sum to one, but it need not be a variable declared as a simplex.

+Available since 2.12 + +

+

real categorical_lupmf(ints y | vector theta)
The log categorical probability mass function with outcome(s) y in \(1:N\) given \(N\)-vector of outcome probabilities theta dropping constant additive terms. The parameter theta must have non-negative entries that sum to one, but it need not be a variable declared as a simplex.

+Available since 2.25 + +

+

real categorical_logit_lpmf(ints y | vector beta)
The log categorical probability mass function with outcome(s) y in \(1:N\) given log-odds of outcomes beta.

+Available since 2.12 + +

+

real categorical_logit_lupmf(ints y | vector beta)
The log categorical probability mass function with outcome(s) y in \(1:N\) given log-odds of outcomes beta dropping constant additive terms.

+Available since 2.25 + +

+

int categorical_rng(vector theta)
Generate a categorical variate with \(N\)-simplex distribution parameter theta; may only be used in transformed data and generated quantities blocks

+Available since 2.0 + +

+

int categorical_logit_rng(vector beta)
Generate a categorical variate with outcome in range \(1:N\) from log-odds vector beta; may only be used in transformed data and generated quantities blocks

+Available since 2.16 +
+
+
+

Categorical logit generalized linear model (softmax regression)

+

Stan also supplies a single function for a generalized linear model with categorical distribution and logit link function, i.e. a function for a softmax regression. This provides a more efficient implementation of softmax regression than a manually written regression in terms of a categorical distribution and matrix multiplication.

+

Note that the implementation does not put any restrictions on the coefficient matrix \(\beta\). It is up to the user to use a reference category, a suitable prior or some other means of identifiability. See Multi-logit in the Stan User’s Guide.

+
+

Probability mass functions

+

If \(N,M,K \in \mathbb{N}\), \(N,M,K > 0\), and if \(x\in \mathbb{R}^{M\times K}, \alpha \in \mathbb{R}^N, \beta\in \mathbb{R}^{K\times N}\), then for \(y \in \{1,\ldots,N\}^M\), \[\begin{equation*} +\begin{split} +\text{CategoricalLogitGLM}(y~|~x,\alpha,\beta) +& = \prod_{1\leq i \leq M}\text{CategoricalLogit}(y_i~|~\alpha+x_i\cdot\beta) \\[8pt] +& = \prod_{1\leq i \leq M}\text{Categorical}(y_i~|~\text{softmax}(\alpha+x_i\cdot\beta)). +\end{split} +\end{equation*}\] See the definition of softmax for the definition of the softmax function.

+
+
+

Distribution statement

+

y ~ categorical_logit_glm(x, alpha, beta)

+

Increment target log probability density with categorical_logit_glm_lupmf(y | x, alpha, beta).

+Available since 2.23 + +

+
+
+

Stan functions

+ +

+

real categorical_logit_glm_lpmf(int y | row_vector x, vector alpha, matrix beta)
The log categorical probability mass function with outcome y in \(1:N\) given \(N\)-vector of log-odds of outcomes alpha + x * beta.

+Available since 2.23 + +

+

real categorical_logit_glm_lupmf(int y | row_vector x, vector alpha, matrix beta)
The log categorical probability mass function with outcome y in \(1:N\) given \(N\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.

+Available since 2.25 + +

+

real categorical_logit_glm_lpmf(int y | matrix x, vector alpha, matrix beta)
The log categorical probability mass function with outcomes y in \(1:N\) given \(N\)-vector of log-odds of outcomes alpha + x * beta.

+Available since 2.23 + +

+

real categorical_logit_glm_lupmf(int y | matrix x, vector alpha, matrix beta)
The log categorical probability mass function with outcomes y in \(1:N\) given \(N\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.

+Available since 2.25 + +

+

real categorical_logit_glm_lpmf(array[] int y | row_vector x, vector alpha, matrix beta)
The log categorical probability mass function with outcomes y in \(1:N\) given \(N\)-vector of log-odds of outcomes alpha + x * beta.

+Available since 2.23 + +

+

real categorical_logit_glm_lupmf(array[] int y | row_vector x, vector alpha, matrix beta)
The log categorical probability mass function with outcomes y in \(1:N\) given \(N\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.

+Available since 2.25 + +

+

real categorical_logit_glm_lpmf(array[] int y | matrix x, vector alpha, matrix beta)
The log categorical probability mass function with outcomes y in \(1:N\) given \(N\)-vector of log-odds of outcomes alpha + x * beta.

+Available since 2.23 + +

+

real categorical_logit_glm_lupmf(array[] int y | matrix x, vector alpha, matrix beta)
The log categorical probability mass function with outcomes y in \(1:N\) given \(N\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.

+Available since 2.25 +
+
+
+

Discrete range distribution

+
+

Probability mass functions

+

If \(l, u \in \mathbb{Z}\) are lower and upper bounds (\(l \le u\)), then for any integer \(y \in \{l,\ldots,u\}\), \[\begin{equation*} \text{DiscreteRange}(y ~|~ l, u) = +\frac{1}{u - l + 1}. \end{equation*}\]

+
+
+

Distribution statement

+

y ~ discrete_range(l, u)

+

Increment the target log probability density with discrete_range_lupmf(y | l, u) dropping constant additive terms.

+Available since 2.26 + +

+
+
+

Stan functions

+

All of the discrete range distributions are vectorized so that the outcome y and the bounds l, u can be a single integer (type int) or an array of integers (type array[] int).

+ +

+

real discrete_range_lpmf(ints y | ints l, ints u)
The log probability mass function with outcome(s) y in \(l:u\).

+Available since 2.26 + +

+

real discrete_range_lupmf(ints y | ints l, ints u)
The log probability mass function with outcome(s) y in \(l:u\) dropping constant additive terms.

+Available since 2.26 + +

+

real discrete_range_cdf(ints y | ints l, ints u)
The discrete range cumulative distribution function for the given y, lower and upper bounds.

+Available since 2.26 + +

+

real discrete_range_lcdf(ints y | ints l, ints u)
The log of the discrete range cumulative distribution function for the given y, lower and upper bounds.

+Available since 2.26 + +

+

real discrete_range_lccdf(ints y | ints l, ints u)
The log of the discrete range complementary cumulative distribution function for the given y, lower and upper bounds.

+Available since 2.26 + +

+

ints discrete_range_rng(ints l, ints u)
Generate a discrete variate between the given lower and upper bounds; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.26 +
+
+
+

Ordered logistic distribution

+
+

Probability mass function

+

If \(K \in \mathbb{N}\) with \(K > 2\), \(c \in \mathbb{R}^{K-1}\) such that \(c_k < c_{k+1}\) for \(k \in \{1,\ldots,K-2\}\), and \(\eta \in +\mathbb{R}\), then for \(k \in \{1,\ldots,K\}\), \[\begin{equation*} +\text{OrderedLogistic}(k~|~\eta,c) = \left\{ \begin{array}{ll} 1 - +\text{logit}^{-1}(\eta - c_1) & \text{if } k = 1, \\[4pt] +\text{logit}^{-1}(\eta - c_{k-1}) - \text{logit}^{-1}(\eta - c_{k}) & +\text{if } 1 < k < K, \text{and} \\[4pt] \text{logit}^{-1}(\eta - +c_{K-1}) - 0 & \text{if } k = K. \end{array} \right. \end{equation*}\] The \(k=K\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \(k=1\) and \(k=K\) edge cases can be subsumed into the general definition by setting \(c_0 = -\infty\) and \(c_K = +\infty\) with \(\text{logit}^{-1}(-\infty) = 0\) and \(\text{logit}^{-1}(\infty) = 1\).

+
+
+

Distribution statement

+

k ~ ordered_logistic(eta, c)

+

Increment target log probability density with ordered_logistic_lupmf(k | eta, c).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real ordered_logistic_lpmf(ints k | vector eta, vectors c)
The log ordered logistic probability mass of k given linear predictors eta, and cutpoints c.

+Available since 2.18 + +

+

real ordered_logistic_lupmf(ints k | vector eta, vectors c)
The log ordered logistic probability mass of k given linear predictors eta, and cutpoints c dropping constant additive terms.

+Available since 2.25 + +

+

int ordered_logistic_rng(real eta, vector c)
Generate an ordered logistic variate with linear predictor eta and cutpoints c; may only be used in transformed data and generated quantities blocks

+Available since 2.0 +
+
+
+

Ordered logistic generalized linear model (ordinal regression)

+
+

Probability mass function

+

If \(N,M,K \in \mathbb{N}\) with \(N, M > 0\), \(K > 2\), \(c \in \mathbb{R}^{K-1}\) such that \(c_k < c_{k+1}\) for \(k \in \{1,\ldots,K-2\}\), and \(x\in \mathbb{R}^{N\times M}, \beta\in \mathbb{R}^M\), then for \(y \in \{1,\ldots,K\}^N\), \[\begin{equation*} +\begin{split} +\\ +& \text{OrderedLogisticGLM}(y~|~x,\beta,c) \\[8pt] +& = \prod_{1\leq i \leq N}\text{OrderedLogistic}(y_i~|~x_i\cdot \beta,c) \\ +& = \prod_{1\leq i \leq N} \left\{ \begin{array}{ll} +1 - \text{logit}^{-1}(x_i\cdot \beta - c_1) & \text{if } y = 1, \\[4pt] +\text{logit}^{-1}(x_i\cdot \beta - c_{y-1}) - \text{logit}^{-1}(x_i\cdot \beta - c_{y}) & \text{if } 1 < y < K, \text{and} \\[4pt] +\text{logit}^{-1}(x_i\cdot \beta - c_{K-1}) - 0 & \text{if } y = K. +\end{array} \right. +\end{split} +\end{equation*}\] The \(y=K\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \(y=1\) and \(y=K\) edge cases can be subsumed into the general definition by setting \(c_0 = -\infty\) and \(c_K = +\infty\) with \(\text{logit}^{-1}(-\infty) = 0\) and \(\text{logit}^{-1}(\infty) = 1\).

+
+
+

Distribution statement

+

y ~ ordered_logistic_glm(x, beta, c)

+

Increment target log probability density with ordered_logistic_glm_lupmf(y | x, beta, c).

+Available since 2.23 + +

+
+
+

Stan functions

+ +

+

real ordered_logistic_glm_lpmf(int y | row_vector x, vector beta, vector c)
The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.

+Available since 2.23 + +

+

real ordered_logistic_glm_lupmf(int y | row_vector x, vector beta, vector c)
The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.

+Available since 2.25 + +

+

real ordered_logistic_glm_lpmf(int y | matrix x, vector beta, vector c)
The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.

+Available since 2.23 + +

+

real ordered_logistic_glm_lupmf(int y | matrix x, vector beta, vector c)
The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.

+Available since 2.25 + +

+

real ordered_logistic_glm_lpmf(array[] int y | row_vector x, vector beta, vector c)
The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.

+Available since 2.23 + +

+

real ordered_logistic_glm_lupmf(array[] int y | row_vector x, vector beta, vector c)
The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.

+Available since 2.25 + +

+

real ordered_logistic_glm_lpmf(array[] int y | matrix x, vector beta, vector c)
The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.

+Available since 2.23 + +

+

real ordered_logistic_glm_lupmf(array[] int y | matrix x, vector beta, vector c)
The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.

+Available since 2.25 +
+
+
+

Ordered probit distribution

+
+

Probability mass function

+

If \(K \in \mathbb{N}\) with \(K > 2\), \(c \in \mathbb{R}^{K-1}\) such that \(c_k < c_{k+1}\) for \(k \in \{1,\ldots,K-2\}\), and \(\eta \in +\mathbb{R}\), then for \(k \in \{1,\ldots,K\}\), \[\begin{equation*} +\text{OrderedProbit}(k~|~\eta,c) = \left\{ \begin{array}{ll} 1 - +\Phi(\eta - c_1) & \text{if } k = 1, \\[4pt] \Phi(\eta - c_{k-1}) - +\Phi(\eta - c_{k}) & \text{if } 1 < k < K, \text{and} \\[4pt] +\Phi(\eta - c_{K-1}) - 0 & \text{if } k = K. \end{array} \right. \end{equation*}\] The \(k=K\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \(k=1\) and \(k=K\) edge cases can be subsumed into the general definition by setting \(c_0 = +-\infty\) and \(c_K = +\infty\) with \(\Phi(-\infty) = 0\) and \(\Phi(\infty) = 1\).

+
+
+

Distribution statement

+

k ~ ordered_probit(eta, c)

+

Increment target log probability density with ordered_probit_lupmf(k | eta, c).

+Available since 2.19 + +

+
+
+

Stan functions

+ +

+

real ordered_probit_lpmf(ints k | vector eta, vectors c)
The log ordered probit probability mass of k given linear predictors eta, and cutpoints c.

+Available since 2.18 + +

+

real ordered_probit_lupmf(ints k | vector eta, vectors c)
The log ordered probit probability mass of k given linear predictors eta, and cutpoints c dropping constant additive terms.

+Available since 2.25 + +

+

real ordered_probit_lpmf(ints k | real eta, vectors c)
The log ordered probit probability mass of k given linear predictor eta, and cutpoints c.

+Available since 2.19 + +

+

real ordered_probit_lupmf(ints k | real eta, vectors c)
The log ordered probit probability mass of k given linear predictor eta, and cutpoints c dropping constant additive terms.

+Available since 2.19 + +

+

int ordered_probit_rng(real eta, vector c)
Generate an ordered probit variate with linear predictor eta and cutpoints c; may only be used in transformed data and generated quantities blocks

+Available since 2.18 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/circular_distributions.html b/docs/2_39/functions-reference/circular_distributions.html new file mode 100644 index 000000000..a2e8bc372 --- /dev/null +++ b/docs/2_39/functions-reference/circular_distributions.html @@ -0,0 +1,1248 @@ + + + + + + + + + +Circular Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Circular Distributions

+

Circular distributions are defined for finite values y in any interval of length \(2\pi\).

+
+

Von Mises distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\) and \(\kappa \in \mathbb{R}^+\), then for \(y \in +\mathbb{R}\), \[\begin{equation*} \text{VonMises}(y|\mu,\kappa) = +\frac{\exp(\kappa\cos(y-\mu))}{2\pi I_0(\kappa)} \!. \end{equation*}\] In order for this density to properly normalize, \(y\) must be restricted to some interval \((c, c + 2\pi)\) of length \(2 \pi\), because \[\begin{equation*} \int_{c}^{c + +2\pi} \text{VonMises}(y|\mu,\kappa) dy = 1. \end{equation*}\] Similarly, if \(\mu\) is a parameter, it will typically be restricted to the same range as \(y\).

+

If \(\kappa > 0\), a von Mises distribution with its \(2 \pi\) interval of support centered around its location \(\mu\) will have a single mode at \(\mu\); for example, restricting \(y\) to \((-\pi,\pi)\) and taking \(\mu = 0\) leads to a single local optimum at the mode \(\mu\). If the location \(\mu\) is not in the center of the support, the density is circularly translated and there will be a second local maximum at the boundary furthest from the mode. Ideally, the parameterization and support will be set up so that the bulk of the probability mass is in a continuous interval around the mean \(\mu\).

+

For \(\kappa = 0\), the Von Mises distribution corresponds to the circular uniform distribution with density \(1 / (2 \pi)\) (independently of the values of \(y\) or \(\mu\)).

+
+
+

Distribution statement

+

y ~ von_mises(mu, kappa)

+

Increment target log probability density with von_mises_lupdf(y | mu, kappa).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real von_mises_lpdf(reals y | reals mu, reals kappa)
The log of the von mises density of y given location mu and scale kappa.

+Available since 2.18 + +

+

real von_mises_lupdf(reals y | reals mu, reals kappa)
The log of the von mises density of y given location mu and scale kappa dropping constant additive terms.

+Available since 2.25 + +

+

real von_mises_cdf(reals y | reals mu, reals kappa)
The von mises cumulative distribution function of y given location mu and scale kappa.

+Available since 2.29 + +

+

real von_mises_lcdf(reals y | reals mu, reals kappa)
The log of the von mises cumulative distribution function of y given location mu and scale kappa.

+Available since 2.29 + +

+

real von_mises_lccdf(reals y | reals mu, reals kappa)
The log of the von mises complementary cumulative distribution function of y given location mu and scale kappa.

+Available since 2.29 + +

+

R von_mises_rng(reals mu, reals kappa)
Generate a Von Mises variate with location mu and scale kappa (i.e. returns values in the interval \([(\mu \mod 2\pi)-\pi,(\mu \mod +2\pi)+\pi]\)); may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+

Numerical stability

+

Evaluating the Von Mises distribution for \(\kappa > 100\) is numerically unstable in the current implementation. Nathanael I. Lichti suggested the following workaround on the Stan users group, based on the fact that as \(\kappa \rightarrow \infty\), \[\begin{equation*} +\text{VonMises}(y|\mu,\kappa) \rightarrow \text{Normal}(\mu, \sqrt{1 / +\kappa}). \end{equation*}\] The workaround is to replace y ~ von_mises(mu,kappa) with

+
if (kappa < 100) {
+  y ~ von_mises(mu, kappa);
+} else {
+  y ~ normal(mu, sqrt(1 / kappa));
+}
+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/complex-valued_basic_functions.html b/docs/2_39/functions-reference/complex-valued_basic_functions.html new file mode 100644 index 000000000..70ee5f82b --- /dev/null +++ b/docs/2_39/functions-reference/complex-valued_basic_functions.html @@ -0,0 +1,1544 @@ + + + + + + + + + +Complex-Valued Basic Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Complex-Valued Basic Functions

+

This chapter describes built-in functions that operate on complex numbers, either as an argument type or a return type. This includes the arithmetic operators generalized to complex numbers.

+
+

Complex assignment and promotion

+

Just as integers may be assigned to real variables, real variables may be assigned to complex numbers, with the result being a zero imaginary component.

+
int n = 5;       // n = 5
+real x = n;      // x = 5.0
+complex z1 = n;  // z1 = 5.0 + 0.0i
+complex z2 = x;  // z2 = 5.0 + 0.0i
+
+

Complex function arguments

+

Function arguments of type int or real may be promoted to type complex. The complex versions of functions in this chapter are only used if one of the arguments is complex. For example, if z is complex, then pow(z, 2) will call the complex version of the power function and the integer 2 will be promoted to a complex number with a real component of 2 and an imaginary component of 0. The same goes for binary operators like addition and subtraction, where z + 2 will be legal and produce a complex result. Functions such as arg and conj that are only available for complex numbers can accept integer or real arguments, promoting them to complex before applying the function.

+
+
+
+

Complex constructors and accessors

+
+

Complex constructors

+

Variables and constants of type complex are constructed from zero, one, or two real numbers.

+
complex z1 = to_complex();        // z1 = 0.0 + 0.0i
+real re = -2.9;
+complex z2 = to_complex(re);      // z2 = -2.9 + 0.0i
+real im = 1.3;
+complex z3 = to_complex(re, im);  // z3 = -2.9 + 1.3i
+ +

+

complex to_complex()
Return complex number with real part 0.0 and imaginary part 0.0.

+Available since 2.28 + +

+

complex to_complex(real re)
Return complex number with real part re and imaginary part 0.0.

+Available since 2.28 + +

+

complex to_complex(real re, real im)
Return complex number with real part re and imaginary part im.

+Available since 2.28 + +

+

Z to_complex(T1 re, T2 im)
Vectorized implementation of the to_complex function.

+

T1 and T2 can either be real containers of the same size, or a real container and a real, in which case the real value is used for the corresponding component in all elements of the output.

+Available since 2.30 +
+
+

Complex accessors

+

Given a complex number, its real and imaginary parts can be extracted with the following functions.

+ +

+

real get_real(complex z)
Return the real part of the complex number z.

+Available since 2.28 + +

+

real get_imag(complex z)
Return the imaginary part of the complex number z.

+Available since 2.28 +
+
+
+

Complex arithmetic operators

+

The arithmetic operators have the same precedence for complex and real arguments. The complex form of an operator will be selected if at least one of its arguments is of type complex. If there are two arguments and only one is of type complex, then the other will be promoted to type complex before performing the operation.

+
+

Unary operators

+ +

+

complex operator+(complex z)
Return the complex argument z, \[\begin{equation*} +z = z. \end{equation*}\]

+Available since 2.28 + +

+

complex operator-(complex z)
Return the negation of the complex argument z, which for \(z = x + yi\) is \[\begin{equation*} -z = -x - yi. \end{equation*}\]

+Available since 2.28 + +

+

T operator-(T x)
Vectorized version of operator-. If T x is a (possibly nested) array of complex numbers, -x is the same shape array where each individual value is negated.

+Available since 2.31 +
+
+

Binary operators

+ +

+

complex operator+(complex x, complex y)
Return the sum of x and y, \[\begin{equation*} (x + y) = \text{operator+}(x, y) = x + y. \end{equation*}\]

+Available since 2.28 + +

+

complex operator-(complex x, complex y)
Return the difference between x and y, \[\begin{equation*} (x - y) = +\text{operator-}(x, y) = x - y. \end{equation*}\]

+Available since 2.28 + +

+

complex operator*(complex x, complex y)
Return the product of x and y, \[\begin{equation*} (x \, * \, y) = \text{operator*}(x, y) = x +\times y. \end{equation*}\]

+Available since 2.28 + +

+

complex operator/(complex x, complex y)
Return the quotient of x and y, \[\begin{equation*} (x / y) = \text{operator/}(x,y) = +\frac{x}{y} \end{equation*}\]

+Available since 2.28 + +

+

complex operator^(complex x, complex y)
Return x raised to the power of y, \[\begin{equation*} +(x^\mathrm{\wedge}y)= \text{operator}^\mathrm{\wedge}(x,y) += \textrm{exp}(y \, \log(x)). +\end{equation*}\]

+Available since 2.28 +
+
+
+

Complex comparison operators

+

Complex numbers are equal if and only if both their real and imaginary components are equal. That is, the conditional

+
z1 == z2
+

is equivalent to

+
get_real(z1) == get_real(z2) && get_imag(z1) == get_imag(z2)
+

As with other complex functions, if one of the arguments is of type real or int, it will be promoted to type complex before comparison. For example, if z is of type complex, then z == 0 will be true if z has real component equal to 0.0 and imaginary component equal to 0.0.

+

Warning: As with real values, it is usually a mistake to compare complex numbers for equality because their parts are implemented using floating-point arithmetic, which suffers from precision errors, rendering algebraically equivalent expressions not equal after evaluation.

+ +

+

int operator==(complex x, complex y)
Return 1 if x is equal to y and 0 otherwise, \[\begin{equation*} +(x \,\text{==}\, y) +\ = \ \text{operator==}(x,y) +\ = \ \begin{cases} 1 & \text{if $x = y$}, \ \text{and} \\ 0 & \text{otherwise.} +\end{cases} +\end{equation*}\]

+Available since 2.28 + +

+

int operator!=(complex x, complex y)
Return 1 if x is not equal to y and 0 otherwise, \[\begin{equation*} +(x \,\text{!=}\, y) +\ = \ \text{operator!=}(x,y) +\ = \ \begin{cases} 1 & \text{if $x \neq y$}, \ \text{and} \\ 0 & +\text{otherwise.} \end{cases} +\end{equation*}\]

+Available since 2.28 +
+
+

Complex (compound) assignment operators

+

The assignment operator only serves as a component in the assignment statement and is thus not technically a function in the Stan language. With that caveat, it is documented here for completeness.

+

Assignment of complex numbers works elementwise. If an expression of type int or real is assigned to a complex number, it will be promoted before assignment as if calling to_complex(), so that the imaginary component is 0.0.

+ +

+

void operator=(complex x, complex y)
x = y; assigns a (copy of) the value of y to x.

+Available since 2.28 + +

+

void operator+=(complex x, complex y)
x += y; is equivalent to x = x + y;.

+Available since 2.28 + +

+

void operator-=(complex x, complex y)
x -= y; is equivalent to x = x - y;.

+Available since 2.28 + +

+

void operator*=(complex x, complex y)
x *= y; is equivalent to x = x * y;.

+Available since 2.28 + +

+

void operator/=(complex x, complex y)
x /= y; is equivalent to x = x / y;.

+Available since 2.28 +
+
+

Complex special functions

+

The following functions are specific to complex numbers, with the exception of absolute value, which is also defined for real numbers but has a specific meaning for complex numbers.

+ +

+

real abs(complex z)
Return the absolute value of z, also known as the modulus or magnitude, which for \(z = x + yi\) is \[\begin{equation*} +\textrm{abs}(z) = \sqrt{x^2 + y^2}. +\end{equation*}\]

+

This function works elementwise over containers, returning the same shape and kind of the input container but holding reals. For example, a complex_vector[n] input will return a vector[n] output, with each element transformed by the above equation.

+Available since 2.28, vectorized in 2.30 + +

+

real arg(complex z)
Return the phase angle (in radians) of z, which for \(z = x + yi\) is \[\begin{equation*} +\textrm{arg}(z) = \textrm{atan2}(y, x), +\end{equation*}\] which equals \(\textrm{atan}(y / x)\) when \(x > 0\).

+Available since 2.28 + +

+

real norm(complex z)
Return the Euclidean norm of z, which is its absolute value squared, and which for \(z = x + yi\) is \[\begin{equation*} +\textrm{norm}(z) = \textrm{abs}^2(z) = x^2 + y^2. +\end{equation*}\]

+Available since 2.28 + +

+

complex conj(complex z)
Return the complex conjugate of z, which negates the imaginary component, so that if \(z = x + yi\), \[\begin{equation*} +\textrm{conj}(z) = x - yi. +\end{equation*}\]

+Available since 2.28 + +

+

Z conj(Z z)
Vectorized version of conj. This will apply the conj function to each element of a complex array, vector, or matrix.

+Available since 2.31 + +

+

complex proj(complex z)
Return the projection of z onto the Riemann sphere, which for \(z = x ++ yi\) is \[\begin{equation*} +\textrm{proj}(z) += \begin{cases} + z & \textrm{if} \ z \ \textrm{is finite, and} \\ + 0 + \textrm{sign}(y)i & \textrm{otherwise,} +\end{cases} +\end{equation*}\] where \(\textrm{sign}(y)\) is -1 if \(y\) is negative and 1 otherwise.

+Available since 2.28 + +

+

complex polar(real r, real theta)
Return the complex number with magnitude (absolute value) r and phase angle theta.

+Available since 2.28 +
+
+

Complex exponential and power functions

+

The exponential, log, and power functions may be supplied with complex arguments with specialized meanings that generalize their real counterparts. These versions are only called when the argument is complex.

+ +

+

complex exp(complex z)
Return the complex natural exponential of z, which for \(z = x + yi\) is \[\begin{equation*} +\exp z = \exp(x) \textrm{cis}(y) = \exp(x) (\cos(y) + i \sin(y)). +\end{equation*}\]

+Available since 2.28 + +

+

complex log(complex z)
Return the complex natural logarithm of z, which for \(z = \textrm{polar}(r, +\theta)\) is \[\begin{equation*} +\log z = \log r + \theta i. +\end{equation*}\]

+Available since 2.28 + +

+

complex log10(complex z)
Return the complex common logarithm of z, \[\begin{equation*} +\log_{10} z = \frac{\log z}{\log 10}. +\end{equation*}\]

+Available since 2.28 + +

+

complex pow(complex x, complex y)
Return x raised to the power of y, \[\begin{equation*} +\text{pow}(x,y) = \textrm{exp}(y \, \log(x)). +\end{equation*}\]

+Available since 2.28 + +

+

Z pow(T1 x, T2 y)
Vectorized implementation of the pow function

+Available since 2.30 + +

+

complex sqrt(complex x)
Return the complex square root of x with branch cut along the negative real axis. For finite inputs, the result will be in the right half-plane.

+Available since 2.28 +
+
+

Complex trigonometric functions

+

The standard trigonometric functions are supported for complex numbers.

+ +

+

complex cos(complex z)
Return the complex cosine of z, which is \[\begin{equation*} +\cos(z) += \textrm{cosh}(z \, i) += \frac{\displaystyle \exp(z \, i) + \exp(-z \, i)} + {\displaystyle 2}. +\end{equation*}\]

+Available since 2.28 + +

+

complex sin(complex z)
Return the complex sine of z, \[\begin{equation*} +\sin(z) += -\textrm{sinh}(z \, i) \, i += \frac{\displaystyle \exp(z \, i) - \exp(-z \, i)} + {\displaystyle 2 \, i}. +\end{equation*}\]

+Available since 2.28 + +

+

complex tan(complex z)
Return the complex tangent of z, \[\begin{equation*} +\tan(z) += -\textrm{tanh}(z \, i) \, i += \frac{(\exp(-z \, i) - \exp(z \, i)) \, i} + {\exp(-z \, i) + \exp(z \, i)}. +\end{equation*}\]

+Available since 2.28 + +

+

complex acos(complex z)
Return the complex arc (inverse) cosine of z, \[\begin{equation*} +\textrm{acos}(z) += \frac{1}{2} \pi + \log (z \, i + \sqrt{1 - z^2}) \, i. +\end{equation*}\]

+Available since 2.28 + +

+

complex asin(complex z)
Return the complex arc (inverse) sine of z, \[\begin{equation*} +\text{asin}(z) += -\log(z \, i + \sqrt{1 - z^2}) \, i. +\end{equation*}\]

+Available since 2.28 + +

+

complex atan(complex z)
Return the complex arc (inverse) tangent of z, \[\begin{equation*} +\text{atan}(z) += - \frac{1}{2} (\log(1 + z \, i) - \log(1 - z \, i)) \, i. +\end{equation*}\]

+Available since 2.28 +
+
+

Complex hyperbolic trigonometric functions

+

The standard hyperbolic trigonometric functions are supported for complex numbers.

+ +

+

complex cosh(complex z)
Return the complex hyperbolic cosine of z, \[\begin{equation*} +\textrm{cosh}(z) += \frac{\exp(z) + \exp(-z)} + {2}. +\end{equation*}\]

+Available since 2.28 + +

+

complex sinh(complex z)
Return the complex hyperbolic sine of z, \[\begin{equation*} +\textrm{sinh}(z) += \frac{\displaystyle \exp(z) - \exp(-z)} + {\displaystyle 2}. +\end{equation*}\]

+Available since 2.28 + +

+

complex tanh(complex z)
Return the complex hyperbolic tangent of z, \[\begin{equation*} +\textrm{tanh}(z) +\ = \ \frac{\textrm{sinh}(z)} + {\textrm{cosh}(z)} +\ = \ \frac{\displaystyle \exp(z) - \exp(-z)} + {\displaystyle \exp(z) + \exp(-z)}. +\end{equation*}\]

+Available since 2.28 + +

+

complex acosh(complex z)
Return the complex hyperbolic arc (inverse) cosine of z, \[\begin{equation*} +\textrm{acosh}(z) += \log(z + \sqrt{(z + 1)(z - 1)}). +\end{equation*}\]

+Available since 2.28 + +

+

complex asinh(complex z)
Return the complex hyperbolic arc (inverse) sine of z, \[\begin{equation*} +\textrm{asinh}(z) += \log(z + \sqrt{1 + z^2}). +\end{equation*}\]

+Available since 2.28 + +

+

complex atanh(complex z)
Return the complex hyperbolic arc (inverse) tangent of z, \[\begin{equation*} +\textrm{atanh}(z) += \frac{\log(1 + z) - \log(1 - z)} + {2}. +\end{equation*}\]

+Available since 2.28 + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/complex_matrix_operations.html b/docs/2_39/functions-reference/complex_matrix_operations.html new file mode 100644 index 000000000..35eaadeb8 --- /dev/null +++ b/docs/2_39/functions-reference/complex_matrix_operations.html @@ -0,0 +1,2106 @@ + + + + + + + + + +Complex Matrix Operations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Complex Matrix Operations

+
+

Complex promotion

+

This chapter provides the details of functions that operate over complex matrices, vectors, and row vectors. These mirror the operations over real matrix types and are defined in the usual way for complex numbers.

+
+

Promotion of complex arguments

+

If an expression e can be assigned to a variable of type T, then it can be used as an argument to a function that is specified to take arguments of type T. For instance, sqrt(real) is specified to take a real argument, but an integer expression such as 2 + 2 of type int can be passed to sqrt, so that sqrt(2 + 2) is well defined. This works by promoting the integer expression 2 + 2 to be of real type.

+

The rules for promotion in Stan are simple:

+
    +
  • int may be promoted to real,
  • +
  • real may be promoted to complex,
  • +
  • vector can be promoted to complex_vector,
  • +
  • row_vector can be promoted to complex_row_vector,
  • +
  • matrix can be promoted to complex_matrix,
  • +
  • if T can be promoted to U and U can be promoted to V, then T can be promoted to V (transitive), and
  • +
  • if T can be promoted to U, then T[] can be promoted to U[] (covariant).
  • +
+
+
+

Signature selection

+

When a function is called, the definition requiring the fewest number of promotions is used. For example, when calling vector + vector, the real-valued signature is used. When calling any of complex_vector + vector, vector + complex_vector, or complex_vector + complex_vector, the complex signature is used. If more than one signature matches with the minimal number of promotions, the call is ambiguous, and an error will be raised by the compiler. Promotion ambiguity leading to ill-defined calls should never happen with Stan built-in functions.

+
+
+

Signatures for complex functions

+

Complex function signatures will only list the fully complex type. For example, with complex vector addition, we will list a single signature, complex_vector operator+(complex_vector, complex_vector). Through promotion, operator+ may be called with one complex vector and one real vector as well, but the documentation elides the implied signatures operator+(complex_vector, vector) and operator+(vector, complex_vector).

+
+
+

Generic functions work for complex containers

+

Generic functions work for arrays containing complex, complex matrix, complex vector, or complex row vector types. This includes the functions append_array, dims, head, num_elements, rep_array, reverse, segment, size, and tail.

+
+
+
+

Integer-valued complex matrix size functions

+ +

+

int num_elements(complex_vector x)
The total number of elements in the vector x (same as function rows)

+Available since 2.30 + +

+

int num_elements(complex_row_vector x)
The total number of elements in the row vector x (same as function cols)

+Available since 2.30 + +

+

int num_elements(complex_matrix x)
The total number of elements in the matrix x. For example, if x is a \(5 \times 3\) matrix, then num_elements(x) is 15

+Available since 2.30 + +

+

int rows(complex_vector x)
The number of rows in the vector x

+Available since 2.30 + +

+

int rows(complex_row_vector x)
The number of rows in the row vector x, namely 1

+Available since 2.30 + +

+

int rows(complex_matrix x)
The number of rows in the matrix x

+Available since 2.30 + +

+

int cols(complex_vector x)
The number of columns in the vector x, namely 1

+Available since 2.30 + +

+

int cols(complex_row_vector x)
The number of columns in the row vector x

+Available since 2.30 + +

+

int cols(complex_matrix x)
The number of columns in the matrix x

+Available since 2.30 + +

+

int size(complex_vector x)
The size of x, i.e., the number of elements

+Available since 2.30 + +

+

int size(complex_row_vector x)
The size of x, i.e., the number of elements

+Available since 2.30 + +

+

int size(complex_matrix x)
The size of the matrix x. For example, if x is a \(5 \times 3\) matrix, then size(x) is 15.

+Available since 2.30 +
+
+

Complex matrix arithmetic operators

+

Stan supports all basic complex arithmetic operators using infix, prefix and postfix operations. This section lists the operations supported by Stan along with their argument and result types.

+
+

Negation prefix operators

+ +

+

complex_vector operator-(complex_vector x)
The negation of the vector x.

+Available since 2.30 + +

+

complex_row_vector operator-(complex_row_vector x)
The negation of the row vector x.

+Available since 2.30 + +

+

complex_matrix operator-(complex_matrix x)
The negation of the matrix x.

+Available since 2.30 + +

+

T operator-(T x)
Vectorized version of operator-. If T x is a (possibly nested) array of matrix types, -x is the same shape array where each individual value is negated.

+Available since 2.31 +
+
+

Infix complex_matrix operators

+ +

+

complex_vector operator+(complex_vector x, complex_vector y)
The sum of the vectors x and y.

+Available since 2.30 + +

+

complex_row_vector operator+(complex_row_vector x, complex_row_vector y)
The sum of the row vectors x and y.

+Available since 2.30 + +

+

complex_matrix operator+(complex_matrix x, complex_matrix y)
The sum of the matrices x and y

+Available since 2.30 + +

+

complex_vector operator-(complex_vector x, complex_vector y)
The difference between the vectors x and y.

+Available since 2.30 + +

+

complex_row_vector operator-(complex_row_vector x, complex_row_vector y)
The difference between the row vectors x and y

+Available since 2.30 + +

+

complex_matrix operator-(complex_matrix x, complex_matrix y)
The difference between the matrices x and y

+Available since 2.30 + +

+

complex_vector operator*(complex x, complex_vector y)
The product of the scalar x and vector y

+Available since 2.30 + +

+

complex_row_vector operator*(complex x, complex_row_vector y)
The product of the scalar x and the row vector y

+Available since 2.30 + +

+

complex_matrix operator*(complex x, complex_matrix y)
The product of the scalar x and the matrix y

+Available since 2.30 + +

+

complex_vector operator*(complex_vector x, complex y)
The product of the scalar y and vector x

+Available since 2.30 + +

+

complex_matrix operator*(complex_vector x, complex_row_vector y)
The product of the vector x and row vector y

+Available since 2.30 + +

+

complex_row_vector operator*(complex_row_vector x, complex y)
The product of the scalar y and row vector x

+Available since 2.30 + +

+

complex operator*(complex_row_vector x, complex_vector y)
The product of the row vector x and vector y

+Available since 2.30 + +

+

complex_row_vector operator*(complex_row_vector x, complex_matrix y)
The product of the row vector x and matrix y

+Available since 2.30 + +

+

complex_matrix operator*(complex_matrix x, complex y)
The product of the scalar y and matrix x

+Available since 2.30 + +

+

complex_vector operator*(complex_matrix x, complex_vector y)
The product of the matrix x and vector y

+Available since 2.30 + +

+

complex_matrix operator*(complex_matrix x, complex_matrix y)
The product of the matrices x and y

+Available since 2.30 +
+
+

Broadcast infix operators

+ +

+

complex_vector operator+(complex_vector x, complex y)
The result of adding y to every entry in the vector x

+Available since 2.30 + +

+

complex_vector operator+(complex x, complex_vector y)
The result of adding x to every entry in the vector y

+Available since 2.30 + +

+

complex_row_vector operator+(complex_row_vector x, complex y)
The result of adding y to every entry in the row vector x

+Available since 2.30 + +

+

complex_row_vector operator+(complex x, complex_row_vector y)
The result of adding x to every entry in the row vector y

+Available since 2.30 + +

+

complex_matrix operator+(complex_matrix x, complex y)
The result of adding y to every entry in the matrix x

+Available since 2.30 + +

+

complex_matrix operator+(complex x, complex_matrix y)
The result of adding x to every entry in the matrix y

+Available since 2.30 + +

+

complex_vector operator-(complex_vector x, complex y)
The result of subtracting y from every entry in the vector x

+Available since 2.30 + +

+

complex_vector operator-(complex x, complex_vector y)
The result of adding x to every entry in the negation of the vector y

+Available since 2.30 + +

+

complex_row_vector operator-(complex_row_vector x, complex y)
The result of subtracting y from every entry in the row vector x

+Available since 2.30 + +

+

complex_row_vector operator-(complex x, complex_row_vector y)
The result of adding x to every entry in the negation of the row vector y

+Available since 2.30 + +

+

complex_matrix operator-(complex_matrix x, complex y)
The result of subtracting y from every entry in the matrix x

+Available since 2.30 + +

+

complex_matrix operator-(complex x, complex_matrix y)
The result of adding x to every entry in the negation of the matrix y

+Available since 2.30 + +

+

complex_vector operator/(complex_vector x, complex y)
The result of dividing each entry in the vector x by y

+Available since 2.30 + +

+

complex_row_vector operator/(complex_row_vector x, complex y)
The result of dividing each entry in the row vector x by y

+Available since 2.30 + +

+

complex_matrix operator/(complex_matrix x, complex y)
The result of dividing each entry in the matrix x by y

+Available since 2.30 +
+
+
+

Complex Transposition Operator

+

Complex matrix transposition is represented using a postfix operator.

+ +

+

complex_matrix operator'(complex_matrix x)
The transpose of the matrix x, written as x'

+Available since 2.30 + +

+

complex_row_vector operator'(complex_vector x)
The transpose of the vector x, written as x'

+Available since 2.30 + +

+

complex_vector operator'(complex_row_vector x)
The transpose of the row vector x, written as x'

+Available since 2.30 +
+
+

Complex elementwise functions

+

As in the real case, elementwise complex functions apply a function to each element of a vector or matrix, returning a result of the same shape as the argument.

+ +

+

complex_vector operator.*(complex_vector x, complex_vector y)
The elementwise product of x and y

+Available since 2.30 + +

+

complex_row_vector operator.*(complex_row_vector x, complex_row_vector y)
The elementwise product of x and y

+Available since 2.30 + +

+

complex_matrix operator.*(complex_matrix x, complex_matrix y)
The elementwise product of x and y

+Available since 2.30 + +

+

complex_vector operator./(complex_vector x, complex_vector y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_vector operator./(complex x, complex_vector y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_vector operator./(complex_vector x, complex y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_row_vector operator./(complex_row_vector x, complex_row_vector y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_row_vector operator./(complex x, complex_row_vector y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_row_vector operator./(complex_row_vector x, complex y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_matrix operator./(complex_matrix x, complex_matrix y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_matrix operator./(complex x, complex_matrix y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_matrix operator./(complex_matrix x, complex y)
The elementwise quotient of x and y

+Available since 2.30 + +

+

complex_vector operator.^(complex_vector x, complex_vector y)
The elementwise power, with each element of x raised to the corresponding element of y

+Available since 2.30 + +

+

complex_vector operator.^(complex_vector x, complex y)
The elementwise power, with each element of x raised to the power y

+Available since 2.30 + +

+

complex_vector operator.^(complex x, complex_vector y)
The elementwise power, with x raised to the power of each element of y

+Available since 2.30 + +

+

complex_row_vector operator.^(complex_row_vector x, complex_row_vector y)
The elementwise power, with each element of x raised to the corresponding element of y

+Available since 2.30 + +

+

complex_row_vector operator.^(complex_row_vector x, complex y)
The elementwise power, with each element of x raised to the power y

+Available since 2.30 + +

+

complex_row_vector operator.^(complex x, complex_row_vector y)
The elementwise power, with x raised to the power of each element of y

+Available since 2.30 + +

+

complex_matrix operator.^(complex_matrix x, complex_matrix y)
The elementwise power, with each element of x raised to the corresponding element of y

+Available since 2.30 + +

+

complex_matrix operator.^(complex_matrix x, complex y)
The elementwise power, with each element of x raised to the power y

+Available since 2.30 + +

+

complex_matrix operator.^(complex x, complex_matrix y)
The elementwise power, with x raised to the power of each element of y

+Available since 2.30 +
+
+

Dot products and specialized products for complex matrices

+ +

+

complex dot_product(complex_vector x, complex_vector y)
The dot product of x and y

+Available since 2.30 + +

+

complex dot_product(complex_vector x, complex_row_vector y)
The dot product of x and y

+Available since 2.30 + +

+

complex dot_product(complex_row_vector x, complex_vector y)
The dot product of x and y

+Available since 2.30 + +

+

complex dot_product(complex_row_vector x, complex_row_vector y)
The dot product of x and y

+Available since 2.30 + +

+

complex_row_vector columns_dot_product(complex_vector x, complex_vector y)
The dot product of the columns of x and y

+Available since 2.30 + +

+

complex_row_vector columns_dot_product(complex_row_vector x, complex_row_vector y)
The dot product of the columns of x and y

+Available since 2.30 + +

+

complex_row_vector columns_dot_product(complex_matrix x, complex_matrix y)
The dot product of the columns of x and y

+Available since 2.30 + +

+

complex_vector rows_dot_product(complex_vector x, complex_vector y)
The dot product of the rows of x and y

+Available since 2.30 + +

+

complex_vector rows_dot_product(complex_row_vector x, complex_row_vector y)
The dot product of the rows of x and y

+Available since 2.30 + +

+

complex_vector rows_dot_product(complex_matrix x, complex_matrix y)
The dot product of the rows of x and y

+Available since 2.30 + +

+

complex dot_self(complex_vector x)
The dot product of the vector x with itself

+Available since 2.30 + +

+

complex dot_self(complex_row_vector x)
The dot product of the row vector x with itself

+Available since 2.30 + +

+

complex_row_vector columns_dot_self(complex_vector x)
The dot product of the columns of x with themselves

+Available since 2.30 + +

+

complex_row_vector columns_dot_self(complex_row_vector x)
The dot product of the columns of x with themselves

+Available since 2.30 + +

+

complex_row_vector columns_dot_self(complex_matrix x)
The dot product of the columns of x with themselves

+Available since 2.30 + +

+

complex_vector rows_dot_self(complex_vector x)
The dot product of the rows of x with themselves

+Available since 2.30 + +

+

complex_vector rows_dot_self(complex_row_vector x)
The dot product of the rows of x with themselves

+Available since 2.30 + +

+

complex_vector rows_dot_self(complex_matrix x)
The dot product of the rows of x with themselves

+Available since 2.30 +
+

Specialized products

+ +

+

complex_matrix diag_pre_multiply(complex_vector v, complex_matrix m)
Return the product of the diagonal matrix formed from the vector v and the matrix m, i.e., diag_matrix(v) * m.

+Available since 2.30 + +

+

complex_matrix diag_pre_multiply(complex_row_vector v, complex_matrix m)
Return the product of the diagonal matrix formed from the row vector v and the matrix m, i.e., diag_matrix(v) * m.

+Available since 2.30 + +

+

complex_matrix diag_post_multiply(complex_matrix m, complex_vector v)
Return the product of the matrix m and the diagonal matrix formed from the vector v, i.e., m * diag_matrix(v).

+Available since 2.30 + +

+

complex_matrix diag_post_multiply(complex_matrix m, complex_row_vector v)
Return the product of the matrix m and the diagonal matrix formed from the row vector v, i.e., m * diag_matrix(v).

+Available since 2.30 +
+
+
+

Complex reductions

+
+

Sums and products

+ +

+

complex sum(complex_vector x)
The sum of the values in x, or 0 if x is empty

+Available since 2.30 + +

+

complex sum(complex_row_vector x)
The sum of the values in x, or 0 if x is empty

+Available since 2.30 + +

+

complex sum(complex_matrix x)
The sum of the values in x, or 0 if x is empty

+Available since 2.30 + +

+

complex prod(complex_vector x)
The product of the values in x, or 1 if x is empty

+Available since 2.30 + +

+

complex prod(complex_row_vector x)
The product of the values in x, or 1 if x is empty

+Available since 2.30 + +

+

complex prod(complex_matrix x)
The product of the values in x, or 1 if x is empty

+Available since 2.30 +
+
+
+

Vectorized accessor functions

+

Much like with complex scalars, two functions are defined to get the real and imaginary components of complex-valued objects.

+
+

Type “demotion”

+

These functions return the same shape (e.g., matrix, vector, row vector, or array) object as their input, but demoted to a real type. For example, get_real(complex_matrix M) yields a matrix containing the real component of each value in M.

+

The following table contains examples of what this notation can mean:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Type T | Type T_demoted
complex | real
complex_vector | vector
complex_row_vector | row_vector
complex_matrix | matrix
array[] complex | array[] real
array[,,] complex | array[,,] real
+
+
+

Real and imaginary component accessor functions

+ +

+

T_demoted get_real(T x)
Given an object of complex type T, return the same shape object but of type real by getting the real component of each element of x.

+Available since 2.30 + +

+

T_demoted get_imag(T x)
Given an object of complex type T, return the same shape object but of type real by getting the imaginary component of each element of x.

+Available since 2.30 +

For example, given the Stan declaration

+
  complex_vector[2] z = [3+4i, 5+6i]';
+

A call get_real(z) will yield the vector [3, 5]', and a call get_imag(z) will yield the vector [4, 6]'.

+
+
+
+

Complex broadcast functions

+

The following broadcast functions allow vectors, row vectors and matrices to be created by copying a single element into all of their cells. Matrices may also be created by stacking copies of row vectors vertically or stacking copies of column vectors horizontally.

+ +

+

complex_vector rep_vector(complex z, int m)
Return the size m (column) vector consisting of copies of z.

+Available since 2.30 + +

+

complex_row_vector rep_row_vector(complex z, int n)
Return the size n row vector consisting of copies of z.

+Available since 2.30 + +

+

complex_matrix rep_matrix(complex z, int m, int n)
Return the m by n matrix consisting of copies of z.

+Available since 2.30 + +

+

complex_matrix rep_matrix(complex_vector v, int n)
Return the m by n matrix consisting of n copies of the (column) vector v of size m.

+Available since 2.30 + +

+

complex_matrix rep_matrix(complex_row_vector rv, int m)
Return the m by n matrix consisting of m copies of the row vector rv of size n.

+Available since 2.30 +
+

Symmetrization

+ +

+

complex_matrix symmetrize_from_lower_tri(complex_matrix A)
Construct a symmetric matrix from the lower triangle of A.

+Available since 2.30 +
+
+
+

Diagonal complex matrix functions

+ +

+

complex_matrix add_diag(complex_matrix m, complex_row_vector d)
Add row_vector d to the diagonal of matrix m.

+Available since 2.30 + +

+

complex_matrix add_diag(complex_matrix m, complex_vector d)
Add vector d to the diagonal of matrix m.

+Available since 2.30 + +

+

complex_matrix add_diag(complex_matrix m, complex d)
Add scalar d to every diagonal element of matrix m.

+Available since 2.30 + +

+

complex_vector diagonal(complex_matrix x)
The diagonal of the matrix x

+Available since 2.30 + +

+

complex_matrix diag_matrix(complex_vector x)
The diagonal matrix with diagonal x

+Available since 2.30 +
+
+

Slicing and blocking functions for complex matrices

+

Stan provides several functions for generating slices or blocks or diagonal entries for matrices.

+
+

Columns and rows

+ +

+

complex_vector col(complex_matrix x, int n)
The n-th column of matrix x

+Available since 2.30 + +

+

complex_row_vector row(complex_matrix x, int m)
The m-th row of matrix x

+Available since 2.30 +
+
+

Block operations

+
+

Matrix slicing operations

+ +

+

complex_matrix block(complex_matrix x, int i, int j, int n_rows, int n_cols)
Return the submatrix of x that starts at row i and column j and extends n_rows rows and n_cols columns.

+Available since 2.30 + +

+

complex_vector sub_col(complex_matrix x, int i, int j, int n_rows)
Return the sub-column of x that starts at row i and column j and extends n_rows rows and 1 column.

+Available since 2.30 + +

+

complex_row_vector sub_row(complex_matrix x, int i, int j, int n_cols)
Return the sub-row of x that starts at row i and column j and extends 1 row and n_cols columns.

+Available since 2.30 +
+
+

Vector slicing operations.

+ +

+

complex_vector head(complex_vector v, int n)
Return the vector consisting of the first n elements of v.

+Available since 2.30 + +

+

complex_row_vector head(complex_row_vector rv, int n)
Return the row vector consisting of the first n elements of rv.

+Available since 2.30 + +

+

complex_vector tail(complex_vector v, int n)
Return the vector consisting of the last n elements of v.

+Available since 2.30 + +

+

complex_row_vector tail(complex_row_vector rv, int n)
Return the row vector consisting of the last n elements of rv.

+Available since 2.30 + +

+

complex_vector segment(complex_vector v, int i, int n)
Return the vector consisting of the n elements of v starting at i; i.e., elements i through i + n - 1.

+Available since 2.30 + +

+

complex_row_vector segment(complex_row_vector rv, int i, int n)
Return the row vector consisting of the n elements of rv starting at i; i.e., elements i through i + n - 1.

+Available since 2.30 +
+
+
+
+

Complex matrix concatenation

+
+

Horizontal concatenation

+ +

+

complex_matrix append_col(complex_matrix x, complex_matrix y)
Combine matrices x and y by column. The matrices must have the same number of rows.

+Available since 2.30 + +

+

complex_matrix append_col(complex_matrix x, complex_vector y)
Combine matrix x and vector y by column. The matrix and the vector must have the same number of rows.

+Available since 2.30 + +

+

complex_matrix append_col(complex_vector x, complex_matrix y)
Combine vector x and matrix y by column. The vector and the matrix must have the same number of rows.

+Available since 2.30 + +

+

complex_matrix append_col(complex_vector x, complex_vector y)
Combine vectors x and y by column. The vectors must have the same number of rows.

+Available since 2.30 + +

+

complex_row_vector append_col(complex_row_vector x, complex_row_vector y)
Combine row vectors x and y (of any size) into another row vector by appending y to the end of x.

+Available since 2.30 + +

+

complex_row_vector append_col(complex x, complex_row_vector y)
Append x to the front of y, returning another row vector.

+Available since 2.30 + +

+

complex_row_vector append_col(complex_row_vector x, complex y)
Append y to the end of x, returning another row vector.

+Available since 2.30 +
+
+

Vertical concatenation

+ +

+

complex_matrix append_row(complex_matrix x, complex_matrix y)
Combine matrices x and y by row. The matrices must have the same number of columns.

+Available since 2.30 + +

+

complex_matrix append_row(complex_matrix x, complex_row_vector y)
Combine matrix x and row vector y by row. The matrix and the row vector must have the same number of columns.

+Available since 2.30 + +

+

complex_matrix append_row(complex_row_vector x, complex_matrix y)
Combine row vector x and matrix y by row. The row vector and the matrix must have the same number of columns.

+Available since 2.30 + +

+

complex_matrix append_row(complex_row_vector x, complex_row_vector y)
Combine row vectors x and y by row. The row vectors must have the same number of columns.

+Available since 2.30 + +

+

complex_vector append_row(complex_vector x, complex_vector y)
Concatenate vectors x and y of any size into another vector.

+Available since 2.30 + +

+

complex_vector append_row(complex x, complex_vector y)
Append x to the top of y, returning another vector.

+Available since 2.30 + +

+

complex_vector append_row(complex_vector x, complex y)
Append y to the bottom of x, returning another vector.

+Available since 2.30 +
+
+
+

Complex special matrix functions

+
+

Fast Fourier transforms

+

Stan’s fast Fourier transform functions take the standard definition of the discrete Fourier transform (see the definitions below for specifics) and scale the inverse transform by one over dimensionality so that the following identities hold for complex vectors u and v,

+
    fft(inv_fft(u)) == u        inv_fft(fft(v)) == v
+

and in the 2-dimensional case for complex matrices A and B,

+
    fft2(inv_fft2(A)) == A      inv_fft2(fft2(B)) == B
+

Although the FFT functions only accept complex inputs, real vectors and matrices will be promoted to their complex counterparts before applying the FFT functions.

+ +

+

complex_vector fft(complex_vector v)
Return the discrete Fourier transform of the specified complex vector v. If \(v \in \mathbb{C}^N\) is a complex vector with \(N\) elements and \(u = +\textrm{fft}(v)\), then \[\begin{equation*} +u_n = \sum_{m < N} +v_m \cdot +\exp\left(\frac{-n \cdot m \cdot 2 \cdot \pi \cdot \sqrt{-1}}{N}\right). +\end{equation*}\]

+Available since 2.30 + +

+

complex_matrix fft2(complex_matrix m)
Return the 2D discrete Fourier transform of the specified complex matrix m. The 2D FFT is defined as the result of applying the FFT to each row and then to each column.

+Available since 2.30 + +

+

complex_vector inv_fft(complex_vector u)
Return the inverse of the discrete Fourier transform of the specified complex vector u. The inverse FFT (this function) is scaled so that fft(inv_fft(u)) == u. If \(u \in \mathbb{C}^N\) is a complex vector with \(N\) elements and \(v = \textrm{fft}^{-1}(u)\), then \[\begin{equation*} +v_n = \frac{1}{N} \sum_{m < N} +u_m \cdot +\exp\left(\frac{n \cdot m \cdot 2 \cdot \pi \cdot \sqrt{-1}}{N}\right). +\end{equation*}\] This only differs from the FFT by the sign inside the exponential and the scaling. The \(\frac{1}{N}\) scaling ensures that fft(inv_fft(u)) == u and inv_fft(fft(v)) == v for complex vectors u and v.

+Available since 2.30 + +

+

complex_matrix inv_fft2(complex_matrix m)
Return the inverse of the 2D discrete Fourier transform of the specified complex matrix m. The 2D inverse FFT is defined as the result of applying the inverse FFT to each row and then to each column. The invertible scaling of the inverse FFT ensures fft2(inv_fft2(A)) == A and inv_fft2(fft2(B)) == B.

+Available since 2.30 +
+
+

Cumulative sums

+

The cumulative sum of a sequence \(x_1,\ldots,x_N\) is the sequence \(y_1,\ldots,y_N\), where \[\begin{equation*} y_n = \sum_{m = 1}^{n} x_m. \end{equation*}\]

+ +

+

array[] complex cumulative_sum(array[] complex x)
The cumulative sum of x

+Available since 2.30 + +

+

complex_vector cumulative_sum(complex_vector v)
The cumulative sum of v

+Available since 2.30 + +

+

complex_row_vector cumulative_sum(complex_row_vector rv)
The cumulative sum of rv

+Available since 2.30 +
+
+
+

Complex linear algebra functions

+
+

Complex matrix division operators and functions

+

In general, it is much more efficient and also more arithmetically stable to use matrix division than to multiply by an inverse.

+
+

Complex matrix division operators

+ +

+

complex_row_vector operator/(complex_row_vector b, complex_matrix A)
The right division of b by A; equivalently b * inverse(A)

+Available since 2.30 + +

+

complex_matrix operator/(complex_matrix B, complex_matrix A)
The right division of B by A; equivalently B * inverse(A)

+Available since 2.30 +
+
+
+

Linear algebra functions

+
+

Trace

+ +

+

complex trace(complex_matrix A)
The trace of A, or 0 if A is empty; A is not required to be diagonal

+Available since 2.30 +
+
+

Eigendecomposition

+ +

+

complex_vector eigenvalues(complex_matrix A)
The complex-valued vector of eigenvalues of the matrix A. The eigenvalues are repeated according to their algebraic multiplicity, so there are as many eigenvalues as rows in the matrix. The eigenvalues are not sorted in any particular order.

+Available since 2.32 + +

+

complex_matrix eigenvectors(complex_matrix A)
The matrix with the complex-valued (column) eigenvectors of the matrix A in the same order as returned by the function eigenvalues

+Available since 2.32 + +

+

tuple(complex_matrix, complex_vector) eigendecompose(complex_matrix A)
Return the matrix of (column) eigenvectors and vector of eigenvalues of the matrix A. This function is equivalent to (eigenvectors(A), eigenvalues(A)) but with a lower computational cost due to the shared work between the two results.

+Available since 2.33 + +

+

complex_vector eigenvalues_sym(complex_matrix A)
The vector of eigenvalues of a symmetric matrix A in ascending order

+Available since 2.30 + +

+

complex_matrix eigenvectors_sym(complex_matrix A)
The matrix with the (column) eigenvectors of symmetric matrix A in the same order as returned by the function eigenvalues_sym

+Available since 2.30 + +

+

tuple(complex_matrix, complex_vector) eigendecompose_sym(complex_matrix A)
Return the matrix of (column) eigenvectors and vector of eigenvalues of the symmetric matrix A. This function is equivalent to (eigenvectors_sym(A), eigenvalues_sym(A)) but with a lower computational cost due to the shared work between the two results.

+Available since 2.33 +

Because multiplying an eigenvector by \(-1\) results in an eigenvector, eigenvectors returned by a decomposition are only identified up to a sign change. In order to compare the eigenvectors produced by Stan’s eigendecomposition to others, signs may need to be normalized in some way, such as by fixing the sign of a component, or doing comparisons allowing a multiplication by \(-1\).

+

The condition number of a symmetric matrix is defined to be the ratio of the largest eigenvalue to the smallest eigenvalue. Large condition numbers lead to difficulty in numerical algorithms such as computing inverses, and matrices with large condition numbers are thus known as “ill conditioned.” The ratio can even be infinite in the case of singular matrices (i.e., those with eigenvalues of 0).

+
+
+

Singular value decomposition

+

The matrix A can be decomposed into a diagonal matrix of singular values, D, and matrices of its left and right singular vectors, U and V, \[\begin{equation*} A = U D V^T. \end{equation*}\] The matrices of singular vectors here are thin. That is, for an \(N\) by \(P\) input A with \(M = \min(N, P)\), U is size \(N\) by \(M\) and V is size \(P\) by \(M\).

+ +

+

vector singular_values(complex_matrix A)
The singular values of A in descending order

+Available since 2.30 + +

+

complex_matrix svd_U(complex_matrix A)
The left-singular vectors of A

+Available since 2.30 + +

+

complex_matrix svd_V(complex_matrix A)
The right-singular vectors of A

+Available since 2.30 + +

+

tuple(complex_matrix, vector, complex_matrix) svd(complex_matrix A)
Returns a tuple containing the left-singular vectors of A, the singular values of A in descending order, and the right-singular vectors of A. This function is equivalent to (svd_U(A), singular_values(A), svd_V(A)) but with a lower computational cost due to the shared work between the different components.

+Available since 2.33 +
+
+

Complex Schur Decomposition

+

The complex Schur decomposition of a square matrix \(A\) produces a complex unitary matrix \(U\) and a complex upper-triangular Schur form matrix \(T\) such that \[A = U \cdot T \cdot U^{-1}\]

+

Since \(U\) is unitary, its inverse is also its conjugate transpose, \(U^{-1} = U^*\), \(U^*(i, j) = \mathrm{conj}(U(j, i))\)

+ +

+

complex_matrix complex_schur_decompose_t(matrix A)
Compute the upper-triangular Schur form matrix of the complex Schur decomposition of A.

+Available since 2.31 + +

+

complex_matrix complex_schur_decompose_t(complex_matrix A)
Compute the upper-triangular Schur form matrix of the complex Schur decomposition of A.

+Available since 2.31 + +

+

complex_matrix complex_schur_decompose_u(matrix A)
Compute the unitary matrix of the complex Schur decomposition of A.

+Available since 2.31 + +

+

complex_matrix complex_schur_decompose_u(complex_matrix A)
Compute the unitary matrix of the complex Schur decomposition of A.

+Available since 2.31 + +

+

tuple(complex_matrix, complex_matrix) complex_schur_decompose(matrix A)
Returns the unitary matrix and the upper-triangular Schur form matrix of the complex Schur decomposition of A. This function is equivalent to (complex_schur_decompose_u(A), complex_schur_decompose_t(A)) but with a lower computational cost due to the shared work between the two results. This overload is equivalent to complex_schur_decompose(to_complex(A,0)) but is more efficient.

+Available since 2.33 + +

+

tuple(complex_matrix, complex_matrix) complex_schur_decompose(complex_matrix A)
Returns the unitary matrix and the upper-triangular Schur form matrix of the complex Schur decomposition of A. This function is equivalent to (complex_schur_decompose_u(A), complex_schur_decompose_t(A)) but with a lower computational cost due to the shared work between the two results.

+Available since 2.33 +
+
+
+
+

Reverse functions for complex matrices

+ +

+

complex_vector reverse(complex_vector v)
Return a new vector containing the elements of the argument in reverse order.

+Available since 2.30 + +

+

complex_row_vector reverse(complex_row_vector v)
Return a new row vector containing the elements of the argument in reverse order.

+Available since 2.30 + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/compound_arithmetic_and_assignment.html b/docs/2_39/functions-reference/compound_arithmetic_and_assignment.html new file mode 100644 index 000000000..1b6ce2cfe --- /dev/null +++ b/docs/2_39/functions-reference/compound_arithmetic_and_assignment.html @@ -0,0 +1,1177 @@ + + + + + + + + + +Compound Arithmetic and Assignment + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Compound Arithmetic and Assignment

+

Compound arithmetic and assignment statements combine an arithmetic operation and assignment, replacing a statement such as

+
 x = x op y;
+

with the more compact compound form

+
 x op= y;
+

For example, x = x + 1; may be replaced with x += 1;. This works for all types that support arithmetic, including the scalar types int, real, complex, the real matrix types vector, row_vector, and matrix, and the complex matrix types, complex_vector, complex_row_vector, and complex_matrix.

+
+

Compound addition and assignment

+

Compound addition and assignment works wherever the corresponding addition and assignment would be well formed.

+ +

+

void operator+=(T x, U y)
x += y is equivalent to x = x + y. Defined for all types T and U where T = T + U is well formed.

+Available since 2.17, complex signatures added in 2.30 +
+
+

Compound subtraction and assignment

+

Compound subtraction and assignment works wherever the corresponding subtraction and assignment would be well formed.

+ +

+

void operator-=(T x, U y)
x -= y is equivalent to x = x - y. Defined for all types T and U where T = T - U is well formed.

+Available since 2.17, complex signatures added in 2.30 +
+
+

Compound multiplication and assignment

+

Compound multiplication and assignment works wherever the corresponding multiplication and assignment would be well formed.

+ +

+

void operator*=(T x, U y)
x *= y is equivalent to x = x * y. Defined for all types T and U where T = T * U is well formed.

+Available since 2.17, complex signatures added in 2.30 +
+
+

Compound division and assignment

+

Compound division and assignment works wherever the corresponding division and assignment would be well formed.

+ +

+

void operator/=(T x, U y)
x /= y is equivalent to x = x / y. Defined for all types T and U where T = T / U is well formed.

+Available since 2.17, complex signatures added in 2.30 +
+
+

Compound elementwise multiplication and assignment

+

Compound elementwise multiplication and assignment works wherever the corresponding multiplication and assignment would be well formed.

+ +

+

void operator.*=(T x, U y)
x .*= y is equivalent to x = x .* y. Defined for all types T and U where T = T .* U is well formed.

+Available since 2.17, complex signatures added in 2.30 +
+
+

Compound elementwise division and assignment

+

Compound elementwise division and assignment works wherever the corresponding division and assignment would be well formed.

+ +

+

void operator./=(T x, U y)
x ./= y is equivalent to x = x ./ y. Defined for all types T and U where T = T ./ U is well formed.

+Available since 2.17, complex signatures added in 2.30 + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/continuous_distributions_on_0_1.html b/docs/2_39/functions-reference/continuous_distributions_on_0_1.html new file mode 100644 index 000000000..df8c1baeb --- /dev/null +++ b/docs/2_39/functions-reference/continuous_distributions_on_0_1.html @@ -0,0 +1,1248 @@ + + + + + + + + + +Continuous Distributions on [0, 1] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Continuous Distributions on [0, 1]

+

The continuous distributions with outcomes in the interval \([0,1]\) are used to characterize bounded quantities, including probabilities.

+
+

Beta distribution

+
+

Probability density function

+

If \(\alpha \in \mathbb{R}^+\) and \(\beta \in \mathbb{R}^+\), then for \(\theta \in (0,1)\), \[\begin{equation*} \text{Beta}(\theta|\alpha,\beta) = +\frac{1}{\mathrm{B}(\alpha,\beta)} \, \theta^{\alpha - 1} \, (1 - +\theta)^{\beta - 1} , \end{equation*}\] where the beta function \(\mathrm{B}()\) is as defined in section combinatorial functions.

+

Warning: If \(\theta = 0\) or \(\theta = 1\), then the probability is 0 and the log probability is \(-\infty\). Similarly, the distribution requires strictly positive parameters, \(\alpha, \beta > +0\).

+
+
+

Distribution statement

+

theta ~ beta(alpha, beta)

+

Increment target log probability density with beta_lupdf(theta | alpha, beta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real beta_lpdf(reals theta | reals alpha, reals beta)
The log of the beta density of theta in \([0,1]\) given positive prior successes (plus one) alpha and prior failures (plus one) beta

+Available since 2.12 + +

+

real beta_lupdf(reals theta | reals alpha, reals beta)
The log of the beta density of theta in \([0,1]\) given positive prior successes (plus one) alpha and prior failures (plus one) beta dropping constant additive terms

+Available since 2.25 + +

+

real beta_cdf(reals theta | reals alpha, reals beta)
The beta cumulative distribution function of theta in \([0,1]\) given positive prior successes (plus one) alpha and prior failures (plus one) beta

+Available since 2.0 + +

+

real beta_lcdf(reals theta | reals alpha, reals beta)
The log of the beta cumulative distribution function of theta in \([0,1]\) given positive prior successes (plus one) alpha and prior failures (plus one) beta

+Available since 2.12 + +

+

real beta_lccdf(reals theta | reals alpha, reals beta)
The log of the beta complementary cumulative distribution function of theta in \([0,1]\) given positive prior successes (plus one) alpha and prior failures (plus one) beta

+Available since 2.12 + +

+

R beta_rng(reals alpha, reals beta)
Generate a beta variate with positive prior successes (plus one) alpha and prior failures (plus one) beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Beta proportion distribution

+
+

Probability density function

+

If \(\mu \in (0, 1)\) and \(\kappa \in \mathbb{R}^+\), then for \(\theta +\in (0,1)\), \[\begin{equation*} \mathrm{Beta\_Proportion}(\theta|\mu,\kappa) = +\frac{1}{\mathrm{B}(\mu \kappa, (1 - \mu) \kappa)} \, +\theta^{\mu\kappa - 1} \, (1 - \theta)^{(1 - \mu)\kappa- 1} , \end{equation*}\] where the beta function \(\mathrm{B}()\) is as defined in section combinatorial functions.

+

Warning: If \(\theta = 0\) or \(\theta = 1\), then the probability is 0 and the log probability is \(-\infty\). Similarly, the distribution requires \(\mu \in (0, 1)\) and strictly positive parameter, \(\kappa > 0\).

+
+
+

Distribution statement

+

theta ~ beta_proportion(mu, kappa)

+

Increment target log probability density with beta_proportion_lupdf(theta | mu, kappa).

+Available since 2.19 + +

+
+
+

Stan functions

+ +

+

real beta_proportion_lpdf(reals theta | reals mu, reals kappa)
The log of the beta_proportion density of theta in \((0,1)\) given mean mu and precision kappa

+Available since 2.19 + +

+

real beta_proportion_lupdf(reals theta | reals mu, reals kappa)
The log of the beta_proportion density of theta in \((0,1)\) given mean mu and precision kappa dropping constant additive terms

+Available since 2.25 + +

+

real beta_proportion_lcdf(reals theta | reals mu, reals kappa)
The log of the beta_proportion cumulative distribution function of theta in \((0,1)\) given mean mu and precision kappa

+Available since 2.18 + +

+

real beta_proportion_lccdf(reals theta | reals mu, reals kappa)
The log of the beta_proportion complementary cumulative distribution function of theta in \((0,1)\) given mean mu and precision kappa

+Available since 2.18 + +

+

R beta_proportion_rng(reals mu, reals kappa)
Generate a beta_proportion variate with mean mu and precision kappa; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/conventions_for_probability_functions.html b/docs/2_39/functions-reference/conventions_for_probability_functions.html new file mode 100644 index 000000000..892c0238d --- /dev/null +++ b/docs/2_39/functions-reference/conventions_for_probability_functions.html @@ -0,0 +1,1379 @@ + + + + + + + + + +Conventions for Probability Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Conventions for Probability Functions

+

Functions associated with distributions are set up to follow the same naming conventions for both built-in distributions and for user-defined distributions.

+
+

Suffix marks type of function

+

The suffix is determined by the type of function according to the following table.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
functionoutcomesuffix
log probability mass functiondiscrete_lpmf
log probability density functioncontinuous_lpdf
log cumulative distribution functionany_lcdf
log complementary cumulative distribution functionany_lccdf
random number generatorany_rng
+

For example, normal_lpdf is the log of the normal probability density function (pdf) and bernoulli_lpmf is the log of the bernoulli probability mass function (pmf). The logs of the corresponding cumulative distribution functions (cdf) share a single suffix, as in normal_lcdf and bernoulli_lcdf.

+
+
+

Argument order and the vertical bar

+

Each probability function has a specific outcome value and a number of parameters. Following conditional probability notation, probability density and mass functions use a vertical bar to separate the outcome from the parameters of the distribution. For example, normal_lpdf(y | mu, sigma) returns the value of mathematical formula \(\log \text{Normal}(y \, | \, \mu, \sigma)\). Cumulative distribution functions separate the outcome from the parameters in the same way (e.g., normal_lcdf(y_low | mu, sigma)).

+
+
+

Sampling notation

+

The notation

+
 y ~ normal(mu, sigma);
+

provides the same (proportional) contribution to the model log density as the explicit target density increment,

+
 target += normal_lpdf(y | mu, sigma);
+

In both cases, the effect is to add terms to the target log density. The only difference is that the example with the sampling (~) notation drops all additive constants in the log density; the constants are not necessary for any of Stan’s sampling, approximation, or optimization algorithms.

+
+
+

Finite inputs

+

All of the distribution functions are configured to throw exceptions (effectively rejecting iterations or optimization steps) when they are supplied with non-finite arguments. The two cases of non-finite arguments are the infinite values and not-a-number value—these are standard in floating-point arithmetic.

+
+
+

Boundary conditions

+

Many distributions are defined with support or constraints on parameters forming an open interval. For example, the normal density function accepts a scale parameter \(\sigma > 0\). If \(\sigma = 0\), the probability function will throw an exception.

+

This is true even for (complementary) cumulative distribution functions, which will throw exceptions when given input that is out of the support.

+
+
+

Pseudorandom number generators

+

For most of the probability functions, there is a matching pseudorandom number generator (PRNG) with the suffix _rng. For example, the function normal_rng(real, real) accepts two real arguments, an unconstrained location \(\mu\) and positive scale \(\sigma > 0\), and returns an unconstrained pseudorandom value drawn from \(\text{Normal}(\mu,\sigma)\). There are also vectorized forms of random number generators which return more than one random variate at a time.

+
+

Restricted to transformed data and generated quantities

+

Unlike regular functions, the PRNG functions may only be used in the transformed data or generated quantities blocks.

+
+
+

Limited vectorization

+

Unlike the probability functions, only some of the PRNG functions are vectorized.

+
+
+
+

Cumulative distribution functions

+

For most of the univariate probability functions, there is a corresponding cumulative distribution function, log cumulative distribution function, and log complementary cumulative distribution function.

+

For a univariate random variable \(Y\) with probability function \(p_Y(y \, | \, \theta)\), the cumulative distribution function (CDF) \(F_Y\) is defined by \[\begin{equation*} +F_Y(y) \ = \ \text{Pr}[Y \le y] \ = \ \int_{-\infty}^y p(y\, | \, \theta) \ \text{d}y. +\end{equation*}\] The complementary cumulative distribution function (CCDF) is defined as \[\begin{equation*} +\text{Pr}[Y > y] \ = \ 1 - F_Y(y). +\end{equation*}\] The reason to use CCDFs instead of CDFs in floating-point arithmetic is that it is possible to represent numbers very close to 0 (the closest you can get is roughly \(10^{-300}\)), but not numbers very close to 1 (the closest you can get is roughly \(1 - 10^{-15}\)).

+

In Stan, there is a cumulative distribution function for each probability function. For instance, normal_cdf(y | mu, sigma) is defined by \[\begin{equation*} +\int_{-\infty}^y \text{Normal}(y \, | \, \mu, \sigma) \ \text{d}y. +\end{equation*}\] There are also log forms of the CDF and CCDF for most univariate distributions. For example, normal_lcdf(y | mu, sigma) is defined by \[\begin{equation*} +\log \left( \int_{-\infty}^y \text{Normal}(y \, | \, \mu, \sigma) \ \text{d}y \right) +\end{equation*}\] and normal_lccdf(y | mu, sigma) is defined by \[\begin{equation*} +\log \left( 1 - \int_{-\infty}^y \text{Normal}(y \, | \, \mu, \sigma) \ \text{d}y \right). +\end{equation*}\]

+
+
+

Vectorization

+

Stan’s univariate log probability functions, including the log density functions, log mass functions, log CDFs, and log CCDFs, all support vectorized function application, with results defined to be the sum of the elementwise application of the function. Some of the PRNG functions support vectorization, see section vectorized PRNG functions for more details.

+

In all cases, matrix operations are at least as fast as, and usually faster than, loops, and vectorized log probability functions are faster than their equivalent form defined with loops. This isn’t because loops are slow in Stan, but because more efficient automatic differentiation can be used. The efficiency comes from the fact that a vectorized log probability function only introduces one new node into the expression graph, thus reducing the number of virtual function calls required to compute gradients in C++, as well as from allowing caching of repeated computations.

+

Stan also overloads the multivariate normal distribution, including the Cholesky-factor form, allowing arrays of row vectors or vectors for the variate and location parameter. This is a huge savings in speed because the work required to solve the linear system for the covariance matrix is only done once.

+

Stan also overloads some scalar functions, such as log and exp, to apply to vectors (arrays) and return vectors (arrays). These vectorizations are defined elementwise and unlike the probability functions, provide only minimal efficiency speedups over repeated application and assignment in a loop.

+
+

Vectorized function signatures

+
+

Vectorized scalar arguments

+

The normal probability function is specified with the signature

+
 normal_lpdf(reals | reals, reals);
+

The pseudotype reals is used to indicate that an argument position may be vectorized. Argument positions declared as reals may be filled with a real, a one-dimensional array, a vector, or a row-vector. If there is more than one array or vector argument, their types can be anything but their size must match. For instance, it is legal to use normal_lpdf(row_vector | vector, real) as long as the vector and row vector have the same size.

+
+
+

Vectorized vector and row vector arguments

+

The multivariate normal distribution accepting vector or array of vector arguments is written as

+
 multi_normal_lpdf(vectors | vectors, matrix);
+

These arguments may be row vectors, column vectors, or arrays of row vectors or column vectors.

+
+
+

Vectorized integer arguments

+

The pseudotype ints is used for vectorized integer arguments. Where it appears either an integer or array of integers may be used.

+
+
+
+

Evaluating vectorized log probability functions

+

The result of a vectorized log probability function is equivalent to the sum of the evaluations on each element. Any non-vector argument, namely real or int, is repeated. For instance, if y is a vector of size N, mu is a vector of size N, and sigma is a scalar, then

+
 ll = normal_lpdf(y | mu, sigma);
+

is just a more efficient way to write

+
 ll = 0;
+ for (n in 1:N) {
+   ll = ll + normal_lpdf(y[n] | mu[n], sigma);
+ }
+

With the same arguments, the vectorized sampling statement

+
 y ~ normal(mu, sigma);
+

has the same effect on the total log probability as

+
 for (n in 1:N) {
+   y[n] ~ normal(mu[n], sigma);
+ }
+
+
+

Evaluating vectorized PRNG functions

+

Some PRNG functions accept sequences as well as scalars as arguments. Such functions are indicated by argument pseudotypes reals or ints. In cases of sequence arguments, the output will also be a sequence. For example, the following is allowed in the transformed data and generated quantities blocks.

+
 vector[3] mu = // ...
+ array[3] real x = normal_rng(mu, 3);
+
+

Argument types

+

In the case of PRNG functions, arguments marked ints may be integers or integer arrays, whereas arguments marked reals may be integers or reals, integer or real arrays, vectors, or row vectors.

+ ++++ + + + + + + + + + + + + + + + + +
pseudotypeallowable PRNG arguments
intsint, array[] int
realsint, array[] int, real, array[] real, vector, row_vector
+
+
+

Dimension matching

+

In general, if there are multiple non-scalar arguments, they must all have the same dimensions, but need not have the same type. For example, the normal_rng function may be called with one vector argument and one real array argument as long as they have the same number of elements.

+
 vector[3] mu = // ...
+ array[3] real sigma = // ...
+ array[3] real x = normal_rng(mu, sigma);
+
+
+

Return type

+

The result of a vectorized PRNG function depends on the size of the arguments and the distribution’s support. If all arguments are scalars, then the return type is a scalar. For a continuous distribution, if there are any non-scalar arguments, the return type is a real array (array[] real) matching the size of any of the non-scalar arguments, as all non-scalar arguments must have matching size. Discrete distributions return ints and continuous distributions return reals, each of appropriate size. The symbol R denotes such a return type.

+ + +
+
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/correlation_matrix_distributions.html b/docs/2_39/functions-reference/correlation_matrix_distributions.html new file mode 100644 index 000000000..2d9c7541a --- /dev/null +++ b/docs/2_39/functions-reference/correlation_matrix_distributions.html @@ -0,0 +1,1308 @@ + + + + + + + + + +Correlation Matrix Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Correlation Matrix Distributions

+

The correlation matrix distributions have support on the (Cholesky factors of) correlation matrices. A Cholesky factor \(L\) for a \(K +\times K\) correlation matrix \(\Sigma\) of dimension \(K\) has rows of unit length so that the diagonal of \(L L^{\top}\) is the unit \(K\)-vector. Even though models are usually conceptualized in terms of correlation matrices, it is better to operationalize them in terms of their Cholesky factors. If you are interested in the posterior distribution of the correlations, you can recover them in the generated quantities block via

+
 generated quantities {
+   corr_matrix[K] Sigma;
+   Sigma = multiply_lower_tri_self_transpose(L);
+ }
+
+

LKJ correlation distribution

+
+

Probability density function

+

For \(\eta > 0\), if \(\Sigma\) is a positive-definite, symmetric matrix with unit diagonal (i.e., a correlation matrix), then \[\begin{equation*} +\text{LkjCorr}(\Sigma|\eta) \propto \det \left( \Sigma \right)^{(\eta +- 1)}. \end{equation*}\] The expectation is the identity matrix for any positive value of the shape parameter \(\eta\), which can be interpreted like the shape parameter of a symmetric beta distribution:

+
    +
  • if \(\eta = 1\), then the density is uniform over correlation matrices of order \(K\);

  • +
  • if \(\eta > 1\), the identity matrix is the modal correlation matrix, with a sharper peak in the density at the identity matrix for larger \(\eta\); and

  • +
  • for \(0 < \eta < 1\), the density has a trough at the identity matrix.

  • +
  • if \(\eta\) were an unknown parameter, the Jeffreys prior is proportional to \(\sqrt{2\sum_{k=1}^{K-1}\left( +\psi_1\left(\eta+\frac{K-k-1}{2}\right) - 2\psi_1\left(2\eta+K-k-1 +\right)\right)}\), where \(\psi_1()\) is the trigamma function

  • +
+

See (Lewandowski, Kurowicka, and Joe 2009) for definitions. However, it is much better computationally to work directly with the Cholesky factor of \(\Sigma\), so this distribution should never be explicitly used in practice.

+
+
+

Distribution statement

+

y ~ lkj_corr(eta)

+

Increment target log probability density with lkj_corr_lupdf(y | eta).

+Available since 2.3 + +

+
+
+

Stan functions

+ +

+

real lkj_corr_lpdf(matrix y | real eta)
The log of the LKJ density for the correlation matrix y given positive shape eta. lkj_corr_cholesky_lpdf is faster, more numerically stable, uses less memory, and should be preferred to this.

+Available since 2.12 + +

+

real lkj_corr_lupdf(matrix y | real eta)
The log of the LKJ density for the correlation matrix y given positive shape eta dropping constant additive terms. lkj_corr_cholesky_lupdf is faster, more numerically stable, uses less memory, and should be preferred to this.

+Available since 2.25 + +

+

matrix lkj_corr_rng(int K, real eta)
Generate a LKJ random correlation matrix of order K with shape eta; may only be used in transformed data and generated quantities blocks

+Available since 2.0 +
+
+
+

Cholesky LKJ correlation distribution

+

Stan provides an implicit parameterization of the LKJ correlation matrix density in terms of its Cholesky factor, which you should use rather than the explicit parameterization in the previous section. For example, if L is a Cholesky factor of a correlation matrix, then

+
 L ~ lkj_corr_cholesky(2.0); # implies L * L' ~ lkj_corr(2.0);
+

Because Stan requires models to have support on all valid constrained parameters, L will almost always1 be a parameter declared with the type of a Cholesky factor for a correlation matrix; for example,

+
 parameters {   cholesky_factor_corr[K] L;   # rather than corr_matrix[K] Sigma;   // ...
+
+

Probability density function

+

For \(\eta > 0\), if \(L\) is a \(K \times K\) lower-triangular Cholesky factor of a symmetric positive-definite matrix with unit diagonal (i.e., a correlation matrix), then \[\begin{equation*} \text{LkjCholesky}(L|\eta) +\propto \left|J\right|\det(L L^\top)^{(\eta - 1)} = \prod_{k=2}^K +L_{kk}^{K-k+2\eta-2}. \end{equation*}\] See the previous section for details on interpreting the shape parameter \(\eta\). Note that even if \(\eta=1\), it is still essential to evaluate the density function because the density of \(L\) is not constant, regardless of the value of \(\eta\), even though the density of \(LL^\top\) is constant iff \(\eta=1\).

+

A lower triangular \(L\) is a Cholesky factor for a correlation matrix if and only if \(L_{k,k} > 0\) for \(k \in 1{:}K\) and each row \(L_k\) has unit Euclidean length.

+
+
+

Distribution statement

+

L ~ lkj_corr_cholesky(eta)

+

Increment target log probability density with lkj_corr_cholesky_lupdf(L | eta).

+Available since 2.4 + +

+
+
+

Stan functions

+ +

+

real lkj_corr_cholesky_lpdf(matrix L | real eta)
The log of the LKJ density for the lower-triangular Cholesky factor L of a correlation matrix given shape eta

+Available since 2.12 + +

+

real lkj_corr_cholesky_lupdf(matrix L | real eta)
The log of the LKJ density for the lower-triangular Cholesky factor L of a correlation matrix given shape eta dropping constant additive terms

+Available since 2.25 + +

+

matrix lkj_corr_cholesky_rng(int K, real eta)
Generate a random Cholesky factor of a correlation matrix of order K that is distributed LKJ with shape eta; may only be used in transformed data and generated quantities blocks

+Available since 2.4 + + + +
+
+
+ + + Back to top

References

+
+Lewandowski, Daniel, Dorota Kurowicka, and Harry Joe. 2009. “Generating Random Correlation Matrices Based on Vines and Extended Onion Method.” Journal of Multivariate Analysis 100: 1989–2001. +
+

Footnotes

+ +
    +
  1. It is possible to build up a valid L within Stan, but that would then require Jacobian adjustments to imply the intended posterior.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/covariance_matrix_distributions.html b/docs/2_39/functions-reference/covariance_matrix_distributions.html new file mode 100644 index 000000000..26470d87e --- /dev/null +++ b/docs/2_39/functions-reference/covariance_matrix_distributions.html @@ -0,0 +1,1312 @@ + + + + + + + + + +Covariance Matrix Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Covariance Matrix Distributions

+

The covariance matrix distributions have support on symmetric, positive-definite \(K \times K\) matrices or their Cholesky factors (square, lower triangular matrices with positive diagonal elements).

+
+

Wishart distribution

+
+

Probability density function

+

If \(K \in \mathbb{N}\), \(\nu \in (K-1,\infty)\), and \(S \in +\mathbb{R}^{K \times K}\) is symmetric and positive definite, then for symmetric and positive-definite \(W \in \mathbb{R}^{K \times K}\), \[\begin{equation*} +\text{Wishart}(W \mid \nu,S) = \frac{1}{2^{\nu K / 2}} +\ \frac{1}{\Gamma_K \! \left( \frac{\nu}{2} \right)} +\ \left| S \right|^{-\nu/2} \ \left| W \right|^{(\nu - K - 1)/2} +\ \exp \! \left(- \frac{1}{2} \ \text{tr}\left( S^{-1} W \right) \right) \! , +\end{equation*}\] where \(\text{tr}()\) is the matrix trace function, and \(\Gamma_K()\) is the multivariate Gamma function, \[\begin{equation*} +\Gamma_K(x) = \frac{1}{\pi^{K(K-1)/4}} \ \prod_{k=1}^K \Gamma \left( x + \frac{1 - k}{2} \right) \!. +\end{equation*}\]

+
+
+

Distribution statement

+

W ~ wishart(nu, Sigma)

+

Increment target log probability density with wishart_lupdf(W | nu, Sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real wishart_lpdf(matrix W | real nu, matrix Sigma)
Return the log of the Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma.

+Available since 2.12 + +

+

real wishart_lupdf(matrix W | real nu, matrix Sigma)
Return the log of the Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma dropping constant additive terms.

+Available since 2.25 + +

+

matrix wishart_rng(real nu, matrix Sigma)
Generate a Wishart variate with degrees of freedom nu and symmetric and positive-definite scale matrix Sigma; may only be used in transformed data and generated quantities blocks.

+Available since 2.0 +
+
+
+

Wishart distribution, Cholesky Parameterization

+

The Cholesky parameterization of the Wishart distribution uses a Cholesky factor for both the variate and the parameter. If \(S\) and \(W\) are positive definite matrices with Cholesky factors \(L_S\) and \(L_W\) (i.e., \(S = L_S L_S^{\top}\) and \(W = L_W L_W^{\top}\)), then the Cholesky parameterization is defined so that \[\begin{equation*} +L_W \sim \textrm{WishartCholesky}(\nu, L_S) +\end{equation*}\] if and only if \[\begin{equation*} +W \sim \textrm{Wishart}(\nu, S). +\end{equation*}\]

+
+

Probability density function

+

If \(K \in \mathbb{N}\), \(\nu \in (K-1, \infty)\), and \(L_S, L_W \in \mathbb{R}^{K \times K}\) are lower triangular matrices with positive diagonal elements, then the Cholesky parameterized Wishart density is \[\begin{equation*} +\text{WishartCholesky}(L_W \mid \nu,L_S) += \text{Wishart}(L_W L_W^{\top} \mid \nu,L_S L_S^{\top}) \, \left| J_{f^{-1}} \right|, +\end{equation*}\] where \(J_{f^{-1}}\) is the Jacobian of the (inverse) transform of the variate, \(f^{-1}(L_W) = L_W L_W^{\top}\). The log absolute determinant is \[\begin{equation*} +\log \left| J_{f^{-1}} \right| += K \log(2) + \sum_{k=1}^K (K - k + 1) \log {(L_W)_{k,\, k}}. +\end{equation*}\]

+

The probability functions will raise errors if \(\nu \leq K - 1\) or if \(L_S\) and \(L_W\) are not Cholesky factors (square, lower-triangular matrices with positive diagonal elements) of the same size.

+
+
+

Stan functions

+ +

+

real wishart_cholesky_lpdf(matrix L_W | real nu, matrix L_S)
Return the log of the Wishart density for lower-triangular Cholesky factor L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S.

+Available since 2.30 + +

+

real wishart_cholesky_lupdf(matrix L_W | real nu, matrix L_S)
Return the log of the Wishart density for lower-triangular Cholesky factor L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S dropping constant additive terms.

+Available since 2.30 + +

+

matrix wishart_cholesky_rng(real nu, matrix L_S)
Generate the Cholesky factor of a Wishart variate with degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S; may only be used in transformed data and generated quantities blocks

+Available since 2.30 +
+
+
+

Inverse Wishart distribution

+
+

Probability density function

+

If \(K \in \mathbb{N}\), \(\nu \in (K-1,\infty)\), and \(S \in +\mathbb{R}^{K \times K}\) is symmetric and positive definite, then for symmetric and positive-definite \(W \in \mathbb{R}^{K \times K}\), \[\begin{equation*} +\text{InvWishart}(W \mid \nu,S) = \frac{1}{2^{\nu K / 2}} \ \frac{1}{\Gamma_K \! \left( \frac{\nu}{2} \right)} +\ \left| S \right|^{\nu/2} \ \left| W \right|^{-(\nu + K + 1)/2} +\ \exp \! \left( - \frac{1}{2} \ \text{tr}(SW^{-1}) \right) \! . +\end{equation*}\]

+
+
+

Distribution statement

+

W ~ inv_wishart(nu, Sigma)

+

Increment target log probability density with inv_wishart_lupdf(W | nu, Sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real inv_wishart_lpdf(matrix W | real nu, matrix Sigma)
Return the log of the inverse Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma.

+Available since 2.12 + +

+

real inv_wishart_lupdf(matrix W | real nu, matrix Sigma)
Return the log of the inverse Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma dropping constant additive terms.

+Available since 2.25 + +

+

matrix inv_wishart_rng(real nu, matrix Sigma)
Generate an inverse Wishart variate with degrees of freedom nu and symmetric and positive-definite scale matrix Sigma; may only be used in transformed data and generated quantities blocks.

+Available since 2.0 +
+
+
+

Inverse Wishart distribution, Cholesky Parameterization

+

The Cholesky parameterization of the inverse Wishart distribution uses a Cholesky factor for both the variate and the parameter. If \(S\) and \(W\) are positive definite matrices with Cholesky factors \(L_S\) and \(L_W\) (i.e., \(S = L_S L_S^{\top}\) and \(W = L_W L_W^{\top}\)), then the Cholesky parameterization is defined so that \[\begin{equation*} +L_W \sim \textrm{InvWishartCholesky}(\nu, L_S) +\end{equation*}\] if and only if \[\begin{equation*} +W \sim \textrm{InvWishart}(\nu, S). +\end{equation*}\]

+
+

Probability density function

+

If \(K \in \mathbb{N}\), \(\nu \in (K-1, \infty)\), and \(L_S, L_W \in +\mathbb{R}^{K \times K}\) are lower triangular matrices with positive diagonal elements, then the Cholesky parameterized inverse Wishart density is \[\begin{equation*} +\text{InvWishartCholesky}(L_W \mid \nu,L_S) = +\text{InvWishart}(L_WL_W^{\top} \mid \nu, L_S L_S^{\top}) \, \left| J_{f^{-1}} \right|, +\end{equation*}\] where \(J_{f^{-1}}\) is the Jacobian of the (inverse) transform of the variate, \(f^{-1}(L_W) = L_W L_W^{\top}\). The log absolute determinant is \[\begin{equation*} +\log \left| J_{f^{-1}} \right| += K \log(2) + \sum_{k=1}^K (K - k + 1) \log {(L_W)_{k,\, k}}. +\end{equation*}\]

+

The probability functions will raise errors if \(\nu \leq K - 1\) or if \(L_S\) and \(L_W\) are not Cholesky factors (square, lower-triangular matrices with positive diagonal elements) of the same size.

+
+
+

Stan functions

+ +

+

real inv_wishart_cholesky_lpdf(matrix L_W | real nu, matrix L_S)
Return the log of the inverse Wishart density for lower-triangular Cholesky factor L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S.

+Available since 2.30 + +

+

real inv_wishart_cholesky_lupdf(matrix L_W | real nu, matrix L_S)
Return the log of the inverse Wishart density for lower-triangular Cholesky factor L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S dropping constant additive terms.

+Available since 2.30 + +

+

matrix inv_wishart_cholesky_rng(real nu, matrix L_S)
Generate the Cholesky factor of an inverse Wishart variate with degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S; may only be used in transformed data and generated quantities blocks.

+Available since 2.30 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/deprecated_functions.html b/docs/2_39/functions-reference/deprecated_functions.html new file mode 100644 index 000000000..1a9def5ee --- /dev/null +++ b/docs/2_39/functions-reference/deprecated_functions.html @@ -0,0 +1,1357 @@ + + + + + + + + + +Deprecated Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Deprecated Functions

+

This appendix lists currently deprecated functionality along with how to replace it.

+

Starting in Stan 2.29, deprecated functions with drop-in replacements (such as the renaming of get_lp or multiply_log) will be removed 3 versions later; e.g., functions deprecated in Stan 2.20 will be removed in Stan 2.23 and placed in Removed Functions. The Stan compiler can automatically update these on behalf of the user for the entire deprecation window and at least one version following the removal.

+
+

Integer division with operator/

+

Deprecated: Using / with two integer arguments is interpreted as integer floor division, such that

+

\[ 1 / 2 = 0 \]

+

This is deprecated due to its confusion with real-valued division, where

+

\[ 1.0 / 2.0 = 0.5 \]

+

Replacement: Use the integer division operator operator%/% instead.

+
+
+

integrate_ode_rk45, integrate_ode_adams, integrate_ode_bdf ODE Integrators

+

These ODE integrator functions have been replaced by those described in Ordinary Differential Equation (ODE) Solvers.

+
+

Specifying an ordinary differential equation as a function

+

A system of ODEs is specified as an ordinary function in Stan within the functions block. The ODE system function must have this function signature:

+
array[] real ode(real time, array[] real state, array[] real theta,
+                 array[] real x_r, array[] int x_i);
+

The ODE system function should return the derivative of the state with respect to time at the time provided. The length of the returned real array must match the length of the state input into the function.

+

The arguments to this function are:

+
    +
  • time, the time to evaluate the ODE system

  • +
  • state, the state of the ODE system at the time specified

  • +
  • theta, parameter values used to evaluate the ODE system

  • +
  • x_r, data values used to evaluate the ODE system

  • +
  • x_i, integer data values used to evaluate the ODE system.

  • +
+

The ODE system function separates parameter values, theta, from data values, x_r, for efficiency in computing the gradients of the ODE.

+
+
+

Non-stiff solver

+ +

+

array[,] real integrate_ode_rk45(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i)
Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.

+Available since 2.10, deprecated in 2.24 + +

+

array[,] real integrate_ode_rk45(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i, real rel_tol, real abs_tol, int max_num_steps)
Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method with additional control parameters for the solver.

+Available since 2.10, deprecated in 2.24 + +

+

array[,] real integrate_ode(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i)
Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.

+Available since 2.10, deprecated in 2.24 + +

+

array[,] real integrate_ode_adams(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i)
Solves the ODE system for the times provided using the Adams-Moulton method.

+Available since 2.23, deprecated in 2.24 + +

+

array[,] real integrate_ode_adams(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i, data real rel_tol, data real abs_tol, data int max_num_steps)
Solves the ODE system for the times provided using the Adams-Moulton method with additional control parameters for the solver.

+Available since 2.23, deprecated in 2.24 +
+
+

Stiff solver

+ +

+

array[,] real integrate_ode_bdf(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i)
Solves the ODE system for the times provided using the backward differentiation formula (BDF) method.

+Available since 2.10, deprecated in 2.24 + +

+

array[,] real integrate_ode_bdf(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i, data real rel_tol, data real abs_tol, data int max_num_steps)
Solves the ODE system for the times provided using the backward differentiation formula (BDF) method with additional control parameters for the solver.

+Available since 2.10, deprecated in 2.24 +
+
+

Arguments to the ODE solvers

+

The arguments to the ODE solvers in both the stiff and non-stiff cases are as follows.

+
    +
  • ode: function literal referring to a function specifying the system of differential equations with signature:
  • +
+
(real, array[] real, array[] real, data array[] real, data array[] int):array[] real
+

The arguments represent (1) time, (2) system state, (3) parameters, (4) real data, and (5) integer data, and the return value contains the derivatives with respect to time of the state,

+
    +
  • initial_state: initial state, type array[] real,

  • +
  • initial_time: initial time, type int or real,

  • +
  • times: solution times, type array[] real,

  • +
  • theta: parameters, type array[] real,

  • +
  • data x_r: real data, type array[] real, data only, and

  • +
  • data x_i: integer data, type array[] int, data only.

  • +
+

For more fine-grained control of the ODE solvers, these parameters can also be provided:

+
    +
  • data rel_tol: relative tolerance for the ODE solver, type real, data only,

  • +
  • data abs_tol: absolute tolerance for the ODE solver, type real, data only, and

  • +
  • data max_num_steps: maximum number of steps to take in the ODE solver, type int, data only.

  • +
+
+

Return values

+

The return value for the ODE solvers is an array of type array[,] real, with values consisting of solutions at the specified times.

+
+
+

Sizes and parallel arrays

+

The sizes must match, and in particular, the following groups are of the same size:

+
    +
  • state variables passed into the system function, derivatives returned by the system function, initial state passed into the solver, and each row of the return value of the solver,

  • +
  • solution times and number of rows of the return value of the solver,

  • +
  • parameters, real data and integer data passed to the solver will be passed to the system function

  • +
+
+
+
+
+

algebra_solver, algebra_solver_newton algebraic solvers

+

These algebraic solver functions have been replaced by those described in Algebraic Equation Solvers.

+
+

Specifying an algebraic equation as a function

+

An algebraic system is specified as an ordinary function in Stan within the functions block. The algebraic system function must have this signature:

+
 vector algebra_system(vector y, vector theta,
+                              data array[] real x_r, array[] int x_i)
+

The algebraic system function should return the value of the algebraic function which goes to 0, when we plug in the solution to the algebraic system.

+

The arguments to this function are:

+
    +
  • y, the unknowns we wish to solve for

  • +
  • theta, parameter values used to evaluate the algebraic system

  • +
  • x_r, data values used to evaluate the algebraic system

  • +
  • x_i, integer data used to evaluate the algebraic system

  • +
+

The algebraic system function separates parameter values, theta, from data values, x_r, for efficiency in propagating the derivatives through the algebraic system.

+
+
+

Call to the algebraic solver

+

vector algebra_solver(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i)
Solves the algebraic system, given an initial guess, using the Powell hybrid algorithm.

+Available since 2.17, deprecated in 2.31 + +

+

vector algebra_solver(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps)
Solves the algebraic system, given an initial guess, using the Powell hybrid algorithm with additional control parameters for the solver.

+Available since 2.17, deprecated in 2.31 +

Note: In future releases, the function algebra_solver will be deprecated and replaced with algebra_solver_powell.

+ +

+

vector algebra_solver_newton(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i)
Solves the algebraic system, given an initial guess, using Newton’s method.

+Available since 2.24, deprecated in 2.31 + +

+

vector algebra_solver_newton(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps)
Solves the algebraic system, given an initial guess, using Newton’s method with additional control parameters for the solver.

+Available since 2.24, deprecated in 2.31 +
+

Arguments to the algebraic solver

+

The arguments to the algebraic solvers are as follows:

+
    +
  • algebra_system: function literal referring to a function specifying the system of algebraic equations with signature (vector, vector, array[] real, array[] int):vector. The arguments represent (1) unknowns, (2) parameters, (3) real data, and (4) integer data, and the return value contains the value of the algebraic function, which goes to 0 when we plug in the solution to the algebraic system,

  • +
  • y_guess: initial guess for the solution, type vector,

  • +
  • theta: parameters only, type vector,

  • +
  • x_r: real data only, type array[] real, and

  • +
  • x_i: integer data only, type array[] int.

  • +
+

For more fine-grained control of the algebraic solver, these parameters can also be provided:

+
    +
  • rel_tol: relative tolerance for the algebraic solver, type real, data only,

  • +
  • f_tol: function tolerance for the algebraic solver, type real, data only,

  • +
  • max_steps: maximum number of steps to take in the algebraic solver, type int, data only.

  • +
+
+
+

Return value

+

The return value for the algebraic solver is an object of type vector, with values which, when plugged in as y, make the algebraic function go to 0.

+
+
+

Sizes and parallel arrays

+

Certain sizes have to be consistent. The initial guess, return value of the solver, and return value of the algebraic function must all be the same size.

+

The parameters, real data, and integer data will be passed from the solver directly to the system function.

+ + +
+
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/distributions_over_unbounded_vectors.html b/docs/2_39/functions-reference/distributions_over_unbounded_vectors.html new file mode 100644 index 000000000..aa66e4846 --- /dev/null +++ b/docs/2_39/functions-reference/distributions_over_unbounded_vectors.html @@ -0,0 +1,1624 @@ + + + + + + + + + +Distributions over Unbounded Vectors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Distributions over Unbounded Vectors

+

The unbounded vector probability distributions have support on all of \(\mathbb{R}^K\) for some fixed \(K\).

+
+

Multivariate normal distribution

+
+

Probability density function

+

If \(K \in \mathbb{N}\), \(\mu \in \mathbb{R}^K\), and \(\Sigma \in +\mathbb{R}^{K \times K}\) is symmetric and positive definite, then for \(y \in \mathbb{R}^K\), \[\begin{equation*} +\text{MultiNormal}(y|\mu,\Sigma) = +\frac{1}{\left( 2 \pi \right)^{K/2}} \ \frac{1}{\sqrt{|\Sigma|}} +\ \exp \! \left( \! - \frac{1}{2} (y - \mu)^{\top} \, \Sigma^{-1} \, (y - \mu) \right) \! , +\end{equation*}\] where \(|\Sigma|\) is the absolute determinant of \(\Sigma\).

+
+
+

Distribution statement

+

y ~ multi_normal(mu, Sigma)

+

Increment target log probability density with multi_normal_lupdf(y | mu, Sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+

The multivariate normal probability function is overloaded to allow the variate vector \(y\) and location vector \(\mu\) to be vectors or row vectors (or to mix the two types). The density function is also vectorized, so it allows arrays of row vectors or vectors as arguments; see section vectorized function signatures for a description of vectorization.

+ +

+

real multi_normal_lpdf(vectors y | vectors mu, matrix Sigma)
The log of the multivariate normal density of vector(s) y given location vector(s) mu and covariance matrix Sigma

+Available since 2.12 + +

+

real multi_normal_lupdf(vectors y | vectors mu, matrix Sigma)
The log of the multivariate normal density of vector(s) y given location vector(s) mu and covariance matrix Sigma dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_lpdf(vectors y | row_vectors mu, matrix Sigma)
The log of the multivariate normal density of vector(s) y given location row vector(s) mu and covariance matrix Sigma

+Available since 2.12 + +

+

real multi_normal_lupdf(vectors y | row_vectors mu, matrix Sigma)
The log of the multivariate normal density of vector(s) y given location row vector(s) mu and covariance matrix Sigma dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_lpdf(row_vectors y | vectors mu, matrix Sigma)
The log of the multivariate normal density of row vector(s) y given location vector(s) mu and covariance matrix Sigma

+Available since 2.12 + +

+

real multi_normal_lupdf(row_vectors y | vectors mu, matrix Sigma)
The log of the multivariate normal density of row vector(s) y given location vector(s) mu and covariance matrix Sigma dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_lpdf(row_vectors y | row_vectors mu, matrix Sigma)
The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and covariance matrix Sigma

+Available since 2.12 + +

+

real multi_normal_lupdf(row_vectors y | row_vectors mu, matrix Sigma)
The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and covariance matrix Sigma dropping constant additive terms

+Available since 2.25 +

Although there is a direct multi-normal RNG function, if more than one result is required, it’s much more efficient to Cholesky factor the covariance matrix and call multi_normal_cholesky_rng; see section multi-variate normal, cholesky parameterization.

+ +

+

vector multi_normal_rng(vector mu, matrix Sigma)
Generate a multivariate normal variate with location mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks

+Available since 2.0 + +

+

vector multi_normal_rng(row_vector mu, matrix Sigma)
Generate a multivariate normal variate with location mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks

+Available since 2.18 + +

+

vectors multi_normal_rng(vectors mu, matrix Sigma)
Generate an array of multivariate normal variates with locations mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks

+Available since 2.18 + +

+

vectors multi_normal_rng(row_vectors mu, matrix Sigma)
Generate an array of multivariate normal variates with locations mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks

+Available since 2.18 +
+
+
+

Multivariate normal distribution, precision parameterization

+
+

Probability density function

+

If \(K \in \mathbb{N}\), \(\mu \in \mathbb{R}^K\), and \(\Omega \in +\mathbb{R}^{K \times K}\) is symmetric and positive definite, then for \(y \in \mathbb{R}^K\), \[\begin{equation*} \text{MultiNormalPrecision}(y|\mu,\Omega) += \text{MultiNormal}(y|\mu,\Omega^{-1}) \end{equation*}\]

+
+
+

Distribution statement

+

y ~ multi_normal_prec(mu, Omega)

+

Increment target log probability density with multi_normal_prec_lupdf(y | mu, Omega).

+Available since 2.3 + +

+
+
+

Stan functions

+ +

+

real multi_normal_prec_lpdf(vectors y | vectors mu, matrix Omega)
The log of the multivariate normal density of vector(s) y given location vector(s) mu and positive definite precision matrix Omega

+Available since 2.18 + +

+

real multi_normal_prec_lupdf(vectors y | vectors mu, matrix Omega)
The log of the multivariate normal density of vector(s) y given location vector(s) mu and positive definite precision matrix Omega dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_prec_lpdf(vectors y | row_vectors mu, matrix Omega)
The log of the multivariate normal density of vector(s) y given location row vector(s) mu and positive definite precision matrix Omega

+Available since 2.18 + +

+

real multi_normal_prec_lupdf(vectors y | row_vectors mu, matrix Omega)
The log of the multivariate normal density of vector(s) y given location row vector(s) mu and positive definite precision matrix Omega dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_prec_lpdf(row_vectors y | vectors mu, matrix Omega)
The log of the multivariate normal density of row vector(s) y given location vector(s) mu and positive definite precision matrix Omega

+Available since 2.18 + +

+

real multi_normal_prec_lupdf(row_vectors y | vectors mu, matrix Omega)
The log of the multivariate normal density of row vector(s) y given location vector(s) mu and positive definite precision matrix Omega dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_prec_lpdf(row_vectors y | row_vectors mu, matrix Omega)
The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and positive definite precision matrix Omega

+Available since 2.18 + +

+

real multi_normal_prec_lupdf(row_vectors y | row_vectors mu, matrix Omega)
The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and positive definite precision matrix Omega dropping constant additive terms

+Available since 2.25 +
+
+
+

Multivariate normal distribution, Cholesky parameterization

+
+

Probability density function

+

If \(K \in \mathbb{N}\), \(\mu \in \mathbb{R}^K\), and \(L \in +\mathbb{R}^{K \times K}\) is lower triangular and such that \(LL^{\top}\) is positive definite, then for \(y \in \mathbb{R}^K\), \[\begin{equation*} +\text{MultiNormalCholesky}(y|\mu,L) = +\text{MultiNormal}(y|\mu,LL^{\top}). \end{equation*}\] If \(L\) is lower triangular and \(LL^{\top}\) is a \(K \times K\) positive definite matrix, then \(L_{k,k}\) must be strictly positive for \(k \in 1{:}K\). If an \(L\) is provided that is not the Cholesky factor of a positive-definite matrix, the probability functions will raise errors.

+
+
+

Distribution statement

+

y ~ multi_normal_cholesky(mu, L)

+

Increment target log probability density with multi_normal_cholesky_lupdf(y | mu, L).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real multi_normal_cholesky_lpdf(vectors y | vectors mu, matrix L)
The log of the multivariate normal density of vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L

+Available since 2.18 + +

+

real multi_normal_cholesky_lupdf(vectors y | vectors mu, matrix L)
The log of the multivariate normal density of vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_cholesky_lpdf(vectors y | row_vectors mu, matrix L)
The log of the multivariate normal density of vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L

+Available since 2.18 + +

+

real multi_normal_cholesky_lupdf(vectors y | row_vectors mu, matrix L)
The log of the multivariate normal density of vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_cholesky_lpdf(row_vectors y | vectors mu, matrix L)
The log of the multivariate normal density of row vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L

+Available since 2.18 + +

+

real multi_normal_cholesky_lupdf(row_vectors y | vectors mu, matrix L)
The log of the multivariate normal density of row vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms

+Available since 2.25 + +

+

real multi_normal_cholesky_lpdf(row_vectors y | row_vectors mu, matrix L)
The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L

+Available since 2.18 + +

+

real multi_normal_cholesky_lupdf(row_vectors y | row_vectors mu, matrix L)
The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms

+Available since 2.25 + +

+

vector multi_normal_cholesky_rng(vector mu, matrix L)
Generate a multivariate normal variate with location mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks

+Available since 2.3 + +

+

vector multi_normal_cholesky_rng(row_vector mu, matrix L)
Generate a multivariate normal variate with location mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks

+Available since 2.18 + +

+

vectors multi_normal_cholesky_rng(vectors mu, matrix L)
Generate an array of multivariate normal variates with locations mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks

+Available since 2.18 + +

+

vectors multi_normal_cholesky_rng(row_vectors mu, matrix L)
Generate an array of multivariate normal variates with locations mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks

+Available since 2.18 +
+
+
+

Multivariate Gaussian process distribution

+
+

Probability density function

+

If \(K,N \in \mathbb{N}\), \(\Sigma \in \mathbb{R}^{N \times N}\) is a symmetric, positive definite kernel matrix and \(w \in \mathbb{R}^{K}\) is a vector of positive inverse scales, then for \(y \in \mathbb{R}^{K +\times N}\), \[\begin{equation*} \text{MultiGP}(y|\Sigma,w) = \prod_{i=1}^{K} +\text{MultiNormal}(y_i|0,w_i^{-1} \Sigma), \end{equation*}\] where \(y_i\) is the \(i\)th row of \(y\). This is used to efficiently handle Gaussian Processes with multi-variate outputs where only the output dimensions share a kernel function but vary based on their scale. Note that this function does not take into account the mean prediction.

+
+
+

Distribution statement

+

y ~ multi_gp(Sigma, w)

+

Increment target log probability density with multi_gp_lupdf(y | Sigma, w).

+Available since 2.3 + +

+
+
+

Stan functions

+ +

+

real multi_gp_lpdf(matrix y | matrix Sigma, vector w)
The log of the multivariate GP density of matrix y given kernel matrix Sigma and inverse scales w

+Available since 2.12 + +

+

real multi_gp_lupdf(matrix y | matrix Sigma, vector w)
The log of the multivariate GP density of matrix y given kernel matrix Sigma and inverse scales w dropping constant additive terms

+Available since 2.25 +
+
+
+

Multivariate Gaussian process distribution, Cholesky parameterization

+
+

Probability density function

+

If \(K,N \in \mathbb{N}\), \(L \in \mathbb{R}^{N \times N}\) is lower triangular and such that \(LL^{\top}\) is a positive definite kernel matrix (implying \(L_{n,n} > 0\) for \(n \in 1{:}N\)), and \(w \in +\mathbb{R}^{K}\) is a vector of positive inverse scales, then for \(y +\in \mathbb{R}^{K \times N}\), \[\begin{equation*} \text{MultiGPCholesky}(y \, | \ L,w) += \prod_{i=1}^{K} \text{MultiNormal}(y_i|0,w_i^{-1} LL^{\top}), \end{equation*}\] where \(y_i\) is the \(i\)th row of \(y\). This is used to efficiently handle Gaussian Processes with multi-variate outputs where only the output dimensions share a kernel function but vary based on their scale. If the model allows parameterization in terms of the Cholesky factor of the kernel matrix, this distribution is also more efficient than \(\text{MultiGP}()\). Note that this function does not take into account the mean prediction.

+
+
+

Distribution statement

+

y ~ multi_gp_cholesky(L, w)

+

Increment target log probability density with multi_gp_cholesky_lupdf(y | L, w).

+Available since 2.5 + +

+
+
+

Stan functions

+ +

+

real multi_gp_cholesky_lpdf(matrix y | matrix L, vector w)
The log of the multivariate GP density of matrix y given lower-triangular Cholesky factor of the kernel matrix L and inverse scales w

+Available since 2.12 + +

+

real multi_gp_cholesky_lupdf(matrix y | matrix L, vector w)
The log of the multivariate GP density of matrix y given lower-triangular Cholesky factor of the kernel matrix L and inverse scales w dropping constant additive terms

+Available since 2.25 +
+
+
+

Multivariate Student-t distribution

+
+

Probability density function

+

If \(K \in \mathbb{N}\), \(\nu \in \mathbb{R}^+\), \(\mu \in \mathbb{R}^K\), and \(\Sigma \in \mathbb{R}^{K \times K}\) is symmetric and positive definite, then for \(y \in \mathbb{R}^K\), \[\begin{equation*} \begin{array}{l} +\text{MultiStudentT}(y\,|\,\nu,\,\mu,\,\Sigma) \\ = +\frac{1}{\pi^{K/2}} \ \frac{1}{\nu^{K/2}} \ \frac{\Gamma\!\left((\nu + +K)/2\right)} {\Gamma(\nu/2)} \ \frac{1}{\sqrt{\left| \Sigma +\right|}} \ \left( 1 + \frac{1}{\nu} \, \left(y - \mu\right)^{\top} \, +\Sigma^{-1} \, \left(y - \mu\right) \right)^{-(\nu + K)/2} \! . +\end{array} \end{equation*}\]

+
+
+

Distribution statement

+

y ~ multi_student_t(nu, mu, Sigma)

+

Increment target log probability density with multi_student_t_lupdf(y | nu, mu, Sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real multi_student_t_lpdf(vectors y | real nu, vectors mu, matrix Sigma)
The log of the multivariate Student-\(t\) density of vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma

+Available since 2.18 + +

+

real multi_student_t_lupdf(vectors y | real nu, vectors mu, matrix Sigma)
The log of the multivariate Student-\(t\) density of vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma dropping constant additive terms

+Available since 2.25 + +

+

real multi_student_t_lpdf(vectors y | real nu, row_vectors mu, matrix Sigma)
The log of the multivariate Student-\(t\) density of vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma

+Available since 2.18 + +

+

real multi_student_t_lupdf(vectors y | real nu, row_vectors mu, matrix Sigma)
The log of the multivariate Student-\(t\) density of vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma dropping constant additive terms

+Available since 2.25 + +

+

real multi_student_t_lpdf(row_vectors y | real nu, vectors mu, matrix Sigma)
The log of the multivariate Student-\(t\) density of row vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma

+Available since 2.18 + +

+

real multi_student_t_lupdf(row_vectors y | real nu, vectors mu, matrix Sigma)
The log of the multivariate Student-\(t\) density of row vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma dropping constant additive terms

+Available since 2.25 + +

+

real multi_student_t_lpdf(row_vectors y | real nu, row_vectors mu, matrix Sigma)
The log of the multivariate Student-\(t\) density of row vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma

+Available since 2.18 + +

+

real multi_student_t_lupdf(row_vectors y | real nu, row_vectors mu, matrix Sigma)
The log of the multivariate Student-\(t\) density of row vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma dropping constant additive terms

+Available since 2.25 + +

+

vector multi_student_t_rng(real nu, vector mu, matrix Sigma)
Generate a multivariate Student-\(t\) variate with degrees of freedom nu, location mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks

+Available since 2.0 + +

+

vector multi_student_t_rng(real nu, row_vector mu, matrix Sigma)
Generate a multivariate Student-\(t\) variate with degrees of freedom nu, location mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks

+Available since 2.18 + +

+

vectors multi_student_t_rng(real nu, vectors mu, matrix Sigma)
Generate an array of multivariate Student-\(t\) variates with degrees of freedom nu, locations mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks

+Available since 2.18 + +

+

vectors multi_student_t_rng(real nu, row_vectors mu, matrix Sigma)
Generate an array of multivariate Student-\(t\) variates with degrees of freedom nu, locations mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks

+Available since 2.18 +
+
+
+

Multivariate Student-t distribution, Cholesky parameterization

+
+

Probability density function

+

Let \(K \in \mathbb{N}\), \(\nu \in \mathbb{R}^+\), \(\mu \in \mathbb{R}^K\), and \(L\) a \(K \times K\) lower-triangular matrix with strictly positive, finite diagonal then \[\begin{equation*} +\begin{array}{l} +\text{MultiStudentTCholesky}(y\,\mid \nu,\,\mu,\,L) \\ = +\frac{1}{\pi^{K/2}} \ \frac{1}{\nu^{K/2}} \ \frac{\Gamma\!\left((\nu + +K)/2\right)} {\Gamma(\nu/2)} \ \frac{1}{\left| L +\right|} \ \left( 1 + \frac{1}{\nu} \, \left(y - \mu\right)^{\top} \, +L^{-T}L^{-1} \, \left(y - \mu\right) \right)^{-(\nu + K)/2} \! . +\end{array} +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ multi_student_t_cholesky(nu, mu, L)

+

Increment target log probability density with multi_student_t_cholesky_lupdf(y | nu, mu, L).

+Available since 2.30 + +

+
+
+

Stan functions

+ +

+

real multi_student_t_cholesky_lpdf(vectors y | real nu, vectors mu, matrix L)
The log of the multivariate Student-\(t\) density of vector or array of vectors y given degrees of freedom nu, location vector or array of vectors mu, and Cholesky factor of the scale matrix L. For a definition of the arguments compatible with the vectors type, see the probability vectorization section.

+Available since 2.30 + +

+

real multi_student_t_cholesky_lupdf(vectors y | real nu, vectors mu, matrix L)
The log of the multivariate Student-\(t\) density of vector or vector array y given degrees of freedom nu, location vector or vector array mu, and Cholesky factor of the scale matrix L, dropping constant additive terms. For a definition of arguments compatible with the vectors type, see the probability vectorization section.

+Available since 2.30 + +

+

vector multi_student_t_cholesky_rng(real nu, vector mu, matrix L)
Generate a multivariate Student-\(t\) variate with degrees of freedom nu, location mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.

+Available since 2.30 + +

+

array[] vector multi_student_t_cholesky_rng(real nu, array[] vector mu, matrix L)
Generate an array of multivariate Student-\(t\) variates with degrees of freedom nu, location array mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.

+Available since 2.30 + +

+

array[] vector multi_student_t_cholesky_rng(real nu, array[] row_vector mu, matrix L)
Generate an array of multivariate Student-\(t\) variates with degrees of freedom nu, location array mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.

+Available since 2.30 +
+
+
+

Gaussian dynamic linear models

+

A Gaussian Dynamic Linear model is defined as follows. For \(t \in 1, +\dots, T\), \[\begin{equation*} \begin{aligned}[t] y_{t} &\sim N(F' \theta_{t}, V) +\\ \theta_{t} &\sim N(G \theta_{t - 1}, W) \\ \theta_{0} &\sim +N(m_{0}, C_{0}) \end{aligned} \end{equation*}\] where \(y\) is an \(n \times T\) matrix where rows are variables and columns are observations. These functions calculate the log-density of the observations marginalizing over the latent states (\(p(y | F, G, V, W, m_{0}, C_{0})\)). This log-density is a system that is calculated using the Kalman Filter. If \(V\) is diagonal, then a more efficient algorithm which sequentially processes observations and avoids matrix inversions can be used (Durbin and Koopman 2001, sec. 6.4).

+
+

Distribution statement

+

y ~ gaussian_dlm_obs(F, G, V, W, m0, C0)

+

Increment target log probability density with gaussian_dlm_obs_lupdf(y | F, G, V, W, m0, C0).

+Available since 2.0 + +

+
+
+

Stan functions

+

The following two functions differ in the type of their V, the first taking a full observation covariance matrix V and the second a vector V representing the diagonal of the observation covariance matrix. The sampling statement defined in the previous section works with either type of observation V.

+ +

+

real gaussian_dlm_obs_lpdf(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0)
The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0.

+Available since 2.12 + +

+

real gaussian_dlm_obs_lupdf(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0)
The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0. This function drops constant additive terms.

+Available since 2.25 + +

+

real gaussian_dlm_obs_lpdf(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0)
The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix with diagonal V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0.

+Available since 2.12 + +

+

real gaussian_dlm_obs_lupdf(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0)
The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix with diagonal V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0. This function drops constant additive terms.

+Available since 2.25 + + + +
+
+
+ + Back to top

References

+
+Durbin, J., and S. J. Koopman. 2001. Time Series Analysis by State Space Methods. New York: Oxford University Press. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/embedded_laplace.html b/docs/2_39/functions-reference/embedded_laplace.html new file mode 100644 index 000000000..e3e23970e --- /dev/null +++ b/docs/2_39/functions-reference/embedded_laplace.html @@ -0,0 +1,1517 @@ + + + + + + + + + +Embedded Laplace Approximation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Embedded Laplace Approximation

+

The embedded Laplace approximation can be used to approximate certain marginal and conditional distributions that arise in latent Gaussian models. Embedded Laplace replaces explicit sampling of (high-dimensional) Gaussian latent variables with a local Gaussian approximation. In doing so, it marginalizes out the latent Gaussian variables. Inference can then be performed on the remaining, often low-dimensional, parameters. The embedded Laplace approximation in Stan is best suited for latent Gaussian models when jointly sampling over all model parameters is expensive and the conditional posterior of the Gaussian latent variables is reasonably close to Gaussian.

+

For observed data \(y\), latent Gaussian variables \(\theta\), and hyperparameters \(\phi\), a latent Gaussian model observes the following hierarchical structure: \[\begin{eqnarray} + \phi &\sim& p(\phi), \\ + \theta &\sim& \text{MultiNormal}(0, K(\phi)), \\ + y &\sim& p(y \mid \theta, \phi). +\end{eqnarray}\] In this formulation, \(p(y \mid \theta, \phi)\) is the data model that specifies how observations are generated conditional on \(\theta\) and \(\phi\). \(K(\phi)\) denotes the prior covariance matrix for the latent Gaussian variables \(\theta\) and is parameterized by \(\phi\). The prior on \(\theta\) is centered at 0, however an offset can always be added when specifying the data model \(p(y \mid \theta, \phi)\).

+

Conditioning on observations \(y\) we obtain the joint posterior \(p(\phi, \theta \mid y) \propto p(y \mid \theta, \phi) p(\theta | +\phi) p(\phi)\), where \(p(y \mid \theta, \phi)\) as function of \(\theta\) and \(\phi\) is the likelihood function. To sample from the joint posterior, we can either use a standard method, such as Markov chain Monte Carlo, or we can follow a two-step procedure:

+
    +
  1. sample from the marginal posterior \(p(\phi \mid y)\),
  2. +
  3. sample from the conditional posterior \(p(\theta \mid y, \phi)\).
  4. +
+

In the above procedure, neither the marginal posterior nor the conditional posterior is typically available in closed form and so they must be approximated. The marginal posterior can be written as \(p(\phi \mid y) \propto p(y \mid \phi) p(\phi)\), where \(p(y \mid \phi) = \int p(y \mid \phi, \theta) p(\theta \mid \phi) \text{d}\theta\) is called the marginal likelihood. The Laplace method approximates \(p(y \mid \phi, \theta) p(\theta \mid \phi)\) with a normal distribution centered at the mode, \[ + \theta^* = \underset{\theta}{\text{argmax}} \ \log p(\theta \mid y, \phi), +\] and \(\theta^*\) is obtained using a numerical optimizer. The resulting Gaussian integral can be evaluated analytically to obtain an approximation to the log marginal likelihood \(\log \hat p(y \mid \phi) \approx \log p(y \mid \phi)\). Specifically: \[ + \hat p(y \mid \phi) = \frac{p(\theta^* \mid \phi) p(y \mid \theta^*, \phi)}{\hat p (\theta^* \mid \phi, y)}. +\]

+

Combining this marginal likelihood with the prior in the model block, we can then sample from the marginal posterior \(p(\phi \mid y)\) using one of Stan’s algorithms. The marginal posterior is lower dimensional and likely to have a simpler geometry leading to more efficient inference. On the other hand each marginal likelihood computation is more costly, and the combined change in efficiency depends on the application.

+

To obtain posterior draws for \(\theta\), we sample from the normal approximation to \(p(\theta \mid y, \phi)\) in generated quantities. The process of iteratively sampling from \(p(\phi \mid y)\) (say, with MCMC) and then \(p(\theta \mid y, \phi)\) produces posterior draws from the joint posterior \(p(\theta, \phi \mid y)\).

+

The Laplace approximation is especially useful if \(p(y \mid \phi, \theta)\) as a function of \(\theta\) is log-concave, e.g., in the case of Poisson, binomial, negative-binomial, and Bernoulli. (The likelihood of a normal model is also log-concave; however, when the likelihood is normal, marginalization can be performed exactly and does not require an approximation.) Stan’s embedded Laplace approximation is restricted to the case where the prior \(p(\theta \mid \phi)\) is multivariate normal. Furthermore, the likelihood \(p(y \mid \phi, \theta)\) must be computed using only operations which support higher-order derivatives (see section specifying the likelihood function).

+

The Laplace approximation can also be useful in generated quantities to marginalize out latent variables even if the sampling had been done using the full joint posterior.

+
+

Approximating the log marginal likelihood \(\log p(y \mid \phi)\)

+

In the model block, we increment target with laplace_marginal, a function that approximates the log marginal likelihood \(\log p(y \mid \phi)\). The signature of the function is:

+ +

+

real laplace_marginal(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)

+

which returns an approximation to the log marginal likelihood \(\log p(y \mid \phi)\).

+

The embedded Laplace functions accept two functors whose user defined arguments are passed in as tuples to laplace_marginal.

+
    +
  1. likelihood_function - user-specified log likelihood whose first argument is the vector of latent Gaussian variables \(\theta\). The subsequent arguments are user defined.
  2. +
+
    +
  • real likelihood_function(vector theta, likelihood_arguments_1, likelihood_arguments_2, ...).
  • +
+
    +
  1. likelihood_arguments - A tuple of arguments whose internal members are passed to the log likelihood function. This tuple does NOT include the latent variable \(\theta\).
  2. +
  3. hessian_block_size - the block size of the Hessian of the log likelihood, \(\partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2\).
  4. +
  5. covariance_function - A function that returns the covariance matrix of the multivariate normal prior on \(\theta\).
  6. +
+
    +
  • matrix covariance_function(covariance_argument_1, covariance_argument_2, ...).
  • +
+
    +
  1. covariance_arguments - A tuple of the arguments whose internal members will be passed to the covariance function.
  2. +
+Available since 2.39 +

Below we go over each argument in more detail.

+
+
+

Specifying the log likelihood function

+

The first step to use the embedded Laplace approximation is to write down a function in the functions block which returns the log likelihood \(\log p(y \mid \theta, \phi)\).

+

There are a few constraints on this function:

+
    +
  1. The function return type must be real.

  2. +
  3. The first argument must be the latent Gaussian variable \(\theta\) and must have type vector.

  4. +
  5. The operations in the function must support higher-order automatic differentiation (AD). Most functions in Stan support higher-order AD. The exceptions are functions with specialized calls for reverse-mode AD: higher-order functions (algebraic solvers, differential equation solvers, and integrators), the marginalization function for hidden Markov models (HMM), and the embedded Laplace approximation itself.

  6. +
+

The base signature of the function is

+
real likelihood_function(vector theta, ...)
+

The ... represents a set of optional variadic arguments. There are no type restrictions for the variadic arguments ... and each argument can be passed as data or parameter.

+

The tuple after likelihood_function contains the arguments that get passed to likelihood_function excluding \(\theta\). For instance, if a user defined likelihood uses a real and a matrix, the likelihood function’s signature would first have a vector and then a real and matrix argument.

+
real likelihood_fun(vector theta, real a, matrix X)
+

The call to the laplace marginal would start with this likelihood and tuple holding the other likelihood arguments. We do not need to pass theta, since it is marginalized out and therefore does not appear explicitly as a passed parameter.

+
real val = laplace_marginal(likelihood_fun, (a, X), hessian_block_size, ...);
+

If the likelihood_function has only one argument, the tuple syntax is (a, ).

+

As always, users should use parameter arguments only when necessary in order to speed up differentiation. In general, we recommend marking data only arguments with the keyword data, for example,

+
real likelihood_function(vector theta, data vector x, ...)
+

In addition to the likelihood function, users must specify the block size of the Hessian, \(\partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2\). The Hessian is often block diagonal and this structure can be taken advantage of for fast computation. For example, if \(y_i\) only depends on \(\theta_i\), then the Hessian is diagonal and hessian_block_size=1,

+
real val = laplace_marginal(likelihood_fun, (a, X), 1, ...);
+

On the other hand, if the Hessian is not block diagonal, we can always set hessian_block_size=n where \(n\) is the size of \(\theta\).

+
+
+

Specifying the covariance function

+

The argument covariance_function returns the prior covariance matrix \(K\). The signature for this function is the same as a standard Stan function. Its return type must be a matrix of size \(n \times n\) where \(n\) is the size of \(\theta\).

+
matrix covariance_function(...)
+

The ... represents a set of optional variadic arguments. There are no type restrictions for the variadic arguments ... and each argument can be passed as data or parameter. The variable \(\phi\) is implicitly defined as the collection of all non-data arguments passed to likelihood_function (excluding \(\theta\)) and covariance_function.

+

The tuple after covariance_function contains the arguments that get passed to covariance_function. For instance, if a user defined covariance function uses a real and a matrix

+
matrix cov_fun(real b, matrix Z)
+

the call to the Laplace marginal would include the covariance function and a tuple holding the covariance function arguments.

+
real val = laplace_marginal(likelihood_fun, (a, X), hessian_block_size, cov_fun, (b, Z));
+

If the covariance_function has only one argument, the tuple syntax is (b, ).

+
+
+

Control parameters

+

It is also possible to specify control parameters, which can help improve the optimization that underlies the Laplace approximation, using laplace_marginal_tol with the following signature:

+ +

+

real laplace_marginal_tol(function likelihood_function, tuple(...), int hessian_block_size, function covariance_function, tuple(...), tuple(vector, real, int, int, int, int) tolerances)

+

The final argument, tolerances, is a tuple with the following elements

+
tuple(vector theta_init, real tol, int max_steps, int solver,
+      int max_steps_linesearch, int allow_fallback)
+

Returns an approximation to the log marginal likelihood \(\log p(y \mid \phi)\) and allows the user to tune the control parameters of the approximation.

+
    +
  • theta_init: the initial guess for a Newton solver when finding the mode of \(p(\theta \mid y, \phi)\). By default, it is a zero-vector.

  • +
  • tol: the tolerance \(\epsilon\) of the optimizer. Specifically, the optimizer stops when \(||\nabla \log p(\theta \mid y, \phi)|| \le \epsilon\). By default, the value is \(\epsilon \approx 1.49 \times 10^{-8}\), which is the square-root of machine precision.

  • +
  • max_steps: the maximum number of steps taken by the optimizer before it gives up (in which case the Metropolis proposal gets rejected). The default is 500 steps.

  • +
  • solver: choice of Newton solver. The optimizer underlying the Laplace approximation does one of three matrix decompositions to compute a Newton step. The problem determines which decomposition is numerically stable. By default (solver=1), the solver attempts a Cholesky decomposition of the negative Hessian of the log likelihood, \(- \partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2\). This operation is legal if the negative Hessian is positive-definite, which will always be true when the likelihood as a function of \(\theta\) is log concave. If solver=2, the solver makes a Cholesky decomposition of the covariance matrix \(K(\phi)\). Since a covariance matrix is always positive-definite, computing its Cholesky decomposition is always a legal operation, at least in theory. In practice, we may not be able to compute the Cholesky decomposition of the negative Hessian nor of the covariance matrix, either because it does not exist or because of numerical issues. In that case, we can use solver=3 which uses a more expensive but less specialized approach to compute a Newton step.

  • +
  • max_steps_linesearch: maximum number of steps in linesearch. The linesearch adjusts the step size to ensure that a Newton step leads to an increase in the objective function (i.e., \(f(\theta) = p(\theta \mid \phi, y)\)). If a standard Newton step does not improve the objective function, the step is adjusted iteratively until the objective function increases or the maximum number of steps in the linesearch is reached. By default, max_steps_linesearch=1000. Setting max_steps_linesearch=0 results in no linesearch.

  • +
  • allow_fallback: If the user-specified solver fails, this flag determines whether to fall back to the next solver. For example, if the user specifies solver=1 but the Cholesky decomposition of the negative Hessian \(- \partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2\) fails, the optimizer will try solver=2 instead. By default, allow_fallback = 1 (TRUE).

  • +
+Available since 2.39 +

The embedded Laplace approximation’s options have a helper callable generate_laplace_options(int theta_size) that will generate the tuple for the user. This can be useful for quickly setting up the control parameters in the transformed data block to reuse within the model.

+
tuple(vector[theta_size], real, int, int, int, int) laplace_ops = generate_laplace_options(theta_size);
+// Modify solver type
+laplace_ops.4 = 2;
+// Turn off fallthrough
+laplace_ops.6 = 0;
+ +

+

tuple(vector, real, int, int, int, int) generate_laplace_options(int dimension)

+

Create a default laplace options tuple for a theta_init of size dimension.

+Available since 2.39 + +

+

tuple(vector, real, int, int, int, int) generate_laplace_options(vector theta_init)

+

Create a default Laplace options tuple containing theta_init.

+Available since 2.39 +
+
+

Sample from the approximate conditional \(\hat{p}(\theta \mid y, \phi)\)

+

In generated quantities, it is possible to sample from the Laplace approximation of \(p(\theta \mid \phi, y)\) using laplace_latent_rng. The signature for laplace_latent_rng follows closely the signature for laplace_marginal:

+ +

+

vector laplace_latent_rng(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)

+

Samples from the Laplace approximation to the conditional posterior \(p(\theta \mid y, \phi)\).

+Available since 2.39 +

Once again, it is possible to specify control parameters:

+ +

+

vector laplace_latent_tol_rng(function likelihood_function, tuple(...), int hessian_block_size, function covariance_function, tuple(...), tuple(vector, real, int, int, int, int) tolerances)
Samples from the approximate conditional posterior \(p(\theta \mid y, \phi)\) and allows the user to tune the control parameters of the approximation.

+Available since 2.39 +
+
+

Built-in Laplace marginal likelihood functions

+

Stan provides convenient wrappers for the embedded Laplace approximation when applied to latent Gaussian models with certain likelihoods arising from some common data models. With this wrapper, the likelihood is pre-specified and does not need to be specified by the user. The selection of supported likelihoods is currently narrow and expected to grow. The wrappers exist for the user’s convenience but are not more computationally efficient than specifying log likelihoods in the functions block.

+ + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/functions_index.html b/docs/2_39/functions-reference/functions_index.html new file mode 100644 index 000000000..cf99a8514 --- /dev/null +++ b/docs/2_39/functions-reference/functions_index.html @@ -0,0 +1,7656 @@ + + + + + + + + + +Alphabetical Index + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + + +
+

Alphabetical Index

+
+

A

+

abs:

+ +

acos:

+ +

acosh:

+ +

add_diag:

+ +

algebra_solver:

+ +

algebra_solver_newton:

+ +

append_array:

+ +

append_col:

+ +

append_row:

+ +

arg:

+ +

asin:

+ +

asinh:

+ +

atan:

+ +

atan2:

+ +

atanh:

+ +
+
+

B

+

bernoulli:

+ +

bernoulli_cdf:

+ +

bernoulli_lccdf:

+ +

bernoulli_lcdf:

+ +

bernoulli_logit:

+ +

bernoulli_logit_glm:

+ +

bernoulli_logit_glm_lpmf:

+ +

bernoulli_logit_glm_lupmf:

+ +

bernoulli_logit_glm_rng:

+ +

bernoulli_logit_lpmf:

+ +

bernoulli_logit_lupmf:

+ +

bernoulli_logit_rng:

+ +

bernoulli_lpmf:

+ +

bernoulli_lupmf:

+ +

bernoulli_rng:

+ +

bessel_first_kind:

+ +

bessel_second_kind:

+ +

beta:

+ +

beta_binomial:

+ +

beta_binomial_cdf:

+ +

beta_binomial_lccdf:

+ +

beta_binomial_lcdf:

+ +

beta_binomial_lpmf:

+ +

beta_binomial_lupmf:

+ +

beta_binomial_rng:

+ +

beta_cdf:

+ +

beta_lccdf:

+ +

beta_lcdf:

+ +

beta_lpdf:

+ +

beta_lupdf:

+ +

beta_neg_binomial:

+ +

beta_neg_binomial_cdf:

+ +

beta_neg_binomial_lccdf:

+ +

beta_neg_binomial_lcdf:

+ +

beta_neg_binomial_lpmf:

+ +

beta_neg_binomial_lupmf:

+ +

beta_neg_binomial_rng:

+ +

beta_proportion:

+ +

beta_proportion_lccdf:

+ +

beta_proportion_lcdf:

+ +

beta_proportion_lpdf:

+ +

beta_proportion_lupdf:

+ +

beta_proportion_rng:

+ +

beta_rng:

+ +

binary_log_loss:

+ +

binomial:

+ +

binomial_cdf:

+ +

binomial_lccdf:

+ +

binomial_lcdf:

+ +

binomial_logit:

+ +

binomial_logit_glm:

+ +

binomial_logit_glm_lpmf:

+ +

binomial_logit_glm_lupmf:

+ +

binomial_logit_lpmf:

+ +

binomial_logit_lupmf:

+ +

binomial_lpmf:

+ +

binomial_lupmf:

+ +

binomial_rng:

+ +

block:

+ +
+
+

C

+

categorical:

+ +

categorical_logit:

+ +

categorical_logit_glm:

+ +

categorical_logit_glm_lpmf:

+ +

categorical_logit_glm_lupmf:

+ +

categorical_logit_lpmf:

+ +

categorical_logit_lupmf:

+ +

categorical_logit_rng:

+ +

categorical_lpmf:

+ +

categorical_lupmf:

+ +

categorical_rng:

+ +

cauchy:

+ +

cauchy_cdf:

+ +

cauchy_lccdf:

+ +

cauchy_lcdf:

+ +

cauchy_lpdf:

+ +

cauchy_lupdf:

+ +

cauchy_rng:

+ +

cbrt:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

ceil:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

chi_square:

+ +

chi_square_cdf:

+ +

chi_square_lccdf:

+ +

chi_square_lcdf:

+ +

chi_square_lpdf:

+ +

chi_square_lupdf:

+ +

chi_square_rng:

+ +

chol2inv:

+ +

cholesky_decompose:

+ +

cholesky_factor_corr_constrain:

+ +

cholesky_factor_corr_jacobian:

+ +

cholesky_factor_corr_unconstrain:

+ +

cholesky_factor_cov_constrain:

+ +

cholesky_factor_cov_jacobian:

+ +

cholesky_factor_cov_unconstrain:

+ +

choose:

+ +

col:

+ +

cols:

+ +

columns_dot_product:

+ +

columns_dot_self:

+ +

complex_schur_decompose:

+ +

complex_schur_decompose_t:

+ +

complex_schur_decompose_u:

+ +

conj:

+ +

corr_matrix_constrain:

+ +

corr_matrix_jacobian:

+ +

corr_matrix_unconstrain:

+ +

cos:

+ +

cosh:

+ +

cov_exp_quad:

+ +

cov_matrix_constrain:

+ +

cov_matrix_jacobian:

+ +

cov_matrix_unconstrain:

+ +

crossprod:

+ +

csr_extract:

+ +

csr_extract_u:

+ +

csr_extract_v:

+ +

csr_extract_w:

+ +

csr_matrix_times_vector:

+ +

csr_to_dense_matrix:

+ +

cumulative_sum:

+ +
+
+

D

+

dae:

+ +

dae_tol:

+ +

determinant:

+ +

diag_matrix:

+ +

diag_post_multiply:

+ +

diag_pre_multiply:

+ +

diagonal:

+ +

digamma:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

dims:

+ +

dirichlet:

+ +

dirichlet_lpdf:

+ +

dirichlet_lupdf:

+ +

dirichlet_multinomial:

+ +

dirichlet_multinomial_lpmf:

+ +

dirichlet_multinomial_lupmf:

+ +

dirichlet_multinomial_rng:

+ +

dirichlet_rng:

+ +

discrete_range:

+ +

discrete_range_cdf:

+ +

discrete_range_lccdf:

+ +

discrete_range_lcdf:

+ +

discrete_range_lpmf:

+ +

discrete_range_lupmf:

+ +

discrete_range_rng:

+ +

distance:

+ +

dot_product:

+ +

dot_self:

+ +

double_exponential:

+ +

double_exponential_cdf:

+ +

double_exponential_lccdf:

+ +

double_exponential_lcdf:

+ +

double_exponential_lpdf:

+ +

double_exponential_lupdf:

+ +

double_exponential_rng:

+ +
+
+

E

+

e:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
+

eigendecompose:

+ +

eigendecompose_sym:

+ +

eigenvalues:

+ +

eigenvalues_sym:

+ +

eigenvectors:

+ +

eigenvectors_sym:

+ +

erf:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

erfc:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

exp:

+ +

exp2:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

exp_mod_normal:

+ +

exp_mod_normal_cdf:

+ +

exp_mod_normal_lccdf:

+ +

exp_mod_normal_lcdf:

+ +

exp_mod_normal_lpdf:

+ +

exp_mod_normal_lupdf:

+ +

exp_mod_normal_rng:

+ +

expm1:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

exponential:

+ +

exponential_cdf:

+ +

exponential_lccdf:

+ +

exponential_lcdf:

+ +

exponential_lpdf:

+ +

exponential_lupdf:

+ +

exponential_rng:

+ +
+
+

F

+

falling_factorial:

+ +

fatal_error:

+ +

fdim:

+ +

fft:

+ +

fft2:

+ +

floor:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

fma:

+ +

fmax:

+ +

fmin:

+ +

fmod:

+ +

frechet:

+ +

frechet_cdf:

+ +

frechet_lccdf:

+ +

frechet_lcdf:

+ +

frechet_lpdf:

+ +

frechet_lupdf:

+ +

frechet_rng:

+ +
+
+

G

+

gamma:

+ +

gamma_cdf:

+ +

gamma_lccdf:

+ +

gamma_lcdf:

+ +

gamma_lpdf:

+ +

gamma_lupdf:

+ +

gamma_p:

+ +

gamma_q:

+ +

gamma_rng:

+ +

gaussian_dlm_obs:

+ +

gaussian_dlm_obs_lpdf:

+ +

gaussian_dlm_obs_lupdf:

+ +

generalized_inverse:

+ +

generate_laplace_options:

+ +

get_imag:

+ +

get_real:

+ +

gp_dot_prod_cov:

+ +

gp_exp_quad_cov:

+ +

gp_exponential_cov:

+ +

gp_matern32_cov:

+ +

gp_matern52_cov:

+ +

gp_periodic_cov:

+ +

gumbel:

+ +

gumbel_cdf:

+ +

gumbel_lccdf:

+ +

gumbel_lcdf:

+ +

gumbel_lpdf:

+ +

gumbel_lupdf:

+ +

gumbel_rng:

+ +
+
+

H

+

head:

+ +

hmm_hidden_state_prob:

+ +

hmm_latent_rng:

+ +

hmm_marginal:

+ +

hypergeometric:

+ +

hypergeometric_1F0:

+ +

hypergeometric_2F1:

+ +

hypergeometric_3F2:

+ +

hypergeometric_lpmf:

+ +

hypergeometric_lupmf:

+ +

hypergeometric_pFq:

+ +

hypergeometric_rng:

+ +

hypot:

+ +
+
+

I

+

identity_matrix:

+ +

inc_beta:

+ +

int_step:

+ +

integrate_1d:

+ +

integrate_ode:

+ +

integrate_ode_adams:

+ +

integrate_ode_bdf:

+ +

integrate_ode_rk45:

+ +

inv:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

inv_chi_square:

+ +

inv_chi_square_cdf:

+ +

inv_chi_square_lccdf:

+ +

inv_chi_square_lcdf:

+ +

inv_chi_square_lpdf:

+ +

inv_chi_square_lupdf:

+ +

inv_chi_square_rng:

+ +

inv_cloglog:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

inv_erfc:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

inv_fft:

+ +

inv_fft2:

+ +

inv_gamma:

+ +

inv_gamma_cdf:

+ +

inv_gamma_lccdf:

+ +

inv_gamma_lcdf:

+ +

inv_gamma_lpdf:

+ +

inv_gamma_lupdf:

+ +

inv_gamma_rng:

+ +

inv_inc_beta:

+ +

inv_logit:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

inv_Phi:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

inv_sqrt:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

inv_square:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

inv_wishart:

+ +

inv_wishart_cholesky_lpdf:

+ +

inv_wishart_cholesky_lupdf:

+ +

inv_wishart_cholesky_rng:

+ +

inv_wishart_lpdf:

+ +

inv_wishart_lupdf:

+ +

inv_wishart_rng:

+ +

inverse:

+ +

inverse_spd:

+ +

is_inf:

+ +

is_nan:

+ +
+
+

L

+

lambert_w0:

+ +

lambert_wm1:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

laplace_latent_bernoulli_logit_rng:

+ +

laplace_latent_neg_binomial_2_log_rng:

+ +

laplace_latent_poisson_log_rng:

+ +

laplace_latent_rng:

+ +

laplace_latent_rng_tol:

+ +

laplace_latent_tol_bernoulli_logit_rng:

+ +

laplace_latent_tol_neg_binomial_2_log_rng:

+ +

laplace_latent_tol_poisson_log_rng:

+ +

laplace_marginal:

+ +

laplace_marginal_bernoulli_logit:

+ +

laplace_marginal_bernoulli_logit_lpmf:

+ +

laplace_marginal_bernoulli_logit_lupmf:

+ +

laplace_marginal_neg_binomial_2_log:

+ +

laplace_marginal_neg_binomial_2_log_lpmf:

+ +

laplace_marginal_neg_binomial_2_log_lupmf:

+ +

laplace_marginal_poisson_log:

+ +

laplace_marginal_poisson_log_lpmf:

+ +

laplace_marginal_poisson_log_lupmf:

+ +

laplace_marginal_tol:

+ +

laplace_marginal_tol_bernoulli_logit:

+ +

laplace_marginal_tol_bernoulli_logit_lpmf:

+ +

laplace_marginal_tol_bernoulli_logit_lupmf:

+ +

laplace_marginal_tol_neg_binomial_2_log:

+ +

laplace_marginal_tol_neg_binomial_2_log_lpmf:

+ +

laplace_marginal_tol_neg_binomial_2_log_lupmf:

+ +

laplace_marginal_tol_poisson_log:

+ +

laplace_marginal_tol_poisson_log_lpmf:

+ +

laplace_marginal_tol_poisson_log_lupmf:

+ +

lbeta:

+ +

lchoose:

+ +

ldexp:

+ +

lgamma:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

linspaced_array:

+ +

linspaced_int_array:

+ +

linspaced_row_vector:

+ +

linspaced_vector:

+ +

lkj_corr:

+ +

lkj_corr_cholesky:

+ +

lkj_corr_cholesky_lpdf:

+ +

lkj_corr_cholesky_lupdf:

+ +

lkj_corr_cholesky_rng:

+ +

lkj_corr_lpdf:

+ +

lkj_corr_lupdf:

+ +

lkj_corr_rng:

+ +

lmgamma:

+ +

lmultiply:

+ +

log:

+ +

log10:

+ +

log1m:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

log1m_exp:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

log1m_inv_logit:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

log1p:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

log1p_exp:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

log2:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

log_determinant:

+ +

log_diff_exp:

+ +

log_falling_factorial:

+ +

log_inv_logit:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

log_inv_logit_diff:

+ +

log_mix:

+ +

log_modified_bessel_first_kind:

+ +

log_rising_factorial:

+ +

log_softmax:

+ +

log_sum_exp:

+ +

logistic:

+ +

logistic_cdf:

+ +

logistic_lccdf:

+ +

logistic_lcdf:

+ +

logistic_lpdf:

+ +

logistic_lupdf:

+ +

logistic_rng:

+ +

logit:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

loglogistic:

+ +

loglogistic_cdf:

+ +

loglogistic_lpdf:

+ +

loglogistic_rng:

+ +

lognormal:

+ +

lognormal_cdf:

+ +

lognormal_lccdf:

+ +

lognormal_lcdf:

+ +

lognormal_lpdf:

+ +

lognormal_lupdf:

+ +

lognormal_rng:

+ +

lower_bound_constrain:

+ +

lower_bound_jacobian:

+ +

lower_bound_unconstrain:

+ +

lower_upper_bound_constrain:

+ +

lower_upper_bound_jacobian:

+ +

lower_upper_bound_unconstrain:

+ +
+
+

M

+

machine_precision:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
+

map_rect:

+ +

matrix_exp:

+ +

matrix_exp_multiply:

+ +

matrix_power:

+ +

max:

+ +

mdivide_left_spd:

+ +

mdivide_left_tri_low:

+ +

mdivide_right_spd:

+ +

mdivide_right_tri_low:

+ +

mean:

+ +

min:

+ +

modified_bessel_first_kind:

+ +

modified_bessel_second_kind:

+ +

multi_gp:

+ +

multi_gp_cholesky:

+ +

multi_gp_cholesky_lpdf:

+ +

multi_gp_cholesky_lupdf:

+ +

multi_gp_lpdf:

+ +

multi_gp_lupdf:

+ +

multi_normal:

+ +

multi_normal_cholesky:

+ +

multi_normal_cholesky_lpdf:

+ +

multi_normal_cholesky_lupdf:

+ +

multi_normal_cholesky_rng:

+ +

multi_normal_lpdf:

+ +

multi_normal_lupdf:

+ +

multi_normal_prec:

+ +

multi_normal_prec_lpdf:

+ +

multi_normal_prec_lupdf:

+ +

multi_normal_rng:

+ +

multi_student_cholesky_t_rng:

+ +

multi_student_t:

+ +

multi_student_t_cholesky:

+ +

multi_student_t_cholesky_lpdf:

+ +

multi_student_t_cholesky_lupdf:

+ +

multi_student_t_cholesky_rng:

+ +

multi_student_t_lpdf:

+ +

multi_student_t_lupdf:

+ +

multi_student_t_rng:

+ +

multinomial:

+ +

multinomial_logit:

+ +

multinomial_logit_lpmf:

+ +

multinomial_logit_lupmf:

+ +

multinomial_logit_rng:

+ +

multinomial_lpmf:

+ +

multinomial_lupmf:

+ +

multinomial_rng:

+ +

multiply_lower_tri_self_transpose:

+ +
+
+

N

+

neg_binomial:

+ +

neg_binomial_2:

+ +

neg_binomial_2_cdf:

+ +

neg_binomial_2_lccdf:

+ +

neg_binomial_2_lcdf:

+ +

neg_binomial_2_log:

+ +

neg_binomial_2_log_glm:

+ +

neg_binomial_2_log_glm_lpmf:

+ +

neg_binomial_2_log_glm_lupmf:

+ +

neg_binomial_2_log_lpmf:

+ +

neg_binomial_2_log_lupmf:

+ +

neg_binomial_2_log_rng:

+ +

neg_binomial_2_lpmf:

+ +

neg_binomial_2_lupmf:

+ +

neg_binomial_2_rng:

+ +

neg_binomial_cdf:

+ +

neg_binomial_lccdf:

+ +

neg_binomial_lcdf:

+ +

neg_binomial_lpmf:

+ +

neg_binomial_lupmf:

+ +

neg_binomial_rng:

+ +

negative_infinity:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
+

norm:

+ +

norm1:

+ +

norm2:

+ +

normal:

+ +

normal_cdf:

+ +

normal_id_glm:

+ +

normal_id_glm_lpdf:

+ +

normal_id_glm_lupdf:

+ +

normal_lccdf:

+ +

normal_lcdf:

+ +

normal_lpdf:

+ +

normal_lupdf:

+ +

normal_rng:

+ +

not_a_number:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
+

num_elements:

+ +
+
+

O

+

ode_adams:

+ +

ode_adams_tol:

+ +

ode_adjoint_tol_ctl:

+ +

ode_bdf:

+ +

ode_bdf_tol:

+ +

ode_ckrk:

+ +

ode_ckrk_tol:

+ +

ode_rk45:

+ +

ode_rk45_tol:

+ +

offset_multiplier_constrain:

+ +

offset_multiplier_jacobian:

+ +

offset_multiplier_unconstrain:

+ +

one_hot_array:

+ +

one_hot_int_array:

+ +

one_hot_row_vector:

+ +

one_hot_vector:

+ +

ones_array:

+ +

ones_int_array:

+ +

ones_row_vector:

+ +

ones_vector:

+ +

operator!:

+ +

operator!=:

+ +

operator%:

+ +

operator%/%:

+ +

operator&&:

+ +

operator':

+ +

operator*:

+ +

operator*=:

+ +

operator+:

+ +

operator+=:

+ +

operator-:

+ +

operator-=:

+ +

operator.*:

+ +

operator.*=:

+ +

operator./:

+ +

operator./=:

+ +

operator.^:

+ +

operator/:

+ +

operator/=:

+ +

operator<:

+ +

operator<=:

+ +

operator=:

+ +

operator==:

+ +

operator>:

+ +

operator>=:

+ +

operator\:

+ +

operator^:

+ +

operator||:

+ +

ordered_constrain:

+ +

ordered_jacobian:

+ +

ordered_logistic:

+ +

ordered_logistic_glm_lpmf:

+ +

ordered_logistic_glm_lupmf:

+ +

ordered_logistic_lpmf:

+ +

ordered_logistic_lupmf:

+ +

ordered_logistic_rng:

+ +

ordered_probit:

+ +

ordered_probit_lpmf:

+ +

ordered_probit_lupmf:

+ +

ordered_probit_rng:

+ +

ordered_unconstrain:

+ +

owens_t:

+ +
+
+

P

+

pareto:

+ +

pareto_cdf:

+ +

pareto_lccdf:

+ +

pareto_lcdf:

+ +

pareto_lpdf:

+ +

pareto_lupdf:

+ +

pareto_rng:

+ +

pareto_type_2:

+ +

pareto_type_2_cdf:

+ +

pareto_type_2_lccdf:

+ +

pareto_type_2_lcdf:

+ +

pareto_type_2_lpdf:

+ +

pareto_type_2_lupdf:

+ +

pareto_type_2_rng:

+ +

Phi:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

Phi_approx:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

pi:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
+

poisson:

+ +

poisson_cdf:

+ +

poisson_lccdf:

+ +

poisson_lcdf:

+ +

poisson_log:

+ +

poisson_log_glm:

+ +

poisson_log_glm_lpmf:

+ +

poisson_log_glm_lupmf:

+ +

poisson_log_lpmf:

+ +

poisson_log_lupmf:

+ +

poisson_log_rng:

+ +

poisson_lpmf:

+ +

poisson_lupmf:

+ +

poisson_rng:

+ +

polar:

+ +

positive_infinity:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
+

positive_ordered_constrain:

+ +

positive_ordered_jacobian:

+ +

positive_ordered_unconstrain:

+ +

pow:

+ +

print:

+ +

prod:

+ +

proj:

+ +
+
+

Q

+

qr:

+ +

qr_Q:

+ +

qr_R:

+ +

qr_thin:

+ +

qr_thin_Q:

+ +

qr_thin_R:

+ +

quad_form:

+ +

quad_form_diag:

+ +

quad_form_sym:

+ +

quantile:

+ +
+
+

R

+

rank:

+ +

rayleigh:

+ +

rayleigh_cdf:

+ +

rayleigh_lccdf:

+ +

rayleigh_lcdf:

+ +

rayleigh_lpdf:

+ +

rayleigh_lupdf:

+ +

rayleigh_rng:

+ +

reduce_sum:

+ +

reduce_sum_static:

+ +

reject:

+ +

rep_array:

+ +

rep_matrix:

+ +

rep_row_vector:

+ +

rep_vector:

+ +

reverse:

+ +

rising_factorial:

+ +

round:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

row:

+ +

rows:

+ +

rows_dot_product:

+ +

rows_dot_self:

+ +
+
+

S

+

scale_matrix_exp_multiply:

+ +

scaled_inv_chi_square:

+ +

scaled_inv_chi_square_cdf:

+ +

scaled_inv_chi_square_lccdf:

+ +

scaled_inv_chi_square_lcdf:

+ +

scaled_inv_chi_square_lpdf:

+ +

scaled_inv_chi_square_lupdf:

+ +

scaled_inv_chi_square_rng:

+ +

sd:

+ +

segment:

+ +

simplex_constrain:

+ +

simplex_jacobian:

+ +

simplex_unconstrain:

+ +

sin:

+ +

singular_values:

+ +

sinh:

+ +

size:

+ +

skew_double_exponential:

+ +

skew_double_exponential_cdf:

+ +

skew_double_exponential_lccdf:

+ +

skew_double_exponential_lcdf:

+ +

skew_double_exponential_lpdf:

+ +

skew_double_exponential_lupdf:

+ +

skew_double_exponential_rng:

+ +

skew_normal:

+ +

skew_normal_cdf:

+ +

skew_normal_lccdf:

+ +

skew_normal_lcdf:

+ +

skew_normal_lpdf:

+ +

skew_normal_lupdf:

+ +

skew_normal_rng:

+ +

softmax:

+ +

solve_newton:

+ +

solve_newton_tol:

+ +

solve_powell:

+ +

solve_powell_tol:

+ +

sort_asc:

+ +

sort_desc:

+ +

sort_indices_asc:

+ +

sort_indices_desc:

+ +

sqrt:

+ +

sqrt2:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
+

square:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

squared_distance:

+ +

std_normal:

+ +

std_normal_cdf:

+ +

std_normal_lccdf:

+ +

std_normal_lcdf:

+ +

std_normal_log_qf:

+
    +
  • +(T x) : R (unbounded_continuous_distributions.html) +
  • +
+

std_normal_lpdf:

+ +

std_normal_lupdf:

+ +

std_normal_qf:

+
    +
  • +(T x) : R (unbounded_continuous_distributions.html) +
  • +
+

std_normal_rng:

+
    +
  • +() : real (unbounded_continuous_distributions.html) +
  • +
+

step:

+ +

stochastic_column_constrain:

+ +

stochastic_column_jacobian:

+ +

stochastic_column_unconstrain:

+ +

stochastic_row_constrain:

+ +

stochastic_row_jacobian:

+ +

stochastic_row_unconstrain:

+ +

student_t:

+ +

student_t_cdf:

+ +

student_t_lccdf:

+ +

student_t_lcdf:

+ +

student_t_lpdf:

+ +

student_t_lupdf:

+ +

student_t_rng:

+ +

sub_col:

+ +

sub_row:

+ +

sum:

+ +

sum_to_zero_constrain:

+ +

sum_to_zero_jacobian:

+ +

sum_to_zero_unconstrain:

+ +

svd:

+ +

svd_U:

+ +

svd_V:

+ +

symmetrize_from_lower_tri:

+ +
+
+

T

+

tail:

+ +

tan:

+ +

tanh:

+ +

target:

+
    +
  • +() : real (real-valued_basic_functions.html) +
  • +
+

tcrossprod:

+ +

tgamma:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

to_array_1d:

+ +

to_array_2d:

+ +

to_complex:

+ +

to_int:

+ +

to_matrix:

+ +

to_row_vector:

+ +

to_vector:

+ +

trace:

+ +

trace_dot:

+ +

trace_gen_quad_form:

+ +

trace_quad_form:

+ +

trigamma:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+

trunc:

+
    +
  • +(T x) : R (real-valued_basic_functions.html) +
  • +
+
+
+

U

+

uniform:

+ +

uniform_cdf:

+ +

uniform_lccdf:

+ +

uniform_lcdf:

+ +

uniform_lpdf:

+ +

uniform_lupdf:

+ +

uniform_rng:

+ +

uniform_simplex:

+ +

unit_vectors_constrain:

+ +

unit_vectors_jacobian:

+ +

unit_vectors_unconstrain:

+ +

upper_bound_constrain:

+ +

upper_bound_jacobian:

+ +

upper_bound_unconstrain:

+ +
+
+

V

+

variance:

+ +

von_mises:

+ +

von_mises_cdf:

+ +

von_mises_lccdf:

+ +

von_mises_lcdf:

+ +

von_mises_lpdf:

+ +

von_mises_lupdf:

+ +

von_mises_rng:

+ +
+
+

W

+

weibull:

+ +

weibull_cdf:

+ +

weibull_lccdf:

+ +

weibull_lcdf:

+ +

weibull_lpdf:

+ +

weibull_lupdf:

+ +

weibull_rng:

+ +

wiener:

+ +

wiener_lccdf_unnorm:

+ +

wiener_lcdf_unnorm:

+ +

wiener_lpdf:

+ +

wiener_lupdf:

+ +

wishart:

+ +

wishart_cholesky_lpdf:

+ +

wishart_cholesky_lupdf:

+ +

wishart_cholesky_rng:

+ +

wishart_lpdf:

+ +

wishart_lupdf:

+ +

wishart_rng:

+ +
+
+

Y

+

yule_simon:

+ +

yule_simon_cdf:

+ +

yule_simon_lccdf:

+ +

yule_simon_lcdf:

+ +

yule_simon_lpmf:

+ +

yule_simon_lupmf:

+ +

yule_simon_rng:

+ +
+
+

Z

+

zeros_array:

+ +

zeros_int_array:

+ +

zeros_row_vector:

+ +

zeros_vector:

+ + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/hidden_markov_models.html b/docs/2_39/functions-reference/hidden_markov_models.html new file mode 100644 index 000000000..4b055c0fc --- /dev/null +++ b/docs/2_39/functions-reference/hidden_markov_models.html @@ -0,0 +1,1178 @@ + + + + + + + + + +Hidden Markov Models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Hidden Markov Models

+

An elementary first-order Hidden Markov model is a probabilistic model over \(N\) observations, \(y_n\), and \(N\) hidden states, \(x_n\), which can be fully defined by the conditional distributions \(p(y_n \mid x_n, \phi)\) and \(p(x_n \mid x_{n - 1}, \phi)\). Here we make the dependency on additional model parameters, \(\phi\), explicit. When \(x\) is continuous, the user can explicitly encode these distributions in Stan and use Markov chain Monte Carlo to integrate \(x\) out.

+

When each state \(x\) takes a value over a discrete and finite set, say \(\{1, 2, ..., K\}\), we can take advantage of the dependency structure to marginalize \(x\) and compute \(p(y \mid \phi)\). We start by defining the conditional observational distribution, stored in a \(K \times N\) matrix \(\omega\) with \[ +\omega_{kn} = p(y_n \mid x_n = k, \phi). +\] Next, we introduce the \(K \times K\) transition matrix, \(\Gamma\), with \[ + \Gamma_{ij} = p(x_n = j \mid x_{n - 1} = i, \phi). +\] Each row defines a probability distribution and must therefore be a simplex (i.e. its components must add to 1). Currently, Stan only supports stationary transitions where a single transition matrix is used for all transitions. Finally we define the initial state \(K\)-vector \(\rho\), with \[ + \rho_k = p(x_0 = k \mid \phi). +\]

+

The Stan functions that support this type of model are special in that the user does not explicitly pass \(y\) and \(\phi\) as arguments. Instead, the user passes \(\log \omega\), \(\Gamma\), and \(\rho\), which in turn depend on \(y\) and \(\phi\).

+
+

Stan functions

+ +

+

real hmm_marginal(matrix log_omega, matrix Gamma, vector rho)
Returns the log probability density of \(y\), with \(x_n\) integrated out at each iteration.

+Available since 2.24 +

The arguments represent (1) the log density of each output, (2) the transition matrix, and (3) the initial state vector.

+
    +
  • log_omega: \(\log \omega_{kn} = \log p(y_n \mid x_n = k, \phi)\), log density of each output,

  • +
  • Gamma: \(\Gamma_{ij} = p(x_n = j \mid x_{n - 1} = i, \phi)\), the transition matrix,

  • +
  • rho: \(\rho_k = p(x_0 = k \mid \phi)\), the initial state probability.

  • +
+ +

+

array[] int hmm_latent_rng(matrix log_omega, matrix Gamma, vector rho)
Returns a length \(N\) array of integers over \(\{1, ..., K\}\), sampled from the joint posterior distribution of the hidden states, \(p(x \mid \phi, y)\). May only be used in transformed data and generated quantities.

+Available since 2.24 + +

+

matrix hmm_hidden_state_prob(matrix log_omega, matrix Gamma, vector rho)
Returns the matrix of marginal posterior probabilities of each hidden state value. This will be a \(K \times N\) matrix. The \(n^\mathrm{th}\) column is a simplex of probabilities for the \(n^\mathrm{th}\) variable. Moreover, let \(A\) be the output. Then \(A_{ij} = p(x_j = i \mid \phi, y)\). This function may only be used in transformed data and generated quantities.

+Available since 2.24 + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/higher-order_functions.html b/docs/2_39/functions-reference/higher-order_functions.html new file mode 100644 index 000000000..1b73b613a --- /dev/null +++ b/docs/2_39/functions-reference/higher-order_functions.html @@ -0,0 +1,1670 @@ + + + + + + + + + +Higher-Order Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Higher-Order Functions

+

Stan provides a few higher-order functions that act on other functions. In all cases, the function arguments to the higher-order functions are defined as functions within the Stan language and passed by name to the higher-order functions.

+
+

Algebraic equation solvers

+

Stan provides two built-in algebraic equation solvers, respectively based on the Newton method and the Powell “dog leg” hybrid method. Empirically the Newton method is found to be faster and its use is recommended for most problems.

+

An algebraic solver is a higher-order function, i.e. it takes another function as one of its arguments. Other functions in Stan which share this feature are the differential equation solvers (see section Ordinary Differential Equation (ODE) Solvers and Differential Algebraic Equation (DAE) solver). Ordinary Stan functions do not allow functions as arguments.

+
+

Specifying an algebraic equation as a function

+

An algebraic system is specified as an ordinary function in Stan within the functions block. The function must return a vector and takes in, as its first argument, the unknowns \(y\) we wish to solve for, also passed as a vector. This argument is followed by additional arguments as specified by the user; we call such arguments variadic arguments and denote them .... The signature of the algebraic system is then:

+
 vector algebra_system (vector y, ...)
+

There is no type restriction for the variadic arguments and each argument can be passed as data or parameter. However users should use parameter arguments only when necessary and mark data arguments with the keyword data. In the below example, the last variadic argument, \(x\), is restricted to being data:

+
  vector algebra_system (vector y, vector theta, data vector x)
+

Distinguishing data and parameter is important for computational reasons. Augmenting the total number of parameters increases the cost of propagating derivatives through the solution to the algebraic equation, and ultimately the computational cost of evaluating the gradients.

+
+
+

Call to the algebraic solver

+ +

vector solve_newton(function algebra_system, vector y_guess, ...)
Solves the algebraic system, given an initial guess, using Newton’s method.

+Available since 2.31 + +

vector solve_newton_tol(function algebra_system, vector y_guess, data real scaling_step, data real f_tol, int max_steps, ...)
Solves the algebraic system, given an initial guess, using Newton’s method with additional control parameters for the solver.

+Available since 2.31 + +

vector solve_powell(function algebra_system, vector y_guess, ...)
Solves the algebraic system, given an initial guess, using Powell’s hybrid method.

+Available since 2.31 + +

vector solve_powell_tol(function algebra_system, vector y_guess, data real rel_tol, data real f_tol, int max_steps, ...)
Solves the algebraic system, given an initial guess, using Powell’s hybrid method with additional control parameters for the solver.

+Available since 2.31 +
+

Arguments to the algebraic solver

+

The arguments to the algebraic solvers are as follows:

+
    +
  • algebra_system: function literal referring to a function specifying the system of algebraic equations with signature (vector, ...):vector. The arguments represent (1) unknowns, (2) additional parameter and/or data arguments, and the return value contains the value of the algebraic function, which goes to 0 when we plug in the solution to the algebraic system,

  • +
  • y_guess: initial guess for the solution, type vector,

  • +
  • ...: variadic arguments.

  • +
+

The algebraic solvers admit control parameters. While Stan provides default values, the user should be prepared to adjust the control parameters. The following controls are available:

+
    +
  • scaling_step: for the Newton solver only, the scaled-step stopping tolerance, type real, data only. If a Newton step is smaller than the scaling step tolerance, the code breaks, assuming the solver is no longer making significant progress. If set to 0, this constraint is ignored. Default value is \(10^{-3}\).

  • +
  • rel_tol: for the Powell solver only, the relative tolerance, type real, data only. The relative tolerance is the estimated relative error of the solver and serves to test if a satisfactory solution has been found. Default value is \(10^{-10}\).

  • +
  • function_tol (written f_tol in the function signatures): function tolerance for the algebraic solver, type real, data only. After convergence of the solver, the proposed solution is plugged into the algebraic system and its norm is compared to the function tolerance. If the norm is below the function tolerance, the solution is deemed acceptable. Default value is \(10^{-6}\).

  • +
  • max_num_steps (written max_steps in the function signatures): maximum number of steps to take in the algebraic solver, type int, data only. If the solver reaches this number of steps, it breaks and returns an error message. Default value is \(200\).

  • +
+

The difference in which control parameters are available has to do with the underlying implementations for the solvers and the control parameters these implementations support. The Newton solver is based on KINSOL from the SUNDIALS suite, while the Powell solver uses a module from the Eigen library.

+
+
+

Return value

+

The return value for the algebraic solver is an object of type vector, with values which, when plugged in as y, make the algebraic function go to 0 (approximately, within the specified function tolerance).

+
+
+

Sizes and parallel arrays

+

Certain sizes have to be consistent. The initial guess, return value of the solver, and return value of the algebraic function must all be the same size.

+
+
+

Algorithmic details

+

Stan offers two methods to solve algebraic equations. solve_newton and solve_newton_tol use the Newton method, a first-order derivative based numerical solver. The Stan code builds on the implementation in KINSOL from the SUNDIALS suite (Hindmarsh et al. 2005). For many problems, we find that the Newton method is faster than the Powell method. If, however, Newton’s method performs poorly, either failing to converge or requiring an excessively long time to do so, the user should be prepared to switch to the Powell method.

+

solve_powell and solve_powell_tol are based on the Powell hybrid method (Powell 1970), which also uses first-order derivatives. The Stan code builds on the implementation of the hybrid solver in the unsupported module for nonlinear optimization problems of the Eigen library (Guennebaud, Jacob, et al. 2010). This solver is in turn based on the algorithm developed for the package MINPACK-1 (Jorge J. More 1980).

+

For both solvers, derivatives are propagated through the solution to the algebraic system using the implicit function theorem and an adjoint method of automatic differentiation; for a discussion on this topic, see (Gaebler 2021) and (Margossian and Betancourt 2022).

+
+
+
+
+

Ordinary differential equation (ODE) solvers

+

Stan provides several higher-order functions for solving initial value problems specified as Ordinary Differential Equations (ODEs).

+

Solving an initial value ODE means, given a set of differential equations \(y'(t, \theta) = f(t, y, \theta)\) and initial conditions \(y(t_0, \theta)\), solving for \(y\) at a sequence of times \(t_0 < t_1 < t_2 < \cdots < t_n\). \(f(t, y, \theta)\) is referred to here as the ODE system function.

+

\(f(t, y, \theta)\) will be defined as a function with a certain signature and provided along with the initial conditions and output times to one of the ODE solver functions.

+

To make it easier to write ODEs, the solve functions take extra arguments that are passed along unmodified to the user-supplied system function. Because there can be any number of these arguments and they can be of different types, they are denoted below as .... The types of the arguments represented by ... in the ODE solve function call must match the types of the arguments represented by ... in the user-supplied system function.

+
+

Non-stiff solver

+ +

+

array[] vector ode_rk45(function ode, vector initial_state, real initial_time, array[] real times, ...)
Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.

+Available since 2.24 + +

+

array[] vector ode_rk45_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...)
Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method with additional control parameters for the solver.

+Available since 2.24 + +

+

array[] vector ode_ckrk(function ode, vector initial_state, real initial_time, array[] real times, ...)
Solves the ODE system for the times provided using the Cash-Karp algorithm, a 4th/5th order explicit Runge-Kutta method.

+Available since 2.27 + +

+

array[] vector ode_ckrk_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...)
Solves the ODE system for the times provided using the Cash-Karp algorithm, a 4th/5th order explicit Runge-Kutta method with additional control parameters for the solver.

+Available since 2.27 + +

+

array[] vector ode_adams(function ode, vector initial_state, real initial_time, array[] real times, ...)
Solves the ODE system for the times provided using the Adams-Moulton method.

+Available since 2.24 + +

+

array[] vector ode_adams_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...)
Solves the ODE system for the times provided using the Adams-Moulton method with additional control parameters for the solver.

+Available since 2.24 +
+
+

Stiff solver

+ +

+

array[] vector ode_bdf(function ode, vector initial_state, real initial_time, array[] real times, ...)
Solves the ODE system for the times provided using the backward differentiation formula (BDF) method.

+Available since 2.24 + +

+

array[] vector ode_bdf_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...)
Solves the ODE system for the times provided using the backward differentiation formula (BDF) method with additional control parameters for the solver.

+Available since 2.24 +
+
+

Adjoint solver

+ +

+

array[] vector ode_adjoint_tol_ctl(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol_forward, data vector abs_tol_forward, data real rel_tol_backward, data vector abs_tol_backward, int max_num_steps, int num_steps_between_checkpoints, int interpolation_polynomial, int solver_forward, int solver_backward, ...)

+

Solves the ODE system for the times provided using the adjoint ODE solver method from CVODES. The adjoint ODE solver requires a checkpointed forward in time ODE integration, a backwards in time integration that makes use of an interpolated version of the forward solution, and the solution of a quadrature problem (the number of which depends on the number of parameters passed to the solve). The tolerances and numeric methods used for the forward solve, backward solve, quadratures, and interpolation can all be configured.

+Available since 2.27 +
+
+

ODE system function

+

The first argument to one of the ODE solvers is always the ODE system function. The ODE system function must have a vector return type, and the first two arguments must be a real and vector in that order. These two arguments are followed by the variadic arguments that are passed through from the ODE solve function call:

+
 vector ode(real time, vector state, ...)
+

The ODE system function should return the derivative of the state with respect to time at the time and state provided. The length of the returned vector must match the length of the state input into the function.

+

The arguments to this function are:

+
    +
  • time, the time to evaluate the ODE system

  • +
  • state, the state of the ODE system at the time specified

  • +
  • ..., sequence of arguments passed unmodified from the ODE solve function call. The types here must match the types in the ... arguments of the ODE solve function call.

  • +
+
+
+

Arguments to the ODE solvers

+

The arguments to the ODE solvers in both the stiff and non-stiff solvers are the same. The arguments to the adjoint ODE solver are different; see Arguments to the adjoint ODE solver.

+
    +
  • ode: ODE system function,

  • +
  • initial_state: initial state, type vector,

  • +
  • initial_time: initial time, type real,

  • +
  • times: solution times, type array[] real,

  • +
  • ...: sequence of arguments that will be passed through unmodified to the ODE system function. The types here must match the types in the ... arguments of the ODE system function.

  • +
+

For the versions of the ode solver functions ending in _tol, these three parameters must be provided after times and before the ... arguments:

+
    +
  • data rel_tol: relative tolerance for the ODE solver, type real, data only,

  • +
  • data abs_tol: absolute tolerance for the ODE solver, type real, data only, and

  • +
  • max_num_steps: maximum number of steps to take between output times in the ODE solver, type int, data only.

  • +
+

Because the tolerances are data arguments, they must be defined in either the data or transformed data blocks. They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.

+
+
+

Arguments to the adjoint ODE solver

+

The arguments to the adjoint ODE solver are different from those for the other functions (for those see Arguments to the ODE solvers).

+
    +
  • ode: ODE system function,

  • +
  • initial_state: initial state, type vector,

  • +
  • initial_time: initial time, type real,

  • +
  • times: solution times, type array[] real,

  • +
  • data rel_tol_forward: Relative tolerance for forward solve, type real, data only,

  • +
  • data abs_tol_forward: Absolute tolerance vector for each state for forward solve, type vector, data only,

  • +
  • data rel_tol_backward: Relative tolerance for backward solve, type real, data only,

  • +
  • data abs_tol_backward: Absolute tolerance vector for each state for backward solve, type vector, data only,

  • +
  • data rel_tol_quadrature: Relative tolerance for backward quadrature, type real, data only,

  • +
  • data abs_tol_quadrature: Absolute tolerance for backward quadrature, type real, data only,

  • +
  • data max_num_steps: Maximum number of time-steps to take in integrating the ODE solution between output time points for forward and backward solve, type int, data only,

  • +
  • num_steps_between_checkpoints: number of steps between checkpointing forward solution, type int, data only,

  • +
  • interpolation_polynomial: can be 1 for hermite or 2 for polynomial interpolation method of CVODES, type int, data only,

  • +
  • solver_forward: solver used for forward ODE problem: 1=Adams (non-stiff), 2=BDF (stiff), type int, data only,

  • +
  • solver_backward: solver used for backward ODE problem: 1=Adams (non-stiff), 2=BDF (stiff), type int, data only,

  • +
  • ...: sequence of arguments that will be passed through unmodified to the ODE system function. The types here must match the types in the ... arguments of the ODE system function.

  • +
+

Because the tolerances are data arguments, they must be defined in either the data or transformed data blocks. They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.

+
+

Return values

+

The return value for the ODE solvers is an array of vectors (type array[] vector), one vector representing the state of the system at every time specified in the times argument.

+
+
+

Array and vector sizes

+

The sizes must match, and in particular, the following groups are of the same size:

+
    +
  • state variables passed into the system function, derivatives returned by the system function, initial state passed into the solver, and length of each vector in the output,

  • +
  • number of solution times and number of vectors in the output.

  • +
+
+
+
+
+

Differential-Algebraic equation (DAE) solver

+

Stan provides two higher order functions for solving initial value problems specified as Differential-Algebraic Equations (DAEs) with index-1 (Serban et al. 2021).

+

Solving an initial value DAE means given a set of residual functions \(r(y'(t, \theta), y(t, \theta), t)\) and initial conditions \((y(t_0, \theta), y'(t_0, \theta))\), solving for \(y\) at a sequence of times \(t_0 < t_1 \leq t_2 \leq \cdots \leq t_n\). The residual function \(r(y', y, t, \theta)\) will be defined as a function with a certain signature and provided along with the initial conditions and output times to one of the DAE solver functions.

+

Similar to ODE solvers, the DAE solver function takes extra arguments that are passed along unmodified to the user-supplied system function. Because there can be any number of these arguments and they can be of different types, they are denoted below as ..., and the types of these arguments, also represented by ... in the DAE solver call, must match the types of the arguments represented by ... in the user-supplied system function.

+
+

The DAE solver

+ +

+

array[] vector dae(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, ...)
Solves the DAE system using the backward differentiation formula (BDF) method (Serban et al. 2021).

+Available since 2.29 + +

+

array[] vector dae_tol(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...)
Solves the DAE system for the times provided using the backward differentiation formula (BDF) method with additional control parameters for the solver.

+Available since 2.29 +
+
+

DAE system function

+

The first argument to the DAE solver is the DAE residual function. The DAE residual function must have a vector return type, and the first three arguments must be a real, vector, and vector, in that order. These three arguments are followed by the variadic arguments that are passed through from the DAE solver function call:

+
  vector residual(real time, vector state, vector state_derivative, ...)
+

The DAE residual function should return the residuals at the time and state provided. The length of the returned vector must match the length of the state input into the function.

+

The arguments to this function are:

+
    +
  • time, the time to evaluate the DAE system

  • +
  • state, the state of the DAE system at the time specified

  • +
  • state_derivative, the time derivatives of the state of the DAE system at the time specified

  • +
  • ..., sequence of arguments passed unmodified from the DAE solve function call. The types here must match the types in the ... arguments of the DAE solve function call.

  • +
+
+
+

Arguments to the DAE solver

+

The arguments to the DAE solver are

+
    +
  • residual: DAE residual function,

  • +
  • initial_state: initial state, type vector,

  • +
  • initial_state_derivative: time derivative of the initial state, type vector,

  • +
  • initial_time: initial time, type data real,

  • +
  • times: solution times, type data array[] real,

  • +
  • ...: sequence of arguments that will be passed through unmodified to the DAE residual function. The types here must match the types in the ... arguments of the DAE residual function.

  • +
+

For dae_tol, the following three parameters must be provided after times and before the ... arguments:

+
    +
  • data rel_tol: relative tolerance for the DAE solver, type real, data only,

  • +
  • data abs_tol: absolute tolerance for the DAE solver, type real, data only, and

  • +
  • max_num_steps: maximum number of steps to take between output times in the DAE solver, type int, data only.

  • +
+

Because the tolerances are data arguments, they must be supplied as primitive numerics or defined in either the data or transformed data blocks. They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.

+
+

Consistency of the initial conditions

+

The user is responsible for ensuring that the residual function becomes zero at the initial time, t0, when the arguments initial_state and initial_state_derivative are introduced as state and state_derivative, respectively.

+
+
+

Return values

+

The return value for the DAE solvers is an array of vectors (type array[] vector), one vector representing the state of the system at every time specified in the times argument.

+
+
+

Array and vector sizes

+

The sizes must match, and in particular, the following groups are of the same size:

+
    +
  • state variables and state derivatives passed into the residual function, the residual returned by the residual function, initial state and initial state derivatives passed into the solver, and length of each vector in the output,

  • +
  • number of solution times and number of vectors in the output.

  • +
+
+
+
+
+

1D integrator

+

Stan provides a built-in mechanism to perform 1D integration of a function via quadrature methods.

+

It operates similarly to the algebraic solver and the ordinary differential equations solver in that it takes a function as an argument.

+

Like both of those utilities, some of the arguments are limited to data only expressions. These expressions must not contain variables other than those declared in the data or transformed data blocks.

+
+

Specifying an integrand as a function

+

Performing a 1D integration requires the integrand to be specified somehow. This is done by defining a function in the Stan functions block with the special signature:

+
real integrand(real x, real xc, array[] real theta,
+               array[] real x_r, array[] int x_i)
+

The function should return the value of the integrand evaluated at the point x.

+

The arguments to this function are:

+
    +
  • x, the independent variable being integrated over

  • +
  • xc, a high precision version of the distance from x to the nearest endpoint in a definite integral (for more info, see section Precision Loss).

  • +
  • theta, parameter values used to evaluate the integral

  • +
  • x_r, data values used to evaluate the integral

  • +
  • x_i, integer data used to evaluate the integral

  • +
+

Like the algebraic solver and the differential equations solver, the 1D integrator separates parameter values, theta, from data values, x_r.

+
+
+

Call to the 1D integrator

+ +

+

real integrate_1d (function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i)
Integrates the integrand from a to b.

+Available since 2.23 + +

+

real integrate_1d (function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i, real relative_tolerance)
Integrates the integrand from a to b with the given relative tolerance.

+Available since 2.23 +
+

Arguments to the 1D integrator

+

The arguments to the 1D integrator are as follows:

+
    +
  • integrand: function literal referring to a function specifying the integrand with signature (real, real, array[] real, array[] real, array[] int):real. The arguments represent +
      +
      1. +
      2. where integrand is evaluated,
      3. +
    • +
      1. +
      2. distance from evaluation point to integration limit for definite integrals,
      3. +
    • +
      1. +
      2. parameters,
      3. +
    • +
      1. +
      2. real data
      3. +
    • +
      1. +
      2. integer data, and the return value is the integrand evaluated at the given point,
      3. +
    • +
  • +
  • a: left limit of integration, may be negative infinity, type real,
  • +
  • b: right limit of integration, may be positive infinity, type real,
  • +
  • theta: parameters only, type array[] real,
  • +
  • x_r: real data only, type array[] real,
  • +
  • x_i: integer data only, type array[] int.
  • +
+

A relative_tolerance argument can optionally be provided for more control over the algorithm:

+
    +
  • relative_tolerance: relative tolerance for the 1d integrator, type real, data only.
  • +
+
+
+

Return value

+

The return value for the 1D integrator is a real, the value of the integral.

+
+
+

Zero-crossing integrals

+

For numeric stability, integrals on the (possibly infinite) interval \((a, b)\) that cross zero are split into two integrals, one from \((a, 0)\) and one from \((0, b)\). Each integral is separately integrated to the given relative_tolerance.

+
+
+

Precision loss near limits of integration in definite integrals

+

When integrating certain definite integrals, there can be significant precision loss in evaluating the integrand near the endpoints. This has to do with the breakdown in precision of double precision floating point values when adding or subtracting a small number from a number much larger than it in magnitude (for instance, 1.0 - x). xc (as passed to the integrand) is a high-precision version of the distance between x and the definite integral endpoints and can be used to address this issue. More information (and an example where this is useful) is given in the User’s Guide. For zero crossing integrals, xc will be a high precision version of the distance to the endpoints of the two smaller integrals. For any integral with an endpoint at negative infinity or positive infinity, xc is set to NaN.

+
+
+

Algorithmic details

+

Internally the 1D integrator uses the double-exponential methods in the Boost 1D quadrature library. Boost in turn makes use of quadrature methods developed in (Takahasi and Mori 1974), (Mori 1978), (Bailey, Jeyabalan, and Li 2005), and (Tanaka et al. 2009).

+

The gradients of the integral are computed in accordance with the Leibniz integral rule. Gradients of the integrand are computed internally with Stan’s automatic differentiation.

+
+
+
+
+

Reduce-sum function

+

Stan provides a higher-order reduce function for summation. A function which returns a scalar g: U -> real is mapped to every element of a list of type array[] U, { x1, x2, ... } and all the results are accumulated,

+

g(x1) + g(x2) + ...

+

For efficiency reasons the reduce function doesn’t work with the element-wise evaluated function g itself, but instead works through evaluating partial sums, f: array[] U -> real, where:

+
f({ x1 }) = g(x1)
+f({ x1, x2 }) = g(x1) + g(x2)
+f({ x1, x2, ... }) = g(x1) + g(x2) + ...
+

Mathematically the summation reduction is associative and forming arbitrary partial sums in an arbitrary order will not change the result. However, floating point numerics on computers only have a limited precision such that associativity does not hold exactly. This implies that the order of summation determines the exact numerical result. For this reason, the higher-order reduce function is available in two variants:

+
    +
  • reduce_sum: Automatically choose partial sums partitioning based on a dynamic scheduling algorithm.
  • +
  • reduce_sum_static: Compute the same sum as reduce_sum, but partition the input in the same way for a given data set (in reduce_sum this partitioning might change depending on computer load). This should result in stable numerical evaluations.
  • +
+
+

Specifying the reduce-sum function

+

The higher-order reduce function takes a partial sum function f, an array argument x (with one array element for each term in the sum), a recommended grainsize, and a set of shared arguments. This representation allows parallelization of the resultant sum.

+ +

+

real reduce_sum(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)

+ +

real reduce_sum_static(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)

+

Returns the equivalent of f(x, 1, size(x), s1, s2, ...), but computes the result in parallel by breaking the array x into independent partial sums. s1, s2, ... are shared between all terms in the sum.

+Available since 2.23 +
    +
  • f: function literal referring to a function specifying the partial sum operation. Refer to the partial sum function.
  • +
  • x: array of T, one for each term of the reduction, T can be any type,
  • +
  • grainsize: For reduce_sum, grainsize is the recommended size of the partial sum (grainsize = 1 means pick totally automatically). For reduce_sum_static, grainsize determines the maximum size of the partial sums, type int,
  • +
  • s1: first (optional) shared argument, type T1, where T1 can be any type
  • +
  • s2: second (optional) shared argument, type T2, where T2 can be any type,
  • +
  • ...: remainder of shared arguments, each of which can be any type.
  • +
+
+
+

The partial sum function

+

The partial sum function must have the following signature where the type T, and the types of all the shared arguments (T1, T2, …) match those of the original reduce_sum (reduce_sum_static) call.

+
(array[] T x_subset, int start, int end, T1 s1, T2 s2, ...):real
+

The partial sum function returns the sum of the start to end terms (inclusive) of the overall calculations. The arguments to the partial sum function are:

+
    +
  • x_subset, the subset of x a given partial sum is responsible for computing, type array[] T, where T matches the type of x in reduce_sum (reduce_sum_static)

  • +
  • start, the index of the first term of the partial sum, type int

  • +
  • end, the index of the last term of the partial sum (inclusive), type int

  • +
  • s1, first shared argument, type T1, matching type of s1 in reduce_sum (reduce_sum_static)

  • +
  • s2, second shared argument, type T2, matching type of s2 in reduce_sum (reduce_sum_static)

  • +
  • ..., remainder of shared arguments, with types matching those in reduce_sum (reduce_sum_static)

  • +
+
+
+
+

Map-rect function

+

Stan provides a higher-order map function. This allows map-reduce functionality to be coded in Stan as described in the user’s guide.

+
+

Specifying the mapped function

+

The function being mapped must have a signature identical to that of the function f in the following declaration.

+
 vector f(vector phi, vector theta,
+          data array[] real x_r, data array[] int x_i);
+

The map function returns the sequence of results for the particular shard being evaluated. The arguments to the mapped function are:

+
    +
  • phi, the sequence of parameters shared across shards

  • +
  • theta, the sequence of parameters specific to this shard

  • +
  • x_r, sequence of real-valued data

  • +
  • x_i, sequence of integer data

  • +
+

All input for the mapped function must be packed into these sequences and all output from the mapped function must be packed into a single vector. The vector of output from each mapped function is concatenated into the final result.

+
+
+

Rectangular map

+

The rectangular map function operates on rectangular (not ragged) data structures, with parallel data structures for job-specific parameters, job-specific real data, and job-specific integer data.

+ +

+

vector map_rect(F f, vector phi, array[] vector theta, data array[,] real x_r, data array[,] int x_i)
Return the concatenation of the results of applying the function f, of type (vector, vector, array[] real, array[] int):vector elementwise, i.e., f(phi, theta[n], x_r[n], x_i[n]) for each n in 1:N, where N is the size of the parallel arrays of job-specific/local parameters theta, real data x_r, and integer data x_i. The shared/global parameters phi are passed to each invocation of f.

+Available since 2.18 + + + +
+
+
+ + Back to top

References

+
+Bailey, David H., Karthik Jeyabalan, and Xiaoye S. Li. 2005. “A Comparison of Three High-Precision Quadrature Schemes.” Experiment. Math. 14 (3): 317–29. https://projecteuclid.org:443/euclid.em/1128371757. +
+
+Gaebler, Johann D. 2021. “Autodiff for Implicit Functions in Stan.” https://www.jgaeb.com/2021/09/13/implicit-autodiff.html#fn:7. +
+
+Guennebaud, Gaël, Benoît Jacob, et al. 2010. “Eigen V3.” http://eigen.tuxfamily.org. +
+
+Hindmarsh, Alan C, Peter N Brown, Keith E Grant, Steven L Lee, Radu Serban, Dan E Shumaker, and Carol S Woodward. 2005. SUNDIALS: Suite of Nonlinear and Differential/Algebraic Equation Solvers.” ACM Transactions on Mathematical Software (TOMS) 31 (3): 363–96. +
+
+Jorge J. More, Kenneth E. Hillstrom, Burton S. Garbow. 1980. User Guide for MINPACK-1. 9700 South Cass Avenue, Argonne, Illinois 60439: Argonne National Laboratory. +
+
+Margossian, Charles C, and Michael Betancourt. 2022. “Efficient Automatic Differentiation of Implicit Functions.” Preprint. arXiv:2112.14217. +
+
+Mori, Masatake. 1978. “An IMT-Type Double Exponential Formula for Numerical Integration.” Publications of the Research Institute for Mathematical Sciences 14 (3): 713–29. https://doi.org/10.2977/prims/1195188835. +
+
+Powell, Michael J. D. 1970. “A Hybrid Method for Nonlinear Equations.” In Numerical Methods for Nonlinear Algebraic Equations, edited by P. Rabinowitz. Gordon; Breach. +
+
+Serban, Radu, Cosmin Petra, Alan C. Hindmarsh, Cody J. Balos, David J. Gardner, Daniel R. Reynolds, and Carol S. Woodward. 2021. “User Documentation for IDAS V5.0.0.” Lawrence Livermore National Laboratory. +
+
+Takahasi, Hidetosi, and Masatake Mori. 1974. “Double Exponential Formulas for Numerical Integration.” Publications of the Research Institute for Mathematical Sciences 9 (3): 721–41. https://doi.org/10.2977/prims/1195192451. +
+
+Tanaka, Ken’ichiro, Masaaki Sugihara, Kazuo Murota, and Masatake Mori. 2009. “Function Classes for Double Exponential Integration Formulas.” Numerische Mathematik 111 (4): 631–55. https://doi.org/10.1007/s00211-008-0195-1. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/img/logo_tm.png b/docs/2_39/functions-reference/img/logo_tm.png new file mode 100644 index 000000000..48c9769c7 Binary files /dev/null and b/docs/2_39/functions-reference/img/logo_tm.png differ diff --git a/docs/2_39/functions-reference/index.html b/docs/2_39/functions-reference/index.html new file mode 100644 index 000000000..820f6d50b --- /dev/null +++ b/docs/2_39/functions-reference/index.html @@ -0,0 +1,1142 @@ + + + + + + + + + +Stan Functions Reference + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ +
+
+

Stan Functions Reference

+

Version 2.39

+
+ + + +
+ + + + +
+ + + +
+ + +

+

This is the reference for the functions defined in the Stan math library and available in the Stan programming language.

+

For more information on the Stan language and inference engines and how to use Stan for Bayesian inference, see

+
    +
  • the Stan User’s Guide. The Stan user’s guide provides example models and programming techniques for coding statistical models in Stan. It also serves as an example-driven introduction to Bayesian modeling and inference.

  • +
  • the Stan Reference Manual. Stan’s modeling language is shared across all of its interfaces. The Stan Language Reference Manual provides a concise definition of the language syntax for all elements in the language together with an overview of the inference algorithms and posterior inference tools.

  • +
+

Download the pdf version of this manual.

+ +
+

Licensing

+ + + +
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/integer-valued_basic_functions.html b/docs/2_39/functions-reference/integer-valued_basic_functions.html new file mode 100644 index 000000000..4788a0d0f --- /dev/null +++ b/docs/2_39/functions-reference/integer-valued_basic_functions.html @@ -0,0 +1,1274 @@ + + + + + + + + + +Integer-Valued Basic Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Integer-Valued Basic Functions

+

This chapter describes Stan’s built-in functions that take various types of arguments and return integer values.

+
+

Integer-valued arithmetic operators

+

Stan’s arithmetic is based on standard double-precision C++ integer and floating-point arithmetic. If the arguments to an arithmetic operator are both integers, as in 2 + 2, integer arithmetic is used. If one argument is an integer and the other a floating-point value, as in 2.0 + 2 and 2 + 2.0, then the integer is promoted to a floating point value and floating-point arithmetic is used.

+

Integer arithmetic behaves slightly differently than floating point arithmetic. The first difference is how overflow is treated. If the sum or product of two integers overflows the maximum integer representable, the result is an undesirable wraparound behavior at the bit level. If the integers were first promoted to real numbers, they would not overflow a floating-point representation. There are no extra checks in Stan to flag overflows, so it is up to the user to make sure it does not occur.

+

Secondly, because the set of integers is not closed under division and there is no special infinite value for integers, integer division implicitly rounds the result. If both arguments are positive, the result is rounded down. For example, 1 / 2 evaluates to 0 and 5 / 3 evaluates to 1.

+

If one of the integer arguments to division is negative, the latest C++ specification ( C++11), requires rounding toward zero. This would have 1 / 2 and -1 / 2 evaluate to 0, -7 / 2 evaluate to -3, and 7 / 2 evaluate to 3. Before the C++11 specification, the behavior was platform dependent, allowing rounding up or down. All compilers recent enough to be able to deal with Stan’s templating should follow the C++11 specification, but it may be worth testing if you are not sure and plan to use integer division with negative values.

+

Unlike floating point division, where 1.0 / 0.0 produces the special positive infinite value, integer division by zero, as in 1 / 0, has undefined behavior in the C++ standard. For example, the clang++ compiler on Mac OS X returns 3764, whereas the g++ compiler throws an exception and aborts the program with a warning. As with overflow, it is up to the user to make sure integer divide-by-zero does not occur.

+
+

Binary infix operators

+

Operators are described using the C++ syntax. For instance, the binary operator of addition, written X + Y, would have the Stan signature int operator+(int, int) indicating it takes two integer arguments and returns an integer value. As noted previously, the value of integer division is platform-dependent when rounding is platform dependent before C++11; the descriptions below provide the C++11 definition.

+ +

+

int operator+(int x, int y)
The sum of the addends x and y \[\begin{equation*} \text{operator+}(x,y) = (x + y) \end{equation*}\]

+Available since 2.0 + +

+

int operator-(int x, int y)
The difference between the minuend x and subtrahend y \[\begin{equation*} +\text{operator-}(x,y) = (x - y) \end{equation*}\]

+Available since 2.0 + +

+

int operator*(int x, int y)
The product of the factors x and y \[\begin{equation*} \text{operator*}(x,y) = (x +\times y) \end{equation*}\]

+Available since 2.0 + +

+

int operator/(int x, int y)
The integer quotient of the dividend x and divisor y \[\begin{equation*} +\text{operator/}(x,y) = \begin{cases} \lfloor x / y \rfloor & \text{if +} x / y \geq 0 \\ - \lfloor \text{floor}(-x / y) \rfloor & \text{if } +x / y < 0. \end{cases} \end{equation*}\] deprecated; - use operator%/% instead.

+Available since 2.0, deprecated in 2.24 + +

+

int operator%/%(int x, int y)
The integer quotient of the dividend x and divisor y \[\begin{equation*} +\text{operator\%/\%}(x,y) = \begin{cases} \lfloor x / y \rfloor & \text{if +} x / y \geq 0 \\ - \lfloor \text{floor}(-x / y) \rfloor & \text{if } +x / y < 0. \end{cases} \end{equation*}\]

+Available since 2.24 + +

+

int operator%(int x, int y)
x modulo y, which is the positive remainder after dividing x by y. If both x and y are non-negative, so is the result; otherwise, the sign of the result is platform dependent. \[\begin{equation*} \mathrm{operator\%}(x, y) \ = +\ x \ \text{mod} \ y \ = \ x - y * \lfloor x / y \rfloor \end{equation*}\]

+Available since 2.13 +
+
+

Unary prefix operators

+ +

+

int operator-(int x)
The negation of the subtrahend x \[\begin{equation*} \text{operator-}(x) = -x \end{equation*}\]

+Available since 2.0 + +

+

T operator-(T x)
Vectorized version of operator-. If T x is a (possibly nested) array of integers, -x is the same shape array where each individual integer is negated.

+Available since 2.31 + +

+

int operator+(int x)
This is a no-op. \[\begin{equation*} \text{operator+}(x) = x \end{equation*}\]

+Available since 2.0 +
+
+
+

Absolute functions

+ +

+

T abs(T x)
The absolute value of x.

+

This function works elementwise over containers such as vectors. Given a type T which is int, or an array of ints, abs returns the same type where each element has had its absolute value taken.

+Available since 2.0, vectorized in 2.30 + +

+

int int_step(int x)

+ +

+

int int_step(real x)
Return the step function of x as an integer, \[\begin{equation*} \mathrm{int\_step}(x) += \begin{cases} 1 & \text{if } x > 0 \\ 0 & \text{if } x \leq 0 \text{ +or } x \text{ is } NaN \end{cases} \end{equation*}\] Warning: int_step(0) and int_step(NaN) return 0 whereas step(0) and step(NaN) return 1.

+

See the warning in section step functions about the dangers of step functions applied to anything other than data.

+Available since 2.0 +
+
+

Bound functions

+ +

+

int min(int x, int y)
Return the minimum of x and y. \[\begin{equation*} \text{min}(x, y) = \begin{cases} x & +\text{if } x < y\\ y & \text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

int max(int x, int y)
Return the maximum of x and y. \[\begin{equation*} \text{max}(x, y) = \begin{cases} x & +\text{if } x > y\\ y & \text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 +
+
+

Size functions

+ +

+

int size(int x)

+ +

+

int size(real x)

+

Return the size of x, which for scalar-valued x is 1.

+Available since 2.26 +
+
+

Casting functions

+

It is possible to cast real numbers to integers as long as the real value is data. See data only qualifiers in the Stan Reference Manual.

+ +

+

int to_int(data real x)

+

Return the value x truncated to an integer. This will throw an error if the value of x is too big to represent as a 32-bit signed integer.

+

This is similar to trunc (see Rounding functions) but the return type is of type int. For example, to_int(3.9) is 3, and to_int(-3.9) is -3.

+Available since 2.31 + +

+

I to_int(data T x)

+

The vectorized version of to_int. This function accepts a (possibly nested) array of reals and returns an array of the same shape where each element has been truncated to an integer.

+Available since 2.31 + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/mathematical_functions.html b/docs/2_39/functions-reference/mathematical_functions.html new file mode 100644 index 000000000..d81ac39a9 --- /dev/null +++ b/docs/2_39/functions-reference/mathematical_functions.html @@ -0,0 +1,1178 @@ + + + + + + + + + +Mathematical Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Mathematical Functions

+

This appendix provides the definition of several mathematical functions used throughout the manual.

+
+

Beta

+

The beta function, \(\text{B}(a, b)\), computes the normalizing constant for the beta distribution, and is defined for \(a > 0\) and \(b +> 0\) by \[\begin{equation*} \text{B}(a,b) \ = \ \int_0^1 u^{a - 1} (1 - u)^{b - 1} \, +du \ = \ \frac{\Gamma(a) \, \Gamma(b)}{\Gamma(a+b)} \, , \end{equation*}\] where \(\Gamma(x)\) is the Gamma function.

+
+
+

Incomplete beta

+

The incomplete beta function, \(\text{B}(x; a, b)\), is defined for \(x +\in [0, 1]\) and \(a, b \geq 0\) such that \(a + b \neq 0\) by \[\begin{equation*} +\text{B}(x; \, a, b) \ = \ \int_0^x u^{a - 1} \, (1 - u)^{b - 1} \, +du, \end{equation*}\] where \(\text{B}(a, b)\) is the beta function defined in appendix. If \(x = 1\), the incomplete beta function reduces to the beta function, \(\text{B}(1; a, b) = \text{B}(a, b)\).

+

The regularized incomplete beta function divides the incomplete beta function by the beta function, \[\begin{equation*} I_x(a, b) \ = \ \frac{\text{B}(x; \, +a, b)}{\text{B}(a, b)} \, . \end{equation*}\]

+
+
+

Gamma

+

The gamma function, \(\Gamma(x)\), is the generalization of the factorial function to continuous variables, defined so that for positive integers \(n\), \[\begin{equation*} \Gamma(n+1) = n! \end{equation*}\] Generalizing to all positive numbers and non-integer negative numbers, \[\begin{equation*} \Gamma(x) = +\int_0^{\infty} u^{x - 1} \exp(-u) \, du. \end{equation*}\]

+
+
+

Digamma

+

The digamma function \(\Psi\) is the derivative of the \(\log \Gamma\) function, \[\begin{equation*} +\Psi(u) \ = \ \frac{d}{d u} \log \Gamma(u) \ = \ \frac{1}{\Gamma(u)} \ \frac{d}{d u} \Gamma(u). +\end{equation*}\]

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/matrix_operations.html b/docs/2_39/functions-reference/matrix_operations.html new file mode 100644 index 000000000..40457a087 --- /dev/null +++ b/docs/2_39/functions-reference/matrix_operations.html @@ -0,0 +1,2681 @@ + + + + + + + + + +Matrix Operations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Matrix Operations

+
+

Integer-valued matrix size functions

+ +

+

int num_elements(vector x)
The total number of elements in the vector x (same as function rows)

+Available since 2.5 + +

+

int num_elements(row_vector x)
The total number of elements in the row vector x (same as function cols)

+Available since 2.5 + +

+

int num_elements(matrix x)
The total number of elements in the matrix x. For example, if x is a \(5 \times 3\) matrix, then num_elements(x) is 15

+Available since 2.5 + +

+

int rows(vector x)
The number of rows in the vector x

+Available since 2.0 + +

+

int rows(row_vector x)
The number of rows in the row vector x, namely 1

+Available since 2.0 + +

+

int rows(matrix x)
The number of rows in the matrix x

+Available since 2.0 + +

+

int cols(vector x)
The number of columns in the vector x, namely 1

+Available since 2.0 + +

+

int cols(row_vector x)
The number of columns in the row vector x

+Available since 2.0 + +

+

int cols(matrix x)
The number of columns in the matrix x

+Available since 2.0 + +

+

int size(vector x)
The size of x, i.e., the number of elements

+Available since 2.26 + +

+

int size(row_vector x)
The size of x, i.e., the number of elements

+Available since 2.26 + +

+

int size(matrix x)
The size of the matrix x. For example, if x is a \(5 \times 3\) matrix, then size(x) is 15

+Available since 2.26 +
+
+

Matrix arithmetic operators

+

Stan supports the basic matrix operations using infix, prefix and postfix operations. This section lists the operations supported by Stan along with their argument and result types.

+
+

Negation prefix operators

+ +

+

vector operator-(vector x)
The negation of the vector x.

+Available since 2.0 + +

+

row_vector operator-(row_vector x)
The negation of the row vector x.

+Available since 2.0 + +

+

matrix operator-(matrix x)
The negation of the matrix x.

+Available since 2.0 + +

+

T operator-(T x)
Vectorized version of operator-. If T x is a (possibly nested) array of matrix types, -x is the same shape array where each individual value is negated.

+Available since 2.31 +
+
+

Infix matrix operators

+ +

+

vector operator+(vector x, vector y)
The sum of the vectors x and y.

+Available since 2.0 + +

+

row_vector operator+(row_vector x, row_vector y)
The sum of the row vectors x and y.

+Available since 2.0 + +

+

matrix operator+(matrix x, matrix y)
The sum of the matrices x and y

+Available since 2.0 + +

+

vector operator-(vector x, vector y)
The difference between the vectors x and y.

+Available since 2.0 + +

+

row_vector operator-(row_vector x, row_vector y)
The difference between the row vectors x and y

+Available since 2.0 + +

+

matrix operator-(matrix x, matrix y)
The difference between the matrices x and y

+Available since 2.0 + +

+

vector operator*(real x, vector y)
The product of the scalar x and vector y

+Available since 2.0 + +

+

row_vector operator*(real x, row_vector y)
The product of the scalar x and the row vector y

+Available since 2.0 + +

+

matrix operator*(real x, matrix y)
The product of the scalar x and the matrix y

+Available since 2.0 + +

+

vector operator*(vector x, real y)
The product of the scalar y and vector x

+Available since 2.0 + +

+

matrix operator*(vector x, row_vector y)
The product of the vector x and row vector y

+Available since 2.0 + +

+

row_vector operator*(row_vector x, real y)
The product of the scalar y and row vector x

+Available since 2.0 + +

+

real operator*(row_vector x, vector y)
The product of the row vector x and vector y

+Available since 2.0 + +

+

row_vector operator*(row_vector x, matrix y)
The product of the row vector x and matrix y

+Available since 2.0 + +

+

matrix operator*(matrix x, real y)
The product of the scalar y and matrix x

+Available since 2.0 + +

+

vector operator*(matrix x, vector y)
The product of the matrix x and vector y

+Available since 2.0 + +

+

matrix operator*(matrix x, matrix y)
The product of the matrices x and y

+Available since 2.0 +
+
+

Broadcast infix operators

+ +

+

vector operator+(vector x, real y)
The result of adding y to every entry in the vector x

+Available since 2.0 + +

+

vector operator+(real x, vector y)
The result of adding x to every entry in the vector y

+Available since 2.0 + +

+

row_vector operator+(row_vector x, real y)
The result of adding y to every entry in the row vector x

+Available since 2.0 + +

+

row_vector operator+(real x, row_vector y)
The result of adding x to every entry in the row vector y

+Available since 2.0 + +

+

matrix operator+(matrix x, real y)
The result of adding y to every entry in the matrix x

+Available since 2.0 + +

+

matrix operator+(real x, matrix y)
The result of adding x to every entry in the matrix y

+Available since 2.0 + +

+

vector operator-(vector x, real y)
The result of subtracting y from every entry in the vector x

+Available since 2.0 + +

+

vector operator-(real x, vector y)
The result of adding x to every entry in the negation of the vector y

+Available since 2.0 + +

+

row_vector operator-(row_vector x, real y)
The result of subtracting y from every entry in the row vector x

+Available since 2.0 + +

+

row_vector operator-(real x, row_vector y)
The result of adding x to every entry in the negation of the row vector y

+Available since 2.0 + +

+

matrix operator-(matrix x, real y)
The result of subtracting y from every entry in the matrix x

+Available since 2.0 + +

+

matrix operator-(real x, matrix y)
The result of adding x to every entry in the negation of the matrix y

+Available since 2.0 + +

+

vector operator/(vector x, real y)
The result of dividing each entry in the vector x by y

+Available since 2.0 + +

+

row_vector operator/(row_vector x, real y)
The result of dividing each entry in the row vector x by y

+Available since 2.0 + +

+

matrix operator/(matrix x, real y)
The result of dividing each entry in the matrix x by y

+Available since 2.0 +
+
+
+

Transposition operator

+

Matrix transposition is represented using a postfix operator.

+ +

+

matrix operator'(matrix x)
The transpose of the matrix x, written as x'

+Available since 2.0 + +

+

row_vector operator'(vector x)
The transpose of the vector x, written as x'

+Available since 2.0 + +

+

vector operator'(row_vector x)
The transpose of the row vector x, written as x'

+Available since 2.0 +
+
+

Elementwise functions

+

Elementwise functions apply a function to each element of a vector or matrix, returning a result of the same shape as the argument. There are many functions that are vectorized in addition to the ad hoc cases listed in this section; see section function vectorization for the general cases.

+ +

+

vector operator.*(vector x, vector y)
The elementwise product of y and x

+Available since 2.0 + +

+

row_vector operator.*(row_vector x, row_vector y)
The elementwise product of y and x

+Available since 2.0 + +

+

matrix operator.*(matrix x, matrix y)
The elementwise product of y and x

+Available since 2.0 + +

+

vector operator./(vector x, vector y)
The elementwise quotient of x and y, i.e., each element of x divided by the corresponding element of y

+Available since 2.0 + +

+

vector operator./(vector x, real y)
The elementwise quotient of x and y, i.e., each element of x divided by y

+Available since 2.4 + +

+

vector operator./(real x, vector y)
The elementwise quotient of x and y, i.e., x divided by each element of y

+Available since 2.4 + +

+

row_vector operator./(row_vector x, row_vector y)
The elementwise quotient of x and y, i.e., each element of x divided by the corresponding element of y

+Available since 2.0 + +

+

row_vector operator./(row_vector x, real y)
The elementwise quotient of x and y, i.e., each element of x divided by y

+Available since 2.4 + +

+

row_vector operator./(real x, row_vector y)
The elementwise quotient of x and y, i.e., x divided by each element of y

+Available since 2.4 + +

+

matrix operator./(matrix x, matrix y)
The elementwise quotient of x and y, i.e., each element of x divided by the corresponding element of y

+Available since 2.0 + +

+

matrix operator./(matrix x, real y)
The elementwise quotient of x and y, i.e., each element of x divided by y

+Available since 2.4 + +

+

matrix operator./(real x, matrix y)
The elementwise quotient of x and y, i.e., x divided by each element of y

+Available since 2.4 + +

+

vector operator.^(vector x, vector y)
The elementwise power of x and y, i.e., each element of x raised to the power of the corresponding element of y

+Available since 2.24 + +

+

vector operator.^(vector x, real y)
The elementwise power of x and y, i.e., each element of x raised to the power y

+Available since 2.24 + +

+

vector operator.^(real x, vector y)
The elementwise power of x and y, i.e., x raised to the power of each element of y

+Available since 2.24 + +

+

row_vector operator.^(row_vector x, row_vector y)
The elementwise power of x and y, i.e., each element of x raised to the power of the corresponding element of y

+Available since 2.24 + +

+

row_vector operator.^(row_vector x, real y)
The elementwise power of x and y, i.e., each element of x raised to the power y

+Available since 2.24 + +

+

row_vector operator.^(real x, row_vector y)
The elementwise power of x and y, i.e., x raised to the power of each element of y

+Available since 2.24 + +

+

matrix operator.^(matrix x, matrix y)
The elementwise power of x and y, i.e., each element of x raised to the power of the corresponding element of y

+Available since 2.24 + +

+

matrix operator.^(matrix x, real y)
The elementwise power of x and y, i.e., each element of x raised to the power y

+Available since 2.24 + +

+

matrix operator.^(real x, matrix y)
The elementwise power of x and y, i.e., x raised to the power of each element of y

+Available since 2.24 +
+
+

Dot products and specialized products

+ +

+

real dot_product(vector x, vector y)
The dot product of x and y

+Available since 2.0 + +

+

real dot_product(vector x, row_vector y)
The dot product of x and y

+Available since 2.0 + +

+

real dot_product(row_vector x, vector y)
The dot product of x and y

+Available since 2.0 + +

+

real dot_product(row_vector x, row_vector y)
The dot product of x and y

+Available since 2.0 + +

+

row_vector columns_dot_product(vector x, vector y)
The dot product of the columns of x and y

+Available since 2.0 + +

+

row_vector columns_dot_product(row_vector x, row_vector y)
The dot product of the columns of x and y

+Available since 2.0 + +

+

row_vector columns_dot_product(matrix x, matrix y)
The dot product of the columns of x and y

+Available since 2.0 + +

+

vector rows_dot_product(vector x, vector y)
The dot product of the rows of x and y

+Available since 2.0 + +

+

vector rows_dot_product(row_vector x, row_vector y)
The dot product of the rows of x and y

+Available since 2.0 + +

+

vector rows_dot_product(matrix x, matrix y)
The dot product of the rows of x and y

+Available since 2.0 + +

+

real dot_self(vector x)
The dot product of the vector x with itself

+Available since 2.0 + +

+

real dot_self(row_vector x)
The dot product of the row vector x with itself

+Available since 2.0 + +

+

row_vector columns_dot_self(vector x)
The dot product of the columns of x with themselves

+Available since 2.0 + +

+

row_vector columns_dot_self(row_vector x)
The dot product of the columns of x with themselves

+Available since 2.0 + +

+

row_vector columns_dot_self(matrix x)
The dot product of the columns of x with themselves

+Available since 2.0 + +

+

vector rows_dot_self(vector x)
The dot product of the rows of x with themselves

+Available since 2.0 + +

+

vector rows_dot_self(row_vector x)
The dot product of the rows of x with themselves

+Available since 2.0 + +

+

vector rows_dot_self(matrix x)
The dot product of the rows of x with themselves

+Available since 2.0 +
+

Specialized products

+ +

+

matrix tcrossprod(matrix x)
The product of x postmultiplied by its own transpose, similar to the tcrossprod(x) function in R. The result is a symmetric matrix \(\text{x}\,\text{x}^{\top}\).

+Available since 2.0 + +

+

matrix crossprod(matrix x)
The product of x premultiplied by its own transpose, similar to the crossprod(x) function in R. The result is a symmetric matrix \(\text{x}^{\top}\,\text{x}\).

+Available since 2.0 +

The following functions all provide shorthand forms for common expressions, which are also much more efficient.

+ +

+

matrix quad_form(matrix A, matrix B)
The quadratic form, i.e., B' * A * B.

+Available since 2.0 + +

+

real quad_form(matrix A, vector B)
The quadratic form, i.e., B' * A * B.

+Available since 2.0 + +

+

matrix quad_form_diag(matrix m, vector v)
The quadratic form using the column vector v as a diagonal matrix, i.e., diag_matrix(v) * m * diag_matrix(v).

+Available since 2.3 + +

+

matrix quad_form_diag(matrix m, row_vector rv)
The quadratic form using the row vector rv as a diagonal matrix, i.e., diag_matrix(rv) * m * diag_matrix(rv).

+Available since 2.3 + +

+

matrix quad_form_sym(matrix A, matrix B)
Similarly to quad_form, gives B' * A * B, but additionally checks if A is symmetric and ensures that the result is also symmetric.

+Available since 2.3 + +

+

real quad_form_sym(matrix A, vector B)
Similarly to quad_form, gives B' * A * B, but additionally checks if A is symmetric and ensures that the result is also symmetric.

+Available since 2.3 + +

+

real trace_dot(matrix A, matrix B)
The trace of the matrix product, i.e., trace(A * B).

+Available since 2.39 + +

+

real trace_quad_form(matrix A, matrix B)
The trace of the quadratic form, i.e., trace(B' * A * B).

+Available since 2.0 + +

+

real trace_quad_form(matrix A, vector B)
The trace of the quadratic form, i.e., trace(B' * A * B).

+Available since 2.0 + +

+

real trace_gen_quad_form(matrix D, matrix A, matrix B)
The trace of a generalized quadratic form, i.e., trace(D * B' * A * B).

+Available since 2.0 + +

+

matrix multiply_lower_tri_self_transpose(matrix x)
The product of the lower triangular portion of x (including the diagonal) times its own transpose; that is, if L is a matrix of the same dimensions as x with L(m,n) equal to x(m,n) for \(\text{n} +\leq \text{m}\) and L(m,n) equal to 0 if \(\text{n} > \text{m}\), the result is the symmetric matrix \(\text{L}\,\text{L}^{\top}\). This is a specialization of tcrossprod(x) for lower-triangular matrices. The input matrix does not need to be square.

+Available since 2.0 + +

+

matrix diag_pre_multiply(vector v, matrix m)
Return the product of the diagonal matrix formed from the vector v and the matrix m, i.e., diag_matrix(v) * m.

+Available since 2.0 + +

+

matrix diag_pre_multiply(row_vector rv, matrix m)
Return the product of the diagonal matrix formed from the row vector rv and the matrix m, i.e., diag_matrix(rv) * m.

+Available since 2.0 + +

+

matrix diag_post_multiply(matrix m, vector v)
Return the product of the matrix m and the diagonal matrix formed from the vector v, i.e., m * diag_matrix(v).

+Available since 2.0 + +

+

matrix diag_post_multiply(matrix m, row_vector rv)
Return the product of the matrix m and the diagonal matrix formed from the row vector rv, i.e., m * diag_matrix(rv).

+Available since 2.0 +
+
+
+

Reductions

+
+

Log sum of exponents

+ +

+

real log_sum_exp(vector x)
The natural logarithm of the sum of the exponentials of the elements in x

+Available since 2.0 + +

+

real log_sum_exp(row_vector x)
The natural logarithm of the sum of the exponentials of the elements in x

+Available since 2.0 + +

+

real log_sum_exp(matrix x)
The natural logarithm of the sum of the exponentials of the elements in x

+Available since 2.0 +
+
+

Minimum and maximum

+ +

+

real min(vector x)
The minimum value in x, or \(+\infty\) if x is empty

+Available since 2.0 + +

+

real min(row_vector x)
The minimum value in x, or \(+\infty\) if x is empty

+Available since 2.0 + +

+

real min(matrix x)
The minimum value in x, or \(+\infty\) if x is empty

+Available since 2.0 + +

+

real max(vector x)
The maximum value in x, or \(-\infty\) if x is empty

+Available since 2.0 + +

+

real max(row_vector x)
The maximum value in x, or \(-\infty\) if x is empty

+Available since 2.0 + +

+

real max(matrix x)
The maximum value in x, or \(-\infty\) if x is empty

+Available since 2.0 +
+
+

Sums and products

+ +

+

real sum(vector x)
The sum of the values in x, or 0 if x is empty

+Available since 2.0 + +

+

real sum(row_vector x)
The sum of the values in x, or 0 if x is empty

+Available since 2.0 + +

+

real sum(matrix x)
The sum of the values in x, or 0 if x is empty

+Available since 2.0 + +

+

real prod(vector x)
The product of the values in x, or 1 if x is empty

+Available since 2.0 + +

+

real prod(row_vector x)
The product of the values in x, or 1 if x is empty

+Available since 2.0 + +

+

real prod(matrix x)
The product of the values in x, or 1 if x is empty

+Available since 2.0 +
+
+

Sample moments

+

Full definitions are provided for sample moments in section array reductions.

+ +

+

real mean(vector x)
The sample mean of the values in x; see section array reductions for details.

+Available since 2.0 + +

+

real mean(row_vector x)
The sample mean of the values in x; see section array reductions for details.

+Available since 2.0 + +

+

real mean(matrix x)
The sample mean of the values in x; see section array reductions for details.

+Available since 2.0 + +

+

real variance(vector x)
The sample variance of the values in x; see section array reductions for details.

+Available since 2.0 + +

+

real variance(row_vector x)
The sample variance of the values in x; see section array reductions for details.

+Available since 2.0 + +

+

real variance(matrix x)
The sample variance of the values in x; see section array reductions for details.

+Available since 2.0 + +

+

real sd(vector x)
The sample standard deviation of the values in x; see section array reductions for details.

+Available since 2.0 + +

+

real sd(row_vector x)
The sample standard deviation of the values in x; see section array reductions for details.

+Available since 2.0 + +

+

real sd(matrix x)
The sample standard deviation of the values in x; see section array reductions for details.

+Available since 2.0 +
+
+

Quantile

+

Produces sample quantiles corresponding to the given probabilities. The smallest observation corresponds to a probability of 0 and the largest to a probability of 1.

+

Implements algorithm 7 from Hyndman, R. J. and Fan, Y., Sample quantiles in Statistical Packages (R’s default quantile function).

+ +

+

real quantile(data vector x, data real p)
The p-th quantile of x

+Available since 2.27 + +

+

array[] real quantile(data vector x, data array[] real p)
An array containing the quantiles of x given by the array of probabilities p

+Available since 2.27 + +

+

real quantile(data row_vector x, data real p)
The p-th quantile of x

+Available since 2.27 + +

+

array[] real quantile(data row_vector x, data array[] real p)
An array containing the quantiles of x given by the array of probabilities p

+Available since 2.27 +
+
+
+

Broadcast functions

+

The following broadcast functions allow vectors, row vectors and matrices to be created by copying a single element into all of their cells. Matrices may also be created by stacking copies of row vectors vertically or stacking copies of column vectors horizontally.

+ +

+

vector rep_vector(real x, int m)
Return the size m (column) vector consisting of copies of x.

+Available since 2.0 + +

+

row_vector rep_row_vector(real x, int n)
Return the size n row vector consisting of copies of x.

+Available since 2.0 + +

+

matrix rep_matrix(real x, int m, int n)
Return the m by n matrix consisting of copies of x.

+Available since 2.0 + +

+

matrix rep_matrix(vector v, int n)
Return the m by n matrix consisting of n copies of the (column) vector v of size m.

+Available since 2.0 + +

+

matrix rep_matrix(row_vector rv, int m)
Return the m by n matrix consisting of m copies of the row vector rv of size n.

+Available since 2.0 +

Unlike the situation with array broadcasting (see section array broadcasting), where there is a distinction between integer and real arguments, the following two statements produce the same result for vector broadcasting; row vector and matrix broadcasting behave similarly.

+
 vector[3] x;
+ x = rep_vector(1, 3);
+ x = rep_vector(1.0, 3);
+

There are no integer vector or matrix types, so integer values are automatically promoted.

+
+

Symmetrization

+ +

+

matrix symmetrize_from_lower_tri(matrix A)

+

Construct a symmetric matrix from the lower triangle of A.

+Available since 2.26 +
+
+
+

Diagonal matrix functions

+ +

+

matrix add_diag(matrix m, row_vector d)
Add row_vector d to the diagonal of matrix m.

+Available since 2.21 + +

+

matrix add_diag(matrix m, vector d)
Add vector d to the diagonal of matrix m.

+Available since 2.21 + +

+

matrix add_diag(matrix m, real d)
Add scalar d to every diagonal element of matrix m.

+Available since 2.21 + +

+

vector diagonal(matrix x)
The diagonal of the matrix x

+Available since 2.0 + +

+

matrix diag_matrix(vector x)
The diagonal matrix with diagonal x

+Available since 2.0 +

Although the diag_matrix function is available, it is unlikely to ever show up in an efficient Stan program. For example, rather than converting a diagonal to a full matrix for use as a covariance matrix,

+
 y ~ multi_normal(mu, diag_matrix(square(sigma)));
+

it is much more efficient to just use a univariate normal, which produces the same density,

+
 y ~ normal(mu, sigma);
+

Rather than writing m * diag_matrix(v) where m is a matrix and v is a vector, it is much more efficient to write diag_post_multiply(m, v) (and similarly for pre-multiplication). By the same token, it is better to use quad_form_diag(m, v) rather than quad_form(m, diag_matrix(v)).

+ +

+

matrix identity_matrix(int k)
Create an identity matrix of size \(k \times k\)

+Available since 2.26 +
+
+

Container construction functions

+ +

+

array[] real linspaced_array(int n, data real lower, data real upper)
Create a real array of length n of equidistantly-spaced elements between lower and upper

+Available since 2.24 + +

+

array[] int linspaced_int_array(int n, int lower, int upper)
Create a regularly spaced, increasing integer array of length n between lower and upper, inclusively. If (upper - lower) / (n - 1) is less than one, repeat each output (n - 1) / (upper - lower) times. If neither (upper - lower) / (n - 1) nor (n - 1) / (upper - lower) is an integer, upper is reduced until one of these is true.

+Available since 2.26 + +

+

vector linspaced_vector(int n, data real lower, data real upper)
Create an n-dimensional vector of equidistantly-spaced elements between lower and upper

+Available since 2.24 + +

+

row_vector linspaced_row_vector(int n, data real lower, data real upper)
Create an n-dimensional row-vector of equidistantly-spaced elements between lower and upper

+Available since 2.24 + +

+

array[] int one_hot_int_array(int n, int k)
Create a one-hot encoded int array of length n with array[k] = 1

+Available since 2.26 + +

+

array[] real one_hot_array(int n, int k)
Create a one-hot encoded real array of length n with array[k] = 1

+Available since 2.24 + +

+

vector one_hot_vector(int n, int k)
Create an n-dimensional one-hot encoded vector with vector[k] = 1

+Available since 2.24 + +

+

row_vector one_hot_row_vector(int n, int k)
Create an n-dimensional one-hot encoded row-vector with row_vector[k] = 1

+Available since 2.24 + +

+

array[] int ones_int_array(int n)
Create an int array of length n of all ones

+Available since 2.26 + +

+

array[] real ones_array(int n)
Create a real array of length n of all ones

+Available since 2.26 + +

+

vector ones_vector(int n)
Create an n-dimensional vector of all ones

+Available since 2.26 + +

+

row_vector ones_row_vector(int n)
Create an n-dimensional row-vector of all ones

+Available since 2.26 + +

+

array[] int zeros_int_array(int n)
Create an int array of length n of all zeros

+Available since 2.26 + +

+

array[] real zeros_array(int n)
Create a real array of length n of all zeros

+Available since 2.24 + +

+

vector zeros_vector(int n)
Create an n-dimensional vector of all zeros

+Available since 2.24 + +

+

row_vector zeros_row_vector(int n)
Create an n-dimensional row-vector of all zeros

+Available since 2.24 + +

+

vector uniform_simplex(int n)
Create an n-dimensional simplex with elements vector[i] = 1 / n for all \(i \in 1, \dots, n\)

+Available since 2.24 +
+
+

Slicing and blocking functions

+

Stan provides several functions for generating slices or blocks or diagonal entries for matrices.

+
+

Columns and rows

+ +

+

vector col(matrix x, int n)
The n-th column of matrix x

+Available since 2.0 + +

+

row_vector row(matrix x, int m)
The m-th row of matrix x

+Available since 2.0 +

The row function is special in that it may be used as an lvalue in an assignment statement (i.e., something to which a value may be assigned). The row function is also special in that the indexing notation x[m] is just an alternative way of writing row(x,m). The col function may not be used as an lvalue, nor is there an indexing-based shorthand for it.

+
+
+

Block operations

+
+

Matrix slicing operations

+

Block operations may be used to extract a sub-block of a matrix.

+ +

+

matrix block(matrix x, int i, int j, int n_rows, int n_cols)
Return the submatrix of x that starts at row i and column j and extends n_rows rows and n_cols columns.

+Available since 2.0 +

The sub-row and sub-column operations may be used to extract a slice of a row or column from a matrix.

+ +

+

vector sub_col(matrix x, int i, int j, int n_rows)
Return the sub-column of x that starts at row i and column j and extends n_rows rows and 1 column.

+Available since 2.0 + +

+

row_vector sub_row(matrix x, int i, int j, int n_cols)
Return the sub-row of x that starts at row i and column j and extends 1 row and n_cols columns.

+Available since 2.0 +
+
+

Vector and array slicing operations

+

The head operation extracts the first \(n\) elements of a vector and the tail operation the last. The segment operation extracts an arbitrary subvector.

+ +

+

vector head(vector v, int n)
Return the vector consisting of the first n elements of v.

+Available since 2.0 + +

+

row_vector head(row_vector rv, int n)
Return the row vector consisting of the first n elements of rv.

+Available since 2.0 + +

+

array[] T head(array[] T sv, int n)
Return the array consisting of the first n elements of sv; applies to up to three-dimensional arrays containing any type of elements T.

+Available since 2.0 + +

+

vector tail(vector v, int n)
Return the vector consisting of the last n elements of v.

+Available since 2.0 + +

+

row_vector tail(row_vector rv, int n)
Return the row vector consisting of the last n elements of rv.

+Available since 2.0 + +

+

array[] T tail(array[] T sv, int n)
Return the array consisting of the last n elements of sv; applies to up to three-dimensional arrays containing any type of elements T.

+Available since 2.0 + +

+

vector segment(vector v, int i, int n)
Return the vector consisting of the n elements of v starting at i; i.e., elements i through i + n - 1.

+Available since 2.0 + +

+

row_vector segment(row_vector rv, int i, int n)
Return the row vector consisting of the n elements of rv starting at i; i.e., elements i through i + n - 1.

+Available since 2.10 + +

+

array[] T segment(array[] T sv, int i, int n)
Return the array consisting of the n elements of sv starting at i; i.e., elements i through i + n - 1. Applies to up to three-dimensional arrays containing any type of elements T.

+Available since 2.0 +
+
+
+
+

Matrix and vector concatenation

+

Stan’s matrix and vector concatenation operations append_col and append_row are like the operations cbind and rbind in R.

+
+

Horizontal concatenation

+ +

+

matrix append_col(matrix x, matrix y)
Combine matrices x and y by column. The matrices must have the same number of rows.

+Available since 2.5 + +

+

matrix append_col(matrix x, vector y)
Combine matrix x and vector y by column. The matrix and the vector must have the same number of rows.

+Available since 2.5 + +

+

matrix append_col(vector x, matrix y)
Combine vector x and matrix y by column. The vector and the matrix must have the same number of rows.

+Available since 2.5 + +

+

matrix append_col(vector x, vector y)
Combine vectors x and y by column. The vectors must have the same number of rows.

+Available since 2.5 + +

+

row_vector append_col(row_vector x, row_vector y)
Combine row vectors x and y of any size into another row vector by appending y to the end of x.

+Available since 2.5 + +

+

row_vector append_col(real x, row_vector y)
Append x to the front of y, returning another row vector.

+Available since 2.12 + +

+

row_vector append_col(row_vector x, real y)
Append y to the end of x, returning another row vector.

+Available since 2.12 +
+
+

Vertical concatenation

+ +

+

matrix append_row(matrix x, matrix y)
Combine matrices x and y by row. The matrices must have the same number of columns.

+Available since 2.5 + +

+

matrix append_row(matrix x, row_vector y)
Combine matrix x and row vector y by row. The matrix and the row vector must have the same number of columns.

+Available since 2.5 + +

+

matrix append_row(row_vector x, matrix y)
Combine row vector x and matrix y by row. The row vector and the matrix must have the same number of columns.

+Available since 2.5 + +

+

matrix append_row(row_vector x, row_vector y)
Combine row vectors x and y by row. The row vectors must have the same number of columns.

+Available since 2.5 + +

+

vector append_row(vector x, vector y)
Concatenate vectors x and y of any size into another vector.

+Available since 2.5 + +

+

vector append_row(real x, vector y)
Append x to the top of y, returning another vector.

+Available since 2.12 + +

+

vector append_row(vector x, real y)
Append y to the bottom of x, returning another vector.

+Available since 2.12 +
+
+
+

Special matrix functions

+
+

Softmax

+

The softmax function maps1 \(y \in \mathbb{R}^K\) to the \(K\)-simplex by \[\begin{equation*} \text{softmax}(y) = \frac{\exp(y)} +{\sum_{k=1}^K \exp(y_k)}, \end{equation*}\] where \(\exp(y)\) is the componentwise exponentiation of \(y\). Softmax is usually calculated on the log scale, \[\begin{eqnarray*} \log \text{softmax}(y) & = & \ y - \log \sum_{k=1}^K +\exp(y_k) \\[4pt] & = & y - \mathrm{log\_sum\_exp}(y). \end{eqnarray*}\] where the vector \(y\) minus the scalar \(\mathrm{log\_sum\_exp}(y)\) subtracts the scalar from each component of \(y\).

+

Stan provides the following functions for softmax and its log.

+ +

+

vector softmax(vector x)
The softmax of x

+Available since 2.0 + +

+

vector log_softmax(vector x)
The natural logarithm of the softmax of x

+Available since 2.0 +
+
+

Cumulative sums

+

The cumulative sum of a sequence \(x_1,\ldots,x_N\) is the sequence \(y_1,\ldots,y_N\), where \[\begin{equation*} y_n = \sum_{m = 1}^{n} x_m. \end{equation*}\]

+ +

+

array[] int cumulative_sum(array[] int x)
The cumulative sum of x

+Available since 2.30 + +

+

array[] real cumulative_sum(array[] real x)
The cumulative sum of x

+Available since 2.0 + +

+

vector cumulative_sum(vector v)
The cumulative sum of v

+Available since 2.0 + +

+

row_vector cumulative_sum(row_vector rv)
The cumulative sum of rv

+Available since 2.0 +
+
+
+

Gaussian Process Covariance Functions

+

The Gaussian process covariance functions compute the covariance between observations in an input data set or the cross-covariance between two input data sets.

+

For one dimensional GPs, the input data sets are arrays of scalars. The covariance matrix is given by \(K_{ij} = k(x_i, x_j)\) (where \(x_i\) is the \(i^{th}\) element of the array \(x\)) and the cross-covariance is given by \(K_{ij} = k(x_i, y_j)\).

+

For multi-dimensional GPs, the input data sets are arrays of vectors. The covariance matrix is given by \(K_{ij} = k(\mathbf{x}_i, \mathbf{x}_j)\) (where \(\mathbf{x}_i\) is the \(i^{th}\) vector in the array \(x\)) and the cross-covariance is given by \(K_{ij} = k(\mathbf{x}_i, \mathbf{y}_j)\).

+
+

Exponentiated quadratic kernel

+

With magnitude \(\sigma\) and length scale \(l\), the exponentiated quadratic kernel is:

+

\[ +k(\mathbf{x}_i, \mathbf{x}_j) = \sigma^2 \exp \left( -\frac{|\mathbf{x}_i - \mathbf{x}_j|^2}{2l^2} \right) +\]

+ +

+

matrix gp_exp_quad_cov(array[] real x, real sigma, real length_scale)

+

Gaussian process covariance with exponentiated quadratic kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_exp_quad_cov(array[] real x1, array[] real x2, real sigma, real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_exp_quad_cov(vectors x, real sigma, real length_scale)

+

Gaussian process covariance with exponentiated quadratic kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_exp_quad_cov(vectors x, real sigma, array[] real length_scale)

+

Gaussian process covariance with exponentiated quadratic kernel in multiple dimensions with a length scale for each dimension.

+Available since 2.20 + +

+

matrix gp_exp_quad_cov(vectors x1, vectors x2, real sigma, real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_exp_quad_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in multiple dimensions with a length scale for each dimension.

+Available since 2.20 +
+
+

Dot product kernel

+

With bias \(\sigma_0\) the dot product kernel is:

+

\[ +k(\mathbf{x}_i, \mathbf{x}_j) = \sigma_0^2 + \mathbf{x}_i^T \mathbf{x}_j +\]

+ +

+

matrix gp_dot_prod_cov(array[] real x, real sigma)

+

Gaussian process covariance with dot product kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_dot_prod_cov(array[] real x1, array[] real x2, real sigma)

+

Gaussian process cross-covariance of x1 and x2 with dot product kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_dot_prod_cov(vectors x, real sigma)

+

Gaussian process covariance with dot product kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_dot_prod_cov(vectors x1, vectors x2, real sigma)

+

Gaussian process cross-covariance of x1 and x2 with dot product kernel in multiple dimensions.

+Available since 2.20 +
+
+

Exponential kernel

+

With magnitude \(\sigma\) and length scale \(l\), the exponential kernel is:

+

\[ +k(\mathbf{x}_i, \mathbf{x}_j) = \sigma^2 \exp \left( -\frac{|\mathbf{x}_i - \mathbf{x}_j|}{l} \right) +\]

+ +

+

matrix gp_exponential_cov(array[] real x, real sigma, real length_scale)

+

Gaussian process covariance with exponential kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_exponential_cov(array[] real x1, array[] real x2, real sigma, real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with exponential kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_exponential_cov(vectors x, real sigma, real length_scale)

+

Gaussian process covariance with exponential kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_exponential_cov(vectors x, real sigma, array[] real length_scale)

+

Gaussian process covariance with exponential kernel in multiple dimensions with a length scale for each dimension.

+Available since 2.20 + +

+

matrix gp_exponential_cov(vectors x1, vectors x2, real sigma, real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with exponential kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_exponential_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with exponential kernel in multiple dimensions with a length scale for each dimension.

+Available since 2.20 +
+
+

Matern 3/2 kernel

+

With magnitude \(\sigma\) and length scale \(l\), the Matern 3/2 kernel is:

+

\[ +k(\mathbf{x}_i, \mathbf{x}_j) = \sigma^2 \left( 1 + \frac{\sqrt{3}|\mathbf{x}_i - \mathbf{x}_j|}{l} \right) \exp \left( -\frac{\sqrt{3}|\mathbf{x}_i - \mathbf{x}_j|}{l} \right) +\]

+ +

+

matrix gp_matern32_cov(array[] real x, real sigma, real length_scale)

+

Gaussian process covariance with Matern 3/2 kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_matern32_cov(array[] real x1, array[] real x2, real sigma, real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_matern32_cov(vectors x, real sigma, real length_scale)

+

Gaussian process covariance with Matern 3/2 kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_matern32_cov(vectors x, real sigma, array[] real length_scale)

+

Gaussian process covariance with Matern 3/2 kernel in multiple dimensions with a length scale for each dimension.

+Available since 2.20 + +

+

matrix gp_matern32_cov(vectors x1, vectors x2, real sigma, real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_matern32_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in multiple dimensions with a length scale for each dimension.

+Available since 2.20 +
+
+

Matern 5/2 kernel

+

With magnitude \(\sigma\) and length scale \(l\), the Matern 5/2 kernel is:

+

\[ +k(\mathbf{x}_i, \mathbf{x}_j) = \sigma^2 \left( 1 + \frac{\sqrt{5}|\mathbf{x}_i - \mathbf{x}_j|}{l} + \frac{5 |\mathbf{x}_i - \mathbf{x}_j|^2}{3l^2} \right) +\exp \left( -\frac{\sqrt{5} |\mathbf{x}_i - \mathbf{x}_j|}{l} \right) +\]

+ +

+

matrix gp_matern52_cov(array[] real x, real sigma, real length_scale)

+

Gaussian process covariance with Matern 5/2 kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_matern52_cov(array[] real x1, array[] real x2, real sigma, real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_matern52_cov(vectors x, real sigma, real length_scale)

+

Gaussian process covariance with Matern 5/2 kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_matern52_cov(vectors x, real sigma, array[] real length_scale)

+

Gaussian process covariance with Matern 5/2 kernel in multiple dimensions with a length scale for each dimension.

+Available since 2.20 + +

+

matrix gp_matern52_cov(vectors x1, vectors x2, real sigma, real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_matern52_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)

+

Gaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in multiple dimensions with a length scale for each dimension.

+Available since 2.20 +
+
+

Periodic kernel

+

With magnitude \(\sigma\), length scale \(l\), and period \(p\), the periodic kernel is:

+

\[ +k(\mathbf{x}_i, \mathbf{x}_j) = \sigma^2 \exp \left(-\frac{2 \sin^2 \left( \pi \frac{|\mathbf{x}_i - \mathbf{x}_j|}{p} \right) }{l^2} \right) +\]

+ +

+

matrix gp_periodic_cov(array[] real x, real sigma, real length_scale, real period)

+

Gaussian process covariance with periodic kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_periodic_cov(array[] real x1, array[] real x2, real sigma, real length_scale, real period)

+

Gaussian process cross-covariance of x1 and x2 with periodic kernel in one dimension.

+Available since 2.20 + +

+

matrix gp_periodic_cov(vectors x, real sigma, real length_scale, real period)

+

Gaussian process covariance with periodic kernel in multiple dimensions.

+Available since 2.20 + +

+

matrix gp_periodic_cov(vectors x1, vectors x2, real sigma, real length_scale, real period)

+

Gaussian process cross-covariance of x1 and x2 with periodic kernel in multiple dimensions.

+Available since 2.20 +
+
+
+

Linear algebra functions and solvers

+
+

Matrix division operators and functions

+

In general, it is much more efficient and also more arithmetically stable to use matrix division than to multiply by an inverse. There are specialized forms for lower triangular matrices and for symmetric, positive-definite matrices.

+
+

Matrix division operators

+ +

+

row_vector operator/(row_vector b, matrix A)
The right division of b by A; equivalently b * inverse(A)

+Available since 2.0 + +

+

matrix operator/(matrix B, matrix A)
The right division of B by A; equivalently B * inverse(A)

+Available since 2.5 + +

+

vector operator\(matrix A, vector b)
The left division of b by A; equivalently inverse(A) * b

+Available since 2.18 + +

+

matrix operator\(matrix A, matrix B)
The left division of B by A; equivalently inverse(A) * B

+Available since 2.18 +
+
+

Lower-triangular matrix division functions

+

There are four division functions which use lower triangular views of a matrix. The lower triangular view of a matrix \(\text{tri}(A)\) is used in the definitions and defined by \[\begin{equation*} \text{tri}(A)[m,n] = \left\{ +\begin{array}{ll} A[m,n] & \text{if } m \geq n, \text{ and} \\[4pt] 0 +& \text{otherwise}. \end{array} \right. \end{equation*}\] When a lower triangular view of a matrix is used, the elements above the diagonal are ignored.

+ +

+

vector mdivide_left_tri_low(matrix A, vector b)
The left division of b by a lower-triangular view of A; algebraically equivalent to the less efficient and stable form inverse(tri(A)) * b, where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.

+Available since 2.12 + +

+

matrix mdivide_left_tri_low(matrix A, matrix B)
The left division of B by a triangular view of A; algebraically equivalent to the less efficient and stable form inverse(tri(A)) * B, where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.

+Available since 2.5 + +

+

row_vector mdivide_right_tri_low(row_vector b, matrix A)
The right division of b by a triangular view of A; algebraically equivalent to the less efficient and stable form b * inverse(tri(A)), where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.

+Available since 2.12 + +

+

matrix mdivide_right_tri_low(matrix B, matrix A)
The right division of B by a triangular view of A; algebraically equivalent to the less efficient and stable form B * inverse(tri(A)), where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.

+Available since 2.5 +
+
+
+

Symmetric positive-definite matrix division functions

+

There are four division functions which are specialized for efficiency and stability for symmetric positive-definite matrix divisors. If the matrix divisor argument is not symmetric and positive definite, these will reject and print warnings.

+ +

+

vector mdivide_left_spd(matrix A, vector b)
The left division of b by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form inverse(A) * b.

+Available since 2.12 + +

+

matrix mdivide_left_spd(matrix A, matrix B)
The left division of B by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form inverse(A) * B.

+Available since 2.12 + +

+

row_vector mdivide_right_spd(row_vector b, matrix A)
The right division of b by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form b * inverse(A).

+Available since 2.12 + +

+

matrix mdivide_right_spd(matrix B, matrix A)
The right division of B by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form B * inverse(A).

+Available since 2.12 +
+
+

Matrix exponential

+

The exponential of the matrix \(A\) is formally defined by the convergent power series: \[\begin{equation*} e^A = \sum_{n=0}^{\infty} \dfrac{A^n}{n!} +\end{equation*}\]

+ +

+

matrix matrix_exp(matrix A)
The matrix exponential of A

+Available since 2.13 + +

+

matrix matrix_exp_multiply(matrix A, matrix B)
The multiplication of matrix exponential of A and matrix B; algebraically equivalent to the less efficient form matrix_exp(A) * B.

+Available since 2.18 + +

+

matrix scale_matrix_exp_multiply(real t, matrix A, matrix B)
The multiplication of matrix exponential of tA and matrix B; algebraically equivalent to the less efficient form matrix_exp(t * A) * B.

+Available since 2.18 +
+
+

Matrix power

+

Returns the nth power of the specific matrix: \[\begin{equation*} M^n = M_1 * ... * M_n \end{equation*}\]

+ +

+

matrix matrix_power(matrix A, int B)
Matrix A raised to the power B.

+Available since 2.24 +
+
+

Linear algebra functions

+
+

Trace

+ +

+

real trace(matrix A)
The trace of A, or 0 if A is empty; A is not required to be diagonal

+Available since 2.0 +
+
+

Determinants

+ +

+

real determinant(matrix A)
The determinant of A

+Available since 2.0 + +

+

real log_determinant(matrix A)
The log of the absolute value of the determinant of A

+Available since 2.0 +

real log_determinant_spd(matrix A)
The log of the absolute value of the determinant of the symmetric, positive-definite matrix A.

+Available since 2.30 +
+
+

Inverses

+

It is almost never a good idea to use matrix inverses directly because they are both inefficient and arithmetically unstable compared to the alternatives. Rather than inverting a matrix m and post-multiplying by a vector or matrix a, as in inverse(m) * a, it is better to code this using matrix division, as in m \ a. The pre-multiplication case is similar, with b * inverse(m) being more efficiently coded as b / m. There are also useful special cases for triangular and symmetric, positive-definite matrices that use more efficient solvers.

+

Warning: The function inv(m) is the elementwise inverse function, which returns 1 / m[i, j] for each element.

+ +

+

matrix inverse(matrix A)
Compute the inverse of A

+Available since 2.0 + +

+

matrix inverse_spd(matrix A)
Compute the inverse of A where A is symmetric, positive definite. This version is faster and more arithmetically stable when the input is symmetric and positive definite.

+Available since 2.0 + +

+

matrix chol2inv(matrix L)
Compute the inverse of the matrix whose cholesky factorization is L. That is, for \(A = L L^T\), return \(A^{-1}\).

+Available since 2.26 +
+
+

Generalized Inverse

+

The generalized inverse \(M^+\) of a matrix \(M\) is a matrix that satisfies \(M M^+ M = M\). For an invertible, square matrix \(M\), \(M^+\) is equivalent to \(M^{-1}\). The dimensions of \(M^+\) are equivalent to the dimensions of \(M^T\). The generalized inverse exists for any matrix, so the \(M\) may be singular or less than full rank.

+

Even though the generalized inverse exists for any arbitrary matrix, the derivatives of this function only exist on matrices of locally constant rank (Golub and Pereyra 1973), meaning, the derivatives do not exist if small perturbations make the matrix change rank. For example, consider the rank of the matrix \(A\) as a function of \(\epsilon\):

+

\[ +A = \left( + \begin{array}{cccc} + 1 + \epsilon & 2 & 1 \\ + 2 & 4 & 2 + \end{array} + \right) +\]

+

When \(\epsilon = 0\), \(A\) is rank 1 because the second row is twice the first (and so there is only one linearly independent row). If \(\epsilon \neq 0\), the rows are no longer linearly dependent, and the matrix is rank 2. This matrix does not have locally constant rank at \(\epsilon = 0\), and so the derivatives do not exist at zero. Because HMC depends on the derivatives existing, this lack of differentiability creates undefined behavior.

+ +

+

matrix generalized_inverse(matrix A)
The generalized inverse of A

+Available since 2.26 +
+
+

Eigendecomposition

+ +

+

complex_vector eigenvalues(matrix A)
The complex-valued vector of eigenvalues of the matrix A. The eigenvalues are repeated according to their algebraic multiplicity, so there are as many eigenvalues as rows in the matrix. The eigenvalues are not sorted in any particular order.

+Available since 2.30 + +

+

complex_matrix eigenvectors(matrix A)
The matrix with the complex-valued (column) eigenvectors of the matrix A in the same order as returned by the function eigenvalues

+Available since 2.30 + +

+

tuple(complex_matrix, complex_vector) eigendecompose(matrix A)
Return the matrix of (column) eigenvectors and vector of eigenvalues of the matrix A. This function is equivalent to (eigenvectors(A), eigenvalues(A)) but with a lower computational cost due to the shared work between the two results.

+Available since 2.33 + +

+

vector eigenvalues_sym(matrix A)
The vector of eigenvalues of a symmetric matrix A in ascending order

+Available since 2.0 + +

+

matrix eigenvectors_sym(matrix A)
The matrix with the (column) eigenvectors of symmetric matrix A in the same order as returned by the function eigenvalues_sym

+Available since 2.0 + +

+

tuple(matrix, vector) eigendecompose_sym(matrix A)
Return the matrix of (column) eigenvectors and vector of eigenvalues of the symmetric matrix A. This function is equivalent to (eigenvectors_sym(A), eigenvalues_sym(A)) but with a lower computational cost due to the shared work between the two results.

+Available since 2.33 +

Because multiplying an eigenvector by \(-1\) results in an eigenvector, eigenvectors returned by a decomposition are only identified up to a sign change. In order to compare the eigenvectors produced by Stan’s eigendecomposition to others, signs may need to be normalized in some way, such as by fixing the sign of a component, or doing comparisons allowing a multiplication by \(-1\).

+

The condition number of a symmetric matrix is defined to be the ratio of the largest eigenvalue to the smallest eigenvalue. Large condition numbers lead to difficulty in numerical algorithms such as computing inverses, and such matrices are thus known as “ill conditioned.” The ratio can even be infinite in the case of singular matrices (i.e., those with eigenvalues of 0).

+
+
+

QR decomposition

+ +

+

matrix qr_thin_Q(matrix A)
The orthogonal matrix in the thin QR decomposition of A, which implies that the resulting matrix has the same dimensions as A

+Available since 2.18 + +

+

matrix qr_thin_R(matrix A)
The upper triangular matrix in the thin QR decomposition of A, which implies that the resulting matrix is square with the same number of columns as A

+Available since 2.18 + +

+

tuple(matrix, matrix) qr_thin(matrix A)
Returns both portions of the QR decomposition of A. The first element (“Q”) is the orthonormal matrix in the thin QR decomposition and the second element (“R”) is upper triangular. This function is equivalent to (qr_thin_Q(A), qr_thin_R(A)) but with a lower computational cost due to the shared work between the two results.

+Available since 2.33 + +

+

matrix qr_Q(matrix A)
The orthogonal matrix in the fat QR decomposition of A, which implies that the resulting matrix is square with the same number of rows as A

+Available since 2.3 + +

+

matrix qr_R(matrix A)
The upper trapezoidal matrix in the fat QR decomposition of A, which implies that the resulting matrix will be rectangular with the same dimensions as A

+Available since 2.3 + +

tuple(matrix, matrix) qr(matrix A)
Returns both portions of the QR decomposition of A. The first element (“Q”) is the orthogonal matrix in the fat QR decomposition and the second element (“R”) is upper trapezoidal. This function is equivalent to (qr_Q(A), qr_R(A)) but with a lower computational cost due to the shared work between the two results.

+Available since 2.33 +

The thin QR decomposition is always preferable because it will consume much less memory when the input matrix is large than will the fat QR decomposition. Both versions of the decomposition represent the input matrix as \[\begin{equation*} A = Q \, R. \end{equation*}\] Multiplying a column of an orthogonal matrix by \(-1\) still results in an orthogonal matrix, and you can multiply the corresponding row of the upper trapezoidal matrix by \(-1\) without changing the product. Thus, Stan adopts the normalization that the diagonal elements of the upper trapezoidal matrix are strictly positive and the columns of the orthogonal matrix are reflected if necessary. Also, these QR decomposition algorithms do not utilize pivoting and thus may be numerically unstable on input matrices that have less than full rank.

+
+
+

Cholesky decomposition

+

Every symmetric, positive-definite matrix (such as a correlation or covariance matrix) has a Cholesky decomposition. If \(\Sigma\) is a symmetric, positive-definite matrix, its Cholesky decomposition is the lower-triangular matrix \(L\) such that \[\begin{equation*} \Sigma = L \, L^{\top}. \end{equation*}\]

+ +

+

matrix cholesky_decompose(matrix A)
The lower-triangular Cholesky factor of the symmetric positive-definite matrix A

+Available since 2.0 +
+
+

Singular value decomposition

+

The matrix A can be decomposed into a diagonal matrix of singular values, D, and matrices of its left and right singular vectors, U and V, \[\begin{equation*} A = U D V^T. \end{equation*}\] The matrices of singular vectors here are thin. That is, for an \(N\) by \(P\) input A, \(M = \min(N, P)\), U is size \(N\) by \(M\) and V is size \(P\) by \(M\).

+ +

+

vector singular_values(matrix A)
The singular values of A in descending order

+Available since 2.0 + +

+

matrix svd_U(matrix A)
The left-singular vectors of A

+Available since 2.26 + +

+

matrix svd_V(matrix A)
The right-singular vectors of A

+Available since 2.26 + +

+

tuple(matrix, vector, matrix) svd(matrix A)
Returns a tuple containing the left-singular vectors of A, the singular values of A in descending order, and the right-singular vectors of A. This function is equivalent to (svd_U(A), singular_values(A), svd_V(A)) but with a lower computational cost due to the shared work between the different components.

+Available since 2.33 +
+
+
+
+

Sort functions

+

See the sorting functions section for examples of how the functions work.

+ +

+

vector sort_asc(vector v)
Sort the elements of v in ascending order

+Available since 2.0 + +

+

row_vector sort_asc(row_vector v)
Sort the elements of v in ascending order

+Available since 2.0 + +

+

vector sort_desc(vector v)
Sort the elements of v in descending order

+Available since 2.0 + +

+

row_vector sort_desc(row_vector v)
Sort the elements of v in descending order

+Available since 2.0 + +

+

array[] int sort_indices_asc(vector v)
Return an array of indices between 1 and the size of v, sorted to index v in ascending order.

+Available since 2.3 + +

+

array[] int sort_indices_asc(row_vector v)
Return an array of indices between 1 and the size of v, sorted to index v in ascending order.

+Available since 2.3 + +

+

array[] int sort_indices_desc(vector v)
Return an array of indices between 1 and the size of v, sorted to index v in descending order.

+Available since 2.3 + +

+

array[] int sort_indices_desc(row_vector v)
Return an array of indices between 1 and the size of v, sorted to index v in descending order.

+Available since 2.3 + +

+

int rank(vector v, int s)
Number of components of v less than v[s]

+Available since 2.0 + +

+

int rank(row_vector v, int s)
Number of components of v less than v[s]

+Available since 2.0 +
+
+

Reverse functions

+ +

+

vector reverse(vector v)
Return a new vector containing the elements of the argument in reverse order.

+Available since 2.23 + +

+

row_vector reverse(row_vector v)
Return a new row vector containing the elements of the argument in reverse order.

+Available since 2.23 + + + +
+
+ + + Back to top

References

+
+Golub, G. H., and V. Pereyra. 1973. “The Differentiation of Pseudo-Inverses and Nonlinear Least Squares Problems Whose Variables Separate.” SIAM Journal on Numerical Analysis 10 (2): 413–32. https://doi.org/10.1137/0710036. +
+

Footnotes

+ +
    +
  1. The softmax function is so called because in the limit as \(y_n \rightarrow \infty\) with \(y_m\) for \(m \neq n\) held constant, the result tends toward the “one-hot” vector \(\theta\) with \(\theta_n += 1\) and \(\theta_m = 0\) for \(m \neq n\), thus providing a “soft” version of the maximum function.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/mixed_operations.html b/docs/2_39/functions-reference/mixed_operations.html new file mode 100644 index 000000000..af4d23e2a --- /dev/null +++ b/docs/2_39/functions-reference/mixed_operations.html @@ -0,0 +1,1378 @@ + + + + + + + + + +Mixed Operations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Mixed Operations

+

These functions perform conversions between Stan containers matrix, vector, row vector and arrays.

+ +

+

matrix to_matrix(matrix m)
Return the matrix m itself.

+Available since 2.3 + +

+

complex_matrix to_matrix(complex_matrix m)
Return the matrix m itself.

+Available since 2.30 + +

+

matrix to_matrix(vector v)
Convert the column vector v to a size(v) by 1 matrix.

+Available since 2.3 + +

+

complex_matrix to_matrix(complex_vector v)
Convert the column vector v to a size(v) by 1 matrix.

+Available since 2.30 + +

+

matrix to_matrix(row_vector v)
Convert the row vector v to a 1 by size(v) matrix.

+Available since 2.3 + +

+

complex_matrix to_matrix(complex_row_vector v)
Convert the row vector v to a 1 by size(v) matrix.

+Available since 2.30 + +

+

matrix to_matrix(matrix M, int m, int n)
Convert a matrix M to a matrix with m rows and n columns filled in column-major order.

+Available since 2.15 + +

+

complex_matrix to_matrix(complex_matrix M, int m, int n)
Convert a matrix M to a matrix with m rows and n columns filled in column-major order.

+Available since 2.30 + +

+

matrix to_matrix(vector v, int m, int n)
Convert a vector v to a matrix with m rows and n columns filled in column-major order.

+Available since 2.15 + +

+

complex_matrix to_matrix(complex_vector v, int m, int n)
Convert a vector v to a matrix with m rows and n columns filled in column-major order.

+Available since 2.30 + +

+

matrix to_matrix(row_vector v, int m, int n)
Convert a row vector v to a matrix with m rows and n columns filled in column-major order.

+Available since 2.15 + +

+

complex_matrix to_matrix(complex_row_vector v, int m, int n)
Convert a row vector v to a matrix with m rows and n columns filled in column-major order.

+Available since 2.30 + +

+

matrix to_matrix(matrix A, int m, int n, int col_major)
Convert a matrix A to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.15 + +

+

complex_matrix to_matrix(complex_matrix A, int m, int n, int col_major)
Convert a matrix A to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.30 + +

+

matrix to_matrix(vector v, int m, int n, int col_major)
Convert a vector v to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.15 + +

+

complex_matrix to_matrix(complex_vector v, int m, int n, int col_major)
Convert a vector v to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.30 + +

+

matrix to_matrix(row_vector v, int m, int n, int col_major)
Convert a row vector v to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.15 + +

+

complex_matrix to_matrix(complex_row_vector v, int m, int n, int col_major)
Convert a row vector v to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.30 + +

+

matrix to_matrix(array[] real a, int m, int n)
Convert a one-dimensional array a to a matrix with m rows and n columns filled in column-major order.

+Available since 2.15 + +

+

matrix to_matrix(array[] int a, int m, int n)
Convert a one-dimensional array a to a matrix with m rows and n columns filled in column-major order.

+Available since 2.15 + +

+

complex_matrix to_matrix(array[] complex a, int m, int n)
Convert a one-dimensional array a to a matrix with m rows and n columns filled in column-major order.

+Available since 2.30 + +

+

matrix to_matrix(array[] real a, int m, int n, int col_major)
Convert a one-dimensional array a to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.15 + +

+

matrix to_matrix(array[] int a, int m, int n, int col_major)
Convert a one-dimensional array a to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.15 + +

+

complex_matrix to_matrix(array[] complex a, int m, int n, int col_major)
Convert a one-dimensional array a to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).

+Available since 2.30 + +

+

matrix to_matrix(array[] row_vector vs)
Convert a one-dimensional array of row vectors to a matrix, where the size of the array is the number of rows of the resulting matrix and the length of row vectors is the number of columns.

+Available since 2.28 + +

+

complex_matrix to_matrix(array[] complex_row_vector vs)
Convert a one-dimensional array of row vectors to a matrix, where the size of the array is the number of rows of the resulting matrix and the length of row vectors is the number of columns.

+Available since 2.30 + +

+

matrix to_matrix(array[,] real a)
Convert the two dimensional array a to a matrix with the same dimensions and indexing order.

+Available since 2.3 + +

+

matrix to_matrix(array[,] int a)
Convert the two dimensional array a to a matrix with the same dimensions and indexing order. If any of the dimensions of a are zero, the result will be a \(0 \times 0\) matrix.

+Available since 2.3 + +

+

complex_matrix to_matrix(array[,] complex a )
Convert the two dimensional array a to a matrix with the same dimensions and indexing order.

+Available since 2.30 + +

+

vector to_vector(matrix m)
Convert the matrix m to a column vector in column-major order.

+Available since 2.0 + +

+

complex_vector to_vector(complex_matrix m)
Convert the matrix m to a column vector in column-major order.

+Available since 2.30 + +

+

vector to_vector(vector v)
Return the column vector v itself.

+Available since 2.3 + +

+

complex_vector to_vector(complex_vector v)
Return the column vector v itself.

+Available since 2.30 + +

+

vector to_vector(row_vector v)
Convert the row vector v to a column vector.

+Available since 2.3 + +

+

complex_vector to_vector(complex_row_vector v)
Convert the row vector v to a column vector.

+Available since 2.30 + +

+

vector to_vector(array[] real a)
Convert the one-dimensional array a to a column vector.

+Available since 2.3 + +

+

vector to_vector(array[] int a)
Convert the one-dimensional integer array a to a column vector.

+Available since 2.3 + +

+

complex_vector to_vector(array[] complex a)
Convert the one-dimensional complex array a to a column vector.

+Available since 2.30 + +

+

row_vector to_row_vector(matrix m)
Convert the matrix m to a row vector in column-major order.

+Available since 2.3 + +

+

complex_row_vector to_row_vector(complex_matrix m)
Convert the matrix m to a row vector in column-major order.

+Available since 2.30 + +

+

row_vector to_row_vector(vector v)
Convert the column vector v to a row vector.

+Available since 2.3 + +

+

complex_row_vector to_row_vector(complex_vector v)
Convert the column vector v to a row vector.

+Available since 2.30 + +

+

row_vector to_row_vector(row_vector v)
Return the row vector v itself.

+Available since 2.3 + +

+

complex_row_vector to_row_vector(complex_row_vector v)
Return the row vector v itself.

+Available since 2.30 + +

+

row_vector to_row_vector(array[] real a)
Convert the one-dimensional array a to a row vector.

+Available since 2.3 + +

+

row_vector to_row_vector(array[] int a)
Convert the one-dimensional array a to a row vector.

+Available since 2.3 + +

+

complex_row_vector to_row_vector(array[] complex a)
Convert the one-dimensional complex array a to a row vector.

+Available since 2.30 + +

+

array[,] real to_array_2d(matrix m)
Convert the matrix m to a two dimensional array with the same dimensions and indexing order.

+Available since 2.3 + +

+

array[,] complex to_array_2d(complex_matrix m)
Convert the matrix m to a two dimensional array with the same dimensions and indexing order.

+Available since 2.30 + +

+

array[] real to_array_1d(vector v)
Convert the column vector v to a one-dimensional array.

+Available since 2.3 + +

+

array[] complex to_array_1d(complex_vector v)
Convert the column vector v to a one-dimensional array.

+Available since 2.30 + +

+

array[] real to_array_1d(row_vector v)
Convert the row vector v to a one-dimensional array.

+Available since 2.3 + +

+

array[] complex to_array_1d(complex_row_vector v)
Convert the row vector v to a one-dimensional array.

+Available since 2.30 + +

+

array[] real to_array_1d(matrix m)
Convert the matrix m to a one-dimensional array in column-major order.

+Available since 2.3 + +

+

array[] complex to_array_1d(complex_matrix m)
Convert the matrix m to a one-dimensional array in column-major order.

+Available since 2.30 + +

+

array[] real to_array_1d(array[...] real a)
Convert the array a (of any dimension up to 10) to a one-dimensional array in row-major order.

+Available since 2.3 + +

+

array[] int to_array_1d(array[...] int a)
Convert the array a (of any dimension up to 10) to a one-dimensional array in row-major order.

+Available since 2.3 + +

+

array[] complex to_array_1d(array[...] complex a)
Convert the array a (of any dimension up to 10) to a one-dimensional array in row-major order.

+Available since 2.30 + + +
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/multivariate_discrete_distributions.html b/docs/2_39/functions-reference/multivariate_discrete_distributions.html new file mode 100644 index 000000000..007d82fcc --- /dev/null +++ b/docs/2_39/functions-reference/multivariate_discrete_distributions.html @@ -0,0 +1,1273 @@ + + + + + + + + + +Multivariate Discrete Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Multivariate Discrete Distributions

+

The multivariate discrete distributions are over multiple integer values, which are expressed in Stan as arrays.

+
+

Multinomial distribution

+
+

Probability mass function

+

If \(K \in \mathbb{N}\), \(N \in \mathbb{N}\), and \(\theta \in \text{$K$-simplex}\), then for \(y \in \mathbb{N}^K\) such that \(\sum_{k=1}^K y_k = N\), \[\begin{equation*} +\text{Multinomial}(y|\theta) = \binom{N}{y_1,\ldots,y_K} \prod_{k=1}^K \theta_k^{y_k}, +\end{equation*}\] where the multinomial coefficient is defined by \[\begin{equation*} +\binom{N}{y_1,\ldots,y_K} = \frac{N!}{\prod_{k=1}^K y_k!}. +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ multinomial(theta)

+

Increment target log probability density with multinomial_lupmf(y | theta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real multinomial_lpmf(array[] int y | vector theta)
The log multinomial probability mass function with outcome array y of size \(K\) given the \(K\)-simplex distribution parameter theta and (implicit) total count N = sum(y)

+Available since 2.12 + +

+

real multinomial_lupmf(array[] int y | vector theta)
The log multinomial probability mass function with outcome array y of size \(K\) given the \(K\)-simplex distribution parameter theta and (implicit) total count N = sum(y) dropping constant additive terms

+Available since 2.25 + +

+

array[] int multinomial_rng(vector theta, int N)
Generate a multinomial variate with simplex distribution parameter theta and total count \(N\); may only be used in transformed data and generated quantities blocks

+Available since 2.8 +
+
+
+

Multinomial distribution, logit parameterization

+

Stan also provides a version of the multinomial distribution in which the \(\text{$K$-simplex}\) of event count probabilities per category is given on the unconstrained logistic scale.

+
+

Probability mass function

+

If \(K \in \mathbb{N}\), \(N \in \mathbb{N}\), and \(\text{softmax}(\gamma) \in \text{$K$-simplex}\), then for \(y \in \mathbb{N}^K\) such that \(\sum_{k=1}^K y_k = N\), \[\begin{equation*} +\begin{split} +\text{MultinomialLogit}(y \mid \gamma) & = \text{Multinomial}(y \mid \text{softmax}(\gamma)) \\ +& = \binom{N}{y_1,\ldots,y_K} \prod_{k=1}^K [\text{softmax}(\gamma)_k]^{y_k}, +\end{split} +\end{equation*}\] where the multinomial coefficient is defined by \[\begin{equation*} +\binom{N}{y_1,\ldots,y_K} = \frac{N!}{\prod_{k=1}^K y_k!}. +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ multinomial_logit(gamma)

+

Increment target log probability density with multinomial_logit_lupmf(y | gamma).

+Available since 2.24 + +

+
+
+

Stan functions

+ +

+

real multinomial_logit_lpmf(array[] int y | vector gamma)
The log multinomial probability mass function with outcome array y of size \(K\) given the log \(K\)-simplex distribution parameter \(\gamma\) and (implicit) total count N = sum(y)

+Available since 2.24 + +

+

real multinomial_logit_lupmf(array[] int y | vector gamma)
The log multinomial probability mass function with outcome array y of size \(K\) given the log \(K\)-simplex distribution parameter \(\gamma\) and (implicit) total count N = sum(y) dropping constant additive terms

+Available since 2.25 + +

+

array[] int multinomial_logit_rng(vector gamma, int N)
Generate a variate from a multinomial distribution with probabilities softmax(gamma) and total count N; may only be used in transformed data and generated quantities blocks.

+Available since 2.24 +
+
+
+

Dirichlet-multinomial distribution

+

Stan also provides the Dirichlet-multinomial distribution, which generalizes the Beta-binomial distribution to more than two categories. As such, it is an overdispersed version of the multinomial distribution.

+
+

Probability mass function

+

If \(K \in \mathbb{N}\), \(N \in \mathbb{N}\), and \(\alpha \in +\mathbb{R}_{+}^K\), then for \(y \in \mathbb{N}^K\) such that \(\sum_{k=1}^K y_k = N\), the PMF of the Dirichlet-multinomial distribution is defined as \[\begin{equation*} +\text{DirMult}(y|\alpha) = +\frac{\Gamma(\alpha_0)\Gamma(N+1)}{\Gamma(N+\alpha_0)} \prod_{k=1}^K \frac{\Gamma(y_k + \alpha_k)}{\Gamma(\alpha_k)\Gamma(y_k+1)}, +\end{equation*}\] where \(\alpha_0\) is defined as \(\alpha_0 = \sum_{k=1}^K \alpha_k\).

+
+
+

Distribution statement

+

y ~ dirichlet_multinomial(alpha)

+

Increment target log probability density with dirichlet_multinomial_lupmf(y | alpha).

+Available since 2.34 + +

+
+
+

Stan functions

+ +

+

real dirichlet_multinomial_lpmf(array[] int y | vector alpha)
The log Dirichlet-multinomial probability mass function with outcome array y with \(K\) elements given the positive \(K\)-vector distribution parameter alpha and (implicit) total count N = sum(y).

+Available since 2.34 + +

+

real dirichlet_multinomial_lupmf(array[] int y | vector alpha)
The log Dirichlet-multinomial probability mass function with outcome array y with \(K\) elements, given the positive \(K\)-vector distribution parameter alpha and (implicit) total count N = sum(y) dropping constant additive terms.

+Available since 2.34 + +

+

array[] int dirichlet_multinomial_rng(vector alpha, int N)
Generate a Dirichlet-multinomial variate with positive vector distribution parameter alpha and total count N; may only be used in transformed data and generated quantities blocks. This is equivalent to multinomial_rng(dirichlet_rng(alpha), N).

+Available since 2.34 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/positive_continuous_distributions.html b/docs/2_39/functions-reference/positive_continuous_distributions.html new file mode 100644 index 000000000..6c27f5814 --- /dev/null +++ b/docs/2_39/functions-reference/positive_continuous_distributions.html @@ -0,0 +1,1699 @@ + + + + + + + + + +Positive Continuous Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Positive Continuous Distributions

+

The positive continuous probability functions have support on the positive real numbers.

+
+

Lognormal distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\) and \(\sigma \in \mathbb{R}^+\), then for \(y \in +\mathbb{R}^+\), \[\begin{equation*} \text{LogNormal}(y|\mu,\sigma) = \frac{1}{\sqrt{2 +\pi} \ \sigma} \, \frac{1}{y} \ \exp \! \left( - \, \frac{1}{2} +\, \left( \frac{\log y - \mu}{\sigma} \right)^2 \right) . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ lognormal(mu, sigma)

+

Increment target log probability density with lognormal_lupdf(y | mu, sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real lognormal_lpdf(reals y | reals mu, reals sigma)
The log of the lognormal density of y given location mu and scale sigma

+Available since 2.12 + +

+

real lognormal_lupdf(reals y | reals mu, reals sigma)
The log of the lognormal density of y given location mu and scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real lognormal_cdf(reals y | reals mu, reals sigma)
The cumulative lognormal distribution function of y given location mu and scale sigma

+Available since 2.0 + +

+

real lognormal_lcdf(reals y | reals mu, reals sigma)
The log of the lognormal cumulative distribution function of y given location mu and scale sigma

+Available since 2.12 + +

+

real lognormal_lccdf(reals y | reals mu, reals sigma)
The log of the lognormal complementary cumulative distribution function of y given location mu and scale sigma

+Available since 2.12 + +

+

R lognormal_rng(reals mu, reals sigma)
Generate a lognormal variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.22 +
+
+
+

Chi-square distribution

+
+

Probability density function

+

If \(\nu \in \mathbb{R}^+\), then for \(y \in \mathbb{R}^+\), \[\begin{equation*} +\text{ChiSquare}(y|\nu) = \frac{2^{-\nu/2}} {\Gamma(\nu / 2)} \, +y^{\nu/2 - 1} \, \exp \! \left( -\, \frac{1}{2} \, y \right) . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ chi_square(nu)

+

Increment target log probability density with chi_square_lupdf(y | nu).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real chi_square_lpdf(reals y | reals nu)
The log of the Chi-square density of y given degrees of freedom nu

+Available since 2.12 + +

+

real chi_square_lupdf(reals y | reals nu)
The log of the Chi-square density of y given degrees of freedom nu dropping constant additive terms

+Available since 2.25 + +

+

real chi_square_cdf(reals y | reals nu)
The Chi-square cumulative distribution function of y given degrees of freedom nu

+Available since 2.0 + +

+

real chi_square_lcdf(reals y | reals nu)
The log of the Chi-square cumulative distribution function of y given degrees of freedom nu

+Available since 2.12 + +

+

real chi_square_lccdf(reals y | reals nu)
The log of the complementary Chi-square cumulative distribution function of y given degrees of freedom nu

+Available since 2.12 + +

+

R chi_square_rng(reals nu)
Generate a Chi-square variate with degrees of freedom nu; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Inverse chi-square distribution

+
+

Probability density function

+

If \(\nu \in \mathbb{R}^+\), then for \(y \in \mathbb{R}^+\), \[\begin{equation*} +\text{InvChiSquare}(y \, | \, \nu) = \frac{2^{-\nu/2}} {\Gamma(\nu +/ 2)} \, y^{-\nu/2 - 1} \, \exp\! \left( \! - \, \frac{1}{2} \, +\frac{1}{y} \right) . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ inv_chi_square(nu)

+

Increment target log probability density with inv_chi_square_lupdf(y | nu).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real inv_chi_square_lpdf(reals y | reals nu)
The log of the inverse Chi-square density of y given degrees of freedom nu

+Available since 2.12 + +

+

real inv_chi_square_lupdf(reals y | reals nu)
The log of the inverse Chi-square density of y given degrees of freedom nu dropping constant additive terms

+Available since 2.25 + +

+

real inv_chi_square_cdf(reals y | reals nu)
The inverse Chi-squared cumulative distribution function of y given degrees of freedom nu

+Available since 2.0 + +

+

real inv_chi_square_lcdf(reals y | reals nu)
The log of the inverse Chi-squared cumulative distribution function of y given degrees of freedom nu

+Available since 2.12 + +

+

real inv_chi_square_lccdf(reals y | reals nu)
The log of the inverse Chi-squared complementary cumulative distribution function of y given degrees of freedom nu

+Available since 2.12 + +

+

R inv_chi_square_rng(reals nu)
Generate an inverse Chi-squared variate with degrees of freedom nu; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Scaled inverse chi-square distribution

+
+

Probability density function

+

If \(\nu \in \mathbb{R}^+\) and \(\sigma \in \mathbb{R}^+\), then for \(y +\in \mathbb{R}^+\), \[\begin{equation*} \text{ScaledInvChiSquare}(y|\nu,\sigma) = +\frac{(\nu / 2)^{\nu/2}} {\Gamma(\nu / 2)} \, \sigma^{\nu} \, +y^{-(\nu/2 + 1)} \, \exp \! \left( \! - \, \frac{1}{2} \, \nu \, +\sigma^2 \, \frac{1}{y} \right) . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ scaled_inv_chi_square(nu, sigma)

+

Increment target log probability density with scaled_inv_chi_square_lupdf(y | nu, sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real scaled_inv_chi_square_lpdf(reals y | reals nu, reals sigma)
The log of the scaled inverse Chi-square density of y given degrees of freedom nu and scale sigma

+Available since 2.12 + +

+

real scaled_inv_chi_square_lupdf(reals y | reals nu, reals sigma)
The log of the scaled inverse Chi-square density of y given degrees of freedom nu and scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real scaled_inv_chi_square_cdf(reals y | reals nu, reals sigma)
The scaled inverse Chi-square cumulative distribution function of y given degrees of freedom nu and scale sigma

+Available since 2.0 + +

+

real scaled_inv_chi_square_lcdf(reals y | reals nu, reals sigma)
The log of the scaled inverse Chi-square cumulative distribution function of y given degrees of freedom nu and scale sigma

+Available since 2.12 + +

+

real scaled_inv_chi_square_lccdf(reals y | reals nu, reals sigma)
The log of the scaled inverse Chi-square complementary cumulative distribution function of y given degrees of freedom nu and scale sigma

+Available since 2.12 + +

+

R scaled_inv_chi_square_rng(reals nu, reals sigma)
Generate a scaled inverse Chi-squared variate with degrees of freedom nu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Exponential distribution

+
+

Probability density function

+

If inverse scale (rate) \(\beta \in \mathbb{R}^+\), then for \(y \in \mathbb{R}^+\), \[\begin{equation*} +\text{Exponential}(y|\beta) = \beta \, \exp ( - \beta \, y ) . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ exponential(beta)

+

Increment target log probability density with exponential_lupdf(y | beta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real exponential_lpdf(reals y | reals beta)
The log of the exponential density of y given inverse scale beta

+Available since 2.12 + +

+

real exponential_lupdf(reals y | reals beta)
The log of the exponential density of y given inverse scale beta dropping constant additive terms

+Available since 2.25 + +

+

real exponential_cdf(reals y | reals beta)
The exponential cumulative distribution function of y given inverse scale beta

+Available since 2.0 + +

+

real exponential_lcdf(reals y | reals beta)
The log of the exponential cumulative distribution function of y given inverse scale beta

+Available since 2.12 + +

+

real exponential_lccdf(reals y | reals beta)
The log of the exponential complementary cumulative distribution function of y given inverse scale beta

+Available since 2.12 + +

+

R exponential_rng(reals beta)
Generate an exponential variate with inverse scale beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Gamma distribution

+
+

Probability density function

+

If the shape parameter \(\alpha \in \mathbb{R}^+\) and the rate (or inverse scale) parameter \(\beta \in \mathbb{R}^+\), then for \(y +\in \mathbb{R}^+\), \[\begin{equation*} \text{Gamma}(y|\alpha,\beta) = +\frac{\beta^{\alpha}} {\Gamma(\alpha)} \, y^{\alpha - 1} +\exp(-\beta \, y) . \end{equation*}\]

+

Under the shape and rate formulation of the Gamma distribution, \(\mathbb{E}[y] = \alpha / \beta\) and \(\textrm{var}[y] = \alpha / \beta^2\).

+
+
+

Distribution statement

+

y ~ gamma(alpha, beta)

+

Increment target log probability density with gamma_lupdf(y | alpha, beta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real gamma_lpdf(reals y | reals alpha, reals beta)
The log of the gamma density of y given shape alpha and inverse scale beta

+Available since 2.12 + +

+

real gamma_lupdf(reals y | reals alpha, reals beta)
The log of the gamma density of y given shape alpha and inverse scale beta dropping constant additive terms

+Available since 2.25 + +

+

real gamma_cdf(reals y | reals alpha, reals beta)
The cumulative gamma distribution function of y given shape alpha and inverse scale beta

+Available since 2.0 + +

+

real gamma_lcdf(reals y | reals alpha, reals beta)
The log of the cumulative gamma distribution function of y given shape alpha and inverse scale beta

+Available since 2.12 + +

+

real gamma_lccdf(reals y | reals alpha, reals beta)
The log of the complementary cumulative gamma distribution function of y given shape alpha and inverse scale beta

+Available since 2.12 + +

+

R gamma_rng(reals alpha, reals beta)
Generate a gamma variate with shape alpha and inverse scale beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Inverse gamma distribution

+
+

Probability density function

+

If \(\alpha \in \mathbb{R}^+\) and \(\beta \in \mathbb{R}^+\), then for \(y +\in \mathbb{R}^+\), \[\begin{equation*} \text{InvGamma}(y|\alpha,\beta) = +\frac{\beta^{\alpha}} {\Gamma(\alpha)} \ y^{-(\alpha + 1)} \, +\exp \! \left( \! - \beta \, \frac{1}{y} \right) . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ inv_gamma(alpha, beta)

+

Increment target log probability density with inv_gamma_lupdf(y | alpha, beta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real inv_gamma_lpdf(reals y | reals alpha, reals beta)
The log of the inverse gamma density of y given shape alpha and scale beta

+Available since 2.12 + +

+

real inv_gamma_lupdf(reals y | reals alpha, reals beta)
The log of the inverse gamma density of y given shape alpha and scale beta dropping constant additive terms

+Available since 2.25 + +

+

real inv_gamma_cdf(reals y | reals alpha, reals beta)
The inverse gamma cumulative distribution function of y given shape alpha and scale beta

+Available since 2.0 + +

+

real inv_gamma_lcdf(reals y | reals alpha, reals beta)
The log of the inverse gamma cumulative distribution function of y given shape alpha and scale beta

+Available since 2.12 + +

+

real inv_gamma_lccdf(reals y | reals alpha, reals beta)
The log of the inverse gamma complementary cumulative distribution function of y given shape alpha and scale beta

+Available since 2.12 + +

+

R inv_gamma_rng(reals alpha, reals beta)
Generate an inverse gamma variate with shape alpha and scale beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Weibull distribution

+
+

Probability density function

+

If \(\alpha \in \mathbb{R}^+\) and \(\sigma \in \mathbb{R}^+\), then for \(y \in [0,\infty)\), \[\begin{equation*} \text{Weibull}(y|\alpha,\sigma) = +\frac{\alpha}{\sigma} \, \left( \frac{y}{\sigma} \right)^{\alpha - 1} +\, \exp \! \left( \! - \left( \frac{y}{\sigma} \right)^{\alpha} +\right) . \end{equation*}\]

+

Note that if \(Y \sim \text{Weibull}(\alpha,\sigma)\), then \(Y^{-1} +\sim \text{Frechet}(\alpha,\sigma^{-1})\).

+
+
+

Distribution statement

+

y ~ weibull(alpha, sigma)

+

Increment target log probability density with weibull_lupdf(y | alpha, sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real weibull_lpdf(reals y | reals alpha, reals sigma)
The log of the Weibull density of y given shape alpha and scale sigma

+Available since 2.12 + +

+

real weibull_lupdf(reals y | reals alpha, reals sigma)
The log of the Weibull density of y given shape alpha and scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real weibull_cdf(reals y | reals alpha, reals sigma)
The Weibull cumulative distribution function of y given shape alpha and scale sigma

+Available since 2.0 + +

+

real weibull_lcdf(reals y | reals alpha, reals sigma)
The log of the Weibull cumulative distribution function of y given shape alpha and scale sigma

+Available since 2.12 + +

+

real weibull_lccdf(reals y | reals alpha, reals sigma)
The log of the Weibull complementary cumulative distribution function of y given shape alpha and scale sigma

+Available since 2.12 + +

+

R weibull_rng(reals alpha, reals sigma)
Generate a Weibull variate with shape alpha and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Frechet distribution

+
+

Probability density function

+

If \(\alpha \in \mathbb{R}^+\) and \(\sigma \in \mathbb{R}^+\), then for \(y \in \mathbb{R}^+\), \[\begin{equation*} \text{Frechet}(y|\alpha,\sigma) = +\frac{\alpha}{\sigma} \, \left( \frac{y}{\sigma} \right)^{-\alpha - 1} +\, \exp \! \left( \! - \left( \frac{y}{\sigma} \right)^{-\alpha} +\right) . \end{equation*}\]

+

Note that if \(Y \sim \text{Frechet}(\alpha,\sigma)\), then \(Y^{-1} +\sim \text{Weibull}(\alpha,\sigma^{-1})\).

+
+
+

Distribution statement

+

y ~ frechet(alpha, sigma)

+

Increment target log probability density with frechet_lupdf(y | alpha, sigma).

+Available since 2.5 + +

+
+
+

Stan functions

+ +

+

real frechet_lpdf(reals y | reals alpha, reals sigma)
The log of the Frechet density of y given shape alpha and scale sigma

+Available since 2.12 + +

+

real frechet_lupdf(reals y | reals alpha, reals sigma)
The log of the Frechet density of y given shape alpha and scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real frechet_cdf(reals y | reals alpha, reals sigma)
The Frechet cumulative distribution function of y given shape alpha and scale sigma

+Available since 2.5 + +

+

real frechet_lcdf(reals y | reals alpha, reals sigma)
The log of the Frechet cumulative distribution function of y given shape alpha and scale sigma

+Available since 2.12 + +

+

real frechet_lccdf(reals y | reals alpha, reals sigma)
The log of the Frechet complementary cumulative distribution function of y given shape alpha and scale sigma

+Available since 2.12 + +

+

R frechet_rng(reals alpha, reals sigma)
Generate a Frechet variate with shape alpha and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Rayleigh distribution

+
+

Probability density function

+

If \(\sigma \in \mathbb{R}^+\), then for \(y \in [0,\infty)\), \[\begin{equation*} +\text{Rayleigh}(y|\sigma) = \frac{y}{\sigma^2} \exp(-y^2 / 2\sigma^2) +\!. \end{equation*}\]

+
+
+

Distribution statement

+

y ~ rayleigh(sigma)

+

Increment target log probability density with rayleigh_lupdf(y | sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real rayleigh_lpdf(reals y | reals sigma)
The log of the Rayleigh density of y given scale sigma

+Available since 2.12 + +

+

real rayleigh_lupdf(reals y | reals sigma)
The log of the Rayleigh density of y given scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real rayleigh_cdf(real y | real sigma)
The Rayleigh cumulative distribution of y given scale sigma

+Available since 2.0 + +

+

real rayleigh_lcdf(real y | real sigma)
The log of the Rayleigh cumulative distribution of y given scale sigma

+Available since 2.12 + +

+

real rayleigh_lccdf(real y | real sigma)
The log of the Rayleigh complementary cumulative distribution of y given scale sigma

+Available since 2.12 + +

+

R rayleigh_rng(reals sigma)
Generate a Rayleigh variate with scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Log-logistic distribution

+
+

Probability density function

+

If \(\alpha, \beta \in \mathbb{R}^+\), then for \(y \in \mathbb{R}^+\), \[\begin{equation*} +\text{Log-Logistic}(y|\alpha,\beta) = +\frac{\ \left(\frac{\beta}{\alpha}\right) \left(\frac{y}{\alpha}\right)^{\beta-1}\ }{\left(1 + \left(\frac{y}{\alpha}\right)^\beta\right)^2} . +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ loglogistic(alpha, beta)

+

Increment target log probability density with the unnormalized version of loglogistic_lpdf(y | alpha, beta).

+Available since 2.29 + +

+
+
+

Stan functions

+ +

+

real loglogistic_lpdf(reals y | reals alpha, reals beta)
The log of the log-logistic density of y given scale alpha and shape beta

+Available since 2.29 + +

+

real loglogistic_cdf(reals y | reals alpha, reals beta)
The log-logistic cumulative distribution function of y given scale alpha and shape beta

+Available since 2.29 + +

+

R loglogistic_rng(reals alpha, reals beta)
Generate a log-logistic variate with scale alpha and shape beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.29 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/positive_lower-bounded_distributions.html b/docs/2_39/functions-reference/positive_lower-bounded_distributions.html new file mode 100644 index 000000000..aa3c8aca2 --- /dev/null +++ b/docs/2_39/functions-reference/positive_lower-bounded_distributions.html @@ -0,0 +1,1452 @@ + + + + + + + + + +Positive Lower-Bounded Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Positive Lower-Bounded Distributions

+

The positive lower-bounded probability functions have support on real values above some positive minimum value.

+
+

Pareto distribution

+
+

Probability density function

+

If \(y_{\text{min}} \in \mathbb{R}^+\) and \(\alpha \in \mathbb{R}^+\), then for \(y \in \mathbb{R}^+\) with \(y \geq y_{\text{min}}\), \[\begin{equation*} +\text{Pareto}(y|y_{\text{min}},\alpha) = \frac{\displaystyle \alpha\,y_{\text{min}}^\alpha}{\displaystyle y^{\alpha+1}}. +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ pareto(y_min, alpha)

+

Increment target log probability density with pareto_lupdf(y | y_min, alpha).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real pareto_lpdf(reals y | reals y_min, reals alpha)
The log of the Pareto density of y given positive minimum value y_min and shape alpha

+Available since 2.12 + +

+

real pareto_lupdf(reals y | reals y_min, reals alpha)
The log of the Pareto density of y given positive minimum value y_min and shape alpha dropping constant additive terms

+Available since 2.25 + +

+

real pareto_cdf(reals y | reals y_min, reals alpha)
The Pareto cumulative distribution function of y given positive minimum value y_min and shape alpha

+Available since 2.0 + +

+

real pareto_lcdf(reals y | reals y_min, reals alpha)
The log of the Pareto cumulative distribution function of y given positive minimum value y_min and shape alpha

+Available since 2.12 + +

+

real pareto_lccdf(reals y | reals y_min, reals alpha)
The log of the Pareto complementary cumulative distribution function of y given positive minimum value y_min and shape alpha

+Available since 2.12 + +

+

R pareto_rng(reals y_min, reals alpha)
Generate a Pareto variate with positive minimum value y_min and shape alpha; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Pareto type 2 distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\), \(\lambda \in \mathbb{R}^+\), and \(\alpha \in \mathbb{R}^+\), then for \(y \geq \mu\), \[\begin{equation*} +\mathrm{Pareto\_Type\_2}(y|\mu,\lambda,\alpha) = +\ \frac{\alpha}{\lambda} \, \left( 1+\frac{y-\mu}{\lambda} \right)^{-(\alpha+1)} \! . +\end{equation*}\]

+

Note that the Lomax distribution is a Pareto Type 2 distribution with \(\mu=0\).

+
+
+

Distribution statement

+

y ~ pareto_type_2(mu, lambda, alpha)

+

Increment target log probability density with pareto_type_2_lupdf(y | mu, lambda, alpha).

+Available since 2.5 + +

+
+
+

Stan functions

+ +

+

real pareto_type_2_lpdf(reals y | reals mu, reals lambda, reals alpha)
The log of the Pareto Type 2 density of y given location mu, scale lambda, and shape alpha

+Available since 2.18 + +

+

real pareto_type_2_lupdf(reals y | reals mu, reals lambda, reals alpha)
The log of the Pareto Type 2 density of y given location mu, scale lambda, and shape alpha dropping constant additive terms

+Available since 2.25 + +

+

real pareto_type_2_cdf(reals y | reals mu, reals lambda, reals alpha)
The Pareto Type 2 cumulative distribution function of y given location mu, scale lambda, and shape alpha

+Available since 2.5 + +

+

real pareto_type_2_lcdf(reals y | reals mu, reals lambda, reals alpha)
The log of the Pareto Type 2 cumulative distribution function of y given location mu, scale lambda, and shape alpha

+Available since 2.18 + +

+

real pareto_type_2_lccdf(reals y | reals mu, reals lambda, reals alpha)
The log of the Pareto Type 2 complementary cumulative distribution function of y given location mu, scale lambda, and shape alpha

+Available since 2.18 + +

+

R pareto_type_2_rng(reals mu, reals lambda, reals alpha)
Generate a Pareto Type 2 variate with location mu, scale lambda, and shape alpha; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Wiener First Passage Time Distribution

+

For an extended explanation of how to use the wiener_lpdf and wiener_l[c]cdf_unnorm functions, see Henrich et al. (2024).

+
+

Probability density function

+

If \(\alpha \in \mathbb{R}^+\), \(\tau \in \mathbb{R}^+\), \(\beta \in (0, 1)\), \(\delta \in \mathbb{R}\), \(s_{\delta} \in \mathbb{R}^{\geq 0}\), \(s_{\beta} \in [0, 1)\), and \(s_{\tau} \in \mathbb{R}^{\geq 0}\) then for \(y > \tau\),

+

\[\begin{equation*} +\begin{split} +&\text{Wiener}(y\mid \alpha,\tau,\beta,\delta,s_{\delta},s_{\beta},s_{\tau}) = +\\ +&\frac{1}{s_{\tau}}\int_{\tau}^{\tau+s_{\tau}}\frac{1}{s_{\beta}}\int_{\beta-\frac{1}{2}s_{\beta}}^{\beta+\frac{1}{2}s_{\beta}}\int_{-\infty}^{\infty} p_3(y-{\tau_0}\mid \alpha,\nu,\omega) +\\ +&\times \frac{1}{\sqrt{2\pi s_{\delta}^2}}\exp\Bigl(-\frac{(\nu-\delta)^2}{2s_{\delta}^2}\Bigr) \,d\nu \,d\omega \,d{\tau_0}= +\\ +&\frac{1}{s_{\tau}}\int_{\tau}^{\tau+s_{\tau}}\frac{1}{s_{\beta}}\int_{\beta-\frac{1}{2}s_{\beta}}^{\beta+\frac{1}{2}s_{\beta}} M\times p_3(y-{\tau_0}\mid \alpha,\nu,\omega) \,d\omega \,d{\tau_0}, +\end{split} +\end{equation*}\]

+

where \(p()\) denotes the density function, and \(M\) and \(p_3()\) are defined, by using \(t:=y-{\tau_0}\), as

+

\[\begin{equation*} +M \coloneqq \frac{1}{\sqrt{1+s_{\delta}^2t}}\exp\Bigl(\alpha{\delta}\omega+\frac{\delta^2t}{2}+\frac{s_{\delta}^2\alpha^2\omega^2-2\alpha{\delta}\omega-\delta^2t}{2(1+s_{\delta}^2t)}\Bigr)\text{ and} +\end{equation*}\]

+

\[\begin{equation*} +p_3(t\mid \alpha,\delta,\beta) \coloneqq \frac{1}{\alpha^2}\exp\Bigl(-\alpha\delta\beta-\frac{\delta^2t}{2}\Bigr)f(\frac{t}{\alpha^2}\mid 0,1,\beta), +\end{equation*}\]

+

where \(f(t^*=\frac{t}{\alpha^2}\mid0,1,\beta)\) can be specified in two ways:

+

\[\begin{equation*} +f_l(t^*\mid 0,1,\beta) = \sum_{k=1}^\infty k\pi \exp\Bigl(-\frac{k^2\pi^2t^*}{2}\Bigr)\sin(k\pi \beta)\text{ and} +\end{equation*}\]

+

\[\begin{equation*} +f_s(t^*\mid0,1,\beta) = \sum_{k=-\infty}^\infty \frac{1}{\sqrt{2\pi(t^*)^3}}(\beta+2k) \exp\Bigl(-\frac{(\beta+2k)^2}{2t^*}\Bigr). +\end{equation*}\]

+

Which of these is used in the computations depends on which expression requires the smaller number of components \(k\) to guarantee a pre-specified precision.

+

In the case where \(s_{\delta}\), \(s_{\beta}\), and \(s_{\tau}\) are all \(0\), this simplifies to one representation that converges fast for small reaction-time values (“small time expansion”): \[\begin{equation*} +\text{Wiener}(y|\alpha, \tau, \beta, \delta) = +\frac{\alpha}{(y-\tau)^{3/2}} \exp \! \left(- \delta \alpha \beta - +\frac{\delta^2(y-\tau)}{2}\right) \sum_{k = - \infty}^{\infty} (2k + +\beta) \phi \! \left(\frac{(2k + \beta)\alpha }{\sqrt{y - \tau}}\right), +\end{equation*}\] where \(\phi(x)\) denotes the standard normal density function, and one representation that converges fast for large reaction-time values (“large time expansion”): \[\begin{equation*} +\text{Wiener}(y|\alpha, \tau, \beta, \delta) = +\frac{\pi}{\alpha^2} \exp \! \left(- \delta \alpha \beta - +\frac{\delta^2(y-\tau)}{2}\right) \sum_{k = 1}^{\infty} k \exp \! \left(-\frac{k^2\pi^2(y-\tau)}{2\alpha^2}\right) \sin \!(k\pi\beta) +\end{equation*}\] see (Feller 1968), (Navarro and Fuss 2009).

+
+
+

Cumulative distribution function

+

For the cumulative distribution function (cdf) there also exist two expressions depending on the reaction time.

+

Let \(\alpha\), \(\tau\), \(\beta\), \(\delta\), \(s_{\delta}\), \(s_{\beta}\), \(s_{\tau}\) and \(y\) be as above.

+

The formula for the large-time cdf of decision times (excluding the additive reaction time components summarized in \(\tau\) for the time being) at the upper boundary is stated as follows:

+

\[\begin{equation} +F(y|\alpha, \beta, \delta) = P(\alpha, \beta, \delta) - + \exp\left(\delta\alpha(1-\beta)-\frac{\delta^2 y}{2}\right)F_l(y|\alpha,\beta,\delta), +\end{equation}\] where \(P(\alpha,\beta,\delta)\) is the probability to hit the upper boundary, defined as

+

\[\begin{equation} +P(\alpha, \beta, \delta) = +\begin{cases} + \frac{1-\exp(2\delta \alpha \beta)}{\exp(-2\delta \alpha(1-\beta)) - \exp(2\delta \alpha \beta)}, & \text{for } \delta\neq 0 \\ + \beta, & \text{for } \delta=0, +\end{cases} +\end{equation}\]

+

and

+

\[\begin{equation} +F_l(y|\alpha, \beta, \delta) = + \frac{2\pi}{\alpha^2}\sum_{k=1}^{\infty}{\frac{k\sin{k\pi(1-\beta)}}{\delta^2+(k\pi)^2/\alpha^2}}\exp(-\frac{k^2\pi^2y}{2\alpha^2}). +\end{equation}\]

+

The formula for the small-time cdf at the upper boundary is stated as follows:

+

\[\begin{equation} +F(y|\alpha,\beta,\delta) = \exp\left(\delta \alpha(1-\beta) -\frac{\delta^2y}{2}\right)F_s(y|\alpha, \beta,\delta), +\end{equation}\] where

+

\[\begin{equation} +F_s(y|\alpha,\beta,\delta) = \sum_{k=0}^{\infty}(-1)^k\phi\left(\frac{\alpha(k+\beta^{*}_k)} + {\sqrt{y}} \right) \times \left( R \left(\frac{\alpha(k+\beta^{*}_k)+\delta y}{\sqrt{y}} \right) + + R \left(\frac{\alpha(k+\beta^*_k)-\delta y}{\sqrt{y}} \right)\right), +\end{equation}\]

+

where \(\beta^*_k=(1-\beta)\) for \(k\) even, \(\beta^*_k=\beta\) for \(k\) odd, and \(R\) is Mills’ ratio.

+

The cdf for the lower boundary is \(F(y|\alpha,1-\beta,-\delta)\).

+
+
+

Distribution statement

+

y ~ wiener(alpha, tau, beta, delta)

+

Increment target log probability density with wiener_lupdf(y | alpha, tau, beta, delta).

+Available since 2.7 +

y ~ wiener(alpha, tau, beta, delta, var_delta) Increment target log probability density with wiener_lupdf(y | alpha, tau, beta, delta, var_delta).

+Available since 2.35 +

y ~ wiener(alpha, tau, beta, delta, var_delta, var_beta, var_tau) Increment target log probability density with wiener_lupdf(y | alpha, tau, beta, delta, var_delta, var_beta, var_tau).

+Available since 2.35 + +

+
+
+

Stan functions

+ +

+

real wiener_lpdf(reals y | reals alpha, reals tau, reals beta, reals delta)
The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.

+Available since 2.18 + +

+

real wiener_lpdf(real y | real alpha, real tau, real beta, real delta, real var_delta)
The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, and inter-trial drift rate variability var_delta.

+

Setting var_delta to 0 recovers the 4-parameter signature above.

+Available since 2.35 + +

+

real wiener_lpdf(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau)
The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.

+

Setting var_delta, var_beta, and var_tau to 0 recovers the 4-parameter signature above.

+Available since 2.35 + +

+

real wiener_lupdf(reals y | reals alpha, reals tau, reals beta, reals delta)
The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta, dropping constant additive terms

+Available since 2.25 + +

+

real wiener_lupdf(real y | real alpha, real tau, real beta, real delta, real var_delta)
The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, and inter-trial drift rate variability var_delta, dropping constant additive terms.

+

Setting var_delta to 0 recovers the 4-parameter signature above.

+Available since 2.35 + +

+

real wiener_lupdf(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau)
The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau, dropping constant additive terms.

+

Setting var_delta, var_beta, and var_tau to 0 recovers the 4-parameter signature above.

+Available since 2.35 +
+

Note: The lcdf and lccdf functions for the wiener distribution are conditional and unnormalized, meaning that the cdf does not asymptote at 1, but rather at the probability to hit the upper boundary.

+

Similarly, the ccdf is defined as the probability to hit the upper boundary less the value of the cdf, as opposed to the more typical \(1 - \textrm{cdf}\).

+ +

+

real wiener_lcdf_unnorm(real y, real alpha, real tau, real beta, real delta)

+

The log of the cumulative distribution function (cdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.

+Available since 2.38 + +

+

real wiener_lccdf_unnorm(real y, real alpha, real tau, real beta, real delta)

+

The log of the complementary cumulative distribution function (ccdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.

+Available since 2.38 + +

+

real wiener_lcdf_unnorm(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau)

+

The log of the cumulative distribution function (cdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.

+Available since 2.38 + +

+

real wiener_lccdf_unnorm(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau)

+

The log of the complementary cumulative distribution function (ccdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.

+Available since 2.38 +
+
+

Boundaries

+

Stan returns the first passage time of the accumulation process over the upper boundary only. To get the result for the lower boundary, use \[\begin{equation*} +\text{Wiener}(y | \alpha, \tau, 1 - \beta, - \delta) +\end{equation*}\] For more details, see the appendix of Vandekerckhove and Wabersich (2014).

+
+
+

Vectorization

+

The 5- and 7-argument forms of the wiener distribution functions (listed above as receiving only real) are implemented in such a way that they can be fully vectorized, but currently only versions that accept all real and all vector arguments are exposed by Stan. If there are additional signatures that would prove useful, please request them by opening an issue.

+
+
+

Tolerance tuning

+

The 5- and 7-argument forms of the wiener distribution functions can also accept an additional data real argument controlling the required precision of the gradient calculation of the function. If omitted, this defaults to 1e-4 for the density and 1e-8 for the cdf functions.

+ + + +
+
+
+ + Back to top

References

+
+Feller, William. 1968. An Introduction to Probability Theory and Its Applications. Vol. 1. 3. Wiley, New York. +
+
+Henrich, Franziska, Raphael Hartmann, Valentin Pratz, Andreas Voss, and Karl Christoph Klauer. 2024. “The Seven-Parameter Diffusion Model: An Implementation in Stan for Bayesian Analyses.” Behavior Research Methods 56 (4): 3102–16. https://doi.org/10.3758/s13428-023-02179-1. +
+
+Navarro, Danielle J, and Ian G Fuss. 2009. “Fast and Accurate Calculations for First-Passage Times in Wiener Diffusion Models.” Journal of Mathematical Psychology 53 (4): 222–30. +
+
+Vandekerckhove, Joachim, and Dominik Wabersich. 2014. “The RWiener Package: An R Package Providing Distribution Functions for the Wiener Diffusion Model.” The R Journal 6/1. http://journal.r-project.org/archive/2014-1/vandekerckhove-wabersich.pdf. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/real-valued_basic_functions.html b/docs/2_39/functions-reference/real-valued_basic_functions.html new file mode 100644 index 000000000..96f0b6ebe --- /dev/null +++ b/docs/2_39/functions-reference/real-valued_basic_functions.html @@ -0,0 +1,2260 @@ + + + + + + + + + +Real-Valued Basic Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Real-Valued Basic Functions

+

This chapter describes built-in functions that take zero or more real or integer arguments and return real values.

+
+

Vectorization of real-valued functions

+

Although listed in this chapter, many of Stan’s built-in functions are vectorized so that they may be applied to any argument type. The vectorized form of these functions is not any faster than writing an explicit loop that iterates over the elements applying the function—it’s just easier to read and write and less error prone.

+
+

Unary function vectorization

+

Many of Stan’s unary functions can be applied to any argument type. For example, the exponential function, exp, can be applied to real arguments or arrays of real arguments. Other than for integer arguments, the result type is the same as the argument type, including dimensionality and size. Integer arguments are first promoted to real values, but the result will still have the same dimensionality and size as the argument.

+
+

Real and real array arguments

+

When applied to a simple real value, the result is a real value. When applied to arrays, vectorized functions like exp() are defined elementwise. For example,

+
 // declare some variables for arguments
+ real x0;
+ array[5] real x1;
+ array[4, 7] real x2;
+ // ...
+ // declare some variables for results
+ real y0;
+ array[5] real y1;
+ array[4, 7] real y2;
+ // ...
+ // calculate and assign results
+ y0 = exp(x0);
+ y1 = exp(x1);
+ y2 = exp(x2);
+

When exp is applied to an array, it applies elementwise. For example, the statement above,

+
 y2 = exp(x2);
+

produces the same result for y2 as the explicit loop

+
for (i in 1:4) {
+  for (j in 1:7) {
+    y2[i, j] = exp(x2[i, j]);
+  }
+}
+
+
+

Vector and matrix arguments

+

Vectorized functions also apply elementwise to vectors and matrices. For example,

+
 vector[5] xv;
+ row_vector[7] xrv;
+ matrix[10, 20] xm;
+
+ vector[5] yv;
+ row_vector[7] yrv;
+ matrix[10, 20] ym;
+
+ yv = exp(xv);
+ yrv = exp(xrv);
+ ym = exp(xm);
+

Arrays of vectors and matrices work the same way. For example,

+
 array[12] matrix[17, 93] u;
+
+ array[12] matrix[17, 93] z;
+
+ z = exp(u);
+

After this has been executed, z[i, j, k] will be equal to exp(u[i, j, k]).

+
+
+

Integer and integer array arguments

+

Integer arguments are promoted to real values in vectorized unary functions. Thus if n is of type int, exp(n) is of type real. Arrays work the same way, so that if n2 is a one dimensional array of integers, then exp(n2) will be a one-dimensional array of reals with the same number of elements as n2. For example,

+
 array[23] int n1;
+ array[23] real z1;
+ z1 = exp(n1);
+

It would be illegal to try to assign exp(n1) to an array of integers; the return type is a real array.

+
+
+
+

Binary function vectorization

+

Like the unary functions, many of Stan’s binary functions have been vectorized, and can be applied elementwise to combinations of scalars and container types.

+
+

Scalar and scalar array arguments

+

When applied to two scalar values, the result is a scalar value. When applied to two arrays, or combination of a scalar value and an array, vectorized functions like pow() are defined elementwise. For example,

+
 // declare some variables for arguments
+ real x00;
+ real x01;
+ array[5] real x10;
+ array[5]real x11;
+ array[4, 7] real x20;
+ array[4, 7] real x21;
+ // ...
+ // declare some variables for results
+ real y0;
+ array[5] real y1;
+ array[4, 7] real y2;
+ // ...
+ // calculate and assign results
+ y0 = pow(x00, x01);
+ y1 = pow(x10, x11);
+ y2 = pow(x20, x21);
+

When pow is applied to two arrays, it applies elementwise. For example, the statement above,

+
 y2 = pow(x20, x21);
+

produces the same result for y2 as the explicit loop

+
for (i in 1:4) {
+  for (j in 1:7) {
+    y2[i, j] = pow(x20[i, j], x21[i, j]);
+  }
+}
+

Alternatively, if a combination of an array and a scalar are provided, the scalar value is broadcast to be applied to each value of the array. For example, the following statement:

+
y2 = pow(x20, x00);
+

produces the same result for y2 as the explicit loop:

+
for (i in 1:4) {
+  for (j in 1:7) {
+    y2[i, j] = pow(x20[i, j], x00);
+  }
+}
+
+
+

Vector and matrix arguments

+

Vectorized binary functions also apply elementwise to vectors and matrices, and to combinations of these with scalar values. For example,

+
 real x00;
+ vector[5] xv00;
+ vector[5] xv01;
+ row_vector[7] xrv;
+ matrix[10, 20] xm;
+
+ vector[5] yv;
+ row_vector[7] yrv;
+ matrix[10, 20] ym;
+
+ yv = pow(xv00, xv01);
+ yrv = pow(xrv, x00);
+ ym = pow(x00, xm);
+

Arrays of vectors and matrices work the same way. For example,

+
 array[12] matrix[17, 93] u;
+
+ array[12] matrix[17, 93] z;
+
+ z = pow(u, x00);
+

After this has been executed, z[i, j, k] will be equal to pow(u[i, j, k], x00).

+
+
+

Input & return types

+

Vectorized binary functions require that both inputs, unless one is a real, be containers of the same type and size. For example, the following statements are legal:

+
 vector[5] xv;
+ row_vector[7] xrv;
+ matrix[10, 20] xm;
+
+ vector[5] yv = pow(xv, xv);
+ row_vector[7] yrv = pow(xrv, xrv);
+ matrix[10, 20] ym = pow(xm, xm);
+

But the following statements are not:

+
 vector[5] xv;
+ vector[7] xv2;
+ row_vector[5] xrv;
+
+ // Cannot mix different types
+ vector[5] yv = pow(xv, xrv)
+
+ // Cannot mix different sizes of the same type
+ vector[5] yv = pow(xv, xv2)
+

While the vectorized binary functions generally require the same input types, the only exception to this is for binary functions that require one input to be an integer and the other to be a real (e.g., bessel_first_kind). For these functions, one argument can be a container of any type while the other can be an integer array, as long as the dimensions of both are the same. For example, the following statements are legal:

+
 vector[5] xv;
+ matrix[5, 5] xm;
+ array[5] int xi;
+ array[5, 5] int xii;
+
+ vector[5] yv = bessel_first_kind(xi, xv);
+ matrix[5, 5] ym = bessel_first_kind(xii, xm);
+

Whereas these are not:

+
 vector[5] xv;
+ matrix[5, 5] xm;
+ array[7] int xi;
+
+ // Dimensions of containers do not match
+ vector[5] yv = bessel_first_kind(xi, xv);
+
+ // Function requires first argument be an integer type
+ matrix[5, 5] ym = bessel_first_kind(xm, xm);
+
+
+
+
+

Mathematical constants

+

Constants are represented as functions with no arguments and must be called as such. For instance, the mathematical constant \(\pi\) must be written in a Stan program as pi().

+ +

+

real pi()
\(\pi\), the ratio of a circle’s circumference to its diameter

+Available since 2.0 + +

+

real e()
\(e\), the base of the natural logarithm

+Available since 2.0 + +

+

real sqrt2()
The square root of 2

+Available since 2.0 + +

+

real log2()
The natural logarithm of 2

+Available since 2.0 + +

+

real log10()
The natural logarithm of 10

+Available since 2.0 +
+
+

Special values

+ +

+

real not_a_number()
Not-a-number, a special non-finite real value returned to signal an error

+Available since 2.0 + +

+

real positive_infinity()
Positive infinity, a special non-finite real value larger than all finite numbers

+Available since 2.0 + +

+

real negative_infinity()
Negative infinity, a special non-finite real value smaller than all finite numbers

+Available since 2.0 + +

+

real machine_precision()
The smallest number \(x\) such that \((x + 1) \neq 1\) in floating-point arithmetic on the current hardware platform

+Available since 2.0 +
+
+

Log probability function

+

The basic purpose of a Stan program is to compute a log probability function and its derivatives. The log probability function in a Stan model outputs the log density on the unconstrained scale. A log probability accumulator starts at zero and is then incremented in various ways by a Stan program. The variables are first transformed from unconstrained to constrained, and the log Jacobian determinant added to the log probability accumulator. Then the model block is executed on the constrained parameters, with each distribution statement (~) and log probability increment statement (target +=) adding to the accumulator. At the end of the model block execution, the value of the log probability accumulator is the log probability value returned by the Stan program.

+

Stan provides a special built-in function target() that takes no arguments and returns the current value of the log probability accumulator. This function is primarily useful for debugging purposes, where for instance, it may be used with a print statement to display the log probability accumulator at various stages of execution to see where it becomes ill defined.

+ +

+

real target()
Return the current value of the log probability accumulator.

+Available since 2.10 +

target acts like a function ending in _lp, meaning that it may only be used in the model block.

+
+
+

Logical functions

+

Like C++, BUGS, and R, Stan uses 0 to encode false, and 1 to encode true. Stan supports the usual boolean comparison operations and boolean operators. These all have the same syntax and precedence as in C++; for the full list of operators and precedences, see the reference manual.

+
+

Comparison operators

+

All comparison operators return boolean values, either 0 or 1. Each operator has two signatures, one for integer comparisons and one for floating-point comparisons. Comparing an integer and real value is carried out by first promoting the integer value.

+ +

+

int operator<(int x, int y)

+ +

+

int operator<(real x, real y)
Return 1 if x is less than y and 0 otherwise. \[\begin{equation*} \text{operator<}(x,y) += \begin{cases} 1 & \text{if $x < y$} \\ 0 & \text{otherwise} +\end{cases} \end{equation*}\]

+Available since 2.0 + +

+

int operator<=(int x, int y)

+ +

+

int operator<=(real x, real y)
Return 1 if x is less than or equal to y and 0 otherwise. \[\begin{equation*} +\text{operator<=}(x,y) = \begin{cases} 1 & \text{if $x \leq y$} \\ 0 & \text{otherwise} \end{cases} +\end{equation*}\]

+Available since 2.0 + +

+

int operator>(int x, int y)

+ +

+

int operator>(real x, real y)
Return 1 if x is greater than y and 0 otherwise. \[\begin{equation*} +\text{operator>}(x,y) = \begin{cases} 1 & \text{if $x > y$} \\ 0 & \text{otherwise} \end{cases} +\end{equation*}\]

+Available since 2.0 + +

+

int operator>=(int x, int y)

+ +

+

int operator>=(real x, real y)
Return 1 if x is greater than or equal to y and 0 otherwise. \[\begin{equation*} +\text{operator>=}(x,y) = \begin{cases} 1 & \text{if $x \geq y$} \\ 0 & \text{otherwise} \end{cases} +\end{equation*}\]

+Available since 2.0 + +

+

int operator==(int x, int y)

+ +

+

int operator==(real x, real y)
Return 1 if x is equal to y and 0 otherwise. \[\begin{equation*} +\text{operator==}(x,y) = \begin{cases} 1 & \text{if $x = y$} \\ 0 & \text{otherwise} \end{cases} +\end{equation*}\]

+Available since 2.0 + +

+

int operator!=(int x, int y)

+ +

+

int operator!=(real x, real y)
Return 1 if x is not equal to y and 0 otherwise. \[\begin{equation*} +\text{operator!=}(x,y) = \begin{cases} 1 & \text{if $x \neq y$} \\ 0 & +\text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 +
+
+

Boolean operators

+

Boolean operators return either 0 for false or 1 for true. Inputs may be any real or integer values, with non-zero values being treated as true and zero values treated as false. These operators have the usual precedences, with negation (not) binding the most tightly, conjunction the next and disjunction the weakest; negation binds more tightly than the comparisons, whereas conjunction and disjunction bind less tightly than the comparisons. Thus an expression such as !a && b is interpreted as (!a) && b, and a < b || c >= d && e != f as (a < b) || (((c >= d) && (e != f))).

+ +

+

int operator!(int x)
Return 1 if x is zero and 0 otherwise. \[\begin{equation*} \text{operator!}(x) = +\begin{cases} 0 & \text{if $x \neq 0$} \\ 1 & \text{if $x = 0$} +\end{cases} \end{equation*}\]

+Available since 2.0 + +

+

int operator!(real x)
Return 1 if x is zero and 0 otherwise. \[\begin{equation*} \text{operator!}(x) = +\begin{cases} 0 & \text{if $x \neq 0.0$} \\ 1 & \text{if $x = 0.0$} +\end{cases} \end{equation*}\] Deprecated; use operator== instead.

+Available since 2.0, deprecated in 2.31 + +

+

int operator&&(int x, int y)

+

Return 1 if x is unequal to 0 and y is unequal to 0. \[\begin{equation*} +\mathrm{operator\&\&}(x,y) = \begin{cases} 1 & \text{if $x \neq 0$} +\text{ and } y \neq 0\\ 0 & \text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

int operator&&(real x, real y)
Return 1 if x is unequal to 0.0 and y is unequal to 0.0. \[\begin{equation*} +\mathrm{operator\&\&}(x,y) = \begin{cases} 1 & \text{if $x \neq 0.0$} +\text{ and } y \neq 0.0\\ 0 & \text{otherwise} \end{cases} \end{equation*}\] deprecated

+Available since 2.0, deprecated in 2.31 + +

+

int operator||(int x, int y)
Return 1 if x is unequal to 0 or y is unequal to 0. \[\begin{equation*} +\text{operator||}(x,y) = \begin{cases} 1 & \text{if $x \neq 0$} +\textrm{ or } y \neq 0\\ 0 & \text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

int operator||(real x, real y)
Return 1 if x is unequal to 0.0 or y is unequal to 0.0. \[\begin{equation*} +\text{operator||}(x,y) = \begin{cases} 1 & \text{if $x \neq 0.0$} +\textrm{ or } y \neq 0.0\\ 0 & \text{otherwise} \end{cases} \end{equation*}\] deprecated

+Available since 2.0, deprecated in 2.31 +
+

Boolean operator short circuiting

+

Like in C++, the boolean operators && and || are implemented to short circuit directly to a return value after evaluating the first argument if it is sufficient to resolve the result. In evaluating a || b, if a evaluates to a value other than zero, the expression returns the value 1 without evaluating the expression b. Similarly, evaluating a && b first evaluates a, and if the result is zero, returns 0 without evaluating b.

+
+
+
+

Logical functions

+

The logical functions introduce conditional behavior functionally and are primarily provided for compatibility with BUGS and JAGS.

+ +

+

real step(real x)
Return 0 if x is negative and 1 otherwise. \[\begin{equation*} \text{step}(x) = +\begin{cases} 0 & \text{if } x < 0 \\ 1 & \text{otherwise} \end{cases} +\end{equation*}\] Warning: int_step(0) and int_step(NaN) return 0 whereas step(0) and step(NaN) return 1.

+

The step function is often used in BUGS to perform conditional operations. For instance, step(a-b) evaluates to 1 if a is greater than or equal to b and evaluates to 0 otherwise. step is a step-like function; see the warning in section step functions applied to expressions dependent on parameters.

+Available since 2.0 + +

+

int is_inf(real x)
Return 1 if x is infinite (positive or negative) and 0 otherwise.

+Available since 2.5 + +

+

int is_nan(real x)
Return 1 if x is NaN and 0 otherwise.

+Available since 2.5 +

Care must be taken because both of these indicator functions are step-like and thus can cause discontinuities in gradients when applied to parameters; see section step-like functions for details.

+
+
+
+

Real-valued arithmetic operators

+

The arithmetic operators are presented using C++ notation. For instance operator+(x,y) refers to the binary addition operator and operator-(x) to the unary negation operator. In Stan programs, these are written using the usual infix and prefix notations as x + y and -x, respectively.

+
+

Binary infix operators

+ +

+

real operator+(real x, real y)
Return the sum of x and y. \[\begin{equation*} (x + y) = \text{operator+}(x,y) = x+y \end{equation*}\]

+Available since 2.0 + +

+

real operator-(real x, real y)
Return the difference between x and y. \[\begin{equation*} (x - y) = +\text{operator-}(x,y) = x - y \end{equation*}\]

+Available since 2.0 + +

+

real operator*(real x, real y)
Return the product of x and y. \[\begin{equation*} (x * y) = \text{operator*}(x,y) = xy +\end{equation*}\]

+Available since 2.0 + +

+

real operator/(real x, real y)
Return the quotient of x and y. \[\begin{equation*} (x / y) = \text{operator/}(x,y) = +\frac{x}{y} \end{equation*}\]

+Available since 2.0 + +

+

real operator^(real x, real y)
Return x raised to the power of y. \[\begin{equation*} (x^\mathrm{\wedge}y) = +\text{operator}^\mathrm{\wedge}(x,y) = x^y \end{equation*}\]

+Available since 2.5 +
+
+

Unary prefix operators

+ +

+

real operator-(real x)
Return the negation of the subtrahend x. \[\begin{equation*} \text{operator-}(x) = (-x) +\end{equation*}\]

+Available since 2.0 + +

+

T operator-(T x)
Vectorized version of operator-. If T x is a (possibly nested) array of reals, -x is the same shape array where each individual number is negated.

+Available since 2.31 + +

+

real operator+(real x)
Return the value of x. \[\begin{equation*} \text{operator+}(x) = x \end{equation*}\]

+Available since 2.0 +
+
+
+

Step-like functions

+

Warning: These functions can seriously hinder sampling and optimization efficiency for gradient-based methods (e.g., NUTS, HMC, BFGS) if applied to parameters (including transformed parameters and local variables in the transformed parameters or model block). The problem is that they break gradients due to discontinuities coupled with zero gradients elsewhere. They do not hinder sampling when used in the data, transformed data, or generated quantities blocks.

+
+

Absolute value functions

+ +

+

T abs(T x)
The absolute value of x.

+

This function works elementwise over containers such as vectors. Given a type T which is real vector, row_vector, matrix, or an array of those types, abs returns the same type where each element has had its absolute value taken.

+Available since 2.0, vectorized in 2.30 + +

+

real fdim(real x, real y)
Return the positive difference between x and y, which is x - y if x is greater than y and 0 otherwise; see warning above. \[\begin{equation*} \text{fdim}(x,y) = \begin{cases} x-y & +\text{if } x \geq y \\ 0 & \text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

R fdim(T1 x, T2 y)
Vectorized implementation of the fdim function

+Available since 2.25 +
+
+

Bounds functions

+ +

+

real fmin(real x, real y)
Return the minimum of x and y; see warning above. \[\begin{equation*} \text{fmin}(x,y) = \begin{cases} x & +\text{if } x \leq y \\ y & \text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

R fmin(T1 x, T2 y)
Vectorized implementation of the fmin function

+Available since 2.25 + +

+

real fmax(real x, real y)
Return the maximum of x and y; see warning above. \[\begin{equation*} \text{fmax}(x,y) = \begin{cases} x & +\text{if } x \geq y \\ y & \text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

R fmax(T1 x, T2 y)
Vectorized implementation of the fmax function

+Available since 2.25 +
+
+

Arithmetic functions

+ +

+

real fmod(real x, real y)
Return the real value remainder after dividing x by y; see warning above. \[\begin{equation*} \text{fmod}(x,y) = x - \left\lfloor \frac{x}{y} \right\rfloor \, y \end{equation*}\] The operator \(\lfloor u \rfloor\) is the floor operation; see below.

+Available since 2.0 + +

+

R fmod(T1 x, T2 y)
Vectorized implementation of the fmod function

+Available since 2.25 +
+
+

Rounding functions

+

Warning: Rounding functions convert real values to integers. Because the output is an integer, any gradient information resulting from functions applied to the integer is not passed to the real value it was derived from. With MCMC sampling using HMC or NUTS, the MCMC acceptance procedure will correct for any error due to poor gradient calculations, but the result is likely to be reduced acceptance probabilities and less efficient sampling.

+

The rounding functions cannot be used as indices to arrays because they return real values. For operations over data or in the generated quantities block, the to_int() function can be used.

+ +

+

R floor(T x)
The floor of x, which is the largest integer less than or equal to x, converted to a real value; see warning at start of section step-like functions

+Available since 2.0, vectorized in 2.13 + +

+

R ceil(T x)
The ceiling of x, which is the smallest integer greater than or equal to x, converted to a real value; see warning at start of section step-like functions

+Available since 2.0, vectorized in 2.13 + +

+

R round(T x)
The nearest integer to x, converted to a real value; see warning at start of section step-like functions

+Available since 2.0, vectorized in 2.13 + +

+

R trunc(T x)
The integer nearest to but no larger in magnitude than x, converted to a real value; see warning at start of section step-like functions

+Available since 2.0, vectorized in 2.13 +
+
+
+

Power and logarithm functions

+ +

+

R sqrt(T x)
The square root of x

+Available since 2.0, vectorized in 2.13 + +

+

R cbrt(T x)
The cube root of x

+Available since 2.0, vectorized in 2.13 + +

+

R square(T x)
The square of x

+Available since 2.0, vectorized in 2.13 + +

+

R exp(T x)
The natural exponential of x

+Available since 2.0, vectorized in 2.13 + +

+

R exp2(T x)
The base-2 exponential of x

+Available since 2.0, vectorized in 2.13 + +

+

R log(T x)
The natural logarithm of x

+Available since 2.0, vectorized in 2.13 + +

+

R log2(T x)
The base-2 logarithm of x

+Available since 2.0, vectorized in 2.13 + +

+

R log10(T x)
The base-10 logarithm of x

+Available since 2.0, vectorized in 2.13 + +

+

real pow(real x, real y)
Return x raised to the power of y. \[\begin{equation*} \text{pow}(x,y) = x^y \end{equation*}\]

+Available since 2.0 + +

+

R pow(T1 x, T2 y)
Vectorized implementation of the pow function

+Available since 2.25 + +

+

R inv(T x)
The inverse of x

+Available since 2.0, vectorized in 2.13 + +

+

R inv_sqrt(T x)
The inverse of the square root of x

+Available since 2.0, vectorized in 2.13 + +

+

R inv_square(T x)
The inverse of the square of x

+Available since 2.0, vectorized in 2.13 +
+
+

Trigonometric functions

+ +

+

real hypot(real x, real y)
Return the length of the hypotenuse of a right triangle with sides of length x and y. \[\begin{equation*} \text{hypot}(x,y) = \begin{cases} \sqrt{x^2+y^2} & +\text{if } x,y\geq 0 \\ \textrm{NaN} & \text{otherwise} \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

R hypot(T1 x, T2 y)
Vectorized implementation of the hypot function

+Available since 2.25 + +

+

R cos(T x)
The cosine of the angle x (in radians)

+Available since 2.0, vectorized in 2.13 + +

+

R sin(T x)
The sine of the angle x (in radians)

+Available since 2.0, vectorized in 2.13 + +

+

R tan(T x)
The tangent of the angle x (in radians)

+Available since 2.0, vectorized in 2.13 + +

+

R acos(T x)
The principal arc (inverse) cosine (in radians) of x

+Available since 2.0, vectorized in 2.13 + +

+

R asin(T x)
The principal arc (inverse) sine (in radians) of x

+Available since 2.0 + +

+

R atan(T x)
The principal arc (inverse) tangent (in radians) of x, with values from \(-\pi/2\) to \(\pi/2\)

+Available since 2.0, vectorized in 2.13 + +

+

R atan2(T y, T x)
Return the principal arc (inverse) tangent (in radians) of y divided by x, \[\begin{equation*} \text{atan2}(y, x) = \arctan\left(\frac{y}{x}\right) \end{equation*}\]

+Available since 2.0, vectorized in 2.34 +
+
+

Hyperbolic trigonometric functions

+ +

+

R cosh(T x)
The hyperbolic cosine of x (in radians)

+Available since 2.0, vectorized in 2.13 + +

+

R sinh(T x)
The hyperbolic sine of x (in radians)

+Available since 2.0, vectorized in 2.13 + +

+

R tanh(T x)
The hyperbolic tangent of x (in radians)

+Available since 2.0, vectorized in 2.13 + +

+

R acosh(T x)
The inverse hyperbolic cosine (in radians) of x

+Available since 2.0, vectorized in 2.13 + +

+

R asinh(T x)
The inverse hyperbolic sine (in radians) of x

+Available since 2.0, vectorized in 2.13 + +

+

R atanh(T x)
The inverse hyperbolic tangent (in radians) of x

+Available since 2.0, vectorized in 2.13 +
+ +
+

Probability-related functions

+
+

Normal cumulative distribution functions

+

The error function erf is related to the standard normal cumulative distribution function \(\Phi\) by scaling. See section normal distribution for the general normal cumulative distribution function (and its complement).

+ +

+

R erf(T x)
The error function, also known as the Gauss error function, of x

+Available since 2.0, vectorized in 2.13 + +

+

R erfc(T x)
The complementary error function of x

+Available since 2.0, vectorized in 2.13 + +

+

R inv_erfc(T x)
The inverse of the complementary error function of x

+Available since 2.29, vectorized in 2.29 + +

+

R Phi(T x)
The standard normal cumulative distribution function of x

+Available since 2.0, vectorized in 2.13 + +

+

R inv_Phi(T x)
Return the value of the inverse standard normal cdf \(\Phi^{-1}\) at the specified quantile x. The details of the algorithm can be found in (Wichura 1988). Quantile arguments below 1e-16 are untested; quantiles above 0.999999999 result in increasingly large errors.

+Available since 2.0, vectorized in 2.13 + +

+

R Phi_approx(T x)
The fast approximation of the unit normal cumulative distribution function of x (may replace Phi for probit regression with maximum absolute error of 0.00014, see (Bowling et al. 2009) for details)

+Available since 2.0, vectorized in 2.13 +
+ +
+
+

Combinatorial functions

+ +

+

real beta(real alpha, real beta)
Return the beta function applied to alpha and beta. The beta function, \(\text{B}(\alpha,\beta)\), computes the normalizing constant for the beta distribution, and is defined for \(\alpha > 0\) and \(\beta > 0\). See section appendix for definition of \(\text{B}(\alpha, \beta)\).

+Available since 2.25 + +

+

R beta(T1 x, T2 y)
Vectorized implementation of the beta function

+Available since 2.25 + +

+

real inc_beta(real alpha, real beta, real x)
Return the regularized incomplete beta function up to x applied to alpha and beta. See section appendix for a definition.

+Available since 2.10 + +

+

real inv_inc_beta(real alpha, real beta, real p)
Return the inverse of the regularized incomplete beta function. The return value x is the value that solves p = inc_beta(alpha, beta, x). See section appendix for a definition of the inc_beta.

+Available since 2.30 + +

+

real lbeta(real alpha, real beta)
Return the natural logarithm of the beta function applied to alpha and beta. The beta function, \(\text{B}(\alpha,\beta)\), computes the normalizing constant for the beta distribution, and is defined for \(\alpha > 0\) and \(\beta > 0\). \[\begin{equation*} +\text{lbeta}(\alpha,\beta) = \log \Gamma(\alpha) + \log \Gamma(\beta) - \log \Gamma(\alpha+\beta) +\end{equation*}\] See section appendix for definition of \(\text{B}(\alpha, \beta)\).

+Available since 2.0 + +

+

R lbeta(T1 x, T2 y)
Vectorized implementation of the lbeta function

+Available since 2.25 + +

+

R tgamma(T x)
The gamma function applied to x. The gamma function is the generalization of the factorial function to continuous variables, defined so that \(\Gamma(n+1) = n!\). See section appendix for a full definition of \(\Gamma(x)\). The function is defined for positive numbers and non-integral negative numbers.

+Available since 2.0, vectorized in 2.13 + +

+

R lgamma(T x)
The natural logarithm of the gamma function applied to x.

+Available since 2.0, vectorized in 2.15 + +

+

R digamma(T x)
The digamma function applied to x. The digamma function is the derivative of the natural logarithm of the Gamma function. The function is defined for positive numbers and non-integral negative numbers

+Available since 2.0, vectorized in 2.13 + +

+

R trigamma(T x)
The trigamma function applied to x. The trigamma function is the second derivative of the natural logarithm of the Gamma function

+Available since 2.0, vectorized in 2.13 + +

+

real lmgamma(int n, real x)
Return the natural logarithm of the multivariate gamma function \(\Gamma_n\) with n dimensions applied to x. \[\begin{equation*} +\text{lmgamma}(n,x) = +\begin{cases} \frac{n(n-1)}{4} \log \pi + \sum_{j=1}^n \log \Gamma\left(x + \frac{1 - j}{2}\right) +& \text{if } x\not\in \{\dots,-3,-2,-1,0\}\\ \textrm{error} & \text{otherwise} \end{cases} +\end{equation*}\]

+Available since 2.0 + +

+

R lmgamma(T1 x, T2 y)
Vectorized implementation of the lmgamma function

+Available since 2.25 + +

+

real gamma_p(real a, real z)
Return the normalized lower incomplete gamma function of a and z defined for positive a and nonnegative z. \[\begin{equation*} +\mathrm{gamma\_p}(a,z) = +\begin{cases} \frac{1}{\Gamma(a)}\int_0^zt^{a-1}e^{-t}dt & +\text{if } a > 0, z \geq 0 \\ \textrm{error} & \text{otherwise} \end{cases} +\end{equation*}\]

+Available since 2.0 + +

+

R gamma_p(T1 x, T2 y)
Vectorized implementation of the gamma_p function

+Available since 2.25 + +

+

real gamma_q(real a, real z)
Return the normalized upper incomplete gamma function of a and z defined for positive a and nonnegative z. \[\begin{equation*} +\mathrm{gamma\_q}(a,z) = +\begin{cases} \frac{1}{\Gamma(a)}\int_z^\infty t^{a-1}e^{-t}dt & +\text{if } a > 0, z \geq 0 \\[6pt] \textrm{error} & \text{otherwise} +\end{cases} +\end{equation*}\]

+Available since 2.0 + +

+

R gamma_q(T1 x, T2 y)
Vectorized implementation of the gamma_q function

+Available since 2.25 + +

+

int choose(int x, int y)
Return the binomial coefficient of x and y. For non-negative integer inputs, the binomial coefficient function is written as \(\binom{x}{y}\) and pronounced “x choose y.” It is the antilog of the lchoose function but returns an integer rather than a real number with no non-zero decimal places. For \(0 \leq y \leq x\), the binomial coefficient function can be defined via the factorial function \[\begin{equation*} +\text{choose}(x,y) = \frac{x!}{\left(y!\right)\left(x - y\right)!}. +\end{equation*}\]

+Available since 2.14 + +

+

R choose(T1 x, T2 y)
Vectorized implementation of the choose function

+Available since 2.25 + +

+

real bessel_first_kind(int v, real x)
Return the Bessel function of the first kind with order v applied to x. \[\begin{equation*} +\mathrm{bessel\_first\_kind}(v,x) = J_v(x), +\end{equation*}\] where \[\begin{equation*} +J_v(x)=\left(\frac{1}{2}x\right)^v \sum_{k=0}^\infty +\frac{\left(-\frac{1}{4}x^2\right)^k}{k!\, \Gamma(v+k+1)} +\end{equation*}\]

+Available since 2.5 + +

+

R bessel_first_kind(T1 x, T2 y)
Vectorized implementation of the bessel_first_kind function

+Available since 2.25 + +

+

real bessel_second_kind(int v, real x)
Return the Bessel function of the second kind with order v applied to x defined for positive x and v. For \(x,v > 0\), \[\begin{equation*} +\mathrm{bessel\_second\_kind}(v,x) = +\begin{cases} Y_v(x) & \text{if } x > 0 \\ \textrm{error} & \text{otherwise} \end{cases} +\end{equation*}\] where \[\begin{equation*} +Y_v(x)=\frac{J_v(x)\cos(v\pi)-J_{-v}(x)}{\sin(v\pi)} +\end{equation*}\]

+Available since 2.5 + +

+

R bessel_second_kind(T1 x, T2 y)
Vectorized implementation of the bessel_second_kind function

+Available since 2.25 + +

+

real modified_bessel_first_kind(int v, real z)
Return the modified Bessel function of the first kind with order v applied to z defined for all z and integer v. \[\begin{equation*} +\mathrm{modified\_bessel\_first\_kind}(v,z) = I_v(z) +\end{equation*}\] where \[\begin{equation*} +{I_v}(z) = \left(\frac{1}{2}z\right)^v\sum_{k=0}^\infty \frac{\left(\frac{1}{4}z^2\right)^k}{k!\Gamma(v+k+1)} +\end{equation*}\]

+Available since 2.1 + +

+

R modified_bessel_first_kind(T1 x, T2 y)
Vectorized implementation of the modified_bessel_first_kind function

+Available since 2.25 + +

+

real log_modified_bessel_first_kind(real v, real z)
Return the log of the modified Bessel function of the first kind. v does not have to be an integer.

+Available since 2.26 + +

+

R log_modified_bessel_first_kind(T1 x, T2 y)
Vectorized implementation of the log_modified_bessel_first_kind function

+Available since 2.26 + +

+

real modified_bessel_second_kind(int v, real z)
Return the modified Bessel function of the second kind with order v applied to z defined for positive z and integer v. \[\begin{equation*} +\mathrm{modified\_bessel\_second\_kind}(v,z) = +\begin{cases} K_v(z) & \text{if } z > 0 \\ \textrm{error} & \text{if } z \leq 0 \end{cases} +\end{equation*}\] where \[\begin{equation*} {K_v}(z) = \frac{\pi}{2}\cdot\frac{I_{-v}(z) - I_{v}(z)}{\sin(v\pi)} +\end{equation*}\]

+Available since 2.1 + +

+

R modified_bessel_second_kind(T1 x, T2 y)
Vectorized implementation of the modified_bessel_second_kind function

+Available since 2.25 + +

+

real falling_factorial(real x, real n)
Return the falling factorial of x with power n defined for positive x and real n. \[\begin{equation*} +\mathrm{falling\_factorial}(x,n) = +\begin{cases} (x)_n & \text{if } x > 0 \\ \textrm{error} & \text{if } x \leq 0 \end{cases} +\end{equation*}\] where \[\begin{equation*} +(x)_n=\frac{\Gamma(x+1)}{\Gamma(x-n+1)} +\end{equation*}\]

+Available since 2.0 + +

+

R falling_factorial(T1 x, T2 y)
Vectorized implementation of the falling_factorial function

+Available since 2.25 + +

+

real lchoose(real x, real y)
Return the natural logarithm of the generalized binomial coefficient of x and y. For non-negative integer inputs, the binomial coefficient function is written as \(\binom{x}{y}\) and pronounced “x choose y.” This function generalizes to real numbers using the gamma function. For \(0 \leq y \leq x\), \[\begin{equation*} \mathrm{lchoose}(x,y) = +\log\Gamma(x+1) - \log\Gamma(y+1) - \log\Gamma(x-y+1). \end{equation*}\]

+Available since 2.10 + +

+

R lchoose(T1 x, T2 y)
Vectorized implementation of the lchoose function

+Available since 2.29 + +

+

real log_falling_factorial(real x, real n)
Return the log of the falling factorial of x with power n defined for positive x and real n. \[\begin{equation*} \mathrm{log\_falling\_factorial}(x,n) = +\begin{cases} \log (x)_n & \text{if } x > 0 \\ \textrm{error} & +\text{if } x \leq 0 \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

real rising_factorial(real x, int n)
Return the rising factorial of x with power n defined for positive x and integer n. \[\begin{equation*} +\mathrm{rising\_factorial}(x,n) = \begin{cases} x^{(n)} & \text{if } x > 0 \\ \textrm{error} & \text{if } x \leq 0 \end{cases} +\end{equation*}\] where \[\begin{equation*} x^{(n)}=\frac{\Gamma(x+n)}{\Gamma(x)} \end{equation*}\]

+Available since 2.20 + +

+

R rising_factorial(T1 x, T2 y)
Vectorized implementation of the rising_factorial function

+Available since 2.25 + +

+

real log_rising_factorial(real x, real n)
Return the log of the rising factorial of x with power n defined for positive x and real n. \[\begin{equation*} \mathrm{log\_rising\_factorial}(x,n) = +\begin{cases} \log x^{(n)} & \text{if } x > 0 \\ \textrm{error} & +\text{if } x \leq 0 \end{cases} \end{equation*}\]

+Available since 2.0 + +

+

R log_rising_factorial(T1 x, T2 y)
Vectorized implementation of the log_rising_factorial function

+Available since 2.25 +
+
+

Composed functions

+

The functions in this section are equivalent in theory to combinations of other functions. In practice, they are implemented to be more efficient and more numerically stable than defining them directly using more basic Stan functions.

+ +

+

R expm1(T x)
The natural exponential of x minus 1

+Available since 2.0, vectorized in 2.13 + +

+

real fma(real x, real y, real z)
Return z plus the result of x multiplied by y. \[\begin{equation*} \text{fma}(x,y,z) = +(x \times y) + z \end{equation*}\]

+Available since 2.0 + +

+

real ldexp(real x, int y)
Return the product of x and two raised to the y power. \[\begin{equation*} +\text{ldexp}(x,y) = x 2^y \end{equation*}\]

+Available since 2.25 + +

+

R ldexp(T1 x, T2 y)
Vectorized implementation of the ldexp function

+Available since 2.25 + +

+

real lmultiply(real x, real y)
Return the product of x and the natural logarithm of y. \[\begin{equation*} +\text{lmultiply}(x,y) = \begin{cases} 0 & \text{if } x = y = 0 \\ x +\log y & \text{if } x, y \neq 0 \\ \text{NaN} & \text{otherwise} +\end{cases} \end{equation*}\]

+Available since 2.10 + +

+

R lmultiply(T1 x, T2 y)
Vectorized implementation of the lmultiply function

+Available since 2.25 + +

+

R log1p(T x)
The natural logarithm of 1 plus x

+Available since 2.0, vectorized in 2.13 + +

+

R log1m(T x)
The natural logarithm of 1 minus x

+Available since 2.0, vectorized in 2.13 + +

+

R log1p_exp(T x)
The natural logarithm of one plus the natural exponentiation of x

+Available since 2.0, vectorized in 2.13 + +

+

R log1m_exp(T x)
The natural logarithm of one minus the natural exponentiation of x

+Available since 2.0, vectorized in 2.13 + +

+

real log_diff_exp(real x, real y)
Return the natural logarithm of the difference of the natural exponentiation of x and the natural exponentiation of y. \[\begin{equation*} +\mathrm{log\_diff\_exp}(x,y) = \begin{cases} \log(\exp(x)-\exp(y)) & +\text{if } +\infty > x \ge y \\[6pt] +\textrm{NaN} & \text{otherwise} \end{cases} +\end{equation*}\]

+

When x is equal to y, log_diff_exp(x, y) returns \(-\infty\), consistent with log(0) returning \(-\infty\). This includes the case in which x and y are both equal to \(-\infty\), which corresponds to log(0 - 0) because exp(negative_infinity()) returns 0.

+Available since 2.0 + +

+

R log_diff_exp(T1 x, T2 y)
Vectorized implementation of the log_diff_exp function

+Available since 2.25 + +

+

real log_mix(real theta, real lp1, real lp2)
Return the log mixture of the log densities lp1 and lp2 with mixing proportion theta, defined by \[\begin{eqnarray*} +\mathrm{log\_mix}(\theta, \lambda_1, \lambda_2) & = & \log \!\left( +\theta \exp(\lambda_1) + \left( 1 - \theta \right) \exp(\lambda_2) +\right) \\[3pt] & = & \mathrm{log\_sum\_exp}\!\left(\log(\theta) + +\lambda_1, \ \log(1 - \theta) + \lambda_2\right). \end{eqnarray*}\]

+Available since 2.6 + +

+

R log_mix(T1 thetas, T2 lps)

+

Calculates the log mixture density given thetas, mixing proportions which should be between 0 and 1 and sum to 1, and lps, log densities. The lps variable must be either a 1-d container of the same length as thetas, or an array of such.

+

\[\begin{eqnarray*} +\mathrm{log\_mix}(\theta, \lambda) +& = & \log \!\left( \sum_{n=1}^N \theta_n * \exp(\lambda_n) \right) \\[3pt] +& = & \mathrm{log\_sum\_exp}\!\left(\log(\theta) + \lambda\right). +\end{eqnarray*}\]

+

This is a generalization of the above signature of three arguments to more than two densities. For example, log_mix(lambda, lp1, lp2) == log_mix({lambda, 1 - lambda}, {lp1, lp2}).

+Available since 2.26 + +

+

R log_sum_exp(T1 x, T2 y)
Return the natural logarithm of the sum of the natural exponentiation of x and the natural exponentiation of y. \[\begin{equation*} +\mathrm{log\_sum\_exp}(x,y) = \log(\exp(x)+\exp(y)) \end{equation*}\]

+Available since 2.0, vectorized in 2.33 + +

+

R log_inv_logit(T x)
The natural logarithm of the inverse logit function of x

+Available since 2.0, vectorized in 2.13 + +

+

R log_inv_logit_diff(T1 x, T2 y)
The natural logarithm of the difference of the inverse logit function of x and the inverse logit function of y

+Available since 2.25 + +

+

R log1m_inv_logit(T x)
The natural logarithm of 1 minus the inverse logit function of x

+Available since 2.0, vectorized in 2.13 +
+
+

Special functions

+ +

+

R lambert_w0(T x)
Implementation of the \(W_0\) branch of the Lambert W function, i.e., the solution to the equation \(W_0(x) \exp(W_0(x)) = x\)

+Available since 2.25 + +

+

R lambert_wm1(T x)
Implementation of the \(W_{-1}\) branch of the Lambert W function, i.e., the solution to the equation \(W_{-1}(x) \exp(W_{-1}(x)) = x\)

+Available since 2.25 +
+
+

Hypergeometric Functions

+

Hypergeometric functions refer to a power series of the form \[\begin{equation*} +_pF_q(a_1,...,a_p;b_1,...,b_q;z) = \sum_{n=0}^\infty \frac{(a_1)_n\cdot\cdot\cdot(a_p)_n}{(b_1)_n\cdot\cdot\cdot(b_q)_n} \frac{z^n}{n!} +\end{equation*}\] where \((a)_n\) is the Pochhammer symbol defined as \((a)_n = \frac{\Gamma(a+n)}{\Gamma(a)}\).

+

The gradients of the hypergeometric function are given by: \[\begin{equation*} +\frac{\partial }{\partial a_1} = + \sum_{k=0}^{\infty}{ + \frac + {\psi\left(k+a_1\right)\left(\prod_{j=1}^p\left(a_j\right)_k\right)z^k} + {k!\prod_{j=1}^q\left(b_j\right)_k}} + - \psi\left(a_1\right){}_pF_q(a_1,...,a_p;b_1,...,b_q;z) +\end{equation*}\] \[\begin{equation*} +\frac{\partial }{\partial b_1} = + \psi\left(b_1\right){}_pF_q(a_1,...,a_p;b_1,...,b_q;z) - + \sum_{k=0}^{\infty}{ + \frac + {\psi\left(k+b_1\right)\left(\prod_{j=1}^p\left(a_j\right)_k\right)z^k} + {k!\prod_{j=1}^q\left(b_j\right)_k}} +\end{equation*}\] \[\begin{equation*} + \frac{\partial }{\partial z} = + \frac{\prod_{j=1}^{p}a_j}{\prod_{j=1}^{q} b_j}{}_pF_q(a_1+1,...,a_p+1;b_1+1,...,b_q+1;z) +\end{equation*}\]

+

Stan provides both the generalized hypergeometric function as well as several special cases for particular values of p and q.

+ +

+

real hypergeometric_1F0(real a, real z)
Special case of the hypergeometric function with \(p=1\) and \(q=0\).

+Available since 2.37 + +

+

real hypergeometric_2F1(real a1, real a2, real b1, real z)
Special case of the hypergeometric function with \(p=2\) and \(q=1\). If the function does not meet convergence criteria for given inputs, the function will attempt to apply Euler’s transformation to improve convergence: \[\begin{equation*} +{}_2F_1(a_1,a_2, b_1, z)={}_2F_1(b_1 - a_1,a_2, b_1, \frac{z}{z-1})\cdot(1-z)^{-a_2} +\end{equation*}\]

+Available since 2.37 + +

+

real hypergeometric_3F2(T1 a, T2 b, real z)
Special case of the hypergeometric function with \(p=3\) and \(q=2\), where a and b are vectors of length 3 and 2, respectively.

+Available since 2.37 + +

+

real hypergeometric_pFq(T1 a, T2 b, real z)
Generalized hypergeometric function, where a and b are vectors of length p and q, respectively.

+Available since 2.37 + + + +
+
+ + Back to top

References

+
+Bowling, Shannon R., Mohammad T. Khasawneh, Sittichai Kaewkuekool, and Byung Rae Cho. 2009. “A Logistic Approximation to the Cumulative Normal Distribution.” Journal of Industrial Engineering and Management 2 (1): 114–27. +
+
+Wichura, Michael J. 1988. “Algorithm AS 241: The Percentage Points of the Normal Distribution.” Journal of the Royal Statistical Society. Series C (Applied Statistics) 37 (3): 477–84. http://www.jstor.org/stable/2347330. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/references.html b/docs/2_39/functions-reference/references.html new file mode 100644 index 000000000..8c2cd1b2a --- /dev/null +++ b/docs/2_39/functions-reference/references.html @@ -0,0 +1,821 @@ + + + + + + + + + +references + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ + + + +
+

References

+ + +
+ + Back to top
+ +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/removed_functions.html b/docs/2_39/functions-reference/removed_functions.html new file mode 100644 index 000000000..869196b5c --- /dev/null +++ b/docs/2_39/functions-reference/removed_functions.html @@ -0,0 +1,1209 @@ + + + + + + + + + +Removed Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Removed Functions

+

Functions which once existed in the Stan language and have since been replaced or removed are listed here.

+
+

multiply_log and binomial_coefficient_log functions

+

Removed: Two non-conforming functions ending in the suffix _log.

+

Replacement: Replace multiply_log(...) with lmultiply(...). Replace binomial_coefficient_log(...) with lchoose(...).

+

Removed In: Stan 2.33

+
+
+

get_lp() function

+

Removed: The built-in no-argument function get_lp().

+

Replacement: Use the no-argument function target() instead.

+

Removed In: Stan 2.33

+
+
+

fabs function

+

Removed: The unary function fabs.

+

Replacement: Use the unary function abs instead. Note that the return type for abs is different for integer overloads, but this replacement is safe due to Stan’s type promotion rules.

+

Removed In: Stan 2.33

+
+
+

Exponentiated quadratic covariance functions

+

These covariance functions have been replaced by those described in Gaussian Process Covariance Functions

+

With magnitude \(\alpha\) and length scale \(\rho\), the exponentiated quadratic kernel is:

+

\[ +k(x_i, x_j) = \alpha^2 \exp \left(-\dfrac{1}{2\rho^2} \sum_{d=1}^D (x_{i,d} - x_{j,d})^2 \right) +\]

+ +

+

matrix cov_exp_quad(row_vectors x, real alpha, real rho)
The covariance matrix with an exponentiated quadratic kernel of x.

+Available since 2.16, deprecated since 2.20, removed in 2.33 + +

+

matrix cov_exp_quad(vectors x, real alpha, real rho)
The covariance matrix with an exponentiated quadratic kernel of x.

+Available since 2.16, deprecated since 2.20, removed in 2.33 + +

+

matrix cov_exp_quad(array[] real x, real alpha, real rho)
The covariance matrix with an exponentiated quadratic kernel of x.

+Available since 2.16, deprecated since 2.20, removed in 2.33 + +

+

matrix cov_exp_quad(row_vectors x1, row_vectors x2, real alpha, real rho)
The covariance matrix with an exponentiated quadratic kernel of x1 and x2.

+Available since 2.18, deprecated since 2.20, removed in 2.33 + +

+

matrix cov_exp_quad(vectors x1, vectors x2, real alpha, real rho)
The covariance matrix with an exponentiated quadratic kernel of x1 and x2.

+Available since 2.18, deprecated since 2.20, removed in 2.33 + +

+

matrix cov_exp_quad(array[] real x1, array[] real x2, real alpha, real rho)
The covariance matrix with an exponentiated quadratic kernel of x1 and x2.

+Available since 2.18, deprecated since 2.20, removed in 2.33 +
+
+

Real arguments to logical operators operator&&, operator||, and operator!

+

Removed: A nonzero real number (even NaN) was interpreted as true and a zero was interpreted as false.

+

Replacement: Explicit x != 0 comparison is preferred instead.

+

Removed In: Stan 2.34

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/simplex_distributions.html b/docs/2_39/functions-reference/simplex_distributions.html new file mode 100644 index 000000000..8d7aafd24 --- /dev/null +++ b/docs/2_39/functions-reference/simplex_distributions.html @@ -0,0 +1,1256 @@ + + + + + + + + + +Simplex Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Simplex Distributions

+

The simplex probabilities have support on the unit \(K\)-simplex for a specified \(K\). A \(K\)-dimensional vector \(\theta\) is a unit \(K\)-simplex if \(\theta_k \geq 0\) for \(k \in \{1,\ldots,K\}\) and \(\sum_{k = 1}^K \theta_k = 1\).

+
+

Dirichlet distribution

+
+

Probability density function

+

If \(K \in \mathbb{N}\) and \(\alpha \in (\mathbb{R}^+)^{K}\), then for \(\theta \in \text{$K$-simplex}\),

+

\[\begin{equation*} +\text{Dirichlet}(\theta|\alpha) = +\frac{\Gamma \! \left( \sum_{k=1}^K \alpha_k \right)}{\prod_{k=1}^K \Gamma(\alpha_k)} \prod_{k=1}^K \theta_k^{\alpha_k -1} +\end{equation*}\]

+

Warning: If any of the components of \(\theta\) satisfies \(\theta_i = 0\) or \(\theta_i = 1\), then the probability is \(0\) and the log probability is \(-\infty\). Similarly, the distribution requires strictly positive parameters, with \(\alpha_i > 0\) for each \(i\).

+
+
+

Meaning of Dirichlet parameters

+

A symmetric Dirichlet prior is \([\alpha, \ldots, \alpha]^{\top}\). To code this in Stan,

+
 data {
+   int<lower=1> K;
+   real<lower=0> alpha;
+ }
+ generated quantities {
+   vector[K] theta = dirichlet_rng(rep_vector(alpha, K));
+ }
+

Taking \(K = 10\), here are the first five draws for \(\alpha = 1\). For \(\alpha = 1\), the distribution is uniform over simplexes.

+
 1) 0.17 0.05 0.07 0.17 0.03 0.13 0.03 0.03 0.27 0.05
+ 2) 0.08 0.02 0.12 0.07 0.52 0.01 0.07 0.04 0.01 0.06
+ 3) 0.02 0.03 0.22 0.29 0.17 0.10 0.09 0.00 0.05 0.03
+ 4) 0.04 0.03 0.21 0.13 0.04 0.01 0.10 0.04 0.22 0.18
+ 5) 0.11 0.22 0.02 0.01 0.06 0.18 0.33 0.04 0.01 0.01
+

That does not mean it’s uniform over the marginal probabilities of each element. As the size of the simplex grows, the marginal draws become more and more concentrated below (not around) \(1/K\). When one component of the simplex is large, the others must all be relatively small to compensate. For example, in a uniform distribution on \(10\)-simplexes, the probability that a component is greater than the mean of \(1/10\) is only 39%. Most of the posterior marginal probability mass for each component is in the interval \((0, 0.1)\).

+

When the \(\alpha\) value is small, the draws gravitate to the corners of the simplex. Here are the first five draws for \(\alpha = 0.001\).

+
 1) 3e-203 0e+00 2e-298 9e-106 1e+000 0e+00 0e+000 1e-047 0e+00 4e-279
+ 2) 1e+000 0e+00 5e-279 2e-014 1e-275 0e+00 3e-285 9e-147 0e+00 0e+000
+ 3) 1e-308 0e+00 1e-213 0e+000 0e+000 8e-75 0e+000 1e+000 4e-58 7e-112
+ 4) 6e-166 5e-65 3e-068 3e-147 0e+000 1e+00 3e-249 0e+000 0e+00 0e+000
+ 5) 2e-091 0e+00 0e+000 0e+000 1e-060 0e+00 4e-312 1e+000 0e+00 0e+000
+

Each row denotes a draw. Each draw has a single value that rounds to one and other values that are very close to zero or rounded down to zero.

+

As \(\alpha\) increases, the draws become increasingly uniform. For \(\alpha = 1000\),

+
 1) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10
+ 2) 0.10 0.10 0.09 0.10 0.10 0.10 0.11 0.10 0.10 0.10
+ 3) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10
+ 4) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10
+ 5) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10
+
+
+

Distribution statement

+

theta ~ dirichlet(alpha)

+

Increment target log probability density with dirichlet_lupdf(theta | alpha).

+Available since 2.0 + +

+
+
+

Stan functions

+

The Dirichlet probability functions are overloaded to allow the simplex \(\theta\) and prior counts (plus one) \(\alpha\) to be vectors or row vectors (or to mix the two types). The density functions are also vectorized, so they allow arrays of row vectors or vectors as arguments; see section vectorized function signatures for a description of vectorization.

+ +

+

real dirichlet_lpdf(vectors theta | vectors alpha)
The log of the Dirichlet density for simplex(es) theta given prior counts (plus one) alpha

+Available since 2.12, vectorized in 2.21 + +

+

real dirichlet_lupdf(vectors theta | vectors alpha)
The log of the Dirichlet density for simplex(es) theta given prior counts (plus one) alpha dropping constant additive terms

+Available since 2.25 + +

+

vector dirichlet_rng(vector alpha)
Generate a Dirichlet variate with prior counts (plus one) alpha; may only be used in transformed data and generated quantities blocks

+Available since 2.0 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/sparse_matrix_operations.html b/docs/2_39/functions-reference/sparse_matrix_operations.html new file mode 100644 index 000000000..e7596edd0 --- /dev/null +++ b/docs/2_39/functions-reference/sparse_matrix_operations.html @@ -0,0 +1,1217 @@ + + + + + + + + + +Sparse Matrix Operations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Sparse Matrix Operations

+

For sparse matrices, for which many elements are zero, it is more efficient to use specialized representations to save memory and speed up matrix arithmetic (including derivative calculations). Given Stan’s implementation, there is substantial space (memory) savings by using sparse matrices. Because of the ease of optimizing dense matrix operations, speed improvements only arise at 90% or even greater sparsity; below that level, dense matrices are faster but use more memory.

+

Because of this speedup and space savings, it may even be useful to read in a dense matrix and convert it to a sparse matrix before multiplying it by a vector. This chapter covers a very specific form of sparsity consisting of a sparse matrix multiplied by a dense vector.

+
+

Compressed row storage

+

Sparse matrices are represented in Stan using compressed row storage (CSR). For example, the matrix \[\begin{equation*} +A = \begin{bmatrix} 19 & 27 & 0 & 0 \\ 0 & 0 & 0 & 0 \\ 0 & 0 & 0 & 52 \\ 81 & 0 & 95 & 33 \end{bmatrix} +\end{equation*}\] is translated into a vector of the non-zero real values, read by row from the matrix \(A\), \[\begin{equation*} +w(A) = \begin{bmatrix} 19 & 27 & 52 & 81 & 95 & 33 \end{bmatrix}^{\top} \! \! \! , +\end{equation*}\] an array of integer column indices for the values, \[\begin{equation*} +v(A) = \begin{bmatrix} 1 & 2 & 4 & 1 & 3 & 4 \end{bmatrix} \! , +\end{equation*}\] and an array of integer indices indicating where in \(w(A)\) a given row’s values start, \[\begin{equation*} u(A) = \begin{bmatrix} 1 & 3 & 3 & 4 & 7 \end{bmatrix} \! , +\end{equation*}\] with a padded value at the end to guarantee that \[\begin{equation*} u(A)[n+1] - u(A)[n] \end{equation*}\] is the number of non-zero elements in row \(n\) of the matrix (here \(2\), \(0\), \(1\), and \(3\)). Note that because the second row has no non-zero elements both the second and third elements of \(u(A)\) correspond to the third element of \(w(A)\), which is \(52\). The values \((w(A), \, v(A), \, u(A))\) are sufficient to reconstruct \(A\).

+

The values are structured so that there is a real value and integer column index for each non-zero entry in the array, plus one integer for each row of the matrix, plus one for padding. There is also underlying storage for internal container pointers and sizes. The total memory usage is roughly \(12 K + M\) bytes plus a small constant overhead, which is often considerably fewer bytes than the \(M \times +N\) required to store a dense matrix. Even more importantly, zero values do not introduce derivatives under multiplication or addition, so many storage and evaluation steps are saved when sparse matrices are multiplied.

+
+
+

Conversion functions

+

Conversion functions between dense and sparse matrices are provided.

+
+

Dense to sparse conversion

+

Converting a dense matrix \(m\) to a sparse representation produces a vector \(w\) and two integer arrays, \(u\) and \(v\).

+ +

+

vector csr_extract_w(matrix a)
Return non-zero values in matrix a; see section compressed row storage.

+Available since 2.8 + +

+

array[] int csr_extract_v(matrix a)
Return column indices for values in csr_extract_w(a); see compressed row storage.

+Available since 2.8 + +

+

array[] int csr_extract_u(matrix a)
Return array of row starting indices for entries in csr_extract_w(a) followed by the size of csr_extract_w(a) plus one; see section compressed row storage.

+Available since 2.8 + +

+

tuple(vector, array[] int, array[] int) csr_extract(matrix a)
Return all three components of the CSR representation of the matrix a; see section compressed row storage. This function is equivalent to (csr_extract_w(a), csr_extract_v(a), csr_extract_u(a)).

+Available since 2.33 +
+
+

Sparse to dense conversion

+

To convert a sparse matrix representation to a dense matrix, there is a single function.

+ +

+

matrix csr_to_dense_matrix(int m, int n, vector w, array[] int v, array[] int u)
Return dense \(\text{m} \times \text{n}\) matrix with non-zero matrix entries w, column indices v, and row starting indices u; the vector w and array v must be the same size (corresponding to the total number of nonzero entries in the matrix), array v must have index values bounded by n, array u must have length equal to m + 1 and contain index values bounded by the number of nonzeros (except for the last entry, which must be equal to the number of nonzeros plus one). See section compressed row storage for more details.

+Available since 2.10 +
+
+
+

Sparse matrix arithmetic

+
+

Sparse matrix multiplication

+

The only supported operation is the multiplication of a sparse matrix \(A\) and a dense vector \(b\) to produce a dense vector \(A\,b\). Multiplying a dense row vector \(b\) and a sparse matrix \(A\) can be coded using transposition as \[\begin{equation*} +b \, A = (A^{\top} \, b^{\top})^{\top}, +\end{equation*}\] but care must be taken to represent \(A^{\top}\) rather than \(A\) as a sparse matrix.

+ +

+

vector csr_matrix_times_vector(int m, int n, vector w, array[] int v, array[] int u, vector b)
Multiply the \(\text{m} \times \text{n}\) matrix represented by values w, column indices v, and row start indices u by the vector b; see compressed row storage.

+Available since 2.18 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/transform_functions.html b/docs/2_39/functions-reference/transform_functions.html new file mode 100644 index 000000000..7311ee935 --- /dev/null +++ b/docs/2_39/functions-reference/transform_functions.html @@ -0,0 +1,1469 @@ + + + + + + + + + +Variable Transformation Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Variable Transformation Functions

+

Variable transformation functions provide implementations of the built-in constraining and unconstraining transforms defined in the Stan Reference Manual.

+

For each of the built-in variable transforms there are three functions named after the transform with differing suffixes: an _unconstrain function that maps from the constrained space back to free variables (the “transform”), a _constrain function that maps from free variables to constrained variables (the “inverse transform”), and a _jacobian function, which computes the same value as the _constrain function while also incrementing the Jacobian accumulator with the log Jacobian determinant.

+

For this page, variables named y are unconstrained, while variables named x are in the constrained space. The unconstraining functions will reject if their input does not satisfy the declared constraint.

+
+

Transforms for scalars

+

These transformations take unconstrained values on the real number line and either constrain them to a subset of the real line with a lower bound, upper bound, or both, or provide an affine map that does not constrain values but can help with shifting and scaling them so they are more standardized.

+

The functions are all overloaded to apply to containers elementwise. If the y argument is a container, the others must be either scalars or containers of exactly the same size.

+
+

Lower bounds

+

These functions perform the transform and inverse transform described in the Lower Bounded Scalar section.

+ +

+

reals lower_bound_constrain(reals y, reals lb)
Takes a value y and lower bound lb and returns the corresponding value which is greater than lb (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).

+Available since 2.37 + +

+

reals lower_bound_jacobian(reals y, reals lb)
Takes a value y and lower bound lb and returns the corresponding value which is greater than lb (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

reals lower_bound_unconstrain(reals x, reals lb)
Takes a value x which is greater than lb and returns the corresponding unconstrained value.

+Available since 2.37 +
+
+

Upper bounds

+

These functions perform the transform and inverse transform described in the Upper Bounded Scalar section.

+ +

+

reals upper_bound_constrain(reals y, reals ub)
Takes a value y and upper bound ub and returns the corresponding value which is less than ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).

+Available since 2.37 + +

+

reals upper_bound_jacobian(reals y, reals ub)
Takes a value y and upper bound ub and returns the corresponding value which is less than ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

reals upper_bound_unconstrain(reals x, reals ub)
Takes a value x which is less than ub and returns the corresponding unconstrained value.

+Available since 2.37 +
+
+

Upper and lower bounds

+

These functions perform the transform and inverse transform described in the Lower and Upper Bounded Scalar section.

+ +

+

reals lower_upper_bound_constrain(reals y, reals lb, reals ub)
Takes a value y, lower bound lb, and upper bound ub and returns the corresponding value which is bounded between lb and ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).

+Available since 2.37 + +

+

reals lower_upper_bound_jacobian(reals y, reals lb, reals ub)
Takes a value y, lower bound lb, and upper bound ub and returns the corresponding value which is bounded between lb and ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

reals lower_upper_bound_unconstrain(reals x, reals lb, reals ub)
Takes a value x which is bounded between lb and ub and returns the corresponding unconstrained value.

+Available since 2.37 +
+
+

Affine transforms

+

These functions perform the transform and inverse transform described in the Affinely Transformed Scalar section.

+ +

+

reals offset_multiplier_constrain(reals y, reals offset, reals mult)
Takes a value y, shift offset, and scale mult and returns a rescaled and shifted value.

+Available since 2.37 + +

+

reals offset_multiplier_jacobian(reals y, reals offset, reals mult)
Takes a value y, shift offset, and scale mult and returns a rescaled and shifted value.

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

reals offset_multiplier_unconstrain(reals x, reals offset, reals mult)
Takes a value x, shift offset, and scale mult and returns a value which has been un-scaled and un-shifted.

+Available since 2.37 +
+
+
+

Transforms for constrained vectors

+

These functions constrain entire vectors holistically. Some transforms also change the length of the vector, as noted in the documentation.

+

Where vectors is used, this indicates that either a vector or a (possibly multidimensional) array of vectors may be provided. The array will be processed element by element.

+
+

Ordered vectors

+

These functions perform the transform and inverse transform described in the Ordered Vector section.

+ +

+

vectors ordered_constrain(vectors y)
Takes a free vector y and returns a vector with elements in ascending order.

+Available since 2.37 + +

+

vectors ordered_jacobian(vectors y)
Takes a free vector y and returns a vector with elements in ascending order.

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors ordered_unconstrain(vectors x)
Takes an ordered vector x and returns the corresponding free vector.

+Available since 2.37 +
+
+

Positive ordered vectors

+

These functions perform the transform and inverse transform described in the Positive Ordered Vector section.

+ +

+

vectors positive_ordered_constrain(vectors y)
Takes a free vector y and returns a vector with positive elements in ascending order.

+Available since 2.37 + +

+

vectors positive_ordered_jacobian(vectors y)
Takes a free vector y and returns a vector with positive elements in ascending order.

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors positive_ordered_unconstrain(vectors x)
Takes an ordered vector x with positive entries and returns the corresponding free vector.

+Available since 2.37 +
+
+

Simplexes

+

These functions perform the transform and inverse transform described in the Unit Simplex section.

+ +

+

vectors simplex_constrain(vectors y)
Takes a free vector y and returns a simplex (a vector such that each element is between 0 and 1, and the sum of the elements is 1, up to rounding errors).

+

This returned vector will have one extra element compared to the input y.

+Available since 2.37 + +

+

vectors simplex_jacobian(vectors y)
Takes a free vector y and returns a simplex (a vector such that each element is between 0 and 1, and the sum of the elements is 1, up to rounding errors).

+

This returned vector will have one extra element compared to the input y.

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors simplex_unconstrain(vectors x)
Takes a simplex x and returns the corresponding free vector.

+

This returned vector will have one fewer element compared to the input x.

+Available since 2.37 +
+
+

Sum-to-zero vectors

+

These functions perform the transform and inverse transform described in the Zero Sum Vector section.

+ +

+

vectors sum_to_zero_constrain(vectors y)
Takes a free vector y and returns a vector such that the elements sum to 0.

+

This returned vector will have one extra element compared to the input y.

+Available since 2.37 + +

+

vectors sum_to_zero_jacobian(vectors y)
Takes a free vector y and returns a vector such that the elements sum to 0.

+

The returned vector will have one extra element compared to the input y.

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors sum_to_zero_unconstrain(vectors x)
Takes a vector x with elements that sum to 0 and returns the corresponding free vector.

+

This returned vector will have one fewer element compared to the input x.

+Available since 2.37 +
+
+

Unit vectors

+

These functions perform the transform and inverse transform described in the Unit Vector section.

+ +

+

vectors unit_vectors_constrain(vectors y)
Takes a free vector y and returns a vector with unit length, i.e., norm2(unit_vectors_constrain(y)) == 1 for any y that has a positive and finite norm itself (if y does not, the function rejects). Note that, in particular, this implies the function rejects if given a vector of all zeros.

+Available since 2.37 + +

+

vectors unit_vectors_jacobian(vectors y)
Takes a free vector y and returns a vector with unit length. This function rejects if given a vector of all zeros.

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors unit_vectors_unconstrain(vectors x)
Takes a vector x of unit length and returns the corresponding free vector.

+Available since 2.37 +
+
+
+

Transforms for constrained matrices

+

Similarly to the above, vectors means a vector or array thereof, and matrices means a matrix or array thereof.

+
+

Cholesky factors of correlation matrices

+

These functions perform the transform and inverse transform described in the Cholesky Factors of Correlation Matrices section.

+ +

+

matrices cholesky_factor_corr_constrain(vectors y, int K)
Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K Cholesky factor of a correlation matrix. This matrix is a Cholesky factor of a covariance matrix (i.e., a lower triangular matrix with a strictly positive diagonal), but with the additional constraint that each row is of unit length.

+Available since 2.37 + +

matrices cholesky_factor_corr_jacobian(vectors y, int K)
Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K Cholesky factor of a correlation matrix.

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors cholesky_factor_corr_unconstrain(matrices x)
Takes x, a (\(K \times K\)) matrix which is the Cholesky factor of a correlation matrix (a lower triangular matrix with a strictly positive diagonal and each row having unit length), and returns the corresponding free vector of length \(\binom{K}{2}\).

+Available since 2.37 +
+
+

Cholesky factors of covariance matrices

+

These functions perform the transform and inverse transform described in the Cholesky Factors of Covariance Matrices section.

+ +

+

matrices cholesky_factor_cov_constrain(vectors y, int M, int N)
Takes a free vector y and integers M and N and returns the M by N Cholesky factor of a covariance matrix. This matrix is a lower triangular matrix \(L\), with a strictly positive diagonal, such that \(L^T L\) is positive definite.

+

Note that y must have length N + choose(N, 2) + (M - N) * N, and M must be greater than or equal to N.

+Available since 2.37 + +

+

matrices cholesky_factor_cov_jacobian(vectors y, int M, int N)
Takes a free vector y and integers M and N and returns the M by N Cholesky factor of a covariance matrix. This matrix is a lower triangular matrix \(L\), with a strictly positive diagonal, such that \(L^T L\) is positive definite.

+

Note that y must have length N + choose(N, 2) + (M - N) * N, and M must be greater than or equal to N.

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors cholesky_factor_cov_unconstrain(matrices x)
Takes a \(M \times N\) matrix x which is a Cholesky factor of a covariance matrix (a matrix \(L\) such that \(L^T L\) is positive definite) and returns the corresponding free vector of length \(N + \binom{N}{2} + (M - N)N\).

+Available since 2.37 +
+
+

Correlation matrices

+

These functions perform the transform and inverse transform described in the Correlation Matrices section.

+ +

+

matrices corr_matrix_constrain(vectors y, int K)
Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K correlation matrix (a positive definite matrix with a unit diagonal).

+Available since 2.37 + +

+

matrices corr_matrix_jacobian(vectors y, int K)
Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K correlation matrix (a positive definite matrix with a unit diagonal).

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors corr_matrix_unconstrain(matrices x)
Takes a \(K \times K\) matrix x which is a correlation matrix (a positive definite matrix with a unit diagonal) and returns the corresponding free vector of size \(\binom{K}{2}\).

+Available since 2.37 +
+
+

Covariance matrices

+

These functions perform the transform and inverse transform described in the Covariance Matrices section.

+ +

+

matrices cov_matrix_constrain(vectors y, int K)
Takes a vector y and integer K, where length(y) == K + choose(K, 2), and returns a K by K covariance matrix (a positive definite matrix).

+Available since 2.37 + +

+

matrices cov_matrix_jacobian(vectors y, int K)
Takes a vector y and integer K, where length(y) == K + choose(K, 2), and returns a K by K covariance matrix (a positive definite matrix).

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

vectors cov_matrix_unconstrain(matrices x)
Takes a \(K \times K\) positive definite matrix x and returns the corresponding free vector of size \(K + \binom{K}{2}\).

+Available since 2.37 +
+
+

Column-stochastic matrices

+

These functions perform the transform and inverse transform described in the Stochastic Matrix section for column (left) stochastic matrices.

+ +

+

matrices stochastic_column_constrain(matrices y)
Takes a free matrix y of size \(N \times M\) and returns a left stochastic matrix (a matrix where each column is a simplex) of size \(N+1 \times M\).

+Available since 2.37 + +

+

matrices stochastic_column_jacobian(matrices y)
Takes a free matrix y of size \(N \times M\) and returns a left stochastic matrix (a matrix where each column is a simplex) of size \(N+1 \times M\).

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

matrices stochastic_column_unconstrain(matrices x)
Takes a left stochastic matrix x of size \(N+1 \times M\) and returns the corresponding free matrix of size \(N \times M\).

+Available since 2.37 +
+
+

Row-stochastic matrices

+

These functions perform the transform and inverse transform described in the Stochastic Matrix section for row (right) stochastic matrices.

+ +

+

matrices stochastic_row_constrain(matrices y)
Takes a free matrix y of size \(N \times M\) and returns a right stochastic matrix (a matrix where each row is a simplex) of size \(N \times M+1\).

+Available since 2.37 + +

+

matrices stochastic_row_jacobian(matrices y)
Takes a free matrix y of size \(N \times M\) and returns a right stochastic matrix (a matrix where each row is a simplex) of size \(N \times M+1\).

+

This function also increments the Jacobian accumulator with the corresponding change of variables adjustment.

+Available since 2.37 + +

+

matrices stochastic_row_unconstrain(matrices x)
Takes a right stochastic matrix x of size \(N \times M+1\) and returns the corresponding free matrix of size \(N \times M\).

+Available since 2.37 +
+
+

Sum-to-zero matrices

+

The sum-to-zero matrix transforms map between unconstrained values and matrices whose rows and columns sum to zero; full definitions of the function and Jacobian can be found in the sum-to-zero matrix section of the Reference Manual.

+ +

+

matrices sum_to_zero_constrain(matrices y)
The constraining function maps an unconstrained N x M matrix to an (N + 1) x (M + 1) matrix for which the rows and columns all sum to zero. This function covers the incrementation of the log Jacobian because the incrementation is zero.

+

This returned matrix will have one extra row and column compared to the input y.

+Available since 2.37 + +

+

matrices sum_to_zero_jacobian(matrices y)
The constraining function maps an unconstrained N x M matrix to an (N + 1) x (M + 1) matrix for which the rows and columns all sum to zero. Because the log Jacobian incrementation is zero, this is identical to sum_to_zero_constrain.

+

This returned matrix will have one extra row and column compared to the input y.

+Available since 2.37 + +

+

matrices sum_to_zero_unconstrain(matrices x)
This function maps a matrix with rows that sum to zero and columns that sum to zero to an unconstrained matrix with one fewer row and one fewer column.

+Available since 2.37 + + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/unbounded_continuous_distributions.html b/docs/2_39/functions-reference/unbounded_continuous_distributions.html new file mode 100644 index 000000000..807dfed4c --- /dev/null +++ b/docs/2_39/functions-reference/unbounded_continuous_distributions.html @@ -0,0 +1,1805 @@ + + + + + + + + + +Unbounded Continuous Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Unbounded Continuous Distributions

+

The unbounded univariate continuous probability distributions have support on all real numbers.

+
+

Normal distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\) and \(\sigma \in \mathbb{R}^+\), then for \(y \in +\mathbb{R}\), \[\begin{equation*} +\text{Normal}(y|\mu,\sigma) = +\frac{1}{\sqrt{2 \pi} \ \sigma} \exp\left( - \, \frac{1}{2} \left(\frac{y - \mu}{\sigma} \right)^2 \right) \!. +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ normal(mu, sigma)

+

Increment target log probability density with normal_lupdf(y | mu, sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real normal_lpdf(reals y | reals mu, reals sigma)
The log of the normal density of y given location mu and scale sigma

+Available since 2.12 + +

+

real normal_lupdf(reals y | reals mu, reals sigma)
The log of the normal density of y given location mu and scale sigma dropping constant additive terms.

+Available since 2.25 + +

+

real normal_cdf(reals y | reals mu, reals sigma)
The cumulative normal distribution of y given location mu and scale sigma; normal_cdf will underflow to 0 for \(\frac{{y}-{\mu}}{{\sigma}}\) below -37.5 and overflow to 1 for \(\frac{{y}-{\mu}}{{\sigma}}\) above 8.25; the function Phi_approx is more robust in the tails, but must be scaled and translated for anything other than a standard normal.

+Available since 2.0 + +

+

real normal_lcdf(reals y | reals mu, reals sigma)
The log of the cumulative normal distribution of y given location mu and scale sigma; normal_lcdf will underflow to \(-\infty\) for \(\frac{{y}-{\mu}}{{\sigma}}\) below -37.5 and overflow to 0 for \(\frac{{y}-{\mu}}{{\sigma}}\) above 8.25; log(Phi_approx(...)) is more robust in the tails, but must be scaled and translated for anything other than a standard normal.

+Available since 2.12 + +

+

real normal_lccdf(reals y | reals mu, reals sigma)
The log of the complementary cumulative normal distribution of y given location mu and scale sigma; normal_lccdf will overflow to 0 for \(\frac{{y}-{\mu}}{{\sigma}}\) below -37.5 and underflow to \(-\infty\) for \(\frac{{y}-{\mu}}{{\sigma}}\) above 8.25; log1m(Phi_approx(...)) is more robust in the tails, but must be scaled and translated for anything other than a standard normal.

+Available since 2.15 + +

+

R normal_rng(reals mu, reals sigma)
Generate a normal variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+

Standard normal distribution

+

The standard normal distribution is so-called because its parameters are the units for their respective operations—the location (mean) is zero and the scale (standard deviation) one. The standard normal is parameter-free, and the unit parameters allow considerable simplification of the expression for the density. \[\begin{equation*} +\text{StdNormal}(y) \ = \ \text{Normal}(y \mid 0, 1) \ = \ \frac{1}{\sqrt{2 \pi}} \, \exp \left( \frac{-y^2}{2} \right)\!. +\end{equation*}\] Up to a proportion on the log scale, where Stan computes, \[\begin{equation*} +\log \text{Normal}(y \mid 0, 1) \ = \ \frac{-y^2}{2} + \text{const}. +\end{equation*}\] With no logarithm, no subtraction, and no division by a parameter, the standard normal log density is much more efficient to compute than the normal log density with constant location \(0\) and scale \(1\).

+
+
+

Distribution statement

+

y ~ std_normal()

+

Increment target log probability density with std_normal_lupdf(y).

+Available since 2.19 + +

+
+
+

Stan functions

+ +

+

real std_normal_lpdf(reals y)
The standard normal (location zero, scale one) log probability density of y.

+Available since 2.18 + +

+

real std_normal_lupdf(reals y)
The standard normal (location zero, scale one) log probability density of y dropping constant additive terms.

+Available since 2.25 + +

+

real std_normal_cdf(reals y)
The cumulative standard normal distribution of y; std_normal_cdf will underflow to 0 for \(y\) below -37.5 and overflow to 1 for \(y\) above 8.25; the function Phi_approx is more robust in the tails.

+Available since 2.21 + +

+

real std_normal_lcdf(reals y)
The log of the cumulative standard normal distribution of y; std_normal_lcdf will underflow to \(-\infty\) for \(y\) below -37.5 and overflow to 0 for \(y\) above 8.25; log(Phi_approx(...)) is more robust in the tails.

+Available since 2.21 + +

+

real std_normal_lccdf(reals y)
The log of the complementary cumulative standard normal distribution of y; std_normal_lccdf will overflow to 0 for \(y\) below -37.5 and underflow to \(-\infty\) for \(y\) above 8.25; log1m(Phi_approx(...)) is more robust in the tails.

+Available since 2.21 + +

+

R std_normal_qf(T x)
Returns the value of the inverse standard normal cdf \(\Phi^{-1}\) at the specified quantile x. The std_normal_qf is equivalent to the inv_Phi function.

+Available since 2.31 + +

+

R std_normal_log_qf(T x)
Return the value of the inverse standard normal cdf \(\Phi^{-1}\) evaluated at the log of the specified quantile x. This function is equivalent to std_normal_qf(exp(x)) but is more numerically stable.

+Available since 2.31 + +

+

real std_normal_rng()
Generate a normal variate with location zero and scale one; may only be used in transformed data and generated quantities blocks.

+Available since 2.21 +
+
+
+

Normal-id generalized linear model (linear regression)

+

Stan also supplies a single function for a generalized linear model with normal distribution and identity link function, i.e. a function for a linear regression. This provides a more efficient implementation of linear regression than a manually written regression in terms of a normal distribution and matrix multiplication.

+
+

Probability distribution function

+

If \(x\in \mathbb{R}^{n\cdot m}, \alpha \in \mathbb{R}^n, \beta\in +\mathbb{R}^m, \sigma\in \mathbb{R}^+\), then for \(y \in \mathbb{R}^n\), \[\begin{equation*} \text{NormalIdGLM}(y|x, \alpha, \beta, \sigma) = \prod_{1\leq i +\leq n}\text{Normal}(y_i|\alpha_i + x_i\cdot \beta, \sigma). \end{equation*}\]

+
+
+

Distribution statement

+

y ~ normal_id_glm(x, alpha, beta, sigma)

+

Increment target log probability density with normal_id_glm_lupdf(y | x, alpha, beta, sigma).

+Available since 2.19 + +

+
+
+

Stan functions

+ +

+

real normal_id_glm_lpdf(real y | matrix x, real alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.29 + +

+

real normal_id_glm_lupdf(real y | matrix x, real alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.29 + +

+

real normal_id_glm_lpdf(real y | matrix x, vector alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.29 + +

+

real normal_id_glm_lupdf(real y | matrix x, vector alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.29 + +

+

real normal_id_glm_lpdf(real y | matrix x, real alpha, vector beta, vector sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.23 + +

+

real normal_id_glm_lupdf(real y | matrix x, real alpha, vector beta, vector sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.25 + +

+

real normal_id_glm_lpdf(real y | matrix x, vector alpha, vector beta, vector sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.23 + +

+

real normal_id_glm_lupdf(real y | matrix x, vector alpha, vector beta, vector sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.25 + +

+

real normal_id_glm_lpdf(vector y | row_vector x, real alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.29 + +

+

real normal_id_glm_lupdf(vector y | row_vector x, real alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.29 + +

+

real normal_id_glm_lpdf(vector y | row_vector x, vector alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.29 + +

+

real normal_id_glm_lupdf(vector y | row_vector x, vector alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.29 + +

+

real normal_id_glm_lpdf(vector y | matrix x, real alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.23 + +

+

real normal_id_glm_lupdf(vector y | matrix x, real alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.23 + +

+

real normal_id_glm_lpdf(vector y | matrix x, vector alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.23 + +

+

real normal_id_glm_lupdf(vector y | matrix x, vector alpha, vector beta, real sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.23 + +

+

real normal_id_glm_lpdf(vector y | matrix x, real alpha, vector beta, vector sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.30 + +

+

real normal_id_glm_lupdf(vector y | matrix x, real alpha, vector beta, vector sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.30 + +

+

real normal_id_glm_lpdf(vector y | matrix x, vector alpha, vector beta, vector sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma.

+Available since 2.30 + +

+

real normal_id_glm_lupdf(vector y | matrix x, vector alpha, vector beta, vector sigma)
The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.

+Available since 2.30 +
+
+
+

Exponentially modified normal distribution

+

Exponentially modified Gaussian describes the distribution of \(Z = X + Y\) when \(X\) and \(Y\) are independent and \(X\) is normally distributed (with mean \(\mu\) and standard deviation \(\sigma\)) and \(Y\) is exponentially distributed (with rate \(\lambda\)).

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\), \(\sigma \in \mathbb{R}^+\), and \(\lambda \in +\mathbb{R}^+\), then for \(y \in \mathbb{R}\), \[\begin{equation*} +\text{ExpModNormal}(y|\mu,\sigma,\lambda) = \frac{\lambda}{2} \ \exp +\left(\frac{\lambda}{2} \left(2\mu + \lambda \sigma^2 - +2y\right)\right) \text{erfc}\left(\frac{\mu + \lambda\sigma^2 - +y}{\sqrt{2}\sigma}\right) . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ exp_mod_normal(mu, sigma, lambda)

+

Increment target log probability density with exp_mod_normal_lupdf(y | mu, sigma, lambda).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real exp_mod_normal_lpdf(reals y | reals mu, reals sigma, reals lambda)
The log of the exponentially modified normal density of y given location mu, scale sigma, and rate lambda

+Available since 2.18 + +

+

real exp_mod_normal_lupdf(reals y | reals mu, reals sigma, reals lambda)
The log of the exponentially modified normal density of y given location mu, scale sigma, and rate lambda dropping constant additive terms

+Available since 2.25 + +

+

real exp_mod_normal_cdf(reals y | reals mu, reals sigma, reals lambda)
The exponentially modified normal cumulative distribution function of y given location mu, scale sigma, and rate lambda

+Available since 2.0 + +

+

real exp_mod_normal_lcdf(reals y | reals mu, reals sigma, reals lambda)
The log of the exponentially modified normal cumulative distribution function of y given location mu, scale sigma, and rate lambda

+Available since 2.18 + +

+

real exp_mod_normal_lccdf(reals y | reals mu, reals sigma, reals lambda)
The log of the exponentially modified normal complementary cumulative distribution function of y given location mu, scale sigma, and rate lambda

+Available since 2.18 + +

+

R exp_mod_normal_rng(reals mu, reals sigma, reals lambda)
Generate an exponentially modified normal variate with location mu, scale sigma, and rate lambda; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Skew normal distribution

+
+

Probability density function

+

If \(\xi \in \mathbb{R}\), \(\omega \in \mathbb{R}^+\), and \(\alpha \in +\mathbb{R}\), then for \(y \in \mathbb{R}\), \[\begin{equation*} \text{SkewNormal}(y \mid +\xi, \omega, \alpha) = \frac{1}{\omega\sqrt{2\pi}} \ \exp\left( - \, +\frac{1}{2} \left( \frac{y - \xi}{\omega} \right)^2 +\right) \ \left(1 + \text{erf}\left( \alpha\left(\frac{y - +\xi}{\omega\sqrt{2}}\right)\right)\right) . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ skew_normal(xi, omega, alpha)

+

Increment target log probability density with skew_normal_lupdf(y | xi, omega, alpha).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real skew_normal_lpdf(reals y | reals xi, reals omega, reals alpha)
The log of the skew normal density of y given location xi, scale omega, and shape alpha

+Available since 2.16 + +

+

real skew_normal_lupdf(reals y | reals xi, reals omega, reals alpha)
The log of the skew normal density of y given location xi, scale omega, and shape alpha dropping constant additive terms

+Available since 2.25 + +

+

real skew_normal_cdf(reals y | reals xi, reals omega, reals alpha)
The skew normal cumulative distribution function of y given location xi, scale omega, and shape alpha

+Available since 2.16 + +

+

real skew_normal_lcdf(reals y | reals xi, reals omega, reals alpha)
The log of the skew normal cumulative distribution function of y given location xi, scale omega, and shape alpha

+Available since 2.18 + +

+

real skew_normal_lccdf(reals y | reals xi, reals omega, reals alpha)
The log of the skew normal complementary cumulative distribution function of y given location xi, scale omega, and shape alpha

+Available since 2.18 + +

+

R skew_normal_rng(reals xi, reals omega, real alpha)
Generate a skew normal variate with location xi, scale omega, and shape alpha; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Student-t distribution

+
+

Probability density function

+

If \(\nu \in \mathbb{R}^+\), \(\mu \in \mathbb{R}\), and \(\sigma \in +\mathbb{R}^+\), then for \(y \in \mathbb{R}\), \[\begin{equation*} +\text{StudentT}(y|\nu,\mu,\sigma) = \frac{\Gamma\left((\nu + +1)/2\right)} {\Gamma(\nu/2)} \ \frac{1}{\sqrt{\nu \pi} \ \sigma} +\ \left( 1 + \frac{1}{\nu} \left(\frac{y - \mu}{\sigma}\right)^2 +\right)^{-(\nu + 1)/2} \! . \end{equation*}\]

+
+
+

Distribution statement

+

y ~ student_t(nu, mu, sigma)

+

Increment target log probability density with student_t_lupdf(y | nu, mu, sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real student_t_lpdf(reals y | reals nu, reals mu, reals sigma)
The log of the Student-\(t\) density of y given degrees of freedom nu, location mu, and scale sigma

+Available since 2.12 + +

+

real student_t_lupdf(reals y | reals nu, reals mu, reals sigma)
The log of the Student-\(t\) density of y given degrees of freedom nu, location mu, and scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real student_t_cdf(reals y | reals nu, reals mu, reals sigma)
The Student-\(t\) cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma

+Available since 2.0 + +

+

real student_t_lcdf(reals y | reals nu, reals mu, reals sigma)
The log of the Student-\(t\) cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma

+Available since 2.12 + +

+

real student_t_lccdf(reals y | reals nu, reals mu, reals sigma)
The log of the Student-\(t\) complementary cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma

+Available since 2.12 + +

+

R student_t_rng(reals nu, reals mu, reals sigma)
Generate a Student-\(t\) variate with degrees of freedom nu, location mu, and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Cauchy distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\) and \(\sigma \in \mathbb{R}^+\), then for \(y \in \mathbb{R}\), \[\begin{equation*} +\text{Cauchy}(y|\mu,\sigma) = \frac{1}{\pi \sigma} \ \frac{1}{1 + \left((y - \mu)/\sigma\right)^2} . +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ cauchy(mu, sigma)

+

Increment target log probability density with cauchy_lupdf(y | mu, sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real cauchy_lpdf(reals y | reals mu, reals sigma)
The log of the Cauchy density of y given location mu and scale sigma

+Available since 2.12 + +

+

real cauchy_lupdf(reals y | reals mu, reals sigma)
The log of the Cauchy density of y given location mu and scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real cauchy_cdf(reals y | reals mu, reals sigma)
The Cauchy cumulative distribution function of y given location mu and scale sigma

+Available since 2.0 + +

+

real cauchy_lcdf(reals y | reals mu, reals sigma)
The log of the Cauchy cumulative distribution function of y given location mu and scale sigma

+Available since 2.12 + +

+

real cauchy_lccdf(reals y | reals mu, reals sigma)
The log of the Cauchy complementary cumulative distribution function of y given location mu and scale sigma

+Available since 2.12 + +

+

R cauchy_rng(reals mu, reals sigma)
Generate a Cauchy variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Double exponential (Laplace) distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\) and \(\sigma \in \mathbb{R}^+\), then for \(y \in +\mathbb{R}\), \[\begin{equation*} \text{DoubleExponential}(y|\mu,\sigma) = +\frac{1}{2\sigma} \exp \left( - \, \frac{|y - \mu|}{\sigma} \right) +. \end{equation*}\] Note that the double exponential distribution is parameterized in terms of the scale, in contrast to the exponential distribution (see section exponential distribution), which is parameterized in terms of inverse scale.

+

The double-exponential distribution can be defined as a compound exponential-normal distribution (Ding and Blitzstein 2018). Using the inverse scale parameterization for the exponential distribution, and the standard deviation parameterization for the normal distribution, one can write \[\begin{equation*} \alpha \sim +\mathsf{Exponential}\left( \frac{1}{2 \sigma^2} \right) \end{equation*}\] and \[\begin{equation*} \beta \mid +\alpha \sim \mathsf{Normal}(\mu, \sqrt{\alpha}), \end{equation*}\] then \[\begin{equation*} \beta \sim +\mathsf{DoubleExponential}(\mu, \sigma ). \end{equation*}\] This may be used to code a non-centered parameterization by taking \[\begin{equation*} \beta^{\text{raw}} \sim +\mathsf{Normal}(0, 1) \end{equation*}\] and defining \[\begin{equation*} \beta = \mu + \sqrt{\alpha} \, +\beta^{\text{raw}}. \end{equation*}\]

+
+
+

Distribution statement

+

y ~ double_exponential(mu, sigma)

+

Increment target log probability density with double_exponential_lupdf(y | mu, sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real double_exponential_lpdf(reals y | reals mu, reals sigma)
The log of the double exponential density of y given location mu and scale sigma

+Available since 2.12 + +

+

real double_exponential_lupdf(reals y | reals mu, reals sigma)
The log of the double exponential density of y given location mu and scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real double_exponential_cdf(reals y | reals mu, reals sigma)
The double exponential cumulative distribution function of y given location mu and scale sigma

+Available since 2.0 + +

+

real double_exponential_lcdf(reals y | reals mu, reals sigma)
The log of the double exponential cumulative distribution function of y given location mu and scale sigma

+Available since 2.12 + +

+

real double_exponential_lccdf(reals y | reals mu, reals sigma)
The log of the double exponential complementary cumulative distribution function of y given location mu and scale sigma

+Available since 2.12 + +

+

R double_exponential_rng(reals mu, reals sigma)
Generate a double exponential variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Logistic distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\) and \(\sigma \in \mathbb{R}^+\), then for \(y \in \mathbb{R}\), \[\begin{equation*} +\text{Logistic}(y|\mu,\sigma) = \frac{1}{\sigma} \ \exp\!\left( - \, \frac{y - \mu}{\sigma} \right) +\ \left(1 + \exp \!\left( - \, \frac{y - \mu}{\sigma} \right) \right)^{\!-2} \! . +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ logistic(mu, sigma)

+

Increment target log probability density with logistic_lupdf(y | mu, sigma).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real logistic_lpdf(reals y | reals mu, reals sigma)
The log of the logistic density of y given location mu and scale sigma

+Available since 2.12 + +

+

real logistic_lupdf(reals y | reals mu, reals sigma)
The log of the logistic density of y given location mu and scale sigma dropping constant additive terms

+Available since 2.25 + +

+

real logistic_cdf(reals y | reals mu, reals sigma)
The logistic cumulative distribution function of y given location mu and scale sigma

+Available since 2.0 + +

+

real logistic_lcdf(reals y | reals mu, reals sigma)
The log of the logistic cumulative distribution function of y given location mu and scale sigma

+Available since 2.12 + +

+

real logistic_lccdf(reals y | reals mu, reals sigma)
The log of the logistic complementary cumulative distribution function of y given location mu and scale sigma

+Available since 2.12 + +

+

R logistic_rng(reals mu, reals sigma)
Generate a logistic variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Gumbel distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\) and \(\beta \in \mathbb{R}^+\), then for \(y \in \mathbb{R}\), \[\begin{equation*} +\text{Gumbel}(y|\mu,\beta) = \frac{1}{\beta} \ \exp\left(-\frac{y-\mu}{\beta}-\exp\left(-\frac{y-\mu}{\beta}\right)\right) . +\end{equation*}\]

+
+
+

Distribution statement

+

y ~ gumbel(mu, beta)

+

Increment target log probability density with gumbel_lupdf(y | mu, beta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real gumbel_lpdf(reals y | reals mu, reals beta)
The log of the gumbel density of y given location mu and scale beta

+Available since 2.12 + +

+

real gumbel_lupdf(reals y | reals mu, reals beta)
The log of the gumbel density of y given location mu and scale beta dropping constant additive terms

+Available since 2.25 + +

+

real gumbel_cdf(reals y | reals mu, reals beta)
The gumbel cumulative distribution function of y given location mu and scale beta

+Available since 2.0 + +

+

real gumbel_lcdf(reals y | reals mu, reals beta)
The log of the gumbel cumulative distribution function of y given location mu and scale beta

+Available since 2.12 + +

+

real gumbel_lccdf(reals y | reals mu, reals beta)
The log of the gumbel complementary cumulative distribution function of y given location mu and scale beta

+Available since 2.12 + +

+

R gumbel_rng(reals mu, reals beta)
Generate a gumbel variate with location mu and scale beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.18 +
+
+
+

Skew double exponential distribution

+
+

Probability density function

+

If \(\mu \in \mathbb{R}\), \(\sigma \in \mathbb{R}^+\) and \(\tau \in [0, 1]\), then for \(y \in \mathbb{R}\), \[\begin{aligned} +& \text{SkewDoubleExponential} (y|\mu,\sigma, \tau) = \\ +& \qquad \qquad \frac{2 \tau (1 - \tau) }{\sigma} \exp \left[ - \frac{2}{\sigma} \left[ \left(1 - \tau \right) I(y < \mu) (\mu - y) + \tau I(y > \mu)(y-\mu) \right] \right] +\end{aligned}\]

+
+
+

Distribution statement

+

y ~ skew_double_exponential(mu, sigma, tau)

+

Increment target log probability density with skew_double_exponential_lupdf(y | mu, sigma, tau).

+Available since 2.28 + +

+
+
+

Stan functions

+ +

+

real skew_double_exponential_lpdf(reals y | reals mu, reals sigma, reals tau)
The log of the skew double exponential density of y given location mu, scale sigma and skewness tau

+Available since 2.28 + +

+

real skew_double_exponential_lupdf(reals y | reals mu, reals sigma, reals tau)
The log of the skew double exponential density of y given location mu, scale sigma and skewness tau dropping constant additive terms

+Available since 2.28 + +

+

real skew_double_exponential_cdf(reals y | reals mu, reals sigma, reals tau)
The skew double exponential cumulative distribution function of y given location mu, scale sigma and skewness tau

+Available since 2.28 + +

+

real skew_double_exponential_lcdf(reals y | reals mu, reals sigma, reals tau)
The log of the skew double exponential cumulative distribution function of y given location mu, scale sigma and skewness tau

+Available since 2.28 + +

+

real skew_double_exponential_lccdf(reals y | reals mu, reals sigma, reals tau)
The log of the skew double exponential complementary cumulative distribution function of y given location mu, scale sigma and skewness tau

+Available since 2.28 + +

+

R skew_double_exponential_rng(reals mu, reals sigma, reals tau)
Generate a skew double exponential variate with location mu, scale sigma and skewness tau; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.

+Available since 2.28 + + + +
+
+
+ + Back to top

References

+
+Ding, Peng, and Joseph K. Blitzstein. 2018. “On the Gaussian Mixture Representation of the Laplace Distribution.” The American Statistician 72 (2): 172–74. https://doi.org/10.1080/00031305.2017.1291448. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/unbounded_discrete_distributions.html b/docs/2_39/functions-reference/unbounded_discrete_distributions.html new file mode 100644 index 000000000..d06a2783e --- /dev/null +++ b/docs/2_39/functions-reference/unbounded_discrete_distributions.html @@ -0,0 +1,1662 @@ + + + + + + + + + +Unbounded Discrete Distributions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Unbounded Discrete Distributions

+

The unbounded discrete distributions have support over the natural numbers (i.e., the non-negative integers).

+
+

Negative binomial distribution

+

For the negative binomial distribution Stan uses the parameterization described in Gelman et al. (2013). For alternative parameterizations, see section negative binomial glm.

+
+

Probability mass function

+

If \(\alpha \in \mathbb{R}^+\) and \(\beta \in \mathbb{R}^+\), then for \(n +\in \mathbb{N}\), \[\begin{equation*} \text{NegBinomial}(n~|~\alpha,\beta) = \binom{n + +\alpha - 1}{\alpha - 1} \, \left( \frac{\beta}{\beta+1} +\right)^{\!\alpha} \, \left( \frac{1}{\beta + 1} \right)^{\!n} \!. \end{equation*}\]

+

The mean and variance of a random variable \(n \sim +\text{NegBinomial}(\alpha,\beta)\) are given by \[\begin{equation*} \mathbb{E}[n] = +\frac{\alpha}{\beta} \ \ \text{ and } \ \ \text{Var}[n] = +\frac{\alpha}{\beta^2} (\beta + 1). \end{equation*}\]

+
+
+

Distribution statement

+

n ~ neg_binomial(alpha, beta)

+

Increment target log probability density with neg_binomial_lupmf(n | alpha, beta).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real neg_binomial_lpmf(ints n | reals alpha, reals beta)
The log negative binomial probability mass of n given shape alpha and inverse scale beta

+Available since 2.12 + +

+

real neg_binomial_lupmf(ints n | reals alpha, reals beta)
The log negative binomial probability mass of n given shape alpha and inverse scale beta dropping constant additive terms

+Available since 2.25 + +

+

real neg_binomial_cdf(ints n | reals alpha, reals beta)
The negative binomial cumulative distribution function of n given shape alpha and inverse scale beta

+Available since 2.0 + +

+

real neg_binomial_lcdf(ints n | reals alpha, reals beta)
The log of the negative binomial cumulative distribution function of n given shape alpha and inverse scale beta

+Available since 2.12 + +

+

real neg_binomial_lccdf(ints n | reals alpha, reals beta)
The log of the negative binomial complementary cumulative distribution function of n given shape alpha and inverse scale beta

+Available since 2.12 + +

+

R neg_binomial_rng(reals alpha, reals beta)
Generate a negative binomial variate with shape alpha and inverse scale beta; may only be used in transformed data and generated quantities blocks. alpha \(/\) beta must be less than \(2 ^ {29}\). For a description of argument and return types, see section vectorized function signatures.

+Available since 2.18 +
+
+
+

Negative binomial distribution (alternative parameterization)

+

Stan also provides an alternative parameterization of the negative binomial distribution directly using a mean (i.e., location) parameter and a parameter that controls overdispersion relative to the square of the mean. The section on the negative binomial distribution (log alternative parameterization), below, provides a second alternative parameterization directly in terms of the log mean.

+
+

Probability mass function

+

The first parameterization is for \(\mu \in \mathbb{R}^+\) and \(\phi \in +\mathbb{R}^+\), which for \(n \in \mathbb{N}\) is defined as \[\begin{equation*} +\text{NegBinomial2}(n \, | \, \mu, \phi) += \binom{n + \phi - 1}{n} \, \left( \frac{\mu}{\mu+\phi} \right)^{\!n} \, \left( \frac{\phi}{\mu+\phi} \right)^{\!\phi} \!. +\end{equation*}\]

+

The mean and variance of a random variable \(n \sim +\text{NegBinomial2}(\mu,\phi)\) are \[\begin{equation*} +\mathbb{E}[n] = \mu \ \ \ \text{ and } \ \ \ \text{Var}[n] = \mu + \frac{\mu^2}{\phi}. +\end{equation*}\] Recall that \(\text{Poisson}(\mu)\) has variance \(\mu\), so \(\mu^2 / \phi > 0\) is the additional variance of the negative binomial above that of the Poisson with mean \(\mu\). So the inverse of parameter \(\phi\) controls the overdispersion, scaled by the square of the mean, \(\mu^2\).

+
+
+

Distribution statement

+

n ~ neg_binomial_2(mu, phi)

+

Increment target log probability density with neg_binomial_2_lupmf(n | mu, phi).

+Available since 2.3 + +

+
+
+

Stan functions

+ +

+

real neg_binomial_2_lpmf(ints n | reals mu, reals phi)
The log negative binomial probability mass of n given location mu and precision phi.

+Available since 2.20 + +

+

real neg_binomial_2_lupmf(ints n | reals mu, reals phi)
The log negative binomial probability mass of n given location mu and precision phi dropping constant additive terms.

+Available since 2.25 + +

+

real neg_binomial_2_cdf(ints n | reals mu, reals phi)
The negative binomial cumulative distribution function of n given location mu and precision phi.

+Available since 2.6 + +

+

real neg_binomial_2_lcdf(ints n | reals mu, reals phi)
The log of the negative binomial cumulative distribution function of n given location mu and precision phi.

+Available since 2.12 + +

+

real neg_binomial_2_lccdf(ints n | reals mu, reals phi)
The log of the negative binomial complementary cumulative distribution function of n given location mu and precision phi.

+Available since 2.12 + +

+

R neg_binomial_2_rng(reals mu, reals phi)
Generate a negative binomial variate with location mu and precision phi; may only be used in transformed data and generated quantities blocks. mu must be less than \(2 ^ {29}\). For a description of argument and return types, see section vectorized function signatures.

+Available since 2.18 +
+
+
+

Negative binomial distribution (log alternative parameterization)

+

Related to the parameterization in section negative binomial, alternative parameterization, the following parameterization uses a log mean parameter \(\eta = \log(\mu)\), defined for \(\eta \in \mathbb{R}\), \(\phi \in \mathbb{R}^+\), so that for \(n \in +\mathbb{N}\), \[\begin{equation*} \text{NegBinomial2Log}(n \, | \, \eta, \phi) = +\text{NegBinomial2}(n | \exp(\eta), \phi). \end{equation*}\] This alternative may be used for sampling, as a function, and for random number generation, but as of yet, there are no CDFs implemented for it. This is especially useful for log-linear negative binomial regressions.

+
+

Distribution statement

+

n ~ neg_binomial_2_log(eta, phi)

+

Increment target log probability density with neg_binomial_2_log_lupmf(n | eta, phi).

+Available since 2.3 + +

+
+
+

Stan functions

+ +

+

real neg_binomial_2_log_lpmf(ints n | reals eta, reals phi)
The log negative binomial probability mass of n given log-location eta and inverse overdispersion parameter phi.

+Available since 2.20 + +

+

real neg_binomial_2_log_lupmf(ints n | reals eta, reals phi)
The log negative binomial probability mass of n given log-location eta and inverse overdispersion parameter phi dropping constant additive terms.

+Available since 2.25 + +

+

R neg_binomial_2_log_rng(reals eta, reals phi)
Generate a negative binomial variate with log-location eta and inverse overdispersion control phi; may only be used in transformed data and generated quantities blocks. eta must be less than \(29 \log 2\). For a description of argument and return types, see section vectorized function signatures.

+Available since 2.18 +
+
+
+

Negative-binomial-2-log generalized linear model (negative binomial regression)

+

Stan also supplies a single function for a generalized linear model with negative binomial distribution and log link function, i.e. a function for a negative binomial regression. This provides a more efficient implementation of negative binomial regression than a manually written regression in terms of a negative binomial distribution and matrix multiplication.

+
+

Probability mass function

+

If \(x\in \mathbb{R}^{n\cdot m}, \alpha \in \mathbb{R}^n, \beta\in +\mathbb{R}^m, \phi\in \mathbb{R}^+\), then for \(y \in \mathbb{N}^n\), \[\begin{equation*} +\text{NegBinomial2LogGLM}(y~|~x, \alpha, \beta, \phi) = \prod_{1\leq i +\leq n}\text{NegBinomial2}(y_i~|~\exp(\alpha_i + x_i\cdot \beta), +\phi). \end{equation*}\]

+
+
+

Distribution statement

+

y ~ neg_binomial_2_log_glm(x, alpha, beta, phi)

+

Increment target log probability density with neg_binomial_2_log_glm_lupmf(y | x, alpha, beta, phi).

+Available since 2.19 + +

+
+
+

Stan functions

+ +

+

real neg_binomial_2_log_glm_lpmf(int y | matrix x, real alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.

+Available since 2.23 + +

+

real neg_binomial_2_log_glm_lupmf(int y | matrix x, real alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.

+Available since 2.25 + +

+

real neg_binomial_2_log_glm_lpmf(int y | matrix x, vector alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.

+Available since 2.23 + +

+

real neg_binomial_2_log_glm_lupmf(int y | matrix x, vector alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.

+Available since 2.25 + +

+

real neg_binomial_2_log_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.

+Available since 2.23 + +

+

real neg_binomial_2_log_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.

+Available since 2.25 + +

+

real neg_binomial_2_log_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.

+Available since 2.23 + +

+

real neg_binomial_2_log_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.

+Available since 2.25 + +

+

real neg_binomial_2_log_glm_lpmf(array[] int y | matrix x, real alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.

+Available since 2.18 + +

+

real neg_binomial_2_log_glm_lupmf(array[] int y | matrix x, real alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.

+Available since 2.25 + +

+

real neg_binomial_2_log_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.

+Available since 2.18 + +

+

real neg_binomial_2_log_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta, real phi)
The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.

+Available since 2.25 +
+
+
+

Poisson distribution

+
+

Probability mass function

+

If \(\lambda \in \mathbb{R}^+\), then for \(n \in \mathbb{N}\), \[\begin{equation*} +\text{Poisson}(n|\lambda) = \frac{1}{n!} \, \lambda^n \, +\exp(-\lambda). \end{equation*}\]

+
+
+

Distribution statement

+

n ~ poisson(lambda)

+

Increment target log probability density with poisson_lupmf(n | lambda).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real poisson_lpmf(ints n | reals lambda)
The log Poisson probability mass of n given rate lambda

+Available since 2.12 + +

+

real poisson_lupmf(ints n | reals lambda)
The log Poisson probability mass of n given rate lambda dropping constant additive terms

+Available since 2.25 + +

+

real poisson_cdf(ints n | reals lambda)
The Poisson cumulative distribution function of n given rate lambda

+Available since 2.0 + +

+

real poisson_lcdf(ints n | reals lambda)
The log of the Poisson cumulative distribution function of n given rate lambda

+Available since 2.12 + +

+

real poisson_lccdf(ints n | reals lambda)
The log of the Poisson complementary cumulative distribution function of n given rate lambda

+Available since 2.12 + +

+

R poisson_rng(reals lambda)
Generate a Poisson variate with rate lambda; may only be used in transformed data and generated quantities blocks. lambda must be less than \(2^{30}\). For a description of argument and return types, see section vectorized function signatures.

+Available since 2.18 +
+
+
+

Poisson distribution, log parameterization

+

Stan also provides a parameterization of the Poisson using the log rate \(\alpha = \log \lambda\) as a parameter. This is useful for log-linear Poisson regressions so that the predictor does not need to be exponentiated and passed into the standard Poisson probability function.

+
+

Probability mass function

+

If \(\alpha \in \mathbb{R}\), then for \(n \in \mathbb{N}\), \[\begin{equation*} +\text{PoissonLog}(n|\alpha) = \frac{1}{n!} \, \exp \left(n\alpha - +\exp(\alpha) \right). \end{equation*}\]

+
+
+

Distribution statement

+

n ~ poisson_log(alpha)

+

Increment target log probability density with poisson_log_lupmf(n | alpha).

+Available since 2.0 + +

+
+
+

Stan functions

+ +

+

real poisson_log_lpmf(ints n | reals alpha)
The log Poisson probability mass of n given log rate alpha

+Available since 2.12 + +

+

real poisson_log_lupmf(ints n | reals alpha)
The log Poisson probability mass of n given log rate alpha dropping constant additive terms

+Available since 2.25 + +

+

R poisson_log_rng(reals alpha)
Generate a Poisson variate with log rate alpha; may only be used in transformed data and generated quantities blocks. alpha must be less than \(30 \log 2\). For a description of argument and return types, see section vectorized function signatures.

+Available since 2.18 +
+
+
+

Poisson-log generalized linear model (Poisson regression)

+

Stan also supplies a single function for a generalized linear model with Poisson distribution and log link function, i.e. a function for a Poisson regression. This provides a more efficient implementation of Poisson regression than a manually written regression in terms of a Poisson distribution and matrix multiplication.

+
+

Probability mass function

+

If \(x\in \mathbb{R}^{n\cdot m}, \alpha \in \mathbb{R}^n, \beta\in +\mathbb{R}^m\), then for \(y \in \mathbb{N}^n\), \[\begin{equation*} +\text{PoissonLogGLM}(y|x, \alpha, \beta) = \prod_{1\leq i \leq +n}\text{Poisson}(y_i|\exp(\alpha_i + x_i\cdot \beta)). \end{equation*}\]

+
+
+

Distribution statement

+

y ~ poisson_log_glm(x, alpha, beta)

+

Increment target log probability density with poisson_log_glm_lupmf(y | x, alpha, beta).

+Available since 2.19 + +

+
+
+

Stan functions

+ +

+

real poisson_log_glm_lpmf(int y | matrix x, real alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta.

+Available since 2.23 + +

+

real poisson_log_glm_lupmf(int y | matrix x, real alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.

+Available since 2.25 + +

+

real poisson_log_glm_lpmf(int y | matrix x, vector alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta.

+Available since 2.23 + +

+

real poisson_log_glm_lupmf(int y | matrix x, vector alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.

+Available since 2.25 + +

+

real poisson_log_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta.

+Available since 2.23 + +

+

real poisson_log_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.

+Available since 2.25 + +

+

real poisson_log_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta.

+Available since 2.23 + +

+

real poisson_log_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.

+Available since 2.25 + +

+

real poisson_log_glm_lpmf(array[] int y | matrix x, real alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta.

+Available since 2.18 + +

+

real poisson_log_glm_lupmf(array[] int y | matrix x, real alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.

+Available since 2.25 + +

+

real poisson_log_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta.

+Available since 2.18 + +

+

real poisson_log_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta)
The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.

+Available since 2.25 +
+
+
+

Beta negative binomial distribution

+
+

Probability mass function

+

If \(r \in \mathbb{R}^+\), \(\alpha \in \mathbb{R}^+\), and \(\beta \in \mathbb{R}^+\), then for \(n \in \mathbb{N}\), \[\begin{equation*} +\text{BetaNegBinomial}(n|r,\alpha,\beta) = \frac {\Gamma (n+r )}{n!\;\Gamma (r )} +\frac {\mathrm {B} (\beta+n,\alpha +r )}{\mathrm {B} (\beta,\alpha )}. \end{equation*}\]

+
+
+

Distribution statement

+

n ~ beta_neg_binomial(r,alpha,beta)

+

Increment target log probability density with beta_neg_binomial_lupmf(n | r, alpha, beta).

+Available since 2.36 + +

+
+
+

Stan functions

+ +

+

real beta_neg_binomial_lpmf(ints n | reals r, reals alpha, reals beta)
The log beta negative binomial probability mass of n given parameters r, alpha and beta.

+Available since 2.36 + +

+

real beta_neg_binomial_lupmf(ints n | reals r, reals alpha, reals beta)
The log beta negative binomial probability mass of n given parameters r, alpha and beta dropping constant additive terms.

+Available since 2.36 + +

+

real beta_neg_binomial_cdf(ints n | reals r, reals alpha, reals beta)
The beta negative binomial cumulative distribution function of n given parameters r, alpha and beta.

+Available since 2.36 + +

+

real beta_neg_binomial_lcdf(ints n | reals r, reals alpha, reals beta)
The log of the beta negative binomial cumulative distribution function of n given parameters r, alpha and beta.

+Available since 2.36 + +

+

real beta_neg_binomial_lccdf(ints n | reals r, reals alpha, reals beta)
The log of the beta negative binomial complementary cumulative distribution function of n given parameters r, alpha and beta.

+Available since 2.36 + +

+

R beta_neg_binomial_rng(reals r, reals alpha, reals beta)
Generate a beta negative binomial variate with parameters r, alpha and beta; may only be used in transformed data and generated quantities blocks. r \(\cdot\) beta \(/\) (alpha\(-1\)) must be less than \(2 ^ {29}\). For a description of argument and return types, see section vectorized function signatures.

+Available since 2.36 +
+
+
+

Yule Simon distribution

+
+

Probability mass function

+

If \(\alpha \in \mathbb{R}^+\), then for \(n \in \mathbb{N}^+=\{1,2,...\}\), \[\begin{equation*} +\text{YuleSimon}(n|\alpha) = \alpha \, \mathrm{B}(\alpha + 1, n) = \alpha \, \frac{\Gamma(n) \, \Gamma(\alpha + 1)}{\Gamma(n + \alpha + 1)}. +\end{equation*}\]

+
+
+

Distribution statement

+

n ~ yule_simon(alpha)

+

Increment target log probability density with yule_simon_lupmf(n | alpha).

+Available since 2.39 + +

+
+
+

Stan functions

+ +

+

real yule_simon_lpmf(ints n | reals alpha)
The log Yule Simon probability mass of n given parameter alpha.

+Available since 2.39 + +

+

real yule_simon_lupmf(ints n | reals alpha)
The log Yule Simon probability mass of n given parameter alpha dropping constant additive terms.

+Available since 2.39 + +

+

real yule_simon_cdf(ints n | reals alpha)
The Yule Simon cumulative distribution function of n given parameter alpha.

+Available since 2.39 + +

+

real yule_simon_lcdf(ints n | reals alpha)
The log of the Yule Simon cumulative distribution function of n given parameter alpha.

+Available since 2.39 + +

+

real yule_simon_lccdf(ints n | reals alpha)
The log of the Yule Simon complementary cumulative distribution function of n given parameter alpha.

+Available since 2.39 + +

+

R yule_simon_rng(reals alpha)
Generate a Yule Simon variate with parameter alpha; may only be used in transformed data and generated quantities blocks. alpha \(/\) (alpha\(-1\)) must be less than \(2 ^ {29}\). For a description of argument and return types, see section vectorized function signatures.

+Available since 2.39 + + + +
+
+
+ + Back to top

References

+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/functions-reference/void_functions.html b/docs/2_39/functions-reference/void_functions.html new file mode 100644 index 000000000..3cf06665a --- /dev/null +++ b/docs/2_39/functions-reference/void_functions.html @@ -0,0 +1,1148 @@ + + + + + + + + + +Void Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Void Functions

+

Stan supports a few special statements for printing or for signaling an issue with the program.

+

Although print, reject, and fatal_error appear to have the syntax of functions, they are actually special kinds of statements with slightly different form and behavior than other functions. First, they are the constructs that allow a variable number of arguments. Second, they are the only constructs to accept string literals (e.g., "hello world") as arguments. Third, they have no effect on the log density function and operate solely through side effects.

+

The special keyword void is used for their return type because they behave like variadic functions with void return type, even though they are special kinds of statements built in to the language.

+ +
+

Reject statement

+

The reject statement has the same syntax as the print statement, accepting an arbitrary number of arguments of any type (including string literals). The effect of executing a reject statement is to throw an exception internally that terminates the current iteration with a rejection (the behavior of which will depend on the algorithmic context in which it occurs).

+ +

+

void reject(T1 x1,..., TN xN)
Reject the current iteration and print the values denoted by the arguments x1 through xN on the output message stream. There are no spaces between items in the print, but a line feed (LF; Unicode U+000A; C++ literal '\n') is inserted at the end of the printed line. The types T1 through TN can be any of Stan’s built-in numerical types or double quoted strings of characters (bytes).

+Available since 2.18 +
+
+

Fatal error statement

+

The fatal error statement has the same syntax as the print and reject statements, accepting an arbitrary number of arguments of any type (including string literals). The effect of executing a fatal_error statement is to throw an exception internally that terminates the algorithm completely. It can be viewed as an unrecoverable version of reject, and as such should be used only when exiting the algorithm is the only option.

+ +

+

void fatal_error(T1 x1,..., TN xN)
Print the values denoted by the arguments x1 through xN on the output message stream and then exit the currently running algorithm. There are no spaces between items in the print, but a line feed (LF; Unicode U+000A; C++ literal '\n') is inserted at the end of the printed line. The types T1 through TN can be any of Stan’s built-in numerical types or double quoted strings of characters (bytes).

+Available since 2.35 + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/img/logo_tm.png b/docs/2_39/img/logo_tm.png new file mode 100644 index 000000000..48c9769c7 Binary files /dev/null and b/docs/2_39/img/logo_tm.png differ diff --git a/docs/2_39/img/ulam-fermiac.png b/docs/2_39/img/ulam-fermiac.png new file mode 100644 index 000000000..6eecc6afe Binary files /dev/null and b/docs/2_39/img/ulam-fermiac.png differ diff --git a/docs/2_39/index.html b/docs/2_39/index.html new file mode 100644 index 000000000..5ba34cc12 --- /dev/null +++ b/docs/2_39/index.html @@ -0,0 +1,1129 @@ + + + + + + + + + + +Stan Documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ +
+
+

Stan Documentation

+

Version 2.39

+
+ + + +
+ +
+
Author
+
+

Stan Development Team

+
+
+ + + +
+ + + +
+ + +

+

This is the official documentation for Stan.

+ +

There are also separate installation and getting started guides for CmdStan (pdf), the command-line interface to the Stan inference engine, and the R, Python, and Julia interfaces.

+
+

Older Versions

+

This documentation is for Stan 2.39. Older versions of each of the documents linked above can be found in the table below:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
VersionStan Reference ManualStan Users GuideCmdStan GuideStan Functions Reference
2.39html pdfhtml pdfhtml pdfhtml pdf
2.38html pdfhtml pdfhtml pdfhtml pdf
2.37html pdfhtml pdfhtml pdfhtml pdf
2.36html pdfhtml pdfhtml pdfhtml pdf
2.35html pdfhtml pdfhtml pdfhtml pdf
2.34html pdfhtml pdfhtml pdfhtml pdf
2.33html pdfhtml pdfhtml pdfhtml pdf
2.32html pdfhtml pdfhtml pdfhtml pdf
2.31html pdfhtml pdfhtml pdfhtml pdf
2.30html pdfhtml pdfhtml pdfhtml pdf
2.29html pdfhtml pdfhtml pdfhtml pdf
2.28html pdfhtml pdfhtml pdfhtml pdf
2.27html pdfhtml pdfhtml pdfhtml pdf
2.26html pdfhtml pdfhtml pdfhtml pdf
2.25html pdfhtml pdfhtml pdfhtml pdf
2.24html pdfhtml pdfhtml pdfhtml pdf
2.23html pdfhtml pdfhtmlhtml pdf
2.22html pdfhtml pdfhtml pdf
2.21html pdfhtml pdfhtml pdf
2.20html pdfhtml pdfhtml pdf
2.19html pdfhtml pdfhtml pdf
2.18html pdfhtml pdfhtml pdf
+

Prior to version 2.18, all documentation was part of a single document called the Stan User’s Guide and Reference Manual. These versions are still available for download as PDFs:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
VersionStan User’s Guide and Reference Manual
2.17pdf
2.16pdf
2.15pdf
2.14pdf
2.13pdf
2.12pdf
2.11pdf
2.10pdf
2.9pdf
2.8pdf
2.7pdf
2.6pdf
2.5pdf
2.4pdf
2.3pdf
2.2pdf
2.1pdf
2.0pdf
1.3pdf
1.2pdf
1.1pdf
1.0pdf
+ +
+

Licensing

+ + + +
+
+ + Back to top
+ +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/quarto-config/quarto_styles.css b/docs/2_39/quarto-config/quarto_styles.css new file mode 100644 index 000000000..c796b6023 --- /dev/null +++ b/docs/2_39/quarto-config/quarto_styles.css @@ -0,0 +1,299 @@ +/* quarto website styling */ + +@media (min-width: 1020px) { + .navbar-brand-container { + margin-right: 1em; + } +} + +@media (max-width: 1060px) and (min-width: 991.98px) { + #navbarCollapse ul:last-of-type a.nav-link { + padding-left: 0.25em; + padding-right: 0.25em; + } + + .navbar #quarto-search { + margin-left: 0.1em; + } +} + +/* navbar sizing -- important to avoid odd-colored gaps */ + +.navbar-logo { + max-height: 44px; +} + +/* Remove top and bottom padding from navbar image */ +.navbar-brand { + padding-top: 0; + padding-bottom: 0; +} + +html { + scroll-padding-top: 61px; /* height of header */ +} + +/* controls what padding is on page-load before JS overwrites */ +body.nav-fixed { + padding-top: 61px; +} + +.btn-action-primary { + color: var(--stan-bg); + background-color: var(--stan-secondary) !important; +} + +.btn-action-primary:hover { + color: white; +} + +.btn-action { + min-width: 165px; + border-radius: 30px; + border: none; +} + +/* stan-dev custom styling */ + +a { + color: var(--stan-secondary); +} +a.nav-link.active { + color: var(--stan-secondary); +} +a.sidebar-item-text.sidebar-link.active { + color: var(--stan-secondary); +} +.sidebar.a.active { + color: var(--stan-secondary); +} + +div [data-bs-target^="#quarto-sidebar-section"] { + font-weight: bold; + font-style: italic; + color: var(--stan-secondary); +} + +code { + white-space: inherit; + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} + +code:not(.sourceCode) { + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} + +p code { + white-space: inherit; + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} + +p code:not(.sourceCode) { + color: var(--bs-dark-warm); + 
background-color: var(--stan-bg); +} + +li code:not(.sourceCode) { + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} + +td code:not(.sourceCode) { + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} + +pre { + word-break: normal; + word-wrap: normal; + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} +pre code { + white-space: inherit; + margin: 0; + padding: 0; + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} + +pre.sourceCode { + white-space: inherit; + margin: 0; + padding: 0; + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} + +#quarto-appendix { + color: var(--bs-dark-warm); + background-color: var(--stan-bg); +} + +.align-equations { + text-align: left; + margin-left: 2em; /* Adjust as needed */ +} + +.book .book-body .page-wrapper .page-inner section.normal pre { + font-family: "Lucida Console", Monaco, Menlo, monospaced; + margin-left: 2em; + margin-below: 0.6em; + margin-above: -0.3em; + padding: 0; +} + +.sidebar.sidebar-navigation > * { + padding-top: 0em; +} + +#quarto-sidebar + > div.sidebar-menu-container + > ul + > li:nth-child(1) + > div + > a + > span { + font-weight: bold; + font-style: italic; + font-size: 1.3em; + color: var(--stan-secondary); + margin-above: -20em; + padding: 0; +} + + +/* custom CSS for the install page selection */ + +.grid-container { + display: grid; + grid-template-columns: auto repeat(4, 1fr); + gap: 0px; + width: 80%; +} +.option { + border: 1px solid var(--stan-dark); + display: flex; + justify-content: center; + align-items: center; + cursor: pointer; +} +.option.selected { + background-color: var(--stan-highlight); + color: var(--stan-dark); +} +.option.disabled { + cursor: not-allowed; + color: var(--stan-highlight); +} +.grid-item { + padding: 10px; + border: 1px solid var(--stan-dark); + color: var(--stan-dark); +} + +.flex-grid { + display: flex; + flex-wrap: wrap; + overflow: hidden; + margin-left: 1px; +} + +.flex-grid 
.col { + flex: 1 1 100px; + margin-top: 0.25rem; +} + +/* when we think we're all on one line, remove the double bars */ +@container (min-width: 500px) { + .grid-item + .grid-item { + border-left: none; + } +} +/* otherwise, put a little space */ +@container (max-width: 500px) { + .flex-grid { + gap: 0.25rem; + } + + .flex-grid .col { + margin-top: 0; + } +} + +.row-label { + padding: 10px; + color: var(--stan-dark); + background-color: var(--stan-bg); + font-weight: bold; + text-align: left; + width: 20%; +} + +/* on small screens */ +@container (max-width: 700px) { + /* Move the label above, rather than to the side */ + .row-label { + flex-basis: 100%; + } +} + +.highlight-box { + background-color: var(--stan-highlight); + padding-left: 10px; +} + +#install-grid, +#prerequsite-box, +#install-box { + container-type: inline-size; +} + +/* Table Styles */ +table { + width: 100%; + border-collapse: collapse; + border-top: 1px solid var(--stan-dark); /* Adds top border to the table */ + border-bottom: 1px solid var(--stan-dark); /* Adds bottom border to the table */ +} + +th, +td { + border: none; /* Removes borders from table cells */ + padding: 0.5em; /* Adjusts padding as needed */ +} + +thead th { + background-color: var(--stan-bg); + color: var(--stan-dark); +} + +tbody td:first-child { + background-color: var(--stan-bg); + color: var(--stan-dark); +} + +/* functions reference */ +.index-container { + display: flex; + flex-flow: row nowrap; + align-items: flex-end; + justify-content: space-between; +} + +a.unlink { + color: inherit; + text-decoration: none; +} + +span.detail { + font-size: small; + color: var(--quarto-text-muted); +} + +strong a.unlink code { + font-weight: bolder; +} diff --git a/docs/2_39/reference-manual-2_39.pdf b/docs/2_39/reference-manual-2_39.pdf new file mode 100644 index 000000000..d36bfff9d Binary files /dev/null and b/docs/2_39/reference-manual-2_39.pdf differ diff --git a/docs/2_39/reference-manual/analysis.html 
b/docs/2_39/reference-manual/analysis.html new file mode 100644 index 000000000..70de8e8bb --- /dev/null +++ b/docs/2_39/reference-manual/analysis.html @@ -0,0 +1,1359 @@ + + + + + + + + + +Posterior Analysis + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Posterior Analysis

+

Stan uses Markov chain Monte Carlo (MCMC) techniques to generate draws from the posterior distribution for full Bayesian inference. Markov chain Monte Carlo (MCMC) methods were developed for situations in which it is not straightforward to make independent draws (Metropolis et al. 1953).

+

Stan’s variational inference algorithm provides draws from the variational approximation to the posterior which may be analyzed just as any other MCMC output, despite the fact that it is not actually a Markov chain.

+

Stan’s Laplace algorithm produces a sample from a normal approximation centered at the mode of a distribution in the unconstrained space. If the mode is a maximum a posteriori (MAP) estimate, the sample provides an estimate of the mean and standard deviation of the posterior distribution. If the mode is a maximum likelihood estimate (MLE), the sample provides an estimate of the standard error of the likelihood.

+
+

Markov chains

+

A Markov chain is a sequence of random variables \(\theta^{(1)}, +\theta^{(2)},\ldots\) where each variable is conditionally independent of all other variables given the value of the previous variable. Thus if \(\theta = \theta^{(1)}, \theta^{(2)},\ldots, \theta^{(N)}\), then

+

\[ +p(\theta) = p(\theta^{(1)}) \prod_{n=2}^N p(\theta^{(n)}|\theta^{(n-1)}). +\]

+

Stan uses Hamiltonian Monte Carlo to generate a next state in a manner described in the Hamiltonian Monte Carlo chapter.

+

The Markov chains Stan and other MCMC samplers generate are ergodic in the sense required by the Markov chain central limit theorem, meaning roughly that there is a reasonable chance of reaching one value of \(\theta\) from another. The Markov chains are also stationary, meaning that the transition probabilities do not change at different positions in the chain, so that for \(n, n' \geq 0\), the probability function \(p(\theta^{(n+1)}|\theta^{(n)})\) is the same as \(p(\theta^{(n'+1)}|\theta^{(n')})\) (following the convention of overloading random and bound variables and picking out a probability function by its arguments).

+

Stationary Markov chains have an equilibrium distribution on states in which each has the same marginal probability function, so that \(p(\theta^{(n)})\) is the same probability function as \(p(\theta^{(n+1)})\). In Stan, this equilibrium distribution \(p(\theta^{(n)})\) is the target density \(p(\theta)\) defined by a Stan program, which is typically a proper Bayesian posterior density \(p(\theta | y)\) defined on the log scale up to a constant.

+

Using MCMC methods introduces two difficulties that are not faced by independent sample Monte Carlo methods. The first problem is determining when a randomly initialized Markov chain has converged to its equilibrium distribution. The second problem is that the draws from a Markov chain may be correlated or even anti-correlated, and thus the central limit theorem’s bound on estimation error no longer applies. These problems are addressed in the next two sections.

+

Stan’s posterior analysis tools compute a number of summary statistics, estimates, and diagnostics for Markov chain Monte Carlo (MCMC) samples. Stan’s estimators and diagnostics are more robust in the face of non-convergence, antithetical sampling, and long-term Markov chain correlations than most of the other tools available. The algorithms Stan uses to achieve this are described in this chapter.

+
+
+

Convergence

+

By definition, a Markov chain samples from the target distribution only after it has converged to equilibrium (i.e., equilibrium is defined as being achieved when \(p(\theta^{(n)})\) is the target density). The following point cannot be expressed strongly enough:

+
    +
  • In theory, convergence is only guaranteed asymptotically as the number of draws grows without bound.

  • +
  • In practice, diagnostics must be applied to monitor convergence for the finite number of draws actually available.

  • +
+
+
+

Notation for samples, chains, and draws

+

To establish basic notation, suppose a target Bayesian posterior density \(p(\theta | y)\) given real-valued vectors of parameters \(\theta\) and real- and discrete-valued data \(y\).1

+

An MCMC sample consists of a set of \(M\) Markov chains, each consisting of an ordered sequence of \(N\) draws from the posterior.2 The sample thus consists of \(M \times N\) draws from the posterior.

+
+

Potential scale reduction

+

One way to monitor whether a chain has converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. This is the motivation for the Gelman and Rubin (1992) potential scale reduction statistic, \(\hat{R}\). The \(\hat{R}\) statistic measures the ratio of the average variance of draws within each chain to the variance of the pooled draws across chains; if all chains are at equilibrium, these will be the same and \(\hat{R}\) will be one. If the chains have not converged to a common distribution, the \(\hat{R}\) statistic will be greater than one.

+

Gelman and Rubin’s recommendation is that the independent Markov chains be initialized with diffuse starting values for the parameters and sampled until all values for \(\hat{R}\) are below some threshold. Vehtari et al. (2021) suggest in general to use a threshold \(1.01\), but other thresholds can be used depending on the use case. Stan allows users to specify initial values for parameters and it is also able to draw diffuse random initializations automatically satisfying the declared parameter constraints.

+

The \(\hat{R}\) statistic is defined for a set of \(M\) Markov chains, \(\theta_m\), each of which has \(N\) draws \(\theta^{(n)}_m\). The between-chain variance estimate is

+

\[ +B += +\frac{N}{M-1} +\, +\sum_{m=1}^M (\bar{\theta}^{(\bullet)}_{m} + - \bar{\theta}^{(\bullet)}_{\bullet})^2, +\]

+

where

+

\[ +\bar{\theta}_m^{(\bullet)} += \frac{1}{N} \sum_{n = 1}^N \theta_m^{(n)} +\]

+

and

+

\[ +\bar{\theta}^{(\bullet)}_{\bullet} += \frac{1}{M} \, \sum_{m=1}^M \bar{\theta}_m^{(\bullet)}. +\]

+

The within-chain variance is averaged over the chains,

+

\[ +W = \frac{1}{M} \, \sum_{m=1}^M s_m^2, +\]

+

where

+

\[ +s_m^2 += +\frac{1}{N-1} +\, \sum_{n=1}^N (\theta^{(n)}_m - \bar{\theta}^{(\bullet)}_m)^2. +\]

+

The variance estimator is a mixture of the within-chain and cross-chain sample variances,

+

\[ +\widehat{\mbox{var}}^{+}\!(\theta|y) += \frac{N-1}{N}\, W \, + \, \frac{1}{N} \, B. +\]

+

Finally, the potential scale reduction statistic is defined by

+

\[ +\hat{R} +\, = \, +\sqrt{\frac{\widehat{\mbox{var}}^{+}\!(\theta|y)}{W}}. +\]

+
+
+

Split R-hat for detecting non-stationarity

+

Before Stan calculates the potential-scale-reduction statistic \(\hat{R}\), each chain is split into two halves. This provides an additional means to detect non-stationarity in the individual chains. If one chain involves gradually increasing values and one involves gradually decreasing values, they have not mixed well, but they can have \(\hat{R}\) values near unity. In this case, splitting each chain into two parts leads to \(\hat{R}\) values substantially greater than 1 because the first half of each chain has not mixed with the second half.

+
+
+

Rank normalization helps when there are heavy tails

+

Split R-hat and the effective sample size (ESS) are well defined only if the marginal posteriors have finite mean and variance. Therefore, following Vehtari et al. (2021), we compute the rank normalized parameter values and then feed them into the formulas for split R-hat and ESS.

+

Rank normalization proceeds as follows:

+
    +
  • First, replace each value \(\theta^{(nm)}\) by its rank \(r^{(nm)}\) within the pooled draws from all chains. Average ranks for ties are used to conserve the number of unique values of discrete quantities.

  • +
  • Second, transform ranks to normal scores using the inverse normal transformation and a fractional offset:

  • +
+

\[ +z_{(nm)} = \Phi^{-1} \left( \frac{r_{(nm)} - 3/8}{S - 1/4} \right) +\]

+

To further improve sensitivity to chains having different scales,

+

rank normalized R-hat is also computed for the corresponding folded draws \(\zeta^{(mn)}\), absolute deviations from the median, \[ +\label{zeta} +\zeta^{(mn)} = \left|\theta^{(nm)}-{\rm median}(\theta)\right|. +\] The rank normalized split-\(\widehat{R}\) measure computed on the \(\zeta^{(mn)}\) values is called folded-split-\(\widehat{R}\). This measures convergence in the tails rather than in the bulk of the distribution.

+

To obtain a single conservative \(\widehat{R}\) estimate, we propose to report the maximum of rank normalized split-\(\widehat{R}\) and rank normalized folded-split-\(\widehat{R}\) for each parameter.

+

Bulk-ESS is defined as ESS for rank normalized split chains. Tail-ESS is defined as the minimum ESS for the 5% and 95% quantiles. See Effective Sample Size section for details on how ESS is estimated.

+
+
+

Convergence is global

+

A question that often arises is whether it is acceptable to monitor convergence of only a subset of the parameters or generated quantities. The short answer is “no,” but this is elaborated further in this section.

+

For example, consider the value lp__, which is the log posterior density (up to a constant).3

+

It is thus a mistake to declare convergence in any practical sense if lp__ has not converged, because different chains are really in different parts of the space. Yet measuring convergence for lp__ is particularly tricky, as noted below.

+
+

Asymptotics and transience vs. equilibrium

+

Markov chain convergence is a global property in the sense that it does not depend on the choice of function of the parameters that is monitored. There is no hard cutoff between pre-convergence “transience” and post-convergence “equilibrium.” What happens is that as the number of states in the chain approaches infinity, the distribution of possible states in the chain approaches the target distribution and in that limit the expected value of the Monte Carlo estimator of any integrable function converges to the true expectation. There is nothing like warmup here, because in the limit, the effects of initial state are completely washed out.

+
+
+

Multivariate convergence of functions

+

The \(\hat{R}\) statistic considers the composition of a Markov chain and a function, and if the Markov chain has converged then each Markov chain and function composition will have converged. Multivariate functions converge when all of their margins have converged by the Cramer-Wold theorem.

+

The transformation from unconstrained space to constrained space is just another function, so does not affect convergence.

+

Different functions may have different autocorrelations, but if the Markov chain has equilibrated then all Markov chain plus function compositions should be consistent with convergence. Formally, any function that appears inconsistent is of concern and although it would be unreasonable to test every function, lp__ and other measured quantities should at least be consistent.

+

The obvious difference in lp__ is that it tends to vary quickly with position and is consequently susceptible to outliers.

+
+
+

Finite numbers of states

+

The question is what happens for finite numbers of states? If we can prove a strong geometric ergodicity property (which depends on the sampler and the target distribution), then one can show that there exists a finite time after which the chain forgets its initial state with a large probability. This is both the autocorrelation time and the warmup time. But even if you can show it exists and is finite (which is nigh impossible) you can’t compute an actual value analytically.

+

So what we do in practice is hope that the finite number of draws is large enough for the expectations to be reasonably accurate. Removing warmup iterations improves the accuracy of the expectations but there is no guarantee that removing any finite number of draws will be enough.

+
+
+

Why inconsistent R-hat?

+

Firstly, as noted above, for any finite number of draws, there will always be some residual effect of the initial state, which typically manifests as some small (or large if the autocorrelation time is huge) probability of having a large outlier. Functions robust to such outliers (say, quantiles) will appear more stable and have better \(\hat{R}\). Functions vulnerable to such outliers may show fragility.

+

Secondly, use of the \(\hat{R}\) statistic makes very strong assumptions. In particular, it assumes that the functions being considered are Gaussian or it only uses the first two moments and assumes some kind of independence. The point is that strong assumptions are made that do not always hold. In particular, the distribution for the log posterior density (lp__) almost never looks Gaussian, instead it features long tails that can lead to large \(\hat{R}\) even in the large \(N\) limit. Tweaks to \(\hat{R}\), such as using quantiles in place of raw values, have the flavor of making the sample of interest more Gaussian and hence the \(\hat{R}\) statistic more accurate.

+
+
+

Final words on convergence monitoring

+

“Convergence” is a global property and holds for all integrable functions at once, but employing the \(\hat{R}\) statistic requires additional assumptions and thus may not work for all functions equally well.

+

Note that if we just compare the expectations between chains, then we can rely on the Markov chain asymptotics for Gaussian distributions and can apply the standard tests.

+
+
+
+
+

Effective sample size

+

The second technical difficulty posed by MCMC methods is that the draws will typically be autocorrelated (or anticorrelated) within a chain. This increases (or reduces) the uncertainty of the estimation of posterior quantities of interest, such as means, variances, or quantiles; see Charles J. Geyer (2011).

+

Stan estimates an effective sample size for each parameter, which plays the same role in the Markov chain Monte Carlo central limit theorem (MCMC CLT) as the number of independent draws plays in the standard central limit theorem (CLT).

+

Unlike most packages, the particular calculations used by Stan follow those for split-\(\hat{R}\), which involve both cross-chain (mean) and within-chain calculations (autocorrelation); see Gelman et al. (2013) and Vehtari et al. (2021).

+
+

Definition of effective sample size

+

The amount by which autocorrelation within the chains increases uncertainty in estimates can be measured by effective sample size (ESS). Given an independent sample (with finite variance), the central limit theorem bounds uncertainty in estimates based on the sample size \(N\). Given a dependent sample, the sample size is replaced with the effective sample size \(N_{\mathrm{eff}}\).
+For example, Monte Carlo standard error (MCSE) is proportional to \(1 / \sqrt{N_{\mathrm{eff}}}\) rather than \(1/\sqrt{N}\).

+

The effective sample size of a sequence is defined in terms of the autocorrelations within the sequence at different lags. The autocorrelation \(\rho_t\) at lag \(t \geq 0\) for a chain with joint probability function \(p(\theta)\) with mean \(\mu\) and variance \(\sigma^2\) is defined to be

+

\[ +\rho_t += +\frac{1}{\sigma^2} \, \int_{\Theta} (\theta^{(n)} - \mu) +(\theta^{(n+t)} - \mu) \, p(\theta) \, d\theta. +\]

+

This is the correlation between the two chains offset by \(t\) positions (i.e., a lag in time-series terminology). Because we know \(\theta^{(n)}\) and \(\theta^{(n+t)}\) have the same marginal distribution in an MCMC setting, multiplying the two difference terms and reducing yields

+

\[ +\rho_t += \frac{1}{\sigma^2} +\, \int_{\Theta} + \theta^{(n)} \, \theta^{(n+t)} \, p(\theta) + \, d\theta - \frac{\mu^2}{\sigma^2}. +\]

+

The effective sample size of \(N\) draws generated by a process with autocorrelations \(\rho_t\) is defined by \[ +N_{\mathrm{eff}} +\ = \ +\frac{N}{\sum_{t = -\infty}^{\infty} \rho_t} +\ = \ +\frac{N}{1 + 2 \sum_{t = 1}^{\infty} \rho_t}. +\]

+

For independent draws, the effective sample size is just the number of iterations. For correlated draws, the effective sample size is usually lower than the number of iterations, but in case of anticorrelated draws, the effective sample size can be larger than the number of iterations. In this latter case, MCMC can work better than independent sampling for some estimation problems. Hamiltonian Monte Carlo, including the no-U-turn sampler used by default in Stan, can produce anticorrelated draws if the posterior is close to Gaussian with little posterior correlation.

+
+
+

Estimation of effective sample size

+

In practice, the probability function in question cannot be tractably integrated and thus the autocorrelation cannot be calculated, nor the effective sample size. Instead, these quantities must be estimated from the draws themselves. The rest of this section describes an autocorrelation- and split-\(\hat{R}\)-based effective sample size estimator, based on multiple chains. As before, each chain \(\theta_m\) will be assumed to be of length \(N\).

+

Stan carries out the autocorrelation computations for all lags simultaneously using Eigen’s fast Fourier transform (FFT) package with appropriate padding; see Charles J. Geyer (2011) for more detail on using FFT for autocorrelation calculations. The autocorrelation estimates \(\hat{\rho}_{t,m}\) at lag \(t\) from multiple chains \(m \in +(1,\ldots,M)\) are combined with within-sample variance estimate \(W\) and multi-chain variance estimate \(\widehat{\mbox{var}}^{+}\) introduced in the previous section to compute the combined autocorrelation at lag \(t\) as

+

\[ +\hat{\rho}_t += 1 - \frac{\displaystyle W + - \textstyle \frac{1}{M}\sum_{m=1}^M s_m^2 \hat{\rho}_{t,m}} + {\widehat{\mbox{var}}^{+}}. +\]

+

If the chains have not converged, the variance estimator \(\widehat{\mbox{var}}^{+}\) will overestimate variance, leading to an overestimate of autocorrelation and an underestimate of effective sample size.

+

Because of the noise in the correlation estimates \(\hat{\rho}_t\) as \(t\) increases, typically a truncated sum of \(\hat{\rho}_t\) is used. Negative autocorrelations may occur only on odd lags and by summing over pairs starting from lag 0, the paired autocorrelation is guaranteed to be positive, monotone and convex modulo estimator noise (Charles J. Geyer 1992, 2011). Stan uses Geyer’s initial monotone sequence criterion. The effective sample size estimator is defined as

+

\[ +\hat{N}_{\mathrm{eff}} = \frac{M \cdot N}{\hat{\tau}}, +\]

+

where

+

\[ +\hat{\tau} = 1 + 2 \sum_{t=1}^{2m+1} \hat{\rho}_t = -1 + 2 \sum_{t'=0}^{m} \hat{P}_{t'}, +\]

+

where \(\hat{P}_{t'}=\hat{\rho}_{2t'}+\hat{\rho}_{2t'+1}\). The initial positive sequence estimator is obtained by choosing the largest \(m\) such that \(\hat{P}_{t'}>0, \quad t' = 1,\ldots,m\). The initial monotone sequence is obtained by further reducing \(\hat{P}_{t'}\) to the minimum of the preceding ones so that the estimated sequence is monotone.

+
+
+

Estimation of MCMC standard error

+

The posterior standard deviation of a parameter \(\theta_n\) conditioned on observed data \(y\) is just the standard deviation of the posterior density \(p(\theta_n | y)\). This is estimated by the standard deviation of the combined posterior draws across chains,

+

\[ +\hat{\sigma}_n = \mathrm{sd}(\theta^{(1)}_n, \ldots, \theta^{(m)}_n). +\]

+

The previous section showed how to estimate \(N_{\mathrm{eff}}\) for a parameter \(\theta_n\) based on multiple chains of posterior draws.

+

The mean of the posterior draws of \(\theta_n\) \[ +\hat{\theta}_n += \mathrm{mean}(\theta^{(1)}_n, \ldots, \theta^{(m)}_n) +\]

+

is treated as an estimator of the true posterior mean,

+

\[ +\mathbb{E}[\theta_n \mid y] +\ = \ +\int_{-\infty}^{\infty} + \, \theta_n \, p(\theta_n | y) +\, \mathrm{d}\theta_n, +\]

+

based on the observed data \(y\).

+

The standard error for the estimator \(\hat{\theta}_n\) is given by the posterior standard deviation divided by the square root of the effective sample size. This standard error is itself estimated as \(\hat{\sigma}_n / \sqrt{N_{\mathrm{eff}}}\). The smaller the standard error, the closer the estimate \(\hat{\theta}_n\) is expected to be to the true value. This is just the MCMC CLT applied to an estimator; see Charles J. Geyer (2011) for more details of the MCMC central limit theorem.

+
+
+

Thinning

+

In complex posteriors, draws are almost always positively correlated. In these situations, the autocorrelation at lag \(t\), \(\rho_t\), decreases as the lag, \(t\), increases. In this situation, thinning the sample by keeping only every \(N\)-th draw will reduce the autocorrelation of the resulting chain. This is particularly useful if we need to save storage or re-use the draws for inference.

+

For instance, consider generating one thousand posterior draws in one of the following two ways.

+
    +
  • Generate 1000 draws after convergence and save all of them.

  • +
  • Generate 10,000 draws after convergence and save every tenth draw.

  • +
+

Even though both produce a sample consisting of one thousand draws, the second approach with thinning can produce a higher effective sample size when the draws are positively correlated. That’s because the autocorrelation \(\rho_t\) for the thinned sequence is equivalent to \(\rho_{10t}\) in the unthinned sequence, so the sum of the autocorrelations usually will be lower and thus the effective sample size higher.

+

Now contrast the second approach above with the unthinned alternative,

+
    +
  • Generate 10,000 draws after convergence and save every draw.
  • +
+

This will typically have a higher effective sample size than the thinned sample consisting of every tenth draw. But the gap might not be very large. To summarize, the only reason to thin a sample is to reduce memory requirements.

+

If draws are anticorrelated, then thinning will increase correlation and further reduce the overall effective sample size.

+ + + +
+
+
+ + + Back to top

References

+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+Gelman, Andrew, and Donald B. Rubin. 1992. “Inference from Iterative Simulation Using Multiple Sequences.” Statistical Science 7 (4): 457–72. +
+
+Geyer, Charles J. 1992. “Practical Markov Chain Monte Carlo.” Statistical Science, 473–83. +
+
+Geyer, Charles J. 2011. “Introduction to Markov Chain Monte Carlo.” In Handbook of Markov Chain Monte Carlo, edited by Steve Brooks, Andrew Gelman, Galin L. Jones, and Xiao-Li Meng, 3–48. Chapman; Hall/CRC. +
+
+Metropolis, N., A. Rosenbluth, M. Rosenbluth, M. Teller, and E. Teller. 1953. “Equations of State Calculations by Fast Computing Machines.” Journal of Chemical Physics 21: 1087–92. +
+
+Vehtari, Aki, Andrew Gelman, Daniel Simpson, Bob Carpenter, and Paul-Christian Bürkner. 2021. “Rank-Normalization, Folding, and Localization: An Improved \(\widehat{R}\) for Assessing Convergence of MCMC.” Bayesian Analysis 16: 667–718. +
+

Footnotes

+ +
    +
  1. Using vectors simplifies high level exposition at the expense of collapsing structure.↩︎

  2. +
  3. The structure is assumed to be rectangular; in the future, this needs to be generalized to ragged samples.↩︎

  4. +
  5. The lp__ value also represents the potential energy in the Hamiltonian system and is rate bounded by the randomly supplied kinetic energy each iteration, which follows a Chi-square distribution in the number of parameters.↩︎

  6. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/blocks.html b/docs/2_39/reference-manual/blocks.html new file mode 100644 index 000000000..15e8de1b5 --- /dev/null +++ b/docs/2_39/reference-manual/blocks.html @@ -0,0 +1,1556 @@ + + + + + + + + + +Program Blocks + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Program Blocks

+

A Stan program is organized into a sequence of named blocks, the bodies of which consist of variable declarations, followed in the case of some blocks by statements.

+
+

Overview of Stan’s program blocks

+

The full set of named program blocks is exemplified in the following skeletal Stan program.

+
functions {
+  // ... function declarations and definitions ...
+}
+data {
+  // ... declarations ...
+}
+transformed data {
+   // ... declarations ... statements ...
+}
+parameters {
+   // ... declarations ...
+}
+transformed parameters {
+   // ... declarations ... statements ...
+}
+model {
+   // ... declarations ... statements ...
+}
+generated quantities {
+   // ... declarations ... statements ...
+}
+

The function-definition block contains user-defined functions. The data block declares the required data for the model. The transformed data block allows the definition of constants and transforms of the data. The parameters block declares the model’s parameters — the unconstrained version of the parameters is what’s sampled or optimized. The transformed parameters block allows variables to be defined in terms of data and parameters that may be used later and will be saved. The model block is where the log probability function is defined. The generated quantities block allows derived quantities to be defined based on parameters, data, and optionally (pseudo) random number generation.

+
+

Optionality and ordering

+

All of the blocks are optional. A consequence of this is that the empty string is a valid Stan program, although it will trigger a warning message from the Stan compiler. The Stan program blocks that occur must occur in the order presented in the skeletal program above. Within each block, both declarations and statements are optional, subject to the restriction that the declarations come before the statements.

+
+
+

Variable scope

+

The variables declared in each block have scope over all subsequent statements. Thus a variable declared in the transformed data block may be used in the model block. But a variable declared in the generated quantities block may not be used in any earlier block, including the model block. The exception to this rule is that variables declared in the model block are always local to the model block and may not be accessed in the generated quantities block; to make a variable accessible in the model and generated quantities block, it must be declared as a transformed parameter.

+

Variables declared as function parameters have scope only within that function definition’s body, and may not be assigned to (they are constant).

+
+
+

Function scope

+

Functions defined in the function block may be used in any appropriate block. Most functions can be used in any block and applied to a mixture of parameters and data (including constants or program literals).

+

Random-number-generating functions are restricted to transformed data and generated quantities blocks, and within user-defined functions ending in _rng; such functions are suffixed with _rng. Log-probability modifying functions are restricted to blocks where the log probability accumulator is in scope (transformed parameters and model); such functions are suffixed with _lp.

+

Density functions defined in the program may be used in distribution statements.

+
+
+

Automatic variable definitions

+

The variables declared in the data and parameters block are treated differently than other variables in that they are automatically defined by the context in which they are used. This is why there are no statements allowed in the data or parameters block.

+

The variables in the data block are read from an external input source such as a file or a designated R data structure. The variables in the parameters block are read from the sampler’s current parameter values (either standard HMC or NUTS). The initial values may be provided through an external input source, which is also typically a file or a designated R data structure. In each case, the parameters are instantiated to the values for which the model defines a log probability function.

+
+
+

Transformed variables

+

The transformed data and transformed parameters blocks behave similarly to each other. Both allow new variables to be declared and then defined through a sequence of statements. Because variables scope over every statement that follows them, transformed data variables may be defined in terms of the data variables.

+

Before generating any draws, data variables are read in, then the transformed data variables are declared and the associated statements executed to define them. This means the statements in the transformed data block are only ever evaluated once.1

+

Transformed parameters work the same way, being defined in terms of the parameters, transformed data, and data variables. The difference is the frequency of evaluation. Parameters are read in and (inverse) transformed to constrained representations on their natural scales once per log probability and gradient evaluation. This means the inverse transforms and their log absolute Jacobian determinants are evaluated once per leapfrog step. Transformed parameters are then declared and their defining statements executed once per leapfrog step.

+
+
+

Generated quantities

+

The generated quantity variables are defined once per sample after all the leapfrog steps have been completed. These may be random quantities, so the block must be rerun even if the Metropolis adjustment of HMC or NUTS rejects the update proposal.

+
+
+

Variable read, write, and definition summary

+

A table summarizing the point at which variables are read, written, and defined is given in the block actions table.

+

Block Actions Table. The read, write, transform, and evaluate actions and periodicities listed in the last column correspond to the Stan program blocks in the first column. The middle column indicates whether the block allows statements. The last row indicates that parameter initialization requires a read and transform operation applied once per chain.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
blockstatementaction / period
datanoread / chain
transformed datayesevaluate / chain
parametersnoinv. transform, Jacobian / leapfrog
  inv. transform, write / sample
transformed parametersyesevaluate / leapfrog
  write / sample
modelyesevaluate / leapfrog step
generated quantitiesyeseval / sample
  write / sample
(initialization)n/aread, transform / chain
+

Variable Declaration Table. This table indicates where variables that are not basic data or parameters should be declared, based on whether it is defined in terms of parameters, whether it is used in the log probability function defined in the model block, and whether it is printed. The two lines marked with asterisks (\(*\)) should not be used as there is no need to print a variable every iteration that does not depend on the value of any parameters.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
param dependin targetsavedeclare in
+++transformed parameters
++-model (local)
+-+generated quantities
+--generated quantities (local)
-++transformed data   and generated quantities
-+-transformed data
--+generated quantities
---transformed data (local)
+

Another way to look at the variables is in terms of their function. To decide which variable to use, consult the charts in the variable declaration table. The last line has no corresponding location, as there is no need to print a variable every iteration that does not depend on parameters.2

+

The rest of this chapter provides full details on when and how the variables and statements in each block are executed.

+
+
+
+

Statistical variable taxonomy

+

Statistical Variable Taxonomy Table. Variables of the kind indicated in the left column must be declared in one of the blocks declared in the right column.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
variable kinddeclaration block
constantsdata, transformed data
unmodeled datadata, transformed data
modeled datadata, transformed data
missing dataparameters, transformed parameters
modeled parametersparameters, transformed parameters
unmodeled parametersdata, transformed data
derived quantitiestransformed data, transformed parameters, generated quantities
loop indicesloop statement
+

Page 366 of Gelman and Hill (2007) provides a taxonomy of the kinds of variables used in Bayesian models. The table of kinds of variables contains Gelman and Hill’s taxonomy, extended with a missing-data kind, along with the corresponding locations of declarations and definitions in Stan.

+

Constants can be built into a model as literals, data variables, or as transformed data variables. If specified as variables, their definition must be included in data files. If they are specified as transformed data variables, they cannot be used to specify the sizes of elements in the data block.

+

The following program illustrates various variable kinds, listing the kind of each variable next to its declaration.

+
data {
+  int<lower=0> N;           // unmodeled data
+  array[N] real y;          // modeled data
+  real mu_mu;               // config. unmodeled param
+  real<lower=0> sigma_mu;   // config. unmodeled param
+}
+transformed data {
+  real<lower=0> alpha;      // const. unmodeled param
+  real<lower=0> beta;       // const. unmodeled param
+  alpha = 0.1;
+  beta = 0.1;
+}
+parameters {
+  real mu_y;                // modeled param
+  real<lower=0> tau_y;      // modeled param
+}
+transformed parameters {
+  real<lower=0> sigma_y;    // derived quantity (param)
+  sigma_y = pow(tau_y, -0.5);
+}
+model {
+  tau_y ~ gamma(alpha, beta);
+  mu_y ~ normal(mu_mu, sigma_mu);
+  for (n in 1:N) {
+    y[n] ~ normal(mu_y, sigma_y);
+  }
+}
+generated quantities {
+  real variance_y;       // derived quantity (transform)
+  variance_y = sigma_y * sigma_y;
+}
+

In this example, y is an array of modeled data. Although it is specified in the data block, and thus must have a known value before the program may be run, it is modeled as if it were generated randomly as described by the model.

+

The variable N is a typical example of unmodeled data. It is used to indicate a size that is not part of the model itself.

+

The other variables declared in the data and transformed data block are examples of unmodeled parameters, also known as hyperparameters. Unmodeled parameters are parameters to probability densities that are not themselves modeled probabilistically. In Stan, unmodeled parameters that appear in the data block may be specified on a per-model execution basis as part of the data read. In the above model, mu_mu and sigma_mu are configurable unmodeled parameters.

+

Unmodeled parameters that are hard coded in the model must be declared in the transformed data block. For example, the unmodeled parameters alpha and beta are both hard coded to the value 0.1. To allow such variables to be configurable based on data supplied to the program at run time, they must be declared in the data block, like the variables mu_mu and sigma_mu.

+

This program declares two modeled parameters, mu_y and tau_y. These are the location and precision used in the normal model of the values in y. The heart of the model will be sampling the values of these parameters from their posterior distribution.

+

The modeled parameter tau_y is transformed from a precision to a scale parameter and assigned to the variable sigma_y in the transformed parameters block. Thus the variable sigma_y is considered a derived quantity — its value is entirely determined by the values of other variables.

+

The generated quantities block defines a value variance_y, which is defined as a transform of the scale or deviation parameter sigma_y. It is defined in the generated quantities block because it is not used in the model. Making it a generated quantity allows it to be monitored for convergence (being a non-linear transform, it will have different autocorrelation and hence convergence properties than the deviation itself).

+

In later versions of Stan which have random number generators for the distributions, the generated quantities block will be usable to generate replicated data for model checking.

+

Finally, the variable n is used as a loop index in the model block.

+
+
+

Program block: data

+

The rest of this chapter will lay out the details of each block in order, starting with the data block in this section.

+
+

Variable reads and transformations

+

The data block is for the declaration of variables that are read in as data. With the current model executable, each Markov chain of draws will be executed in a different process, and each such process will read the data exactly once.3

+

Data variables are not transformed in any way. The format for data files or data in memory depends on the interface; see the user’s guides and interface documentation for PyStan, RStan, and CmdStan for details.

+
+
+

Statements

+

The data block does not allow statements.

+
+
+

Variable constraint checking

+

Each variable’s value is validated against its declaration as it is read. For example, if a variable sigma is declared as real<lower=0>, then trying to assign it a negative value will raise an error. As a result, data type errors will be caught as early as possible. Similarly, attempts to provide data of the wrong size for a compound data structure will also raise an error.

+
+
+
+

Program block: transformed data

+

The transformed data block is for declaring and defining variables that do not need to be changed when running the program.

+
+

Variable reads and transformations

+

For the transformed data block, variables are all declared in the variable declarations and defined in the statements. There is no reading from external sources and no transformations performed.

+

Variables declared in the data block may be used to declare transformed variables.

+
+
+

Statements

+

The statements in a transformed data block are used to define (provide values for) variables declared in the transformed data block. Assignments are only allowed to variables declared in the transformed data block.

+

These statements are executed once, in order, right after the data is read into the data variables. This means they are executed once per chain.

+

Variables declared in the data block may be used in statements in the transformed data block.

+
+

Restriction on operations in transformed data

+

The statements in the transformed data block are designed to be executed once and have a deterministic result. Therefore, log probability is not accumulated and distribution statements may not be used.

+
+
+
+

Variable constraint checking

+

Any constraints on variables declared in the transformed data block are checked after the statements are executed. If any defined variable violates its constraints, Stan will halt with a diagnostic error message.

+
+
+
+

Program block: parameters

+

The variables declared in the parameters program block correspond directly to the variables being sampled by Stan’s samplers (HMC and NUTS). From a user’s perspective, the parameters in the program block are the parameters being sampled by Stan.

+

Variables declared as parameters cannot be directly assigned values. So there is no block of statements in the parameters program block. Variable quantities derived from parameters may be declared in the transformed parameters or generated quantities blocks, or may be defined as local variables in any statement blocks following their declaration.

+

There is a substantial amount of computation involved for parameter variables in a Stan program at each leapfrog step within the HMC or NUTS samplers, and a bit more computation along with writes involved for saving the parameter values corresponding to a sample.

+
+

Constraining inverse transform

+

Stan’s two samplers, standard Hamiltonian Monte Carlo (HMC) and the adaptive No-U-Turn sampler (NUTS), are most easily (and often most effectively) implemented over a multivariate probability density that has support on all of \(\mathbb{R}^n\). To do this, the parameters defined in the parameters block must be transformed so they are unconstrained.

+

In practice, the samplers keep an unconstrained parameter vector in memory representing the current state of the sampler. The model defined by the compiled Stan program defines an (unnormalized) log probability function over the unconstrained parameters. In order to do this, the log probability function must apply the inverse transform to the unconstrained parameters to calculate the constrained parameters defined in Stan’s parameters program block. The log Jacobian of the inverse transform is then added to the accumulated log probability function. This then allows the Stan model to be defined in terms of the constrained parameters.

+

In some cases, the number of parameters is reduced in the unconstrained space. For instance, a \(K\)-simplex only requires \(K-1\) unconstrained parameters, and a \(K\)-correlation matrix only requires \(\binom{K}{2}\) unconstrained parameters. This means that the probability function defined by the compiled Stan program may have fewer parameters than it would appear from looking at the declarations in the parameters program block.

+

The probability function on the unconstrained parameters is defined in such a way that the order of the parameters in the vector corresponds to the order of the variables defined in the parameters program block. The details of the specific transformations are provided in the variable transforms chapter.

+
+
+

Gradient calculation

+

Hamiltonian Monte Carlo requires the gradient of the (unnormalized) log probability function with respect to the unconstrained parameters to be evaluated during every leapfrog step. There may be one leapfrog step per sample or hundreds, with more being required for models with complex posterior distribution geometries.

+

Gradients are calculated behind the scenes using Stan’s algorithmic differentiation library. The time to compute the gradient does not depend directly on the number of parameters, only on the number of subexpressions in the calculation of the log probability. This includes the expressions added from the transforms’ Jacobians.

+

The amount of work done by the sampler does depend on the number of unconstrained parameters, but this is usually dwarfed by the gradient calculations.

+
+
+

Writing draws

+

In the basic Stan compiled program, there is a file to which the values of variables are written for each draw. The constrained versions of the variables are written in the order they are defined in the parameters block. In order to do this, the transformed parameter, model, and generated quantities statements must also be executed.

+
+
+
+

Program block: transformed parameters

+

The transformed parameters program block consists of optional variable declarations followed by statements. After the statements are executed, the constraints on the transformed parameters are validated. Any variable declared as a transformed parameter is part of the output produced for draws.

+

Any variable that is defined wholly in terms of data or transformed data should be declared and defined in the transformed data block. Defining such quantities in the transformed parameters block is legal, but much less efficient than defining them as transformed data.

+
+

Constraints are for error checking

+

Like the constraints on data, the constraints on transformed parameters are meant to catch programming errors as well as convey programmer intent. They are not automatically transformed in such a way as to be satisfied. What will happen if a transformed parameter does not match its constraint is that the current parameter values will be rejected. This can cause Stan’s algorithms to hang or to devolve to random walks. It is not intended to be a way to enforce ad hoc constraints in Stan programs. See the section on reject statements for further discussion of the behavior of reject statements.

+
+
+
+

Program block: model

+

The model program block consists of optional variable declarations followed by statements. The variables in the model block are local variables and are not written as part of the output.

+

Local variables may not be defined with constraints because there is no well-defined way to have them be both flexible and easy to validate.

+

The statements in the model block typically define the model. This is the block in which probability (distribution notation) statements are allowed. These are typically used when programming in the BUGS idiom to define the probability model.

+
+
+

Program block: generated quantities

+

The generated quantities program block is rather different than the other blocks. Nothing in the generated quantities block affects the sampled parameter values. The block is executed only after a sample has been generated.

+

Among the applications of posterior inference that can be coded in the generated quantities block are

+
    +
  • forward sampling to generate simulated data for model testing,
  • +
  • generating predictions for new data,
  • +
  • calculating posterior event probabilities, including multiple comparisons, sign tests, etc.,
  • +
  • calculating posterior expectations,
  • +
  • transforming parameters for reporting,
  • +
  • applying full Bayesian decision theory,
  • +
  • calculating log likelihoods, deviances, etc. for model comparison.
  • +
+

Parameter estimates, predictions, statistics, and event probabilities may all be calculated directly using plug-in estimates. Stan automatically provides full Bayesian inference by producing draws from the posterior distribution of any calculated event probabilities, predictions, or statistics.

+

Within the generated quantities block, the values of all other variables declared in earlier program blocks (other than local variables) are available for use in the generated quantities block.

+

It is more efficient to define a variable in the generated quantities block instead of the transformed parameters block. Therefore, if a quantity does not play a role in the model, it should be defined in the generated quantities block.

+

After the generated quantities statements are executed, the constraints on the declared generated quantity variables are validated.

+

All variables declared as generated quantities are printed as part of the output. Variables declared in nested blocks are local variables, not generated quantities, and thus won’t be printed. For example:

+
generated quantities {
+  int a; // added to the output
+
+  {
+    int b; // not added to the output
+  }
+}
+ + + +
+
+ + + Back to top

References

+
+Gelman, Andrew, and Jennifer Hill. 2007. Data Analysis Using Regression and Multilevel-Hierarchical Models. Cambridge, United Kingdom: Cambridge University Press. +
+

Footnotes

+ +
    +
  1. If the C++ code is configured for concurrent threads, the data and transformed data blocks can be executed once and reused for multiple chains.↩︎

  2. +
  3. It is possible to print a variable every iteration that does not depend on parameters—just define it (or redefine it if it is transformed data) in the generated quantities block.↩︎

  4. +
  5. With multiple threads, or even running chains sequentially in a single thread, data could be read only once per set of chains. Stan was designed to be thread safe and future versions will provide a multithreading option for Markov chains.↩︎

  6. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/comments.html b/docs/2_39/reference-manual/comments.html new file mode 100644 index 000000000..ac1c5115d --- /dev/null +++ b/docs/2_39/reference-manual/comments.html @@ -0,0 +1,1091 @@ + + + + + + + + + +Comments + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Comments

+

Stan supports C++-style line-based and bracketed comments. Comments may be used anywhere whitespace is allowed in a Stan program.

+
+

Line-based comments

+

Any characters on a line following two forward slashes (//) are ignored along with the slashes. These may be used, for example, to document variables,

+
data {
+  int<lower=0> N;  // number of observations
+  array[N] real y;  // observations
+}
+
+
+

Bracketed comments

+

For bracketed comments, any text between a forward-slash and asterisk pair (/*) and an asterisk and forward-slash pair (*/) is ignored.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/deprecations.html b/docs/2_39/reference-manual/deprecations.html new file mode 100644 index 000000000..eca6bfe04 --- /dev/null +++ b/docs/2_39/reference-manual/deprecations.html @@ -0,0 +1,1110 @@ + + + + + + + + + +Deprecated Features + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Deprecated Features

+

This appendix lists currently deprecated functionality along with how to replace it.

+

Starting with Stan 2.29, minor (syntax-level) deprecations can be removed 3 versions after release; e.g., syntax deprecated in Stan 2.20 will be removed in Stan 2.23 and placed in Removed Features. The Stan compiler can automatically update many of these on behalf of the user for at least one version after they are removed.

+

Any feature which changes semantic meaning (such as the upgraded ODE solver interface) will not be removed until a major version change (e.g., Stan 3.0).

+
+

lkj_cov distribution

+

Deprecated: The distribution lkj_cov is deprecated.

+

Replacement: Replace lkj_cov_lpdf(...) with an lkj_corr distribution on the correlation matrix and independent lognormal distributions on the scales. That is, replace

+
cov_matrix[K] Sigma;
+// ...
+Sigma ~ lkj_cov(mu, tau, eta);
+

with

+
corr_matrix[K] Omega;
+vector<lower=0>[K] sigma;
+// ...
+Omega ~ lkj_corr(eta);
+sigma ~ lognormal(mu, tau);
+// ...
+cov_matrix[K] Sigma;
+Sigma = quad_form_diag(Omega, sigma);
+

The variable Sigma may be defined as a local variable in the model block or as a transformed parameter. An even more efficient transform would use Cholesky factors rather than full correlation matrix types.

+

Scheduled Removal: Stan 3.0 or later.

+
+
+

Use of _lp functions in transformed parameters

+

Deprecated: Using functions that end in _lp in the transformed parameters block.

+

Replacement: Use _jacobian functions and the jacobian += statement instead. These allow for change-of-variable adjustments which can be conditionally enabled by Stan’s algorithms.

+
+
+

Deprecated Functions

+

Several built-in Stan functions have been deprecated. Consult the functions reference for more information.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/diagnostics.html b/docs/2_39/reference-manual/diagnostics.html new file mode 100644 index 000000000..d2888edbd --- /dev/null +++ b/docs/2_39/reference-manual/diagnostics.html @@ -0,0 +1,1132 @@ + + + + + + + + + +Diagnostic Mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Diagnostic Mode

+

Stan’s diagnostic mode runs a Stan program with data, initializing parameters either randomly or with user-specified initial values, and then evaluates the log probability and its gradients. The gradients computed by the Stan program are compared to values calculated by finite differences.

+

Diagnostic mode may be configured with two parameters.

+

Diagnostic Mode Configuration Table. The diagnostic mode configuration parameters, constraints, and default values.

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + +
| parameter | description | constraints | default |
| epsilon | finite difference size | (0, infty) | 1e–6 |
| error | error threshold for matching | (0, infty) | 1e–6 |
+

If the difference between the Stan program’s gradient value and that calculated by finite difference is higher than the specified threshold, the argument will be flagged.

+
+

Diagnostic mode output

+

Diagnostic mode prints the log posterior density (up to a proportion) calculated by the Stan program for the specified initial values. For each parameter, it prints the gradient at the initial parameter values calculated by Stan’s program and by finite differences over Stan’s program for the log probability.

+
+

Unconstrained scale

+

The output for the variable values and their gradients is on the unconstrained scale, which means each variable is a vector of size corresponding to the number of unconstrained variables required to define it. For example, an \(N \times N\) correlation matrix requires \(\binom{N}{2}\) unconstrained parameters. The transformations from constrained to unconstrained parameters are based on the constraints in the parameter declarations and described in the reference manual chapter on transforms.

+
+
+

Includes Jacobian

+

The log density includes the Jacobian adjustment implied by the constraints declared on variables. The Jacobian adjustment for constrained parameter transforms may be turned off for optimization, but there is as of yet no way to turn it off in diagnostic mode.

+
+
+
+

Configuration options

+

The general configuration options for diagnostics are the same as those for MCMC. Initial values may be specified, or they may be drawn at random. Setting the random number generator will only have an effect if a random initialization is specified.

+
+
+

Speed warning and data trimming

+

Due to the application of finite differences, the computation time grows linearly with the number of parameters. This can require a very long time, especially in models with latent parameters that grow with the data size. It can be helpful to diagnose a model with smaller data sizes in such cases.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/encoding.html b/docs/2_39/reference-manual/encoding.html new file mode 100644 index 000000000..9d12a9011 --- /dev/null +++ b/docs/2_39/reference-manual/encoding.html @@ -0,0 +1,1065 @@ + + + + + + + + + +Character Encoding + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Character Encoding

+
+

Content characters

+

The content of a Stan program must be coded in ASCII. All identifiers must consist of only ASCII alpha-numeric characters and the underscore character. All arithmetic operators and punctuation must be coded in ASCII.

+
+

Compatibility with Latin-1 and UTF-8

+

The UTF-8 encoding of Unicode and the Latin-1 (ISO-8859-1) encoding share the first 128 code points with ASCII and thus cannot be distinguished from ASCII. That means you can set editors, etc., to use UTF-8 or Latin-1 (or the other Latin-n variants) without worrying that the content of a Stan program will be destroyed.

+
+
+
+

Comment characters

+

Any bytes on a line after a line-comment sequence (// or #) are ignored up until the ASCII newline character (\n). They may thus be written in any character encoding which is convenient.

+

Any content after a block comment open sequence in ASCII (/*) up to the closing block comment (*/) is ignored, and thus may also be written in whatever character set is convenient.

+
+
+

String literals

+

The raw byte sequence within a string literal is escaped according to the C++ standard. In particular, this means that UTF-8 encoded strings are supported, however they are not tested for invalid byte sequences. A print, reject, or fatal_error statement should properly display Unicode characters if your terminal supports the encoding used in the input. In other words, Stan simply preserves any string of bytes between two double quotes (") when passing to C++. On compliant terminals, this allows the use of glyphs and other characters from encodings such as UTF-8 that fall outside the ASCII-compatible range.

+

ASCII is the recommended encoding for maximum portability, because it encodes the ASCII characters (Unicode code points 0–127) using the same sequence of bytes as the UTF-8 encoding of Unicode and common ISO-8859 extensions of Latin.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/execution.html b/docs/2_39/reference-manual/execution.html new file mode 100644 index 000000000..04dd08047 --- /dev/null +++ b/docs/2_39/reference-manual/execution.html @@ -0,0 +1,1263 @@ + + + + + + + + + +Program Execution + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Program Execution

+

This chapter provides a sketch of how a compiled Stan model is executed using sampling. Optimization shares the same data reading and initialization steps, but then does optimization rather than sampling.

+

This sketch is elaborated in the following chapters of this part, which cover variable declarations, expressions, statements, and blocks in more detail.

+
+

Reading and transforming data

+

The reading and transforming data steps are the same for sampling, optimization and diagnostics.

+
+

Read data

+

The first step of execution is to read data into memory. Data may be read in through file (in CmdStan) or through memory (RStan and PyStan); see their respective manuals for details.1

+

All of the variables declared in the data block will be read. If a variable cannot be read, the program will halt with a message indicating which data variable is missing.

+

After each variable is read, if it has a declared constraint, the constraint is validated. For example, if a variable N is declared as int<lower=0>, after N is read, it will be tested to make sure it is greater than or equal to zero. If a variable violates its declared constraint, the program will halt with a warning message indicating which variable contains an illegal value, the value that was read, and the constraint that was declared.

+
+
+

Define transformed data

+

After data is read into the model, the transformed data variable statements are executed in order to define the transformed data variables. As the statements execute, declared constraints on variables are not enforced.

+

Transformed data variables are initialized with real values set to NaN and integer values set to the smallest integer (large absolute value negative number).

+

After the statements are executed, all declared constraints on transformed data variables are validated. If the validation fails, execution halts and the variable’s name, value and constraints are displayed.

+
+
+
+

Initialization

+

Initialization is the same for all of Stan’s algorithms.

+
+

User-supplied initial values

+

If there are user-supplied initial values for parameters, these are read using the same input mechanism and same file format as data reads. Any constraints declared on the parameters are validated for the initial values. If a variable’s value violates its declared constraint, the program halts and a diagnostic message is printed.

+

After being read, initial values are transformed to unconstrained values that will be used to initialize the sampler.

+
+

Boundary values are problematic

+

Because of the way Stan defines its transforms from the constrained to the unconstrained space, initializing parameters on the boundaries of their constraints is usually problematic. For instance, with a constraint

+
parameters {
+  real<lower=0, upper=1> theta;
+  // ...
+}
+

an initial value of 0 for theta leads to an unconstrained value of \(-\infty\), whereas a value of 1 leads to an unconstrained value of \(+\infty\). While this will be inverse transformed back correctly given the behavior of floating point arithmetic, the Jacobian will be infinite and the log probability function will fail and raise an exception.

+
+
+
+

Random initial values

+

If there are no user-supplied initial values, the default initialization strategy is to initialize the unconstrained parameters directly with values drawn uniformly from the interval \((-2,2)\). The bounds of this initialization can be changed but it is always symmetric around 0. The value of 0 is special in that it represents the median of the initialization. An unconstrained value of 0 corresponds to different parameter values depending on the constraints declared on the parameters.

+

An unconstrained real does not involve any transform, so an initial value of 0 for the unconstrained parameters is also a value of 0 for the constrained parameters.

+

For parameters that are bounded below at 0, the initial value of 0 on the unconstrained scale corresponds to \(\exp(0) = 1\) on the constrained scale. A value of -2 corresponds to \(\exp(-2) = .13\) and a value of 2 corresponds to \(\exp(2) = 7.4\).

+

For parameters bounded above and below, the initial value of 0 on the unconstrained scale corresponds to a value at the midpoint of the constraint interval. For probability parameters, bounded below by 0 and above by 1, the transform is the inverse logit, so that an initial unconstrained value of 0 corresponds to a constrained value of 0.5, -2 corresponds to 0.12 and 2 to 0.88. Bounds other than 0 and 1 are just scaled and translated.

+

Simplexes with initial values of 0 on the unconstrained scale correspond to symmetric values on the constrained scale (i.e., each value is \(1/K\) in a \(K\)-simplex).

+

Cholesky factors for positive-definite matrices are initialized to 1 on the diagonal and 0 elsewhere; this is because the diagonal is log transformed and the below-diagonal values are unconstrained.

+

The initial values for other parameters can be determined from the transform that is applied. The transforms are all described in full detail in the chapter on variable transforms.

+
+
+

Zero initial values

+

The initial values may all be set to 0 on the unconstrained scale. This can be helpful for diagnosis, and may also be a good starting point for sampling. Once a model is running, multiple chains with more diffuse starting points can help diagnose problems with convergence; see the user’s guide for more information on convergence monitoring.

+
+
+
+

Sampling

+

Sampling is based on simulating the Hamiltonian of a particle with a starting position equal to the current parameter values and an initial momentum (kinetic energy) generated randomly. The potential energy at work on the particle is taken to be the negative log (unnormalized) total probability function defined by the model. In the usual approach to implementing HMC, the Hamiltonian dynamics of the particle is simulated using the leapfrog integrator, which discretizes the smooth path of the particle into a number of small time steps called leapfrog steps.

+
+

Leapfrog steps

+

For each leapfrog step, the negative log probability function and its gradient need to be evaluated at the position corresponding to the current parameter values (a more detailed sketch is provided in the next section). These are used to update the momentum based on the gradient and the position based on the momentum.

+

For simple models, only a few leapfrog steps with large step sizes are needed. For models with complex posterior geometries, many small leapfrog steps may be needed to accurately model the path of the parameters.

+

If the user specifies the number of leapfrog steps (i.e., chooses to use standard HMC), that number of leapfrog steps are simulated. If the user has not specified the number of leapfrog steps, the No-U-Turn sampler (NUTS) will determine the number of leapfrog steps adaptively (Hoffman and Gelman 2014).

+
+
+

Log probability and gradient calculation

+

During each leapfrog step, the log probability function and its gradient must be calculated. This is where most of the time in the Stan algorithm is spent. This log probability function, which is used by the sampling algorithm, is defined over the unconstrained parameters.

+

The first step of the calculation requires the inverse transform of the unconstrained parameter values back to the constrained parameters in terms of which the model is defined. There is no error checking required because the inverse transform is a total function, and every point in its range satisfies the constraints.

+

Because the probability statements in the model are defined in terms of constrained parameters, the log Jacobian of the inverse transform must be added to the accumulated log probability.

+

Next, the transformed parameter statements are executed. After they complete, any constraints declared for the transformed parameters are checked. If the constraints are violated, the model will halt with a diagnostic error message.

+

The final step in the log probability function calculation is to execute the statements defined in the model block.

+

As the log probability function executes, it accumulates an in-memory representation of the expression tree used to calculate the log probability. This includes all of the transformed parameter operations and all of the Jacobian adjustments. This tree is then used to evaluate the gradients by propagating partial derivatives backward along the expression graph. The gradient calculations account for the majority of the cycles consumed by a Stan program.

+
+
+

Metropolis accept/reject

+

A standard Metropolis accept/reject step is required to retain detailed balance and ensure draws are marginally distributed according to the probability function defined by the model. This Metropolis adjustment is based on comparing log probabilities, here defined by the Hamiltonian, which is the sum of the potential (negative log probability) and kinetic (squared momentum) energies. In theory, the Hamiltonian is invariant over the path of the particle and rejection should never occur. In practice, the probability of rejection is determined by the accuracy of the leapfrog approximation to the true trajectory of the parameters.

+

If step sizes are small, very few updates will be rejected, but many steps will be required to move the same distance. If step sizes are large, more updates will be rejected, but fewer steps will be required to move the same distance. Thus a balance between effort and rejection rate is required. If the user has not specified a step size, Stan will tune the step size during warmup sampling to achieve a desired rejection rate (thus balancing rejection versus number of steps).

+

If the proposal is accepted, the parameters are updated to their new values. Otherwise, the sample is the current set of parameter values.

+
+
+
+

Optimization

+

Optimization runs very much like sampling in that it starts by reading the data and then initializing parameters. Unlike sampling, it produces a deterministic output which requires no further analysis other than to verify that the optimizer itself converged to a posterior mode. The output for optimization is also similar to that for sampling.

+
+
+

Variational inference

+

Variational inference also runs similarly to sampling. It begins by reading the data and initializing the algorithm. The initial variational approximation is a random draw from the standard normal distribution in the unconstrained (real-coordinate) space. Again, similar to sampling, it outputs draws from the approximate posterior once the algorithm has decided that it has converged. Thus, the tools we use for analyzing the result of Stan’s sampling routines can also be used for variational inference.

+
+
+

Model diagnostics

+

Model diagnostics are like sampling and optimization in that they depend on a model’s data being read and its parameters being initialized. The user’s guides for the interfaces (RStan, PyStan, CmdStan) provide more details on the diagnostics available; as of Stan 2.0, that’s just gradients on the unconstrained scale and log probabilities.

+
+
+

Output

+

For each final draw (not counting draws during warmup or draws that are thinned), there is an output stage of writing the draw.

+
+

Generated quantities

+

Before generating any output, the statements in the generated quantities block are executed. This can be used for any forward simulation based on parameters of the model. Or it may be used to transform parameters to an appropriate form for output.

+

After the generated quantities statements execute, the constraints declared on generated quantities variables are validated. If these constraints are violated, the program will terminate with a diagnostic message.

+
+
+

Write

+

The final step is to write the actual values. The values of all variables declared as parameters, transformed parameters, or generated quantities are written. Local variables are not written, nor is the data or transformed data. All values are written in their constrained forms, that is the form that is used in the model definitions.

+

In the executable form of a Stan model, parameters, transformed parameters, and generated quantities are written to a file in comma-separated value (CSV) notation with a header defining the names of the parameters (including indices for multivariate parameters).2

+ + + +
+
+
+ + + Back to top

References

+
+Hoffman, Matthew D., and Andrew Gelman. 2014. “The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.” Journal of Machine Learning Research 15: 1593–623. http://jmlr.org/papers/v15/hoffman14a.html. +
+

Footnotes

+ +
    +
  1. The C++ code underlying Stan is flexible enough to allow data to be read from memory or file. Calls from R, for instance, can be configured to read data from file or directly from R’s memory.↩︎

  2. +
  3. In the R version of Stan, the values may either be written to a CSV file or directly back to R’s memory.↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/expressions.html b/docs/2_39/reference-manual/expressions.html new file mode 100644 index 000000000..08aab3a8f --- /dev/null +++ b/docs/2_39/reference-manual/expressions.html @@ -0,0 +1,2293 @@ + + + + + + + + + +Expressions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Expressions

+

An expression is the syntactic unit in a Stan program that denotes a value. Every expression in a well-formed Stan program has a type that is determined statically (at compile time), based only on the type of its variables and the types of the functions used in it. If an expression’s type cannot be determined statically, the Stan compiler will report the location of the problem.

+

This chapter covers the syntax, typing, and usage of the various forms of expressions in Stan.

+
+

Numeric literals

+

The simplest form of expression is a literal that denotes a primitive numerical value.

+
+

Integer literals

+

Integer literals represent integers of type int. Integer literals are written in base 10 without any separators. Integer literals may contain a single negative sign. (The expression --1 is interpreted as the negation of the literal -1.)

+

The following list contains well-formed integer literals.

+
0, 1, -1, 256, -127098, 24567898765
+

Integer literals must have values that fall within the bounds for integer values (see the section on numerical data types).

+

Integer literals may not contain decimal points (.). Thus the expressions 1. and 1.0 are of type real and may not be used where a value of type int is required.

+
+
+

Real literals

+

A number written with a period or with scientific notation is assigned to the continuous numeric type real. Real literals are written in base 10 with a period (.) as a separator and optionally an exponent with optional sign. Examples of well-formed real literals include the following.

+
0.0, 1.0, 3.14, -217.9387, 2.7e3, -2E-5, 1.23e+3.
+

The notation e or E followed by a positive or negative integer denotes a power of 10 to multiply. For instance, 2.7e3 and 2.7e+3 denote \(2.7 \times 10^3\), whereas -2E-5 denotes \(-2 \times 10^{-5}\).

+
+
+

Imaginary literals

+

A number followed by the character i denotes an imaginary number and is assigned to the numeric type complex. The number preceding i may be either a real or integer literal and determines the magnitude of the imaginary number. Examples of well-formed imaginary literals include the following.

+
1i, 2i, -325.786i, 1e10i, 2.87e-10i.
+

Note that the character i by itself is not a well-formed imaginary literal. The unit imaginary number must be written as 1i.

+
+
+

Complex literals

+

Stan does not include complex literals directly, but a real or integer literal can be added to an imaginary literal to derive an expression that behaves like a complex literal. Examples include the following.

+
1 + 2i, -3.2e9 + 1e10i
+

These will be assigned the type complex, which is the result of adding a real or integer and a complex number. They will also function like literals in the sense that the C++ compiler is able to reduce them to a single complex constant at compile time.

+
+
+
+

Variables

+

A variable by itself is a well-formed expression of the same type as the variable. Variables in Stan consist of ASCII strings containing only the basic lower-case and upper-case Roman letters, digits, and the underscore (_) character. Variables must start with a letter (a–z and A–Z) and may not end with two underscores (__).

+

Examples of legal variable identifiers are as follows.

+
a, a3, a_3, Sigma, my_cpp_style_variable, myCamelCaseVariable
+

Unlike in R and BUGS, variable identifiers in Stan may not contain a period character.

+
+

Reserved names

+

Stan reserves many strings for internal use and these may not be used as the name of a variable. An attempt to name a variable after an internal string results in the stanc translator halting with an error message indicating which reserved name was used and its location in the model code.

+
+

Model name

+

The name of the model cannot be used as a variable within the model. This is usually not a problem because the default in bin/stanc is to append _model to the name of the file containing the model specification. For example, if the model is in file foo.stan, it would not be legal to have a variable named foo_model when using the default model name through bin/stanc. With user-specified model names, variables cannot match the model name.

+
+
+

Reserved words from Stan language

+

The following list contains reserved words for Stan’s programming language. Not all of these features are implemented in Stan yet, but the tokens are reserved for future use.

+
for, in, while, repeat, until, if, then, else,
+true, false, target, struct, typedef, export,
+auto, extern, var, static, lower, upper, offset,
+multiplier
+

Variables should not be named after types, either, and thus may not be any of the following.

+
int, real, complex, vector, simplex, unit_vector,
+sum_to_zero_vector, sum_to_zero_matrix, ordered, positive_ordered,
+row_vector, matrix, cholesky_factor_corr,
+column_stochastic_matrix, row_stochastic_matrix,
+cholesky_factor_cov, corr_matrix, cov_matrix, array
+

The following keywords or built-in functions are also reserved and cannot be used as variable names:

+
print, reject, profile, fatal_error, target, jacobian
+

The following block identifiers are reserved and cannot be used as variable names:

+
functions, model, data, parameters, quantities,
+transformed, generated
+
+
+

Reserved distribution names

+

Variable names will also conflict with the names of distributions suffixed with _lpdf, _lpmf, _lcdf, _lccdf, _cdf, and _ccdf, such as normal_lcdf. No user-defined variable can take a name ending in _lupdf or _lupmf even if a corresponding _lpdf or _lpmf is not defined.

+

Using any of these variable names causes the stanc translator to halt and report the name and location of the variable causing the conflict.

+
+
+

Reserved names from backend languages

+

Stan primarily generates code in C++, which features its own reserved words. It is legal to name a variable any of the following names; however, doing so will lead to it being renamed _stan_NAME (e.g. _stan_public) behind the scenes (in the generated C++ code).

+ +
alignas, alignof, and, and_eq, asm, bitand, bitor, bool,
+case, catch, char, char16_t, char32_t, class, compl, const,
+constexpr, const_cast, decltype, default, delete, do,
+double, dynamic_cast, enum, explicit, float, friend, goto,
+inline, long, mutable, namespace, new, noexcept, not, not_eq,
+nullptr, operator, or, or_eq, private, protected, public,
+register, reinterpret_cast, short, signed, sizeof,
+static_assert, static_cast, switch, template, this, thread_local,
+throw, try, typeid, typename, union, unsigned, using, virtual,
+volatile, wchar_t, xor, xor_eq, fvar, STAN_MAJOR, STAN_MINOR,
+STAN_PATCH, STAN_MATH_MAJOR, STAN_MATH_MINOR, STAN_MATH_PATCH
+
+
+ +
+
+

Container expressions

+

Expressions for the Stan container objects, namely arrays, vectors, row vectors, matrices, and tuples, can all be constructed using expressions.

+
+

Vector expressions

+

Square brackets may be wrapped around a sequence of comma separated primitive expressions to produce a row vector expression. For example, the expression [ 1, 10, 100 ] denotes a row vector of three elements with real values 1.0, 10.0, and 100.0. Applying the transpose operator to a row vector expression produces a vector expression. This syntax provides a way to declare and define small vectors in a single line, as follows.

+
row_vector[2] rv2 =  [ 1, 2 ];
+vector[3] v3 = [ 3, 4, 5 ]';
+

The vector expression values may be compound expressions or variable names, so it is legal to write [ 2 * 3, 1 + 4] or [ x, y ], providing that x and y are primitive variables.

+
+
+

Matrix expressions

+

A matrix expression consists of square brackets wrapped around a sequence of comma separated row vector expressions. This syntax provides a way to declare and define a matrix in a single line, as follows.

+
matrix[3, 2] m1 = [ [ 1, 2 ], [ 3, 4 ], [5, 6 ] ];
+

Any expression denoting a row vector can be used in a matrix expression. For example, the following code is valid:

+
vector[2] vX = [ 1, 10 ]';
+row_vector[2] vY = [ 100, 1000 ];
+matrix[3, 2] m2 = [ vX', vY, [ 1, 2 ]  ];
+
+
+

Complex vector and matrix expressions

+

Complex vector expressions work the same way as real vector expressions. For example, the following are all legal Stan expressions and assignments.

+
complex_vector[3] zv = [1 + 2i, 3 - 1.7i, 0]';
+complex_row_vector[2] zrv = [12, -2i];
+complex_matrix[2, 3] zm = [[1 + 2i, 3 - 1.7i, 0],
+                           [3.9 - 1.234i, 176i, 1 + 1i]];
+
+

No empty vector or matrix expressions

+

The empty expression [ ] is ambiguous and therefore is not allowed and similarly expressions such as [ [ ] ] or [ [ ], [ ] ] are not allowed.

+
+
+
+

Empty vectors and matrices

+

If needed, it is possible to create an empty vector with

+
rep_vector(e, 0)
+

where the first expression e needs to be a scalar of type real.

+

If needed, it is possible to create an empty matrix with

+
rep_matrix(e, 0, 0)
+

where the first expression e needs to be a scalar of type real.

+
+
+

Array expressions

+

Curly braces may be wrapped around a sequence of expressions to produce an array expression. For example, the expression { 1, 10, 100 } denotes an integer array of three elements with values 1, 10, and 100. This syntax is particularly convenient to define small arrays in a single line, as follows.

+
array[3] int a = { 1, 10, 100 };
+

The values may be compound expressions, so it is legal to write { 2 * 3, 1 + 4 }. It is also possible to write two dimensional arrays directly, as in the following example.

+
array[2, 3] int b = { { 1, 2, 3 }, { 4, 5, 6 } };
+

This way, b[1] is { 1, 2, 3 } and b[2] is { 4, 5, 6 }.

+

Whitespace is always interchangeable in Stan, so the above can be laid out as follows to more clearly indicate the row and column structure of the resulting two dimensional array.

+
array[2, 3] int b = { { 1, 2, 3 },
+                { 4, 5, 6 } };
+
+
+

Empty arrays

+

The empty array expression ({ }) is not allowed. See more about restrictions on array expressions in subsection Restrictions on values.

+

If needed, it is possible to create an empty array with

+
rep_array(e, 0)
+

where the first expression e determines the type of the array. For example, rep_array(0.0, 0) returns an empty real array of type array[] real, whereas rep_array({123}, 0) returns an empty two dimensional integer array of type array[ , ] int. Only the type of the first argument is used, so the integer arrays {123} and {0} produce equivalent values.

+
+
+

Array expression types

+

Any type of expression may be used within braces to form an array expression. In the simplest case, all of the elements will be of the same type and the result will be an array of elements of that type. For example, the elements of the array can be vectors, in which case the result is an array of vectors.

+
vector[3] b;
+vector[3] c;
+// ...
+array[2] vector[3] d = { b, c };
+

The elements may also be a mixture of int and real typed expressions, in which case the result is an array of real values.

+
array[2] real b = { 1, 1.9 };
+
+
+

Tuple expressions and types

+

Stan uses parentheses around a comma-separated sequence of expressions to construct a tuple. For example, we can construct a 2-tuple as follows.

+
tuple(int, vector[3]) xy = (42, [1, 2.9, -1.3]');
+

The expression 42 is of type int and the expression [1, 2.9, -1.3] is of type row_vector so that [1, 2.9, -1.3]' is of type vector and of size 3. The whole tuple expression (42, [1, 2.9, -1.3]') thus has a sized type of tuple(int, vector[3]) and an unsized type (e.g., for a function argument) of tuple(int, vector).

+

A tuple of one element can be created using the same style as languages like Python, with a trailing comma, e.g., (3.14,). For longer tuples, Stan does not support trailing commas.

+
+
+

Restrictions on values

+

There are some restrictions on how array expressions may be used that arise from their types being calculated bottom up and the basic data type and assignment rules of Stan.

+
+

Rectangular array expressions only

+

Although it is tempting to try to define a ragged array expression, all Stan data types are rectangular (or boxes or other higher-dimensional generalizations). Thus the following nested array expression will cause an error when it tries to create a non-rectangular array.

+
{ { 1, 2, 3 }, { 4, 5 } }  // compile time error: size mismatch
+

This may appear to be OK, because it is creating a two-dimensional integer array (array[,] int) out of two one-dimensional integer arrays (array[] int). But it is not allowed because the two one-dimensional arrays are not the same size. If the elements are array expressions, this can be diagnosed at compile time. If one or both expressions is a variable, then that won’t be caught until runtime.

+
{ { 1, 2, 3 }, m }  // runtime error if m not size 3
+
+
+

No empty array expressions

+

Because there is no way to infer the type of the result, the empty array expression ({ }) is not allowed. This does not sacrifice expressive power, because a declaration is sufficient to initialize a zero-element array.

+
array[0] int a;   // a is fully defined as zero element array
+
+
+

No zero-tuples

+

There is no way to declare or construct a zero-tuple in Stan. Tuples must be at least one element long. The expression () does not pick out a zero-tuple—it is ill formed.

+

One-tuples need a trailing comma, like (1,). The expression (1), without the comma, is of type int rather than a tuple.

+
+
+
+
+

Parentheses for grouping

+

Any expression wrapped in parentheses is also an expression. Like in C++, but unlike in R, only the round parentheses, ( and ), are allowed. The square brackets [ and ] are reserved for array indexing and the curly braces { and } for grouping statements.

+

With parentheses it is possible to explicitly group subexpressions with operators. Without parentheses, the expression 1 + 2 * 3 has a subexpression 2 * 3 and evaluates to 7. With parentheses, this grouping may be made explicit with the expression 1 + (2 * 3). More importantly, the expression (1 + 2) * 3 has 1 + 2 as a subexpression and evaluates to 9.

+
+
+

Arithmetic and matrix operations on expressions

+

For integer and real-valued expressions, Stan supports the basic binary arithmetic operations of addition (+), subtraction (-), multiplication (*) and division (/) in the usual ways.

+

For integer expressions, Stan supports the modulus (%) binary arithmetic operation. Stan also supports the unary operation of negation for integer and real-valued expressions. For example, assuming n and m are integer variables and x and y real variables, the following expressions are legal.

+
3.0 + 0.14
+-15
+2 * 3 + 1
+(x - y) / 2.0
+(n * (n + 1)) / 2
+x / n
+m % n
+

The negation, addition, subtraction, and multiplication operations are extended to matrices, vectors, and row vectors. The transpose operation, written using an apostrophe (') is also supported for vectors, row vectors, and matrices. Return types for matrix operations are the smallest types that can be statically guaranteed to contain the result. The full set of allowable input types and corresponding return types is detailed in the list of functions.

+

For example, if y and mu are variables of type vector and Sigma is a variable of type matrix, then (y - mu)' * Sigma * (y - mu) is a well-formed expression of type real. The type of the complete expression is inferred working outward from the subexpressions. The subexpression(s) y - mu are of type vector because the variables y and mu are of type vector. The transpose of this expression, the subexpression (y - mu)' is of type row_vector. Multiplication is left associative and transpose has higher precedence than multiplication, so the above expression is equivalent to the following fully specified form (((y - mu)') * Sigma) * (y - mu).

+

The type of subexpression (y - mu)' * Sigma is inferred to be row_vector, being the result of multiplying a row vector by a matrix. The whole expression’s type is thus the type of a row vector multiplied by a (column) vector, which produces a real value.

+

Stan provides elementwise matrix multiplication (e.g., a .* b) and division (e.g., a ./ b) operations. These provide a shorthand to replace loops, but are not intrinsically more efficient than a version programmed with elementwise calculations and assignments in a loop. For example, given declarations,

+
vector[N] a;
+vector[N] b;
+vector[N] c;
+

the assignment,

+
c = a .* b;
+

produces the same result with roughly the same efficiency as the loop

+
for (n in 1:N) {
+  c[n] = a[n] * b[n];
+}
+

Stan supports exponentiation (^) of integer and real-valued expressions. The return type of exponentiation is always a real value. For example, assuming n and m are integer variables and x and y real variables, the following expressions are legal.

+
3 ^ 2
+3.0 ^ -2
+3.0 ^ 0.14
+x ^ n
+n ^ x
+n ^ m
+x ^ y
+

Exponentiation is right associative, so the expression 2 ^ 3 ^ 4 is equivalent to the fully specified form 2 ^ (3 ^ 4).

+
+

Operator precedence and associativity

+

The precedence and associativity of operators, as well as built-in syntax such as array indexing and function application is given in tabular form in the following table.

+
+
Operator Precedence Table
+

Stan’s unary, binary, and ternary operators, with their precedences, associativities, place in an expression, and a description. The last two lines list the precedence of function application and array, matrix, and vector indexing. The operators are listed in order of precedence, from least tightly binding to most tightly binding. The full set of legal arguments and corresponding result types are provided in the function documentation for the operators (i.e., operator*(int, int):int indicates the application of the multiplication operator to two integers, which returns an integer). Parentheses may be used to group expressions explicitly rather than relying on precedence and associativity.

+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Op.    Prec.  Assoc.  Placement      Description
? ~ :  10     right   ternary infix  conditional
||     9      left    binary infix   logical or
&&     8      left    binary infix   logical and
==     7      left    binary infix   equality
!=     7      left    binary infix   inequality
<      6      left    binary infix   less than
<=     6      left    binary infix   less than or equal
>      6      left    binary infix   greater than
>=     6      left    binary infix   greater than or equal
+      5      left    binary infix   addition
-      5      left    binary infix   subtraction
*      4      left    binary infix   multiplication
.*     4      left    binary infix   elementwise multiplication
/      4      left    binary infix   (right) division
./     4      left    binary infix   elementwise division
%      4      left    binary infix   modulus
\      3      left    binary infix   left division
%/%    3      left    binary infix   integer division
!      2      n/a     unary prefix   logical negation
-      2      n/a     unary prefix   negation
+      2      n/a     unary prefix   promotion (no-op in Stan)
^      1      right   binary infix   exponentiation
.^     1      right   binary infix   elementwise exponentiation
'      0      n/a     unary postfix  transposition
()     0      n/a     prefix, wrap   function application
[]     0      left    prefix, wrap   array, matrix indexing
+

Other expression-forming operations, such as function application and subscripting bind more tightly than any of the arithmetic operations.

+

The precedence and associativity determine how expressions are interpreted. Because addition is left associative, the expression a + b + c is interpreted as (a + b) + c. Similarly, a / b * c is interpreted as (a / b) * c.

+

Because multiplication has higher precedence than addition, the expression a * b + c is interpreted as (a * b) + c and the expression a + b * c is interpreted as a + (b * c). Similarly, 2 * x + 3 * - y is interpreted as (2 * x) + (3 * (-y)).

+

Transposition and exponentiation bind more tightly than any other arithmetic or logical operation. For vectors, row vectors, and matrices, -u' is interpreted as -(u'), u * v' as u * (v'), and u' * v as (u') * v. For integers and reals, -n ^ 3 is interpreted as -(n ^ 3).

+
+
+
+
+

Conditional operator

+
+

Conditional operator syntax

+

The ternary conditional operator is unique in that it takes three arguments and uses a mixed syntax. If a is an expression of type int and b and c are expressions that can be converted to one another (e.g., compared with ==), then

+
a ? b : c
+

is an expression of the promoted type of b and c. The result of this expression is b if condition a is true, c otherwise. The only promotion allowed in Stan is integer -> real -> complex; e.g. if one argument is of type int and the other of type real, the conditional expression as a whole is of type real. In other cases, the arguments have to be of the same underlying Stan type (i.e., constraints don’t count, only the shape) and the conditional expression is of that type.

+
+

Conditional operator precedence

+

The conditional operator is the most loosely binding operator, so its arguments rarely require parentheses for disambiguation. For example,

+
a > 0 || b < 0 ? c + d : e - f
+

is equivalent to the explicitly grouped version

+
(a > 0 || b < 0) ? (c + d) : (e - f)
+

The latter is easier to read even if the parentheses are not strictly necessary.

+
+
+

Conditional operator associativity

+

The conditional operator is right associative, so that

+
a ? b : c ? d : e
+

parses as if explicitly grouped as

+
a ? b : (c ? d : e)
+

Again, the explicitly grouped version is easier to read.

+
+
+
+

Conditional operator semantics

+

Stan’s conditional operator works very much like its C++ analogue. The first argument must be an expression denoting an integer. Typically this is a variable or a relation operator, as in the variable a in the example above. Then there are two resulting arguments, the first being the result returned if the condition evaluates to true (i.e., non-zero) and the second if the condition evaluates to false (i.e., zero). In the example above, the value b is returned if the condition evaluates to a non-zero value and c is returned if the condition evaluates to zero.

+
+

Lazy evaluation of results

+

The key property of the conditional operator that makes it so useful in high-performance computing is that it only evaluates the returned subexpression, not the alternative expression. In other words, it is not like a typical function that evaluates its argument expressions eagerly in order to pass their values to the function. As usual, the saving is mostly in the derivatives that do not get computed rather than the unnecessary function evaluation itself.

+
+
+

Promotion to parameter

+

If one return expression is a data value (an expression involving only constants and variables defined in the data or transformed data block), and the other is not, then the ternary operator will promote the data value to a parameter value. This can cause needless work calculating derivatives in some cases and be less efficient than a full if-then conditional statement. For example,

+
data {
+  array[10] real x;
+  // ...
+}
+parameters {
+  array[10] real z;
+  // ...
+}
+model {
+  y ~ normal(cond ? x : z, sigma);
+  // ...
+}
+

would be more efficiently (if not more transparently) coded as

+
if (cond) {
+  y ~ normal(x, sigma);
+} else {
+  y ~ normal(z, sigma);
+}
+

The conditional statement, like the conditional operator, only evaluates one of the result statements. In this case, the variable x will not be promoted to a parameter and thus not cause any needless work to be carried out when propagating the chain rule during derivative calculations.

+
+
+
+
+

Indexing

+

Stan arrays, matrices, vectors, and row vectors are all accessed using the same array-like notation. For instance, if x is a variable of type array [] real (a one-dimensional array of reals) then x[1] is the value of the first element of the array.

+

Subscripting has higher precedence than any of the arithmetic operations. For example, alpha * x[1] is equivalent to alpha * (x[1]).

+

Multiple subscripts may be provided within a single pair of square brackets. If x is of type array[,] real, a two-dimensional array, then x[2, 501] is of type real.

+
+

Accessing subarrays

+

The subscripting operator also returns subarrays of arrays. For example, if x is of type array[,,] real, then x[2] is of type array[,] real, and x[2, 3] is of type array[] real. As a result, the expressions x[2, 3] and x[2][3] have the same meaning.

+
+
+

Accessing matrix rows

+

If Sigma is a variable of type matrix, then Sigma[1] denotes the first row of Sigma and has the type row_vector.

+
+
+

Mixing array and vector/matrix indexes

+

Stan supports mixed indexing of arrays and their vector, row vector or matrix values. For example, if m is of type array[ , ] matrix, a two-dimensional array of matrices, then m[1] refers to the first row of the array, which is a one-dimensional array of matrices. More than one index may be used, so that m[1, 2] is of type matrix and denotes the matrix in the first row and second column of the array. Continuing to add indices, m[1, 2, 3] is of type row_vector and denotes the third row of the matrix denoted by m[1, 2]. Finally, m[1, 2, 3, 4] is of type real and denotes the value in the third row and fourth column of the matrix that is found at the first row and second column of the array m.

+
+
+
+

Multiple indexing and range indexing

+

In addition to single integer indexes, as described in the language indexing section, Stan supports multiple indexing. Multiple indexes can be integer arrays of indexes, lower bounds, upper bounds, lower and upper bounds, or simply shorthand for all of the indexes. If the upper bound is smaller than the lower bound, the range is empty (unlike, e.g., in R). The upper bound and lower bound can be expressions that evaluate to an integer. A complete list of index types is given in the following table.

+
+
Indexing Options Table
+

Types of indexes and examples with one-dimensional containers of size N and an integer array ii of type array[] int and size K.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
index type     example     value
integer        a[11]       value of a at index 11
integer array  a[ii]       a[ii[1]], …, a[ii[K]]
lower bound    a[3:]       a[3], …, a[N]
upper bound    a[:5]       a[1], …, a[5]
range          a[2:7]      a[2], …, a[7]
range          a[7:2]      []
range          a[5-3:5+2]  a[2], …, a[7]
all            a[:]        a[1], …, a[N]
all            a[]         a[1], …, a[N]
+

The range indexing with : allows only increasing sequences. Indexing with a decreasing sequence can be made by creating an integer array in the following way:

+
  array[6] int ii = reverse(linspaced_int_array(6, 2, 7));
+

Then a[ii] evaluates to a[7], …, a[2].

+
+
+

Multiple index semantics

+

The fundamental semantic rule for dealing with multiple indexes is the following. If idxs is a multiple index, then it produces an indexable position in the result. To evaluate that index position in the result, the index is first passed to the multiple index, and the resulting index used.

+
a[idxs, ...][i, ...] = a[idxs[i], ...][...]
+

On the other hand, if idx is a single index, it reduces the dimensionality of the output, so that

+
a[idx, ...] = a[idx][...]
+

The only issue is what happens with matrices and vectors. Vectors work just like arrays. Matrices with multiple row indexes and multiple column indexes produce matrices. Matrices with multiple row indexes and a single column index become (column) vectors. Matrices with a single row index and multiple column indexes become row vectors. The types are summarized in the following table.

+
+
Matrix Indexing Table
+

Special rules for reducing matrices based on whether the argument is a single or multiple index. Examples are for a matrix a, with integer single indexes i and j and integer array multiple indexes is and js. The same typing rules apply for all multiple indexes.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
example    row index  column index  result type
a[i]       single     n/a           row vector
a[is]      multiple   n/a           matrix
a[i, j]    single     single        real
a[i, js]   single     multiple      row vector
a[is, j]   multiple   single        vector
a[is, js]  multiple   multiple      matrix
+

Evaluation of matrices with multiple indexes is defined to respect the following distributivity conditions.

+
m[idxs1, idxs2][i, j] = m[idxs1[i], idxs2[j]]
+m[idxs, idx][j] = m[idxs[j], idx]
+m[idx, idxs][j] = m[idx, idxs[j]]
+

Evaluation of arrays of matrices and arrays of vectors or row vectors is defined recursively, beginning with the array dimensions.

+
+
+
+
+

Function application

+

Stan provides a range of built in mathematical and statistical functions, which are documented in the built-in function documentation.

+

Expressions in Stan may consist of the name of a function followed by a sequence of zero or more argument expressions. For instance, log(2.0) is the expression of type real denoting the result of applying the natural logarithm to the value of the real literal 2.0.

+

Syntactically, function application has higher precedence than any of the other operators, so that y + log(x) is interpreted as y + (log(x)).

+
+

Type signatures and result type inference

+

Each function has a type signature which determines the allowable type of its arguments and its return type. For instance, the function signature for the logarithm function can be expressed as

+
real log(real);
+

and the signature for the lmultiply function is

+

real lmultiply(real, real);

+

A function is uniquely determined by its name and its sequence of argument types. For instance, the following two functions are different functions.

+

real mean(array [] real);

+

real mean(vector);

+

The first applies to a one-dimensional array of real values and the second to a vector.

+

The identity conditions for functions explicitly forbid having two functions with the same name and argument types but different return types. This restriction also makes it possible to infer the type of a function expression compositionally by only examining the type of its subexpressions.

+
+
+

Constants

+

Constants in Stan are nothing more than nullary (no-argument) functions. For instance, the mathematical constants \(\pi\) and \(e\) are represented as nullary functions named pi() and e(). See the Stan Functions Reference built-in constants section for a list of built-in constants.

+
+
+

Type promotion and function resolution

+

Because of integer to real type promotion, rules must be established for which function is called given a sequence of argument types. The scheme employed by Stan is the same as that used by C++, which resolves a function call to the function requiring the minimum number of type promotions.

+

For example, consider a situation in which the following two function signatures have been registered for foo.

+
real foo(real, real);
+int foo(int, int);
+

The use of foo in the expression foo(1.0, 1.0) resolves to foo(real, real), and thus the expression foo(1.0, 1.0) itself is assigned a type of real.

+

Because integers may be promoted to real values, the expression foo(1, 1) could potentially match either foo(real, real) or foo(int, int). The former requires two type promotions and the latter requires none, so foo(1, 1) is resolved to function foo(int, int) and is thus assigned the type int.

+

The expression foo(1, 1.0) has argument types (int, real) and thus does not explicitly match either function signature. By promoting the integer expression 1 to type real, it is able to match foo(real, real), and hence the type of the function expression foo(1, 1.0) is real.

+

In some cases (though not for any built-in Stan functions), a situation may arise in which the function referred to by an expression remains ambiguous. For example, consider a situation in which there are exactly two functions named bar with the following signatures.

+
real bar(real, int);
+real bar(int, real);
+

With these signatures, the expression bar(1.0, 1) and bar(1, 1.0) resolve to the first and second of the above functions, respectively. The expression bar(1.0, 1.0) is illegal because real values may not be demoted to integers. The expression bar(1, 1) is illegal for a different reason. If the first argument is promoted to a real value, it matches the first signature, whereas if the second argument is promoted to a real value, it matches the second signature. The problem is that these both require one promotion, so the function name bar is ambiguous. If there is not a unique function requiring fewer promotions than all others, as with bar(1, 1) given the two declarations above, the Stan compiler will flag the expression as illegal.

+
+
+

Random-number generating functions

+

For most of the distributions supported by Stan, there is a corresponding random-number generating function. These random number generators are named by the distribution with the suffix _rng. For example, a univariate normal random number can be generated by normal_rng(0, 1); only the parameters of the distribution, here a location (0) and scale (1) are specified because the variate is generated.

+
+

Random-number generators locations

+

The use of random-number generating functions is restricted to the transformed data and generated quantities blocks; attempts to use them elsewhere will result in a parsing error with a diagnostic message. They may also be used in the bodies of user-defined functions whose names end in _rng.

+

This allows the random number generating functions to be used for simulation in general, and for Bayesian posterior predictive checking in particular.

+
+
+

Posterior predictive checking

+

Posterior predictive checks typically use the parameters of the model to generate simulated data (at the individual and optionally at the group level for hierarchical models), which can then be compared informally using plots and formally by means of test statistics, to the actual data in order to assess the suitability of the model; see Chapter 6 of (Gelman et al. 2013) for more information on posterior predictive checks.

+
+
+
+
+

Type inference

+

Stan is strongly statically typed, meaning that the implementation type of an expression can be resolved at compile time.

+
+

Implementation types

+

The primitive implementation types for Stan are

+
int, real, complex, vector, row_vector,  matrix, complex_vector,
+complex_row_vector, complex_matrix
+

Every basic declared type corresponds to a primitive type; the following table shows the mapping from types to their primitive types.

+
+
Primitive Type Table
+

The table shows the variable declaration types of Stan and their corresponding primitive implementation type. Stan functions, operators, and probability functions have argument and result types declared in terms of primitive types plus array dimensionality.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
typeprimitive type
intint
realreal
vectorvector
simplexvector
unit_vectorvector
sum_to_zero_vectorvector
orderedvector
positive_orderedvector
row_vectorrow_vector
matrixmatrix
cov_matrixmatrix
corr_matrixmatrix
cholesky_factor_covmatrix
cholesky_factor_corrmatrix
column_stochastic_matrixmatrix
row_stochastic_matrixmatrix
sum_to_zero_matrixmatrix
complex_vectorcomplex_vector
complex_row_vectorcomplex_row_vector
complex_matrixcomplex_matrix
+

A full implementation type consists of a primitive implementation type and an integer array dimensionality greater than or equal to zero. These will be written to emphasize their array-like nature. For example, array [] real has an array dimensionality of 1, int an array dimensionality of 0, and array [,,] int an array dimensionality of 3. The implementation type matrix[ , , ] has a total of five dimensions and takes up to five indices, three from the array and two from the matrix.

+

Recall that the array dimensions come before the matrix or vector dimensions in an expression such as the following declaration of a three-dimensional array of matrices.

+
array[I, J, K] matrix[M, N] a;
+

The variable a is indexed as a[i, j, k, m, n] with the array indices first, followed by the matrix indices, with a[i, j, k] being a matrix and a[i, j, k, m] being a row vector.

+
+
+
+

Type inference rules

+

Stan’s type inference rules define the implementation type of an expression based on a background set of variable declarations. The rules work bottom up from primitive literal and variable expressions to complex expressions.

+
+
+

Promotion

+

There are two basic promotion rules,

+
    +
  1. int types may be promoted to real, and
  2. +
  3. real types may be promoted to complex.
  4. +
+

Plus, promotion is transitive, so that

+
    +
  1. if type U can be promoted to type V and type V can be promoted to type T, then U can be promoted to T.
  2. +
+

The first rule means that expressions of type int may be used anywhere an expression of type real is specified, namely in assignment or function argument passing. An integer is promoted to real by casting it in the underlying C++ code.

+

The remaining rules have to do with covariant typing rules, which say that a container of type U may be promoted to a container of the same shape of type T if U can be promoted to T. For vector and matrix types, this induces three rules,

+
    +
  1. vector may be promoted to complex_vector,
  2. +
  3. row_vector may be promoted to complex_row_vector
  4. +
  5. matrix may be promoted to complex_matrix.
  6. +
+

For array types, there’s a single rule

+
    +
  1. array[...] U may be promoted to array[...] T if U can be promoted to T.
  2. +
+

For example, this means array[,] int may be used where array [,] real or array [,] complex is required; as another example, array[] real may be used anywhere array[] complex is required.

+

Tuples have the natural extension of the above rules, applied to all sub-types at once

+
    +
  1. A tuple(U1, ..., UN) may be promoted to a tuple(T1, ..., TN) if every Un can be promoted to Tn for n in 1:N
  2. +
+
+

Literals

+

An integer literal expression such as 42 is of type int. Real literals such as 42.0 are of type real. Imaginary literals such as -17i are of type complex. The expression 7 - 2i acts like a complex literal, but technically it combines a real literal 7 and an imaginary literal 2i through subtraction.

+
+
+

Variables

+

The type of a variable declared locally or in a previous block is determined by its declaration. The type of a loop variable is int.

+

There is always a unique declaration for each variable in each scope because Stan prohibits the redeclaration of an already-declared variable.1

+
+
+

Indexing

+

If e is an expression of total dimensionality greater than or equal to \(N\), then the type of expression e[i1, i2, ..., iN] is the same as that of e[i1][i2]...[iN], so it suffices to define the type of a singly-indexed expression. Suppose e is an expression and i is an expression of primitive type int. Then

+
    +
  • if e is an expression of type array[i1, i2, ..., iN] T and k, i1, …, iN are expressions of type int, then e[k] is an expression of type array[i2, ..., iN] T,
  • +
  • if e is an expression of type array[i] T with i and k expressions of type int, then e[k] is of type T,
  • +
  • if e has implementation type vector or row_vector, dimensionality 0, then e[i] has implementation type real,
  • +
  • if e has implementation type matrix, then e[i] has type row_vector,
  • +
  • if e has implementation type complex_vector or complex_row_vector and i is an expression of type int, then e[i] is an expression of type complex, and
  • +
  • if e has implementation type complex_matrix, and i is an expression of type int, then e[i] is an expression of type complex_row_vector.
  • +
+
+
+

Function application

+

If f is the name of a function and e1,...,eN are expressions for \(N \geq 0\), then f(e1,...,eN) is an expression whose type is determined by the return type in the function signature for f given e1 through eN. Recall that a function signature is a declaration of the argument types and the result type.

+

In looking up functions, binary operators like real * real are defined as operator*(real, real) in the documentation and index.

+

In matching a function definition, all of the promotion rules are in play (integers may be promoted to reals, reals to complex, and containers may be promoted if their types are promoted). For example, arguments of type int may be promoted to type real or complex if necessary (see the subsection on type promotion in the function application section), a real argument will be promoted to complex if necessary, a vector will be promoted to complex_vector if necessary, and so on.

+

In general, matrix operations return the lowest inferable type. For example, row_vector * vector returns a value of type real, which is declared in the function documentation and index as real operator*(row_vector, vector).

+
+
+
+
+

Higher-order functions

+

There are several expression constructions in Stan that act as higher-order functions.2

+

The higher-order functions and the signature of their argument functions are listed in the following pair of tables.

+
+
Higher-order Functions Table
+

Higher-order functions in Stan with their argument function types. The first group of arguments can be a function of parameters or data. The second group of arguments, consisting of a real and integer array in all cases, must be expressions involving only data and literals.

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
functionparameter or data argsdata argsreturn type
algebra_solvervector, vectorarray [] real, array [] realvector
algebra_solver_newtonvector, vectorarray [] real, array [] realvector
integrate_1d,real, real, array [] realarray [] real, array [] realreal
integrate_ode_X,real, array [] real, array [] realarray [] real, array [] realarray [] real
map_rectvector, vectorarray [] real, array [] realvector
+

For example, the integrate_ode_rk45 function can be used to integrate differential equations in Stan:

+
functions {
+  array [] real foo(real t,
+                    array [] real y,
+                    array [] real theta,
+                    array [] real x_r,
+                    array [] real x_i) {
+    // ...
+  }
+}
+// ...
+int<lower=1> T;
+array[2] real y0;
+real t0;
+array[T] real ts;
+array[1] real theta;
+array[0] real x_r;
+array[0] int x_i;
+// ...
+array[T, 2] real y_hat = integrate_ode_rk45(foo, y0, t0,
+                                              ts, theta, x_r, x_i);
+

The function argument is foo, the name of the user-defined function; as shown in the higher-order functions table, integrate_ode_rk45 takes a real array, a real, three more real arrays, and an integer array as arguments and returns a 2D real array.

+
+
+
Variadic Higher-order Functions Table
+

Variadic Higher-order functions in Stan with their argument function types. The first group of arguments are restricted in type. The sequence of trailing arguments can be of any length with any types.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
functionrestricted argsreturn type
solve_Xvectorvector
ode_X,vector, real, array [] realvector[]
reduce_sumarray[] T, T1, T2real
+

T, T1, and T2 can be any Stan type.

+

For example, the ode_rk45 function can be used to integrate differential equations in Stan:

+
functions {
+  vector foo(real t, vector y, real theta, vector beta,
+            array [] real x_i, int index) {
+    // ...
+  }
+}
+// ...
+int<lower=1> T;
+vector[2] y0;
+real t0;
+array[T] real ts;
+real theta;
+vector[7] beta;
+array[10] int x_i;
+int index;
+// ...
+vector[2] y_hat[T] = ode_rk45(foo, y0, t0, ts, theta,
+                              beta, x_i, index);
+

The function argument is foo, the name of the user-defined function. As shown in the variadic higher-order functions table, ode_rk45 takes a vector, a real, a real array, and a sequence of arguments whose types match those at the end of foo and returns an array of vectors.

+
+
+

Functions passed by reference

+

The function argument to higher-order functions is always passed as the first argument. This function argument must be provided as the name of a user-defined or built-in function. No quotes are necessary.

+
+
+

Data-restricted arguments

+

Some of the arguments to higher-order functions are restricted to data. This means they must be expressions containing only data variables, transformed data variables, or literals; they may contain arbitrary functions applied to data variables or literals, but must not contain parameters, transformed parameters, or local variables from any block other than transformed data.

+

For user-defined functions the qualifier data may be prepended to the type to restrict the argument to data-only variables.

+
+
+
+

Chain rule and derivatives

+

Derivatives of the log probability function defined by a model are used in several ways by Stan. The Hamiltonian Monte Carlo samplers, including NUTS, use gradients to guide updates. The BFGS optimizers also use gradients to guide search for posterior modes.

+
+

Errors due to chain rule

+

Unlike evaluations in pure mathematics, evaluation of derivatives in Stan is done by applying the chain rule on an expression-by-expression basis, evaluating using floating-point arithmetic. As a result, models such as the following are problematic for inference involving derivatives.

+
parameters {
+  real x;
+}
+model {
+  x ~ normal(sqrt(x - x), 1);
+}
+

Algebraically, the distribution statement in the model could be reduced to

+
  x ~ normal(0, 1);
+

and it would seem the model should produce unit normal draws for x. But rather than canceling, the expression sqrt(x - x) causes a problem for derivatives. The cause is the mechanistic evaluation of the chain rule,

+

\[ +\begin{array}{rcl} +\frac{d}{dx} \sqrt{x - x} +& = & +\frac{1}{2 \sqrt{x - x}} \times \frac{d}{dx} (x - x) +\\[4pt] +& = & +\frac{1}{0} \times (1 - 1) +\\[4pt] +& = & +\infty \times 0 +\\[4pt] +& = & \mathrm{NaN}. +\end{array} +\]

+

Rather than the \(x - x\) canceling out, it introduces a 0 into the numerator and denominator of the chain-rule evaluation.

+

The only way to avoid this kind of problem is to be careful to do the necessary algebraic reductions as part of the model and not introduce expressions like sqrt(x - x) for which the chain rule produces not-a-number values.

+
+
+

Diagnosing problems with derivatives

+

The best way to diagnose whether something is going wrong with the derivatives is to use the test-gradient option to the sampler or optimizer inputs; this option is available in both Stan and RStan (though it may be slow, because it relies on finite differences to make a comparison to the built-in automatic differentiation).

+

For example, compiling the above model to an executable sqrt-x-minus-x in CmdStan, the test can be run as

+
> ./sqrt-x-minus-x diagnose test=gradient
+

which produces

+
...
+TEST GRADIENT MODE
+
+ Log probability=-0.393734
+
+ param idx           value           model     finite diff           error
+         0       -0.887393             nan               0             nan
+

Even though finite differences calculates the right gradient of 0, automatic differentiation follows the chain rule and produces a not-a-number output.

+ + + +
+
+
+ + + Back to top

References

+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+

Footnotes

+ +
    +
  1. Languages such as C++ and R allow the declaration of a variable of a given name in a narrower scope to hide (take precedence over for evaluation) a variable defined in a containing scope.↩︎

  2. +
  3. Internally, they are implemented as their own expression types because Stan doesn’t have object-level functional types (yet).↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/img/logo_tm.png b/docs/2_39/reference-manual/img/logo_tm.png new file mode 100644 index 000000000..48c9769c7 Binary files /dev/null and b/docs/2_39/reference-manual/img/logo_tm.png differ diff --git a/docs/2_39/reference-manual/img/warmup-epochs.png b/docs/2_39/reference-manual/img/warmup-epochs.png new file mode 100644 index 000000000..7ccf22e05 Binary files /dev/null and b/docs/2_39/reference-manual/img/warmup-epochs.png differ diff --git a/docs/2_39/reference-manual/includes.html b/docs/2_39/reference-manual/includes.html new file mode 100644 index 000000000..d34a68824 --- /dev/null +++ b/docs/2_39/reference-manual/includes.html @@ -0,0 +1,1152 @@ + + + + + + + + + +Includes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Includes

+

Stan allows one file to be included within another file using a syntax similar to that from C++. For example, suppose the file my-std-normal.stan defines the standard normal log probability density function (up to an additive constant).

+
functions {
+  real my_std_normal_lpdf(vector y) {
+    return -0.5 * y' * y;
+  }
+}
+

Suppose we also have a file containing a Stan program with an include statement.

+
#include my-std-normal.stan
+parameters {
+  real y;
+}
+model {
+  y ~ my_std_normal();
+}
+

This Stan program behaves as if the contents of the file my-std-normal.stan replace the line with the #include statement, behaving as if a single Stan program were provided.

+
functions {
+  real my_std_normal_lpdf(vector y) {
+    return -0.5 * y' * y;
+  }
+}
+parameters {
+  real y;
+}
+model {
+  y ~ my_std_normal();
+}
+

There are no restrictions on where include statements may be placed within a file or what the contents are of the replaced file.

+
+

Space before includes

+

It is possible to use includes on a line non-initially. For example, the previous example could’ve included space before the # in the include line:

+
    #include my-std-normal.stan
+parameters {
+// ...
+

If there is initial space before an include, it will be discarded.

+
+
+

Comments after includes

+

It is also possible to include line-based comments after the include. For example, the previous example can be coded as:

+
#include my-std-normal.stan  // definition of standard normal
+parameters {
+// ...
+

Line comments are discarded when the entire line is replaced with the contents of the included file.

+
+
+

Recursive includes

+

Recursive includes will lead to a compiler error. For example, suppose a.stan contains

+
#include b.stan
+

and b.stan contains

+
#include a.stan
+

This will result in an error explaining the circular dependency:

+
Syntax error in './b.stan', line 1, column 0, included from
+'./a.stan', line 1, column 0, included from
+'./b.stan', line 1, column 0, included from
+'a.stan', line 1, column 0, include error:
+   -------------------------------------------------
+     1:  #include a.stan
+         ^
+   -------------------------------------------------
+
+File a.stan recursively included itself.
+
+
+

Include paths

+

The Stan interfaces may provide a mechanism for specifying a sequence of system paths in which to search for include files. The file included is the first one that is found in the sequence.

+
+

Slashes in include paths

+

If there is not a final / or \ in the path, a / will be appended between the path and the included file name.

+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/index.html b/docs/2_39/reference-manual/index.html new file mode 100644 index 000000000..c4a192f88 --- /dev/null +++ b/docs/2_39/reference-manual/index.html @@ -0,0 +1,1062 @@ + + + + + + + + + +Stan Reference Manual + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ +
+
+

Stan Reference Manual

+

Version 2.39

+
+ + + +
+ + + + +
+ + + +
+ + +

+

This is the official reference manual for Stan’s programming language for coding probability models, inference algorithms for fitting models and making predictions, and posterior analysis tools for evaluating the results. This manual applies to all Stan interfaces.

+

The first part of the reference manual provides a full specification of the Stan programming language. The language is responsible for defining a log density function conditioned on data. Typically, this is a Bayesian posterior, but it may also be a penalized likelihood function. The second part of the manual specifies the inference algorithms and posterior inference tools. The third part provides auxiliary information about the use of Stan.

+

Download the pdf version of this manual.

+ +
+

Licensing

+ + + +
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/laplace.html b/docs/2_39/reference-manual/laplace.html new file mode 100644 index 000000000..14b05ecbd --- /dev/null +++ b/docs/2_39/reference-manual/laplace.html @@ -0,0 +1,1078 @@ + + + + + + + + + +Laplace Approximation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Laplace Approximation

+

Stan provides a Laplace approximation algorithm which can be used to obtain draws from an approximated posterior. The Laplace approximation works in the unconstrained space, so that if there are constrained parameters, the normal approximation is centered at the mode in the unconstrained space and then the implemented method transforms the draws from the normal approximation to the constrained space before outputting them.

+

Given the estimate of the mode \(\widehat{\theta}\), the Hessian \(H(\widehat{\theta})\) is computed using central finite differences of the model functor. Next the algorithm computes the Cholesky factor of the negative inverse Hessian:

+

\(R^{-1} = \textrm{chol}(-H(\widehat{\theta})) \backslash \mathbf{1}\).

+

Each draw is generated on the unconstrained scale by sampling

+

\(\theta^{\textrm{std}(m)} \sim \textrm{normal}(0, \textrm{I})\)

+

and defining draw \(m\) to be

+

\(\theta^{(m)} = \widehat{\theta} + R^{-1} \cdot \theta^{\textrm{std}(m)}\)

+

Finally, each \(\theta^{(m)}\) is transformed back to the constrained scale.

+

The one-time computation of the Cholesky factor incurs a high constant overhead of \(\mathcal{O}(N^3)\) in \(N\) dimensions. It also requires \(2N\) gradient calculations to use as the basis, which scales at best as \(\mathcal{O}(N^2)\) and is worse for models whose gradient calculation is super-linear in dimension. The algorithm also has a high per-draw overhead, requiring \(N\) standard normal pseudorandom numbers and \(\mathcal{O}(N^2)\) per draw (to multiply by the Cholesky factor). For \(M\) draws, the total cost is proportional to \(\mathcal{O}(N^3 + M \cdot N^2)\).

+ + +
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/laplace_embedded.html b/docs/2_39/reference-manual/laplace_embedded.html new file mode 100644 index 000000000..9e3a54307 --- /dev/null +++ b/docs/2_39/reference-manual/laplace_embedded.html @@ -0,0 +1,1226 @@ + + + + + + + + + +Embedded Laplace Approximation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Embedded Laplace Approximation

+

The embedded Laplace approximation replaces explicit sampling of potentially high-dimensional Gaussian latent variables with a local Gaussian approximation, marginalizing them out so that inference proceeds over the remaining hyperparameters alone. This approach is often referred to as the integrated Laplace approximation, although the exact details of the method can vary. The details of Stan’s implementation can be found in references (Margossian et al. 2020; Margossian 2023).

+

A standard approach to fit a latent Gaussian model would be to perform inference jointly over the latent Gaussian variables and the hyperparameters. Instead, the embedded Laplace approximation can be used to do approximate marginalization of the latent Gaussian variables; we can then use any inference over the remaining hyperparameters. By marginalizing out the latent variables, the sampler explores a lower-dimensional, better-behaved marginal posterior. Individual iterations are more expensive (each requires an inner optimization), but the sampler typically needs far fewer iterations to achieve the same effective sample size. How this trade-off resolves depends on the specific problem at hand.

+

For complete function signatures and the built-in likelihood wrappers (Poisson, Negative Binomial, Bernoulli), see the Embedded Laplace functions reference. For worked examples with full data blocks, see the Gaussian Processes chapter.

+
+

Latent Gaussian models

+

The embedded Laplace approximation is used for latent Gaussian models. A latent Gaussian model is defined by three components:

+
    +
  • \(\phi\): hyperparameters (e.g., GP kernel length-scale and magnitude, or variance components in a hierarchical model),
  • +
  • \(\theta\): latent Gaussian variables (the high-dimensional quantity to be marginalized out),
  • +
  • \(y\): observed data.
  • +
+

These components are related through a hierarchical structure. The hyperparameters \(\phi\) are given a prior \(p(\phi)\). The latent variables \(\theta\) have a multivariate normal prior centered at 0 with covariance matrix \(K(\phi)\). A non-zero mean offset can be incorporated into the likelihood function. The observations \(y\) have a data model \(p(y \mid \theta, \phi)\). The prior on \(\theta\) is centered at zero; an offset can be incorporated into the data model if a non-zero mean is needed.

+

\[\begin{eqnarray*} + \phi & \sim & p(\phi) \\ + \theta & \sim & \text{Multi-Normal}(0, K(\phi)) \\ + y & \sim & p(y \mid \theta, \phi). +\end{eqnarray*}\]

+

The generative model above defines a joint distribution over all three quantities, \(p(\phi, \theta, y) = p(\phi) \, p(\theta \mid \phi) \, p(y \mid \theta, \phi)\). After observing data \(y\), Bayes’ theorem gives the joint posterior \(p(\phi, \theta \mid y) \propto p(\phi) \, p(\theta \mid \phi) \, p(y \mid \theta, \phi)\), where \(p(y \mid \theta, \phi)\) as a function of \(\theta\) and \(\phi\) is the joint likelihood function.

+

Sampling directly from the joint posterior \(p(\phi, \theta \mid y)\) of this model is often difficult. Challenging geometries (e.g., funnels) frustrate inference algorithms, including Hamiltonian Monte Carlo and variational inference. However, the marginal posterior \(p(\phi \mid y)\) is often well-behaved and low-dimensional, making it much easier to sample. With an embedded Laplace approximation, we can obtain an approximation of the marginal posterior \(p(\phi \mid y)\). This is done via an intermediate approximation of the conditional posterior \(p(\theta \mid \phi, y)\) by a normal distribution and this normal approximation is well-justified when the likelihood \(p(y \mid \theta, \phi)\) as a function of \(\theta\) is log-concave (given that we already have a normal prior \(p(\theta \mid \phi)\)). Once we obtain (approximate) samples \(\phi^{(i)} \sim p(\phi \mid y)\), we can in turn generate posterior draws for \(\theta\) using the normal approximation to \(p(\theta \mid y, \phi)\), evaluated at the posterior draws \(\phi^{(i)}\).

+
+
+

Approximation of the conditional posterior and marginal likelihood

+

The two-step inference strategy for using embedded Laplace in a latent Gaussian model requires approximations to both the conditional posterior \(p(\theta \mid y, \phi)\) and the marginal likelihood \(p(y \mid \phi)\). The Laplace approximation is the normal distribution that matches the mode and curvature of the conditional posterior \(p(\theta \mid y, \phi)\). The mode, defined as the value of \(\theta\) that maximizes the conditional posterior, is estimated by a Newton solver, \[ + \theta^* = \underset{\theta}{\text{argmax}} \ p(\theta \mid y, \phi). +\]

+

Since the approximation is normal, the curvature is matched by setting the precision (inverse covariance) to the negative Hessian of the log conditional posterior, evaluated at the mode,

+

\[ + (\Sigma^*)^{-1} = - \left . \frac{\partial^2}{\partial \theta^2} + \log p (\theta \mid \phi, y) \right |_{\theta =\theta^*}. +\]

+

The resulting Laplace approximation is a multivariate normal centered at the mode with covariance given by the inverse curvature,

+

\[ +\hat p_\mathcal{L} (\theta \mid y, \phi) = \text{Multi-Normal}(\theta^*, \Sigma^*) +\approx p(\theta \mid y, \phi). +\]

+

This approximation also yields an approximation to the marginal likelihood, obtained by evaluating the prior, likelihood, and approximate posterior at the mode \(\theta^*\),

+

\[ + \hat p_\mathcal{L}(y \mid \phi) := \frac{p(\theta^* \mid \phi) \ + p(y \mid \theta^*, \phi) }{ \hat p_\mathcal{L} (\theta^* \mid \phi, y) } + \approx p(y \mid \phi). +\]

+

Hence, a strategy to approximate the posterior of the latent Gaussian model is to first estimate the marginal posterior \(\hat p_\mathcal{L}(\phi \mid y) \propto p(\phi) \hat p_\mathcal{L} (y \mid \phi)\) using any algorithm supported by Stan. Approximate posterior draws for the latent Gaussian variables are then obtained by first sampling \(\phi \sim \hat p_\mathcal{L}(\phi \mid y)\) and then \(\theta \sim \hat p_\mathcal{L}(\theta \mid \phi, y)\).

+
+
+

Trade-offs of the approximation

+

The embedded Laplace approximation presents several trade-offs with standard inference over the joint posterior \(p(\theta, \phi \mid y)\). The main advantage of the embedded Laplace approximation is that it side-steps the intricate geometry of hierarchical models. The marginal posterior \(p(\phi \mid y)\) can then be handled by Hamiltonian Monte Carlo sampling without extensive tuning or reparameterization, and the mixing time is faster, meaning we can run shorter chains to achieve a desired precision. One additional benefit is that approximate methods, e.g. variational inference, which work poorly on the joint \(p(\theta, \phi \mid y)\) can work well on the marginal posterior \(p(\phi \mid y)\).

+

On the other hand, the embedded Laplace approximation presents certain disadvantages. First, we need to perform a Laplace approximation each time the log marginal likelihood is evaluated, meaning each iteration can be expensive. Secondly, the approximation can introduce non-negligible error, especially with non-log-concave likelihoods (note the prior is always multivariate normal). How these trade-offs are resolved depends on the application; see Margossian et al. (2020) for some examples.

+
+

When the approximation is appropriate

+

The quality of the Laplace approximation depends on how close the true conditional posterior \(p(\theta \mid y, \phi)\) is to Gaussian.

+

Works well. Log-concave likelihoods, for example from a Poisson model with log link or negative binomial with log link. These produce unimodal conditional posteriors when combined with a Gaussian prior. The approximation error is typically negligible with these likelihoods, especially with moderate-to-large counts (Kuss and Rasmussen 2005; Vanhatalo, Pietiläinen, and Vehtari 2010; Cseke and Heskes 2011; Vehtari et al. 2016). If the likelihood is normal, there is no error in the approximation but in this case the marginalization can be worked analytically and the resulting implementation is much faster than using the embedded Laplace approximation.

+

Works adequately. A Bernoulli model with logit link technically has a log-concave likelihood, but the likelihood can be very skewed, making the Gaussian approximation less accurate than for count data. The embedded Laplace approximation is still useful when \(\theta\) is high-dimensional and joint sampling is infeasible; see Vehtari et al. (2016) and Margossian et al. (2020) for discussion.

+

Not appropriate. For likelihoods that are not log-concave in \(\theta\), the conditional posterior may be multimodal and the Newton solver finds only a single mode or can fail completely. When \(\theta\) is low-dimensional (a few dozen or fewer), the overhead of the inner optimization may not pay for itself and standard joint HMC sampling is often adequate.

+
+
+
+

Details of the approximation

+

When the embedded Laplace approximation does not converge or produces unexpected results, the solver configuration may need adjustment. This section describes the internals of the Newton solver and the options available for tuning it.

+
+

Tuning the Newton solver

+

A critical component of the embedded Laplace approximation is the Newton solver used to estimate the mode \(\theta^*\) of \(p(\theta \mid \phi, y)\). The objective function being maximized is the log joint density of the prior and likelihood with respect to \(\theta\).

+

\[ +\Psi(\theta) = \log p(\theta \mid \phi) + \log p(y \mid \theta, \phi), +\]

+

Convergence is declared when the change in the objective between successive iterations falls below a tolerance \(\Delta\).

+

\[ +| \Psi (\theta^{(i + 1)}) - \Psi (\theta^{(i)}) | \le \Delta. +\]

+

The solver also stops after reaching a pre-specified maximum number of steps. In that case, Stan throws a warning, but still returns the last iteration’s parameters. If you see this warning you should check the diagnostics to understand why the solver failed to converge.

+

To help with cases where the Newton step does not lead to an increase in the objective function, the Newton iteration is augmented with a Wolfe line search to ensure that at each iteration the objective function \(\Psi\) increases. Specifically, suppose the objective decreases after a Newton step, indicating the step overshot a region of improvement,

+

\[ +\Psi (\theta^{(i + 1)}) < \Psi (\theta^{(i)}). +\]

+

This can indicate that the Newton step \(\alpha\) at iteration \(i\) is too large and that we skipped a region where the objective function increases. In that case, we can fall back to a Wolfe line search to find a step size which satisfies the Wolfe conditions. The Wolfe line search attempts to find a search direction \(p_i\) and step size \(\alpha_k\) such that an accepted step both improves our objective and has a slope flatter than at our previous position. Together these help push the algorithm towards the mode. Written in terms of a function \(f\) to be minimized (here \(f = -\Psi\)), with current iterate \(x_i\) and constants \(0 < c_1 < c_2 < 1\), the conditions are

+

\[ +\begin{array}{rcl} +f(x_i + \alpha_k p_i) & \le & f(x_i) + c_1 \alpha_k \nabla f(x_i)^T p_i, +\\[3pt] +-p^T_i \nabla f(x_i + \alpha_k p_i) & \le & -c_2 p^T_i \nabla f(x_i). +\end{array} +\]

+

\[ + \theta^{(i + 1)} \leftarrow \frac{\theta^{(i + 1)} + \theta^{(i)}}{2}. +\]

+

We repeat this halving of steps until \(\Psi (\theta^{(i + 1)}) \ge \Psi (\theta^{(i)})\), or until a maximum number of linesearch steps is reached. For certain problems, adding a linesearch can make the optimization more stable.

+
+
+

Solver Strategies

+

The embedded Laplace approximation uses a custom Newton solver, specialized to find the mode of \(p(\theta \mid \phi, y)\). A key step for efficient optimization is to ensure all matrix inversions are numerically stable. This can be done using the Woodbury-Sherman-Morrison formula and requires one of three matrix decompositions:

+
    +
  1. Cholesky decomposition of the Hessian of the negative log likelihood \(W = - \partial^2_\theta \log p(y \mid \theta, \phi)\).

  2. +
  3. Cholesky decomposition of the prior covariance matrix \(K(\phi)\).

  4. +
  5. LU-decomposition of \(I + KW\), where \(I\) is the identity matrix.

  6. +
+

The first solver (1) should be used if the Hessian of the negative log likelihood is positive-definite. Otherwise the user should rely on (2). In rarer cases where it is not numerically safe to invert the covariance matrix \(K\), users can use the third solver as a last-resort option.

+
+
+

Sparse Hessian of the log likelihood

+

A key step to speed up computation is to take advantage of the sparsity of \(H\), the Hessian of the log likelihood with respect to the latent variables, \[ + H = \frac{\partial^2}{\partial \theta^2} \log p(y \mid \theta, \phi). +\] For example, if the observations \((y_1, \cdots, y_N)\) are conditionally independent and each depends on only one component of \(\theta\), the log likelihood decomposes into a sum of per-observation terms, \[ + \log p(y \mid \theta, \phi) = \sum_{i = 1}^N \log p(y_i \mid \theta_i, \phi), +\] and the Hessian is diagonal. This leads to faster calculations of the Hessian and subsequently sparse matrix operations. This case is common in Gaussian process models, and certain hierarchical models.

+

Stan’s suite of functions for the embedded Laplace approximation exploits block-diagonal structure in the Hessian. The user specifies the size \(B\) of these blocks and is responsible for working out what \(B\) is. If the Hessian is dense, then we simply set \(B = N\). The diagonal case above corresponds to \(B = 1\). Arbitrary sparsity patterns beyond block-diagonal structure are not currently supported.

+ + + +
+
+
+ + Back to top

References

+
+Cseke, Botond, and Tom Heskes. 2011. “Approximate Marginals in Latent Gaussian Models.” Journal of Machine Learning Research 12. +
+
+Kuss, Malte, and Carl E Rasmussen. 2005. “Assessing Approximate Inference for Binary Gaussian Process Classification.” Journal of Machine Learning Research 6: 1679–1704. +
+
+Margossian, Charles C. 2023. “General Adjoint-Differentiated Laplace Approximation.” arXiv:2306.14976. +
+
+Margossian, Charles C, Aki Vehtari, Daniel Simpson, and Raj Agrawal. 2020. “Hamiltonian Monte Carlo Using an Adjoint-Differentiated Laplace Approximation: Bayesian Inference for Latent Gaussian Models and Beyond.” Advances in Neural Information Processing Systems 34. +
+
+Vanhatalo, Jarno, Ville Pietiläinen, and Aki Vehtari. 2010. “Approximate Inference for Disease Mapping with Sparse Gaussian Processes.” Statistics in Medicine 29 (15): 1580–1607. +
+
+Vehtari, Aki, Tommi Mononen, Ville Tolvanen, Tuomas Sivula, and Ole Winther. 2016. “Bayesian Leave-One-Out Cross-Validation Approximations for Gaussian Latent Variable Models.” Journal of Machine Learning Research 17 (103): 1–38. http://jmlr.org/papers/v17/14-540.html. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/licenses.html b/docs/2_39/reference-manual/licenses.html new file mode 100644 index 000000000..0e58ea5e0 --- /dev/null +++ b/docs/2_39/reference-manual/licenses.html @@ -0,0 +1,1101 @@ + + + + + + + + + +Licenses and Dependencies + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Licenses and Dependencies

+

Stan and its dependent libraries are distributed under generous, freedom-respecting licenses approved by the Open Source Initiative.

+

In particular, the licenses for Stan and its dependent libraries have no “copyleft” provisions requiring applications of Stan to be open source if they are redistributed.

+

This chapter specifies the licenses for the libraries on which Stan’s math library, language, and algorithms depend. The last tool mentioned, Google Test, is only used for testing and is not needed to run Stan.

+
+

Stan license

+

Stan is distributed under

+ +

The copyright holder of each contribution is the developer or his or her assignee.1

+
+
+

Boost license

+

Stan uses the Boost library for template metaprograms, traits programs, the parser, and various numerical libraries for special functions, probability functions, and random number generators. Boost is distributed under the

+ +

The copyright for each Boost package is held by its developers or their assignees.

+
+
+

Eigen license

+

Stan uses the Eigen library for matrix arithmetic and linear algebra. Eigen is distributed under the

+ +

The copyright of Eigen is owned jointly by its developers or their assignees.

+
+
+

SUNDIALS license

+

Stan uses the SUNDIALS package for solving differential equations. SUNDIALS is distributed under the

+ +

The copyright of SUNDIALS is owned by Lawrence Livermore National Security Lab.

+
+
+

Threading Building Blocks (TBB) License

+

Stan uses the Threading Building Blocks (TBB) library for parallel computations. TBB is distributed under the

+ +

The copyright of TBB is owned by Intel Corporation.

+
+
+

Google test license

+

Stan uses Google Test for unit testing; it is not required to compile or execute models. Google Test is distributed under the

+ +

The copyright of Google Test is owned by Google, Inc.

+ + +
+
+ + + Back to top

Footnotes

+ +
    +
  1. Universities or companies often own the copyright of computer programs developed by their employees.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/mcmc.html b/docs/2_39/reference-manual/mcmc.html new file mode 100644 index 000000000..0c9a17252 --- /dev/null +++ b/docs/2_39/reference-manual/mcmc.html @@ -0,0 +1,1551 @@ + + + + + + + + + +MCMC Sampling + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

MCMC Sampling

+

This chapter presents the two Markov chain Monte Carlo (MCMC) algorithms used in Stan, the Hamiltonian Monte Carlo (HMC) algorithm and its adaptive variant the no-U-turn sampler (NUTS), along with details of their implementation and configuration.

+
+

Hamiltonian Monte Carlo

+

Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) method that uses the derivatives of the density function being sampled to generate efficient transitions spanning the posterior (see, e.g., Betancourt and Girolami (2013), Neal (2011) for more details). It uses an approximate Hamiltonian dynamics simulation based on numerical integration which is then corrected by performing a Metropolis acceptance step.

+

This section translates the presentation of HMC by Betancourt and Girolami (2013) into the notation of Gelman et al. (2013).

+
+

Target density

+

The goal of sampling is to draw from a density \(p(\theta)\) for parameters \(\theta\). This is typically a Bayesian posterior \(p(\theta|y)\) given data \(y\), and in particular, a Bayesian posterior coded as a Stan program.

+
+
+

Auxiliary momentum variable

+

HMC introduces auxiliary momentum variables \(\rho\) and draws from a joint density

+

\[ +p(\rho, \theta) = p(\rho | \theta) p(\theta). +\]

+

In most applications of HMC, including Stan, the auxiliary density is a multivariate normal that does not depend on the parameters \(\theta\),

+

\[ +\rho \sim \mathsf{MultiNormal}(0, M). +\]

+

\(M\) is the Euclidean metric. It can be seen as a transform of parameter space that makes sampling more efficient; see Betancourt (2017) for details.

+

By default Stan sets \(M^{-1}\) equal to a diagonal estimate of the covariance computed during warmup.

+
+
+

The Hamiltonian

+

The joint density \(p(\rho, \theta)\) defines a Hamiltonian

+

\[ +\begin{array}{rcl} +H(\rho, \theta) & = & - \log p(\rho, \theta) +\\[3pt] +& = & - \log p(\rho | \theta) - \log p(\theta) +\\[3pt] +& = & T(\rho | \theta) + V(\theta), +\end{array} +\]

+

where the term

+

\[ +T(\rho | \theta) = - \log p(\rho | \theta) +\]

+

is called the “kinetic energy” and the term

+

\[ +V(\theta) = - \log p(\theta) +\]

+

is called the “potential energy.” The potential energy is specified by the Stan program through its definition of a log density.

+
+
+

Generating transitions

+

Starting from the current value of the parameters \(\theta\), a transition to a new state is generated in two stages before being subjected to a Metropolis accept step.

+

First, a value for the momentum is drawn independently of the current parameter values,

+

\[ +\rho \sim \mathsf{MultiNormal}(0, M). +\]

+

Thus momentum does not persist across iterations.

+

Next, the joint system \((\theta,\rho)\) made up of the current parameter values \(\theta\) and new momentum \(\rho\) is evolved via Hamilton’s equations,

+

\[ +\begin{array}{rcccl} +\displaystyle +\frac{d\theta}{dt} +& = & +\displaystyle ++ \frac{\partial H}{\partial \rho} +& = & +\displaystyle ++ \frac{\partial T}{\partial \rho} +\\[12pt] +\displaystyle +\frac{d\rho}{dt} +& = & +\displaystyle +- \frac{\partial H}{\partial \theta } +& = & +\displaystyle +- \frac{\partial T}{\partial \theta} +- \frac{\partial V}{\partial \theta}. +\end{array} +\]

+

With the momentum density being independent of the target density, i.e., \(p(\rho | \theta) = p(\rho)\), the first term in the momentum time derivative, \({\partial T} / {\partial \theta}\), is zero, yielding the pair of time derivatives

+

\[ +\begin{array}{rcl} +\frac{d \theta}{d t} & = & +\frac{\partial T}{\partial \rho} +\\[2pt] +\frac{d \rho}{d t} & = & -\frac{\partial V}{\partial \theta}. +\end{array} +\]

+
+
+

Leapfrog integrator

+

The last section leaves a two-state differential equation to solve. Stan, like most other HMC implementations, uses the leapfrog integrator, which is a numerical integration algorithm that’s specifically adapted to provide stable results for Hamiltonian systems of equations.

+

Like most numerical integrators, the leapfrog algorithm takes discrete steps of some small time interval \(\epsilon\). The leapfrog algorithm begins by drawing a fresh momentum term independently of the parameter values \(\theta\) or previous momentum value.

+

\[ +\rho \sim \mathsf{MultiNormal}(0, M). +\] It then alternates half-step updates of the momentum and full-step updates of the position.

+

\[ +\begin{array}{rcl} +\rho & \leftarrow + & \rho \, - \, \frac{\epsilon}{2} \frac{\partial V}{\partial \theta} +\\[6pt] +\theta & \leftarrow + & \theta \, + \, \epsilon \, M^{-1} \, \rho +\\[6pt] +\rho & \leftarrow + & \rho \, - \, \frac{\epsilon}{2} \frac{\partial V}{\partial \theta}. +\end{array} +\]

+

By applying \(L\) leapfrog steps, a total of \(L \, \epsilon\) time is simulated. The resulting state at the end of the simulation (\(L\) repetitions of the above three steps) will be denoted \((\rho^{*}, \theta^{*})\).

+

The leapfrog integrator’s error is on the order of \(\epsilon^3\) per step and \(\epsilon^2\) globally, where \(\epsilon\) is the time interval (also known as the step size); Leimkuhler and Reich (2004) provide a detailed analysis of numerical integration for Hamiltonian systems, including a derivation of the error bound for the leapfrog integrator.

+
+
+

Metropolis accept step

+

If the leapfrog integrator were perfect numerically, there would be no need to do any more randomization per transition than generating a random momentum vector. Instead, what is done in practice to account for numerical errors during integration is to apply a Metropolis acceptance step, where the probability of keeping the proposal \((\rho^{*}, \theta^{*})\) generated by transitioning from \((\rho, \theta)\) is

+

\[ +\min \! +\left( +1, +\ \exp \! \left( H(\rho, \theta) - H(\rho^{*}, \theta^{*}) \right) +\right). +\]

+

If the proposal is not accepted, the previous parameter value is returned for the next draw and used to initialize the next iteration.

+
+
+

Algorithm summary

+

The Hamiltonian Monte Carlo algorithm starts at a specified initial set of parameters \(\theta\); in Stan, this value is either user-specified or generated randomly. Then, for a given number of iterations, a new momentum vector is sampled and the current value of the parameter \(\theta\) is updated using the leapfrog integrator with discretization time \(\epsilon\) and number of steps \(L\) according to the Hamiltonian dynamics. Then a Metropolis acceptance step is applied, and a decision is made whether to update to the new state \((\theta^{*}, \rho^{*})\) or keep the existing state.

+
+
+
+

HMC algorithm parameters

+

The Hamiltonian Monte Carlo algorithm has three parameters which must be set,

+
    +
  • discretization time \(\epsilon\),
  • +
  • metric \(M\), and
  • +
  • number of steps taken \(L\).
  • +
+

In practice, sampling efficiency, both in terms of iteration speed and iterations per effective sample, is highly sensitive to these three tuning parameters (Neal 2011; Hoffman and Gelman 2014).

+

If \(\epsilon\) is too large, the leapfrog integrator will be inaccurate and too many proposals will be rejected. If \(\epsilon\) is too small, too many small steps will be taken by the leapfrog integrator leading to long simulation times per interval. Thus the goal is to balance the acceptance rate between these extremes.

+

If \(L\) is too small, the trajectory traced out in each iteration will be too short and sampling will devolve to a random walk. If \(L\) is too large, the algorithm will do too much work on each iteration.

+

If the inverse metric \(M^{-1}\) is a poor estimate of the posterior covariance, the step size \(\epsilon\) must be kept small to maintain arithmetic precision. This would lead to a large \(L\) to compensate.

+
+

Integration time

+

The actual integration time is \(L \, \epsilon\), a function of number of steps. Some interfaces to Stan set an approximate integration time \(t\) and the discretization interval (step size) \(\epsilon\). In these cases, the number of steps will be rounded down as

+

\[ +L = \left\lfloor \frac{t}{\epsilon} \right\rfloor. +\]

+

and the actual integration time will still be \(L \, \epsilon\).

+
+
+

Automatic parameter tuning

+

Stan is able to automatically optimize \(\epsilon\) to match an acceptance-rate target, able to estimate \(M\) based on warmup sample iterations, and able to dynamically adapt \(L\) on the fly during sampling (and during warmup) using the no-U-turn sampling (NUTS) algorithm (Hoffman and Gelman 2014).

+

Warmup Epochs Figure. Adaptation during warmup occurs in three stages: an initial fast adaptation interval (I), a series of expanding slow adaptation intervals (II), and a final fast adaptation interval (III). For HMC, both the fast and slow intervals are used for adapting the step size, while the slow intervals are used for learning the (co)variance necessitated by the metric. Iteration numbering starts at 1 on the left side of the figure and increases to the right.

+

+

When adaptation is engaged (it may be turned off by fixing a step size and metric), the warmup period is split into three stages, as illustrated in the warmup adaptation figure, with two fast intervals surrounding a series of growing slow intervals. Here fast and slow refer to parameters that adapt using local and global information, respectively; the Hamiltonian Monte Carlo samplers, for example, define the step size as a fast parameter and the (co)variance as a slow parameter. The size of the initial and final fast intervals and the initial size of the slow interval are all customizable, although user-specified values may be modified slightly in order to ensure alignment with the warmup period.

+

The motivation behind this partitioning of the warmup period is to allow for more robust adaptation. The stages are as follows.

+
    +
  1. In the initial fast interval the chain is allowed to converge towards the typical set,1 with only parameters that can learn from local information adapted.

  2. +
  3. After this initial stage parameters that require global information, for example (co)variances, are estimated in a series of expanding, memoryless windows; often fast parameters will be adapted here as well.

  4. +
  5. Lastly, the fast parameters are allowed to adapt to the final update of the slow parameters.

  6. +
+

These intervals may be controlled through the following configuration parameters, all of which must be positive integers:

+

Adaptation Parameters Table. The parameters controlling adaptation and their default values.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +
parameterdescriptiondefault
initial bufferwidth of initial fast adaptation interval75
term bufferwidth of final fast adaptation interval50
windowinitial width of slow adaptation interval25
+
+
+

Discretization-interval adaptation parameters

+

Stan’s HMC algorithms utilize dual averaging (Nesterov 2009) to optimize the step size.2

+

This warmup optimization procedure is extremely flexible and for completeness, Stan exposes each tuning option for dual averaging, using the notation of Hoffman and Gelman (2014). In practice, the efficacy of the optimization is sensitive to the value of these parameters, but we do not recommend changing the defaults without experience with the dual-averaging algorithm. For more information, see the discussion of dual averaging in Hoffman and Gelman (2014).

+

The full set of dual-averaging parameters are:

+

Step Size Adaptation Parameters Table. The parameters controlling step size adaptation, with constraints and default values.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
parameterdescriptionconstraintdefault
deltatarget Metropolis acceptance rate[0, 1]0.8
gammaadaptation regularization scale(0, infty)0.05
kappaadaptation relaxation exponent(0, infty)0.75
t_0adaptation iteration offset(0, infty)10
+

By setting the target acceptance parameter \(\delta\) to a value closer to 1 (its value must be strictly less than 1 and its default value is 0.8), adaptation will be forced to use smaller step sizes. This can improve sampling efficiency (effective sample size per iteration) at the cost of increased iteration times. Raising the value of \(\delta\) will also allow some models that would otherwise get stuck to overcome their blockages.

+
+
+

Step-size jitter

+

All implementations of HMC use numerical integrators requiring a step size (equivalently, discretization time interval). Stan allows the step size to be adapted or set explicitly. Stan also allows the step size to be “jittered” randomly during sampling to avoid any poor interactions with a fixed step size and regions of high curvature. The jitter is a proportion that may be added or subtracted, so the maximum amount of jitter is 1, which will cause step sizes to be selected in the range of 0 to twice the adapted step size. The default value is 0, producing no jitter.

+

Small step sizes can get HMC samplers unstuck that would otherwise get stuck with higher step sizes. The downside is that jittering below the adapted value will increase the number of leapfrog steps required and thus slow down iterations, whereas jittering above the adapted value can cause premature rejection due to simulation error in the Hamiltonian dynamics calculation. See Neal (2011) for further discussion of step-size jittering.

+
+
+

Euclidean metric

+

All HMC implementations in Stan utilize quadratic kinetic energy functions which are specified up to the choice of a symmetric, positive-definite matrix known as a mass matrix or, more formally, a metric (Betancourt 2017).

+

If the metric is constant then the resulting implementation is known as Euclidean HMC. Stan allows a choice among three Euclidean HMC implementations,

+
    +
  • a unit metric (diagonal matrix of ones),
  • +
  • a diagonal metric (diagonal matrix with positive diagonal entries), and
  • +
  • a dense metric (a dense, symmetric positive definite matrix)
  • +
+

to be configured by the user.

+

If the metric is specified to be diagonal, then regularized variances are estimated based on the iterations in each slow-stage block (labeled II in the warmup adaptation stages figure). Each of these estimates is based only on the iterations in that block. This allows early estimates to be used to help guide warmup and then be forgotten later so that they do not influence the final covariance estimate.

+

If the metric is specified to be dense, then regularized covariance estimates will be carried out, regularizing the estimate to a diagonal matrix, which is itself regularized toward a unit matrix.

+

Variances or covariances are estimated using Welford accumulators to avoid a loss of precision over many floating point operations.

+
+

Warmup times and estimating the metric

+

The metric can compensate for linear (i.e. global) correlations in the posterior which can dramatically improve the performance of HMC in some problems. This requires knowing the global correlations.

+

In complex models, the global correlations are usually difficult, if not impossible, to derive analytically; for example, nonlinear model components convolve the scales of the data, so standardizing the data does not always help. Therefore, Stan estimates these correlations online with an adaptive warmup. In models with strong nonlinear (i.e. local) correlations this learning can be slow, even with regularization. This is ultimately why warmup in Stan often needs to be so long, and why a sufficiently long warmup can yield such substantial performance improvements.

+
+
+

Nonlinearity

+

The metric compensates for only linear (equivalently global or position-independent) correlations in the posterior. The hierarchical parameterizations, on the other hand, affect some of the nasty nonlinear (equivalently local or position-dependent) correlations common in hierarchical models.3

+

One of the biggest difficulties with dense metrics is the estimation of the metric itself which introduces a bit of a chicken-and-egg scenario; in order to estimate an appropriate metric for sampling, convergence is required, and in order to converge, an appropriate metric is required.

+
+
+

Dense vs. diagonal metrics

+

Statistical models for which sampling is problematic are not typically dominated by linear correlations for which a dense metric can adjust. Rather, they are governed by more complex nonlinear correlations that are best tackled with better parameterizations or more advanced algorithms, such as Riemannian HMC.

+
+
+

Warmup times and curvature

+

MCMC convergence time is roughly equivalent to the autocorrelation time. Because HMC (and NUTS) chains tend to have low autocorrelation, they also tend to converge quite rapidly.

+

This only applies when there is uniformity of curvature across the posterior, an assumption which is violated in many complex models. Quite often, the tails have large curvature while the bulk of the posterior mass is relatively well-behaved; in other words, warmup is slow not because the actual convergence time is slow but rather because the cost of an HMC iteration is more expensive out in the tails.

+

Poor behavior in the tails is the kind of pathology that can be uncovered by running only a few warmup iterations. Looking at the acceptance probabilities and step sizes of the first few iterations provides an idea of how bad the problem is and whether it must be addressed with modeling efforts such as tighter priors or reparameterizations.

+
+
+
+

NUTS and its configuration

+

The no-U-turn sampler (NUTS) automatically selects an appropriate number of leapfrog steps in each iteration in order to allow the proposals to traverse the posterior without doing unnecessary work. The motivation is to maximize the expected squared jump distance (see, e.g., Roberts, Gelman, and Gilks (1997)) at each step and avoid the random-walk behavior that arises in random-walk Metropolis or Gibbs samplers when there is correlation in the posterior. For a precise definition of the NUTS algorithm and a proof of detailed balance, see Hoffman and Gelman (2014).

+

NUTS generates a proposal by starting at an initial position determined by the parameters drawn in the last iteration. It then generates an independent standard normal random momentum vector. It then evolves the initial system both forwards and backwards in time to form a balanced binary tree. At each iteration of the NUTS algorithm the tree depth is increased by one, doubling the number of leapfrog steps and effectively doubling the computation time. The algorithm terminates in one of two ways, either

+
    +
  • the NUTS criterion (i.e., a U-turn in Euclidean space on a subtree) is satisfied for a new subtree or the completed tree, or
  • +
  • the depth of the completed tree hits the maximum depth allowed.
  • +
+

Rather than using a standard Metropolis step, the final parameter value is selected via multinomial sampling with a bias toward the second half of the steps in the trajectory (Betancourt 2016b).4

+

Configuring the no-U-turn sampler involves putting a cap on the depth of the trees that it evaluates during each iteration. This is controlled through a maximum depth parameter. The number of leapfrog steps taken is then bounded by 2 to the power of the maximum depth, minus 1.

+

Both the tree depth and the actual number of leapfrog steps computed are reported along with the parameters in the output as treedepth__ and n_leapfrog__, respectively. Because the final subtree may only be partially constructed, these two will always satisfy

+

\[ +2^{\mathrm{treedepth} - 1} - 1 +\ < \ +N_{\mathrm{leapfrog}} +\ \le \ +2^{\mathrm{treedepth} } - 1. +\]

+

Tree depth is an important diagnostic tool for NUTS. For example, a tree depth of zero occurs when the first leapfrog step is immediately rejected and the initial state returned, indicating extreme curvature and poorly-chosen step size (at least relative to the current position). On the other hand, a tree depth equal to the maximum depth indicates that NUTS is taking many leapfrog steps and being terminated prematurely to avoid excessively long execution time. Taking very many steps may be a sign of poor adaptation, may be due to targeting a very high acceptance rate, or may simply indicate a difficult posterior from which to sample. In the latter case, reparameterization may help with efficiency. But in the rare cases where the model is correctly specified and a large number of steps is necessary, the maximum depth should be increased to ensure that the NUTS tree can grow as large as necessary.

+
+
+
+

Sampling without parameters

+

In some situations, such as pure forward data simulation in a directed graphical model (e.g., where you can work down generatively from known hyperpriors to simulate parameters and data), there is no need to declare any parameters in Stan, the model block will be empty (and thus can be omitted), and all output quantities will be produced in the generated quantities block.

+

For example, to generate a sequence of \(N\) draws from a binomial with trials \(K\) and chance of success \(\theta\), the following program suffices.

+
data {
+  real<lower=0, upper=1> theta;
+  int<lower=0> K;
+  int<lower=0> N;
+}
+generated quantities {
+  array[N] int<lower=0, upper=K> y;
+  for (n in 1:N) {
+    y[n] = binomial_rng(K, theta);
+  }
+}
+

For this model, the sampler must be configured to use the fixed-parameters setting because there are no parameters. Without parameter sampling there is no need for adaptation and the number of warmup iterations should be set to zero.

+

Most models that are written to be sampled without parameters will not declare any parameters, instead putting anything parameter-like in the data block. Nevertheless, it is possible to include parameters for fixed-parameters sampling and initialize them in any of the usual ways (randomly, fixed to zero on the unconstrained scale, or with user-specified values). For example, theta in the example above could be declared as a parameter and initialized as a parameter.

+
+
+

General configuration options

+

Stan’s interfaces provide a number of configuration options that are shared among the MCMC algorithms (this chapter), the optimization algorithms chapter, and the diagnostics chapter.

+
+

Random number generator

+

The random-number generator’s behavior is fully determined by the unsigned seed (positive integer) it is started with. If a seed is not specified, or a seed of 0 or less is specified, the system time is used to generate a seed. The seed is recorded and included with Stan’s output regardless of whether it was specified or generated randomly from the system time.

+

Stan also allows a chain identifier to be specified, which is useful when running multiple Markov chains for sampling. The chain identifier is used to advance the random number generator a very large number of random variates so that two chains with different identifiers draw from non-overlapping subsequences of the random-number sequence determined by the seed. When running multiple chains from a single command, Stan’s interfaces will manage the chain identifiers.

+
+

Replication

+

Together, the seed and chain identifier determine the behavior of the underlying random number generator. For complete reproducibility, every aspect of the environment needs to be locked down from the OS and version to the C++ compiler and version to the version of Stan and all dependent libraries.

+
+
+
+

Initialization

+

The initial parameter values for Stan’s algorithms (MCMC, optimization, or diagnostic) may be either specified by the user or generated randomly. If user-specified values are provided, all parameters must be given initial values or Stan will abort with an error message.

+
+

User-defined initialization

+

If the user specifies initial values, they must satisfy the constraints declared in the model (i.e., they are on the constrained scale).

+
+
+

System constant zero initialization

+

It is also possible to provide an initialization of 0, which causes all variables to be initialized with zero values on the unconstrained scale. The transforms are arranged in such a way that zero initialization provides reasonable variable initializations for most parameters, such as 0 for unconstrained parameters, 1 for parameters constrained to be positive, 0.5 for variables constrained to lie between 0 and 1, a symmetric (uniform) vector for simplexes, unit matrices for both correlation and covariance matrices, and so on.

+
+
+

System random initialization

+

Random initialization by default initializes the parameter values with values drawn at random from a \(\mathsf{Uniform}(-2, 2)\) distribution. Alternatively, a value other than 2 may be specified for the absolute bounds. These values are on the unconstrained scale, so must be inverse transformed back to satisfy the constraints declared for parameters.

+

Because zero is chosen to be a reasonable default initial value for most parameters, the interval around zero provides a fairly diffuse starting point. For instance, unconstrained variables are initialized randomly in \((-2, 2)\), variables constrained to be positive are initialized roughly in \((0.14, 7.4)\), variables constrained to fall between 0 and 1 are initialized with values roughly in \((0.12, 0.88)\).

+
+
+
+
+

Divergent transitions

+

The Hamiltonian Monte Carlo algorithms (HMC and NUTS) simulate the trajectory of a fictitious particle representing parameter values when subject to a potential energy field, the value of which at a point is the negative log posterior density (up to a constant that does not depend on location). Random momentum is imparted independently in each direction, by drawing from a standard normal distribution. The Hamiltonian is defined to be the sum of the potential energy and kinetic energy of the system. The key feature of the Hamiltonian is that it is conserved along the trajectory the particle moves.

+

In Stan, we use the leapfrog algorithm to simulate the path of a particle along the trajectory defined by the initial random momentum and the potential energy field. This is done by alternating updates of the position based on the momentum and the momentum based on the position. The momentum updates involve the potential energy and are applied along the gradient. This is essentially a stepwise (discretized) first-order approximation of the trajectory. Leimkuhler and Reich (2004) provide details and error analysis for the leapfrog algorithm.

+

A divergence arises when the simulated Hamiltonian trajectory departs from the true trajectory as measured by departure of the Hamiltonian value from its initial value. When this divergence is too high,5 the simulation has gone off the rails and cannot be trusted. The positions along the simulated trajectory after the Hamiltonian diverges will never be selected as the next draw of the MCMC algorithm, potentially reducing Hamiltonian Monte Carlo to a simple random walk and biasing estimates by not being able to thoroughly explore the posterior distribution. Betancourt (2016a) provides details of the theory, computation, and practical implications of divergent transitions in Hamiltonian Monte Carlo.

+

The Stan interfaces report divergences as warnings and provide ways to access which iterations encountered divergences. ShinyStan provides visualizations that highlight the starting point of divergent transitions to diagnose where the divergences arise in parameter space. A common location is in the neck of the funnel in a centered parameterization, an example of which is provided in the user’s guide.

+

If the posterior is highly curved, very small step sizes are required for this gradient-based simulation of the Hamiltonian to be accurate. When the step size is too large (relative to the curvature), the simulation diverges from the true Hamiltonian. This definition is imprecise in the same way that stiffness for a differential equation is imprecise; both are defined by the way they cause traditional stepwise algorithms to diverge from where they should be.

+

The primary cause of divergent transitions in Euclidean HMC (other than bugs in the code) is highly varying posterior curvature, for which small step sizes are too inefficient in some regions and diverge in other regions. If the step size is too small, the sampler becomes inefficient and halts before making a U-turn (hits the maximum tree depth in NUTS); if the step size is too large, the Hamiltonian simulation diverges.

+
+

Diagnosing and eliminating divergences

+

In some cases, simply lowering the initial step size and increasing the target acceptance rate will keep the step size small enough that sampling can proceed. In other cases, a reparameterization is required so that the posterior curvature is more manageable; see the funnel example in the user’s guide for an example.

+

Before reparameterization, it may be helpful to plot the posterior draws, highlighting the divergent transitions to see where they arise. This is marked as a divergent transition in the interfaces; for example, ShinyStan and RStan have special plotting facilities to highlight where divergent transitions arise.

+ + + +
+
+
+ + + Back to top

References

+
+Betancourt, Michael. 2016a. “Diagnosing Suboptimal Cotangent Disintegrations in Hamiltonian Monte Carlo.” arXiv 1604.00695. https://arxiv.org/abs/1604.00695. +
+
+———. 2016b. “Identifying the Optimal Integration Time in Hamiltonian Monte Carlo.” arXiv 1601.00225. https://arxiv.org/abs/1601.00225. +
+
+———. 2017. “A Conceptual Introduction to Hamiltonian Monte Carlo.” arXiv 1701.02434. https://arxiv.org/abs/1701.02434. +
+
+Betancourt, Michael, and Mark Girolami. 2013. “Hamiltonian Monte Carlo for Hierarchical Models.” arXiv 1312.0906. http://arxiv.org/abs/1312.0906. +
+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+Hoffman, Matthew D., and Andrew Gelman. 2014. “The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.” Journal of Machine Learning Research 15: 1593–623. http://jmlr.org/papers/v15/hoffman14a.html. +
+
+Leimkuhler, Benedict, and Sebastian Reich. 2004. Simulating Hamiltonian Dynamics. Cambridge: Cambridge University Press. +
+
+Neal, Radford. 2011. “MCMC Using Hamiltonian Dynamics.” In Handbook of Markov Chain Monte Carlo, edited by Steve Brooks, Andrew Gelman, Galin L. Jones, and Xiao-Li Meng, 116–62. Chapman; Hall/CRC. +
+
+Nesterov, Y. 2009. “Primal-Dual Subgradient Methods for Convex Problems.” Mathematical Programming 120 (1): 221–59. +
+
+Roberts, G. O., Andrew Gelman, and Walter R. Gilks. 1997. “Weak Convergence and Optimal Scaling of Random Walk Metropolis Algorithms.” Annals of Applied Probability 7 (1): 110–20. +
+

Footnotes

+ +
    +
  1. The typical set is a concept borrowed from information theory and refers to the neighborhood (or neighborhoods in multimodal models) of substantial posterior probability mass through which the Markov chain will travel in equilibrium.↩︎

  2. +
  3. This optimization of step size during adaptation of the sampler should not be confused with running Stan’s optimization method.↩︎

  4. +
  5. In Riemannian HMC the metric compensates for nonlinear correlations.↩︎

  6. +
  7. Stan previously used slice sampling along the trajectory, following the original NUTS paper of Hoffman and Gelman (2014).↩︎

  8. +
  9. The current default threshold is a factor of \(10^3\), whereas when the leapfrog integrator is working properly, the divergences will be around \(10^{-7}\) and do not compound due to the symplectic nature of the leapfrog integrator.↩︎

  10. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/optimization.html b/docs/2_39/reference-manual/optimization.html new file mode 100644 index 000000000..074423a20 --- /dev/null +++ b/docs/2_39/reference-manual/optimization.html @@ -0,0 +1,1213 @@ + + + + + + + + + +Optimization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Optimization

+

Stan provides optimization algorithms which find modes of the density specified by a Stan program. Such modes may be used as parameter estimates or as the basis of approximations to a Bayesian posterior.

+

Stan provides three different optimizers, a Newton optimizer, and two related quasi-Newton algorithms, BFGS and L-BFGS; see Nocedal and Wright (2006) for thorough description and analysis of all of these algorithms. The L-BFGS algorithm is the default optimizer. Newton’s method is the least efficient of the three, but has the advantage of setting its own stepsize.

+
+

General configuration

+

All of the optimizers have the option of including the log absolute Jacobian determinant of inverse parameter transforms in the log probability computation. If the Jacobian adjustment is not included (the default), the optimization returns parameter values that correspond to a mode of the target in the constrained space (if such a mode exists). Thus this option is useful for any optimization where we want to find the mode in the original constrained parameter space. If the Jacobian adjustment is included, the optimization returns parameter values that correspond to a mode in the unconstrained space. This is useful, for example, if we want to make a distributional approximation of the posterior at the mode (see Laplace sampling), as then the Jacobian adjustment needs to be included for correct results.

+

All of the optimizers are iterative and allow the maximum number of iterations to be specified; the default maximum number of iterations is 2000.

+

All of the optimizers are able to stream intermediate output reporting on their progress. Whether or not to save the intermediate iterations and stream progress is configurable.

+
+
+

BFGS and L-BFGS configuration

+
+

Convergence monitoring

+

Convergence monitoring in (L-)BFGS is controlled by a number of tolerance values, any one of which being satisfied causes the algorithm to terminate with a solution. Any of the convergence tests can be disabled by setting its corresponding tolerance parameter to zero. The tests for convergence are as follows.

+
+

Parameter convergence

+

The parameters \(\theta_i\) in iteration \(i\) are considered to have converged with respect to tolerance tol_param if

+

\[ +|| \theta_{i} - \theta_{i-1} || < \mathtt{tol\_param}. +\]

+
+
+

Density convergence

+

The (unnormalized) log density \(\log p(\theta_{i}|y)\) for the parameters \(\theta_i\) in iteration \(i\) given data \(y\) is considered to have converged with respect to tolerance tol_obj if

+

\[ +\left| \log p(\theta_{i}|y) - \log p(\theta_{i-1}|y) \right| < +\mathtt{tol\_obj}. +\]

+

The log density is considered to have converged to within relative tolerance tol_rel_obj if

+

\[ +\frac{\left| \log p(\theta_{i}|y) - \log p(\theta_{i-1}|y) \right|}{\ + \max\left(\left| \log p(\theta_{i}|y)\right|,\left| \log + p(\theta_{i-1}|y)\right|,1.0\right)} +< \mathtt{tol\_rel\_obj} * \epsilon. +\]

+
+
+

Gradient convergence

+

The gradient is considered to have converged to 0 relative to a specified tolerance tol_grad if

+

\[ +|| g_{i} || < \mathtt{tol\_grad}, +\] where \(\nabla_{\theta}\) is the gradient operator with respect to \(\theta\) and \(g_{i} = \nabla_{\theta} \log p(\theta | y)\) is the gradient at iteration \(i\) evaluated at \(\theta^{(i)}\), the value on the \(i\)-th posterior iteration.

+

The gradient is considered to have converged to 0 relative to a specified relative tolerance tol_rel_grad if

+

\[ +\frac{g_{i}^T \hat{H}_{i}^{-1} g_{i} }{ \max\left(\left|\log +p(\theta_{i}|y)\right|,1.0\right) } +\ < \ +\mathtt{tol\_rel\_grad} * \epsilon, +\]

+

where \(\hat{H}_{i}\) is the estimate of the Hessian at iteration \(i\), \(|u|\) is the absolute value (L1 norm) of \(u\), \(||u||\) is the vector length (L2 norm) of \(u\), and \(\epsilon \approx 2 \times 10^{-16}\) is machine precision.

+
+
+
+

Initial step size

+

The initial step size parameter \(\alpha\) for BFGS-style optimizers may be specified. If the first iteration takes a long time (and requires a lot of function evaluations), initialize \(\alpha\) to be roughly equal to the \(\alpha\) used in that first iteration. The default value is intentionally small, 0.001, which is reasonable for many problems but might be too large or too small depending on the objective function and initialization. Being too big or too small just means that the first iteration will take longer (i.e., require more gradient evaluations) before the line search finds a good step length. It’s not a critical parameter, but for optimizing the same model multiple times (as you tweak things or with different data), being able to tune \(\alpha\) can save some real time.

+
+
+

L-BFGS history size

+

L-BFGS has a command-line argument which controls the size of the history it uses to approximate the Hessian. The value should be less than the dimensionality of the parameter space and, in general, relatively small values (5–10) are sufficient; the default value is 5.

+

If L-BFGS performs poorly but BFGS performs well, consider increasing the history size. Increasing history size will increase the memory usage, although this is unlikely to be an issue for typical Stan models.

+
+
+
+

Writing models for optimization

+
+

Constrained vs. unconstrained parameters

+

For constrained optimization problems, for instance, with a standard deviation parameter \(\sigma\) constrained so that \(\sigma > 0\), it can be much more efficient to declare a parameter sigma with no constraints. This allows the optimizer to easily get close to 0 without having to tend toward \(-\infty\) on the \(\log \sigma\) scale.

+

With unconstrained parameterizations of parameters with constrained support, it is important to provide a custom initialization that is within the support. For example, declaring a vector

+
vector[M] sigma;
+

and using the default random initialization which is \(\mathsf{Uniform}(-2, 2)\) on the unconstrained scale means that there is only a \(2^{-M}\) chance that the initialization will be within support.

+

For any given optimization problem, it is probably worthwhile trying the program both ways, with and without the constraint, to see which one is more efficient.

+ + + +
+
+
+ + Back to top

References

+
+Nocedal, Jorge, and Stephen J. Wright. 2006. Numerical Optimization. Second. Berlin: Springer-Verlag. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/pathfinder.html b/docs/2_39/reference-manual/pathfinder.html new file mode 100644 index 000000000..e7ce67d5b --- /dev/null +++ b/docs/2_39/reference-manual/pathfinder.html @@ -0,0 +1,1117 @@ + + + + + + + + + +Pathfinder + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Pathfinder

+

Stan supports the Pathfinder algorithm (Zhang et al. 2022). Pathfinder is a variational method for approximately sampling from differentiable log densities. Starting from a random initialization, Pathfinder locates normal approximations to the target density along a quasi-Newton optimization path, with local covariance estimated using the negative inverse Hessian estimates produced by the LBFGS optimizer. Pathfinder returns draws from the Gaussian approximation with the lowest estimated Kullback-Leibler (KL) divergence to the true posterior.

+

Stan provides two versions of the Pathfinder algorithm: single-path Pathfinder and multi-path Pathfinder. Single-path Pathfinder generates a set of approximate draws from one run of the basic Pathfinder algorithm. Multi-path Pathfinder uses importance resampling over the draws from multiple runs of Pathfinder. This better matches non-normal target densities and also mitigates the problem of L-BFGS getting stuck at local optima or in saddle points on plateaus. Compared to ADVI and short dynamic HMC runs, Pathfinder requires one to two orders of magnitude fewer log density and gradient evaluations, with greater reductions for more challenging posteriors. While the evaluations by Zhang et al. (2022) found that single-path and multi-path Pathfinder outperform ADVI for most of the models in the PosteriorDB (Magnusson et al. 2024) evaluation set, we recognize the need for further experiments on a wider range of models.

+
+

Diagnosing Pathfinder

+

Pathfinder diagnoses the accuracy of the approximation by computing the density ratio of the true posterior and the approximation and using the Pareto-\(\hat{k}\) diagnostic (Vehtari et al. 2024) to assess whether these ratios can be used to improve the approximation via resampling. The normalization for the posterior can be estimated reliably (Vehtari et al. 2024, sec. 3), which is the first requirement for reliable resampling. If the estimated Pareto-\(\hat{k}\) for the ratios is smaller than 0.7, there is still a need to further diagnose the reliability of the importance sampling estimate for all quantities of interest (Vehtari et al. 2024, sec. 2.2). If the estimated Pareto-\(\hat{k}\) is larger than 0.7, then the estimate for the normalization is unreliable and any Monte Carlo estimate may have a big error. The resampled draws can still contain some useful information about the location and shape of the posterior which can be used in early parts of Bayesian workflow (Gelman et al. 2020).

+
+
+

Using Pathfinder for initializing MCMC

+

If estimated Pareto-\(\hat{k}\) for the ratios is smaller than 0.7, the resampled posterior draws are almost as good for initializing MCMC as would independent draws from the posterior be. If estimated Pareto-\(\hat{k}\) for the ratios is larger than 0.7, the Pathfinder draws are not reliable for posterior inference directly, but they are still very likely better for initializing MCMC than random draws from an arbitrary pre-defined distribution (e.g. uniform from -2 to 2 used by Stan by default). If Pareto-\(\hat{k}\) is larger than 0.7, it is likely that one of the ratios is much bigger than others and the default resampling with replacement would produce copies of one unique draw. For initializing several Markov chains, it is better to use resampling without replacement to guarantee unique initialization for each chain. At the moment Stan allows turning off the resampling completely, and then the resampling without replacement can be done outside of Stan.

+ + + +
+
+ + Back to top

References

+
+Gelman, Andrew, Aki Vehtari, Daniel Simpson, Charles C Margossian, Bob Carpenter, Yuling Yao, Lauren Kennedy, Jonah Gabry, Paul-Christian Bürkner, and Martin Modrák. 2020. “Bayesian Workflow.” arXiv Preprint arXiv:2011.01808. +
+
+Magnusson, Måns, Jakob Torgander, Paul-Christian Bürkner, Lu Zhang, Bob Carpenter, and Aki Vehtari. 2024. “Posteriordb: Testing, Benchmarking and Developing Bayesian Inference Algorithms.” arXiv Preprint arXiv:2407.04967. +
+
+Vehtari, Aki, Daniel Simpson, Andrew Gelman, Yuling Yao, and Jonah Gabry. 2024. “Pareto Smoothed Importance Sampling.” Journal of Machine Learning Research 25 (72): 1–58. +
+
+Zhang, Lu, Bob Carpenter, Andrew Gelman, and Aki Vehtari. 2022. “Pathfinder: Parallel Quasi-Newton Variational Inference.” Journal of Machine Learning Research 23 (306): 1–49. http://jmlr.org/papers/v23/21-0889.html. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/references.html b/docs/2_39/reference-manual/references.html new file mode 100644 index 000000000..d08beeba7 --- /dev/null +++ b/docs/2_39/reference-manual/references.html @@ -0,0 +1,821 @@ + + + + + + + + + +references + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ + + + +
+

References

+ + +
+ + Back to top
+ +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/removals.html b/docs/2_39/reference-manual/removals.html new file mode 100644 index 000000000..c4c380cba --- /dev/null +++ b/docs/2_39/reference-manual/removals.html @@ -0,0 +1,1203 @@ + + + + + + + + + +Removed Features + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Removed Features

+

This chapter lists functionalities that were once present in the language but have since been removed, along with how to replace them.

+
+

lp__ variable

+

Removed: The variable lp__ is no longer available for direct access or manipulation.

+

Replacement: General manipulation of the value of the lp__ variable is not allowed, but

+
lp__ <- lp__ + e;
+

can be replaced with

+
target += e;
+

The value of lp__ is available through the no-argument function target().

+
+
+

Assignment with <-

+

Removed: The operator <- for assignment, e.g.,

+
a <- b;
+

is no longer available.

+

Replacement: The new syntax uses the operator = for assignment, e.g.,

+
a = b;
+

Removed In: Stan 2.33

+
+
+

increment_log_prob statement

+

Removed: The increment_log_prob(u) statement for incrementing the log density accumulator by u is no longer available.

+

Replacement: Replace the above statement with

+
target += u;
+

Removed In: Stan 2.33

+
+
+

get_lp() function

+

Removed: The built-in no-argument function get_lp() is no longer available.

+

Replacement: Use the no-argument function target() instead.

+

Removed In: Stan 2.33

+
+
+

_log density and mass functions

+

Removed: Formerly, the probability function for the distribution foo would be applied to an outcome variable y and sequence of zero or more parameters ... to produce the expression foo_log(y, ...). This suffix is no longer a special value.

+

Replacement: If y can be a real value (including vectors or matrices), replace

+
foo_log(y, ...)
+

with the log probability density function notation

+
foo_lpdf(y | ...).
+

If y must be an integer (including arrays), instead replace

+
foo_log(y, ...)
+

with the log probability mass function

+
foo_lpmf(y | ...).
+

Removed In: Stan 2.33

+
+
+

cdf_log and ccdf_log cumulative distribution functions

+

Removed: The log cumulative distribution and complementary cumulative distribution functions for a distribution foo were formerly written as foo_cdf_log and foo_ccdf_log.

+

Replacement:

+

Replace foo_cdf_log(y, ...) with foo_lcdf(y | ...).

+

Replace foo_ccdf_log(y, ...) with foo_lccdf(y | ...).

+
+
+

User-defined function with _log suffix

+

Removed: A user-defined function ending in _log can no longer be used in distribution statements.

+

Replacement: Replace the _log suffix with _lpdf for density functions or _lpmf for mass functions in the user-defined function.

+

Removed In: Stan 2.33

+

Note: Following Stan 2.33, users can still define a function ending in _log; it simply no longer has a special meaning or is supported in the ~ syntax.

+
+
+

if_else function

+

Removed: The function if_else is no longer available.

+

Replacement: Use the conditional operator, which allows more flexibility in the types of b and c and is much more efficient in that it only evaluates whichever of b or c is returned. Replace

+
x = if_else(a, b, c);
+

with

+
x = a ? b : c;
+

Removed In: Stan 2.33

+
+
+

Character # as comment prefix

+

Removed: The use of # for line-based comments is no longer permitted. # may only be used for #include statements.

+

Replacement: Use a pair of forward slashes, //, for line comments.

+

Removed In: Stan 2.33

+
+
+

Postfix brackets array syntax

+

Before Stan 2.26, arrays were declared by writing syntax after the variable.

+

Removed: The use of array declarations like

+
int n[5];
+real a[3, 4];
+real<lower=0> z[5, 4, 2];
+vector[7] mu[3];
+matrix[7, 2] mu[15, 12];
+cholesky_factor_cov[5, 6] mu[2, 3, 4];
+

Replacement: The use of the array keyword, which replaces the above examples with

+
array[5] int n;
+array[3, 4] real a;
+array[5, 4, 2] real<lower=0> z;
+array[3] vector[7] mu;
+array[15, 12] matrix[7, 2] mu;
+array[2, 3, 4] cholesky_factor_cov[5, 6] mu;
+

Removed In: Stan 2.33

+
+
+

Nested multiple indexing in assignments

+

Stan interprets nested indexing in assignments as flat indexing so that a statement like

+
a[:][1] = b;
+

is the same as

+
a[:,1] = b;
+

However, this is inconsistent with multiple indexing rules.

+

To avoid confusion nested multiple indexing in assignment became an error in Stan 2.33. Nesting single indexing is still allowed as it cannot lead to ambiguity.

+

Removed In: Stan 2.33

+
+
+

Real values in conditionals

+

Removed: Using a real value in a conditional is no longer permitted.

+
real x = 1.0;
+if (x) {
+

The value was interpreted as true if it is nonzero.

+

Replacement: For the exact equivalent, use a comparison operator to make the intent clear.

+
real x = 1.0;
+if (x != 0) {
+

However, one should keep in mind that floating point calculations are subject to rounding errors and precise equality is fragile. It is worth considering whether the more robust alternative abs(x) < machine_precision() is appropriate for the use case.

+

Removed In: Stan 2.34

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/reproducibility.html b/docs/2_39/reference-manual/reproducibility.html new file mode 100644 index 000000000..127106597 --- /dev/null +++ b/docs/2_39/reference-manual/reproducibility.html @@ -0,0 +1,1094 @@ + + + + + + + + + +Reproducibility + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Reproducibility

+

Floating point operations on modern computers are notoriously difficult to replicate because the fundamental arithmetic operations, right down to the IEEE 754 encoding level, are not fully specified. The primary problem is that the precision of operations varies across different hardware platforms and software implementations.

+

Stan is designed to allow full reproducibility. However, this is only possible up to the external constraints imposed by floating point arithmetic.

+

Stan results will only be exactly reproducible if all of the following components are identical:

+
    +
  • Stan version
  • +
  • Stan interface (RStan, PyStan, CmdStan) and version, plus version of interface language (R, Python, shell)
  • +
  • versions of included libraries (Boost and Eigen)
  • +
  • operating system version
  • +
  • computer hardware including CPU, motherboard and memory
  • +
  • C++ compiler, including version, compiler flags, and linked libraries
  • +
  • same configuration of call to Stan, including random seed, chain ID, initialization and data
  • +
+

It doesn’t matter if you use a stable release version of Stan or the version with a particular Git hash tag. The same goes for all of the interfaces, compilers, and so on. The point is that if any of these moving parts changes in some way, floating point results may change.

+

Concretely, if you compile a single Stan program using the same CmdStan code base, but changed the optimization flag (-O3 vs. -O2 or -O0), the two programs may not return the identical stream of results. Thus it is very hard to guarantee reproducibility on externally managed hardware, like in a cluster or even a desktop managed by an IT department or with automatic updates turned on.

+

If, however, you compiled a Stan program today using one set of flags, took the computer away from the internet and didn’t allow it to update anything, then came back in a decade and recompiled the Stan program in the same way, you should get the same results.

+

The data needs to be the same down to the bit level. For example, if you are running in RStan, Rcpp handles the conversion between R’s floating point numbers and C++ doubles. If Rcpp changes the conversion process or uses different types, the results are not guaranteed to be the same down to the bit level.

+

The compiler and compiler settings can also be an issue. There is a nice discussion of the issues and how to control reproducibility in Intel’s proprietary compiler by Corden and Kreitzer (2014).

+
+

Notable changes across versions

+

As noted above, there is no guarantee that the same results will be reproducible between two different versions of Stan, even if the same settings and environment are used.

+

However, there are occasionally notable changes which would affect many if not all users, and these are noted here. The absence of a version from this list still does not guarantee exact reproducibility between it and other versions.

+
    +
  • Stan 2.28 changed the default chain ID for MCMC from 0 to 1. Users who had set a seed but not a chain ID would observe completely different outputs.
  • +
  • Stan 2.35 changed the default pseudo-random number generator used by the Stan algorithms. There is no relationship between seeds in versions pre-2.35 and version 2.35 and on.
  • +
+ + + +
+
+ + Back to top

References

+
+Corden, Martyn J., and David Kreitzer. 2014. “Consistency of Floating-Point Results Using the Intel Compiler or Why Doesn’t My Application Always Give the Same Answer?” Intel Corporation. https://software.intel.com/en-us/articles/consistency-of-floating-point-results-using-the-intel-compiler. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/statements.html b/docs/2_39/reference-manual/statements.html new file mode 100644 index 000000000..6fd7ff97d --- /dev/null +++ b/docs/2_39/reference-manual/statements.html @@ -0,0 +1,1951 @@ + + + + + + + + + +Statements + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Statements

+

The blocks of a Stan program are made up of variable declarations and statements; see the blocks chapter for details. Unlike programs in BUGS, the declarations and statements making up a Stan program are executed in the order in which they are written. Variables must be defined to have some value (as well as declared to have some type) before they are used — if they do not, the behavior is undefined.

+

The basis of Stan’s execution is the evaluation of a log probability function (specifically, a log probability density function) for a given set of (real-valued) parameters. Log probability functions can be constructed by using distribution statements and log probability increment statements. Statements may be grouped into sequences and into for-each loops. In addition, Stan allows local variables to be declared in blocks and also allows an empty statement consisting only of a semicolon.

+
+

Statement block contexts

+

The data and parameters blocks do not allow statements of any kind because these blocks are solely used to declare the data variables for input and the parameter variables for sampling. All other blocks allow statements. In these blocks, both variable declarations and statements are allowed. All top-level variables in a block are considered block variables. See the blocks chapter for more information about the block structure of Stan programs.

+
+
+

Assignment statements

+

An assignment statement consists of a variable (possibly multivariate with indexing information) and an expression. Executing an assignment statement evaluates the expression on the right-hand side and assigns it to the (indexed) variable on the left-hand side. An example of a simple assignment is as follows.

+
n = 0;
+

Executing this statement assigns the value of the expression 0, which is the integer zero, to the variable n. For an assignment to be well formed, the type of the expression on the right-hand side should be compatible with the type of the (indexed) variable on the left-hand side. For the above example, because 0 is an expression of type int, the variable n must be declared as being of type int or of type real. If the variable is of type real, the integer zero is promoted to a floating-point zero and assigned to the variable. After the assignment statement executes, the variable n will have the value zero (either as an integer or a floating-point value, depending on its type).

+

Syntactically, every assignment statement must be followed by a semicolon. Otherwise, whitespace between the tokens does not matter (the tokens here being the left-hand-side (indexed) variable, the assignment operator, the right-hand-side expression and the semicolon).

+

Because the right-hand side is evaluated first, it is possible to increment a variable in Stan just as in C++ and other programming languages by writing

+
n = n + 1;
+

Such self assignments are not allowed in BUGS, because they induce a cycle into the directed graphical model.

+

The left-hand side of an assignment may contain indices for array, matrix, or vector data structures. For instance, if Sigma is of type matrix, then

+
Sigma[1, 1] = 1.0;
+

sets the value in the first column of the first row of Sigma to one.

+

Assignments to subcomponents of larger multi-variate data structures are supported by Stan. For example, if a is an array of type array[,] real and b is an array of type array[] real, then the following two statements are both well-formed.

+
a[3] = b;
+b = a[4];
+

Similarly, if x is a variable declared to have type row_vector and Y is a variable declared as type matrix, then the following sequence of statements to swap the first two rows of Y is well formed.

+
x = Y[1];
+Y[1] = Y[2];
+Y[2] = x;
+
+

Promotion

+

Stan allows assignment of lower types to higher types, but not vice-versa. That is, we can assign an expression of type int to an lvalue of type real, and we can assign an expression of type real to an lvalue of type complex. Furthermore, promotion is transitive, so that we can assign an expression of type int to an lvalue of type complex.

+

Promotion extends to containers, so that arrays of int can be promoted to arrays of real during assignment, and arrays of real can be assigned to an lvalue of type array of complex. Similarly, an expression of type vector may be assigned to an lvalue of type complex_vector, and similarly for row vectors and matrices.

+
+
+

Lvalue summary

+

The expressions that are legal left-hand sides of assignment statements are known as “lvalues.” In Stan, there are three kinds of legal lvalues,

+
    +
  • a variable, or
  • +
  • a variable with one or more indices, or
  • +
  • a comma separated list of lvalues surrounded by ( and )
  • +
+

To be used as an lvalue, an indexed variable must have at least as many dimensions as the number of indices provided. An array of real or integer types has as many dimensions as it is declared for. A matrix has two dimensions and a vector or row vector one dimension; this also holds for the constrained types, covariance and correlation matrices and their Cholesky factors and ordered, positive ordered, and simplex vectors. An array of matrices has two more dimensions than the array and an array of vectors or row vectors has one more dimension than the array. Note that the number of indices can be less than the number of dimensions of the variable, meaning that the right hand side must itself be multidimensional to match the remaining dimensions.

+
+
+

Multiple indexes

+

Multiple indexes, as described in the multi-indexing section, are also permitted on the left-hand side of assignments. Indexing on the left side works exactly as it does for expressions, with multiple indexes preserving index positions and single indexes reducing them. The type on the left side must still match the type on the right side.

+
+

Aliasing

+

All assignment is carried out as if the right-hand side is copied before the assignment. This resolves any potential aliasing issues arising from the right-hand side changing in the middle of an assignment statement’s execution.

+
+
+
+

Compound arithmetic and assignment statement

+

Stan’s arithmetic operators may be used in compound arithmetic and assignment operations. For example, consider the following example of compound addition and assignment.

+
real x = 5;
+x += 7;  // value of x is now 12
+

The compound arithmetic and assignment statement above is equivalent to the following long form.

+
x = x + 7;
+

In general, the compound form

+
x op= y
+

will be equivalent to

+
x = x op y;
+

The compound statement will be legal whenever the long form is legal. This requires that the operation x op y must itself be well formed and that the result of the operation be assignable to x. For the expression x to be assignable, it must be an indexed variable where the variable is defined in the current block. For example, the following compound addition and assignment statement will increment a single element of a vector by two.

+
vector[N] x;
+x[3] += 2;
+

As a further example, consider

+
matrix[M, M] x;
+vector[M] y;
+real z;
+x *= x;  // OK, (x * x) is a matrix
+x *= z;  // OK, (x * z) is a matrix
+x *= y;  // BAD, (x * y) is a vector
+

The supported compound arithmetic and assignment operations are listed in the compound arithmetic/assignment table; they are also listed in the index prefaced by operator, e.g., operator+=.

+

Compound Arithmetic/Assignment Table. Stan allows compound arithmetic and assignment statements of the forms listed in the table. The compound form is legal whenever the corresponding long form would be legal and it has the same effect.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
operationcompoundunfolded
additionx += yx = x + y
subtractionx -= yx = x - y
multiplicationx *= yx = x * y
divisionx /= yx = x / y
elementwise multiplicationx .*= yx = x .* y
elementwise divisionx ./= yx = x ./ y
+
+
+
+

Increment log density

+

The basis of Stan’s execution is the evaluation of a log probability function (specifically, a log probability density function) for a given set of (real-valued) parameters; this function returns the log density of the posterior up to an additive constant. Data and transformed data are fixed before the log density is evaluated. The total log probability is initialized to zero. Next, any log Jacobian adjustments accrued by the variable constraints are added to the log density (the Jacobian adjustment may be skipped for maximum likelihood estimation via optimization). Distribution statements and log probability increment statements may add to the log density in the model block. A log probability increment statement directly increments the log density with the value of an expression as follows.1

+
target += -0.5 * y * y;
+

The keyword target here is actually not a variable, and may not be accessed as such (though see below on how to access the value of target through a special function).

+

In this example, the unnormalized log probability of a unit normal variable \(y\) is added to the total log probability. In the general case, the argument can be any expression.2

+

An entire Stan model can be implemented this way. For instance, the following model has a single variable according to a unit normal probability.

+
parameters {
+  real y;
+}
+model {
+  target += -0.5 * y * y;
+}
+

This model defines a log probability function

+

\[ +\log p(y) = - \, \frac{y^2}{2} - \log Z +\]

+

where \(Z\) is a normalizing constant that does not depend on \(y\). The constant \(Z\) is conventionally written this way because on the linear scale, \[ +p(y) = \frac{1}{Z} \exp\left(-\frac{y^2}{2}\right). +\] which is typically written without reference to \(Z\) as \[ +p(y) \propto \exp\left(-\frac{y^2}{2}\right). +\]

+

Stan only requires models to be defined up to a constant that does not depend on the parameters. This is convenient because often the normalizing constant \(Z\) is either time-consuming to compute or intractable to evaluate.

+
+

Built in distributions

+

The built in distribution functions in Stan are all available in normalized and unnormalized form. The normalized forms include all of the terms in the log density, and the unnormalized forms drop terms which are not directly or indirectly a function of the model parameters.

+

For instance, the normal_lpdf function returns the log density of a normal distribution:

+

\[ +\textsf{normal\_lpdf}(x | \mu, \sigma) = +-\log \left( \sigma \sqrt{2 \pi} \right) +-\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right)^2 +\]

+

The normal_lupdf function returns the log density of an unnormalized distribution. With the unnormalized version of the function, Stan does not define what the normalization constant will be, though usually as many terms as possible are dropped to make the calculation fast. Dropping a constant sigma term, normal_lupdf would be equivalent to:

+

\[ +\textsf{normal\_lupdf}(x | \mu, \sigma) = +-\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right)^2 +\]

+

All functions ending in _lpdf have a corresponding _lupdf version which evaluates and returns the unnormalized density. The same is true for _lpmf and _lupmf.

+
+
+

Relation to compound addition and assignment

+

The increment log density statement looks syntactically like compound addition and assignment (see the compound arithmetic/assignment section), but it is treated as a primitive statement because target is not itself a variable. So, even though

+
target += lp;
+

is a legal statement, the corresponding long form is not legal.

+
target = target + lp;  // BAD, target is not a variable
+
+
+

Vectorization

+

The target += ... statement accepts an argument in place of ... for any expression type, including integers, reals, vectors, row vectors, matrices, and arrays of any dimensionality, including arrays of vectors and matrices. For container arguments, their sum will be added to the total log density.

+
+
+
+

Increment log density with a change of variables adjustment

+

A variant of the target += statement described above is the jacobian += statement. This can be used in the transformed parameters block or in functions ending with _jacobian to mimic the log Jacobian adjustments accrued by built-in variable transforms.

+

Similarly to those implemented for the built-in transforms, these Jacobian adjustments may be skipped for maximum likelihood estimation via optimization.

+

For example, here is a program which recreates the existing <upper=x> transform on real numbers:

+
functions {
+  real my_upper_bound_jacobian(real x, real ub) {
+    jacobian += x;
+    return ub - exp(x);
+  }
+}
+data {
+  real ub;
+}
+parameters {
+  real b_raw;
+}
+transformed parameters {
+  real b = my_upper_bound_jacobian(b_raw, ub);
+}
+model {
+  // use b as if it was declared `real<upper=ub> b;` in parameters
+  // e.g.
+  // b ~ lognormal(0, 1);
+}
+
+

Accessing the log density

+

To access the accumulated log density up to the current execution point, the function target() may be used.

+
+
+
+

Sampling statements

+

The term “sampling statement” has been replaced with distribution statement.

+
+
+

Distribution statements

+

Stan supports writing probability statements also using distribution statements, for example

+
y ~ normal(mu, sigma);
+mu ~ normal(0, 10);
+sigma ~ normal(0, 1);
+

The symbol \(\sim\) is called tilde. Due to historical reasons, the distribution statements used to be called “sampling statements” in Stan, but that term is not recommended anymore as it is a less accurate description.

+

In general, we can read \(\sim\) as “is distributed as,” and overall this notation is used as a shorthand for defining distributions, so that the above example can be written also as \[ +\begin{aligned} + p(y| \mu, \sigma) & = \mathrm{normal}(y | \mu, \sigma)\\ + p(\mu) & = \mathrm{normal}(\mu | 0, 10)\\ + p(\sigma) & = \mathrm{normal}^+(\sigma | 0, 1). +\end{aligned} +\] A collection of distribution statements define a joint distribution as the product of component distributions \[ +p(y,\mu,\sigma) = p(y| \mu, \sigma )p(\mu) p(\sigma). +\]

+

This works even if the model is not constructed generatively. For example, suppose you include the following code in a Stan model:

+
  a ~ normal(0, 1);
+  a ~ normal(0, 1);
+

This is translated to \[ + p(a) = \mathrm{normal}(a | 0, 1)\mathrm{normal}(a | 0, 1), +\] which in this case is \(\mathrm{normal}(a|0,1/\sqrt{2})\). One might expect that the above two lines of code would represent a redundant expression of a \(\mathrm{normal}(a|0,1)\) prior, but, no, each line of code corresponds to an additional term in the target, or log posterior density. You can think of each line as representing an additional piece of information.

+

When the joint distribution is considered as a function of parameters (e.g. \(\mu\), \(\sigma\)) given fixed data, it is proportional to the posterior distribution. In general, the posterior distribution is not a normalized probability density function—that is, it will be positive but will not in general integrate to 1—but the proportionality is sufficient for the Stan algorithms.

+

Stan always constructs the target function—in Bayesian terms, the log posterior density function of the parameter vector—by adding terms in the model block. Equivalently, each \(\sim\) statement corresponds to a multiplicative factor in the unnormalized posterior density.

+

Distribution statements (~) accept only built-in or user-defined distributions on the right side. The left side of a distribution statement may be data, parameter, or a complex expression, but the evaluated type needs to match one of the allowed types of the distribution on the right (see more below).

+

In Stan, a distribution statement is merely a notational convenience following the typical notation used to present models in the literature. The above model defined with distribution statements could be expressed as a direct increment on the total log probability density as

+
target += normal_lpdf(y | mu, sigma);
+target += normal_lpdf(mu | 0, 10);
+target += normal_lpdf(sigma | 0, 1);
+

Stan models can mix distribution statements and log probability increment statements. Although statistical models are usually defined with distributions in the literature, there are several scenarios in which we may want to code the log likelihood or parts of it directly, for example, due to computational efficiency (e.g. censored data model) or coding language limitations (e.g. mixture models in Stan). This is possible with log probability increment statements. See also the discussion below about Jacobians.

+

In general, a distribution statement of the form

+
y ~ dist(theta1, ..., thetaN);
+

involving subexpressions y and theta1 through thetaN (including the case where N is zero) will be well formed if and only if the corresponding log probability increment statement is well-formed. For densities allowing real y values, the log probability density function is used,

+
target += dist_lpdf(y | theta1, ..., thetaN);
+

For those restricted to integer y values, the log probability mass function is used,

+
target += dist_lpmf(y | theta1, ..., thetaN);
+

This will be well formed if and only if dist_lpdf(y | theta1, ..., thetaN) or dist_lpmf(y | theta1, ..., thetaN) is a well-formed expression of type real. User defined distributions can be defined in the functions block by using function names ending with _lpdf.

+
+

Log probability increment vs. distribution statement

+

Although both lead to the same inference algorithm behavior in Stan, there is one critical difference between using the distribution statement, as in

+
y ~ normal(mu, sigma);
+

and explicitly incrementing the log probability function, as in

+
target += normal_lpdf(y | mu, sigma);
+

The distribution statement drops all the terms in the log probability function that are constant, whereas the explicit call to normal_lpdf adds all of the terms in the definition of the log normal probability function, including all of the constant normalizing terms. Therefore, the explicit increment form can be used to recreate the exact log probability values for the model. Otherwise, the distribution statement form will be faster if any of the input expressions, y, mu, or sigma, involve only constants, data variables, and transformed data variables. See the section Built in distributions above discussing _lupdf and _lupmf functions that also drop all the constant terms.

+
+
+

User-transformed variables

+

The left-hand side of a distribution statement may be an arbitrary expression (of compatible type). For instance, it is legal syntactically to write

+
parameters {
+  real<lower=0> beta;
+}
+// ...
+model {
+  log(beta) ~ normal(mu, sigma);
+}
+

Unfortunately, this is not enough to properly model beta as having a lognormal distribution. Whenever a nonlinear transform is applied to a parameter, such as the logarithm function being applied to beta here, and then used on the left-hand side of a distribution statement or on the left of a vertical bar in a log pdf function, an adjustment must be made to account for the differential change in scale and ensure beta gets the correct distribution. The correction required is to add the log Jacobian of the transform to the target log density; see the change of variables section for full definitions. For the case above, the following adjustment will account for the log transform.3

+
target += - log(abs(y));
+
+
+

Truncated distributions

+

Stan supports truncating distributions with lower bounds, upper bounds, or both.

+
+

Truncating with lower and upper bounds

+

A probability density function \(p(x)\) for a continuous distribution may be truncated to an interval \([a, b]\) to define a new density \(p_{[a, b]}(x)\) with support \([a, b]\) by setting

+

\[ +p_{[a, b]}(x) += \frac{p(x)} + {\int_a^b p(u) \, du}. +\]

+

A probability mass function \(p(x)\) for a discrete distribution may be truncated to the closed interval \([a, b]\) by

+

\[ +p_{[a, b]}(x) = \frac{p(x)} + {\sum_{u = a}^b p(u)}. +\]

+
+
+

Truncating with a lower bound

+

A probability density function \(p(x)\) can be truncated to \([a, \infty]\) by defining

+

\[ +p_{[a, \infty]}(x) += \frac{p(x)} + {\int_a^{\infty} p(u) \, du}. +\]

+

A probability mass function \(p(x)\) is truncated to \([a, \infty]\) by defining

+

\[ +p_{[a, \infty]}(x) = \frac{p(x)} + {\sum_{a \leq u} p(u)}. +\]

+
+
+

Truncating with an upper bound

+

A probability density function \(p(x)\) can be truncated to \([-\infty, b]\) by defining

+

\[ +p_{[-\infty, b]}(x) += \frac{p(x)} + {\int_{-\infty}^b p(u) \, du}. +\]

+

A probability mass function \(p(x)\) is truncated to \([-\infty, b]\) by defining

+

\[ +p_{[-\infty,b]}(x) = \frac{p(x)} + {\sum_{u \leq b} p(u)}. +\]

+
+
+

Cumulative distribution functions

+

Given a probability function \(p_X(x)\) for a random variable \(X\), its cumulative distribution function (cdf) \(F_X(x)\) is defined to be the probability that \(X \leq x\),

+

\[ +F_X(x) = \Pr[X \leq x]. +\]

+

The upper-case variable \(X\) is the random variable whereas the lower-case variable \(x\) is just an ordinary bound variable. For continuous random variables, the definition of the cdf works out to

+

\[ +F_X(x) \ = \ \int_{-\infty}^{x} p_X(u) \, du, +\]

+

For discrete variables, the cdf is defined to include the upper bound given by the argument,

+

\[ +F_X(x) = \sum_{u \leq x} p_X(u). +\]

+
+
+

Complementary cumulative distribution functions

+

The complementary cumulative distribution function (ccdf) in both the continuous and discrete cases is given by

+

\[ +F^C_X(x) +\ = \ \Pr[X > x] +\ = \ 1 - F_X(x). +\]

+

Unlike the cdf, the ccdf is exclusive of the bound, hence the event \(X > x\) rather than the cdf’s event \(X \leq x\).

+

For continuous distributions, the ccdf works out to

+

\[ +F^C_X(x) +\ = \ 1 - \int_{-\infty}^x p_X(u) \, du +\ = \ \int_x^{\infty} p_X(u) \, du. +\]

+

The lower boundary can be included in the integration bounds because it is a single point on a line and hence has no probability mass. For the discrete case, the lower bound must be excluded in the summation explicitly by summing over \(u > x\),

+

\[ +F^C_X(x) +\ = \ 1 - \sum_{u \leq x} p_X(u) +\ = \ \sum_{u > x} p_X(u). +\]

+

Cumulative distribution functions provide the necessary integral calculations to define truncated distributions. For truncation with lower and upper bounds, the denominator is defined by \[ +\int_a^b p(u) \, du = F_X(b) - F_X(a). +\] This allows truncated distributions to be defined as \[ +p_{[a,b]}(x) = \frac{p_X(x)} + {F_X(b) - F_X(a)}. +\]

+

For discrete distributions, a slightly more complicated form is required to explicitly insert the lower truncation point, which is otherwise excluded from \(F_X(b) - F_X(a)\),

+

\[ +p_{[a,b]}(x) = \frac{p_X(x)} + {F_X(b) - F_X(a) + p_X(a)}. +\]

+
+
+

Truncation with lower and upper bounds in Stan

+

Stan allows probability functions to be truncated. For example, a truncated unit normal distribution restricted to \([-0.5, 2.1]\) can be coded with the following distribution statement.

+
y ~ normal(0, 1) T[-0.5, 2.1];
+

Truncated distributions are translated as an additional term in the accumulated log density function plus error checking to make sure the variate in the distribution statement is within the bounds of the truncation.

+

In general, the truncation bounds and parameters may be parameters or local variables.

+

Because the example above involves a continuous distribution, it behaves the same way as the following more verbose form.

+
y ~ normal(0, 1);
+if (y < -0.5 || y > 2.1) {
+  target += negative_infinity();
+} else {
+  target += -log_diff_exp(normal_lcdf(2.1 | 0, 1),
+                          normal_lcdf(-0.5 | 0, 1));
+}
+

Because a Stan program defines a log density function, all calculations are on the log scale. The function normal_lcdf is the log of the cumulative normal distribution function and the function log_diff_exp(a, b) is a more arithmetically stable form of log(exp(a) - exp(b)).

+

For a discrete distribution, another term is necessary in the denominator to account for the excluded boundary. The truncated discrete distribution

+
y ~ poisson(3.7) T[2, 10];
+

behaves in the same way as the following code.

+
y ~ poisson(3.7);
+if (y < 2 || y > 10) {
+  target += negative_infinity();
+} else {
+  target += -log_sum_exp(poisson_lpmf(2 | 3.7),
+                         log_diff_exp(poisson_lcdf(10 | 3.7),
+                                      poisson_lcdf(2 | 3.7)));
+}
+

Recall that log_sum_exp(a, b) is just the arithmetically stable form of log(exp(a) + exp(b)).

+
+
+

Truncation with lower bounds in Stan

+

For truncating with only a lower bound, the upper limit is left blank.

+
y ~ normal(0, 1) T[-0.5, ];
+

This truncated distribution statement has the same behavior as the following code.

+
y ~ normal(0, 1);
+if (y < -0.5) {
+  target += negative_infinity();
+} else {
+  target += -normal_lccdf(-0.5 | 0, 1);
+}
+

The normal_lccdf function is the normal complementary cumulative distribution function.

+

As with lower and upper truncation, the discrete case requires a more complicated denominator to add back in the probability mass for the lower bound. Thus

+
y ~ poisson(3.7) T[2, ];
+

behaves the same way as

+
y ~ poisson(3.7);
+if (y < 2) {
+  target += negative_infinity();
+} else {
+  target += -log_sum_exp(poisson_lpmf(2 | 3.7),
+                         poisson_lccdf(2 | 3.7));
+}
+
+
+

Truncation with upper bounds in Stan

+

To truncate with only an upper bound, the lower bound is left blank. The upper truncated distribution statement

+
y ~ normal(0, 1) T[ , 2.1];
+

produces the same result as the following code.

+
target += normal_lpdf(y | 0, 1);
+if (y > 2.1) {
+  target += negative_infinity();
+} else {
+  target += -normal_lcdf(2.1 | 0, 1);
+}
+

With only an upper bound, the discrete case does not need a boundary adjustment. The upper-truncated distribution statement

+
y ~ poisson(3.7) T[ , 10];
+

behaves the same way as the following code.

+
y ~ poisson(3.7);
+if (y > 10) {
+  target += negative_infinity();
+} else {
+  target += -poisson_lcdf(10 | 3.7);
+}
+
+
+

Cumulative distributions must be defined

+

In all cases, the truncation is only well formed if the appropriate log density or mass function and necessary log cumulative distribution functions are defined. Not every distribution built into Stan has log cdf and log ccdfs defined, nor will every user-defined distribution. The discrete probability function documentation describes the available discrete and continuous cumulative distribution functions; most univariate distributions have log cdf and log ccdf functions.

+
+
+

Type constraints on bounds

+

For continuous distributions, truncation points must be expressions of type int or real. For discrete distributions, truncation points must be expressions of type int.

+
+
+

Variates outside of truncation bounds

+

For a truncated distribution statement, if the value sampled is not within the bounds specified by the truncation expression, the result is zero probability and the entire statement adds \(-\infty\) to the total log probability, which in turn results in the sample being rejected.

+
+
+

Vectorizing truncated distributions

+

Vectorization of distribution functions with truncation is available if the underlying distribution, lcdf, and lccdf functions meet the required signatures.

+

The equivalent code for a vectorized truncation depends on which of the variables are non-scalars (arrays, vectors, etc.):

+
    +
  1. If the variate y is the only non-scalar, the result is the same as described in the above sections, but the lcdf/lccdf calculation is multiplied by size(y).

  2. +
  3. If the other arguments to the distribution are non-scalars, then the vectorized version of the lcdf/lccdf is used. These functions return the sum of their terms, so no multiplication by the size is needed.

  4. +
  5. The exception to the above is when a non-variate is a vector and both a lower and upper bound are specified in the truncation. In this case, a for loop is generated over the elements of the non-scalar arguments. This is required since the log_diff_exp of two sums is not the same as the sum of the pairwise log_diff_exp operations.

  6. +
+

Note that while a lower-and-upper truncated distribution may generate a for-loop internally as part of translating the truncation statement, this is still preferable to manually constructing a loop, since the distribution function itself can still be evaluated in a vectorized manner.

+
+
+
+
+

For loops

+

Suppose N is a variable of type int, y is a one-dimensional array of type array[] real, and mu and sigma are variables of type real. Furthermore, suppose that n has not been defined as a variable. Then the following is a well-formed for-loop statement.

+
for (n in 1:N) {
+  y[n] ~ normal(mu, sigma);
+}
+

The loop variable is n, the loop bounds are the values in the range 1:N, and the body is the statement following the loop bounds.

+
+

Loop variable typing and scope

+

The type of the loop variable is int. Unlike in C++ and similarly to R, this variable must not be declared explicitly.

+

The bounds in a for loop must be integers. Unlike in R, the loop is always interpreted as an upward counting loop. The range L:H will cause the loop body to execute with the loop variable taking on all integer values greater than or equal to L and less than or equal to H. For example, the loop for (n in 2:5) will cause the body of the for loop to be executed with n equal to 2, 3, 4, and 5, in order. The loop for (n in 5:2) will not execute anything because there are no integers greater than or equal to 5 and less than or equal to 2.

+

The scope of the loop variable is limited to the body of the loop.

+
+
+

Order sensitivity and repeated variables

+

Unlike in BUGS, Stan allows variables to be reassigned. For example, the variable theta in the following program is reassigned in each iteration of the loop.

+
for (n in 1:N) {
+  theta = inv_logit(alpha + x[n] * beta);
+  y[n] ~ bernoulli(theta);
+}
+

Such reassignment is not permitted in BUGS. In BUGS, for loops are declarative, defining plates in directed graphical model notation, which can be thought of as repeated substructures in the graphical model. Therefore, it is illegal in BUGS or JAGS to have a for loop that repeatedly reassigns a value to a variable.4

+

In Stan, assignments are executed in the order they are encountered. As a consequence, the following Stan program has a very different interpretation than the previous one.

+
for (n in 1:N) {
+  y[n] ~ bernoulli(theta);
+  theta = inv_logit(alpha + x[n] * beta);
+}
+

In this program, theta is assigned after it is used in the probability statement. This presupposes it was defined before the first loop iteration (otherwise behavior is undefined), and then each loop uses the assignment from the previous iteration.

+

Stan loops may be used to accumulate values. Thus it is possible to sum the values of an array directly using code such as the following.

+
total = 0.0;
+for (n in 1:N) {
+  total = total + x[n];
+}
+

After the for loop is executed, the variable total will hold the sum of the elements in the array x. This example was purely pedagogical; it is easier and more efficient to write

+
total = sum(x);
+

A variable inside (or outside) a loop may even be reassigned multiple times, as in the following legal code.

+
for (n in 1:100) {
+  y += y * epsilon;
+  epsilon = 0.5 * epsilon;
+  y += y * epsilon;
+}
+
+
+
+

Foreach loops

+

A second form of for loops allows iteration over elements of containers. If ys is an expression denoting a container (vector, row vector, matrix, or array) with elements of type T, then the following is a well-formed foreach statement.

+
for (y in ys) {
+  // ... do something with y ...
+}
+

The order in which elements of ys are visited is defined for container types as follows.

+
    +
  • vector, row_vector: elements visited in order, y is of type real

  • +
  • matrix: elements visited in column-major order, y is of type real

  • +
  • array[] T: elements visited in order, y is of type T.

  • +
+

Consequently, if ys is a two-dimensional array array[,] real, y will be a one-dimensional array of real values (type array[] real). If ys is a matrix, then y will be a real value (type real). To loop over all values of a two-dimensional array using foreach statements would require a doubly-nested loop,

+
array[2, 3] real yss;
+for (ys in yss) {
+  for (y in ys) {
+    // ... do something with y ...
+  }
+}
+

whereas a matrix can be looped over in one foreach statement

+
matrix[2, 3] yss;
+for (y in yss) {
+   // ... do something with y...
+}
+

In both cases, the loop variable y is of type real. The elements of the matrix are visited in column-major order (e.g., yss[1, 1], yss[2, 1], yss[1, 2], ..., yss[2, 3]), whereas the elements of the two-dimensional array are visited in row-major order (e.g., yss[1, 1], yss[1, 2], yss[1, 3], yss[2, 1], ..., yss[2, 3]).

+
+
+

Conditional statements

+

Stan supports full conditional statements using the same if-then-else syntax as C++. The general format is

+
if (condition1)
+  statement1
+else if (condition2)
+  statement2
+// ...
+else if (conditionN-1)
+  statementN-1
+else
+  statementN
+

There must be a single leading if clause, which may be followed by any number of else if clauses, all of which may be optionally followed by an else clause. Each condition must be an integer value, with non-zero values interpreted as true and the zero value as false.

+

The entire sequence of if-then-else clauses forms a single conditional statement for evaluation. The conditions are evaluated in order until one of the conditions evaluates to a non-zero value, at which point its corresponding statement is executed and the conditional statement finishes execution. If none of the conditions evaluate to a non-zero value and there is a final else clause, its statement is executed.

+
+
+

While statements

+

Stan supports standard while loops using the same syntax as C++. The general format is as follows.

+
while (condition)
+  body
+

The condition must be an integer expression and the body can be any statement (or sequence of statements in curly braces).

+

Evaluation of a while loop starts by evaluating the condition. If the condition evaluates to a false (zero) value, the execution of the loop terminates and control moves to the position after the loop. If the loop’s condition evaluates to a true (non-zero) value, the body statement is executed, then the whole loop is executed again. Thus the loop is continually executed as long as the condition evaluates to a true value.

+

The rest of the body of a while loop may be skipped using a continue. The loop will be exited with a break statement. See the section on continue and break statements for more details.

+
+
+

Statement blocks and local variable declarations

+

Just as parentheses may be used to group expressions, curly brackets may be used to group a sequence of zero or more statements into a statement block. At the beginning of each block, local variables may be declared that are scoped over the rest of the statements in the block.

+
+

Blocks in for loops

+

Blocks are often used to group a sequence of statements together to be used in the body of a for loop. Because the body of a for loop can be any statement, for loops with bodies consisting of a single statement can be written as follows.

+
for (n in 1:N) {
+  y[n] ~ normal(mu, sigma);
+}
+

To put multiple statements inside the body of a for loop, a block is used, as in the following example.

+
for (n in 1:N) {
+  lambda[n] ~ gamma(alpha, beta);
+  y[n] ~ poisson(lambda[n]);
+}
+

The open curly bracket ({) is the first character of the block and the close curly bracket (}) is the last character.

+

Because whitespace is ignored in Stan, the following program will not compile.

+
for (n in 1:N)
+  y[n] ~ normal(mu, sigma);
+  z[n] ~ normal(mu, sigma); // ERROR!
+

The problem is that the body of the for loop is taken to be the statement directly following it, which is y[n] ~ normal(mu, sigma). This leaves the probability statement for z[n] hanging, as is clear from the following equivalent program.

+
for (n in 1:N) {
+  y[n] ~ normal(mu, sigma);
+}
+z[n] ~ normal(mu, sigma); // ERROR!
+

Neither of these programs will compile. If the loop variable n was defined before the for loop, the for-loop declaration will raise an error. If the loop variable n was not defined before the for loop, then the use of the expression z[n] will raise an error.

+
+
+

Local variable declarations

+

A for loop has a statement as a body. It is often convenient in writing programs to be able to define a local variable that will be used temporarily and then forgotten. For instance, the for loop example of repeated assignment should use a local variable for maximum clarity and efficiency, as in the following example.

+
for (n in 1:N) {
+  real theta;
+  theta = inv_logit(alpha + x[n] * beta);
+  y[n] ~ bernoulli(theta);
+}
+

The local variable theta is declared here inside the for loop. The scope of a local variable is just the block in which it is defined. Thus theta is available for use inside the for loop, but not outside of it. As in other situations, Stan does not allow variable hiding. So it is illegal to declare a local variable theta if the variable theta is already defined in the scope of the for loop. For instance, the following is not legal.

+
for (m in 1:M) {
+  real theta;
+  for (n in 1:N) {
+    real theta; // ERROR!
+    theta = inv_logit(alpha + x[m, n] * beta);
+    y[m, n] ~ bernoulli(theta);
+// ...
+

The compiler will flag the second declaration of theta with a message that it is already defined.

+
+
+

No constraints on local variables

+

Local variables may not have constraints on their declaration. The only types that may be used are listed in the types table under “local”.

+
+
+

Blocks within blocks

+

A block is itself a statement, so anywhere a sequence of statements is allowed, one or more of the statements may be a block. For instance, in a for loop, it is legal to have the following

+
for (m in 1:M) {
+  {
+     int n = 2 * m;
+     sum += n;
+  }
+  for (n in 1:N) {
+    sum += x[m, n];
+  }
+}
+

The variable declaration int n = 2 * m; is the first element of an embedded block and so has scope within that block. The for loop defines its own local block implicitly over the statement following it in which the loop variable is defined. As far as Stan is concerned, these two uses of n are unrelated.

+
+
+
+

Break and continue statements

+

The one-token statements continue and break may be used within loops to alter control flow; continue causes the next iteration of the loop to run immediately, whereas break terminates the loop and causes execution to resume after the loop. Both control structures must appear in loops. Both break and continue scope to the most deeply nested loop, but pass through non-loop statements.

+

Although these control statements may seem undesirable because of their goto-like behavior, their judicious use can greatly improve readability by reducing the level of nesting or eliminating bookkeeping inside loops.

+
+

Break statements

+

When a break statement is executed, the most deeply nested loop currently being executed is ended and execution picks up with the next statement after the loop. For example, consider the following program:

+
while (1) {
+  if (n < 0) {
+    break;
+  }
+  foo(n);
+  n = n - 1;
+}
+

The while (1) loop is a “forever” loop, because 1 is the true value, so the test always succeeds. Within the loop, if the value of n is less than 0, the loop terminates, otherwise it executes foo(n) and then decrements n. The statement above does exactly the same thing as

+
while (n >= 0) {
+  foo(n);
+  n = n - 1;
+}
+

This case is simply illustrative of the behavior; it is not a case where a break simplifies the loop.

+
+
+

Continue statements

+

The continue statement ends the current iteration of the loop and returns to the condition at the top of the loop. Such statements are typically used to exclude some values from calculations. For example, we could use the following loop to sum the positive values in the array x,

+
real sum;
+sum = 0;
+for (n in 1:size(x)) {
+  if (x[n] <= 0) {
+    continue;
+  }
+  sum += x[n];
+}
+

When the continue statement is executed, control jumps back to the conditional part of the loop. With while and for loops, this causes control to return to the conditional of the loop. With for loops, this advances the loop variable, so the above program will not go into an infinite loop when faced with an x[n] less than or equal to zero. Thus the above program could be rewritten with deeper nesting by reversing the conditional,

+
real sum;
+sum = 0;
+for (n in 1:size(x)) {
+  if (x[n] > 0) {
+    sum += x[n];
+  }
+}
+

While the latter form may seem more readable in this simple case, the former has the main line of execution nested one level less deep. Instead, the conditional at the top finds cases to exclude and doesn’t require the same level of nesting for code that’s not excluded. When there are several such exclusion conditions, the break or continue versions tend to be much easier to read.

+
+
+

Breaking and continuing nested loops

+

If there is a loop nested within a loop, a break or continue statement only breaks out of the inner loop. So

+
while (cond1) {
+  // ...
+  while (cond2) {
+    // ...
+    if (cond3) {
+      break;
+    }
+    // ...
+  }
+  // execution continues here after break
+  // ...
+}
+

If the break is triggered by cond3 being true, execution will continue after the nested loop.

+

As with break statements, continue statements go back to the top of the most deeply nested loop in which the continue appears.

+

Although break and continue must appear within loops, they may appear in nested statements within loops, such as within the conditionals shown above or within nested statements. The break and continue statements jump past any control structure other than while-loops and for-loops.

+
+
+ +
+

Reject statements

+

The Stan reject statement provides a mechanism to report errors or problematic values encountered during program execution and either halt processing or reject iterations.

+

Like the print statement, the reject statement accepts any number of quoted string literals or Stan expressions as arguments.

+

Reject statements are typically embedded in a conditional statement in order to detect variables in illegal states. For example, the following code handles the case where a variable x’s value is negative.

+
if (x < 0) {
+  reject("x must not be negative; found x=", x);
+}
+
+

Behavior of reject statements

+

Reject statements have the same behavior as exceptions thrown by built-in Stan functions. For example, the normal_lpdf function raises an exception if the input scale is not positive and finite. The effect of a reject statement depends on the program block in which the rejection occurs.

+

In all cases of rejection, the interface accessing the Stan program should print the arguments to the reject statement.

+
+

Rejections in functions

+

Rejections in user-defined functions are just passed to the calling function or program block. Reject statements can be used in functions to validate the function arguments, allowing user-defined functions to fully emulate built-in function behavior. It is better to find out earlier rather than later when there is a problem.

+
+
+

Fatal exception contexts

+

Rejections are fatal in the transformed data block. This is because if initialization fails there is no way to recover values, so the algorithm will not begin execution.

+

Reject statements placed in the transformed data block can be used to validate both the data and transformed data (if any). This allows more complicated constraints to be enforced than can be specified with Stan’s constrained variable declarations.

+

Fatal errors in other blocks may also be signaled by use of the fatal_error statement.

+
+
+

Recoverable rejection contexts

+

Rejections in the transformed parameters and model blocks are not in and of themselves instantly fatal. The result has the same effect as assigning a \(-\infty\) log probability, which causes rejection of the current proposal in MCMC samplers and adjustment of search parameters in optimization.

+

If the log probability function results in a rejection every time it is called, the containing application (MCMC sampler or optimization) should diagnose this problem and terminate with an appropriate error message. To aid in diagnosing problems, the message for each reject statement will be printed as a result of executing it.

+
+
+
+

Rejection is not for constraints

+

Rejection should be used for error handling, not defining arbitrary constraints. Consider the following errorful Stan program.

+
parameters {
+  real a;
+  real<lower=a> b;
+  real<lower=a, upper=b> theta;
+  // ...
+}
+model {
+  // **wrong** needs explicit truncation
+  theta ~ normal(0, 1);
+  // ...
+}
+

This program is wrong because its truncation bounds on theta depend on parameters, and thus need to be accounted for using an explicit truncation on the distribution. This is the right way to do it.

+
  theta ~ normal(0, 1) T[a, b];
+

The conceptual issue is that the prior does not integrate to one over the admissible parameter space; it integrates to one over all real numbers and integrates to something less than one over \([a ,b]\); in these simple univariate cases, we can overcome that with the T[ , ] notation, which essentially divides by whatever the prior integrates to over \([a, b]\).

+

This problem is exactly the same problem as you would get using reject statements to enforce complicated inequalities on multivariate functions. In this case, it is wrong to try to deal with truncation through constraints.

+
  if (theta < a || theta > b) {
+    reject("theta not in (a, b)");
+  }
+  // still **wrong**, needs T[a,b]
+  theta ~ normal(0, 1);
+

In this case, the prior integrates to something less than one over the region of the parameter space where the complicated inequalities are satisfied. But we don’t generally know what value the prior integrates to, so we can’t increment the log probability function to compensate.

+

Even if this adjustment to a proper probability model may seem minor in particular models where the amount of truncated posterior density is negligible or constant, we can’t sample from that truncated posterior efficiently. Programs need to use one-to-one mappings that guarantee the constraints are satisfied and only use reject statements to raise errors or help with debugging.

+
+
+
+

Fatal error statements

+

The Stan fatal_error statement provides a mechanism to report errors or problematic values encountered during program execution and uniformly halt processing.

+

Like the print or reject statements, the fatal error statement accepts any number of quoted string literals or Stan expressions as arguments.

+

The fatal error may be used to signal an unrecoverable error in blocks where reject leads to the algorithm attempting to try again, such as the model block.

+ + +
+
+ + + Back to top

Footnotes

+ +
    +
  1. The current notation replaces two previous versions. Originally, a variable lp__ was directly exposed and manipulated; this is no longer allowed. The original statement syntax for target += u was increment_log_prob(u), but this form was removed in Stan 2.33.↩︎

  2. +
  3. Writing this model with the expression -0.5 * y * y is more efficient than with the equivalent expression y * y / -2 because multiplication is more efficient than division; in both cases, the negation is rolled into the numeric literal (-0.5 and -2). Writing square(y) instead of y * y would be even more efficient because the derivatives can be precomputed, reducing the memory and number of operations required for automatic differentiation.↩︎

  4. +
  5. Because \(\log | \frac{d}{dy} \log y | = \log | 1/y | = - \log |y|\).↩︎

  6. +
  7. A programming idiom in BUGS code simulates a local variable by replacing theta in the above example with theta[n], effectively creating N different variables, theta[1], …, theta[N]. Of course, this is not a hack if the value of theta[n] is required for all n.↩︎

  8. +
  9. The adjoint component is always zero during execution for the algorithmic differentiation variables used to implement parameters, transformed parameters, and local variables in the model.↩︎

  10. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/syntax.html b/docs/2_39/reference-manual/syntax.html new file mode 100644 index 000000000..4d89e58a0 --- /dev/null +++ b/docs/2_39/reference-manual/syntax.html @@ -0,0 +1,1480 @@ + + + + + + + + + +Language Syntax + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Language Syntax

+

This chapter defines the basic syntax of the Stan modeling language using a Backus-Naur form (BNF) grammar plus extra-grammatical constraints on function typing and operator precedence and associativity.

+
+

BNF grammars

+
+

Syntactic conventions

+

In the following BNF grammars, tokens are represented in ALLCAPS. Grammar non-terminals are surrounded by < and >. Square brackets ([A]) indicate optionality of A. A postfixed Kleene star (A*) indicates zero or more occurrences of A. Parentheses can be used to group symbols together in productions.

+

Finally, this grammar uses the concept of “parameterized nonterminals” as used in the parsing library Menhir. A rule like <list(x)> ::= x (COMMA x)* declares a generic list rule, which can later be applied to others by the symbol <list(<expression>)>.

+

The following representation is constructed directly from the OCaml reference parser using a tool called Obelisk. The raw output is available here.

+ +
+
+

Programs

+
<program> ::= [<function_block>] [<data_block>] [<transformed_data_block>]
+              [<parameters_block>] [<transformed_parameters_block>]
+              [<model_block>] [<generated_quantities_block>] EOF
+
+<functions_only> ::= <function_def>* EOF
+
+<function_block> ::= FUNCTIONBLOCK LBRACE <function_def>* RBRACE
+
+<data_block> ::= DATABLOCK LBRACE <top_var_decl_no_assign>* RBRACE
+
+<transformed_data_block> ::= TRANSFORMEDDATABLOCK LBRACE
+                             <top_vardecl_or_statement>* RBRACE
+
+<parameters_block> ::= PARAMETERSBLOCK LBRACE <top_var_decl_no_assign>*
+                       RBRACE
+
+<transformed_parameters_block> ::= TRANSFORMEDPARAMETERSBLOCK LBRACE
+                                   <top_vardecl_or_statement>* RBRACE
+
+<model_block> ::= MODELBLOCK LBRACE <vardecl_or_statement>* RBRACE
+
+<generated_quantities_block> ::= GENERATEDQUANTITIESBLOCK LBRACE
+                                 <top_vardecl_or_statement>* RBRACE
+
+
+

Function declarations and definitions

+
<function_def> ::= <return_type> <decl_identifier> LPAREN [<arg_decl> (COMMA
+                   <arg_decl>)*] RPAREN <statement>
+
+<return_type> ::= VOID
+                | <unsized_type>
+
+<arg_decl> ::= [DATABLOCK] <unsized_type> <decl_identifier>
+
+<unsized_type> ::= ARRAY <unsized_dims> <basic_type>
+                 | ARRAY <unsized_dims> <unsized_tuple_type>
+                 | <basic_type>
+                 | <unsized_tuple_type>
+
+<unsized_tuple_type> ::= TUPLE LPAREN <unsized_type> COMMA [<unsized_type>
+                         (COMMA <unsized_type>)*] RPAREN
+
+<basic_type> ::= INT
+               | REAL
+               | COMPLEX
+               | VECTOR
+               | ROWVECTOR
+               | MATRIX
+               | COMPLEXVECTOR
+               | COMPLEXROWVECTOR
+               | COMPLEXMATRIX
+
+<unsized_dims> ::= LBRACK COMMA* RBRACK
+
+
+
+

Variable declarations and compound definitions

+
<identifier> ::= IDENTIFIER
+               | TRUNCATE
+
+<decl_identifier> ::= <identifier>
+
+<no_assign> ::= UNREACHABLE
+
+<optional_assignment(rhs)> ::= [ASSIGN rhs]
+
+<id_and_optional_assignment(rhs)> ::= <decl_identifier>
+                                      <optional_assignment(rhs)>
+
+<decl(type_rule, rhs)> ::= type_rule <decl_identifier> <dims>
+                           <optional_assignment(rhs)> SEMICOLON
+                         | <higher_type(type_rule)>
+                           <id_and_optional_assignment(rhs)> (COMMA
+                           <id_and_optional_assignment(rhs)>)* SEMICOLON
+
+<higher_type(type_rule)> ::= <array_type(type_rule)>
+                           | <tuple_type(type_rule)>
+                           | type_rule
+
+<array_type(type_rule)> ::= <arr_dims> type_rule
+                          | <arr_dims> <tuple_type(type_rule)>
+
+<tuple_type(type_rule)> ::= TUPLE LPAREN <higher_type(type_rule)> COMMA
+                            [<higher_type(type_rule)> (COMMA
+                            <higher_type(type_rule)>)*] RPAREN
+
+<var_decl> ::= <decl(<sized_basic_type>, <expression>)>
+
+<top_var_decl> ::= <decl(<top_var_type>, <expression>)>
+
+<top_var_decl_no_assign> ::= <decl(<top_var_type>, <no_assign>)>
+                           | SEMICOLON
+
+<sized_basic_type> ::= INT
+                     | REAL
+                     | COMPLEX
+                     | VECTOR LBRACK <expression> RBRACK
+                     | ROWVECTOR LBRACK <expression> RBRACK
+                     | MATRIX LBRACK <expression> COMMA <expression> RBRACK
+                     | COMPLEXVECTOR LBRACK <expression> RBRACK
+                     | COMPLEXROWVECTOR LBRACK <expression> RBRACK
+                     | COMPLEXMATRIX LBRACK <expression> COMMA <expression>
+                       RBRACK
+
+<top_var_type> ::= INT [LABRACK <range> RABRACK]
+                 | REAL <type_constraint>
+                 | COMPLEX <type_constraint>
+                 | VECTOR <type_constraint> LBRACK <expression> RBRACK
+                 | ROWVECTOR <type_constraint> LBRACK <expression> RBRACK
+                 | MATRIX <type_constraint> LBRACK <expression> COMMA
+                   <expression> RBRACK
+                 | COMPLEXVECTOR <type_constraint> LBRACK <expression> RBRACK
+                 | COMPLEXROWVECTOR <type_constraint> LBRACK <expression>
+                   RBRACK
+                 | COMPLEXMATRIX <type_constraint> LBRACK <expression> COMMA
+                   <expression> RBRACK
+                 | ORDERED LBRACK <expression> RBRACK
+                 | POSITIVEORDERED LBRACK <expression> RBRACK
+                 | SIMPLEX LBRACK <expression> RBRACK
+                 | UNITVECTOR LBRACK <expression> RBRACK
+                 | SUMTOZEROVEC LBRACK <expression> RBRACK
+                 | CHOLESKYFACTORCORR LBRACK <expression> RBRACK
+                 | CHOLESKYFACTORCOV LBRACK <expression> [COMMA <expression>]
+                   RBRACK
+                 | CORRMATRIX LBRACK <expression> RBRACK
+                 | COVMATRIX LBRACK <expression> RBRACK
+                 | SUMTOZEROMATRIX LBRACK <expression> COMMA <expression> RBRACK
+                 | STOCHASTICCOLUMNMATRIX LBRACK <expression> COMMA
+                   <expression> RBRACK
+                 | STOCHASTICROWMATRIX LBRACK <expression> COMMA <expression>
+                   RBRACK
+
+<type_constraint> ::= [LABRACK <range> RABRACK]
+                    | LABRACK <offset_mult> RABRACK
+
+<range> ::= LOWER ASSIGN <constr_expression> COMMA UPPER ASSIGN
+            <constr_expression>
+          | UPPER ASSIGN <constr_expression> COMMA LOWER ASSIGN
+            <constr_expression>
+          | LOWER ASSIGN <constr_expression>
+          | UPPER ASSIGN <constr_expression>
+
+<offset_mult> ::= OFFSET ASSIGN <constr_expression> COMMA MULTIPLIER ASSIGN
+                  <constr_expression>
+                | MULTIPLIER ASSIGN <constr_expression> COMMA OFFSET ASSIGN
+                  <constr_expression>
+                | OFFSET ASSIGN <constr_expression>
+                | MULTIPLIER ASSIGN <constr_expression>
+
+<arr_dims> ::= ARRAY LBRACK <expression> (COMMA <expression>)* RBRACK
+
+
+
+

Expressions

+
<expression> ::= <expression> QMARK <expression> COLON <expression>
+               | <expression> <infixOp> <expression>
+               | <prefixOp> <expression>
+               | <expression> <postfixOp>
+               | <common_expression>
+
+<constr_expression> ::= <constr_expression> <arithmeticBinOp>
+                        <constr_expression>
+                      | <prefixOp> <constr_expression>
+                      | <constr_expression> <postfixOp>
+                      | <common_expression>
+
+<common_expression> ::= <identifier>
+                      | INTNUMERAL
+                      | REALNUMERAL
+                      | DOTNUMERAL
+                      | IMAGNUMERAL
+                      | LBRACE <expression> (COMMA <expression>)* RBRACE
+                      | LBRACK [<expression> (COMMA <expression>)*] RBRACK
+                      | <identifier> LPAREN [<expression> (COMMA
+                        <expression>)*] RPAREN
+                      | TARGET LPAREN RPAREN
+                      | <identifier> LPAREN <expression> BAR [<expression>
+                        (COMMA <expression>)*] RPAREN
+                      | LPAREN <expression> COMMA [<expression> (COMMA
+                        <expression>)*] RPAREN
+                      | <common_expression> DOTNUMERAL
+                      | <common_expression> LBRACK <indexes> RBRACK
+                      | LPAREN <expression> RPAREN
+
+<prefixOp> ::= BANG
+             | MINUS
+             | PLUS
+
+<postfixOp> ::= TRANSPOSE
+
+<infixOp> ::= <arithmeticBinOp>
+            | <logicalBinOp>
+
+<arithmeticBinOp> ::= PLUS
+                    | MINUS
+                    | TIMES
+                    | DIVIDE
+                    | IDIVIDE
+                    | MODULO
+                    | LDIVIDE
+                    | ELTTIMES
+                    | ELTDIVIDE
+                    | HAT
+                    | ELTPOW
+
+<logicalBinOp> ::= OR
+                 | AND
+                 | EQUALS
+                 | NEQUALS
+                 | LABRACK
+                 | LEQ
+                 | RABRACK
+                 | GEQ
+
+<indexes> ::= epsilon
+            | COLON
+            | <expression>
+            | <expression> COLON
+            | COLON <expression>
+            | <expression> COLON <expression>
+            | <indexes> COMMA <indexes>
+
+<printables> ::= <expression>
+               | <string_literal>
+               | <printables> COMMA <printables>
+
+
+

Statements

+
<statement> ::= <atomic_statement>
+              | <nested_statement>
+
+<atomic_statement> ::= <common_expression> <assignment_op> <expression>
+                       SEMICOLON
+                     | <identifier> LPAREN [<expression> (COMMA
+                       <expression>)*] RPAREN SEMICOLON
+                     | <expression> TILDE <identifier> LPAREN [<expression>
+                       (COMMA <expression>)*] RPAREN [<truncation>] SEMICOLON
+                     | TARGET PLUSASSIGN <expression> SEMICOLON
+                     | JACOBIAN PLUSASSIGN <expression> SEMICOLON
+                     | BREAK SEMICOLON
+                     | CONTINUE SEMICOLON
+                     | PRINT LPAREN <printables> RPAREN SEMICOLON
+                     | REJECT LPAREN <printables> RPAREN SEMICOLON
+                     | FATAL_ERROR LPAREN <printables> RPAREN SEMICOLON
+                     | RETURN <expression> SEMICOLON
+                     | RETURN SEMICOLON
+                     | SEMICOLON
+
+<assignment_op> ::= ASSIGN
+                  | PLUSASSIGN
+                  | MINUSASSIGN
+                  | TIMESASSIGN
+                  | DIVIDEASSIGN
+                  | ELTTIMESASSIGN
+                  | ELTDIVIDEASSIGN
+
+<string_literal> ::= STRINGLITERAL
+
+<truncation> ::= TRUNCATE LBRACK [<expression>] COMMA [<expression>] RBRACK
+
+<nested_statement> ::= IF LPAREN <expression> RPAREN <vardecl_or_statement>
+                       ELSE <vardecl_or_statement>
+                     | IF LPAREN <expression> RPAREN <vardecl_or_statement>
+                     | WHILE LPAREN <expression> RPAREN
+                       <vardecl_or_statement>
+                     | FOR LPAREN <identifier> IN <expression> COLON
+                       <expression> RPAREN <vardecl_or_statement>
+                     | FOR LPAREN <identifier> IN <expression> RPAREN
+                       <vardecl_or_statement>
+                     | PROFILE LPAREN <string_literal> RPAREN LBRACE
+                       <vardecl_or_statement>* RBRACE
+                     | LBRACE <vardecl_or_statement>* RBRACE
+
+<vardecl_or_statement> ::= <statement>
+                         | <var_decl>
+
+<top_vardecl_or_statement> ::= <statement>
+                             | <top_var_decl>
+
+
+
+

Tokenizing rules

+

Many of the tokens used in the BNF grammars follow obviously from their names: DATABLOCK is the literal string ‘data’, COMMA is a single ‘,’ character, etc. The literal representation of each operator is additionally provided in the operator precedence table.

+

A few tokens are not so obvious, and are defined here in regular expressions:

+
IDENTIFIER = [a-zA-Z] [a-zA-Z0-9_]*
+
+STRINGLITERAL = ".*"
+
+INTNUMERAL = [0-9]+ (_ [0-9]+)*
+
+EXPLITERAL = [eE] [+-]? INTNUMERAL
+
+REALNUMERAL = INTNUMERAL \. INTNUMERAL? EXPLITERAL?
+            | \. INTNUMERAL EXPLITERAL
+            | INTNUMERAL EXPLITERAL
+
+IMAGNUMERAL = (REALNUMERAL | INTNUMERAL) i
+
+DOTNUMERAL = \. INTNUMERAL
+
+
+

Extra-grammatical constraints

+
+

Type constraints

+

A well-formed Stan program must satisfy the type constraints imposed by functions and distributions. For example, the binomial distribution requires an integer total count parameter and integer variate and when truncated would require integer truncation points. If these constraints are violated, the program will be rejected during compilation with an error message indicating the location of the problem.

+
+
+

Operator precedence and associativity

+

In the Stan grammar provided in this chapter, the expression 1 + 2 * 3 has two parses. As described in the operator precedence table, Stan disambiguates between the meaning \(1 + (2 \times 3)\) and the meaning \((1 + 2) \times 3\) based on operator precedences and associativities.

+
+
+

Typing of compound declaration and definition

+

In a compound variable declaration and definition, the type of the right-hand side expression must be assignable to the variable being declared. The assignability constraint restricts compound declarations and definitions to local variables and variables declared in the transformed data, transformed parameters, and generated quantities blocks.

+
+
+

Typing of array expressions

+

The types of expressions used for elements in array expressions ('{' expressions '}') must all be of the same type or a mixture of scalar (int, real and complex) types (in which case the result is promoted to be of the highest type on the int -> real -> complex hierarchy).

+
+
+

Forms of numbers

+

Integer literals longer than one digit may not start with 0 and real literals cannot consist of only a period or only an exponent.

+
+
+

Conditional arguments

+

Both the conditional if-then-else statement and while-loop statement require the expression denoting the condition to be a primitive type, integer or real.

+
+
+

For loop containers

+

The for loop statement requires that we specify, in addition to the loop identifier, either a range consisting of two expressions denoting integers, separated by ‘:’, or a single expression denoting a container. The loop variable will be of type integer in the former case and of the contained type in the latter case. Furthermore, the loop variable must not be in scope (i.e., there is no masking of variables).

+
+ +
+

Only break and continue in loops

+

The break and continue statements may only be used within the body of a for-loop or while-loop.

+
+
+

Block-specific restrictions

+

Some constructs in the Stan language are only allowed in certain blocks or in certain kinds of user-defined functions.

+
+

PRNG functions

+

Functions ending in _rng may only be called in the transformed data and generated quantities block, and within the bodies of user-defined functions with names ending in _rng.

+
+
+

Unnormalized distributions

+

Unnormalized distributions (with suffixes _lupmf or _lupdf) may only be called in the model block, user-defined probability functions, or within the bodies of user defined functions which end in _lp.

+
+
+

Incrementing and accessing target

+

target += statements can only be used inside of the model block or user-defined functions which end in _lp.

+

User defined functions which end in _lp and the target() function can only be used in the model block, transformed parameters block, and in the bodies of other user defined functions which end in _lp.

+

Sampling statements (using ~) can only be used in the model block or in the bodies of user-defined functions which end in _lp.

+

jacobian += statements can only be used inside of the transformed parameters block or in functions that end with _jacobian.

+
+
+
+

Probability function naming

+

A probability function literal must have one of the following suffixes: _lpdf, _lpmf, _lcdf, or _lccdf.

+
+
+

Indexes

+

Standalone expressions used as indexes must denote either an integer (int) or an integer array (array[] int). Expressions participating in range indexes (e.g., a and b in a : b) must denote integers (int).

+

A second condition is that there not be more indexes provided than dimensions of the underlying expression (in general) or variable (on the left side of assignments) being indexed. A vector or row vector adds 1 to the array dimension and a matrix adds 2. That is, the type array[ , , ] matrix, a three-dimensional array of matrices, has five index positions: three for the array, one for the row of the matrix and one for the column.

+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/transforms.html b/docs/2_39/reference-manual/transforms.html new file mode 100644 index 000000000..bdf7d1c0e --- /dev/null +++ b/docs/2_39/reference-manual/transforms.html @@ -0,0 +1,2232 @@ + + + + + + + + + +Constraint Transforms + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Constraint Transforms

+

To avoid having to deal with constraints while simulating the Hamiltonian dynamics during sampling, every (multivariate) parameter in a Stan model is transformed to an unconstrained variable behind the scenes by the model compiler. The transform is based on the constraints, if any, in the parameter’s definition. Scalars or the scalar values in vectors, row vectors or matrices may be constrained with lower and/or upper bounds. Vectors may alternatively be constrained to be ordered, positive ordered, or simplexes. Matrices may be constrained to be correlation matrices or covariance matrices. This chapter provides a definition of the transforms used for each type of variable. For examples of how to declare and define these variables in a Stan program, see section Variable declaration. To directly access the functional form of these transformations from inside the Stan language, see Variable Transformation Functions in the Functions Reference.

+

Stan converts models to C++ classes which define probability functions with support on all of \(\mathbb{R}^K\), where \(K\) is the number of unconstrained parameters needed to define the constrained parameters defined in the program. The C++ classes also include code to transform the parameters from unconstrained to constrained and apply the appropriate Jacobians.

+
+

Limitations due to finite accuracy presentation

+

In this section the transformations are described mathematically. However, observed behavior can be different from the exact arithmetic.

+

Stan’s arithmetic is implemented using double-precision floating-point numbers, which may cause computation to behave differently than mathematics. For example, the lower bound constraint is defined below by an exponential inverse transform which mathematically excludes the lower bound, but if the closest floating-point number for the inverse transformed value is the boundary, then the value is rounded to the boundary. This may cause unexpected warnings or errors, if in other parts of the code the boundary value is invalid. For example, we may observe floating-point value 0 for a variance parameter that has been declared with lower=0. In general, double-precision floating-point numbers cannot reliably store more than 16 digits of a number in decimal. See more about floating point arithmetic in the Stan User’s Guide.

+

These issues are exacerbated by the fact that CmdStan stores the output to CSV files with 8 digits precision by default. More digits can be requested by the user at the cost of additional disk usage, as discussed in the CmdStan Command-Line Interface Overview.

+
+
+

Changes of variables

+

The support of a random variable \(X\) with density \(p_X(x)\) is that subset of values for which it has non-zero density,

+

\[ +\mathrm{supp}(X) = \{ x | p_X(x) > 0 \}. +\]

+

If \(f\) is a total function defined on the support of \(X\), then \(Y = +f(X)\) is a new random variable. This section shows how to compute the probability density function of \(Y\) for well-behaved transforms \(f\). The rest of the chapter details the transforms used by Stan.

+
+

Univariate changes of variables

+

Suppose \(X\) is one dimensional and \(f: \mathrm{supp}(X) \rightarrow +\mathbb{R}\) is a one-to-one, monotonic function with a differentiable inverse \(f^{-1}\). Then the density of \(Y\) is given by

+

\[ +p_Y(y) = p_X(f^{-1}(y)) + \, + \left| \, \frac{d}{dy} f^{-1}(y)\, \right|. +\]

+

The absolute derivative of the inverse transform measures how the scale of the transformed variable changes with respect to the underlying variable.

+
+
+

Multivariate changes of variables

+

The multivariate generalization of an absolute derivative is a Jacobian, or more fully the absolute value of the determinant of the Jacobian matrix of the transform. The Jacobian matrix measures the change of each output variable relative to every input variable and the absolute determinant uses that to determine the differential change in volume at a given point in the parameter space.

+

Suppose \(X\) is a \(K\)-dimensional random variable with probability density function \(p_X(x)\). A new random variable \(Y = f(X)\) may be defined by transforming \(X\) with a suitably well-behaved function \(f\). It suffices for what follows to note that if \(f\) is one-to-one and its inverse \(f^{-1}\) has a well-defined Jacobian, then the density of \(Y\) is

+

\[ +p_Y(y) = p_X(f^{-1}(y)) \, \left| \, \det \, J_{f^{-1}}(y) \, \right|, +\]

+

where \(\det{}\) is the matrix determinant operation and \(J_{f^{-1}}(y)\) is the Jacobian matrix of \(f^{-1}\) evaluated at \(y\). Taking \(x = +f^{-1}(y)\), the Jacobian matrix is defined by

+

\[ +J_{f^{-1}}(y) = +\left[ +\begin{array}{ccc}\displaystyle +\frac{\partial x_1}{\partial y_1} +& \cdots +& \displaystyle \frac{\partial x_1}{\partial y_{K}} +\\ +\vdots & \vdots & \vdots +\\ +\displaystyle\frac{\partial x_{K}}{\partial y_1} +& \cdots +& \displaystyle\frac{\partial x_{K}}{\partial y_{K}} +\end{array} +\right]. +\]

+

If the Jacobian matrix is triangular, the determinant reduces to the product of the diagonal entries,

+

\[ +\det \, J_{f^{-1}}(y) += \prod_{k=1}^K \frac{\partial x_k}{\partial y_k}. +\]

+

Triangular matrices naturally arise in situations where the variables are ordered, for instance by dimension, and each variable’s transformed value depends on the previous variable’s transformed values. Diagonal matrices, a simple form of triangular matrix, arise if each transformed variable only depends on a single untransformed variable.

+
+
+
+

Lower bounded scalar

+

Stan uses a logarithmic transform for lower and upper bounds.

+
+

Lower bound transform

+

If a variable \(X\) is declared to have lower bound \(a\), it is transformed to an unbounded variable \(Y\), where

+

\[ +Y = \log(X - a). +\]

+
+
+

Lower bound inverse transform

+

The inverse of the lower-bound transform maps an unbounded variable \(Y\) to a variable \(X\) that is bounded below by \(a\) by

+

\[ +X = \exp(Y) + a. +\]

+
+
+

Absolute derivative of the lower bound inverse transform

+

The absolute derivative of the inverse transform is

+

\[ +\left| \, +\frac{d}{dy} \left( \exp(y) + a \right) +\, \right| += \exp(y). +\]

+

Therefore, given the density \(p_X\) of \(X\), the density of \(Y\) is

+

\[ +p_Y(y) += p_X\!\left( \exp(y) + a \right) \cdot \exp(y). +\]

+
+
+
+

Upper bounded scalar

+

Stan uses a negated logarithmic transform for upper bounds.

+
+

Upper bound transform

+

If a variable \(X\) is declared to have an upper bound \(b\), it is transformed to the unbounded variable \(Y\) by

+

\[ +Y = \log(b - X). +\]

+
+
+

Upper bound inverse transform

+

The inverse of the upper bound transform converts the unbounded variable \(Y\) to the variable \(X\) bounded above by \(b\) through

+

\[ +X = b - \exp(Y). +\]

+
+
+

Absolute derivative of the upper bound inverse transform

+

The absolute derivative of the inverse of the upper bound transform is

+

\[ +\left| \, +\frac{d}{dy} \left( b - \exp(y) \right) +\, \right| += \exp(y). +\]

+

Therefore, the density of the unconstrained variable \(Y\) is defined in terms of the density of the variable \(X\) with an upper bound of \(b\) by

+

\[ +p_Y(y) += p_X \!\left( b - \exp(y) \right) \cdot \exp(y). +\]

+
+
+
+

Lower and upper bounded scalar

+

For lower and upper-bounded variables, Stan uses a scaled and translated log-odds transform.

+
+

Log odds and the logistic sigmoid

+

The log-odds function is defined for \(u \in (0,1)\) by

+

\[ +\mathrm{logit}(u) = \log \frac{u}{1 - u}. +\]

+

The inverse of the log odds function is the logistic sigmoid, defined for \(v \in (-\infty,\infty)\) by

+

\[ +\mathrm{logit}^{-1}(v) = \frac{1}{1 + \exp(-v)}. +\]

+

The derivative of the logistic sigmoid is

+

\[ +\frac{d}{dy} \mathrm{logit}^{-1}(y) += \mathrm{logit}^{-1}(y) \cdot \left( 1 - \mathrm{logit}^{-1}(y) \right). +\]

+
+
+

Lower and upper bounds transform

+

For variables constrained to be in the open interval \((a, b)\), Stan uses a scaled and translated log-odds transform. If variable \(X\) is declared to have lower bound \(a\) and upper bound \(b\), then it is transformed to a new variable \(Y\), where

+

\[ +Y = \mathrm{logit} \left( \frac{X - a}{b - a} \right). +\]

+
+
+

Lower and upper bounds inverse transform

+

The inverse of this transform is

+

\[ +X = a + (b - a) \cdot \mathrm{logit}^{-1}(Y). +\]

+
+
+

Absolute derivative of the lower and upper bounds inverse transform

+

The absolute derivative of the inverse transform is given by

+

\[ +\left| + \frac{d}{dy} + \left( + a + (b - a) \cdot \mathrm{logit}^{-1}(y) + \right) + \right| += (b - a) + \cdot \mathrm{logit}^{-1}(y) + \cdot \left( 1 - \mathrm{logit}^{-1}(y) \right). +\]

+

Therefore, the density of the transformed variable \(Y\) is

+

\[ +p_Y(y) += +p_X \! \left( a + (b - a) \cdot \mathrm{logit}^{-1}(y) \right) + \cdot (b - a) + \cdot \mathrm{logit}^{-1}(y) + \cdot \left( 1 - \mathrm{logit}^{-1}(y) \right). +\]

+

Despite the apparent complexity of this expression, most of the terms are repeated and thus only need to be evaluated once. Most importantly, \(\mathrm{logit}^{-1}(y)\) only needs to be evaluated once, so there is only one call to \(\exp(-y)\).

+
+
+
+

Affinely transformed scalar

+

Stan uses an affine transform to be able to specify parameters with a given offset and multiplier.

+
+

Affine transform

+

For variables with expected offset \(\mu\) and/or (positive) multiplier \(\sigma\), Stan uses an affine transform. Such a variable \(X\) is transformed to a new variable \(Y\), where

+

\[ +Y = \frac{X - \mu}{\sigma}. +\]

+

If only one of the two is specified, the other takes its default value: \(0\) for the offset \(\mu\) and \(1\) for the multiplier \(\sigma\).

+
+
+

Affine inverse transform

+

The inverse of this transform is

+

\[ +X = \mu + \sigma \cdot Y. +\]

+
+
+

Absolute derivative of the affine inverse transform

+

The absolute derivative of the affine inverse transform is

+

\[ +\left| + \frac{d}{dy} + \left( + \mu + \sigma \cdot y + \right) + \right| += \sigma. +\]

+

Therefore, the density of the transformed variable \(Y\) is

+

\[ +p_Y(y) += +p_X \! \left( \mu + \sigma \cdot y \right) + \cdot \sigma. +\]

+

For an example of how to code this in Stan, see section Affinely Transformed Real.

+
+
+
+

Ordered vector

+

For some modeling tasks, a vector-valued random variable \(X\) is required with support on ordered sequences. One example is the set of cut points in ordered logistic regression.

+

In constraint terms, an ordered \(K\)-vector \(x \in \mathbb{R}^K\) satisfies

+

\[ +x_k < x_{k+1} +\]

+

for \(k \in \{ 1, \ldots, K-1 \}\).

+
+

Ordered transform

+

Stan’s transform follows the constraint directly. It maps an increasing vector \(x \in \mathbb{R}^{K}\) to an unconstrained vector \(y \in +\mathbb{R}^K\) by setting

+

\[ +y_k += +\left\{ +\begin{array}{ll} +x_1 & \mbox{if } k = 1, \mbox{ and} +\\ +\log \left( x_{k} - x_{k-1} \right) & \mbox{if } 1 < k \leq K. +\end{array} +\right. +\]

+
+
+

Ordered inverse transform

+

The inverse transform for an unconstrained \(y \in \mathbb{R}^K\) to an ordered sequence \(x \in \mathbb{R}^K\) is defined by the recursion

+

\[ +x_k += +\left\{ +\begin{array}{ll} +y_1 & \mbox{if } k = 1, \mbox{ and} +\\ +x_{k-1} + \exp(y_k) & \mbox{if } 1 < k \leq K. +\end{array} +\right. +\]

+

\(x_k\) can also be expressed in closed form as the sum

+

\[ +x_k = y_1 + \sum_{k'=2}^k \exp(y_{k'}). +\]

+
+
+

Absolute Jacobian determinant of the ordered inverse transform

+

The Jacobian of the inverse transform \(f^{-1}\) is lower triangular, with diagonal elements for \(1 \leq k \leq K\) of

+

\[ +J_{k,k} = +\left\{ +\begin{array}{ll} +1 & \mbox{if } k = 1, \mbox{ and} +\\ +\exp(y_k) & \mbox{if } 1 < k \leq K. +\end{array} +\right. +\]

+

Because \(J\) is triangular, the absolute Jacobian determinant is

+

\[ +\left| \, \det \, J \, \right| +\ = \ +\left| \, \prod_{k=1}^K J_{k,k} \, \right| +\ = \ +\prod_{k=2}^K \exp(y_k). +\]

+

Putting this all together, if \(p_X\) is the density of \(X\), then the transformed variable \(Y\) has density \(p_Y\) given by

+

\[ +p_Y(y) += p_X(f^{-1}(y)) +\ +\prod_{k=2}^K \exp(y_k). +\]

+
+
+
+

Positive ordered vector

+

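A positive ordered \(K\)-vector \(x \in \mathbb{R}^K\) is an ordered vector whose elements are all positive, i.e., \(0 < x_1\) and \(x_k < x_{k+1}\) for \(k \in \{ 1, \ldots, K-1 \}\).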

+

The positive ordered transformation is defined in the same style as the ordered transformation above, but with the first element being exponentiated to ensure positivity.

+
+
+

Sum-to-zero transforms

+

Stan provides built-in constraint transforms for sum-to-zero vectors and sum-to-zero matrices. A sum-to-zero vector is a real-valued vector of length \(N\) whose elements sum to zero. A sum-to-zero matrix is an \(N \times M\) matrix in which every row and every column sums to zero.

+

Stan uses an orthogonal basis as the initial point of construction. The orthogonal basis balances the constraint across each unconstrained value. The basis is a matrix \(H\) such that \(H\in\mathbb R^{N\times (N-1)}\) and \(H^{\mathsf T}H=I_{N-1},\;H^{\mathsf T}\mathbf 1=0\). The sum-to-zero vector lies in the subspace where the vector sums to zero. Although this seems tautological, the orthogonal basis construction allows all the marginal variances of the constrained vector to be the same; see, e.g., (Seyboldt 2024). Simpler alternatives, such as setting the final element to the negative sum of the other elements, do not result in equal variances. It is worth noting that even with equal marginal variances, the elements of a sum-to-zero vector are negatively correlated with one another.

+

In many cases one wishes to model the sum-to-zero vector as normally distributed and the induced covariance matrix is fully known:

+

\[ + \sigma^2 \begin{pmatrix} + 1-\tfrac{1}{N} & -\tfrac{1}{N} & \cdots&-\tfrac{1}{N} \\ + -\tfrac{1}{N} & 1-\tfrac{1}{N} & \cdots & -\tfrac{1}{N} \\ + \vdots & \vdots & \ddots & \vdots \\ + -\tfrac{1}{N} & -\tfrac{1}{N} & \cdots &1-\tfrac{1}{N} + \end{pmatrix}. +\]

+

The marginal standard deviation no longer corresponds to \(\sigma\) but is \(\sigma \sqrt{1-\tfrac{1}{N}}\). The properties of the normal distribution allow multiplying the sum-to-zero vector by the reciprocal of \(\sqrt{1-\tfrac{1}{N}}\) to adjust the variance to the intended \(\sigma\):

+
sum_to_zero_vector[N] x;
+x ~ normal(0, sqrt(N * inv(N - 1)));  // real-valued N / (N - 1); rescales so each x[n] has marginal sd 1
+

When \(\sigma\) is a parameter there is an additional adjustment when using the centered version of the sum-to-zero constraint. If we let \(y\) be the unconstrained \(N - 1\) vector and \(y\) is implicitly given a standard normal prior, then the sum-to-zero vector distributed as normal with a mean of zero and a standard deviation of \(\sigma\) is given by \[ +x = \underbrace{\sigma\sqrt{\frac{N}{N-1}}\;H}_{\text{ size of } N \times (N - 1)}\,y. +\] This is the classic non-centered model. The crucial detail from above is that the operation of multiplying \(\sigma\) to \(y\) is on \(N - 1\) dimensions, not \(N\) dimensions. When writing the centered model using \[ +x \sim \mathcal N \bigg(0,\, \sigma\sqrt{\frac{N}{N-1}} \bigg), +\] we are incrementing the log density by an additional \(-\log(\sigma)\) and so must increment the log density by

+
target += log(sigma * sqrt(N * inv(N - 1)));
+
+// or, equivalently up to an additive constant,
+// because `sqrt(N * inv(N - 1))` is constant and its log can be dropped
+
+target += log(sigma);
+

which correctly adjusts for the \(N - 1\) free variables.

+

More details of the Helmert matrix are in (Lancaster 1965), for the basic definitions of the isometric log ratio transform see (Egozcue et al. 2003) and Chapter 3 of (Filzmoser, Hron, and Templ 2018) for the pivot coordinate version used here.

+
+

Sum-to-zero vector

+

Vectors that are constrained to sum-to-zero are useful for, among other things, additive varying effects, such as varying slopes or intercepts in a regression model (e.g., for income deciles).

+

A sum-to-zero \(K\)-vector \(x \in \mathbb{R}^K\) satisfies the constraint \[ +\sum_{k=1}^K x_k = 0. +\]

+
+
+

Sum-to-zero vector transform

+

The transform is defined iteratively. Given an \(x \in \mathbb{R}^{N + 1}\) that sums to zero (i.e., \(\sum_{n=1}^{N+1} x_n = 0\)), the transform proceeds as follows to produce an unconstrained \(y \in \mathbb{R}^N\). This is mathematically equivalent to pre-multiplying the unconstrained \(y\) by an orthogonal standard basis, e.g. constructing orthogonal vectors from the standard basis using the Gram-Schmidt process. The single loop version below achieves low computational and memory costs as no matrices are created or multiplied.

+

The transform is initialized by setting \[ +S_N = 0 +\] and \[ +y_N = -x_{N + 1} \sqrt{1 + \frac{1}{N}}. +\] Then, for each \(n\) from \(N - 1\) down to \(1\), let \[ +w_{n + 1} = \frac{y_{n + 1}}{\sqrt{(n + 1)(n + 2)}}, +\] \[ +S_n = S_{n + 1} + w_{n + 1}, +\] and \[ +y_n = (S_n - x_{n + 1}) \frac{\sqrt{n (n + 1)}}{n}. +\]

+

This transform is expressed in Stan code as:

+
  vector manual_sum_to_zero_jacobian(vector x) {  // x -> y; x is expected to sum to zero (not checked here); no jacobian += is issued
+    int N = size(x) - 1;  // the result has one element fewer than x
+    vector[N] y;
+    y[N] = -x[N+1] * sqrt(1 + 1. / N);  // last element first; the literal 1. makes 1. / N a real division
+    real sum_w = 0;  // running sum of the w terms computed so far
+    for (n in 1:(N-1)) {
+      int i = N - n;  // i runs from N - 1 down to 1
+      int i_p_1 = i + 1;  // index of the element of y filled in on the previous step
+      real w = y[i_p_1] * inv_sqrt(i_p_1 * (i_p_1 + 1));
+      sum_w += w;
+      y[i] = (sum_w - x[i_p_1]) * sqrt(i_p_1 * i) / i;  // depends on y[i + 1], hence the descending order
+    }
+    return y;
+  }
+
+
+

Sum-to-zero vector inverse transform

+

The inverse transform follows the isometric logratio transform. It maps an unconstrained vector \(y \in \mathbb{R}^N\) to a sum-to-zero vector \(x \in \mathbb{R}^{N + 1}\) such that \[ +\sum_{n=1}^{N + 1} x_n = 0. +\] The values are defined inductively, starting with \[ +x_1 = \sum_{n=1}^N \frac{y_n}{\sqrt{n (n + 1)}} +\] and then setting \[ +x_{n + 1} = \sum_{i = n + 1}^N \frac{y_i}{\sqrt{i (i + 1)}} +- n \cdot \frac{y_n}{\sqrt{n (n + 1)}} +\] for \(n \in 1{:}N\).

+

The definition is such that \[ +\sum_{n = 1}^{N + 1} x_n = 0 +\] by construction, because each of the terms added to \(x_{n}\) is then subtracted from \(x_{n + 1}\) the number of times it shows up in earlier terms.

+
+
+

Absolute Jacobian determinant of the zero sum inverse transform

+

The inverse transform is a linear operation, leading to a constant Jacobian determinant which is therefore not included.

+

The sum-to-zero inverse transform is expressed within Stan as:

+
  vector manual_sum_to_zero_inverse_jacobian(vector y) {  // y -> x with one extra element; no jacobian += is issued
+    int N = num_elements(y);
+    vector[N + 1] x = zeros_vector(N + 1);  // start from zeros; entries are built up by += and -= below
+    real sum_w = 0;  // running sum of w over the indexes visited so far (i, ..., N)
+    for (n in 1:N) {
+      int i = N - n + 1;  // i runs from N down to 1
+      real w = y[i] * inv_sqrt(i * (i + 1));
+      sum_w += w;
+      x[i] += sum_w;  // x[i] receives every w with index >= i
+      x[i + 1] -= w * i;  // w was added to i entries (x[1..i]), so subtract it i times to keep the total at zero
+    }
+    return x;
+  }
+

Note that there is no target += increment because the transform is linear, so its Jacobian determinant is constant and can be dropped.

+
+
+

Sum-to-zero matrix transform

+

The matrix case of the sum-to-zero transform generalizes the vector case to ensure that every row of the matrix sums to zero and every column of the matrix sums to zero. In fact, any \(N\)-dimensional array can be constructed into a sum-to-zero \(N\)-dimensional array using the sum-to-zero vector. This is because the vector transform is a linear bijection and produces an orthogonally constructed sum-to-zero object by applying the one-dimensional transform across each array slice. For the matrix case, there are two slices present, the rows and columns, to perform the transform over. The sum-to-zero vector is applied over the vectorized slice of either the row or column slice and subsequently to the other slice.

+

Let the unconstrained matrix be \[ +\mathcal Y \in \mathbb R^{n_1 \times n_2} +\] and the zero sum vector transform as \[ +\mathbf z = \mathcal C_n(\mathbf y)\;=\; +\begin{bmatrix}H_n\\[2pt]-\mathbf 1_{1\times d}\end{bmatrix}\mathbf y +\;\in\mathbb R^{n+1}, +\] where \(H_n\in\mathbb R^{n \times n}\) is the orthogonal Helmert matrix and satisfies \(\mathbf 1_{1\times d}A_d^{\!\top}=0\).

+

Applying \(C_n\) to each slice results in

+

\[ +\mathcal Z \;=\; +\mathcal Y +\times_1 \bigl[\mathcal C_{n_1}\bigr] +\times_2 \bigl[\mathcal C_{n_2}\bigr] +\] where \[ +\mathcal Z \in +\mathbb R^{(n_1+1)\times(n_2+1)}. +\]

+

Because each \(\mathcal C_{n_i}\) is invertible on the \(\mathbf 1^\perp\) subspace, the composite map applied to \(\mathcal Y\) is a linear bijection between \(\mathbb R^{n_1\times n_2}\) and its image, the sum-to-zero matrices in \(\mathbb R^{(n_1+1)\times(n_2+1)}\).

+
+
+
+

Unit simplex

+

Variables constrained to the unit simplex show up in multivariate discrete models as both parameters (categorical and multinomial) and as variates generated by their priors (Dirichlet and multivariate logistic).

+

The unit \(K\)-simplex is the set of points \(x \in \mathbb{R}^K\) such that for \(1 \leq k \leq K\),

+

\[ +x_k > 0, +\]

+

and

+

\[ +\sum_{k=1}^K x_k = 1. +\]

+

An alternative definition is to take the convex closure of the vertices. For instance, in 2-dimensions, the simplex vertices are the extreme values \((0,1)\), and \((1,0)\) and the unit 2-simplex is the line connecting these two points; values such as \((0.3,0.7)\) and \((0.99,0.01)\) lie on the line. In 3-dimensions, the basis is \((0,0,1)\), \((0,1,0)\) and \((1,0,0)\) and the unit 3-simplex is the boundary and interior of the triangle with these vertices. Points in the 3-simplex include \((0.5,0.5,0)\), \((0.2,0.7,0.1)\) and all other triplets of non-negative values summing to 1.

+

As these examples illustrate, the simplex always picks out a subspace of \(K-1\) dimensions from \(\mathbb{R}^K\). Therefore a point \(x\) in the \(K\)-simplex is fully determined by its first \(K-1\) elements \(x_1, x_2, +\ldots, x_{K-1}\), with

+

\[ +x_K = 1 - \sum_{k=1}^{K-1} x_k. +\]

+
+

Unit simplex inverse transform

+

The length-\(K\) unit simplex inverse transform is given by the softmax of a sum-to-zero vector of length \(K\).

+

Let \(y\) represent the unconstrained \(K - 1\) values in \((-\infty, \infty)\). The intermediate sum-to-zero vector \(z = \text{sum\_to\_zero\_transform}(y)\) is of length \(K\). The unit simplex is then given by \[ +x_i = \text{softmax}(z)_i = \frac{\exp(z_i)}{\sum_{k = 1}^K \exp(z_k)}. +\]

+

The sum-to-zero vector transform is described in further detail at the sum-to-zero vector section of the Reference Manual.

+
+
+
+ +
+
+Note +
+
+
+

All versions of Stan pre-2.37 used the stick-breaking transform. This is documented at Stan 2.36 Reference Manual: Simplex Transform.

+
+
+
+

Absolute Jacobian determinant of the unit-simplex inverse transform

+

The Jacobian \(J\) of the inverse unit-simplex transform is found by restricting \(J\) to the subspace spanned by the sum-to-zero vector \(z\). The Jacobian is given as the \((K - 1) \times (K - 1)\) matrix \(J\) where

+

\[ +J_{ij} = \frac{\partial x_i}{\partial z_j} = +\frac{\partial}{\partial z_j} \left( \frac{\exp(z_i)}{\sum_{k = 1}^K \exp(z_k)} \right) +\] and \(i,j \in \{1, \ldots, K - 1\}\).

+

The diagonal and off-diagonal derivatives are found using the derivative quotient rule and algebraic simplification

+

\[ +J_{ij} = +\begin{cases} +x_i (1 - x_i), & \text{if } i = j, \\ +-x_i x_j, & \text{if } i \neq j. +\end{cases} +\]

+

In matrix form this can be expressed as

+

\[ +J = \text{diag}(x) - x x^\top +\]

+

The determinant of this matrix can be found using the Matrix Determinant Lemma:

+

\[ +\det\bigl(A + u v^{\top}\bigr) += +\det(A)\,\bigl(1 + v^{\top}A^{-1}u\bigr). +\]

+

Here,

+

\[ +A \;=\; \operatorname{diag}(x_{1},\ldots, x_{K-1}), +\quad +u \;=\; -\bigl(x_1,\ldots, x_{K-1}\bigr)^{\!\top}, +\quad +v \;=\; \bigl(x_{1}, \ldots, x_{K-1}\bigr)^{\!\top}. +\] Therefore,

+

\[ +\begin{aligned} +\det(J) +&= +\bigg(\prod_{i=1}^{K-1} x_i \bigg) +\bigg(1 + (x_{1},\ldots, x_{K-1})\,\mathrm{diag}\bigl(x_{1}^{-1},\ldots,x_{K-1}^{-1}\bigr)\, +\big(-x_{1},\ldots,-x_{K-1}\big)^{\top} +\bigg) \\ +&= +\bigg(\prod_{i=1}^{K-1} x_{i}\bigg) +\bigg(1 - \sum_{i=1}^{K-1} x_{i}\bigg) += +\bigg(\prod_{i=1}^{K-1} x_{i}\bigg) x_{K} \\ +&= +\prod_{i=1}^{K} x_{i}. +\end{aligned} +\]

+
+
+
+

Unit simplex transform

+

The transform \(Y = f(X)\) can be derived by reversing the stages of the inverse transform,

+

\[ +y_k += H^\top \bigg(\log(x_k) +- \frac{1}{K}\sum_{i=1}^K\log(x_i) \bigg) +. +\]

+

The matrix \(H\) is the orthogonal basis matrix the sum-to-zero vector uses. Since the matrix is orthonormal, the transpose is the same as the inverse.

+
+
+
+

Stochastic Matrix

+

The column_stochastic_matrix[N, M] and row_stochastic_matrix[M, N] type in Stan represents an \(N \times M\) matrix where each column (row) is a unit simplex of dimension \(N\). In other words, each column (row) of the matrix is a vector constrained to have non-negative entries that sum to one.

+
+

Definition of a Stochastic Matrix

+

A column stochastic matrix \(X \in \mathbb{R}^{N \times M}\) is defined such that each column is a simplex. For column \(m\) (where \(1 \leq m \leq M\)):

+

\[ +X_{n, m} \geq 0 \quad \text{for } 1 \leq n \leq N, +\]

+

and

+

\[ +\sum_{n=1}^N X_{n, m} = 1. +\]

+

A row stochastic matrix is any matrix whose transpose is a column stochastic matrix (i.e. the rows of the matrix are simplexes)

+

\[ +X_{n, m} \geq 0 \quad \text{for } 1 \leq n \leq N, +\]

+

and

+

\[ +\sum_{m=1}^N X_{n, m} = 1. +\]

+

This definition ensures that each column (row) of the matrix \(X\) lies on the \(N-1\) dimensional unit simplex, similar to the simplex[N] type, but extended across multiple columns(rows).

+
+
+

Inverse Transform for Stochastic Matrix

+

For the column and row stochastic matrices the inverse transform is the same as simplex, but applied to each column (row).

+
+
+

Absolute Jacobian Determinant for the Inverse Transform

+

The Jacobian determinant of the inverse transform for each column \(m\) in the matrix is the same as that of the unit-simplex inverse transform, because each column is transformed as an independent simplex. It is therefore the product of the entries of that column:

+

\[ +\left| \det J_m \right| = \prod_{n=1}^{N} X_{n, m}. +\]

+

Thus, the overall Jacobian determinant for the entire column_stochastic_matrix and row_stochastic_matrix is the product of the determinants for each column (row):

+

\[ +\left| \det J \right| = \prod_{m=1}^{M} \left| \det J_m \right|. +\]

+
+
+

Transform for Stochastic Matrix

+

For the column and row stochastic matrices the transform is the same as simplex, but applied to each column (row).

+
+
+
+

Unit vector

+

An \(n\)-dimensional vector \(x \in \mathbb{R}^n\) is said to be a unit vector if it has unit Euclidean length, so that

+

\[ +\Vert x \Vert +\ = \ \sqrt{x^{\top}\,x} +\ = \ \sqrt{x_1^2 + x_2^2 + \cdots + x_n^2} +\ = \ 1\ . +\]

+
+

Unit vector inverse transform

+

Stan divides an unconstrained vector \(y \in \mathbb{R}^{n}\) by its norm, \(\Vert y \Vert = \sqrt{y^\top y}\), to obtain a unit vector \(x\),

+

\[ +x = \frac{y}{\Vert y \Vert}. +\]

+

To generate a unit vector, Stan generates points at random in \(\mathbb{R}^n\) with independent unit normal distributions, which are then standardized by dividing by their Euclidean length. Muller (1959) showed this generates points uniformly at random on \(S^{n-1}\). That is, if we draw \(y_i \sim \mathsf{Normal}(0, 1)\) for \(i \in 1{:}n\), then \(x = \frac{y}{\Vert y \Vert}\) has a uniform distribution over \(S^{n-1}\). This allows us to use an \(n\)-dimensional basis for \(S^{n-1}\) that preserves local neighborhoods in that points that are close to each other in \(\mathbb{R}^n\) map to points near each other in \(S^{n-1}\). The mapping is not perfectly distance preserving, because there are points arbitrarily far away from each other in \(\mathbb{R}^n\) that map to identical points in \(S^{n-1}\).

+
+

Warning: undefined at zero!

+

The above mapping from \(\mathbb{R}^n\) to \(S^{n-1}\) is not defined at zero. While this point outcome has measure zero during sampling, and may thus be ignored, it is the default initialization point and thus unit vector parameters cannot be initialized at zero. A simple workaround is to initialize from a very small interval around zero, which is an option built into all of the Stan interfaces.

+
+
+
+

Absolute Jacobian determinant of the unit vector inverse transform

+

The Jacobian matrix relating the input vector \(y\) to the output vector \(x\) is singular because \(x^\top x = 1\) for any non-zero input vector \(y\). Thus, there technically is no unique transformation from \(x\) to \(y\). To circumvent this issue, let \(r = \sqrt{y^\top y}\) so that \(y = r +x\). The transformation from \(\left(r, x_{-n}\right)\) to \(y\) is well-defined but \(r\) is arbitrary, so we set \(r = 1\). In this case, the determinant of the Jacobian is proportional to \(e^{-\frac{1}{2} y^\top y}\), which is the kernel of a standard multivariate normal distribution with \(n\) independent dimensions.

+
+
+
+

Correlation matrices

+

A \(K \times K\) correlation matrix \(x\) must be symmetric, so that

+

\[ +x_{k,k'} = x_{k',k} +\]

+

for all \(k,k' \in \{ 1, \ldots, K \}\), it must have a unit diagonal, so that

+

\[ +x_{k,k} = 1 +\]

+

for all \(k \in \{ 1, \ldots, K \}\), and it must be positive definite, so that for every non-zero \(K\)-vector \(a\),

+

\[ +a^{\top} x a > 0. +\]

+

The number of free parameters required to specify a \(K \times K\) correlation matrix is \(\binom{K}{2}\).

+

There is more than one way to map from \(\binom{K}{2}\) unconstrained parameters to a \(K \times K\) correlation matrix. Stan implements the Lewandowski-Kurowicka-Joe (LKJ) transform (Lewandowski, Kurowicka, and Joe 2009).

+
+

Correlation matrix inverse transform

+

It is easiest to specify the inverse, going from its \(\binom{K}{2}\) parameter basis to a correlation matrix. The basis will actually be broken down into two steps. To start, suppose \(y\) is a vector containing \(\binom{K}{2}\) unconstrained values. These are first transformed via the bijective function \(\tanh : \mathbb{R} \rightarrow +(-1, 1)\)

+

\[ +\tanh y = \frac{\exp(2y) - 1}{\exp(2y) + 1}. +\]

+

Then, define a \(K \times K\) matrix \(z\), the upper triangular values of which are filled by row with the transformed values, and the diagonal entries are set to one. For example, in the \(4 \times 4\) case, there are \(\binom{4}{2}\) values arranged as

+

\[ +z += +\left[ +\begin{array}{cccc} +1 & \tanh y_1 & \tanh y_2 & \tanh y_4 +\\ +0 & 1 & \tanh y_3 & \tanh y_5 +\\ +0 & 0 & 1 & \tanh y_6 +\\ +0 & 0 & 0 & 1 +\end{array} +\right] +. +\]

+

Lewandowski, Kurowicka and Joe (LKJ) show how to bijectively map the array \(z\) to a correlation matrix \(x\). The entry \(z_{i,j}\) for \(i < +j\) is interpreted as the canonical partial correlation (CPC) between \(i\) and \(j\), which is the correlation between \(i\)’s residuals and \(j\)’s residuals when both \(i\) and \(j\) are regressed on all variables \(i'\) such that \(i'< i\). In the case of \(i=1\), there are no earlier variables, so \(z_{1,j}\) is just the Pearson correlation between \(1\) and \(j\).

+

In Stan, the LKJ transform is reformulated in terms of a Cholesky factor \(w\) of the final correlation matrix, defined for \(1 \leq i,j \leq K\) by

+

\[ +w_{i,j} = +\left\{ +\begin{array}{cl} +0 & \mbox{if } i > j, +\\ +1 & \mbox{if } 1 = i = j, +\\ +\prod_{i'=1}^{i - 1} \left( 1 - z_{i'\!,\,j}^2 \right)^{1/2} +& \mbox{if } 1 < i = j, +\\ +z_{i,j} & \mbox{if } 1 = i < j, \mbox{ and} +\\ +z_{i,j} \, \prod_{i'=1}^{i-1} \left( 1 - z_{i'\!,\,j}^2 \right)^{1/2} +& \mbox{ if } 1 < i < j. +\end{array} +\right. +\]

+

This does not require as much computation per matrix entry as it may appear; calculating the rows in terms of earlier rows yields the more manageable expression

+

\[ +w_{i,j} = +\left\{ +\begin{array}{cl} +0 & \mbox{if } i > j, +\\ +1 & \mbox{if } 1 = i = j, +\\ +z_{i,j} & \mbox{if } 1 = i < j, \mbox{ and} +\\ +\frac{z_{i,j}}{z_{i-1,j}} \ w_{i-1,j} \left( 1 - z_{i-1,j}^2 \right)^{1/2} +& \mbox{ if } 1 < i \leq j. +\end{array} +\right. +\]

+

Given the upper-triangular Cholesky factor \(w\), the final correlation matrix is

+

\[ +x = w^{\top} w. +\]

+

Lewandowski, Kurowicka, and Joe (2009) show that the determinant of the correlation matrix can be defined in terms of the canonical partial correlations as

+

\[ +\mbox{det} \, x = \prod_{i=1}^{K-1} \ \prod_{j=i+1}^K \ (1 - z_{i,j}^2) += \prod_{1 \leq i < j \leq K} (1 - z_{i,j}^2). +\]

+
+
+

Absolute Jacobian determinant of the correlation matrix inverse transform

+

From the inverse of equation 11 in (Lewandowski, Kurowicka, and Joe 2009), the absolute Jacobian determinant is

+

\[ +\sqrt{\prod_{i=1}^{K-1}\prod_{j=i+1}^K \left(1-z_{i,j}^2\right)^{K-i-1}} \ +\times \prod_{i=1}^{K-1}\prod_{j=i+1}^K +\frac{\partial z_{i,j}}{\partial y_{i,j}} +\]

+
+
+

Correlation matrix transform

+

The correlation transform is defined by reversing the steps of the inverse transform defined in the previous section.

+

Starting with a correlation matrix \(x\), the first step is to find the unique upper triangular \(w\) such that \(x = w^{\top} w\). Because \(x\) is positive definite, this can be done by applying the Cholesky decomposition,

+

\[ +w = \mbox{chol}(x). +\]

+

The next step from the Cholesky factor \(w\) back to the array \(z\) of canonical partial correlations (CPCs) is simplified by the ordering of the elements in the definition of \(w\), which when inverted yields

+

\[ +z_{i,j} = +\left\{ +\begin{array}{cl} +0 & \mbox{if } i > j, +\\ +1 & \mbox{if } i = j, +\\ +w_{i,j} & \mbox{if } 1 = i < j, \mbox{ and} +\\ +{w_{i,j}} +\ +\prod_{i'=1}^{i-1} \left( 1 - z_{i'\!,j}^2 \right)^{-1/2} +& \mbox{if } 1 < i < j. +\end{array} +\right. +\]

+

The final stage of the transform reverses the hyperbolic tangent transform, which is defined by

+

\[ +y = \tanh^{-1} z = \frac{1}{2} \log \left( \frac{1 + z}{1 - z} \right). +\]

+

The inverse hyperbolic tangent function, \(\tanh^{-1}\), is also called the Fisher transformation.

+
+
+
+

Covariance matrices

+

A \(K \times K\) matrix is a covariance matrix if it is symmetric and positive definite (see the previous section for definitions). It requires \(K + \binom{K}{2}\) free parameters to specify a \(K \times K\) covariance matrix.

+
+

Covariance matrix transform

+

Stan’s covariance transform is based on a Cholesky decomposition composed with a log transform of the positive-constrained diagonal elements.1

+

If \(x\) is a covariance matrix (i.e., a symmetric, positive definite matrix), then there is a unique lower-triangular matrix \(z = +\mathrm{chol}(x)\) with positive diagonal entries, called a Cholesky factor, such that

+

\[ +x = z \, z^{\top}. +\]

+

The off-diagonal entries of the Cholesky factor \(z\) are unconstrained, but the diagonal entries \(z_{k,k}\) must be positive for \(1 \leq k +\leq K\).

+

To complete the transform, the diagonal is log-transformed to produce a fully unconstrained lower-triangular matrix \(y\) defined by

+

\[ +y_{m,n} = +\left\{ +\begin{array}{cl} +0 & \mbox{if } m < n, +\\ +\log z_{m,m} & \mbox{if } m = n, \mbox{ and} +\\ +z_{m,n} & \mbox{if } m > n. +\end{array} +\right. +\]

+
+
+

Covariance matrix inverse transform

+

The inverse transform reverses the two steps of the transform. Given an unconstrained lower-triangular \(K \times K\) matrix \(y\), the first step is to recover the intermediate matrix \(z\) by reversing the log transform,

+

\[ +z_{m,n} = +\left\{ +\begin{array}{cl} +0 & \mbox{if } m < n, +\\ +\exp(y_{m,m}) & \mbox{if } m = n, \mbox{ and} +\\ +y_{m,n} & \mbox{if } m > n. +\end{array} +\right. +\]

+

The covariance matrix \(x\) is recovered from its Cholesky factor \(z\) by taking

+

\[ +x = z \, z^{\top}. +\]

+
+
+

Absolute Jacobian determinant of the covariance matrix inverse transform

+

The Jacobian is the product of the Jacobians of the exponential transform from the unconstrained lower-triangular matrix \(y\) to matrix \(z\) with positive diagonals and the product transform from the Cholesky factor \(z\) to \(x\).

+

The transform from unconstrained \(y\) to Cholesky factor \(z\) has a diagonal Jacobian matrix, the absolute determinant of which is thus

+

\[ +\prod_{k=1}^K \frac{\partial}{\partial y_{k,k}} \, \exp(y_{k,k}) +\ = \ +\prod_{k=1}^K \exp(y_{k,k}) +\ = \ +\prod_{k=1}^K z_{k,k}. +\]

+

The Jacobian matrix of the second transform from the Cholesky factor \(z\) to the covariance matrix \(x\) is also triangular, with diagonal entries corresponding to pairs \((m,n)\) with \(m \geq n\), defined by

+

\[ +\frac{\partial}{\partial z_{m,n}} +\left( z \, z^{\top} \right)_{m,n} +\ = \ +\frac{\partial}{\partial z_{m,n}} +\left( \sum_{k=1}^K z_{m,k} \, z_{n,k} \right) +\ = \ +\left\{ +\begin{array}{cl} +2 \, z_{n,n} & \mbox{if } m = n \mbox{ and } +\\ +z_{n,n} & \mbox{if } m > n. +\end{array} +\right. +\]

+

The absolute Jacobian determinant of the second transform is thus

+

\[ +2^{K} \ \prod_{m = 1}^{K} \ \prod_{n=1}^{m} z_{n,n} +\ = \ +2^{K} \ \prod_{n=1}^K \ \prod_{m=n}^K z_{n,n} +\ = \ +2^{K} \ \prod_{k=1}^K z_{k,k}^{K - k + 1}. +\]

+

Finally, the full absolute Jacobian determinant of the inverse of the covariance matrix transform from the unconstrained lower-triangular \(y\) to a symmetric, positive definite matrix \(x\) is the product of the Jacobian determinants of the exponentiation and product transforms,

+

\[ +\left( \prod_{k=1}^K z_{k,k} \right) +\left( +2^{K} \ \prod_{k=1}^K z_{k,k}^{K - k + 1} +\right) +\ = \ +2^K +\, \prod_{k=1}^K z_{k,k}^{K-k+2}. +\]

+

Let \(f^{-1}\) be the inverse transform from a \(K + \binom{K}{2}\)-vector \(y\) to the \(K \times K\) covariance matrix \(x\). A density function \(p_X(x)\) defined on \(K \times K\) covariance matrices is transformed to the density \(p_Y(y)\) over \(K + \binom{K}{2}\) vectors \(y\) by

+

\[ +p_Y(y) = p_X(f^{-1}(y)) \ 2^K \ \prod_{k=1}^K z_{k,k}^{K-k+2}. +\]

+
+
+
+

Cholesky factors of covariance matrices

+

An \(M \times M\) covariance matrix \(\Sigma\) can be Cholesky factored to a lower triangular matrix \(L\) such that \(L\,L^{\top} = \Sigma\). If \(\Sigma\) is positive definite, then \(L\) will be \(M \times M\). If \(\Sigma\) is only positive semi-definite, then \(L\) will be \(M \times N\), with \(N < M\).

+

A matrix is a Cholesky factor for a covariance matrix if and only if it is lower triangular, the diagonal entries are positive, and \(M \geq +N\). A matrix satisfying these conditions ensures that \(L \, +L^{\top}\) is positive semi-definite if \(M > N\) and positive definite if \(M = N\).

+

A Cholesky factor of a covariance matrix requires \(N + \binom{N}{2} + +(M - N)N\) unconstrained parameters.

+
+

Cholesky factor of covariance matrix transform

+

Stan’s Cholesky factor transform only requires the first step of the covariance matrix transform, namely log transforming the positive diagonal elements. Suppose \(x\) is an \(M \times N\) Cholesky factor. The above-diagonal entries are zero, the diagonal entries are positive, and the below-diagonal entries are unconstrained. The transform required is thus

+

\[ +y_{m,n} = +\left\{ +\begin{array}{cl} +0 & \mbox{if } m < n, +\\ +\log x_{m,m} & \mbox{if } m = n, \mbox{ and} +\\ +x_{m,n} & \mbox{if } m > n. +\end{array} +\right. +\]

+
+
+

Cholesky factor of covariance matrix inverse transform

+

The inverse transform need only invert the logarithm with an exponentiation. If \(y\) is the unconstrained matrix representation, then the elements of the constrained matrix \(x\) are defined by

+

\[ +x_{m,n} = +\left\{ +\begin{array}{cl} +0 & \mbox{if } m < n, +\\ +\exp(y_{m,m}) & \mbox{if } m = n, \mbox{ and} +\\ +y_{m,n} & \mbox{if } m > n. +\end{array} +\right. +\]

+
+
+

Absolute Jacobian determinant of Cholesky factor inverse transform

+

The transform has a diagonal Jacobian matrix, the absolute determinant of which is

+

\[ +\prod_{n=1}^N \frac{\partial}{\partial y_{n,n}} \, \exp(y_{n,n}) +\ = \ +\prod_{n=1}^N \exp(y_{n,n}) +\ = \ +\prod_{n=1}^N x_{n,n}. +\]

+

Let \(x = f^{-1}(y)\) be the inverse transform from a \(N + \binom{N}{2} ++ (M - N)N\) vector to an \(M \times N\) Cholesky factor for a covariance matrix \(x\) defined in the previous section. A density function \(p_X(x)\) defined on \(M \times N\) Cholesky factors of covariance matrices is transformed to the density \(p_Y(y)\) over \(N + \binom{N}{2} ++ (M - N)N\) vectors \(y\) by

+

\[ +p_Y(y) = p_X(f^{-1}(y)) \prod_{n=1}^N x_{n,n}. +\]

+
+
+
+

Cholesky factors of correlation matrices

+

A \(K \times K\) correlation matrix \(\Omega\) is positive definite and has a unit diagonal. Because it is positive definite, it can be Cholesky factored to a \(K \times K\) lower-triangular matrix \(L\) with positive diagonal elements such that \(\Omega = L\,L^{\top}\). Because the correlation matrix has a unit diagonal,

+

\[ +\Omega_{k,k} = L_k\,L_k^{\top} = 1, +\]

+

each row vector \(L_k\) of the Cholesky factor is of unit length. The length and positivity constraint allow the diagonal elements of \(L\) to be calculated from the off-diagonal elements, so that a Cholesky factor for a \(K \times K\) correlation matrix requires only \(\binom{K}{2}\) unconstrained parameters.

+
+

Cholesky factor of correlation matrix inverse transform

+

It is easiest to start with the inverse transform from the \(\binom{K}{2}\) unconstrained parameters \(y\) to the \(K \times K\) lower-triangular Cholesky factor \(x\). The inverse transform is based on the hyperbolic tangent function, \(\tanh\), which satisfies \(\tanh(x) \in (-1,1)\). Here it will function like an inverse logit with a sign to pick out the direction of an underlying canonical partial correlation; see the section on correlation matrix transforms for more information on the relation between canonical partial correlations and the Cholesky factors of correlation matrices.

+

Suppose \(y\) is a vector of \(\binom{K}{2}\) unconstrained values. Let \(z\) be a lower-triangular matrix with zero diagonal and below diagonal entries filled by row. For example, in the \(3 \times 3\) case,

+

\[ +z = +\left[ +\begin{array}{ccc} +0 & 0 & 0 +\\ +\tanh y_1 & 0 & 0 +\\ +\tanh y_2 & \tanh y_3 & 0 +\end{array} +\right] +\]

+

The matrix \(z\), with entries in the range \((-1, 1)\), is then transformed to the Cholesky factor \(x\), by taking2

+

\[ +x_{i,j} += +\left\{ +\begin{array}{lll} +0 & \mbox{ if } i < j & \mbox{ [above diagonal]} +\\ +\sqrt{1 - \sum_{j' < j} x_{i,j'}^2} + & \mbox{ if } i = j & \mbox{ [on diagonal]} +\\ +z_{i,j} \ \sqrt{1 - \sum_{j' < j} x_{i,j'}^2} + & \mbox{ if } i > j & \mbox{ [below diagonal]} +\end{array} +\right. +\]

+

In the \(3 \times 3\) case, this yields

+

\[ +x = +\left[ +\begin{array}{ccc} +1 & 0 & 0 +\\ +z_{2,1} & \sqrt{1 - x_{2,1}^2} & 0 +\\ +z_{3,1} & z_{3,2} \sqrt{1 - x_{3,1}^2} + & \sqrt{1 - (x_{3,1}^2 + x_{3,2}^2)} +\end{array} +\right], +\]

+

where the \(z_{i,j} \in (-1,1)\) are the \(\tanh\)-transformed \(y\).

+

The approach is a signed stick-breaking process on the quadratic (Euclidean length) scale. Starting from length 1 at \(j=1\), each below-diagonal entry \(x_{i,j}\) is determined by the (signed) fraction \(z_{i,j}\) of the remaining length for the row that it consumes. The diagonal entries \(x_{i,i}\) get any leftover length from earlier entries in their row. The above-diagonal entries are zero.

+
+
+

Cholesky factor of correlation matrix transform

+

Suppose \(x\) is a \(K \times K\) Cholesky factor for some correlation matrix. The first step of the transform reconstructs the intermediate values \(z\) from \(x\),

+

\[ +z_{i,j} = \frac{x_{i,j}}{\sqrt{1 - \sum_{j' < j}x_{i,j'}^2}}. +\]

+

The mapping from the resulting \(z\) to \(y\) inverts \(\tanh\),

+

\[ +y +\ = \ +\tanh^{-1} z +\ = \ +\frac{1}{2} \left( \log (1 + z) - \log (1 - z) \right). +\]

+
+
+

Absolute Jacobian determinant of inverse transform

+

The Jacobian of the full transform is the product of the Jacobians of its component transforms.

+

First, for the inverse transform \(z = \tanh y\), the derivative is

+

\[ +\frac{d}{dy} \tanh y = \frac{1}{(\cosh y)^2}. +\]

+

Second, for the inverse transform of \(z\) to \(x\), the resulting Jacobian matrix \(J\) is of dimension \(\binom{K}{2} \times +\binom{K}{2}\), with indexes \((i,j)\) for \((i > j)\). The Jacobian matrix is lower triangular, so that its determinant is the product of its diagonal entries, of which there is one for each \((i,j)\) pair,

+

\[ +\left| \, \mbox{det} \, J \, \right| + \ = \ \prod_{i > j} \left| \frac{d}{dz_{i,j}} x_{i,j} \right|, +\]

+

where

+

\[ +\frac{d}{dz_{i,j}} x_{i,j} += \sqrt{1 - \sum_{j' < j} x^2_{i,j'}}. +\]

+

So the combined density for unconstrained \(y\) is

+

\[ +p_Y(y) += p_X(f^{-1}(y)) + \ \ + \prod_{n \leq \binom{K}{2}} \frac{1}{(\cosh y_n)^2} + \ \ + \prod_{i > j} \left( 1 - \sum_{j' < j} x_{i,j'}^2 + \right)^{1/2}, +\]

+

where \(x = f^{-1}(y)\) is used for notational convenience. The log Jacobian determinant of the complete inverse transform \(x = f^{-1}(y)\) is given by

+

\[ +\log \left| \, \det J \, \right| += +-2 \sum_{n \leq \binom{K}{2}} +\log \cosh y_n +\ ++ +\ +\frac{1}{2} \ +\sum_{i > j} +\log \left( 1 - \sum_{j' < j} x_{i,j'}^2 \right) +. +\]

+ + + +
+
+
+ + + Back to top

References

+
+Egozcue, Juan José, Vera Pawlowsky-Glahn, Glòria Mateu-Figueras, and Carles Barcelo-Vidal. 2003. “Isometric Logratio Transformations for Compositional Data Analysis.” Mathematical Geology 35 (3): 279–300. +
+
+Filzmoser, Peter, Karel Hron, and Matthias Templ. 2018. Geometrical Properties of Compositional Data. Springer. +
+
+Lancaster, H. O. 1965. “The Helmert Matrices.” The American Mathematical Monthly 72 (1): 4–12. http://www.jstor.org/stable/2312989. +
+
+Lewandowski, Daniel, Dorota Kurowicka, and Harry Joe. 2009. “Generating Random Correlation Matrices Based on Vines and Extended Onion Method.” Journal of Multivariate Analysis 100: 1989–2001. +
+
+Muller, Mervin E. 1959. “A Note on a Method for Generating Points Uniformly on n-Dimensional Spheres.” Commun. ACM 2 (4): 19–20. https://doi.org/10.1145/377939.377946. +
+
+Seyboldt, Adrian. 2024. “Add ZeroSumNormal Distribution.” https://github.com/pyro-ppl/numpyro/pull/1751#issuecomment-1980569811. +
+

Footnotes

+ +
    +
  1. An alternative to the transform in this section, which can be coded directly in Stan, is to parameterize a covariance matrix as a scaled correlation matrix. An arbitrary \(K \times K\) covariance matrix \(\Sigma\) can be expressed in terms of a \(K\)-vector \(\sigma\) and correlation matrix \(\Omega\) as \[\Sigma = \mbox{diag}(\sigma) \times \Omega \times \mbox{diag}(\sigma),\] so that each entry is just a deviation-scaled correlation, \[\Sigma_{m,n} = \sigma_m \times \sigma_n \times \Omega_{m,n}.\]↩︎

  2. +
  3. For convenience, a summation with no terms, such as \(\sum_{j' < 1} x_{i,j'}\), is defined to be 0. This implies \(x_{1,1} = 1\) and that \(x_{i,1} = z_{i,1}\) for \(i > 1\).↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/types.html b/docs/2_39/reference-manual/types.html new file mode 100644 index 000000000..b9fc1d933 --- /dev/null +++ b/docs/2_39/reference-manual/types.html @@ -0,0 +1,2326 @@ + + + + + + + + + +Data Types and Declarations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Data Types and Declarations

+

This chapter covers the data types for expressions in Stan. Every variable used in a Stan program must have a declared data type. Only values of that type will be assignable to the variable (except for temporary states of transformed data and transformed parameter values). This follows the convention of programming languages like C++, not the conventions of scripting languages like Python or statistical languages such as R or BUGS.

+

The motivation for strong, static typing is threefold.

+
    +
  1. Strong typing forces the programmer’s intent to be declared with the variable, making programs easier to comprehend and hence easier to debug and maintain.
  2. +
  3. Strong typing allows programming errors relative to the declared intent to be caught sooner (at compile time) rather than later (at run time). The Stan compiler (called through an interface such as CmdStan, RStan, or PyStan) will flag any type errors and indicate the offending expressions quickly when the program is compiled.
  4. +
  5. Constrained types will catch runtime data, initialization, and intermediate value errors as soon as they occur rather than allowing them to propagate and potentially pollute final results.
  6. +
+

Strong typing disallows assigning the same variable to objects of different types at different points in the program or in different invocations of the program.

+
+

Overview of data types

+

Arguments for built-in and user-defined functions and local variables are required to be basic data types, meaning an unconstrained scalar, vector, or matrix type, or an array of such.

+

Passing arguments to functions in Stan works just like assignment to basic types. Stan functions are only specified for the basic data types of their arguments, including array dimensionality, but not for sizes or constraints. Of course, functions often check constraints as part of their behavior.

+
+

Primitive types

+

Stan provides two primitive data types, real for continuous values and int for integer values. These are both considered scalar types.

+
+
+

Complex types

+

Stan provides a complex number data type complex, where a complex number contains both a real and an imaginary component, both of which are of type real. Complex types are considered scalar types.

+
+
+

Vector and matrix types

+

Stan provides three real-valued matrix data types, vector for column vectors, row_vector for row vectors, and matrix for matrices.

+

Stan also provides three complex-valued matrix data types, complex_vector for column vectors, complex_row_vector for row vectors, and complex_matrix for matrices.

+
+
+

Array types

+

Any type (including the constrained types discussed in the next section) can be made into an array type by declaring array arguments. For example,

+
array[10] real x;
+array[6, 7] matrix[3, 3] m;
+array[12, 8, 15] complex z;
+

declares x to be a one-dimensional array of size 10 containing real values, declares m to be a two-dimensional array of size \(6 \times 7\) containing values that are \(3 \times 3\) matrices, and declares z to be a \(12 \times 8 \times 15\) array of complex numbers.

+

Prior to 2.26 Stan models used a different syntax which has since been removed. See the Removed Features chapter for more details.

+
+
+

Tuple types

+

For any sequence of types, Stan provides a tuple data type. For example,

+
tuple(real, array[5] int) xi;
+

declares xi to be a tuple holding two values, the first of which is of type real and the second of which is a one-dimensional array of size 5 containing values of type int.

+
+
+

Constrained data types

+

Declarations of variables other than local variables may be provided with constraints. These constraints are not part of the underlying data type for a variable, but determine error checking in the transformed data, transformed parameter, and generated quantities blocks, and the transform from unconstrained to constrained space in the parameters block.

+

All of the basic data types other than complex may be given lower and upper bounds using syntax such as

+
int<lower=1> N;
+real<upper=0> log_p;
+vector<lower=-1, upper=1>[3] rho;
+

There are also special data types for structured vectors and matrices. There are five constrained vector data types, simplex for unit simplexes, unit_vector for unit-length vectors, sum_to_zero_vector for vectors that sum to zero, ordered for ordered vectors of scalars, and positive_ordered for vectors of positive ordered scalars. There are specialized matrix data types corr_matrix and cov_matrix for correlation matrices (symmetric, positive definite, unit diagonal) and covariance matrices (symmetric, positive definite). The type cholesky_factor_cov is for Cholesky factors of covariance matrices (lower triangular, positive diagonal, product with own transpose is a covariance matrix). The type cholesky_factor_corr is for Cholesky factors of correlation matrices (lower triangular, positive diagonal, unit-length rows). The type sum_to_zero_matrix is for matrices that sum to zero across rows and columns.

+

Constraints provide error checking for variables defined in the data, transformed data, transformed parameters, and generated quantities blocks. Constraints are critical for variables declared in the parameters block, where they determine the transformation from constrained variables (those satisfying the declared constraint) to unconstrained variables (those ranging over all of \(\mathbb{R}^n\)).

+

It is worth calling out the most important aspect of constrained data types:

+
+

The model must have support (non-zero density, equivalently finite log density) at parameter values that satisfy the declared constraints.

+
+

If this condition is violated with parameter values that satisfy declared constraints but do not have finite log density, then the samplers and optimizers may have any of a number of pathologies including just getting stuck, failure to initialize, excessive Metropolis rejection, or biased draws due to inability to explore the tails of the distribution.

+
+
+
+

Primitive numerical data types

+

Unfortunately, the lovely mathematical abstraction of integers and real numbers is only partially supported by finite-precision computer arithmetic.

+
+

Integers

+

Stan uses 32-bit (4-byte) integers for all of its integer representations. The maximum value that can be represented as an integer is \(2^{31}-1\); the minimum value is \(-(2^{31})\).

+

When integers overflow, their value is determined by the underlying architecture. On most, their values wrap, but this cannot be guaranteed. Thus it is up to the Stan programmer to make sure the integer values in their programs stay in range. In particular, every intermediate expression must have an integer value that is in range.

+

Integer arithmetic works in the expected way for addition, subtraction, and multiplication, but truncates the result of division (see the Stan Functions Reference integer-valued arithmetic operators section for more information).

+
+
+

Reals

+

Stan uses 64-bit (8-byte) floating point representations of real numbers. Stan roughly1 follows the IEEE 754 standard for floating-point computation. The range of a 64-bit number is roughly \(\pm 2^{1022}\), which is slightly larger than \(\pm 10^{307}\). It is a good idea to stay well away from such extreme values in Stan models as they are prone to cause overflow.

+

64-bit floating point representations have roughly 15 decimal digits of accuracy. But when they are combined, the result often has less accuracy. In some cases, the difference in accuracy between two operands and their result is large.

+

There are three special real values used to represent (1) not-a-number value for error conditions, (2) positive infinity for overflow, and (3) negative infinity for overflow. The behavior of these special numbers follows standard IEEE 754 behavior.

+
+

Not-a-number

+

The not-a-number value propagates. If an argument to a real-valued function is not-a-number, it either rejects (an exception in the underlying C++) or returns not-a-number itself. For boolean-valued comparison operators, if one of the arguments is not-a-number, the return value is always zero (i.e., false).

+
+
+

Infinite values

+

Positive infinity is greater than all numbers other than itself and not-a-number; negative infinity is similarly smaller. Adding an infinite value to a finite value returns the infinite value. Dividing a finite number by an infinite value returns zero; dividing an infinite number by a finite number returns the infinite number of appropriate sign. Dividing a non-zero finite number by zero returns an infinite value of the appropriate sign, while dividing zero by zero produces a not-a-number value. Dividing two infinite numbers produces a not-a-number value as does subtracting two infinite numbers. Some functions are sensitive to infinite values; for example, the exponential function returns zero if given negative infinity and positive infinity if given positive infinity. Often the gradients will break down when values are infinite, making these boundary conditions less useful than they may appear at first.

+
+
+
+

Promoting integers to reals

+

Stan automatically promotes integer values to real values if necessary, but does not automatically demote real values to integers. For very large integers, this will cause a rounding error to fewer significant digits in the floating point representation than in the integer representation.

+

Unlike in C++, real values are never demoted to integers. Therefore, real values may only be assigned to real variables. Integer values may be assigned to either integer variables or real variables. Internally, the integer representation is cast to a floating-point representation. This operation is not without overhead and should thus be avoided where possible.

+
+
+
+

Complex numerical data type

+

The complex data type is a scalar, but unlike real and int types, it contains two components, a real and imaginary component, both of which are of type real. That is, the real and imaginary components of a complex number are 64-bit, IEEE 754-compliant floating point numbers.

+
+

Constructing and accessing complex numbers

+

Imaginary literals are written in mathematical notation using a numeral followed by the suffix i. For example, the following example constructs a complex number \(2 - 1.3i\) and assigns it to the variable z.

+
complex z = 2 - 1.3i;
+real re = get_real(z);  // re has value 2.0
+real im = get_imag(z);  // im has value -1.3
+

The getter functions then extract the real and imaginary components of z and assign them to re and im respectively.

+

The function to_complex constructs a complex number from its real and imaginary components. The functional form needs to be used whenever the components are not literal numerals, as in the following example.

+
vector[K] re;
+vector[K] im;
+// ...
+for (k in 1:K) {
+  complex z = to_complex(re[k], im[k]);
+  // ...
+}
+
+
+

Promoting real to complex

+

Expressions of type real may be assigned to variables of type complex. For example, the following is a valid sequence of Stan statements.

+
real x = 5.0;
+complex z = x;  // get_real(z) == 5.0, get_imag(z) == 0
+

The real number assigned to a complex number determines the complex number’s real component, with the imaginary component set to zero.

+

Assignability is transitive, so that expressions of type int may also be assigned to variables of type complex, as in the following example.

+
int n = 2;
+complex z = n;
+

Function arguments also support promotion of integer or real typed expressions to type complex.

+
+
+
+

Scalar data types and variable declarations

+

All variables used in a Stan program must have an explicitly declared data type. The form of a declaration includes the type and the name of a variable. This section covers scalar types, namely integer, real, and complex. The next section covers vector and matrix types, and the following section array types.

+
+

Unconstrained integer

+

Unconstrained integers are declared using the int keyword. For example, the variable N is declared to be an integer as follows.

+
int N;
+
+
+

Constrained integer

+

Integer data types may be constrained to allow values only in a specified interval by providing a lower bound, an upper bound, or both. For instance, to declare N to be a positive integer, use the following.

+
int<lower=1> N;
+

This illustrates that the bounds are inclusive for integers.

+

To declare an integer variable cond to take only binary values, that is zero or one, a lower and upper bound must be provided, as in the following example.

+
int<lower=0, upper=1> cond;
+
+
+

Unconstrained real

+

Unconstrained real variables are declared using the keyword real. The following example declares theta to be an unconstrained continuous value.

+
real theta;
+
+
+

Unconstrained complex

+

Unconstrained complex numbers are declared using the keyword complex. The following example declares z to be an unconstrained complex variable.

+
complex z;
+
+
+

Constrained real

+

Real variables may be bounded using the same syntax as integers. In theory (that is, with arbitrary-precision arithmetic), the bounds on real values would be exclusive. Unfortunately, finite-precision arithmetic rounding errors will often lead to values on the boundaries, so they are allowed in Stan.

+

The variable sigma may be declared to be non-negative as follows.

+
real<lower=0> sigma;
+

The following declares the variable x to be less than or equal to \(-1\).

+
real<upper=-1> x;
+

To ensure rho takes on values between \(-1\) and \(1\), use the following declaration.

+
real<lower=-1, upper=1> rho;
+
+

Infinite constraints

+

Lower bounds that are negative infinity or upper bounds that are positive infinity are ignored. Stan provides constants positive_infinity() and negative_infinity() which may be used for this purpose, or they may be supplied as data.

+
+
+
+

Affinely transformed real

+

Real variables may be declared on a space that has been transformed using an affine transformation \(x\mapsto \mu + \sigma * x\) with offset \(\mu\) and (positive) multiplier \(\sigma\), using a syntax similar to that for bounds. While these transforms do not change the asymptotic sampling behaviour of the resulting Stan program (in a sense, the model the program implements), they can be useful for making the sampling process more efficient by transforming the geometry of the problem to a more natural multiplier and to a more natural offset for the sampling process, for instance by facilitating a non-centered parameterisation. While these affine transformation declarations do not impose a hard constraint on variables, they behave like the bounds constraints in many ways and could perhaps be viewed as acting as a sort of soft constraint.

+

The variable x may be declared to have offset \(1\) as follows.

+
real<offset=1> x;
+

Similarly, it can be declared to have multiplier \(2\) as follows.

+
real<multiplier=2> x;
+

Finally, we can combine both declarations to declare a variable with offset \(1\) and multiplier \(2\).

+
real<offset=1, multiplier=2> x;
+

As an example, we can give x a normal distribution with non-centered parameterization as follows.

+
parameters {
+  real<offset=mu, multiplier=sigma> x;
+}
+model {
+  x ~ normal(mu, sigma);
+}
+

Recall that the centered parameterization is achieved with the code

+
parameters {
+  real x;
+}
+model {
+  x ~ normal(mu, sigma);
+}
+

or equivalently

+
parameters {
+  real<offset=0, multiplier=1> x;
+}
+model {
+  x ~ normal(mu, sigma);
+}
+
+
+

Expressions as bounds and offset/multiplier

+

Bounds (and offset and multiplier) for integer or real variables may be arbitrary expressions. The only requirement is that they only include variables that have been declared (though not necessarily defined) before the declaration. If the bounds themselves are parameters, the behind-the-scenes variable transform accounts for them in the log Jacobian.

+

For example, it is acceptable to have the following declarations.

+
data {
+ real lb;
+}
+parameters {
+   real<lower=lb> phi;
+}
+

This declares a real-valued parameter phi to take values greater than the value of the real-valued data variable lb. Constraints may be arbitrary expressions, but must be of type int for integer variables and of type real for real variables (including constraints on vectors, row vectors, and matrices). Variables used in constraints can be any variable that has been defined at the point the constraint is used. For instance,

+
data {
+   int<lower=1> N;
+   array[N] real y;
+}
+parameters {
+   real<lower=min(y), upper=max(y)> phi;
+}
+

This declares a positive integer data variable N, an array y of real-valued data of length N, and then a parameter ranging between the minimum and maximum value of y. As shown in the example code, the functions min() and max() may be applied to containers such as arrays.

+

A more subtle case involves declarations of parameters or transformed parameters based on parameters declared previously. For example, the following program will work as intended.

+
parameters {
+  real a;
+  real<lower=a> b;  // enforces a < b
+}
+transformed parameters {
+  real c;
+  real<lower=c> d;
+  c = a;
+  d = b;
+}
+

The parameters instance works because all parameters are defined externally before the block is executed. The transformed parameters case works even though c isn’t defined at the point it is used, because constraints on transformed parameters are only validated at the end of the block. Data variables work like parameter variables, whereas transformed data and generated quantity variables work like transformed parameter variables.

+
+
+

Declaring optional variables

+

A variable may be declared with a size that depends on a boolean constant. For example, consider the definition of alpha in the following program fragment.

+
data {
+  int<lower=0, upper=1> include_alpha;
+  // ...
+}
+parameters {
+  vector[include_alpha ? N : 0] alpha;
+  // ...
+}
+

If include_alpha is true, the model will include the vector alpha; if the flag is false, the model will not include alpha (technically, it will include alpha of size 0, which means it won’t contain any values and won’t be included in any output).

+

This technique is not just useful for containers. If the value of N is set to 1, then the vector alpha will contain a single element and thus alpha[1] behaves like an optional scalar, the existence of which is controlled by include_alpha.

+

This coding pattern allows a single Stan program to define different models based on the data provided as input. This strategy is used extensively in the implementation of the RStanArm package.

+
+
+
+

Vector and matrix data types

+

Stan provides three types of container objects: arrays, vectors, and matrices. Vectors and matrices are more limited kinds of data structures than arrays. Vectors are intrinsically one-dimensional collections of real or complex values, whereas matrices are intrinsically two dimensional. Vectors, matrices, and arrays are not assignable to one another, even if their dimensions are identical. A \(3 \times 4\) matrix is a different kind of object in Stan than a \(3 \times 4\) array.

+

The intention of using matrix types is to call out their usage in the code. There are three situations in Stan where only vectors and matrices may be used,

+
    +
  • matrix arithmetic operations (e.g., matrix multiplication)
  • +
  • linear algebra functions (e.g., eigenvalues and determinants), and
  • +
  • multivariate function parameters and outcomes (e.g., multivariate normal distribution arguments).
  • +
+

Vectors and matrices cannot be typed to return integer values. They are restricted to real and complex values.

+

For constructing vectors and matrices in Stan, see Vector, Matrix, and Array Expressions.

+
+

Indexing from 1

+

Vectors and matrices, as well as arrays, are indexed starting from one (1) in Stan. This follows the convention in statistics and linear algebra as well as their implementations in the statistical software packages R, MATLAB, BUGS, and JAGS. General computer programming languages, on the other hand, such as C++ and Python, index arrays starting from zero.

+
+
+

Vectors

+

Vectors in Stan are column vectors; see below for information on row vectors. Vectors are declared with a size (i.e., a dimensionality). For example, a 3-dimensional real vector is declared with the keyword vector, as follows.

+
vector[3] u;
+

Vectors may also be declared with constraints, as in the following declaration of a 3-vector of non-negative values.

+
vector<lower=0>[3] u;
+

Similarly, they may be declared with an offset and/or multiplier, as in the following example

+
vector<offset=42, multiplier=3>[3] u;
+
+
+

Complex vectors

+

Like real vectors, complex vectors are column vectors and are declared with a size. For example, a 3-dimensional complex vector is declared with the keyword complex_vector, as follows.

+
complex_vector[3] v;
+

Complex vector declarations do not support any constraints.

+
+
+

Unit simplexes

+

A unit simplex is a vector with non-negative values whose entries sum to 1. For instance, \([0.2,0.3,0.4,0.1]^{\top}\) is a unit 4-simplex. Unit simplexes are most often used as parameters in categorical or multinomial distributions, and they are also the sampled variate in a Dirichlet distribution. Simplexes are declared with their full dimensionality. For instance, theta is declared to be a unit \(5\)-simplex by

+
simplex[5] theta;
+

Unit simplexes are implemented as vectors and may be assigned to other vectors and vice-versa. Simplex variables, like other constrained variables, are validated to ensure they contain simplex values; for simplexes, this is only done up to a statically specified accuracy threshold \(\epsilon\) to account for errors arising from floating-point imprecision.

+

In high dimensional problems, simplexes may require smaller step sizes in the inference algorithms in order to remain stable; this can be achieved through higher target acceptance rates for samplers and longer warmup periods, tighter tolerances for optimization with more iterations, and in either case, with less dispersed parameter initialization or custom initialization if there are informative priors for some parameters.

+
+
+

Stochastic Matrices

+

A stochastic matrix is a matrix where each column or row is a unit simplex, meaning that each column (row) vector has non-negative values that sum to 1. The following example is a \(3 \times 4\) column-stochastic matrix.

+

\[ \begin{bmatrix} 0.2 & 0.5 & 0.1 & 0.3 \\ 0.3 & 0.3 & 0.6 & 0.4 \\ 0.5 & 0.2 & 0.3 & 0.3 \end{bmatrix} \]

+

An example of a \(3 \times 4\) row-stochastic matrix is the following.

+

\[ \begin{bmatrix} 0.2 & 0.5 & 0.1 & 0.2 \\ 0.2 & 0.1 & 0.6 & 0.1 \\ 0.5 & 0.2 & 0.2 & 0.1 \end{bmatrix} \]

+

In the examples above, each column (or row) sums to 1, making the matrices valid column_stochastic_matrix and row_stochastic_matrix types.

+

Column-stochastic matrices are often used in models where each column represents a probability distribution across a set of categories such as in multiple multinomial distributions, factor models, transition matrices in Markov models, or compositional data analysis. They can also be used in situations where you need multiple simplexes of the same dimensionality.

+

The column_stochastic_matrix and row_stochastic_matrix types are declared with row and column sizes. For instance, a matrix theta with 3 rows and 4 columns, where each column is a 3-simplex, is declared like a matrix with 3 rows and 4 columns.

+
column_stochastic_matrix[3, 4] theta;
+

A matrix theta with 3 rows and 4 columns, where each row is a 4-simplex, is similarly declared as a matrix with 3 rows and 4 columns.

+
row_stochastic_matrix[3, 4] theta;
+

As with simplexes, column_stochastic_matrix and row_stochastic_matrix variables are subject to validation, ensuring that each column (row) satisfies the simplex constraints. This validation accounts for floating-point imprecision, with checks performed up to a statically specified accuracy threshold \(\epsilon\).

+
+

Stability Considerations

+

In high-dimensional settings, column_stochastic_matrix and row_stochastic_matrix types may require careful tuning of the inference algorithms. To ensure stability:

+
    +
  • Smaller Step Sizes: In samplers like Hamiltonian Monte Carlo (HMC), smaller step sizes can help maintain stability, especially in high dimensions.
  • +
  • Higher Target Acceptance Rates: Setting higher target acceptance rates can improve the robustness of the sampling process.
  • +
  • Longer Warmup Periods: Increasing the warmup period allows the sampler to better explore the parameter space before the actual sampling begins.
  • +
  • Tighter Optimization Tolerances: For optimization-based inference, tighter tolerances with more iterations can yield more accurate results.
  • +
  • Custom Initialization: If prior information about the parameters is available, custom initialization or less dispersed initialization can lead to more efficient inference.
  • +
+
+
+
+

Unit vectors

+

A unit vector is a vector with a norm of one. For instance, \([0.5, 0.5, 0.5, 0.5]^{\top}\) is a unit 4-vector. Unit vectors are sometimes used in directional statistics. Unit vectors are declared with their full dimensionality. For instance, theta is declared to be a unit \(5\)-vector by

+
unit_vector[5] theta;
+

Unit vectors are implemented as vectors and may be assigned to other vectors and vice-versa. Unit vector variables, like other constrained variables, are validated to ensure that they are indeed unit length; for unit vectors, this is only done up to a statically specified accuracy threshold \(\epsilon\) to account for errors arising from floating-point imprecision.

+
+
+

Vectors that sum to zero

+

A zero-sum vector is constrained such that the sum of its elements is always \(0\). These are sometimes useful for resolving identifiability issues in regression models. While the underlying vector has only \(N - 1\) degrees of freedom, zero sum vectors are declared with their full dimensionality. For instance, beta is declared to be a zero-sum \(5\)-vector (4 DoF) by

+
sum_to_zero_vector[5] beta;
+

Zero sum vectors are implemented as vectors and may be assigned to other vectors and vice-versa. Zero sum vector variables, like other constrained variables, are validated to ensure that they do indeed sum to zero; for zero sum vectors, this is only done up to a statically specified accuracy threshold \(\epsilon\) to account for errors arising from floating-point imprecision.

+
+
+

Ordered vectors

+

An ordered vector type in Stan represents a vector whose entries are sorted in ascending order. For instance, \((-1.3,2.7,2.71)^{\top}\) is an ordered 3-vector. Ordered vectors are most often employed as cut points in ordered logistic regression models (see section).

+

The variable c is declared as an ordered 5-vector by

+
ordered[5] c;
+

After their declaration, ordered vectors, like unit simplexes, may be assigned to other vectors and other vectors may be assigned to them. Constraints will be checked after executing the block in which the variables were declared.

+
+
+

Positive, ordered vectors

+

There is also a positive, ordered vector type which operates similarly to ordered vectors, but all entries are constrained to be positive. For instance, \((2,3.7,4,12.9)\) is a positive, ordered 4-vector.

+

The variable d is declared as a positive, ordered 5-vector by

+
positive_ordered[5] d;
+

Like ordered vectors, after their declaration, positive ordered vectors may be assigned to other vectors and other vectors may be assigned to them. Constraints will be checked after executing the block in which the variables were declared.

+
+
+

Row vectors

+

Row vectors are declared with the keyword row_vector. Like (column) vectors, they are declared with a size. For example, a 1093-dimensional row vector u would be declared as

+
row_vector[1093] u;
+

Constraints are declared as for vectors, as in the following example of a 10-vector with values between -1 and 1.

+
row_vector<lower=-1, upper=1>[10] u;
+

Offset and multiplier are declared similarly, as in the following 3-row-vector with offset -42 and multiplier 3.

+
row_vector<offset=-42, multiplier=3>[3] u;
+

Row vectors may not be assigned to column vectors, nor may column vectors be assigned to row vectors. If assignments are required, they may be accommodated through the transposition operator.

+
+
+

Complex row vectors

+

Complex row vectors are declared with the keyword complex_row_vector and given a size in basic declarations. For example, a 12-dimensional complex row vector v would be declared as

+
complex_row_vector[12] v;
+

Complex row vectors do not allow constraints.

+
+
+

Matrices

+

Matrices are declared with the keyword matrix along with a number of rows and number of columns. For example,

+
matrix[3, 3] A;
+matrix[M, N] B;
+

declares A to be a \(3 \times 3\) matrix and B to be an \(M \times N\) matrix. For the second declaration to be well formed, the variables M and N must be declared as integers in either the data or transformed data block and before the matrix declaration.

+

Matrices may also be declared with constraints, as in this (\(3 \times 4\)) matrix of non-positive values.

+
matrix<upper=0>[3, 4] B;
+

Similarly, matrices can be declared to have a set offset and/or multiplier, as in this matrix with multiplier 5.

+
matrix<multiplier=5>[3, 4] B;
+
+

Assigning to rows of a matrix

+

Rows of a matrix can be assigned by indexing the left-hand side of an assignment statement. For example, this is possible.

+
matrix[M, N] a;
+row_vector[N] b;
+// ...
+a[1] = b;
+

This copies the values from row vector b to a[1], which is the first row of the matrix a. If the number of columns in a is not the same as the size of b, a run-time error is raised; the number of columns of a is N, which is also the number of columns of b.

+

Assignment works by copying values in Stan. That means any subsequent assignment to a[1] does not affect b, nor does an assignment to b affect a.

+
+
+
+

Complex matrices

+

Complex matrices are declared with the keyword complex_matrix and a number of rows and columns. For example,

+
complex_matrix[3, 3] C;
+

Complex matrices do not allow constraints.

+
+
+

Covariance matrices

+

Matrix variables may be constrained to represent covariance matrices. A matrix is a covariance matrix if it is symmetric and positive definite. Like correlation matrices, covariance matrices only need a single dimension in their declaration. For instance,

+
cov_matrix[K] Sigma;
+

declares Sigma to be a \(K \times K\) covariance matrix, where \(K\) is the value of the data variable K.

+
+
+

Correlation matrices

+

Matrix variables may be constrained to represent correlation matrices. A matrix is a correlation matrix if it is symmetric and positive definite, has entries between \(-1\) and \(1\), and has a unit diagonal. Because correlation matrices are square, only one dimension needs to be declared. For example,

+
corr_matrix[3] Omega;
+

declares Omega to be a \(3 \times 3\) correlation matrix.

+

Correlation matrices may be assigned to other matrices, including unconstrained matrices, if their dimensions match, and vice-versa.

+
+
+

Cholesky factors of covariance matrices

+

Matrix variables may be constrained to represent the Cholesky factors of a covariance matrix. This is often more convenient or more efficient than representing covariance matrices directly.

+

A Cholesky factor \(L\) is an \(M \times N\) lower-triangular matrix (if \(m < n\) then \(L[m, n] = 0\)) with a strictly positive diagonal (\(L[k, k] > 0\)) and \(M \geq N\). If \(L\) is a Cholesky factor, then \(\Sigma = L \, L^{\top}\) is a covariance matrix (i.e., it is positive definite). The mapping between positive definite matrices and their Cholesky factors is bijective—every covariance matrix has a unique Cholesky factorization.

+

The typical case of a square Cholesky factor may be declared with a single dimension,

+
cholesky_factor_cov[4] L;
+
+

Cholesky factors of positive semi-definite matrices

+

In general, two dimensions may be declared, with the above being equal to cholesky_factor_cov[4, 4]. The type cholesky_factor_cov[M, N] may be used for the general \(M \times N\) case (with \(M \geq N\)) to produce \(M \times M\) positive semi-definite matrices of rank \(N\).

+
+
+
+

Cholesky factors of correlation matrices

+

Matrix variables may be constrained to represent the Cholesky factors of a correlation matrix.

+

A Cholesky factor for a correlation matrix \(L\) is a \(K \times K\) lower-triangular matrix with positive diagonal entries and rows that are of length 1 (i.e., \(\sum_{n=1}^K L_{m,n}^2 = 1\)). If \(L\) is a Cholesky factor for a correlation matrix, then \(L\,L^{\top}\) is a correlation matrix (i.e., symmetric positive definite with a unit diagonal).

+

To declare the variable L to be a K by K Cholesky factor of a correlation matrix, the following code may be used.

+
cholesky_factor_corr[K] L;
+
+
+

Matrices that sum to zero

+

A sum-to-zero matrix is constrained such that the sum of each row and the sum of each column is always \(0\). These are sometimes useful for resolving identifiability issues in regression models. While the underlying matrix has only \((N - 1) \times (M - 1)\) degrees of freedom, zero sum matrices are declared with their full dimensionality. For instance, beta is declared to be a sum-to-zero \(5 \times 4\)-matrix (12 degrees of freedom) by

+
sum_to_zero_matrix[5, 4] beta;
+

Sum-to-zero matrices are implemented as matrices and may be assigned to other matrices and vice-versa. Zero sum matrix variables, like other constrained variables, are validated to ensure that they do indeed sum to zero; for zero sum matrices, this is only done up to an internally specified accuracy threshold \(\epsilon\) to account for errors arising from floating-point imprecision.

+
+
+

Assigning constrained variables

+

Constrained variables of all types may be assigned to other variables of the same unconstrained type and vice-versa. Matching is interpreted strictly as having the same basic type and number of array dimensions. Constraints are not considered, but basic data types are. For instance, a variable declared to be real<lower=0, upper=1> could be assigned to a variable declared as real and vice-versa. Similarly, a variable declared as matrix[3, 3] may be assigned to a variable declared as cov_matrix[3] or cholesky_factor_cov[3], and vice-versa.

+

Checks are carried out at the end of each relevant block of statements to ensure constraints are enforced. This includes run-time size checks. The Stan compiler isn’t able to catch the fact that an attempt may be made to assign a matrix of one dimensionality to a matrix of mismatching dimensionality.

+
+
+

Promoting real to complex matrixes

+

Real-valued vectors, row vectors and matrices may be assigned to complex-valued vectors, row vectors and matrices, respectively. For example, the following is legal.

+
vector[N] v = ...;
+complex_vector[N] u = 2 * v;
+

Row vectors and matrices work the same way.

+
+
+

Expressions as size declarations

+

Variables may be declared with sizes given by expressions. Such expressions are constrained to only contain data or transformed data variables. This ensures that all sizes are determined once the data is read in and transformed data variables defined by their statements. For example, the following is legal.

+
data {
+  int<lower=0> N_observed, N_missing;
+  // ...
+transformed parameters {
+  vector[N_observed + N_missing] y;
+  // ...
+
+
+

Accessing vector and matrix elements

+

If v is a column vector or row vector, then v[2] is the second element in the vector. If m is a matrix, then m[2, 3] is the value in the second row and third column.

+

Providing a matrix with a single index returns the specified row. For instance, if m is a matrix, then m[2] is the second row. This allows Stan blocks such as

+
matrix[M, N] m;
+row_vector[N] v;
+real x;
+// ...
+v = m[2];
+x = v[3];   // x == m[2][3] == m[2, 3]
+

The type of m[2] is row_vector because it is the second row of m. Thus it is possible to write m[2][3] instead of m[2, 3] to access the third element in the second row. When given a choice, the form m[2, 3] is preferred.

+

Complex versions work the same way,

+
complex_matrix[M, N] m = ...;
+complex_row_vector[N] u = m[3];
+complex_vector[M] v = m[ , 2];
+
+

Array index style

+

The form m[2, 3] is more efficient because it does not require the creation and use of an intermediate expression template for m[2]. In later versions, explicit calls to m[2][3] may be optimized to be as efficient as m[2, 3] by the Stan compiler.

+
+
+
+

Size declaration restrictions

+

An integer expression is used to pick out the sizes of vectors, matrices, and arrays. For instance, we can declare a vector of size M + N using

+
vector[M + N] y;
+

Any integer-denoting expression may be used for the size declaration, providing all variables involved are either data, transformed data, or local variables. That is, expressions used for size declarations may not include parameters or transformed parameters or generated quantities.

+
+
+
+

Array data types

+

Stan supports arrays of arbitrary dimension. The values in an array can be any type, so that arrays may contain values that are simple reals or integers, vectors, matrices, or other arrays. Arrays are the only way to store sequences of integers, and some functions in Stan, such as discrete distributions, require integer arguments.

+

A two-dimensional array is just an array of arrays, both conceptually and in terms of current implementation. When an index is supplied to an array, it returns the value at that index. When more than one index is supplied, this indexing operation is chained. For example, if a is a two-dimensional array, then a[m, n] is just a convenient shorthand for a[m][n].

+

Vectors, matrices, and arrays are not assignable to one another, even if their dimensions are identical.

+

For constructing arrays in Stan, see Vector, Matrix, and Array Expressions.

+
+

Declaring array variables

+

Arrays are declared with the keyword array followed by the dimensions enclosed in square brackets, the element type, and the name of the variable.

+

The variable n is declared as an array of five integers as follows.

+
array[5] int n;
+

A two-dimensional array of complex values with three rows and four columns is declared as follows.

+
array[3, 4] complex a;
+

A three-dimensional array z of positive reals with five rows, four columns, and two shelves can be declared as follows.

+
array[5, 4, 2] real<lower=0> z;
+

Arrays may also be declared to contain vectors. For example,

+
array[3] vector[7] mu;
+

declares mu to be an array of size 3 containing vectors with 7 elements. Arrays may also contain matrices. The example

+
array[15, 12] complex_matrix[7, 2] mu;
+

declares a 15 by 12 array of \(7 \times 2\) complex matrices. Any of the constrained types may also be used in arrays, as in the declaration

+
array[2, 3, 4] cholesky_factor_cov[5, 6] mu;
+

of a \(2 \times 3 \times 4\) array of \(5 \times 6\) Cholesky factors of covariance matrices.

+
+
+

Accessing array elements and subarrays

+

If x is a 1-dimensional array of length 5, then x[1] is the first element in the array and x[5] is the last. For a \(3 \times 4\) array y of two dimensions, y[1, 1] is the first element and y[3, 4] the last element. For a three-dimensional array z, the first element is z[1, 1, 1], and so on.

+

Subarrays of arrays may be accessed by providing fewer than the full number of indexes. For example, suppose y is a two-dimensional array with three rows and four columns. Then y[3] is a one-dimensional array of length four. This means that y[3][1] may be used instead of y[3, 1] to access the value of the first column of the third row of y. The form y[3, 1] is the preferred form (see note in this chapter).

+
+
+

Assigning

+

Subarrays may be manipulated and assigned just like any other variables. Similar to the behavior of matrices, Stan allows blocks such as

+
array[9, 10, 11] real w;
+array[10, 11] real x;
+array[11] real y;
+real z;
+// ...
+x = w[5];
+y = x[4];  // y == w[5][4] == w[5, 4]
+z = y[3];  // z == w[5][4][3] == w[5, 4, 3]
+

Complex-valued arrays work the same way.

+
+
+

Arrays of matrices and vectors

+

Arrays of vectors and matrices are accessed in the same way as arrays of doubles. Consider the following vector and scalar declarations.

+
array[3, 4] vector[5] a;
+array[4] vector[5] b;
+vector[5] c;
+real x;
+

With these declarations, the following assignments are legal.

+
b = a[1];       // result is array of vectors
+c = a[1, 3];    // result is vector
+c = b[3];       //   same result as above
+x = a[1, 3, 5]; // result is scalar
+x = b[3, 5];    //   same result as above
+x = c[5];       //   same result as above
+

Row vectors and other derived vector types (simplex and ordered) behave the same way in terms of indexing.

+

Consider the following matrix, vector and scalar declarations.

+
array[3, 4] matrix[6, 5] d;
+array[4] matrix[6, 5] e;
+matrix[6, 5] f;
+row_vector[5] g;
+real x;
+

With these declarations, the following definitions are legal.

+
e = d[1];           // result is array of matrices
+f = d[1, 3];        // result is matrix
+f = e[3];           //   same result as above
+g = d[1, 3, 2];     // result is row vector
+g = e[3, 2];        //   same result as above
+g = f[2];           //   same result as above
+x = d[1, 3, 5, 2];  // result is scalar
+x = e[3, 5, 2];     //   same result as above
+x = f[5, 2];        //   same result as above
+x = g[2];           //   same result as above
+

As shown, the result f[2] of supplying a single index to a matrix is the indexed row, here row 2 of matrix f.

+
+
+

Partial array assignment

+

Subarrays of arrays may be assigned by indexing on the left-hand side of an assignment statement. For example, the following is legal.

+
array[I, J, K] real x;
+array[J, K] real y;
+array[K] real z;
+// ...
+x[1] = y;
+x[1, 1] = z;
+

The sizes must match. Here, x[1] is a J by K array, as is y.

+

Partial array assignment also works for arrays of matrices, vectors, and row vectors.

+
+
+

Mixing array, vector, and matrix types

+

Arrays, row vectors, column vectors and matrices are not interchangeable in Stan. Thus a variable of any one of these fundamental types is not assignable to any of the others, nor may it be used as an argument where the other is required (use as arguments follows the assignment rules).

+
+

Mixing vectors and arrays

+

For example, vectors cannot be assigned to arrays or vice-versa.

+
array[4] real a;
+vector[4] b;
+row_vector[4] c;
+// ...
+a = b; // illegal assignment of vector to array
+b = a; // illegal assignment of array to vector
+a = c; // illegal assignment of row vector to array
+c = a; // illegal assignment of array to row vector
+
+
+

Mixing row and column vectors

+

It is not even legal to assign row vectors to column vectors or vice versa.

+
vector[4] b;
+row_vector[4] c;
+// ...
+b = c; // illegal assignment of row vector to column vector
+c = b; // illegal assignment of column vector to row vector
+
+
+

Mixing matrices and arrays

+

The same holds for matrices, where 2-dimensional arrays may not be assigned to matrices or vice-versa.

+
array[3, 4] real a;
+matrix[3, 4] b;
+// ...
+a = b;  // illegal assignment of matrix to array
+b = a;  // illegal assignment of array to matrix
+
+
+

Mixing matrices and vectors

+

A \(1 \times N\) matrix cannot be assigned a row vector or vice versa.

+
matrix[1, 4] a;
+row_vector[4] b;
+// ...
+a = b;  // illegal assignment of row vector to matrix
+b = a;  // illegal assignment of matrix to row vector
+

Similarly, an \(M \times 1\) matrix may not be assigned to a column vector.

+
matrix[4, 1] a;
+vector[4] b;
+// ...
+a = b;  // illegal assignment of column vector to matrix
+b = a;  // illegal assignment of matrix to column vector
+
+
+
+

Size declaration restrictions

+

An integer expression is used to pick out the sizes of arrays. The same restrictions as for vector and matrix sizes apply, namely that the size is declared with an integer-denoting expression that does not contain any parameters, transformed parameters, or generated quantities.

+
+
+

Size zero arrays

+

If any of an array’s dimensions is size zero, the entire array will be of size zero. That is, if we declare

+
array[3, 0] real a;
+

then the resulting size of a is zero and querying any of its dimensions at run time will result in the value zero. Declared as above, a[1] will be a size-zero one-dimensional array. For comparison, declaring

+
array[0, 3] real b;
+

also produces an array with an overall size of zero, but in this case, there is no way to index legally into b, because b[0] is undefined. The array will behave at run time as if it’s a \(0 \times 0\) array. For example, the result of to_matrix(b) will be a \(0 \times 0\) matrix, not a \(0 \times 3\) matrix.

+
+
+
+

Tuple data type

+

Stan supports tuples of arbitrary size. The values in a tuple can be of arbitrary type, but the component types must be declared along with the declaration of the tuple. Tuples can be manipulated as a whole, or their elements may be accessed and set individually.

+
+

Declaring tuple variables

+

Tuples are declared with the keyword tuple followed by a parenthesized sequence of types, which determine the types of the respective tuple entries. For example, a tuple with three elements may be declared as

+
tuple(int, vector[3], complex) abc;
+

Tuples must have at least one entry, so the following declaration is illegal.

+
tuple() nil;  // ILLEGAL
+

Tuples of length one must use a trailing comma, to align with the expression syntax.

+
tuple(int,) m; // CORRECT
+tuple(int) n;  // ILLEGAL
+

Tuples can be assigned as a whole if their elements can be assigned individually. For example, a can be assigned to b in the following example because int can be promoted to complex.

+
tuple(int, real) a;
+...
+tuple(complex, real) b = a;
+

Tuple types may have elements which are declared as tuples, such as the following example.

+
tuple(int, tuple(real, complex)) x;
+

In this case, it would probably be simpler to use a 3-tuple type, tuple(int, real, complex).

+

Tuples can be declared with constraints anywhere that ordinary variables can (i.e., as top-level block variables). That means any context in which it is legal to have a declaration

+
real<lower=0> sigma;
+real<lower=0, upper=1> theta;
+

it is legal to have a tuple with constraints such as

+
tuple(real<lower=0>, real<lower=0, upper=1>) sigma_theta;
+
+
+

Accessing tuple elements

+

Tuple elements may be accessed directly. For example, with our declaration of abc from the last section, Stan uses abc.1 for the first element, abc.2 for the second, and abc.3 for the third. These numbers must be integer literals (i.e., they cannot be variables), and must not exceed the number of elements in the tuple. The types of elements are as declared, so that abc.1 is of type int, abc.2 of type vector[3] and abc.3 of type complex.

+
+
+

Assigning tuple elements

+

Tuple elements can be assigned individually, allowing, e.g.,

+
tuple(int, real) ab;
+ab.1 = 123;
+ab.2 = 12.9;
+

As with other assignments, promotions will happen if necessary (of int to real and of real to complex, along with the corresponding container type promotions).

+
+
+

Unpacking assignment of tuples

+

For convenience of using values stored in tuples, Stan supports “unpacking” (or “destructuring”) of tuples in an assignment statement.

+

Given a tuple t of type tuple(T1, ..., Tn) and a sequence of assignable expressions of types v1, …, vn, where each vi has a type which is assignable from type Ti, individual elements of the tuple may be assigned to the corresponding variables in the sequence by the statement

+
(v1, /*...*/, vn) = t;
+

Note that the above parentheses are required, unlike in some other languages with similar features (e.g., Python).

+

These unpacking assignments can be nested if the tuple on the right hand side contains nested tuples.

+

For example, if T is a tuple of type tuple(int, tuple(real, real), complex), then the program

+
int i;
+real x, y;
+complex z;
+
+(i, (x, y), z) = T;
+

Assigns the result of T.1 to i, the result of T.2.1 to x, the result of T.2.2 to y, and the result of T.3 to z.

+

The left hand side must match in size the tuple on the right. Additionally, the same variable may not appear more than once in the left hand side of an unpacking assignment.

+
+
+
+

Variable types vs. constraints and sizes

+

The type information associated with a variable only contains the underlying type and dimensionality of the variable.

+
+

Type information excludes sizes

+

The size associated with a given variable is not part of its data type. For example, declaring a variable using

+
array[3] real a;
+

declares the variable a to be an array. The fact that it was declared to have size 3 is part of its declaration, but not part of its underlying type.

+
+

When are sizes checked?

+

Sizes are determined dynamically (at run time) and thus cannot be type-checked statically when the program is compiled. As a result, any conformance error on size will raise a run-time error. For example, trying to assign an array of size 5 to an array of size 6 will cause a run-time error. Similarly, multiplying an \(N \times M\) by a \(J \times K\) matrix will raise a run-time error if \(M \neq J\).

+
+
+
+

Type information excludes constraints

+

Like sizes, constraints are not treated as part of a variable’s type in Stan when it comes to the compile-time check of operations it may participate in. Anywhere Stan accepts a matrix as an argument, it will syntactically accept a correlation matrix or covariance matrix or Cholesky factor. Thus a covariance matrix may be assigned to a matrix and vice-versa.

+

Similarly, a bounded real may be assigned to an unconstrained real and vice-versa.

+
+

When are function argument constraints checked?

+

For arguments to functions, constraints are sometimes, but not always, checked when the function is called. Exclusions include C++ standard library functions. All probability functions and cumulative distribution functions check that their arguments are appropriate at run time as the function is called.

+
+
+

When are declared variable constraints checked?

+

For data variables, constraints are checked after the variable is read from a data file or other source. For transformed data variables, the check is done after the statements in the transformed data block have executed. Thus it is legal for intermediate values of variables to not satisfy declared constraints.

+

For parameters, constraints are enforced by the transform applied and do not need to be checked. For transformed parameters, the check is done after the statements in the transformed parameter block have executed.

+

For all blocks defining variables (transformed data, transformed parameters, generated quantities), real values are initialized to NaN and integer values are initialized to the smallest legal integer (i.e., a large absolute value negative number).

+

For generated quantities, constraints are enforced after the statements in the generated quantities block have executed.

+
+
+
+

Type naming notation

+

In order to refer to data types, it is convenient to have a way to refer to them. The type naming notation outlined in this section is not part of the Stan programming language, but rather a convention adopted in this document to enable a concise description of a type.

+

Because size information is not part of a data type, data types will be written without size information. For instance, array[] real is the type of one-dimensional array of reals and matrix is the type of matrices. The three-dimensional integer array type is written as array[,,] int, indicating the number of slots available for indexing. Similarly, array[,] vector is the type of a two-dimensional array of vectors.

+
+
+
+

Variable declaration

+

Variables in Stan are declared by giving a type and a name. For example

+
int N;
+vector[N] y;
+array[5] matrix[3, 4] A;
+

declares a variable N that is an integer, a variable y that is a vector of length N (the previously declared variable), and a variable A, which is a length-5 array where each element is a 3 by 4 matrix.

+

The size of top-level variables in the parameters, transformed parameters, and generated quantities must remain constant across all iterations, therefore only data variables can be used in top-level size declarations.

+
// illegal and will be flagged by the compiler:
+generated quantities {
+  int N = 10;
+  array[N] int foo;
+

Depending on where the variable is declared in the Stan program, it either must or cannot have size information, and constraints are either optional or not allowed.

+
// valid block variables, but not locals or function parameters
+vector<lower=0>[N] u;
+
+// valid as a block or local variable, but not a function parameter
+array[3] int is;
+
+// function parameters exclude sizes and cannot be constrained
+void pretty_print_tri_lower(matrix x) { ... }
+

Top-level variables can have constraints and must include sizes for their types, as in the above examples. Local variables, like those defined inside loops or local blocks cannot be constrained, but still include sizes. Finally, variables declared as function parameters are not constrained types and exclude sizes.

+

In the following table, the leftmost column is a list of the unconstrained and undimensioned basic types; these are used as function return types and argument types. The middle column is of unconstrained types with dimensions; these are used as local variable types. The variables M and N indicate number of rows and columns, respectively. The variable K is used for square matrices, i.e., K denotes both the number of rows and columns. The rightmost column lists the corresponding constrained types. An expression of any right-hand column type may be assigned to its corresponding left-hand column basic type. At runtime, dimensions are checked for consistency for all variables; containers of any sizes may be assigned to function arguments. The constrained matrix types cov_matrix[K], corr_matrix[K], cholesky_factor_cov[K], and cholesky_factor_corr[K] are only assignable to matrices declared with dimensions matrix[K, K].

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Function Argument (unsized)
   Local
+

(unconstrained)

              Block
+          (constrained)
intintint
int<lower=L>
int<upper=U>
int<lower=L, upper=U>
int<offset=O>
int<multiplier=M>
int<offset=O, multiplier=M>
realrealreal
real<lower=L>
real<upper=U>
real<lower=L, upper=U>
real<offset=O>
real<multiplier=M>
real<offset=O, multiplier=M>
complexcomplexcomplex
vectorvector[N]vector[N]
vector[N]<lower=L>
vector[N]<upper=U>
vector[N]<lower=L, upper=U>
vector[N]<offset=O>
vector[N]<multiplier=M>
vector[N]<offset=O, multiplier=M>
ordered[N]
positive_ordered[N]
simplex[N]
unit_vector[N]
sum_to_zero_vector[N]
row_vectorrow_vector[N]row_vector[N]
row_vector[N]<lower=L>
row_vector[N]<upper=U>
row_vector[N]<lower=L, upper=U>
row_vector[N]<offset=O>
row_vector[N]<multiplier=M>
row_vector[N]<offset=O, multiplier=M>
matrixmatrix[M, N]matrix[M, N]
matrix[M, N]<lower=L>
matrix[M, N]<upper=U>
matrix[M, N]<lower=L, upper=U>
matrix[M, N]<offset=O>
matrix[M, N]<multiplier=M>
matrix[M, N]<offset=O, multiplier=M>
column_stochastic_matrix[M, N]
row_stochastic_matrix[M, N]
sum_to_zero_matrix[M, N]
matrix[K, K]corr_matrix[K]
matrix[K, K]cov_matrix[K]
matrix[K, K]cholesky_factor_corr[K]
matrix[K, K]cholesky_factor_cov[K]
complex_vectorcomplex_vector[M]complex_vector[M]
complex_row_vectorcomplex_row_vector[N]complex_row_vector[N]
complex_matrixcomplex_matrix[M, N]complex_matrix[M,N]
array[] vectorarray[M] vector[N]array[M] vector[N]
array[M] vector[N]<lower=L>
array[M] vector[N]<upper=U>
array[M] vector[N]<lower=L, upper=U>
array[M] vector[N]<offset=O>
array[M] vector[N]<multiplier=M>
array[M] vector[N]<offset=O, multiplier=M>
array[M] ordered[N]
array[M] positive_ordered[N]
array[M] simplex[N]
array[M] unit_vector[N]
array[M] sum_to_zero_vector[N]
+

+

Additional array types follow the same basic template as the final example in the table and can contain any of the previous types. The unsized version of arrays with more than one dimension is specified by using commas, e.g. array[ , ] is a 2-D array.

+

For more on how function arguments and return types are declared, consult the User’s Guide chapter on functions.

+
+
+

Compound variable declaration and definition

+

Stan allows assignable variables to be declared and defined in a single statement. Assignable variables are

+
    +
  • local variables, and
  • +
  • variables declared in the transformed data, transformed parameters, or generated quantities blocks.
  • +
+

For example, the statement

+
int N = 5;
+

declares the variable N to be an integer scalar type and at the same time defines it to be the value of the expression 5.

+
+

Assignment typing

+

The type of the expression on the right-hand side of the assignment must be assignable to the type of the variable being declared. For example, it is legal to have

+
real sum = 0;
+

even though 0 is of type int and sum is of type real, because integer-typed scalar expressions can be assigned to real-valued scalar variables. In all other cases, the type of the expression on the right-hand side of the assignment must be identical to the type of the variable being declared.

+

Variables of any type may have values assigned to them. For example,

+
matrix[3, 2] a = b;
+

declares a \(3 \times 2\) matrix variable a and assigns a copy of the value of b to the variable a. The variable b must be of type matrix for the statement to be well formed. For the code to execute successfully, b must be the same shape as a, but this cannot be validated until run time. Because a copy is assigned, subsequent changes to a do not affect b and subsequent changes to b do not affect a.

+
+
+

Right-hand side expressions

+

The right-hand side may be any expression which has a type which is assignable to the variable being declared. For example,

+
matrix[3, 2] a = 0.5 * (b + c);
+

assigns the matrix variable a to half of the sum of b and c. The only requirement on b and c is that the expression b + c be of type matrix. For example, b could be of type matrix and c of type real, because adding a matrix to a scalar produces a matrix, and then multiplying by a scalar produces another matrix.

+

Similarly,

+
complex z = 2 + 3i;
+

assigns the complex number \(2 + 3i\) to the complex scalar z. The right-hand side expression can be a call to a user defined function, allowing general algorithms to be applied that might not be otherwise expressible as simple expressions (e.g., iterative or recursive algorithms).

+
+
+

Scope within expressions

+

Any variable that is in scope and any function that is available in the block in which the compound declaration and definition appears may be used in the expression on the right-hand side of the compound declaration and definition statement.

+
+
+
+

Declaring multiple variables at once

+

Stan will interpret multiple comma-separated variable names following a single type as declaring multiple new variables. This is available for all variable declarations in all blocks.

+
+

Types for multiple declarations

+

The code:

+
real x, y;
+

is equivalent to

+
real x;
+real y;
+

As a result, all declarations on the same line must be of the same type.

+
+
+

Combining with other features

+

The ability to declare multiple variables can be combined with assignments whenever a declare-define is valid, as documented in the section introducing compound declarations and definitions:

+
real x = 3, y = 5.6;
+

Constrained data types can also be declared together, so long as the constraint for each variable is the same:

+
real<lower=0> x, y;
+ + +
+
+
+ + + Back to top

Footnotes

+ +
    +
  1. Stan compiles integers to int and reals to double types in C++. Precise details of rounding will depend on the compiler and hardware architecture on which the code is run.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/user-functions.html b/docs/2_39/reference-manual/user-functions.html new file mode 100644 index 000000000..2c1e2cbc5 --- /dev/null +++ b/docs/2_39/reference-manual/user-functions.html @@ -0,0 +1,1351 @@ + + + + + + + + + +User-Defined Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

User-Defined Functions

+

Stan allows users to define their own functions. The basic syntax is a simplified version of that used in C and C++. This chapter specifies how functions are declared, defined, and used in Stan.

+
+

Function-definition block

+

User-defined functions appear in a special function-definition block before all of the other program blocks.

+
functions {
+   // ... function declarations and definitions ...
+}
+data {
+  // ...
+

Function definitions and declarations may appear in any order. Forward declarations are allowed but not required.

+
+
+

Function names

+

The rules for function naming and function-argument naming are the same as for other variables; see the section on variables for more information on valid identifiers. For example,

+
real foo(real mu, real sigma);
+

declares a function named foo with two argument variables of types real and real. The arguments are named mu and sigma, but that is not part of the declaration.

+
+

Function overloading

+

Multiple user-defined functions may have the same name if they have different sequences of argument types. This is known as function overloading.

+

For example, the following two functions are both defined with the name add_up

+
real add_up(real a, real b){
+  return a + b;
+}
+
+real add_up(real a, real b, real c){
+  return a + b + c;
+}
+

The return types of overloaded functions do not need to be the same. One could define an additional add_up function as follows

+
int add_up(int a, int b){
+  return a + b;
+}
+

That being said, functions may not use the same name if their signature only differs by the return type.

+

For example, the following is not permitted

+
// illegal
+real baz(int x);
+int baz(int x);
+

Function names used in the Stan standard library may be overloaded by user-defined functions. Exceptions to this are the reduce_sum family of functions and ODE integrators, which cannot be overloaded.

+
+
+
+

Calling functions

+

All function arguments are mandatory—there are no default values.

+
+

Functions as expressions

+

Functions with non-void return types are called just like any other built-in function in Stan—they are applied to appropriately typed arguments to produce an expression, which has a value when executed.

+
+
+

Functions as statements

+

Functions with void return types may be applied to arguments and used as statements. These act like distribution statements or print statements. Such uses are only appropriate for functions that act through side effects, such as incrementing the log probability accumulator, printing, or raising exceptions.

+
+
+

Resolving overloads

+

Overloaded functions alongside type promotion can result in situations where there are multiple valid interpretations of a function call. Stan requires that there be a unique signature which minimizes the number of promotions required.

+

Consider the following two overloaded functions

+
real foo(int a, real b);
+real foo(real a, int b);
+

These functions do not have a unique minimum when called with two integer arguments foo(1,2), and therefore cannot be called as such.

+

Promotion of integers to complex numbers is considered as two separate promotions, one from int to real and a second from real to complex. Consider the following functions with real and complex signatures

+
real bar(real x);
+real bar(complex z);
+

A call bar(5) with an integer argument will be resolved to bar(real) because it only requires a single promotion, whereas the promotion to a complex number requires two promotions.

+
+
+

Argument promotion

+

The rules for calling functions work the same way as assignment as far as promotion goes. This means that we can promote arguments to the type expected by function arguments. For example, the following will work.

+
real foo(real x) { return ... };
+...
+int a = 5;
+real b = foo(a); // a promoted to type real
+

In addition to promoting int to real, Stan also promotes real to complex, and by transitivity, int to complex. This also works for containers, so an array of int may be assigned to an array of real of the same shape. And we can also promote vector to complex_vector and similarly for row vectors and matrices.

+
+
+

Probability functions in distribution statements

+

Functions whose name ends in _lpdf or _lpmf (log density and mass functions) may be used as probability functions and may be used in place of parameterized distributions on the right side of distribution statements.

+
+
+

Restrictions on placement

+

Functions of certain types are restricted on scope of usage. Functions whose names end in _lp assume access to the log probability accumulator and are only available in the transformed parameters and model blocks.

+

Functions whose names end in _jacobian assume access to the log probability accumulator and may only be used within the transformed parameters block.

+

Functions whose names end in _rng assume access to the random number generator and may only be used within the generated quantities block, transformed data block, and within user-defined functions ending in _rng.

+

Functions whose names end in _lpdf and _lpmf can be used anywhere. However, _lupdf and _lupmf functions can only be used in the model block or user-defined probability functions.

+

See the section on function bodies for more information on these special types of function.

+
+
+
+

Argument types and qualifiers

+

Stan’s functions all have declared types for both arguments and returned value. As with built-in functions, user-defined functions are only declared for base argument type and dimensionality. This requires a different syntax than for declaring other variables. The choice of language was made so that return types and argument types could use the same declaration syntax.

+

The type void may not be used as an argument type, only a return type for a function with side effects.

+
+

Base variable type declaration

+

The base variable types are integer, real, complex, vector, row_vector, and matrix. No lower-bound or upper-bound constraints are allowed (e.g., real<lower=0> is illegal). Specialized constrained types are also not allowed (e.g., simplex is illegal).

+

Tuple types of the form tuple(T1, ..., TN) are also allowed, with all of the types T1 to TN being function argument types (i.e., no constraints and no sizes).

+
+
+

Dimensionality declaration

+

Arguments and return types may be arrays, and these are indicated with optional brackets and commas as would be used for indexing. For example, int denotes a single integer argument or return, whereas array[] real indicates a one-dimensional array of reals, array[,] real a two-dimensional array and array[,,] real a three-dimensional array; whitespace is optional, as usual.

+

The dimensions for vectors and matrices are not included, so that matrix is the type of a single matrix argument or return type. Thus if a variable is declared as matrix a, then a has two indexing dimensions, so that a[1] is a row vector and a[1, 1] a real value. Matrices implicitly have two indexing dimensions. The type declaration array[ , ] matrix b specifies that b is a two-dimensional array of matrices, for a total of four indexing dimensions, with b[1, 1, 1, 1] picking out a real value.

+
+
+

Dimensionality checks and exceptions

+

Function argument and return types are not themselves checked for dimensionality. A matrix of any size may be passed in as a matrix argument. Nevertheless, a user-defined function might call a function (such as a multivariate normal density) that itself does dimensionality checks.

+

Dimensions of function return values will be checked if they’re assigned to a previously declared variable. They may also be checked if they are used as the argument to a function.

+

Any errors raised by calls to functions inside user functions or return type mismatches are simply passed on; this typically results in a warning message and rejection of a proposal during sampling or optimization.

+
+
+

Data-only qualifiers

+

Some of Stan’s built-in functions, like the differential equation solvers, have arguments that must be data. Such data-only arguments must be expressions involving only data, transformed data, and generated quantity variables.

+

In user-defined functions, the qualifier data may be placed before an argument type declaration to indicate that the argument must be data only. For example,

+
real foo(data real x) {
+  return x^2;
+}
+

requires the argument x to be data only.

+

Declaring an argument data only allows type inference to proceed in the body of the function so that, for example, the variable may be used as a data-only argument to a built-in function.

+
+
+
+

Function bodies

+

The body of a function is between an open curly brace ({) and close curly brace (}). The body may contain local variable declarations at the top of the function body’s block and these scope the same way as local variables used in any other statement block.

+

Any user-defined function may be used in the function body regardless of the order in which the function definitions appear in the file. Self-recursive and mutually recursive functions are possible without any additional declarations.

+

The only restrictions on statements in function bodies are external, and determine whether the log probability accumulator or random number generators are available; see the rest of this section for details.

+
+

Random number generating functions

+

Functions that call random number generating functions in their bodies must have a name that ends in _rng; attempts to use random-number generators in other functions lead to a compile-time error.

+

Like other random number generating functions, user-defined functions with names that end in _rng may be used only in the generated quantities block and transformed data block, or within the bodies of user-defined functions ending in _rng. An attempt to use such a function elsewhere results in a compile-time error.

+
+
+

Log probability access in functions

+

Functions that include distribution statements or log probability increment statements must have a name that ends in _lp. Attempts to use distribution statements or increment log probability statements in other functions lead to a compile-time error.

+

Like the target log density increment statement and distribution statements, user-defined functions with names that end in _lp may only be used in blocks where the log probability accumulator is accessible, namely the transformed parameters and model blocks. An attempt to use such a function elsewhere results in a compile-time error.

+
+
+

Defining probability functions for distribution statements

+

Functions whose names end in _lpdf and _lpmf (density and mass functions) can be used as probability functions in distribution statements. As with the built-in functions, the first argument will appear on the left of the distribution statement operator (~) in the distribution statement and the other arguments follow. For example, suppose a function returning the log of the density of y given parameter theta, which allows its use in a distribution statement, is defined as follows.

+
real foo_lpdf(real y, vector theta) { ... }
+

Note that for function definitions, the comma is used rather than the vertical bar.

+

For every custom _lpdf and _lpmf defined there is a corresponding _lupdf and _lupmf defined automatically. The _lupdf and _lupmf versions of the functions cannot be defined directly (to do so will produce an error). The difference in the _lpdf and _lpmf and the corresponding _lupdf and _lupmf functions is that if any other unnormalized density functions are used inside the user-defined function, the _lpdf and _lpmf forms of the user-defined function will change these densities to be normalized. The _lupdf and _lupmf forms of the user-defined functions will instead allow other unnormalized density functions to drop additive constants.

+

The distribution statement shorthand

+
z ~ foo(phi);
+

will have the same effect as incrementing the target with the log of the unnormalized density:

+
target += foo_lupdf(z | phi);
+

Other _lupdf and _lupmf functions used in the definition of foo_lpdf will drop additive constants when foo_lupdf is called and will not drop additive constants when foo_lpdf is called.

+

If there are _lupdf and _lupmf functions used inside the following call to foo_lpdf, they will be forced to normalize (return the equivalent of their _lpdf and _lpmf forms):

+
target += foo_lpdf(z | phi);
+

If there are no _lupdf or _lupmf functions used in the definition of foo_lpdf, then there will be no difference between a foo_lpdf or foo_lupdf call.

+

The unnormalized _lupdf and _lupmf functions can only be used in the model block or in user-defined probability functions (those ending in _lpdf or _lpmf).

+

The same syntax and shorthand that works for _lpdf also works for log probability mass functions with suffixes _lpmf.

+

A function that is going to be accessed as a distribution must return the log of the density or mass function it defines.

+
+
+
+

Parameters are constant

+

Within function definition bodies, the parameters may be used like any other variable. But the parameters are constant in the sense that they can’t be assigned to (i.e., can’t appear on the left side of an assignment (=) statement). In other words, their value remains constant throughout the function body. Attempting to assign a value to a function parameter value will raise a compile-time error.1

+

Local variables may be declared at the top of the function block and scope as usual.

+
+
+

Return value

+

Non-void functions must have a return statement that returns an appropriately typed expression. If the expression in a return statement does not have the same type as the return type declared for the function, a compile-time error is raised.

+

Void functions may use return only without an argument, but return statements are not mandatory.

+
+

Return guarantee required

+

Unlike C++, Stan enforces a syntactic guarantee for non-void functions that ensures control will leave a non-void function through an appropriately typed return statement or because an exception is raised in the execution of the function. To enforce this condition, functions must have a return statement as the last statement in their body. This notion of last is defined recursively in terms of statements that qualify as bodies for functions. The base case is that

+
    +
  • a return statement qualifies,
  • +
+

and the recursive cases are that

+
    +
  • a sequence of statements qualifies if its last statement qualifies,
  • +
  • a for loop or while loop qualifies if its body qualifies, and
  • +
  • a conditional statement qualifies if it has a default else clause and all of its body statements qualify.
  • +
+

An exception is made for “obviously infinite” loops like while (1), which contain a return statement and no break statements. The only way to exit such a loop is to return, so they are considered as returning statements.

+

These rules disqualify

+
real foo(real x) {
+  if (x > 2) {
+    return 1.0;
+  } else if (x <= 2) {
+    return -1.0;
+  }
+}
+

because there is no default else clause, and disqualify

+
real foo(real x) {
+  real y;
+  y = x;
+  while (x < 10) {
+    if (x > 0) {
+      return x;
+    }
+    y = x / 2;
+  }
+}
+

because the return statement is not the last statement in the while loop. A bogus dummy return could be placed after the while loop in this case. The rules for returns allow

+
real log_fancy(real x) {
+  if (x < 1e-30) {
+    return x;
+  } else if (x < 1e-14) {
+    return x * x;
+  } else {
+    return log(x);
+  }
+}
+

because there’s a default else clause and each condition body has return as its final statement.

+
+
+
+

Void Functions as Statements

+
+

Void functions

+

A function can be declared without a return value by using void in place of a return type. Note that the type void may only be used as a return type—arguments may not be declared to be of type void.

+
+
+

Usage as statement

+

A void function may be used as a statement.

+

Because a void function returns no value, such a usage is only for side effects, such as incrementing the log probability function, printing, or raising an error.

+
+
+

Special return statements

+

In a return statement within a void function’s definition, the return keyword is followed immediately by a semicolon (;) rather than by the expression whose value is returned.

+
+
+
+

Declarations

+

Stan supports forward declarations, which look like function definitions without bodies. For example,

+
real unit_normal_lpdf(real y);
+

declares a function named unit_normal_lpdf that consumes a single real-valued input and produces a real-valued output. Declaring a function without a definition is only really useful when using an extension which supplies the definition in C++ rather than in the Stan code itself. How exactly this can be accomplished will differ depending on your Stan interface.

+

A function definition with a body simultaneously declares and defines the named function, as in

+
real unit_normal_lpdf(real y) {
+  return -0.5 * square(y);
+}
+

A function can be declared and (perhaps separately) defined at most once. However, functions with different argument types are considered distinct even if they have the same name; see the section on function overloading.

+ + +
+
+ + + Back to top

Footnotes

+ +
    +
  1. Despite being declared constant and appearing to have a pass-by-value syntax in Stan, the implementation of the language passes function arguments by constant reference in C++.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/variational.html b/docs/2_39/reference-manual/variational.html new file mode 100644 index 000000000..98e2fbaf2 --- /dev/null +++ b/docs/2_39/reference-manual/variational.html @@ -0,0 +1,1127 @@ + + + + + + + + + +Variational Inference + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Variational Inference

+

Stan implements an automatic variational inference algorithm, called Automatic Differentiation Variational Inference (ADVI) (Kucukelbir et al. 2017). In this chapter, we describe the specifics of how ADVI maximizes the variational objective.

+
+

Stochastic gradient ascent

+

ADVI optimizes the ELBO in the real-coordinate space using stochastic gradient ascent. We obtain noisy (yet unbiased) gradients of the variational objective using automatic differentiation and Monte Carlo integration. The algorithm ascends these gradients using an adaptive stepsize sequence. We also evaluate the ELBO using Monte Carlo integration, and we measure convergence in a manner similar to the relative tolerance scheme in Stan’s optimization feature.

+
+

Monte Carlo approximation of the ELBO

+

ADVI uses Monte Carlo integration to approximate the variational objective function, the ELBO. The number of draws used to approximate the ELBO is denoted by elbo_samples. We recommend a default value of \(100\), as we only evaluate the ELBO every eval_elbo iterations, which also defaults to \(100\).

+
+
+

Monte Carlo approximation of the gradients

+

ADVI uses Monte Carlo integration to approximate the gradients of the ELBO. The number of draws used to approximate the gradients is denoted by grad_samples. We recommend a default value of \(1\), as this is the most efficient. It is also a very noisy estimate of the gradient, but stochastic gradient ascent is capable of following such gradients.

+
+
+

Adaptive stepsize sequence

+

ADVI uses a finite-memory version of AdaGrad (Duchi, Hazan, and Singer 2011). This has a single parameter that we expose, denoted eta. We now have a warmup adaptation phase that selects a good value for eta. The procedure does a heuristic search over eta values that span 5 orders of magnitude.

+
+
+

Assessing convergence

+

ADVI tracks the progression of the ELBO through the stochastic optimization. Specifically, ADVI heuristically determines a rolling window over which it computes the average and the median change of the ELBO. Should either number fall below a threshold, denoted by tol_rel_obj, we consider the algorithm to have converged. The change in ELBO is calculated the same way as in Stan’s optimization module.

+ + + +
+
+
+ + Back to top

References

+
+Duchi, John, Elad Hazan, and Yoram Singer. 2011. “Adaptive Subgradient Methods for Online Learning and Stochastic Optimization.” The Journal of Machine Learning Research 12: 2121–59. +
+
+Kucukelbir, Alp, Dustin Tran, Rajesh Ranganath, Andrew Gelman, and David M Blei. 2017. “Automatic Differentiation Variational Inference.” Journal of Machine Learning Research. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/reference-manual/whitespace.html b/docs/2_39/reference-manual/whitespace.html new file mode 100644 index 000000000..4ba49ebf7 --- /dev/null +++ b/docs/2_39/reference-manual/whitespace.html @@ -0,0 +1,1058 @@ + + + + + + + + + +Whitespace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Whitespace

+
+

Whitespace characters

+

The whitespace characters (and their ASCII code points) are the space (0x20), tab (0x09), carriage return (0x0D), and line feed (0x0A).

+
+
+

Whitespace neutrality

+

Stan treats all whitespace characters identically. Specifically, there is no significance to indentation, to tabs, to carriage returns or line feeds, or to any vertical alignment of text. Any whitespace character is exchangeable with any other.

+

Other than for readability, the number of whitespace characters is also irrelevant. One or more whitespace characters of any type are treated identically by the parser.

+
+
+

Whitespace location

+

Zero or more whitespace characters may be placed between symbols in a Stan program. For example, zero or more whitespace characters of any variety may be included before and after a binary operation such as a * b, before a statement-ending semicolon, around parentheses or brackets, before or after commas separating function arguments, etc.

+

Whitespace may not appear inside an identifier or a literal. Thus it is not legal to write the number 10000 as 10 000 or to write the identifier normal_lpdf as normal _ lpdf.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/search.json b/docs/2_39/search.json new file mode 100644 index 000000000..4a90c7f3e --- /dev/null +++ b/docs/2_39/search.json @@ -0,0 +1,9494 @@ +[ + { + "objectID": "stan-users-guide/wiener_diffusion_model.html", + "href": "stan-users-guide/wiener_diffusion_model.html", + "title": "Wiener diffusion model", + "section": "", + "text": "Diffusion models, sometimes also called Wiener diffusion models, are among the most frequently used model families in modeling two-alternative forced-choice tasks (see Wagenmakers (2009), for a review). Diffusion models allow to model response times and responses jointly. The basic version of a diffusion model comprises four parameters: the boundary separation, \\(a\\), the relative starting point, \\(w\\), the drift rate, \\(v\\), and the non-decision time, \\(t0\\) (Ratcliff 1978). In the basic model, it is assumed that the four basic parameters are the same for the whole experiment. As this assumption is very strict and there are examples that suggest that the basic parameters can be different from trial to trial, so called inter-trial variabilities were introduced and the basic four- parameter model was extended to a seven-parameter model. In the seven-parameter extension of the diffusion model there are the following three parameters added: the inter-trial variability in relative starting point, \\(s_w\\), the inter-trial variability in drift rate, \\(s_v\\), and the inter-trial variability in non-decision time, \\(s_{t0}\\) Nicenboim, Schad, and Vasishth (2025).\nData for the diffusion model is two-dimensional: There is one vector for the reaction times, \\(y\\), and one vector for the given responses, \\(\\text{resp}\\). 
The reaction times shall be positive, continuous and in seconds, the responses shall be binary.\nAs a diffusion model describes the decision process for a decision with exactly two choices, there exist reaction time distributions for each response alternative. This means that the probability density function (\\(p\\)) splits into one part for one response alternative and one part for the other response alternative. In the following, we will refer to one alternative as the upper response boundary and to the other alternative as the lower response boundary. \\(p\\) of the lower response boundary can be obtained when inserting \\(-v\\) and \\(1-w\\) to \\(p\\) of the upper response boundary. Let’s call \\(p\\) for the lower response boundary \\(p_0\\) and \\(p\\) for the upper response boundary \\(p_1\\). Then:\n\\[\np_0(a,t0,v,w,sv,sw,st0) = p_1(a,t0,-v,1-w,sv,sw,st0)\n\\]\nUsually, a \\(PDF\\) integrates to 1. In the case of the diffusion model, only the sum of both parts, \\(p_0\\) and \\(p_1\\), integrates to 1. This is called defective.\n\n\n\n\n\n\nFigure 1: Figure 1: Realization of a Four-Parameter Diffusion Process Modeling the Binary Decision Process. Image from Henrich et al. (2024), distributed under the Creative Commons Attribution 4.0 International License. Note. The parameters are the boundary separation a for two response alternatives, the relative starting point w, the drift rate v, and the non-decision time t0. The decision process is illustrated as a jagged line between the two boundaries. The predicted distributions of the reaction times are depicted as curved lines below and above the response boundaries (blue).\n\n\n\nIn this model it is assumed that the decision process behaves like a random walk and we are interested in the first time that the random walk crosses one of the two decision boundaries. Hence, we are interested in the first-passage time of the decision process. 
The Stan function wiener_lpdf() returns the logarithm of the first-passage time density function for a diffusion model with up to seven parameters for upper boundary responses, \\(\\log(p_1)\\). As can be seen above, it suffices to implement the density for only one response boundary, as the other can be obtained by mirroring the starting point and drift rate. Any combination of fixed and estimated parameters can be specified. In other words, with this implementation it is not only possible to estimate parameters of the full seven-parameter model, but also to estimate restricted models such as the basic four-parameter model, or a five or six-parameter model, or even a one-parameter model when fixing the other six parameters.\nFor example, it is possible to permit variability in just one or two parameters and to fix the other variabilities to 0, or even to estimate a three-parameter model when fixing more parameters (e.g., fixing the relative starting point at 0.5).\nIt is assumed that the reaction time data that correspond to the upper response boundary \\(y_\\text{upper}\\) is distributed according to wiener_lpdf():\n\\[\ny_\\text{upper} \\sim \\operatorname{wiener\\_lpdf}(a, t0, w, v, s_v, s_w, s_{t0})\n\\] and the reaction time data that correspond to the lower response boundary \\(y_\\text{lower}\\) is distributed according to wiener_lpdf() with mirrored starting point and drift rate:\n\\[\ny_\\text{lower} \\sim \\operatorname{wiener\\_lpdf}(a, t0, 1-w, -v, s_v, s_w, s_{t0})\n\\]\n\n\nThe following example demonstrates a diffusion model call in Stan:\ndata {\n int <lower=0> N; // Number of trials\n array[N] real rt; // response times (in seconds )\n array[N] int <lower=0, upper=1> resp; // responses {0 ,1}\n}\ntransformed data{\n real min_rt = min(rt);\n}\nparameters {\n real <lower=0> a; // boundary separation\n real v; // drift\n real <lower=0, upper=1> w; // relative starting point\n real <lower=0, upper=min_rt> t0; // non-decision time\n\n real <lower=0> 
sv; // variability in drift\n // variability in starting point\n real <lower=0, upper=fmin(2 * w, 2 * (1 - w))> sw; \n real <lower=0> st0; // variability in non-decision time\n}\ntransformed parameters{\n real one_minus_w = 1 - w;\n real neg_v = -v;\n}\nmodel {\n // prior\n a ~ normal(1, 1);\n w ~ normal(0.5, 0.1);\n v ~ normal(2, 3);\n t0 ~ normal(0.435, 0.12);\n\n sv ~ normal(1, 3);\n st0 ~ normal(0.183, 0.09);\n sw ~ beta(1, 3);\n\n // likelihood (diffusion model)\n for (i in 1:N) {\n if (resp[i] == 1) {\n // upper boundary\n target += wiener_full_lpdf(rt[i] | a, t0, w, v,\n sv, sw, st0);\n } else {\n // lower boundary: mirror drift and starting point\n target += wiener_full_lpdf(rt[i] | a, t0, one_minus_w,\n neg_v, sv, sw, st0);\n }\n }\n}\n\n\nThe data should consist of at least three variables:\n\nThe number of trials N,\nthe response, coded as 0 = “lower bound” and 1 = “upper bound”, and\nthe reaction times in seconds (not milliseconds).\n\nNote that two different ways of coding responses are commonly used: First, in response coding, the boundaries correspond to the two response alternatives. Second, in accuracy coding, the boundaries correspond to correct (upper bound) and wrong (lower bound) responses. This means, depending on the coding you choose, the bounds mentioned in the second variable above differ and the response variable will have a different form.\nMost often, an experimenter wants to find out whether an experimental manipulation influences the model parameters. As there exists psychological interpretations for each diffusion model parameter, the experimenter can draw conclusions from differing parameters. Therefore, usually an own diffusion model is being computed for each experimental group to enable a comparison of the parameters between the groups. This can be manipulation between different subjects, like an experimental group and a control group (so called between-subject manipulations). 
However, this can also be manipulations within the same subject by presenting stimuli from different experimental groups (so called within-subject manipulations). Depending on the experimental design, one would typically also provide the number of conditions and the condition associated with each trial as a vector. Then, one model for each condition will be computed. This means that the parameters also have to be defined for each condition.\nIn a hierarchical setting, the data block would also specify the number of participants and the participant associated with each trial as a vector. It is also possible to hand over a precision value in the data block.\n\n\n\nThe model arguments of the wiener_lpdf() function that are not fixed to a certain value are defned as parameters in the parameters block. In this block, it is also possible to insert restrictions on the parameters. Note that the MCMC algorithm iteratively searches for the next parameter set. If the suggested sample falls outside the internally defined parameter ranges, the program will throw an error, which causes the algorithm to restart the current iteration. Since this slows down the sampling process, it is advisable to include the parameter ranges in the defnition of the parameters in the parameters block to improve the sampling process (see table below for the parameter ranges). 
In addition, the parameter space is further constrained by the following conditions:\n\nThe non-decision time \\(t_0\\) has to be smaller or equal to the observed reaction time: \\(t0 \\leq y\\).\nThe varying relative starting point \\(w\\) has to be in the interval (0,1) and thus,\n\n\\[\n\\begin{aligned}\n&w + \\frac{s_w}{2} < 1 \\text{, and} \\\\\n&0 < w-\\frac{s_w}{2}\n\\end{aligned}\n\\]\n\n\n\nParameter\nRange\n\nParameter\nRange\n\n\n\n\n\\(a\\)\n(0, \\(\\infty\\))\n\n\\(y\\)\n(0, \\(\\infty\\))\n\n\n\\(v\\)\n(-\\(\\infty\\), \\(\\infty\\))\n\n\\(s_v\\)\n[0, \\(\\infty\\))\n\n\n\\(w\\)\n(0,1)\n\n\\(s_w\\)\n[0,min(2w, 2(1-w)))\n\n\n\\(t_0\\)\n[0,\\(\\infty\\))\n\n\\(s_{t0}\\)\n[0,\\(\\infty\\))\n\n\n\n\n\n\nIn the model block, the priors and likelihood are defined for the upper and the lower response boundary. Different kinds of priors can be specifed here. Generally, the regularization induced by mildly informative priors can help both statistically and computationally.\nIn the second part of the model block, the data generating distribution is applied to all responses. The drift rate \\(v\\) and relative starting point \\(w\\) have to be mirrored for responses at the lower boundary.\nFor more details regarding the application of the diffusion model in Stan, see Henrich et al. (2024).\n\n\n\n\nTruncation and censoring frequently occur in psychological data collection. For reaction time data, truncated and censored data regularly arise in psychological studies as a consequence of using response windows or deadlines. These are sometimes introduced in the analysis of data to exclude reaction times that appear too short or too long, but they are also sometimes already built into the study procedures to push participants to respond within a specifc temporal window.\nDepending on the implementation of the response window, two different types of data arise: truncated data or censored data. 
Since the effects of truncation or censoring on summary statistics such as mean, median, standard deviation, and skewness is regularly too large to ignore (Ulrich and Miller 1994), data analysts are well advised to account for these effects.\nAs described in the Truncated or Censored Data chapter, the cumulative distribution function (\\(F\\)) and its complement (\\(\\text{CCDF}\\)) are needed to model truncated and censored data.\nAs explained above, \\(p\\) is defined defectively, meaning that only the sum of \\(p\\)s for both response alternatives integrates to 1. For the same reason, \\(F\\) and \\(\\text{CCDF}\\) are also implemented defectively. Analogously, only the sum of the \\(F\\)s and \\(\\text{CCDF}\\)s for both response alternatives asymptotes above at 1.\nIn the case of the diffusion model, \\(F\\) asymptotes above at the probability \\(PROB\\) to hit the corresponding response boundary: (for simplicity, we omit the inter-trial variabilities in the following)\n\\[\n\\begin{aligned}\nF_1(\\infty\\mid a,w,v) &= \\text{PROB}(a,w,v) \\text{ and} \\\\\nF_0(\\infty\\mid a,w,v) &= F_1(\\infty\\mid a,1-w,-v) = \\text{PROB}(a,1-w,-v)\n\\end{aligned}\n\\]\n\n\nData are called truncated when there is no information available for analysis from trials with values larger (or smaller) than a right (or left) reaction-time bound. In reaction time experiments, reaction time data are truncated if trials with reaction times outside the response window are excluded from the analysis. 
Not even a count of those omitted trials is kept.\nLet \\(L\\) denote the left reaction-time bound and \\(U\\) denote the right reaction-time bound of a response window.\nThen, the density of truncated data for both response boundaries 0 and 1, here denoted as \\(\\text{resp}\\in\\{0,1\\}\\), can be formulated as follows:\n\\[\n\\begin{aligned}\n&p_{\\text{resp}}(y \\mid L<X\\leq U, a, w, v) = \\\\ &\\frac{p_{\\text{resp}}(y \\mid a, w, v)\\cdot \\mathbb{I}_{\\{L<y\\leq U\\}}}\n{\\bigl(F_0(U \\mid a, w, v)+F_1(U \\mid a, w, v)\\bigr) -\n\\bigl(F_0(L\\mid a, w, v)+F_1(L\\mid a, w, v)\\bigr)}\n\\end{aligned}\n\\]\nThe density of left truncated data can be formulated as follows. \\[\n\\begin{aligned}\np_{\\text{resp}}(y \\mid L<X, a, w, v) = \\frac{p_{\\text{resp}}(y \\mid a, w, v)\\cdot \\mathbb{I}_{\\{L<y\\}}}\n{1-\\bigl(F_0(L \\mid a, w, v)+F_1(L \\mid a, w, v)\\bigr)},\n\\end{aligned}\n\\]\nThe density of right truncated data can be formulated as follows.\n\\[\n\\begin{aligned}\np_{\\text{resp}}(y \\mid X\\leq U, a, w, v) = \\frac{p_{\\text{resp}}(y \\mid a, w, v)\\cdot \\mathbb{I}_{\\{y\\leq U\\}}}{F_0(U \\mid a, w, v)+F_1(U \\mid a, w, v)}\n\\end{aligned}\n\\]\nAs the functions are implemented defectively, a truncated diffusion model cannot be calculated with the truncation functor \\(T[,]\\) as it would usually be done in Stan. This means the function call: y ~ wiener(...)T[L,U] does not work the way it is supposed to. When the truncation functor is called in Stan, Stan searches for a CDF implementation internally. In the case of the diffusion model, Stan would find the CDF, but is not aware of its defective implementation and calculates the computations as if it were a non-defective CDF. 
This causes misleading and incorrect results.\nTo implement the truncated model, write out the function shown above on the log-scale with left_bound = L and right_bound = U, where wiener_lcdf_unnorm() calls the logarithmized CDF of the diffusion model at the response-1-boundary:\nmodel {\n real log_denom = log_diff_exp(\n log_sum_exp(\n wiener_lcdf_unnorm(right_bound | a, t0, w, v, sv, sw, st),\n wiener_lcdf_unnorm(right_bound | a, t0, one_minus_w, neg_v,\n sv, sw, st)),\n log_sum_exp(\n wiener_lcdf_unnorm(left_bound | a, t0, w, v, sv, sw, st),\n wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w, neg_v,\n sv, sw, st)));\n // likelihood\n for (i in 1:N) {\n if (resp[i] == 1) {\n // response -1 boundary\n target += wiener_lpdf (rt[i] | a, t0, w, v, sv, sw, st);\n } else { \n // response -0 boundary ( mirror v and w)\n target += wiener_lpdf (rt[i] | a, t0, one_minus_w, neg_v,\n sv, sw, st);\n }\n } // end for\n target += -N * log_denom;\n}\nFor details of how to call a truncated model within the parallelization routine of reduce_sum or with truncation to only on side, see Henrich and Klauer (2026).\n\n\n\nData are censored when observations that are above or below a right or left boundary value are reported as occurrences of the event \\((y >\nU)\\), for \\(U\\) the right bound, or as occurrences of the event \\((y \\leq\nL)\\), for \\(L\\) the left bound, respectively. Like for truncated data, the range of the possible values is restricted, but the number of observations that fall outside the boundaries is kept, whereas in truncation, no count would be kept.\nFor the censored model, we distinguish two cases. In the first case, the responses of the censored trials are known, but the reaction times are not known. In the second case, neither the responses nor the reaction times of the censored trials are known. Note that the second case differs from a truncated model in the fact that the number of censored trials is still known. 
Consider first the case where the response is known even for censored data.\nTo model such data in Stan, the left and right reaction time bounds, left_bound and right_bound, respectively, are handed over in the data block, as well as a vector censored that tracks whether a trial is censored (= 1) or not (= 0), and counts of trials censored at the left reaction time bound and counts of trials censored at the right reaction time bound for each response in {0,1}. There are four such count variables: N_cens_left_0, N_cens_left_1, N_cens_right_0, N_cens_right_1:\nmodel {\n for (i in 1:N) {\n if (censored[i] == 0) {\n if (resp[i] == 1) {\n y[i] ~ wiener(a, t0, w, v, sv, sw, st0);\n } else if (resp[i] == 0) {\n y[i] ~ wiener(a, t0, one_minus_w, neg_v, sv, sw, st0);\n }\n }\n }\n\n // likelihood (response = 0)\n target += N_cens_left_0 \n * wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w, neg_v,\n sv, sw, st0);\n\n target += N_cens_right_0 \n * wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w, neg_v,\n sv, sw, st0);\n\n // likelihood (response = 1)\n target += N_cens_left_1 \n * wiener_lcdf_unnorm(left_bound | a, t0, w, v, sv, sw, st0);\n\n target += N_cens_right_1 \n * wiener_lccdf_unnorm(right_bound | a, t0, w, v, sv, sw, st0);\n}\nWhen data are censored at only one side, meaning that the reaction time constraint only exists for one of the two boundaries, omit the lines for the other side in the code. A both sided reaction time window would be, for example, when only reaction times are accepted that occur between 0.2 and 0.8 seconds. A one sided reaction time constraint would be, for example, when all reaction times below 0.8 seconds are accepted.\nWhen data consist of many conditions (as explained in the beginning), it is sometimes more convenient to loop over all trials instead of using count variables as described above, using the following notation and code. 
A vector containing the information whether a trial is censored or not, here censored, needs to be handed over in the data block. This vector splits the data into three bins: all trials \\(i\\) withcensored[i]=0 are censored below the left reaction time bound, all trials \\(i\\) with censored[i]=1 fall between the reaction time bounds, and all trials \\(i\\) with censored[i]=2 are censored above the right reaction time bound. For non-censored trials, the log-PDF is computed, for left censored trials, the log-CDF is computed, and for right censored trials, the log-CCDF is computed:\nmodel { \n for (i in 1:N) { \n // right censored at right_bound\n if (resp [i] == 1) { \n // upper response boundary\n if (censored[i] == 0) {\n target += wiener_lcdf_unnorm(left_bound | a, t0, w, v, \n sv, sw, st0);\n } else if (censored[i] == 1) {\n target += wiener_lpdf(y[i] | a, t0, w, v, sv, sw, st0);\n } else if (censored[i] == 2) {\n target += wiener_lccdf_unnorm(right_bound | a, t0, w, v,\n sv, sw, st0);\n }\n } else { \n // lower response boundary (mirror drift and // starting point!)\n if (censored[i] == 0) {\n target += wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w,\n neg_v, sv, sw, st0);\n } else if (censored[i] == 1) {\n target += wiener_lpdf(y[i] | a, t0, one_minus_w, neg_v,\n sv, sw, st0);\n } else if (censored[i] == 2) {\n target += wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w,\n neg_v, sv, sw, st0);\n }\n }\n }\n}\nWhen the data are censored on only one side, omit the case that is not needed.\nNote that this block can be inserted in the defnition of the parallelization function, partial_sum_wiener(), as defined below.\nSometimes also the response is missing (i.e., it is known that the reaction time in a trial fell outside the response window, but which response was given is unknown). 
One method that has been used to model such data has involved inferring the numbers of missing responses of either kind from the observed relative frequencies of the two responses. This approach has the problem that quite specifc assumptions on the missing data have to be made (namely, that the proportions of the two kinds of responses are the same for responses within and outside the response window).\nThe following is a more principled approach that uses the cumulative distribution functions and their complements to provide the data-generating distribution of censored data. As before, let \\(L\\) be the left reaction time bound, and \\(U\\) the right reaction time bound, and consider decision times without inter-trial variabilities for the sake of simplicity. It follows that the likelihood contribution \\(\\textit{lik}_l\\) for a left-censored data point is given by\n\\[\n\\begin{aligned}\n\\textit{lik}_l(a,w,v) = F_0(L\\mid a,w,v) + F_1(L\\mid a,w,v),\n\\end{aligned}\n\\]\nwhereas the likelihood contribution \\(lik_r\\) due to a right-censored data point is given by\n\\[\n\\begin{aligned}\n\\textit{lik}_r(a,w,v) = \\text{CCDF}_0(U\\mid a,w,v) + \\text{CCDF}_1(U\\mid a,w,v).\n\\end{aligned}\n\\]\nSee the following code for an example of Stan code implementing this second case of censoring. This model call deals with the problem of unknown responses by computing the probability of choosing the response-1 or response-0 boundary outside the response window. Here, the CDF and/or the CCDF are required, depending upon whether there is only left-censoring, right-censoring, or censoring both to the left and to the right. 
The following code shows the functions block for a model that is right-censored using the function partial_sum_wiener() to parallelize the execution of a single Stan chain across multiple cores:\nfunctions {\n real partial_sum_wiener(array[] real rt_slice, int start,\n int end, real a, real t0, real w,\n real v, real sv, real sw, real st,\n array[] int resp, real right_bound,\n array[] int censored) {\n real ans = 0;\n for (i in start:end) {\n if (censored[i] == 1) {\n // not censored\n if (resp[i] == 1) {\n // upper boundary\n ans += wiener_lpdf(rt_slice[i+1- start ] | a, t0, w, v,\n sv, sw, st);\n } else {\n // lower boundary(mirror v and w)\n ans += wiener_lpdf(rt_slice[i+1- start ] | a, t0, one_minus_w,\n neg_v, sv, sw, st);\n }\n } else { \n // censored\n ans += log_sum_exp (\n wiener_lccdf_unnorm(right_bound | a, t0, w, v, sv, sw, st),\n wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w,\n neg_v, sv, sw, st);\n }\n }\n return ans;\n }\n}\nCombine this block with the model block in the example above by using the function reduce_sum().\n target += reduce_sum(partial_sum_wiener, rt, 1,\n a, t0, w, v, sv, sw, st, resp, right_bound, censored);\n}\nFor more details, see Henrich and Klauer (2026).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Wiener diffusion model" + ] + }, + { + "objectID": "stan-users-guide/wiener_diffusion_model.html#function-call-example", + "href": "stan-users-guide/wiener_diffusion_model.html#function-call-example", + "title": "Wiener diffusion model", + "section": "", + "text": "The following example demonstrates a diffusion model call in Stan:\ndata {\n int <lower=0> N; // Number of trials\n array[N] real rt; // response times (in seconds )\n array[N] int <lower=0, upper=1> resp; // responses {0 ,1}\n}\ntransformed data{\n real min_rt = min(rt);\n}\nparameters {\n real <lower=0> a; // boundary separation\n real v; // drift\n real <lower=0, upper=1> w; // relative starting point\n real <lower=0, upper=min_rt> t0; // 
non-decision time\n\n real <lower=0> sv; // variability in drift\n // variability in starting point\n real <lower=0, upper=fmin(2 * w, 2 * (1 - w))> sw; \n real <lower=0> st0; // variability in non-decision time\n}\ntransformed parameters{\n real one_minus_w = 1 - w;\n real neg_v = -v;\n}\nmodel {\n // prior\n a ~ normal(1, 1);\n w ~ normal(0.5, 0.1);\n v ~ normal(2, 3);\n t0 ~ normal(0.435, 0.12);\n\n sv ~ normal(1, 3);\n st0 ~ normal(0.183, 0.09);\n sw ~ beta(1, 3);\n\n // likelihood (diffusion model)\n for (i in 1:N) {\n if (resp[i] == 1) {\n // upper boundary\n target += wiener_full_lpdf(rt[i] | a, t0, w, v,\n sv, sw, st0);\n } else {\n // lower boundary: mirror drift and starting point\n target += wiener_full_lpdf(rt[i] | a, t0, one_minus_w,\n neg_v, sv, sw, st0);\n }\n }\n}\n\n\nThe data should consist of at least three variables:\n\nThe number of trials N,\nthe response, coded as 0 = “lower bound” and 1 = “upper bound”, and\nthe reaction times in seconds (not milliseconds).\n\nNote that two different ways of coding responses are commonly used: First, in response coding, the boundaries correspond to the two response alternatives. Second, in accuracy coding, the boundaries correspond to correct (upper bound) and wrong (lower bound) responses. This means, depending on the coding you choose, the bounds mentioned in the second variable above differ and the response variable will have a different form.\nMost often, an experimenter wants to find out whether an experimental manipulation influences the model parameters. As there exists psychological interpretations for each diffusion model parameter, the experimenter can draw conclusions from differing parameters. Therefore, usually an own diffusion model is being computed for each experimental group to enable a comparison of the parameters between the groups. This can be manipulation between different subjects, like an experimental group and a control group (so called between-subject manipulations). 
However, this can also be manipulations within the same subject by presenting stimuli from different experimental groups (so called within-subject manipulations). Depending on the experimental design, one would typically also provide the number of conditions and the condition associated with each trial as a vector. Then, one model for each condition will be computed. This means that the parameters also have to be defined for each condition.\nIn a hierarchical setting, the data block would also specify the number of participants and the participant associated with each trial as a vector. It is also possible to hand over a precision value in the data block.\n\n\n\nThe model arguments of the wiener_lpdf() function that are not fixed to a certain value are defned as parameters in the parameters block. In this block, it is also possible to insert restrictions on the parameters. Note that the MCMC algorithm iteratively searches for the next parameter set. If the suggested sample falls outside the internally defined parameter ranges, the program will throw an error, which causes the algorithm to restart the current iteration. Since this slows down the sampling process, it is advisable to include the parameter ranges in the defnition of the parameters in the parameters block to improve the sampling process (see table below for the parameter ranges). 
In addition, the parameter space is further constrained by the following conditions:\n\nThe non-decision time \\(t_0\\) has to be smaller than or equal to the observed reaction time: \\(t_0 \\leq y\\).\nThe varying relative starting point \\(w\\) has to be in the interval (0,1) and thus,\n\n\\[\n\\begin{aligned}\n&w + \\frac{s_w}{2} < 1 \\text{, and} \\\\\n&0 < w-\\frac{s_w}{2}\n\\end{aligned}\n\\]\n\n\n\nParameter\nRange\n\nParameter\nRange\n\n\n\n\n\\(a\\)\n(0, \\(\\infty\\))\n\n\\(y\\)\n(0, \\(\\infty\\))\n\n\n\\(v\\)\n(-\\(\\infty\\), \\(\\infty\\))\n\n\\(s_v\\)\n[0, \\(\\infty\\))\n\n\n\\(w\\)\n(0,1)\n\n\\(s_w\\)\n[0,min(2w, 2(1-w)))\n\n\n\\(t_0\\)\n[0,\\(\\infty\\))\n\n\\(s_{t0}\\)\n[0,\\(\\infty\\))\n\n\n\n\n\n\nIn the model block, the priors and likelihood are defined for the upper and the lower response boundary. Different kinds of priors can be specified here. Generally, the regularization induced by mildly informative priors can help both statistically and computationally.\nIn the second part of the model block, the data generating distribution is applied to all responses. The drift rate \\(v\\) and relative starting point \\(w\\) have to be mirrored for responses at the lower boundary.\nFor more details regarding the application of the diffusion model in Stan, see Henrich et al. (2024).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Wiener diffusion model" + ] + }, + { + "objectID": "stan-users-guide/wiener_diffusion_model.html#wiener-truncation", + "href": "stan-users-guide/wiener_diffusion_model.html#wiener-truncation", + "title": "Wiener diffusion model", + "section": "", + "text": "Truncation and censoring frequently occur in psychological data collection. For reaction time data, truncated and censored data regularly arise in psychological studies as a consequence of using response windows or deadlines. 
These are sometimes introduced in the analysis of data to exclude reaction times that appear too short or too long, but they are also sometimes already built into the study procedures to push participants to respond within a specific temporal window.\nDepending on the implementation of the response window, two different types of data arise: truncated data or censored data. Since the effects of truncation or censoring on summary statistics such as mean, median, standard deviation, and skewness are regularly too large to ignore (Ulrich and Miller 1994), data analysts are well advised to account for these effects.\nAs described in the Truncated or Censored Data chapter, the cumulative distribution function (\\(F\\)) and its complement (\\(\\text{CCDF}\\)) are needed to model truncated and censored data.\nAs explained above, \\(p\\) is defined defectively, meaning that only the sum of \\(p\\)s for both response alternatives integrates to 1. For the same reason, \\(F\\) and \\(\\text{CCDF}\\) are also implemented defectively. Analogously, only the sum of the \\(F\\)s and \\(\\text{CCDF}\\)s for both response alternatives asymptotes above at 1.\nIn the case of the diffusion model, \\(F\\) asymptotes above at the probability \\(\\text{PROB}\\) to hit the corresponding response boundary: (for simplicity, we omit the inter-trial variabilities in the following)\n\\[\n\\begin{aligned}\nF_1(\\infty\\mid a,w,v) &= \\text{PROB}(a,w,v) \\text{ and} \\\\\nF_0(\\infty\\mid a,w,v) &= F_1(\\infty\\mid a,1-w,-v) = \\text{PROB}(a,1-w,-v)\n\\end{aligned}\n\\]\n\n\nData are called truncated when there is no information available for analysis from trials with values larger (or smaller) than a right (or left) reaction-time bound. In reaction time experiments, reaction time data are truncated if trials with reaction times outside the response window are excluded from the analysis. 
Not even a count of those omitted trials is kept.\nLet \\(L\\) denote the left reaction-time bound and \\(U\\) denote the right reaction-time bound of a response window.\nThen, the density of truncated data for both response boundaries 0 and 1, here denoted as \\(\\text{resp}\\in\\{0,1\\}\\), can be formulated as follows:\n\\[\n\\begin{aligned}\n&p_{\\text{resp}}(y \\mid L<X\\leq U, a, w, v) = \\\\ &\\frac{p_{\\text{resp}}(y \\mid a, w, v)\\cdot \\mathbb{I}_{\\{L<y\\leq U\\}}}\n{\\bigl(F_0(U \\mid a, w, v)+F_1(U \\mid a, w, v)\\bigr) -\n\\bigl(F_0(L\\mid a, w, v)+F_1(L\\mid a, w, v)\\bigr)}\n\\end{aligned}\n\\]\nThe density of left truncated data can be formulated as follows. \\[\n\\begin{aligned}\np_{\\text{resp}}(y \\mid L<X, a, w, v) = \\frac{p_{\\text{resp}}(y \\mid a, w, v)\\cdot \\mathbb{I}_{\\{L<y\\}}}\n{1-\\bigl(F_0(L \\mid a, w, v)+F_1(L \\mid a, w, v)\\bigr)},\n\\end{aligned}\n\\]\nThe density of right truncated data can be formulated as follows.\n\\[\n\\begin{aligned}\np_{\\text{resp}}(y \\mid X\\leq U, a, w, v) = \\frac{p_{\\text{resp}}(y \\mid a, w, v)\\cdot \\mathbb{I}_{\\{y\\leq U\\}}}{F_0(U \\mid a, w, v)+F_1(U \\mid a, w, v)}\n\\end{aligned}\n\\]\nAs the functions are implemented defectively, a truncated diffusion model cannot be calculated with the truncation functor \\(T[,]\\) as it would usually be done in Stan. This means the function call: y ~ wiener(...)T[L,U] does not work the way it is supposed to. When the truncation functor is called in Stan, Stan searches for a CDF implementation internally. In the case of the diffusion model, Stan would find the CDF, but is not aware of its defective implementation and calculates the computations as if it were a non-defective CDF. 
This causes misleading and incorrect results.\nTo implement the truncated model, write out the function shown above on the log-scale with left_bound = L and right_bound = U, where wiener_lcdf_unnorm() calls the logarithmized CDF of the diffusion model at the response-1-boundary:\nmodel {\n real log_denom = log_diff_exp(\n log_sum_exp(\n wiener_lcdf_unnorm(right_bound | a, t0, w, v, sv, sw, st),\n wiener_lcdf_unnorm(right_bound | a, t0, one_minus_w, neg_v,\n sv, sw, st)),\n log_sum_exp(\n wiener_lcdf_unnorm(left_bound | a, t0, w, v, sv, sw, st),\n wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w, neg_v,\n sv, sw, st)));\n // likelihood\n for (i in 1:N) {\n if (resp[i] == 1) {\n // response -1 boundary\n target += wiener_lpdf (rt[i] | a, t0, w, v, sv, sw, st);\n } else { \n // response -0 boundary ( mirror v and w)\n target += wiener_lpdf (rt[i] | a, t0, one_minus_w, neg_v,\n sv, sw, st);\n }\n } // end for\n target += -N * log_denom;\n}\nFor details of how to call a truncated model within the parallelization routine of reduce_sum or with truncation on only one side, see Henrich and Klauer (2026).\n\n\n\nData are censored when observations that are above or below a right or left boundary value are reported as occurrences of the event \\((y >\nU)\\), for \\(U\\) the right bound, or as occurrences of the event \\((y \\leq\nL)\\), for \\(L\\) the left bound, respectively. Like for truncated data, the range of the possible values is restricted, but the number of observations that fall outside the boundaries is kept, whereas in truncation, no count would be kept.\nFor the censored model, we distinguish two cases. In the first case, the responses of the censored trials are known, but the reaction times are not known. In the second case, neither the responses nor the reaction times of the censored trials are known. Note that the second case differs from a truncated model in the fact that the number of censored trials is still known. 
Consider first the case where the response is known even for censored data.\nTo model such data in Stan, the left and right reaction time bounds, left_bound and right_bound, respectively, are handed over in the data block, as well as a vector censored that tracks whether a trial is censored (= 1) or not (= 0), and counts of trials censored at the left reaction time bound and counts of trials censored at the right reaction time bound for each response in {0,1}. There are four such count variables: N_cens_left_0, N_cens_left_1, N_cens_right_0, N_cens_right_1:\nmodel {\n for (i in 1:N) {\n if (censored[i] == 0) {\n if (resp[i] == 1) {\n y[i] ~ wiener(a, t0, w, v, sv, sw, st0);\n } else if (resp[i] == 0) {\n y[i] ~ wiener(a, t0, one_minus_w, neg_v, sv, sw, st0);\n }\n }\n }\n\n // likelihood (response = 0)\n target += N_cens_left_0 \n * wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w, neg_v,\n sv, sw, st0);\n\n target += N_cens_right_0 \n * wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w, neg_v,\n sv, sw, st0);\n\n // likelihood (response = 1)\n target += N_cens_left_1 \n * wiener_lcdf_unnorm(left_bound | a, t0, w, v, sv, sw, st0);\n\n target += N_cens_right_1 \n * wiener_lccdf_unnorm(right_bound | a, t0, w, v, sv, sw, st0);\n}\nWhen data are censored at only one side, meaning that the reaction time constraint only exists for one of the two boundaries, omit the lines for the other side in the code. A both sided reaction time window would be, for example, when only reaction times are accepted that occur between 0.2 and 0.8 seconds. A one sided reaction time constraint would be, for example, when all reaction times below 0.8 seconds are accepted.\nWhen data consist of many conditions (as explained in the beginning), it is sometimes more convenient to loop over all trials instead of using count variables as described above, using the following notation and code. 
A vector containing the information whether a trial is censored or not, here censored, needs to be handed over in the data block. This vector splits the data into three bins: all trials \\(i\\) with censored[i]=0 are censored below the left reaction time bound, all trials \\(i\\) with censored[i]=1 fall between the reaction time bounds, and all trials \\(i\\) with censored[i]=2 are censored above the right reaction time bound. For non-censored trials, the log-PDF is computed, for left censored trials, the log-CDF is computed, and for right censored trials, the log-CCDF is computed:\nmodel { \n for (i in 1:N) { \n // right censored at right_bound\n if (resp [i] == 1) { \n // upper response boundary\n if (censored[i] == 0) {\n target += wiener_lcdf_unnorm(left_bound | a, t0, w, v, \n sv, sw, st0);\n } else if (censored[i] == 1) {\n target += wiener_lpdf(y[i] | a, t0, w, v, sv, sw, st0);\n } else if (censored[i] == 2) {\n target += wiener_lccdf_unnorm(right_bound | a, t0, w, v,\n sv, sw, st0);\n }\n } else { \n // lower response boundary (mirror drift and // starting point!)\n if (censored[i] == 0) {\n target += wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w,\n neg_v, sv, sw, st0);\n } else if (censored[i] == 1) {\n target += wiener_lpdf(y[i] | a, t0, one_minus_w, neg_v,\n sv, sw, st0);\n } else if (censored[i] == 2) {\n target += wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w,\n neg_v, sv, sw, st0);\n }\n }\n }\n}\nWhen the data are censored on only one side, omit the case that is not needed.\nNote that this block can be inserted in the definition of the parallelization function, partial_sum_wiener(), as defined below.\nSometimes also the response is missing (i.e., it is known that the reaction time in a trial fell outside the response window, but which response was given is unknown). 
One method that has been used to model such data has involved inferring the numbers of missing responses of either kind from the observed relative frequencies of the two responses. This approach has the problem that quite specific assumptions on the missing data have to be made (namely, that the proportions of the two kinds of responses are the same for responses within and outside the response window).\nThe following is a more principled approach that uses the cumulative distribution functions and their complements to provide the data-generating distribution of censored data. As before, let \\(L\\) be the left reaction time bound, and \\(U\\) the right reaction time bound, and consider decision times without inter-trial variabilities for the sake of simplicity. It follows that the likelihood contribution \\(\\textit{lik}_l\\) for a left-censored data point is given by\n\\[\n\\begin{aligned}\n\\textit{lik}_l(a,w,v) = F_0(L\\mid a,w,v) + F_1(L\\mid a,w,v),\n\\end{aligned}\n\\]\nwhereas the likelihood contribution \\(\\textit{lik}_r\\) due to a right-censored data point is given by\n\\[\n\\begin{aligned}\n\\textit{lik}_r(a,w,v) = \\text{CCDF}_0(U\\mid a,w,v) + \\text{CCDF}_1(U\\mid a,w,v).\n\\end{aligned}\n\\]\nSee the following code for an example of Stan code implementing this second case of censoring. This model call deals with the problem of unknown responses by computing the probability of choosing the response-1 or response-0 boundary outside the response window. Here, the CDF and/or the CCDF are required, depending upon whether there is only left-censoring, right-censoring, or censoring both to the left and to the right. 
The following code shows the functions block for a model that is right-censored using the function partial_sum_wiener() to parallelize the execution of a single Stan chain across multiple cores:\nfunctions {\n real partial_sum_wiener(array[] real rt_slice, int start,\n int end, real a, real t0, real w,\n real v, real sv, real sw, real st,\n array[] int resp, real right_bound,\n array[] int censored) {\n real ans = 0;\n for (i in start:end) {\n if (censored[i] == 1) {\n // not censored\n if (resp[i] == 1) {\n // upper boundary\n ans += wiener_lpdf(rt_slice[i+1- start ] | a, t0, w, v,\n sv, sw, st);\n } else {\n // lower boundary(mirror v and w)\n ans += wiener_lpdf(rt_slice[i+1- start ] | a, t0, one_minus_w,\n neg_v, sv, sw, st);\n }\n } else { \n // censored\n ans += log_sum_exp (\n wiener_lccdf_unnorm(right_bound | a, t0, w, v, sv, sw, st),\n wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w,\n neg_v, sv, sw, st));\n }\n }\n return ans;\n }\n}\nCombine this block with the model block in the example above by using the function reduce_sum().\n target += reduce_sum(partial_sum_wiener, rt, 1,\n a, t0, w, v, sv, sw, st, resp, right_bound, censored);\n}\nFor more details, see Henrich and Klauer (2026).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Wiener diffusion model" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html", + "href": "stan-users-guide/user-functions.html", + "title": "User-Defined Functions", + "section": "", + "text": "This chapter explains functions from a user perspective with examples; see the language reference for a full specification. User-defined functions allow computations to be encapsulated into a single named unit and invoked elsewhere by name. Similarly, functions allow complex procedures to be broken down into more understandable components. 
Writing modular code using descriptively named functions is easier to understand than a monolithic program, even if the latter is heavily commented.1\n\n\nHere’s an example of a skeletal Stan program with a user-defined relative difference function employed in the generated quantities block to compute the relative difference between two parameters.\nfunctions {\n real relative_diff(real x, real y) {\n real abs_diff;\n real avg_scale;\n abs_diff = abs(x - y);\n avg_scale = (abs(x) + abs(y)) / 2;\n return abs_diff / avg_scale;\n }\n}\n// ...\ngenerated quantities {\n real rdiff;\n rdiff = relative_diff(alpha, beta);\n}\nThe function is named relative_diff, and is declared to have two real-valued arguments and return a real-valued result. It is used the same way a built-in function would be used in the generated quantities block.\n\n\nAll functions are defined in their own block, which is labeled functions and must appear before all other program blocks. The user-defined functions block is optional.\n\n\n\nThe body (the part between the curly braces) contains ordinary Stan code, including local variables. The new function is used in the generated quantities block just as any of Stan’s built-in functions would be used.\n\n\n\nReturn statements, such as the one on the last line of the definition of relative_diff above, are only allowed in the bodies of function definitions. Return statements may appear anywhere in a function, but functions with non-void return types must end in a return statement.\n\n\n\nThe Stan reject statement provides a mechanism to report errors or problematic values encountered during program execution. It accepts any number of quoted string literals or Stan expressions as arguments. 
This statement is typically embedded in a conditional statement in order to detect bad or illegal outcomes of some processing step.\nIf an error is indicative of a problem from which it is not expected to be able to recover, Stan provides a fatal_error statement.\n\n\nRejection is used to flag errors that arise in inputs or in program state. It is far better to fail early with a localized informative error message than to run into problems much further downstream (as in rejecting a state or failing to compute a derivative).\nThe most common error check is to test that all of the arguments to a function are legal. The following function takes a square root of its input, so requires non-negative inputs; it is coded to guard against illegal inputs.\nreal dbl_sqrt(real x) {\n if (!(x >= 0)) {\n reject(\"dbl_sqrt(x): x must be positive; found x = \", x);\n }\n return 2 * sqrt(x);\n}\nThe negation of the positive test is important, because it also catches the case where x is a not-a-number value. If the condition had been coded as (x < 0) it would not catch the not-a-number case, though it could be written as (x < 0 || is_nan(x)). The positive infinite case is allowed through, but could also be checked with the is_inf(x) function. The square root function does not itself reject, but some downstream consumer of dbl_sqrt(-2) would be likely to raise an error, at which point the origin of the illegal input requires detective work. Or even worse, as Matt Simpson pointed out in the GitHub comments, the function could go into an infinite loop if it starts with an infinite value and tries to reduce it by arithmetic, likely consuming all available memory and crashing an interface. Much better to catch errors early and report on their origin.\nThe effect of rejection depends on the program block in which the rejection is executed. In transformed data, rejections cause the program to fail to load. 
In transformed parameters or in the model block, rejections cause the current state to be rejected in the Metropolis sense.2\nIn generated quantities there is no way to recover and generate the remaining parameters, so rejections cause subsequent values to be reported as NaNs. Extra care should be taken in calling functions which may reject in the generated quantities block.\n\n\n\n\nFunction argument and return types for vector and matrix types are not declared with their sizes, unlike type declarations for variables. Function argument type declarations may not be declared with constraints, either lower or upper bounds or structured constraints like forming a simplex or correlation matrix, (as is also the case for local variables); see the table of types in the reference manual for full details.\nFor example, here’s a function to compute the entropy of a categorical distribution with simplex parameter theta.\nreal entropy(vector theta) {\n return sum(theta .* log(theta));\n}\nAlthough theta must be a simplex, only the type vector is used.3\nUpper or lower bounds on values or constrained types are not allowed as return types or argument types in function declarations.\n\n\n\nArray arguments have their own syntax, which follows that used in this manual for function signatures. For example, a function that operates on a two-dimensional array to produce a one-dimensional array might be declared as follows.\narray[] real baz(array[,] real x);\nThe notation [ ] is used for one-dimensional arrays (as in the return above), [ , ] for two-dimensional arrays, [ , , ] for three-dimensional arrays, and so on.\nFunctions support arrays of any type, including matrix and vector types. As with other types, no constraints are allowed.\n\n\n\nA function argument which is a real-valued type or a container of a real-valued type, i.e., not an integer type or integer array type, can be qualified using the prefix qualifier data. 
The following is an example of a data-only function argument.\nreal foo(real y, data real mu) {\n return -0.5 * (y - mu)^2;\n}\nThis qualifier restricts this argument to being invoked with expressions which consist only of data variables, transformed data variables, literals, and function calls. A data-only function argument cannot involve real variables declared in the parameters, transformed parameters, or model block. Attempts to invoke a function using an expression which contains parameter, transformed parameters, or model block variables as a data-only argument will result in an error message from the parser.\nUse of the data qualifier must be consistent between the forward declaration and the definition of a function.\nThis qualifier should be used when writing functions that call the built-in ordinary differential equation (ODE) solvers, algebraic solvers, or map functions. These higher-order functions have strictly specified signatures where some arguments are data-only expressions. (See the ODE solver chapter for more usage details and the functions reference manual for full definitions.) When writing a function which calls the ODE or algebraic solver, arguments to that function which are passed into the call to the solver, either directly or indirectly, should have the data prefix qualifier. This allows for compile-time type checking and increases overall program understandability.\n\n\n\n\nIn some cases, it makes sense to have functions that do not return a value. For example, a routine to print the lower-triangular portion of a matrix can be defined as follows.\nfunctions {\n void pretty_print_tri_lower(matrix x) {\n if (rows(x) == 0) {\n print(\"empty matrix\");\n return;\n }\n print(\"rows=\", rows(x), \" cols=\", cols(x));\n for (m in 1:rows(x)) {\n for (n in 1:m) {\n print(\"[\", m, \",\", n, \"]=\", x[m, n]);\n }\n }\n }\n}\nThe special symbol void is used as the return type. 
This is not a type itself in that there are no values of type void; it merely indicates the lack of a value. As such, return statements for void functions are not allowed to have arguments, as in the return statement in the body of the previous example.\nVoid functions applied to appropriately typed arguments may be used on their own as statements. For example, the pretty-print function defined above may be applied to a covariance matrix being defined in the transformed parameters block.\ntransformed parameters {\n cov_matrix[K] Sigma;\n // ... code to set Sigma ...\n pretty_print_tri_lower(Sigma);\n // ...\n}\n\n\n\nFunctions whose names end in _lp are allowed to use sampling statements and target += statements; other functions are not. Because of this access, their use is restricted to the transformed parameters and model blocks.\nHere is an example of a function to assign standard normal priors to a vector of coefficients, along with a center and scale, and return the translated and scaled coefficients; see the reparameterization section for more information on efficient non-centered parameterizations\nfunctions {\n vector center_lp(vector beta_raw, real mu, real sigma) {\n beta_raw ~ std_normal();\n sigma ~ cauchy(0, 5);\n mu ~ cauchy(0, 2.5);\n return sigma * beta_raw + mu;\n }\n // ...\n}\nparameters {\n vector[K] beta_raw;\n real mu_beta;\n real<lower=0> sigma_beta;\n // ...\n}\ntransformed parameters {\n vector[K] beta;\n // ...\n beta = center_lp(beta_raw, mu_beta, sigma_beta);\n // ...\n}\n\n\n\nFunctions whose names end in _jacobian can use the jacobian += statement. 
This can be used to implement a custom change of variables for arbitrary parameters.\nFor example, this function recreates the built-in <upper=x> transform on real numbers:\nreal my_upper_bound_jacobian(real x, real ub) {\n jacobian += x;\n return ub - exp(x);\n}\nIt can be used as a replacement for real<upper=ub> as follows:\nfunctions {\n // my_upper_bound_jacobian as above\n}\ndata {\n real ub;\n}\nparameters {\n real b_raw;\n}\ntransformed parameters {\n real b = my_upper_bound_jacobian(b_raw, ub);\n}\nmodel {\n b ~ lognormal(0, 1);\n // ...\n}\n\n\n\nA user-specified function can be declared to act as a (pseudo) random number generator (PRNG) by giving it a name that ends in _rng. Giving a function a name that ends in _rng allows it to access built-in functions and user-defined functions that end in _rng, which includes all the built-in PRNG functions. Only functions ending in _rng are able to access the built-in PRNG functions. The use of functions ending in _rng must therefore be restricted to transformed data and generated quantities blocks like other PRNG functions; they may also be used in the bodies of other user-defined functions ending in _rng.\nFor example, the following function generates an \\(N \\times K\\) data matrix, the first column of which is filled with 1 values for the intercept and the remaining entries of which have values drawn from a standard normal PRNG.\nmatrix predictors_rng(int N, int K) {\n matrix[N, K] x;\n for (n in 1:N) {\n x[n, 1] = 1.0; // intercept\n for (k in 2:K) {\n x[n, k] = normal_rng(0, 1);\n }\n }\n return x;\n}\nThe following function defines a simulator for regression outcomes based on a data matrix x, coefficients beta, and noise scale sigma.\nvector regression_rng(vector beta, matrix x, real sigma) {\n vector[rows(x)] y;\n vector[rows(x)] mu;\n mu = x * beta;\n for (n in 1:rows(x)) {\n y[n] = normal_rng(mu[n], sigma);\n }\n return y;\n}\nThese might be used in a generated quantity block to simulate some fake data from 
a fitted regression model as follows.\nparameters {\n vector[K] beta;\n real<lower=0> sigma;\n // ...\n}\ngenerated quantities {\n matrix[N_sim, K] x_sim;\n vector[N_sim] y_sim;\n x_sim = predictors_rng(N_sim, K);\n y_sim = regression_rng(beta, x_sim, sigma);\n}\nA more sophisticated simulation might fit a multivariate normal to the predictors x and use the resulting parameters to generate multivariate normal draws for x_sim.\n\n\n\nProbability functions are distinguished in Stan by names ending in _lpdf for density functions and _lpmf for mass functions; in both cases, they must have real return types.\nSuppose a model uses several standard normal distributions, for which there is not a specific overloaded density nor defaults in Stan. So rather than writing out the location of 0 and scale of 1 for all of them, a new density function may be defined and reused.\nfunctions {\n real unit_normal_lpdf(real y) {\n return normal_lpdf(y | 0, 1);\n }\n}\n// ...\nmodel {\n alpha ~ unit_normal();\n beta ~ unit_normal();\n // ...\n}\nThe ability to use the unit_normal function as a density is keyed off its name ending in _lpdf (names ending in _lpmf for probability mass functions work the same way).\nIn general, if foo_lpdf is defined to consume \\(N + 1\\) arguments, then\ny ~ foo(theta1, ..., thetaN);\ncan be used as shorthand for\ntarget += foo_lpdf(y | theta1, ..., thetaN);\nAs with the built-in functions, the suffix _lpdf is dropped and the first argument moves to the left of the tilde symbol (~) in the distribution statement.\nFunctions ending in _lpmf (for probability mass functions), behave exactly the same way. 
The difference is that the first argument of a density function (_lpdf) must be continuous (not an integer or integer array), whereas the first argument of a mass function (_lpmf) must be discrete (integer or integer array).\n\n\n\nAs described in the reference manual function overloading is permitted in Stan, beginning in version 2.29.\nThis means multiple functions can be defined with the same name as long as they accept different numbers or types of arguments. User-defined functions can also overload Stan library functions.\n\n\nOverloading is a powerful productivity tool in programming languages, but it can also lead to confusion. In particular, it can be unclear at first glance which version of a function is being called at any particular call site, especially with type promotion allowed between scalar types. Because of this, it is a programming best practice that overloaded functions maintain the same meaning across definitions.\nFor example, consider a function triple which has the following three signatures\nreal triple(real x);\ncomplex triple(complex x);\narray[] real triple(array[] real);\nOne should expect that all overloads of this function perform the same basic task. This should lead to definitions of these functions which would satisfy the following assumptions that someone reading the program would expect\n// The function does what it says\ntriple(3.0) == 9.0\n// It is defined reasonably for different types\ntriple(to_complex(3.0)) == to_complex(triple(3.0))\n// A container version of this function works by element\ntriple({3.0, 4.0})[0] == triple({3.0, 4.0}[0])\nNote that none of these properties are enforced by Stan, they are mentioned merely to warn against uses of overloading which cause confusion.\n\n\n\nStan resolves overloaded functions by the number and type of arguments passed to the function. 
This can be subtle when multiple signatures with the same number of arguments are present.\nConsider the following function signatures\nreal foo(int a, real b);\nreal foo(real a, real b);\nGiven these, the function call foo(1.5, 2.5) is unambiguous - it must resolve to the second signature. But, the function call foo(1, 1.5) could be valid for either under Stan’s promotion rules, which allow integers to be promoted to real numbers.\nTo resolve this, Stan selects the signature which requires the fewest number of promotions for a given function call. In the above case, this means the call foo(1, 1.5) would select the first signature, because it requires 0 promotions (the second signature would require 1 promotion).\nFurthermore, there must be only one such signature, i.e., the minimum number of promotions must be a unique minimum. This requirement forbids certain kinds of overloading. For example, consider the function signatures\nreal bar(int x, real y);\nreal bar(real x, int y);\nThese signatures do not have a unique minimum number of promotions for the call bar(1, 2). Both signatures require one int to real promotion, and so it cannot be determined which is correct. Stan will produce a compilation error in this case.\nPromotion from integers to complex numbers is considered to be two separate promotions, first from int to real, then from real to complex. This means that integer arguments will “prefer” a signature with real types over complex types.\nFor example, consider the function signatures\nreal pop(real x);\nreal pop(complex x);\nStan will select the first signature when pop is called with an integer argument such as pop(0).\n\n\n\n\nFunctions will ideally be documented at their interface level. The Stan style guide for function documentation follows the same format as used by the Doxygen (C++) and Javadoc (Java) automatic documentation systems. 
Such specifications indicate the variables and their types and the return value, prefaced with some descriptive text.\nFor example, here’s some documentation for the prediction matrix generator.\n/**\n * Return a data matrix of specified size with rows\n * corresponding to items and the first column filled\n * with the value 1 to represent the intercept and the\n * remaining columns randomly filled with unit-normal draws.\n *\n * @param N Number of rows corresponding to data items\n * @param K Number of predictors, counting the intercept, per\n * item.\n * @return Simulated predictor matrix.\n */\nmatrix predictors_rng(int N, int K) {\n // ...\nThe comment begins with /**, ends with */, and has an asterisk (*) on each line. It uses @param followed by the argument’s identifier to document a function argument. The tag @return is used to indicate the return value. Stan does not (yet) have an automatic documentation generator like Javadoc or Doxygen, so this just looks like a big comment starting with /* and ending with */ to the Stan parser.\nFor functions that raise exceptions, exceptions can be documented using @throws.4\nFor example,\n /** ...\n * @param theta\n * @throws If any of the entries of theta is negative.\n */\nreal entropy(vector theta) {\n // ...\n}\nUsually an exception type would be provided, but these are not exposed as part of the Stan language, so there is no need to document them.\n\n\n\nFunctions may have a void or non-void return type and they may or may not have one of the special suffixes, _lpdf, _lpmf, _lp, or _rng.\n\n\nOnly functions declared to return void may be used as statements. These are also the only functions that use return statements with no arguments.\nOnly functions declared to return non-void values may be used as expressions. 
These functions require return statements with arguments of a type that matches the declared return type.\n\n\n\nOnly functions ending in _lpmf or _lpdf and with return type real may be used as probability functions in distribution statements.\nOnly functions ending in _lp may access the log probability accumulator through distribution statements or target += statements. Such functions may only be used in the transformed parameters or model blocks.\nOnly functions ending in _rng may access the built-in pseudo-random number generators. Such functions may only be used in the generated quantities block or transformed data block, or in the bodies of other user-defined functions ending in _rng.\n\n\n\n\nStan supports recursive function definitions, which can be useful for some applications. For instance, consider the matrix power operation, \\(A^n\\), which is defined for a square matrix \\(A\\) and positive integer \\(n\\) by \\[\nA^n\n=\n\\begin{cases}\n\\textrm{I} & \\quad\\text{if } n = 0, \\text{ and} \\\\\nA \\, A^{n-1} & \\quad\\text{if } n > 0.\n\\end{cases}\n\\]\nwhere \\(\\textrm{I}\\) is the identity matrix. This definition can be directly translated to a recursive function definition.\nmatrix matrix_pow(matrix a, int n) {\n if (n == 0) {\n return diag_matrix(rep_vector(1, rows(a)));\n } else {\n return a * matrix_pow(a, n - 1);\n }\n}\nIt would be more efficient to not allow the recursion to go all the way to the base case, adding the following conditional clause.\nelse if (n == 1) {\n return a;\n}\n\n\n\n\n\nTo generate random numbers, it is often sufficient to invert their cumulative distribution functions. This is built into many of the random number generators. For example, to generate a standard logistic variate, first generate a uniform variate \\(u \\sim \\textsf{uniform}(0, 1)\\), then run through the inverse cumulative distribution function, \\(y = \\textrm{logit}(u)\\). 
If this were not already built in as logistic_rng(0, 1), it could be coded in Stan directly as\nreal standard_logistic_rng() {\n real u = uniform_rng(0, 1);\n real y = logit(u);\n return y;\n}\nFollowing the same pattern, a standard normal RNG could be coded as\nreal standard_normal_rng() {\n real u = uniform_rng(0, 1);\n real y = inv_Phi(u);\n return y;\n}\nthat is, \\(y = \\Phi^{-1}(u)\\), where \\(\\Phi^{-1}\\) is the inverse cumulative distribution function for the standard normal distribution, implemented in the Stan function inv_Phi.\nIn order to generate non-standard variates of the location-scale variety, the variate is scaled by the scale parameter and shifted by the location parameter. For example, to generate \\(\\textsf{normal}(\\mu, \\sigma)\\) variates, it is enough to generate a uniform variate \\(u \\sim \\textsf{uniform}(0, 1)\\), then convert it to a standard normal variate, \\(z = \\Phi^{-1}(u)\\), where \\(\\Phi^{-1}(\\cdot)\\) is the inverse cumulative distribution function for the standard normal, and then, finally, scale and translate it, \\(y = \\mu +\n\\sigma \\times z\\). In code,\nreal my_normal_rng(real mu, real sigma) {\n real u = uniform_rng(0, 1);\n real z = inv_Phi(u);\n real y = mu + sigma * z;\n return y;\n}\nA robust version of this function would test that the arguments are finite and that sigma is non-negative, e.g.,\n if (is_nan(mu) || is_inf(mu)) {\n reject(\"my_normal_rng: mu must be finite; \",\n \"found mu = \", mu);\n }\n if (is_nan(sigma) || is_inf(sigma) || sigma < 0) {\n reject(\"my_normal_rng: sigma must be finite and non-negative; \",\n \"found sigma = \", sigma);\n }\n\n\n\nOften truncated uniform variates are needed, as in survival analysis when a time of death is censored beyond the end of the observations. 
To generate a truncated random variate, the cumulative distribution is used to find the truncation point in the inverse CDF, a uniform variate is generated in range, and then the inverse CDF translates it back.\n\n\nFor example, the following code generates a \\(\\textsf{Weibull}(\\alpha, \\sigma)\\) variate truncated below at a time \\(t\\),5\nreal weibull_lb_rng(real alpha, real sigma, real t) {\n real p = weibull_cdf(t | alpha, sigma); // cdf for lb\n real u = uniform_rng(p, 1); // unif in bounds\n real y = sigma * (-log1m(u))^inv(alpha); // inverse cdf\n return y;\n}\n\n\n\nIf there is a lower bound and upper bound, then the CDF trick is used twice to find a lower and upper bound. For example, to generate a \\(\\textsf{normal}(\\mu, \\sigma)\\) truncated to a region \\((a, b)\\), the following code suffices,\nreal normal_lub_rng(real mu, real sigma, real lb, real ub) {\n real p_lb = normal_cdf(lb | mu, sigma);\n real p_ub = normal_cdf(ub | mu, sigma);\n real u = uniform_rng(p_lb, p_ub);\n real y = mu + sigma * inv_Phi(u);\n return y;\n}\nTo make this more robust, all variables should be tested for finiteness, sigma should be tested for positiveness, and lb and ub should be tested to ensure the upper bound is greater than the lower bound. While it may be tempting to compress lines, the variable names serve as a kind of chunking of operations and naming for readability; compare the multiple statement version above with the single statement\n return mu + sigma * inv_Phi(uniform_rng(normal_cdf(lb | mu, sigma),\n normal_cdf(ub | mu, sigma)));\nfor readability. The names like p indicate probabilities, and p_lb and p_ub indicate the probabilities of the bounds. 
The variable u is clearly named as a uniform variate, and y is used to denote the variate being generated itself.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#basic-functions.section", + "href": "stan-users-guide/user-functions.html#basic-functions.section", + "title": "User-Defined Functions", + "section": "", + "text": "Here’s an example of a skeletal Stan program with a user-defined relative difference function employed in the generated quantities block to compute the relative difference between two parameters.\nfunctions {\n real relative_diff(real x, real y) {\n real abs_diff;\n real avg_scale;\n abs_diff = abs(x - y);\n avg_scale = (abs(x) + abs(y)) / 2;\n return abs_diff / avg_scale;\n }\n}\n// ...\ngenerated quantities {\n real rdiff;\n rdiff = relative_diff(alpha, beta);\n}\nThe function is named relative_diff, and is declared to have two real-valued arguments and return a real-valued result. It is used the same way a built-in function would be used in the generated quantities block.\n\n\nAll functions are defined in their own block, which is labeled functions and must appear before all other program blocks. The user-defined functions block is optional.\n\n\n\nThe body (the part between the curly braces) contains ordinary Stan code, including local variables. The new function is used in the generated quantities block just as any of Stan’s built-in functions would be used.\n\n\n\nReturn statements, such as the one on the last line of the definition of relative_diff above, are only allowed in the bodies of function definitions. Return statements may appear anywhere in a function, but functions with non-void return types must end in a return statement.\n\n\n\nThe Stan reject statement provides a mechanism to report errors or problematic values encountered during program execution. 
It accepts any number of quoted string literals or Stan expressions as arguments. This statement is typically embedded in a conditional statement in order to detect bad or illegal outcomes of some processing step.\nIf an error is indicative of a problem from which it is not expected to be able to recover, Stan provides a fatal_error statement.\n\n\nRejection is used to flag errors that arise in inputs or in program state. It is far better to fail early with a localized informative error message than to run into problems much further downstream (as in rejecting a state or failing to compute a derivative).\nThe most common use of rejection is to test that all of the arguments to a function are legal. The following function takes a square root of its input, so requires non-negative inputs; it is coded to guard against illegal inputs.\nreal dbl_sqrt(real x) {\n if (!(x >= 0)) {\n reject(\"dbl_sqrt(x): x must be positive; found x = \", x);\n }\n return 2 * sqrt(x);\n}\nThe negation of the positive test is important, because it also catches the case where x is a not-a-number value. If the condition had been coded as (x < 0) it would not catch the not-a-number case, though it could be written as (x < 0 || is_nan(x)). The positive infinite case is allowed through, but could also be checked with the is_inf(x) function. The square root function does not itself reject, but some downstream consumer of dbl_sqrt(-2) would be likely to raise an error, at which point the origin of the illegal input requires detective work. Or even worse, as Matt Simpson pointed out in the GitHub comments, the function could go into an infinite loop if it starts with an infinite value and tries to reduce it by arithmetic, likely consuming all available memory and crashing an interface. Much better to catch errors early and report on their origin.\nThe effect of rejection depends on the program block in which the rejection is executed. 
In transformed data, rejections cause the program to fail to load. In transformed parameters or in the model block, rejections cause the current state to be rejected in the Metropolis sense.2\nIn generated quantities there is no way to recover and generate the remaining parameters, so rejections cause subsequent values to be reported as NaNs. Extra care should be taken in calling functions which may reject in the generated quantities block.\n\n\n\n\nFunction argument and return types for vector and matrix types are not declared with their sizes, unlike type declarations for variables. Function argument type declarations may not be declared with constraints, either lower or upper bounds or structured constraints like forming a simplex or correlation matrix, (as is also the case for local variables); see the table of types in the reference manual for full details.\nFor example, here’s a function to compute the entropy of a categorical distribution with simplex parameter theta.\nreal entropy(vector theta) {\n return sum(theta .* log(theta));\n}\nAlthough theta must be a simplex, only the type vector is used.3\nUpper or lower bounds on values or constrained types are not allowed as return types or argument types in function declarations.\n\n\n\nArray arguments have their own syntax, which follows that used in this manual for function signatures. For example, a function that operates on a two-dimensional array to produce a one-dimensional array might be declared as follows.\narray[] real baz(array[,] real x);\nThe notation [ ] is used for one-dimensional arrays (as in the return above), [ , ] for two-dimensional arrays, [ , , ] for three-dimensional arrays, and so on.\nFunctions support arrays of any type, including matrix and vector types. 
As with other types, no constraints are allowed.\n\n\n\nA function argument which is a real-valued type or a container of a real-valued type, i.e., not an integer type or integer array type, can be qualified using the prefix qualifier data. The following is an example of a data-only function argument.\nreal foo(real y, data real mu) {\n return -0.5 * (y - mu)^2;\n}\nThis qualifier restricts this argument to being invoked with expressions which consist only of data variables, transformed data variables, literals, and function calls. A data-only function argument cannot involve real variables declared in the parameters, transformed parameters, or model block. Attempts to invoke a function using an expression which contains parameter, transformed parameters, or model block variables as a data-only argument will result in an error message from the parser.\nUse of the data qualifier must be consistent between the forward declaration and the definition of a function.\nThis qualifier should be used when writing functions that call the built-in ordinary differential equation (ODE) solvers, algebraic solvers, or map functions. These higher-order functions have strictly specified signatures where some arguments are data-only expressions. (See the ODE solver chapter for more usage details and the functions reference manual for full definitions.) When writing a function which calls the ODE or algebraic solver, arguments to that function which are passed into the call to the solver, either directly or indirectly, should have the data prefix qualifier. 
This allows for compile-time type checking and increases overall program understandability.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#functions-as-statements", + "href": "stan-users-guide/user-functions.html#functions-as-statements", + "title": "User-Defined Functions", + "section": "", + "text": "In some cases, it makes sense to have functions that do not return a value. For example, a routine to print the lower-triangular portion of a matrix can be defined as follows.\nfunctions {\n void pretty_print_tri_lower(matrix x) {\n if (rows(x) == 0) {\n print(\"empty matrix\");\n return;\n }\n print(\"rows=\", rows(x), \" cols=\", cols(x));\n for (m in 1:rows(x)) {\n for (n in 1:m) {\n print(\"[\", m, \",\", n, \"]=\", x[m, n]);\n }\n }\n }\n}\nThe special symbol void is used as the return type. This is not a type itself in that there are no values of type void; it merely indicates the lack of a value. As such, return statements for void functions are not allowed to have arguments, as in the return statement in the body of the previous example.\nVoid functions applied to appropriately typed arguments may be used on their own as statements. For example, the pretty-print function defined above may be applied to a covariance matrix being defined in the transformed parameters block.\ntransformed parameters {\n cov_matrix[K] Sigma;\n // ... 
code to set Sigma ...\n pretty_print_tri_lower(Sigma);\n // ...\n}", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#functions-accessing-the-log-probability-accumulator", + "href": "stan-users-guide/user-functions.html#functions-accessing-the-log-probability-accumulator", + "title": "User-Defined Functions", + "section": "", + "text": "Functions whose names end in _lp are allowed to use sampling statements and target += statements; other functions are not. Because of this access, their use is restricted to the transformed parameters and model blocks.\nHere is an example of a function to assign standard normal priors to a vector of coefficients, along with a center and scale, and return the translated and scaled coefficients; see the reparameterization section for more information on efficient non-centered parameterizations\nfunctions {\n vector center_lp(vector beta_raw, real mu, real sigma) {\n beta_raw ~ std_normal();\n sigma ~ cauchy(0, 5);\n mu ~ cauchy(0, 2.5);\n return sigma * beta_raw + mu;\n }\n // ...\n}\nparameters {\n vector[K] beta_raw;\n real mu_beta;\n real<lower=0> sigma_beta;\n // ...\n}\ntransformed parameters {\n vector[K] beta;\n // ...\n beta = center_lp(beta_raw, mu_beta, sigma_beta);\n // ...\n}", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#functions-implementing-change-of-variable-adjustments", + "href": "stan-users-guide/user-functions.html#functions-implementing-change-of-variable-adjustments", + "title": "User-Defined Functions", + "section": "", + "text": "Functions whose names end in _jacobian can use the jacobian += statement. 
This can be used to implement a custom change of variables for arbitrary parameters.\nFor example, this function recreates the built-in <upper=x> transform on real numbers:\nreal my_upper_bound_jacobian(real x, real ub) {\n jacobian += x;\n return ub - exp(x);\n}\nIt can be used as a replacement for real<upper=ub> as follows:\nfunctions {\n // my_upper_bound_jacobian as above\n}\ndata {\n real ub;\n}\nparameters {\n real b_raw;\n}\ntransformed parameters {\n real b = my_upper_bound_jacobian(b_raw, ub);\n}\nmodel {\n b ~ lognormal(0, 1);\n // ...\n}", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#functions-acting-as-random-number-generators", + "href": "stan-users-guide/user-functions.html#functions-acting-as-random-number-generators", + "title": "User-Defined Functions", + "section": "", + "text": "A user-specified function can be declared to act as a (pseudo) random number generator (PRNG) by giving it a name that ends in _rng. Giving a function a name that ends in _rng allows it to access built-in functions and user-defined functions that end in _rng, which includes all the built-in PRNG functions. Only functions ending in _rng are able to access the built-in PRNG functions. 
The use of functions ending in _rng must therefore be restricted to transformed data and generated quantities blocks like other PRNG functions; they may also be used in the bodies of other user-defined functions ending in _rng.\nFor example, the following function generates an \\(N \\times K\\) data matrix, the first column of which is filled with 1 values for the intercept and the remaining entries of which have values drawn from a standard normal PRNG.\nmatrix predictors_rng(int N, int K) {\n matrix[N, K] x;\n for (n in 1:N) {\n x[n, 1] = 1.0; // intercept\n for (k in 2:K) {\n x[n, k] = normal_rng(0, 1);\n }\n }\n return x;\n}\nThe following function defines a simulator for regression outcomes based on a data matrix x, coefficients beta, and noise scale sigma.\nvector regression_rng(vector beta, matrix x, real sigma) {\n vector[rows(x)] y;\n vector[rows(x)] mu;\n mu = x * beta;\n for (n in 1:rows(x)) {\n y[n] = normal_rng(mu[n], sigma);\n }\n return y;\n}\nThese might be used in a generated quantity block to simulate some fake data from a fitted regression model as follows.\nparameters {\n vector[K] beta;\n real<lower=0> sigma;\n // ...\n}\ngenerated quantities {\n matrix[N_sim, K] x_sim;\n vector[N_sim] y_sim;\n x_sim = predictors_rng(N_sim, K);\n y_sim = regression_rng(beta, x_sim, sigma);\n}\nA more sophisticated simulation might fit a multivariate normal to the predictors x and use the resulting parameters to generate multivariate normal draws for x_sim.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#user-defined-probability-functions", + "href": "stan-users-guide/user-functions.html#user-defined-probability-functions", + "title": "User-Defined Functions", + "section": "", + "text": "Probability functions are distinguished in Stan by names ending in _lpdf for density functions and _lpmf for mass functions; in both cases, they must have real return 
types.\nSuppose a model uses several standard normal distributions, for which there is not a specific overloaded density nor defaults in Stan. So rather than writing out the location of 0 and scale of 1 for all of them, a new density function may be defined and reused.\nfunctions {\n real unit_normal_lpdf(real y) {\n return normal_lpdf(y | 0, 1);\n }\n}\n// ...\nmodel {\n alpha ~ unit_normal();\n beta ~ unit_normal();\n // ...\n}\nThe ability to use the unit_normal function as a density is keyed off its name ending in _lpdf (names ending in _lpmf for probability mass functions work the same way).\nIn general, if foo_lpdf is defined to consume \\(N + 1\\) arguments, then\ny ~ foo(theta1, ..., thetaN);\ncan be used as shorthand for\ntarget += foo_lpdf(y | theta1, ..., thetaN);\nAs with the built-in functions, the suffix _lpdf is dropped and the first argument moves to the left of the tilde symbol (~) in the distribution statement.\nFunctions ending in _lpmf (for probability mass functions), behave exactly the same way. The difference is that the first argument of a density function (_lpdf) must be continuous (not an integer or integer array), whereas the first argument of a mass function (_lpmf) must be discrete (integer or integer array).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#overloading-functions", + "href": "stan-users-guide/user-functions.html#overloading-functions", + "title": "User-Defined Functions", + "section": "", + "text": "As described in the reference manual function overloading is permitted in Stan, beginning in version 2.29.\nThis means multiple functions can be defined with the same name as long as they accept different numbers or types of arguments. User-defined functions can also overload Stan library functions.\n\n\nOverloading is a powerful productivity tool in programming languages, but it can also lead to confusion. 
In particular, it can be unclear at first glance which version of a function is being called at any particular call site, especially with type promotion allowed between scalar types. Because of this, it is a programming best practice that overloaded functions maintain the same meaning across definitions.\nFor example, consider a function triple which has the following three signatures\nreal triple(real x);\ncomplex triple(complex x);\narray[] real triple(array[] real);\nOne should expect that all overloads of this function perform the same basic task. This should lead to definitions of these functions which would satisfy the following assumptions that someone reading the program would expect\n// The function does what it says\ntriple(3.0) == 9.0\n// It is defined reasonably for different types\ntriple(to_complex(3.0)) == to_complex(triple(3.0))\n// A container version of this function works by element\ntriple({3.0, 4.0})[0] == triple({3.0, 4.0}[0])\nNote that none of these properties are enforced by Stan, they are mentioned merely to warn against uses of overloading which cause confusion.\n\n\n\nStan resolves overloaded functions by the number and type of arguments passed to the function. This can be subtle when multiple signatures with the same number of arguments are present.\nConsider the following function signatures\nreal foo(int a, real b);\nreal foo(real a, real b);\nGiven these, the function call foo(1.5, 2.5) is unambiguous - it must resolve to the second signature. But, the function call foo(1, 1.5) could be valid for either under Stan’s promotion rules, which allow integers to be promoted to real numbers.\nTo resolve this, Stan selects the signature which requires the fewest number of promotions for a given function call. 
In the above case, this means the call foo(1, 1.5) would select the first signature, because it requires 0 promotions (the second signature would require 1 promotion).\nFurthermore, there must be only one such signature, e.g., the minimum number of promotions must be a unique minimum. This requirement forbids certain kinds of overloading. For example, consider the function signatures\nreal bar(int x, real y);\nreal bar(real x, int y);\nThese signatures do not have a unique minimum number of promotions for the call bar(1, 2). Both signatures require one int to real promotion, and so it cannot be determined which is correct. Stan will produce a compilation error in this case.\nPromotion from integers to complex numbers is considered to be two separate promotions, first from int to real, then from real to complex. This means that integer arguments will “prefer” a signature with real types over complex types.\nFor example, consider the function signatures\nreal pop(real x);\nreal pop(complex x);\nStan will select the first signature when pop is called with an integer argument such as pop(0).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#documenting-functions.section", + "href": "stan-users-guide/user-functions.html#documenting-functions.section", + "title": "User-Defined Functions", + "section": "", + "text": "Functions will ideally be documented at their interface level. The Stan style guide for function documentation follows the same format as used by the Doxygen (C++) and Javadoc (Java) automatic documentation systems. 
Such specifications indicate the variables and their types and the return value, prefaced with some descriptive text.\nFor example, here’s some documentation for the prediction matrix generator.\n/**\n * Return a data matrix of specified size with rows\n * corresponding to items and the first column filled\n * with the value 1 to represent the intercept and the\n * remaining columns randomly filled with unit-normal draws.\n *\n * @param N Number of rows corresponding to data items\n * @param K Number of predictors, counting the intercept, per\n * item.\n * @return Simulated predictor matrix.\n */\nmatrix predictors_rng(int N, int K) {\n // ...\nThe comment begins with /**, ends with */, and has an asterisk (*) on each line. It uses @param followed by the argument’s identifier to document a function argument. The tag @return is used to indicate the return value. Stan does not (yet) have an automatic documentation generator like Javadoc or Doxygen, so this just looks like a big comment starting with /* and ending with */ to the Stan parser.\nFor functions that raise exceptions, exceptions can be documented using @throws.4\nFor example,\n /** ...\n * @param theta\n * @throws If any of the entries of theta is negative.\n */\nreal entropy(vector theta) {\n // ...\n}\nUsually an exception type would be provided, but these are not exposed as part of the Stan language, so there is no need to document them.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#summary-of-function-types", + "href": "stan-users-guide/user-functions.html#summary-of-function-types", + "title": "User-Defined Functions", + "section": "", + "text": "Functions may have a void or non-void return type and they may or may not have one of the special suffixes, _lpdf, _lpmf, _lp, or _rng.\n\n\nOnly functions declared to return void may be used as statements. 
These are also the only functions that use return statements with no arguments.\nOnly functions declared to return non-void values may be used as expressions. These functions require return statements with arguments of a type that matches the declared return type.\n\n\n\nOnly functions ending in _lpmf or _lpdf and with return type real may be used as probability functions in distribution statements.\nOnly functions ending in _lp may access the log probability accumulator through distribution statements or target += statements. Such functions may only be used in the transformed parameters or model blocks.\nOnly functions ending in _rng may access the built-in pseudo-random number generators. Such functions may only be used in the generated quantities block or transformed data block, or in the bodies of other user-defined functions ending in _rng.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#recursive-functions", + "href": "stan-users-guide/user-functions.html#recursive-functions", + "title": "User-Defined Functions", + "section": "", + "text": "Stan supports recursive function definitions, which can be useful for some applications. For instance, consider the matrix power operation, \\(A^n\\), which is defined for a square matrix \\(A\\) and positive integer \\(n\\) by \\[\nA^n\n=\n\\begin{cases}\n\\textrm{I} & \\quad\\text{if } n = 0, \\text{ and} \\\\\nA \\, A^{n-1} & \\quad\\text{if } n > 0.\n\\end{cases}\n\\]\nwhere \\(\\textrm{I}\\) is the identity matrix. 
This definition can be directly translated to a recursive function definition.\nmatrix matrix_pow(matrix a, int n) {\n if (n == 0) {\n return diag_matrix(rep_vector(1, rows(a)));\n } else {\n return a * matrix_pow(a, n - 1);\n }\n}\nIt would be more efficient to not allow the recursion to go all the way to the base case, adding the following conditional clause.\nelse if (n == 1) {\n return a;\n}", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#truncated-random-number-generation", + "href": "stan-users-guide/user-functions.html#truncated-random-number-generation", + "title": "User-Defined Functions", + "section": "", + "text": "To generate random numbers, it is often sufficient to invert their cumulative distribution functions. This is built into many of the random number generators. For example, to generate a standard logistic variate, first generate a uniform variate \\(u \\sim \\textsf{uniform}(0, 1)\\), then run through the inverse cumulative distribution function, \\(y = \\textrm{logit}(u)\\). If this were not already built in as logistic_rng(0, 1), it could be coded in Stan directly as\nreal standard_logistic_rng() {\n real u = uniform_rng(0, 1);\n real y = logit(u);\n return y;\n}\nFollowing the same pattern, a standard normal RNG could be coded as\nreal standard_normal_rng() {\n real u = uniform_rng(0, 1);\n real y = inv_Phi(u);\n return y;\n}\nthat is, \\(y = \\Phi^{-1}(u)\\), where \\(\\Phi^{-1}\\) is the inverse cumulative distribution function for the standard normal distribution, implemented in the Stan function inv_Phi.\nIn order to generate non-standard variates of the location-scale variety, the variate is scaled by the scale parameter and shifted by the location parameter. 
For example, to generate \\(\\textsf{normal}(\\mu, \\sigma)\\) variates, it is enough to generate a uniform variate \\(u \\sim \\textsf{uniform}(0, 1)\\), then convert it to a standard normal variate, \\(z = \\Phi^{-1}(u)\\), where \\(\\Phi^{-1}(\\cdot)\\) is the inverse cumulative distribution function for the standard normal, and then, finally, scale and translate it, \\(y = \\mu +\n\\sigma \\times z\\). In code,\nreal my_normal_rng(real mu, real sigma) {\n real u = uniform_rng(0, 1);\n real z = inv_Phi(u);\n real y = mu + sigma * z;\n return y;\n}\nA robust version of this function would test that the arguments are finite and that sigma is non-negative, e.g.,\n if (is_nan(mu) || is_inf(mu)) {\n reject(\"my_normal_rng: mu must be finite; \",\n \"found mu = \", mu);\n }\n if (is_nan(sigma) || is_inf(sigma) || sigma < 0) {\n reject(\"my_normal_rng: sigma must be finite and non-negative; \",\n \"found sigma = \", sigma);\n }\n\n\n\nOften truncated uniform variates are needed, as in survival analysis when a time of death is censored beyond the end of the observations. To generate a truncated random variate, the cumulative distribution is used to find the truncation point in the inverse CDF, a uniform variate is generated in range, and then the inverse CDF translates it back.\n\n\nFor example, the following code generates a \\(\\textsf{Weibull}(\\alpha, \\sigma)\\) variate truncated below at a time \\(t\\),5\nreal weibull_lb_rng(real alpha, real sigma, real t) {\n real p = weibull_cdf(t | alpha, sigma); // cdf for lb\n real u = uniform_rng(p, 1); // unif in bounds\n real y = sigma * (-log1m(u))^inv(alpha); // inverse cdf\n return y;\n}\n\n\n\nIf there is a lower bound and upper bound, then the CDF trick is used twice to find a lower and upper bound. 
For example, to generate a \\(\\textsf{normal}(\\mu, \\sigma)\\) truncated to a region \\((a, b)\\), the following code suffices,\nreal normal_lub_rng(real mu, real sigma, real lb, real ub) {\n real p_lb = normal_cdf(lb | mu, sigma);\n real p_ub = normal_cdf(ub | mu, sigma);\n real u = uniform_rng(p_lb, p_ub);\n real y = mu + sigma * inv_Phi(u);\n return y;\n}\nTo make this more robust, all variables should be tested for finiteness, sigma should be tested for positiveness, and lb and ub should be tested to ensure the upper bound is greater than the lower bound. While it may be tempting to compress lines, the variable names serve as a kind of chunking of operations and naming for readability; compare the multiple statement version above with the single statement\n return mu + sigma * inv_Phi(uniform_rng(normal_cdf(lb | mu, sigma),\n normal_cdf(ub | mu, sigma)));\nfor readability. The names like p indicate probabilities, and p_lb and p_ub indicate the probabilities of the bounds. The variable u is clearly named as a uniform variate, and y is used to denote the variate being generated itself.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/user-functions.html#footnotes", + "href": "stan-users-guide/user-functions.html#footnotes", + "title": "User-Defined Functions", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe main problem with comments is that they can be misleading, either due to misunderstandings on the programmer’s part or because the program’s behavior is modified after the comment is written. The program always behaves the way the code is written, which is why refactoring complex code into understandable units is preferable to simply adding comments.↩︎\nJust because this makes it possible to code a rejection sampler does not make it a good idea. Rejections break differentiability and the smooth exploration of the posterior. 
In Hamiltonian Monte Carlo, it can cause the sampler to be reduced to a diffusive random walk.↩︎\nA range of built-in validation routines is coming to Stan soon! Alternatively, the reject statement can be used to check constraints on the simplex.↩︎\nAs of Stan 2.9.0, the only way a user-defined producer will raise an exception is if a function it calls (including distribution statements) raises an exception via the reject statement.↩︎\nThe original code and impetus for including this in the manual came from the Stan forums post; by user lcomm, who also explained truncation above and below.↩︎", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "User-Defined Functions" + ] + }, + { + "objectID": "stan-users-guide/time-series.html", + "href": "stan-users-guide/time-series.html", + "title": "Time-Series Models", + "section": "", + "text": "Time series data come arranged in temporal order. This chapter presents two kinds of time series models, regression-like models such as autoregressive and moving average models, and hidden Markov models.\nThe Gaussian processes chapter presents Gaussian processes, which may also be used for time-series (and spatial) data.\n\n\nA first-order autoregressive model (AR(1)) with normal noise takes each point \\(y_n\\) in a sequence \\(y\\) to be generated according to \\[\ny_n \\sim \\textsf{normal}(\\alpha + \\beta y_{n-1}, \\sigma).\n\\]\nThat is, the expected value of \\(y_n\\) is \\(\\alpha + \\beta y_{n-1}\\), with noise scaled as \\(\\sigma\\).\n\n\nWith improper flat priors on the regression coefficients \\(\\alpha\\) and \\(\\beta\\) and on the positively-constrained noise scale (\\(\\sigma\\)), the Stan program for the AR(1) model is as follows.1\ndata {\n int<lower=0> N;\n vector[N] y;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n for (n in 2:N) {\n y[n] ~ normal(alpha + beta * y[n-1], sigma);\n }\n}\nThe first observed data point, y[1], is not modeled here because there is 
nothing to condition on; instead, it acts to condition y[2]. This model also uses an improper prior for sigma, but there is no obstacle to adding an informative prior if information is available on the scale of the changes in y over time, or a weakly informative prior to help guide inference if rough knowledge of the scale of y is available.\n\n\nAlthough perhaps a bit more difficult to read, a much more efficient way to write the above model is by slicing the vectors, with the model above being replaced with the one-liner\nmodel {\n y[2:N] ~ normal(alpha + beta * y[1:(N - 1)], sigma);\n}\nThe left-hand side slicing operation pulls out the last \\(N-1\\) elements and the right-hand side version pulls out the first \\(N-1\\).\n\n\n\n\nProper priors of a range of different families may be added for the regression coefficients and noise scale. The normal noise model can be changed to a Student-\\(t\\) distribution or any other distribution with unbounded support. The model could also be made hierarchical if multiple series of observations are available.\nTo enforce the estimation of a stationary AR(1) process, the slope coefficient beta may be constrained with bounds as follows.\nreal<lower=-1, upper=1> beta;\nIn practice, such a constraint is not recommended. If the data are not well fit by a stationary model it is best to know this. Stationary parameter estimates can be encouraged with a prior favoring values of beta near zero.\n\n\n\nExtending the order of the model is also straightforward. 
For example, an AR(2) model could be coded with the second-order coefficient gamma and the following model statement.\nfor (n in 3:N) {\n y[n] ~ normal(alpha + beta*y[n-1] + gamma*y[n-2], sigma);\n}\n\n\n\nA general model where the order is itself given as data can be coded by putting the coefficients in an array and computing the linear predictor in a loop.\ndata {\n int<lower=0> K;\n int<lower=0> N;\n array[N] real y;\n}\nparameters {\n real alpha;\n array[K] real beta;\n real sigma;\n}\nmodel {\n for (n in (K+1):N) {\n real mu = alpha;\n for (k in 1:K) {\n mu += beta[k] * y[n-k];\n }\n y[n] ~ normal(mu, sigma);\n }\n}\n\n\n\nEconometric and financial time-series models usually assume heteroscedasticity: they allow the scale of the noise terms defining the series to vary over time. The simplest such model is the autoregressive conditional heteroscedasticity (ARCH) model (Engle 1982). Unlike the autoregressive model AR(1), which modeled the mean of the series as varying over time but left the noise term fixed, the ARCH(1) model takes the scale of the noise terms to vary over time but leaves the mean term fixed. Models could be defined where both the mean and scale vary over time; the econometrics literature presents a wide range of time-series modeling choices.\nThe ARCH(1) model is typically presented as the following sequence of equations, where \\(r_t\\) is the observed return at time point \\(t\\) and \\(\\mu\\), \\(\\alpha_0\\), and \\(\\alpha_1\\) are unknown regression coefficient parameters.\n\\[\\begin{align*}\nr_t &= \\mu + a_t \\\\\na_t &= \\sigma_t \\epsilon_t \\\\\n\\epsilon_t &\\sim \\textsf{normal}(0,1) \\\\\n\\sigma^2_t &= \\alpha_0 + \\alpha_1 a_{t-1}^2\n\\end{align*}\\]\nIn order to ensure the noise terms \\(\\sigma^2_t\\) are positive, the scale coefficients are constrained to be positive, \\(\\alpha_0, \\alpha_1\n> 0\\). 
To ensure stationarity of the time series, the slope is constrained to be less than one, i.e., \\(\\alpha_1 < 1\\).2\nThe ARCH(1) model may be coded directly in Stan as follows.\ndata {\n int<lower=0> T; // number of time points\n array[T] real r; // return at time t\n}\nparameters {\n real mu; // average return\n real<lower=0> alpha0; // noise intercept\n real<lower=0, upper=1> alpha1; // noise slope\n}\nmodel {\n for (t in 2:T) {\n r[t] ~ normal(mu, sqrt(alpha0 + alpha1\n * pow(r[t - 1] - mu,2)));\n }\n}\nThe loop in the model is defined so that the return at time \\(t=1\\) is not modeled; the model in the next section shows how to model the return at \\(t=1\\). The model can be vectorized to be more efficient; the model in the next section provides an example.\n\n\n\n\nA set of variables is homoscedastic if their variances are all the same; the variables are heteroscedastic if they do not all have the same variance. Heteroscedastic time-series models allow the noise term to vary over time.\n\n\nThe basic generalized autoregressive conditional heteroscedasticity (GARCH) model, GARCH(1,1), extends the ARCH(1) model by including the squared previous difference in return from the mean at time \\(t-1\\) as a predictor of volatility at time \\(t\\), defining \\[\n\\sigma^2_t = \\alpha_0 + \\alpha_1 a^2_{t-1} + \\beta_1 \\sigma^2_{t-1}.\n\\]\nTo ensure the scale term is positive and the resulting time series stationary, the coefficients must all satisfy \\(\\alpha_0, \\alpha_1,\n\\beta_1 > 0\\) and the slopes \\(\\alpha_1 + \\beta_1 < 1\\).\ndata {\n int<lower=0> T;\n array[T] real r;\n real<lower=0> sigma1;\n}\nparameters {\n real mu;\n real<lower=0> alpha0;\n real<lower=0, upper=1> alpha1;\n real<lower=0, upper=(1-alpha1)> beta1;\n}\ntransformed parameters {\n array[T] real<lower=0> sigma;\n sigma[1] = sigma1;\n for (t in 2:T) {\n sigma[t] = sqrt(alpha0\n + alpha1 * pow(r[t - 1] - mu, 2)\n + beta1 * pow(sigma[t - 1], 2));\n }\n}\nmodel {\n r ~ normal(mu, 
sigma);\n}\nTo get the recursive definition of the volatility regression off the ground, the data declaration includes a non-negative value sigma1 for the scale of the noise at \\(t = 1\\).\nThe constraints are coded directly on the parameter declarations. This declaration is order-specific in that the constraint on beta1 depends on the value of alpha1.\nA transformed parameter array of non-negative values sigma is used to store the scale values at each time point. The definition of these values in the transformed parameters block is where the regression is now defined. There is an intercept alpha0, a slope alpha1 for the squared difference in return from the mean at the previous time, and a slope beta1 for the previous noise scale squared. Finally, the whole regression is inside the sqrt function because Stan requires scale (deviation) parameters (not variance parameters) for the normal distribution.\nWith the regression in the transformed parameters block, the model reduces to a single vectorized distribution statement. Because r and sigma are of length T, all of the data are modeled directly.\n\n\n\n\nA moving average model uses previous errors as predictors for future outcomes. For a moving average model of order \\(Q\\), \\(\\mbox{MA}(Q)\\), there is an overall mean parameter \\(\\mu\\) and regression coefficients \\(\\theta_q\\) for previous error terms. 
With \\(\\epsilon_t\\) being the noise at time \\(t\\), the model for outcome \\(y_t\\) is defined by \\[\ny_t = \\mu + \\theta_1 \\epsilon_{t-1} + \\dotsb + \\theta_Q \\epsilon_{t-Q}\n+ \\epsilon_t,\n\\] with the noise term \\(\\epsilon_t\\) for outcome \\(y_t\\) modeled as normal, \\[\n\\epsilon_t \\sim \\textsf{normal}(0,\\sigma).\n\\] In a proper Bayesian model, the parameters \\(\\mu\\), \\(\\theta\\), and \\(\\sigma\\) must all be given priors.\n\n\nAn \\(\\mbox{MA}(2)\\) model can be coded in Stan as follows.\ndata {\n int<lower=3> T; // number of observations\n vector[T] y; // observation at time T\n}\nparameters {\n real mu; // mean\n real<lower=0> sigma; // error scale\n vector[2] theta; // lag coefficients\n}\ntransformed parameters {\n vector[T] epsilon; // error terms\n epsilon[1] = y[1] - mu;\n epsilon[2] = y[2] - mu - theta[1] * epsilon[1];\n for (t in 3:T) {\n epsilon[t] = ( y[t] - mu\n - theta[1] * epsilon[t - 1]\n - theta[2] * epsilon[t - 2] );\n }\n}\nmodel {\n mu ~ cauchy(0, 2.5);\n theta ~ cauchy(0, 2.5);\n sigma ~ cauchy(0, 2.5);\n for (t in 3:T) {\n y[t] ~ normal(mu\n + theta[1] * epsilon[t - 1]\n + theta[2] * epsilon[t - 2],\n sigma);\n }\n}\nThe error terms \\(\\epsilon_t\\) are defined as transformed parameters in terms of the observations and parameters. The definition of the distribution statement (which also defines the likelihood) follows the definition, which can only be applied to \\(y_n\\) for \\(n > Q\\). In this example, the parameters are all given Cauchy (half-Cauchy for \\(\\sigma\\)) priors, although other priors can be used just as easily.\nThis model could be improved in terms of speed by vectorizing the distribution statement in the model block. 
Vectorizing the calculation of the \\(\\epsilon_t\\) could also be sped up by using a dot product instead of a loop.\n\n\n\nA general \\(\\mbox{MA}(Q)\\) model with a vectorized distribution statement may be defined as follows.\ndata {\n int<lower=0> Q; // num previous noise terms\n int<lower=3> T; // num observations\n vector[T] y; // observation at time t\n}\nparameters {\n real mu; // mean\n real<lower=0> sigma; // error scale\n vector[Q] theta; // error coeff, lag -t\n}\ntransformed parameters {\n vector[T] epsilon; // error term at time t\n for (t in 1:T) {\n epsilon[t] = y[t] - mu;\n for (q in 1:min(t - 1, Q)) {\n epsilon[t] = epsilon[t] - theta[q] * epsilon[t - q];\n }\n }\n}\nmodel {\n vector[T] eta;\n mu ~ cauchy(0, 2.5);\n theta ~ cauchy(0, 2.5);\n sigma ~ cauchy(0, 2.5);\n for (t in 1:T) {\n eta[t] = mu;\n for (q in 1:min(t - 1, Q)) {\n eta[t] = eta[t] + theta[q] * epsilon[t - q];\n }\n }\n y ~ normal(eta, sigma);\n}\nHere all of the data are modeled, with missing terms just dropped from the regressions as in the calculation of the error terms. Both models converge quickly and mix well at convergence, with the vectorized model being faster (per iteration, not to converge—they compute the same model).\n\n\n\n\nAutoregressive moving-average models (ARMA), combine the predictors of the autoregressive model and the moving average model. 
An ARMA(1,1) model, with a single state of history, can be encoded in Stan as follows.\ndata {\n int<lower=1> T; // num observations\n array[T] real y; // observed outputs\n}\nparameters {\n real mu; // mean coeff\n real phi; // autoregression coeff\n real theta; // moving avg coeff\n real<lower=0> sigma; // noise scale\n}\nmodel {\n vector[T] nu; // prediction for time t\n vector[T] err; // error for time t\n nu[1] = mu + phi * mu; // assume err[0] == 0\n err[1] = y[1] - nu[1];\n for (t in 2:T) {\n nu[t] = mu + phi * y[t - 1] + theta * err[t - 1];\n err[t] = y[t] - nu[t];\n }\n mu ~ normal(0, 10); // priors\n phi ~ normal(0, 2);\n theta ~ normal(0, 2);\n sigma ~ cauchy(0, 5);\n err ~ normal(0, sigma); // error model\n}\nThe data are declared in the same way as the other time-series regressions and the parameters are documented in the code.\nIn the model block, the local vector nu stores the predictions and err the errors. These are computed similarly to the errors in the moving average models described in the previous section.\nThe priors are weakly informative for stationary processes. The data model only involves the error term, which is efficiently vectorized here.\nOften in models such as these, it is desirable to inspect the calculated error terms. This could easily be accomplished in Stan by declaring err as a transformed parameter, then defining it the same way as in the model above. The vector nu could still be a local variable, only now it will be in the transformed parameter block.\nWayne Folta suggested encoding the model without local vector variables as follows.\nmodel {\n real err;\n mu ~ normal(0, 10);\n phi ~ normal(0, 2);\n theta ~ normal(0, 2);\n sigma ~ cauchy(0, 5);\n err = y[1] - (mu + phi * mu);\n err ~ normal(0, sigma);\n for (t in 2:T) {\n err = y[t] - (mu + phi * y[t - 1] + theta * err);\n err ~ normal(0, sigma);\n }\n}\nThis approach to ARMA models illustrates how local variables, such as err in this case, can be reused in Stan. 
Folta’s approach could be extended to higher order moving-average models by storing more than one error term as a local variable and reassigning them in the loop.\nBoth encodings are fast. The original encoding has the advantage of vectorizing the normal distribution, but it uses a bit more memory. A halfway point would be to vectorize just err.\n\n\nMA and ARMA models are not identifiable if the roots of the characteristic polynomial for the MA part lie inside the unit circle, so it’s necessary to add the following constraint3\nreal<lower=-1, upper=1> theta;\nWhen the model is run without the constraint, using synthetic data generated from the model, the simulation can sometimes find modes for (theta, phi) outside the \\([-1,1]\\) interval, which creates a multiple mode problem in the posterior and also causes the NUTS tree depth to get large (often above 10). Adding the constraint both improves the accuracy of the posterior and dramatically reduces the tree depth, which speeds up the simulation considerably (typically by much more than an order of magnitude).\nFurther, unless one thinks that the process is really non-stationary, it’s worth adding the following constraint to ensure stationarity.\nreal<lower=-1, upper=1> phi;\n\n\n\n\nStochastic volatility models treat the volatility (i.e., variance) of a return on an asset, such as an option to buy a security, as following a latent stochastic process in discrete time (Kim, Shephard, and Chib 1998). The data consist of mean corrected (i.e., centered) returns \\(y_t\\) on an underlying asset at \\(T\\) equally spaced time points. Kim et al. formulate a typical stochastic volatility model using the following regression-like equations, with a latent parameter \\(h_t\\) for the log volatility, along with parameters \\(\\mu\\) for the mean log volatility, and \\(\\phi\\) for the persistence of the volatility term. 
The variable \\(\\epsilon_t\\) represents the white-noise shock (i.e., multiplicative error) on the asset return at time \\(t\\), whereas \\(\\delta_t\\) represents the shock on volatility at time \\(t\\). \\[\\begin{align*}\ny_t &= \\epsilon_t \\exp(h_t / 2) \\\\\nh_{t+1} &= \\mu + \\phi (h_t - \\mu) + \\delta_t \\sigma \\\\\nh_1 &\\sim \\textsf{normal}\\left( \\mu, \\frac{\\sigma}{\\sqrt{1 - \\phi^2}} \\right) \\\\\n\\epsilon_t &\\sim \\textsf{normal}(0,1) \\\\\n\\delta_t &\\sim \\textsf{normal}(0,1)\n\\end{align*}\\]\nRearranging the first line, \\(\\epsilon_t = y_t \\exp(-h_t / 2)\\), allowing the distribution for \\(y_t\\) to be written as \\[\ny_t \\sim \\textsf{normal}(0,\\exp(h_t/2)).\n\\] The recurrence equation for \\(h_{t+1}\\) may be combined with the scaling of \\(\\delta_t\\) to yield the distribution \\[\nh_t \\sim \\mathsf{normal}(\\mu + \\phi(h_{t-1} - \\mu), \\sigma).\n\\] This formulation can be directly encoded, as shown in the following Stan model.\ndata {\n int<lower=0> T; // # time points (equally spaced)\n vector[T] y; // mean corrected return at time t\n}\nparameters {\n real mu; // mean log volatility\n real<lower=-1, upper=1> phi; // persistence of volatility\n real<lower=0> sigma; // white noise shock scale\n vector[T] h; // log volatility at time t\n}\nmodel {\n phi ~ uniform(-1, 1);\n sigma ~ cauchy(0, 5);\n mu ~ cauchy(0, 10);\n h[1] ~ normal(mu, sigma / sqrt(1 - phi * phi));\n for (t in 2:T) {\n h[t] ~ normal(mu + phi * (h[t - 1] - mu), sigma);\n }\n for (t in 1:T) {\n y[t] ~ normal(0, exp(h[t] / 2));\n }\n}\nCompared to the Kim et al. formulation, the Stan model adds priors for the parameters \\(\\phi\\), \\(\\sigma\\), and \\(\\mu\\). The shock terms \\(\\epsilon_t\\) and \\(\\delta_t\\) do not appear explicitly in the model, although they could be calculated efficiently in a generated quantities block.\nThe posterior of a stochastic volatility model such as this one typically has high posterior variance. 
For example, simulating 500 data points from the above model with \\(\\mu = -1.02\\), \\(\\phi = 0.95\\), and \\(\\sigma = 0.25\\) leads to 95% posterior intervals for \\(\\mu\\) of \\((-1.23, -0.54)\\), for \\(\\phi\\) of \\((0.82, 0.98)\\), and for \\(\\sigma\\) of \\((0.16, 0.38)\\).\nThe NUTS draws show a high degree of autocorrelation, both for this model and the stochastic volatility model evaluated in (Hoffman and Gelman 2014). Using a non-diagonal mass matrix provides faster convergence and higher effective sample size than a diagonal mass matrix, but will not scale to large values of \\(T\\).\nIt is relatively straightforward to increase the effective sample size per second generated by this model by one or more orders of magnitude. First, the distribution statement for return \\(y\\) is easily vectorized to\ny ~ normal(0, exp(h / 2));\nThis speeds up the iterations, but does not change the effective sample size because the underlying parameterization and log probability function have not changed. Mixing is improved by reparameterizing in terms of a standardized volatility, then rescaling. This requires a standardized parameter h_std to be declared instead of h.\nparameters {\n // ...\n vector[T] h_std; // std log volatility time t\n}\nThe original value of h is then defined in a transformed parameter block.\ntransformed parameters {\n vector[T] h = h_std * sigma; // now h ~ normal(0, sigma)\n h[1] /= sqrt(1 - phi * phi); // rescale h[1]\n h += mu;\n for (t in 2:T) {\n h[t] += phi * (h[t - 1] - mu);\n }\n}\nThe first assignment rescales h_std to have a \\(\\textsf{normal}(0,\\sigma)\\) distribution and temporarily assigns it to h. The second assignment rescales h[1] so that its prior differs from that of h[2] through h[T]. The next assignment supplies a mu offset, so that h[2] through h[T] are now distributed \\(\\textsf{normal}(\\mu,\\sigma)\\); note that this shift must be done after the rescaling of h[1]. 
The final loop adds in the moving average so that h[2] through h[T] are appropriately modeled relative to phi and mu.\nAs a final improvement, the distribution statements for h[1] to h[T] are replaced with a single vectorized standard normal distribution statement.\nmodel {\n // ...\n h_std ~ std_normal();\n}\nAlthough the original model can take hundreds and sometimes thousands of iterations to converge, the reparameterized model reliably converges in tens of iterations. Mixing is also dramatically improved, which results in higher effective sample sizes per iteration. Finally, each iteration runs in roughly a quarter of the time of the original iterations.\n\n\n\nA Hidden Markov model is a probabilistic model over \\(N\\) observations \\(y_{1:N}\\) and \\(N\\) hidden states \\(z_{1:N}\\). This model is defined by the conditional distributions \\(p(y_n \\mid z_n, \\phi)\\) and \\(p(z_n \\mid z_{n-1}, \\phi)\\). Here we make the dependency on additional model parameters \\(\\phi\\) explicit. (\\(\\phi\\) may be a vector of parameters.) The complete data likelihood is then \\[\np(y, z \\mid \\phi) = \\prod_n p(y_n \\mid z_n, \\phi) p(z_n \\mid z_{n - 1}, \\phi)\n\\] When \\(z_{1:N}\\) is continuous, the user can explicitly encode these distributions in Stan and use Markov chain Monte Carlo to integrate \\(z\\) out.\nWhen each state \\(z\\) takes a value over a discrete and finite set, say \\(\\{1, 2, ..., K\\}\\), we can use Stan’s suite of HMM functions to marginalize out \\(z_{1:N}\\) and compute \\[\np(y_{1:N} \\mid \\phi) = \\int_{\\mathcal Z} p(y, z \\mid \\phi) \\text d z.\n\\] We start by defining the conditional observation distribution, stored in a \\(K \\times N\\) matrix \\(\\omega\\) with \\[\n\\omega_{kn} = p(y_n \\mid z_n = k, \\phi).\n\\] Next, we introduce the \\(K \\times K\\) transition matrix, \\(\\Gamma\\), with \\[\n\\Gamma_{ij} = p(z_n = j \\mid z_{n - 1} = i, \\phi).\n\\] (This is a right-stochastic matrix.) 
Finally, we define the initial state \\(K\\)-vector \\(\\rho\\), with \\[\n\\rho_k = p(z_0 = k \\mid \\phi).\n\\] It is common practice to set \\(\\rho\\) to be the stationary distribution of the HMM, that is \\(\\rho\\) is the first eigenvector of \\(\\Gamma\\) and solves \\(\\Gamma \\rho = \\rho\\).\nAs an example, consider a three-state model with \\(K=3\\). The observations are normally distributed conditional on the HMM states with \\[\n y_n \\sim \\text{normal}(\\mu_k, \\sigma),\n\\] where \\(\\mu = (1, 5, 9)\\) and the standard deviation \\(\\sigma\\) is the same across all observations. The model is then\ndata {\n int N; // Number of observations\n array[N] real y;\n}\n\nparameters {\n // Rows of the transition matrix\n array[3] simplex[3] gamma_arr;\n\n // Initial state\n simplex[3] rho;\n\n // Parameters of measurement model\n vector[3] mu;\n real<lower = 0.0> sigma;\n}\n\ntransformed parameters {\n // Build transition matrix\n matrix[3, 3] gamma;\n for (k in 1:3) gamma[k, ] = to_row_vector(gamma_arr[k]);\n\n // Compute the log likelihoods in each possible state\n matrix[3, N] log_omega;\n for (n in 1:N) {\n for (i in 1:3) {\n log_omega[i, n] = normal_lpdf(y[n] | mu[i], sigma);\n }\n }\n}\n\nmodel {\n // prior\n mu ~ normal(0, 1);\n sigma ~ normal(0, 1);\n \n // no explicit prior on gamma_arr, meaning we default to a\n // uniform prior over the simplexes.\n\n // Increment target by log p(y | mu, sigma, Gamma, rho)\n target += hmm_marginal(log_omega, gamma, rho);\n}\nThe last function hmm_marginal takes in all the ingredients of the HMM and computes the relevant log marginal distribution, \\(\\log p(y \\mid \\phi)\\).\nIf we desire draws from the posterior distribution of \\(z\\), we use the generated quantities block and draw, for each sample \\(\\phi\\), a sample from \\(p(z \\mid y, \\phi)\\). 
In effect, MCMC produces draws from \\(p(\\phi \\mid y)\\) and with the draws in generated quantities, we obtain draws from \\(p(\\phi \\mid y) p(z \\mid y, \\phi) = p(z, \\phi \\mid y)\\). It is also possible to compute the posterior probability of each hidden state, that is \\(\\text{Pr}(z_n = k \\mid \\phi, y)\\). Averaging these probabilities over all MCMC draws, we obtain \\(\\text{Pr}(z_n = k \\mid y)\\).\ngenerated quantities {\n array[N] int latent_states = hmm_latent_rng(log_omega, gamma, rho);\n matrix[3, N] hidden_probs = hmm_hidden_state_prob(log_omega, gamma, rho);\n}\nhmm_hidden_state_prob returns the marginal probabilities of each state, \\(\\text{Pr}(z_n = k \\mid \\phi, y)\\). This function cannot be used to compute the joint probability \\(\\text{Pr}(z \\mid \\phi, y)\\), because such calculation requires accounting for the posterior correlation between the different components of \\(z\\). Therefore, hidden_probs should not be used to obtain posterior draws. Instead, users should rely on hmm_latent_rng.\ngenerated quantities {\n array[N] int<lower=1, upper=K> z = hmm_latent_rng(...fill-in params here to match example...);\n}\nThe example in this section is derived from the more detailed case study by Ben Bales: https://mc-stan.org/users/documentation/case-studies/hmm-example.html.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Time-Series Models" + ] + }, + { + "objectID": "stan-users-guide/time-series.html#autoregressive.section", + "href": "stan-users-guide/time-series.html#autoregressive.section", + "title": "Time-Series Models", + "section": "", + "text": "A first-order autoregressive model (AR(1)) with normal noise takes each point \\(y_n\\) in a sequence \\(y\\) to be generated according to \\[\ny_n \\sim \\textsf{normal}(\\alpha + \\beta y_{n-1}, \\sigma).\n\\]\nThat is, the expected value of \\(y_n\\) is \\(\\alpha + \\beta y_{n-1}\\), with noise scaled as \\(\\sigma\\).\n\n\nWith improper flat priors on the regression 
coefficients \\(\\alpha\\) and \\(\\beta\\) and on the positively-constrained noise scale (\\(\\sigma\\)), the Stan program for the AR(1) model is as follows.1\ndata {\n int<lower=0> N;\n vector[N] y;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n for (n in 2:N) {\n y[n] ~ normal(alpha + beta * y[n-1], sigma);\n }\n}\nThe first observed data point, y[1], is not modeled here because there is nothing to condition on; instead, it acts to condition y[2]. This model also uses an improper prior for sigma, but there is no obstacle to adding an informative prior if information is available on the scale of the changes in y over time, or a weakly informative prior to help guide inference if rough knowledge of the scale of y is available.\n\n\nAlthough perhaps a bit more difficult to read, a much more efficient way to write the above model is by slicing the vectors, with the model above being replaced with the one-liner\nmodel {\n y[2:N] ~ normal(alpha + beta * y[1:(N - 1)], sigma);\n}\nThe left-hand side slicing operation pulls out the last \\(N-1\\) elements and the right-hand side version pulls out the first \\(N-1\\).\n\n\n\n\nProper priors of a range of different families may be added for the regression coefficients and noise scale. The normal noise model can be changed to a Student-\\(t\\) distribution or any other distribution with unbounded support. The model could also be made hierarchical if multiple series of observations are available.\nTo enforce the estimation of a stationary AR(1) process, the slope coefficient beta may be constrained with bounds as follows.\nreal<lower=-1, upper=1> beta;\nIn practice, such a constraint is not recommended. If the data are not well fit by a stationary model it is best to know this. Stationary parameter estimates can be encouraged with a prior favoring values of beta near zero.\n\n\n\nExtending the order of the model is also straightforward. 
For example, an AR(2) model could be coded with the second-order coefficient gamma and the following model statement.\nfor (n in 3:N) {\n y[n] ~ normal(alpha + beta*y[n-1] + gamma*y[n-2], sigma);\n}\n\n\n\nA general model where the order is itself given as data can be coded by putting the coefficients in an array and computing the linear predictor in a loop.\ndata {\n int<lower=0> K;\n int<lower=0> N;\n array[N] real y;\n}\nparameters {\n real alpha;\n array[K] real beta;\n real sigma;\n}\nmodel {\n for (n in (K+1):N) {\n real mu = alpha;\n for (k in 1:K) {\n mu += beta[k] * y[n-k];\n }\n y[n] ~ normal(mu, sigma);\n }\n}\n\n\n\nEconometric and financial time-series models usually assume heteroscedasticity: they allow the scale of the noise terms defining the series to vary over time. The simplest such model is the autoregressive conditional heteroscedasticity (ARCH) model (Engle 1982). Unlike the autoregressive model AR(1), which modeled the mean of the series as varying over time but left the noise term fixed, the ARCH(1) model takes the scale of the noise terms to vary over time but leaves the mean term fixed. Models could be defined where both the mean and scale vary over time; the econometrics literature presents a wide range of time-series modeling choices.\nThe ARCH(1) model is typically presented as the following sequence of equations, where \\(r_t\\) is the observed return at time point \\(t\\) and \\(\\mu\\), \\(\\alpha_0\\), and \\(\\alpha_1\\) are unknown regression coefficient parameters.\n\\[\\begin{align*}\nr_t &= \\mu + a_t \\\\\na_t &= \\sigma_t \\epsilon_t \\\\\n\\epsilon_t &\\sim \\textsf{normal}(0,1) \\\\\n\\sigma^2_t &= \\alpha_0 + \\alpha_1 a_{t-1}^2\n\\end{align*}\\]\nIn order to ensure the noise terms \\(\\sigma^2_t\\) are positive, the scale coefficients are constrained to be positive, \\(\\alpha_0, \\alpha_1\n> 0\\). 
To ensure stationarity of the time series, the slope is constrained to be less than one, i.e., \\(\\alpha_1 < 1\\).2\nThe ARCH(1) model may be coded directly in Stan as follows.\ndata {\n int<lower=0> T; // number of time points\n array[T] real r; // return at time t\n}\nparameters {\n real mu; // average return\n real<lower=0> alpha0; // noise intercept\n real<lower=0, upper=1> alpha1; // noise slope\n}\nmodel {\n for (t in 2:T) {\n r[t] ~ normal(mu, sqrt(alpha0 + alpha1\n * pow(r[t - 1] - mu,2)));\n }\n}\nThe loop in the model is defined so that the return at time \\(t=1\\) is not modeled; the model in the next section shows how to model the return at \\(t=1\\). The model can be vectorized to be more efficient; the model in the next section provides an example.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Time-Series Models" + ] + }, + { + "objectID": "stan-users-guide/time-series.html#modeling-temporal-heteroscedasticity", + "href": "stan-users-guide/time-series.html#modeling-temporal-heteroscedasticity", + "title": "Time-Series Models", + "section": "", + "text": "A set of variables is homoscedastic if their variances are all the same; the variables are heteroscedastic if they do not all have the same variance. 
Heteroscedastic time-series models allow the noise term to vary over time.\n\n\nThe basic generalized autoregressive conditional heteroscedasticity (GARCH) model, GARCH(1,1), extends the ARCH(1) model by including the squared previous difference in return from the mean at time \\(t-1\\) as a predictor of volatility at time \\(t\\), defining \\[\n\\sigma^2_t = \\alpha_0 + \\alpha_1 a^2_{t-1} + \\beta_1 \\sigma^2_{t-1}.\n\\]\nTo ensure the scale term is positive and the resulting time series stationary, the coefficients must all satisfy \\(\\alpha_0, \\alpha_1,\n\\beta_1 > 0\\) and the slopes \\(\\alpha_1 + \\beta_1 < 1\\).\ndata {\n int<lower=0> T;\n array[T] real r;\n real<lower=0> sigma1;\n}\nparameters {\n real mu;\n real<lower=0> alpha0;\n real<lower=0, upper=1> alpha1;\n real<lower=0, upper=(1-alpha1)> beta1;\n}\ntransformed parameters {\n array[T] real<lower=0> sigma;\n sigma[1] = sigma1;\n for (t in 2:T) {\n sigma[t] = sqrt(alpha0\n + alpha1 * pow(r[t - 1] - mu, 2)\n + beta1 * pow(sigma[t - 1], 2));\n }\n}\nmodel {\n r ~ normal(mu, sigma);\n}\nTo get the recursive definition of the volatility regression off the ground, the data declaration includes a non-negative value sigma1 for the scale of the noise at \\(t = 1\\).\nThe constraints are coded directly on the parameter declarations. This declaration is order-specific in that the constraint on beta1 depends on the value of alpha1.\nA transformed parameter array of non-negative values sigma is used to store the scale values at each time point. The definition of these values in the transformed parameters block is where the regression is now defined. There is an intercept alpha0, a slope alpha1 for the squared difference in return from the mean at the previous time, and a slope beta1 for the previous noise scale squared. 
Finally, the whole regression is inside the sqrt function because Stan requires scale (deviation) parameters (not variance parameters) for the normal distribution.\nWith the regression in the transformed parameters block, the model reduces to a single vectorized distribution statement. Because r and sigma are of length T, all of the data are modeled directly.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Time-Series Models" + ] + }, + { + "objectID": "stan-users-guide/time-series.html#moving-average-models", + "href": "stan-users-guide/time-series.html#moving-average-models", + "title": "Time-Series Models", + "section": "", + "text": "A moving average model uses previous errors as predictors for future outcomes. For a moving average model of order \\(Q\\), \\(\\mbox{MA}(Q)\\), there is an overall mean parameter \\(\\mu\\) and regression coefficients \\(\\theta_q\\) for previous error terms. With \\(\\epsilon_t\\) being the noise at time \\(t\\), the model for outcome \\(y_t\\) is defined by \\[\ny_t = \\mu + \\theta_1 \\epsilon_{t-1} + \\dotsb + \\theta_Q \\epsilon_{t-Q}\n+ \\epsilon_t,\n\\] with the noise term \\(\\epsilon_t\\) for outcome \\(y_t\\) modeled as normal, \\[\n\\epsilon_t \\sim \\textsf{normal}(0,\\sigma).\n\\] In a proper Bayesian model, the parameters \\(\\mu\\), \\(\\theta\\), and \\(\\sigma\\) must all be given priors.\n\n\nAn \\(\\mbox{MA}(2)\\) model can be coded in Stan as follows.\ndata {\n int<lower=3> T; // number of observations\n vector[T] y; // observation at time T\n}\nparameters {\n real mu; // mean\n real<lower=0> sigma; // error scale\n vector[2] theta; // lag coefficients\n}\ntransformed parameters {\n vector[T] epsilon; // error terms\n epsilon[1] = y[1] - mu;\n epsilon[2] = y[2] - mu - theta[1] * epsilon[1];\n for (t in 3:T) {\n epsilon[t] = ( y[t] - mu\n - theta[1] * epsilon[t - 1]\n - theta[2] * epsilon[t - 2] );\n }\n}\nmodel {\n mu ~ cauchy(0, 2.5);\n theta ~ cauchy(0, 2.5);\n sigma ~ cauchy(0, 2.5);\n for (t in 
3:T) {\n y[t] ~ normal(mu\n + theta[1] * epsilon[t - 1]\n + theta[2] * epsilon[t - 2],\n sigma);\n }\n}\nThe error terms \\(\\epsilon_t\\) are defined as transformed parameters in terms of the observations and parameters. The definition of the distribution statement (which also defines the likelihood) follows the definition, which can only be applied to \\(y_n\\) for \\(n > Q\\). In this example, the parameters are all given Cauchy (half-Cauchy for \\(\\sigma\\)) priors, although other priors can be used just as easily.\nThis model could be improved in terms of speed by vectorizing the distribution statement in the model block. Vectorizing the calculation of the \\(\\epsilon_t\\) could also be sped up by using a dot product instead of a loop.\n\n\n\nA general \\(\\mbox{MA}(Q)\\) model with a vectorized distribution statement may be defined as follows.\ndata {\n int<lower=0> Q; // num previous noise terms\n int<lower=3> T; // num observations\n vector[T] y; // observation at time t\n}\nparameters {\n real mu; // mean\n real<lower=0> sigma; // error scale\n vector[Q] theta; // error coeff, lag -t\n}\ntransformed parameters {\n vector[T] epsilon; // error term at time t\n for (t in 1:T) {\n epsilon[t] = y[t] - mu;\n for (q in 1:min(t - 1, Q)) {\n epsilon[t] = epsilon[t] - theta[q] * epsilon[t - q];\n }\n }\n}\nmodel {\n vector[T] eta;\n mu ~ cauchy(0, 2.5);\n theta ~ cauchy(0, 2.5);\n sigma ~ cauchy(0, 2.5);\n for (t in 1:T) {\n eta[t] = mu;\n for (q in 1:min(t - 1, Q)) {\n eta[t] = eta[t] + theta[q] * epsilon[t - q];\n }\n }\n y ~ normal(eta, sigma);\n}\nHere all of the data are modeled, with missing terms just dropped from the regressions as in the calculation of the error terms. 
Both models converge quickly and mix well at convergence, with the vectorized model being faster (per iteration, not to converge—they compute the same model).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Time-Series Models" + ] + }, + { + "objectID": "stan-users-guide/time-series.html#autoregressive-moving-average-models", + "href": "stan-users-guide/time-series.html#autoregressive-moving-average-models", + "title": "Time-Series Models", + "section": "", + "text": "Autoregressive moving-average models (ARMA), combine the predictors of the autoregressive model and the moving average model. An ARMA(1,1) model, with a single state of history, can be encoded in Stan as follows.\ndata {\n int<lower=1> T; // num observations\n array[T] real y; // observed outputs\n}\nparameters {\n real mu; // mean coeff\n real phi; // autoregression coeff\n real theta; // moving avg coeff\n real<lower=0> sigma; // noise scale\n}\nmodel {\n vector[T] nu; // prediction for time t\n vector[T] err; // error for time t\n nu[1] = mu + phi * mu; // assume err[0] == 0\n err[1] = y[1] - nu[1];\n for (t in 2:T) {\n nu[t] = mu + phi * y[t - 1] + theta * err[t - 1];\n err[t] = y[t] - nu[t];\n }\n mu ~ normal(0, 10); // priors\n phi ~ normal(0, 2);\n theta ~ normal(0, 2);\n sigma ~ cauchy(0, 5);\n err ~ normal(0, sigma); // error model\n}\nThe data are declared in the same way as the other time-series regressions and the parameters are documented in the code.\nIn the model block, the local vector nu stores the predictions and err the errors. These are computed similarly to the errors in the moving average models described in the previous section.\nThe priors are weakly informative for stationary processes. The data model only involves the error term, which is efficiently vectorized here.\nOften in models such as these, it is desirable to inspect the calculated error terms. 
This could easily be accomplished in Stan by declaring err as a transformed parameter, then defining it the same way as in the model above. The vector nu could still be a local variable, only now it will be in the transformed parameter block.\nWayne Folta suggested encoding the model without local vector variables as follows.\nmodel {\n real err;\n mu ~ normal(0, 10);\n phi ~ normal(0, 2);\n theta ~ normal(0, 2);\n sigma ~ cauchy(0, 5);\n err = y[1] - (mu + phi * mu);\n err ~ normal(0, sigma);\n for (t in 2:T) {\n err = y[t] - (mu + phi * y[t - 1] + theta * err);\n err ~ normal(0, sigma);\n }\n}\nThis approach to ARMA models illustrates how local variables, such as err in this case, can be reused in Stan. Folta’s approach could be extended to higher order moving-average models by storing more than one error term as a local variable and reassigning them in the loop.\nBoth encodings are fast. The original encoding has the advantage of vectorizing the normal distribution, but it uses a bit more memory. A halfway point would be to vectorize just err.\n\n\nMA and ARMA models are not identifiable if the roots of the characteristic polynomial for the MA part lie inside the unit circle, so it’s necessary to add the following constraint3\nreal<lower=-1, upper=1> theta;\nWhen the model is run without the constraint, using synthetic data generated from the model, the simulation can sometimes find modes for (theta, phi) outside the \\([-1,1]\\) interval, which creates a multiple mode problem in the posterior and also causes the NUTS tree depth to get large (often above 10). 
Adding the constraint both improves the accuracy of the posterior and dramatically reduces the tree depth, which speeds up the simulation considerably (typically by much more than an order of magnitude).\nFurther, unless one thinks that the process is really non-stationary, it’s worth adding the following constraint to ensure stationarity.\nreal<lower=-1, upper=1> phi;", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Time-Series Models" + ] + }, + { + "objectID": "stan-users-guide/time-series.html#stochastic-volatility-models", + "href": "stan-users-guide/time-series.html#stochastic-volatility-models", + "title": "Time-Series Models", + "section": "", + "text": "Stochastic volatility models treat the volatility (i.e., variance) of a return on an asset, such as an option to buy a security, as following a latent stochastic process in discrete time (Kim, Shephard, and Chib 1998). The data consist of mean corrected (i.e., centered) returns \\(y_t\\) on an underlying asset at \\(T\\) equally spaced time points. Kim et al. formulate a typical stochastic volatility model using the following regression-like equations, with a latent parameter \\(h_t\\) for the log volatility, along with parameters \\(\\mu\\) for the mean log volatility, and \\(\\phi\\) for the persistence of the volatility term. The variable \\(\\epsilon_t\\) represents the white-noise shock (i.e., multiplicative error) on the asset return at time \\(t\\), whereas \\(\\delta_t\\) represents the shock on volatility at time \\(t\\). 
\\[\\begin{align*}\ny_t &= \\epsilon_t \\exp(h_t / 2) \\\\\nh_{t+1} &= \\mu + \\phi (h_t - \\mu) + \\delta_t \\sigma \\\\\nh_1 &\\sim \\textsf{normal}\\left( \\mu, \\frac{\\sigma}{\\sqrt{1 - \\phi^2}} \\right) \\\\\n\\epsilon_t &\\sim \\textsf{normal}(0,1) \\\\\n\\delta_t &\\sim \\textsf{normal}(0,1)\n\\end{align*}\\]\nRearranging the first line, \\(\\epsilon_t = y_t \\exp(-h_t / 2)\\), allowing the distribution for \\(y_t\\) to be written as \\[\ny_t \\sim \\textsf{normal}(0,\\exp(h_t/2)).\n\\] The recurrence equation for \\(h_{t+1}\\) may be combined with the scaling of \\(\\delta_t\\) to yield the distribution \\[\nh_t \\sim \\mathsf{normal}(\\mu + \\phi(h_{t-1} - \\mu), \\sigma).\n\\] This formulation can be directly encoded, as shown in the following Stan model.\ndata {\n int<lower=0> T; // # time points (equally spaced)\n vector[T] y; // mean corrected return at time t\n}\nparameters {\n real mu; // mean log volatility\n real<lower=-1, upper=1> phi; // persistence of volatility\n real<lower=0> sigma; // white noise shock scale\n vector[T] h; // log volatility at time t\n}\nmodel {\n phi ~ uniform(-1, 1);\n sigma ~ cauchy(0, 5);\n mu ~ cauchy(0, 10);\n h[1] ~ normal(mu, sigma / sqrt(1 - phi * phi));\n for (t in 2:T) {\n h[t] ~ normal(mu + phi * (h[t - 1] - mu), sigma);\n }\n for (t in 1:T) {\n y[t] ~ normal(0, exp(h[t] / 2));\n }\n}\nCompared to the Kim et al. formulation, the Stan model adds priors for the parameters \\(\\phi\\), \\(\\sigma\\), and \\(\\mu\\). The shock terms \\(\\epsilon_t\\) and \\(\\delta_t\\) do not appear explicitly in the model, although they could be calculated efficiently in a generated quantities block.\nThe posterior of a stochastic volatility model such as this one typically has high posterior variance. 
For example, simulating 500 data points from the above model with \\(\\mu = -1.02\\), \\(\\phi = 0.95\\), and \\(\\sigma = 0.25\\) leads to 95% posterior intervals for \\(\\mu\\) of \\((-1.23, -0.54)\\), for \\(\\phi\\) of \\((0.82, 0.98)\\), and for \\(\\sigma\\) of \\((0.16, 0.38)\\).\nThe NUTS draws show a high degree of autocorrelation, both for this model and the stochastic volatility model evaluated in (Hoffman and Gelman 2014). Using a non-diagonal mass matrix provides faster convergence and higher effective sample size than a diagonal mass matrix, but will not scale to large values of \\(T\\).\nIt is relatively straightforward to increase the effective sample size per second generated by this model by one or more orders of magnitude. First, the distribution statement for return \\(y\\) is easily vectorized to\ny ~ normal(0, exp(h / 2));\nThis speeds up the iterations, but does not change the effective sample size because the underlying parameterization and log probability function have not changed. Mixing is improved by reparameterizing in terms of a standardized volatility, then rescaling. This requires a standardized parameter h_std to be declared instead of h.\nparameters {\n // ...\n vector[T] h_std; // std log volatility time t\n}\nThe original value of h is then defined in a transformed parameter block.\ntransformed parameters {\n vector[T] h = h_std * sigma; // now h ~ normal(0, sigma)\n h[1] /= sqrt(1 - phi * phi); // rescale h[1]\n h += mu;\n for (t in 2:T) {\n h[t] += phi * (h[t - 1] - mu);\n }\n}\nThe first assignment rescales h_std to have a \\(\\textsf{normal}(0,\\sigma)\\) distribution and temporarily assigns it to h. The second assignment rescales h[1] so that its prior differs from that of h[2] through h[T]. The next assignment supplies a mu offset, so that h[2] through h[T] are now distributed \\(\\textsf{normal}(\\mu,\\sigma)\\); note that this shift must be done after the rescaling of h[1]. 
The final loop adds in the moving average so that h[2] through h[T] are appropriately modeled relative to phi and mu.\nAs a final improvement, the distribution statements for h[1] to h[T] are replaced with a single vectorized standard normal distribution statement.\nmodel {\n // ...\n h_std ~ std_normal();\n}\nAlthough the original model can take hundreds and sometimes thousands of iterations to converge, the reparameterized model reliably converges in tens of iterations. Mixing is also dramatically improved, which results in higher effective sample sizes per iteration. Finally, each iteration runs in roughly a quarter of the time of the original iterations.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Time-Series Models" + ] + }, + { + "objectID": "stan-users-guide/time-series.html#hmms.section", + "href": "stan-users-guide/time-series.html#hmms.section", + "title": "Time-Series Models", + "section": "", + "text": "A Hidden Markov model is a probabilistic model over \\(N\\) observations \\(y_{1:N}\\) and \\(N\\) hidden states \\(z_{1:N}\\). This model is defined by the conditional distributions \\(p(y_n \\mid z_n, \\phi)\\) and \\(p(z_n \\mid z_{n-1}, \\phi)\\). Here we make the dependency on additional model parameters \\(\\phi\\) explicit. (\\(\\phi\\) may be a vector of parameters.) 
The complete data likelihood is then \\[\np(y, z \\mid \\phi) = \\prod_n p(y_n \\mid z_n, \\phi) p(z_n \\mid z_{n - 1}, \\phi)\n\\] When \\(z_{1:N}\\) is continuous, the user can explicitly encode these distributions in Stan and use Markov chain Monte Carlo to integrate \\(z\\) out.\nWhen each state \\(z\\) takes a value over a discrete and finite set, say \\(\\{1, 2, ..., K\\}\\), we can use Stan’s suite of HMM functions to marginalize out \\(z_{1:N}\\) and compute \\[\np(y_{1:N} \\mid \\phi) = \\int_{\\mathcal Z} p(y, z \\mid \\phi) \\text d z.\n\\] We start by defining the conditional observation distribution, stored in a \\(K \\times N\\) matrix \\(\\omega\\) with \\[\n\\omega_{kn} = p(y_n \\mid z_n = k, \\phi).\n\\] Next, we introduce the \\(K \\times K\\) transition matrix, \\(\\Gamma\\), with \\[\n\\Gamma_{ij} = p(z_n = j \\mid z_{n - 1} = i, \\phi).\n\\] (This is a right-stochastic matrix.) Finally, we define the initial state \\(K\\)-vector \\(\\rho\\), with \\[\n\\rho_k = p(z_0 = k \\mid \\phi).\n\\] It is common practice to set \\(\\rho\\) to be the stationary distribution of the HMM, that is \\(\\rho\\) is the first eigenvector of \\(\\Gamma\\) and solves \\(\\Gamma \\rho = \\rho\\).\nAs an example, consider a three-state model with \\(K=3\\). The observations are normally distributed conditional on the HMM states with \\[\n y_n \\sim \\text{normal}(\\mu_k, \\sigma),\n\\] where \\(\\mu = (1, 5, 9)\\) and the standard deviation \\(\\sigma\\) is the same across all observations. 
The model is then\ndata {\n int N; // Number of observations\n array[N] real y;\n}\n\nparameters {\n // Rows of the transition matrix\n array[3] simplex[3] gamma_arr;\n\n // Initial state\n simplex[3] rho;\n\n // Parameters of measurement model\n vector[3] mu;\n real<lower = 0.0> sigma;\n}\n\ntransformed parameters {\n // Build transition matrix\n matrix[3, 3] gamma;\n for (k in 1:3) gamma[k, ] = to_row_vector(gamma_arr[k]);\n\n // Compute the log likelihoods in each possible state\n matrix[3, N] log_omega;\n for (n in 1:N) {\n for (i in 1:3) {\n log_omega[i, n] = normal_lpdf(y[n] | mu[i], sigma);\n }\n }\n}\n\nmodel {\n // prior\n mu ~ normal(0, 1);\n sigma ~ normal(0, 1);\n \n // no explicit prior on gamma_arr, meaning we default to a\n // uniform prior over the simplexes.\n\n // Increment target by log p(y | mu, sigma, Gamma, rho)\n target += hmm_marginal(log_omega, gamma, rho);\n}\nThe last function hmm_marginal takes in all the ingredients of the HMM and computes the relevant log marginal distribution, \\(\\log p(y \\mid \\phi)\\).\nIf we desire draws from the posterior distribution of \\(z\\), we use the generated quantities block and draw, for each sample \\(\\phi\\), a sample from \\(p(z \\mid y, \\phi)\\). In effect, MCMC produces draws from \\(p(\\phi \\mid y)\\) and with the draws in generated quantities, we obtain draws from \\(p(\\phi \\mid y) p(z \\mid y, \\phi) = p(z, \\phi \\mid y)\\). It is also possible to compute the posterior probability of each hidden state, that is \\(\\text{Pr}(z_n = k \\mid \\phi, y)\\). Averaging these probabilities over all MCMC draws, we obtain \\(\\text{Pr}(z_n = k \\mid y)\\).\ngenerated quantities {\n array[N] int latent_states = hmm_latent_rng(log_omega, gamma, rho);\n matrix[3, N] hidden_probs = hmm_hidden_state_prob(log_omega, gamma, rho);\n}\nhmm_hidden_state_prob returns the marginal probabilities of each state, \\(\\text{Pr}(z_n = k \\mid \\phi, y)\\). 
This function cannot be used to compute the joint probability \\(\\text{Pr}(z \\mid \\phi, y)\\), because such a calculation requires accounting for the posterior correlation between the different components of \\(z\\). Therefore, hidden_probs should not be used to obtain posterior draws. Instead, users should rely on hmm_latent_rng.\ngenerated quantities {\n array[N] int<lower=1, upper=K> z = hmm_latent_rng(...fill-in params here to match example...);\n}\nThe example in this section is derived from the more detailed case study by Ben Bales: https://mc-stan.org/users/documentation/case-studies/hmm-example.html.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Time-Series Models" + ] + }, + { + "objectID": "stan-users-guide/time-series.html#footnotes", + "href": "stan-users-guide/time-series.html#footnotes", + "title": "Time-Series Models", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe intercept in this model is \\(\\alpha / (1 - \\beta)\\). An alternative parameterization in terms of an intercept \\(\\gamma\\) suggested by Mark Scheuerell on GitHub is \\(y_n \\sim \\textsf{normal}\\left(\\gamma + \\beta \\cdot (y_{n-1} - \\gamma), \\sigma\\right)\\).↩︎\nIn practice, it can be useful to remove the constraint to test whether a non-stationary set of coefficients provides a better fit to the data. It can also be useful to add a trend term to the model, because an unfitted trend will manifest as non-stationarity.↩︎\nThis subsection is a lightly edited comment of Jonathan Gilligan’s on GitHub; see https://github.com/stan-dev/stan/issues/1617#issuecomment-160249142.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Time-Series Models" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html", + "href": "stan-users-guide/style-guide.html", + "title": "Stan Program Style Guide", + "section": "", + "text": "This chapter describes the preferred style for laying out Stan models. 
These are not rules of the language, but simply recommendations for laying out programs in a text editor. Although these recommendations may seem arbitrary, they are similar to those of many teams for many programming languages. Like rules for typesetting text, the goal is to achieve readability without wasting white space either vertically or horizontally. This is the style used in the Stan documentation, and should align with the auto-formatting ability of stanc3.\n\n\nThe most important point of style is consistency. Consistent coding style makes it easier to read not only a single program, but multiple programs. So when departing from this style guide, the number one recommendation is to do so consistently.\n\n\n\nLine lengths should not exceed 80 characters.1\nThis is a typical recommendation for many programming language style guides because it makes it easier to lay out text edit windows side by side and to view the code on the web without wrapping, easier to view diffs from version control, etc. About the only thing that is sacrificed is laying out expressions on a single line.\n\n\n\nThe recommended file extension for Stan model files is .stan. Files which contain only function definitions (intended for use with #include) should be given the .stanfunctions extension. A .stanfunctions file only includes the function definition and does not require the functions{} block wrapped around the function. A simple example of usage where the function is defined and saved in the file foo.stanfunctions:\nreal foo(real x, real y) {\n return sqrt(x * log(y));\n}\nThe function foo can be accessed in the Stan program by including the path to the foo.stanfunctions file as:\nfunctions {\n #include foo.stanfunctions;\n}\n// ...body...\nFor Stan data dump files, the recommended extension is .R, or more informatively, .data.R. 
For JSON output, the recommended extension is .json.\n\n\n\nThe recommended variable naming is to follow C/C++ naming conventions, in which variables are lowercase, with the underscore character (_) used as a separator. Thus it is preferred to use sigma_y, rather than the run together sigmay, camel-case sigmaY, or capitalized camel-case SigmaY. An exception is often made for terms appearing in mathematical expressions with standard names, like A for a matrix.\nAnother exception to the lowercasing recommendation, which follows the C/C++ conventions, is for size constants, for which the recommended form is a single uppercase letter. The reason for this is that it allows the loop variables to match. So loops over the indices of an \\(M \\times N\\) matrix \\(a\\) would look as follows.\nfor (m in 1:M) {\n for (n in 1:N) {\n a[m, n] = ...\n }\n}\n\n\n\nDeclaring local variables in the block in which they are used aids in understanding programs because it cuts down on the amount of text scanning or memory required to reunite the declaration and definition.\nThe following Stan program corresponds to a direct translation of a BUGS model, which uses a different element of mu in each iteration.\nmodel {\n array[N] real mu;\n for (n in 1:N) {\n mu[n] = alpha * x[n] + beta;\n y[n] ~ normal(mu[n],sigma);\n }\n}\nBecause variables can be reused in Stan and because they should be declared locally for clarity, this model should be recoded as follows.\nmodel {\n for (n in 1:N) {\n real mu;\n mu = alpha * x[n] + beta;\n y[n] ~ normal(mu,sigma);\n }\n}\nThe local variable can be eliminated altogether, as follows.\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(alpha * x[n] + beta, sigma);\n }\n}\nThere is unlikely to be any measurable efficiency difference between the last two implementations, but both should be a bit more efficient than the BUGS translation.\n\n\nIn the case of local variables for compound structures, such as arrays, vectors, or matrices, if they are built up component 
by component rather than in large chunks, it can be more efficient to declare a local variable for the structure outside of the block in which it is used. This allows it to be allocated once and then reused.\nmodel {\n vector[K] mu;\n for (n in 1:N) {\n for (k in 1:K) {\n mu[k] = // ...\n }\n y[n] ~ multi_normal(mu,Sigma);\n}\nIn this case, the vector mu will be allocated outside of both loops, and used a total of N times.\n\n\n\n\n\n\nSingle-statement blocks can be rendered in several ways. The preferred style is fully bracketed with the statement appearing on its own line, as follows.\nfor (n in 1:N) {\n y[n] ~ normal(mu,1);\n}\nThe use of loops and conditionals without brackets can be dangerous. For instance, consider this program.\nfor (n in 1:N)\n z[n] ~ normal(nu,1);\n y[n] ~ normal(mu,1);\nBecause Stan ignores whitespace and the parser completes a statement as eagerly as possible (just as in C++), the previous program is equivalent to the following program.\nfor (n in 1:N) {\n z[n] ~ normal(nu,1);\n}\ny[n] ~ normal(mu,1);\nTherefore, one should prefer to use braces. The only exception is when nesting if-else clauses, where the else branch contains exactly one conditional. Then, it is preferred to place the following if on the same line, as in the following.\nif (x) {\n // ...\n} else if (y) {\n // ...\n} else {\n // ...\n}\n\n\n\nThe preferred style for operators minimizes parentheses. This reduces clutter in code that can actually make it harder to read expressions. For example, the expression a + b * c is preferred to the equivalent a + (b * c) or (a + (b * c)). 
The operator precedences and associativities follow those of pretty much every programming language including Fortran, C++, R, and Python; full details are provided in the reference manual.\nSimilarly, comparison operators can usually be written with minimal bracketing, with the form y[n] > 0 || x[n] != 0 preferred to the bracketed form (y[n] > 0) || (x[n] != 0).\n\n\n\nVertical space is valuable as it controls how much of a program you can see. The preferred Stan style is with the opening brace appearing at the end of a line.\nfor (n in 1:N) {\n y[n] ~ normal(mu,1);\n}\nThis also goes for parameters blocks, transformed data blocks, which should look as follows.\ntransformed parameters {\n real sigma;\n // ...\n}\nThe exception to this rule is local blocks which only exist for scoping reasons. The opening brace of these blocks is not associated with any control flow or block structure, so it should appear on its own line.\n\n\n\n\nWhile Stan supports the full C++-style conditional syntax, allowing real or integer values to act as conditions, real values should be avoided. For a real-valued x, one should use\nif (x != 0) { ...\nin place of\nif (x) { ...\nBeyond stylistic choices, one should be careful using real values in a conditional expression, as direct comparison can have unexpected results due to numerical accuracy.\n\n\n\nFunctions are laid out the same way as in languages such as Java and C++. For example,\nreal foo(real x, real y) {\n return sqrt(x * log(y));\n}\nThe return type is flush left, the parentheses for the arguments are adjacent to the arguments and function name, and there is a space after the comma for arguments after the first. The open curly brace for the body is on the same line as the function name, following the layout of loops and conditionals. The body itself is indented; here we use two spaces. 
The close curly brace appears on its own line.\nIf function names or argument lists are long, they can be written as\nmatrix\nfunction_to_do_some_hairy_algebra(matrix thingamabob,\n vector doohickey2) {\n // ...body...\n}\nThe function starts a new line, under the type. The arguments are aligned under each other.\nFunction documentation should follow the Javadoc and Doxygen styles. Here’s an example repeated from the documenting functions section.\n/**\n * Return a data matrix of specified size with rows\n * corresponding to items and the first column filled\n * with the value 1 to represent the intercept and the\n * remaining columns randomly filled with unit-normal draws.\n *\n * @param N Number of rows correspond to data items\n * @param K Number of predictors, counting the intercept, per\n * item.\n * @return Simulated predictor matrix.\n */\nmatrix predictors_rng(int N, int K) {\n // ...\n}\nThe open comment is /**, asterisks are aligned below the first asterisk of the open comment, and the end comment */ is also aligned on the asterisk. The tags @param and @return are used to label function arguments (i.e., parameters) and return values.\n\n\n\nStan allows spaces between elements of a program. The white space characters allowed in Stan programs include the space (ASCII 0x20), line feed (ASCII 0x0A), carriage return (0x0D), and tab (0x09). Stan treats all whitespace characters interchangeably, with any sequence of whitespace characters being syntactically equivalent to a single space character. Nevertheless, effective use of whitespace is the key to good program layout.\n\n\nEach statement of a program should appear on its own line. Declaring multiple variables of the same type can be accomplished in a single statement with the syntax\nreal mu, sigma;\n\n\n\nStan programs should not contain tab characters. 
Using tabs to lay out a program is highly unportable because the number of spaces represented by a single tab character varies depending on which program is doing the rendering and how it is configured.\n\n\n\nStan has standardized on two space characters of indentation, which is the standard convention for C/C++ code.\n\n\n\nUse a space after ifs. For instance, use if (x < y) {..., not if(x < y){ ....\n\n\n\nThere should not be space between a function name and the arguments it applies to. For instance, use normal(0, 1), not normal (0,1).\n\n\n\nThere should be spaces around binary operators. For instance, use y[1] = x, not y[1]=x, use (x + y) * z not (x+y)*z.\nUnary operators are written without a space, such as in -x, !y.\n\n\n\nAnother exception to the above rule is when the assignment operator (=) is used inside a type constraint, such as\nreal<lower=0> x;\nSpaces should still be used in arithmetic and following commas, as in\nreal<lower=0, upper=a * x + b> x;\n\n\n\nSometimes expressions are too long to fit on a single line. In that case, the recommended form is to break before an operator,2 aligning the operator to a term above to indicate scoping. 
For example, use the following form\nvector[J] p_distance = Phi((distance_tolerance - overshot)\n ./ ((x + overshot) * sigma_distance))\n - Phi((-overshot)\n ./ ((x + overshot) * sigma_distance));\nHere, the elementwise division operator (./) is aligned to clearly signal the division is occurring inside the parentheses, while the subtraction indicates it is between the function applications (Phi).\nFor functions with multiple arguments, break after a comma and line the next argument up underneath as follows.\ny[n] ~ normal(alpha + beta * x + gamma * y,\n pow(tau,-0.5));\n\n\n\nCommas should always be followed by spaces, including in function arguments, sequence literals, between variable declarations, etc.\nFor example,\nnormal(alpha * x[n] + beta, sigma);\nis preferred over\nnormal(alpha * x[n] + beta,sigma);\n\n\n\nWherever possible, Stan programs should use a single line feed character to separate lines. All of the Stan developers (so far, at least) work on Unix-like operating systems and using a standard newline makes the programs easier for us to read and share.\n\n\nNewlines are signaled in Unix-like operating systems such as Linux and Mac OS X with a single line-feed (LF) character (ASCII code point 0x0A). Newlines are signaled in Windows using two characters, a carriage return (CR) character (ASCII code point 0x0D) followed by a line-feed (LF) character.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#choose-a-consistent-style", + "href": "stan-users-guide/style-guide.html#choose-a-consistent-style", + "title": "Stan Program Style Guide", + "section": "", + "text": "The most important point of style is consistency. Consistent coding style makes it easier to read not only a single program, but multiple programs. 
So when departing from this style guide, the number one recommendation is to do so consistently.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#line-length", + "href": "stan-users-guide/style-guide.html#line-length", + "title": "Stan Program Style Guide", + "section": "", + "text": "Line lengths should not exceed 80 characters.1\nThis is a typical recommendation for many programming language style guides because it makes it easier to lay out text edit windows side by side and to view the code on the web without wrapping, easier to view diffs from version control, etc. About the only thing that is sacrificed is laying out expressions on a single line.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#file-extensions", + "href": "stan-users-guide/style-guide.html#file-extensions", + "title": "Stan Program Style Guide", + "section": "", + "text": "The recommended file extension for Stan model files is .stan. Files which contain only function definitions (intended for use with #include) should be given the .stanfunctions extension. A .stanfunctions file only includes the function definition and does not require the functions{} block wrapped around the function. A simple example of usage where the function is defined and saved in the file foo.stanfunctions:\nreal foo(real x, real y) {\n return sqrt(x * log(y));\n}\nThe function foo can be accessed in the Stan program by including the path to the foo.stanfunctions file as:\nfunctions {\n #include foo.stanfunctions;\n}\n// ...body...\nFor Stan data dump files, the recommended extension is .R, or more informatively, .data.R. 
For JSON output, the recommended extension is .json.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#variable-naming", + "href": "stan-users-guide/style-guide.html#variable-naming", + "title": "Stan Program Style Guide", + "section": "", + "text": "The recommended variable naming is to follow C/C++ naming conventions, in which variables are lowercase, with the underscore character (_) used as a separator. Thus it is preferred to use sigma_y, rather than the run together sigmay, camel-case sigmaY, or capitalized camel-case SigmaY. An exception is often made for terms appearing in mathematical expressions with standard names, like A for a matrix.\nAnother exception to the lowercasing recommendation, which follows the C/C++ conventions, is for size constants, for which the recommended form is a single uppercase letter. The reason for this is that it allows the loop variables to match. So loops over the indices of an \\(M \\times N\\) matrix \\(a\\) would look as follows.\nfor (m in 1:M) {\n for (n in 1:N) {\n a[m, n] = ...\n }\n}", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#local-variable-scope", + "href": "stan-users-guide/style-guide.html#local-variable-scope", + "title": "Stan Program Style Guide", + "section": "", + "text": "Declaring local variables in the block in which they are used aids in understanding programs because it cuts down on the amount of text scanning or memory required to reunite the declaration and definition.\nThe following Stan program corresponds to a direct translation of a BUGS model, which uses a different element of mu in each iteration.\nmodel {\n array[N] real mu;\n for (n in 1:N) {\n mu[n] = alpha * x[n] + beta;\n y[n] ~ normal(mu[n],sigma);\n }\n}\nBecause variables can be reused in Stan and because they should be declared locally for 
clarity, this model should be recoded as follows.\nmodel {\n for (n in 1:N) {\n real mu;\n mu = alpha * x[n] + beta;\n y[n] ~ normal(mu,sigma);\n }\n}\nThe local variable can be eliminated altogether, as follows.\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(alpha * x[n] + beta, sigma);\n }\n}\nThere is unlikely to be any measurable efficiency difference between the last two implementations, but both should be a bit more efficient than the BUGS translation.\n\n\nIn the case of local variables for compound structures, such as arrays, vectors, or matrices, if they are built up component by component rather than in large chunks, it can be more efficient to declare a local variable for the structure outside of the block in which it is used. This allows it to be allocated once and then reused.\nmodel {\n vector[K] mu;\n for (n in 1:N) {\n for (k in 1:K) {\n mu[k] = // ...\n }\n y[n] ~ multi_normal(mu,Sigma);\n}\nIn this case, the vector mu will be allocated outside of both loops, and used a total of N times.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#parentheses-and-brackets", + "href": "stan-users-guide/style-guide.html#parentheses-and-brackets", + "title": "Stan Program Style Guide", + "section": "", + "text": "Single-statement blocks can be rendered in several ways. The preferred style is fully bracketed with the statement appearing on its own line, as follows.\nfor (n in 1:N) {\n y[n] ~ normal(mu,1);\n}\nThe use of loops and conditionals without brackets can be dangerous. For instance, consider this program.\nfor (n in 1:N)\n z[n] ~ normal(nu,1);\n y[n] ~ normal(mu,1);\nBecause Stan ignores whitespace and the parser completes a statement as eagerly as possible (just as in C++), the previous program is equivalent to the following program.\nfor (n in 1:N) {\n z[n] ~ normal(nu,1);\n}\ny[n] ~ normal(mu,1);\nTherefore, one should prefer to use braces. 
The only exception is when nesting if-else clauses, where the else branch contains exactly one conditional. Then, it is preferred to place the following if on the same line, as in the following.\nif (x) {\n // ...\n} else if (y) {\n // ...\n} else {\n // ...\n}\n\n\n\nThe preferred style for operators minimizes parentheses. This reduces clutter in code that can actually make it harder to read expressions. For example, the expression a + b * c is preferred to the equivalent a + (b * c) or (a + (b * c)). The operator precedences and associativities follow those of pretty much every programming language including Fortran, C++, R, and Python; full details are provided in the reference manual.\nSimilarly, comparison operators can usually be written with minimal bracketing, with the form y[n] > 0 || x[n] != 0 preferred to the bracketed form (y[n] > 0) || (x[n] != 0).\n\n\n\nVertical space is valuable as it controls how much of a program you can see. The preferred Stan style is with the opening brace appearing at the end of a line.\nfor (n in 1:N) {\n y[n] ~ normal(mu,1);\n}\nThis also goes for parameters blocks, transformed data blocks, which should look as follows.\ntransformed parameters {\n real sigma;\n // ...\n}\nThe exception to this rule is local blocks which only exist for scoping reasons. The opening brace of these blocks is not associated with any control flow or block structure, so it should appear on its own line.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#conditionals", + "href": "stan-users-guide/style-guide.html#conditionals", + "title": "Stan Program Style Guide", + "section": "", + "text": "While Stan supports the full C++-style conditional syntax, allowing real or integer values to act as conditions, real values should be avoided. 
For a real-valued x, one should use\nif (x != 0) { ...\nin place of\nif (x) { ...\nBeyond stylistic choices, one should be careful using real values in a conditional expression, as direct comparison can have unexpected results due to numerical accuracy.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#functions", + "href": "stan-users-guide/style-guide.html#functions", + "title": "Stan Program Style Guide", + "section": "", + "text": "Functions are laid out the same way as in languages such as Java and C++. For example,\nreal foo(real x, real y) {\n return sqrt(x * log(y));\n}\nThe return type is flush left, the parentheses for the arguments are adjacent to the arguments and function name, and there is a space after the comma for arguments after the first. The open curly brace for the body is on the same line as the function name, following the layout of loops and conditionals. The body itself is indented; here we use two spaces. The close curly brace appears on its own line.\nIf function names or argument lists are long, they can be written as\nmatrix\nfunction_to_do_some_hairy_algebra(matrix thingamabob,\n vector doohickey2) {\n // ...body...\n}\nThe function starts a new line, under the type. The arguments are aligned under each other.\nFunction documentation should follow the Javadoc and Doxygen styles. 
Here’s an example repeated from the documenting functions section.\n/**\n * Return a data matrix of specified size with rows\n * corresponding to items and the first column filled\n * with the value 1 to represent the intercept and the\n * remaining columns randomly filled with unit-normal draws.\n *\n * @param N Number of rows correspond to data items\n * @param K Number of predictors, counting the intercept, per\n * item.\n * @return Simulated predictor matrix.\n */\nmatrix predictors_rng(int N, int K) {\n // ...\n}\nThe open comment is /**, asterisks are aligned below the first asterisk of the open comment, and the end comment */ is also aligned on the asterisk. The tags @param and @return are used to label function arguments (i.e., parameters) and return values.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#white-space", + "href": "stan-users-guide/style-guide.html#white-space", + "title": "Stan Program Style Guide", + "section": "", + "text": "Stan allows spaces between elements of a program. The white space characters allowed in Stan programs include the space (ASCII 0x20), line feed (ASCII 0x0A), carriage return (0x0D), and tab (0x09). Stan treats all whitespace characters interchangeably, with any sequence of whitespace characters being syntactically equivalent to a single space character. Nevertheless, effective use of whitespace is the key to good program layout.\n\n\nEach statement of a program should appear on its own line. Declaring multiple variables of the same type can be accomplished in a single statement with the syntax\nreal mu, sigma;\n\n\n\nStan programs should not contain tab characters. 
Using tabs to layout a program is highly unportable because the number of spaces represented by a single tab character varies depending on which program is doing the rendering and how it is configured.\n\n\n\nStan has standardized on two space characters of indentation, which is the standard convention for C/C++ code.\n\n\n\nUse a space after ifs. For instance, use if (x < y) {..., not if(x < y){ ....\n\n\n\nThere should not be space between a function name and the arguments it applies to. For instance, use normal(0, 1), not normal (0,1).\n\n\n\nThere should be spaces around binary operators. For instance, use y[1] = x, not y[1]=x, use (x + y) * z not (x+y)*z.\nUnary operators are written without a space, such as in -x, !y.\n\n\n\nAnother exception to the above rule is when the assignment operator (=) is used inside a type constraint, such as\nreal<lower=0> x;\nSpaces should still be used in arithmetic and following commas, as in\nreal<lower=0, upper=a * x + b> x;\n\n\n\nSometimes expressions are too long to fit on a single line. In that case, the recommended form is to break before an operator,2 aligning the operator to a term above to indicate scoping. 
For example, use the following form\nvector[J] p_distance = Phi((distance_tolerance - overshot)\n ./ ((x + overshot) * sigma_distance))\n - Phi((-overshot)\n ./ ((x + overshot) * sigma_distance));\nHere, the elementwise division operator (./) is aligned to clearly signal the division is occurring inside the parentheses, while the subtraction indicates it is between the function applications (Phi).\nFor functions with multiple arguments, break after a comma and line the next argument up underneath as follows.\ny[n] ~ normal(alpha + beta * x + gamma * y,\n pow(tau,-0.5));\n\n\n\nCommas should always be followed by spaces, including in function arguments, sequence literals, between variable declarations, etc.\nFor example,\nnormal(alpha * x[n] + beta, sigma);\nis preferred over\nnormal(alpha * x[n] + beta,sigma);\n\n\n\nWherever possible, Stan programs should use a single line feed character to separate lines. All of the Stan developers (so far, at least) work on Unix-like operating systems and using a standard newline makes the programs easier for us to read and share.\n\n\nNewlines are signaled in Unix-like operating systems such as Linux and Mac OS X with a single line-feed (LF) character (ASCII code point 0x0A). Newlines are signaled in Windows using two characters, a carriage return (CR) character (ASCII code point 0x0D) followed by a line-feed (LF) character.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/style-guide.html#footnotes", + "href": "stan-users-guide/style-guide.html#footnotes", + "title": "Stan Program Style Guide", + "section": "Footnotes", + "text": "Footnotes\n\n\nEven 80 characters may be too many for rendering in print; for instance, in this manual, the number of code characters that fit on a line is about 65.↩︎\nThis is the usual convention in both typesetting and other programming languages. 
Neither R nor BUGS allows breaks before an operator because they allow newlines to signal the end of an expression or statement.↩︎", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Stan Program Style Guide" + ] + }, + { + "objectID": "stan-users-guide/simulation-based-calibration.html", + "href": "stan-users-guide/simulation-based-calibration.html", + "title": "Simulation-Based Calibration Checking", + "section": "", + "text": "A Bayesian posterior is calibrated if the posterior intervals have appropriate coverage. For example, 80% intervals are expected to contain the true parameter 80% of the time. If data is generated according to a model, Bayesian posterior inference with respect to that model is calibrated by construction. Simulation-based calibration checking (SBC) exploits this property of Bayesian inference to assess the soundness of a posterior sampler. Roughly, the way it works is by simulating parameters according to the prior, then simulating data conditioned on the simulated parameters, then testing posterior calibration of the inference algorithm over independently simulated data sets. This chapter follows Talts et al. (2018), which improves on the original approach developed by Cook, Gelman, and Rubin (2006). See also Modrák et al. 
(2023) for further improvements.\n\n\nSuppose a Bayesian model is given in the form of a prior density \\(p(\\theta)\\) and sampling density \\(p(y \\mid \\theta).\\) Now consider a process that first simulates parameters from the prior, \\[\n\\theta^{\\textrm{sim}} \\sim p(\\theta),\n\\] and then simulates data given the parameters, \\[\ny^{\\textrm{sim}} \\sim p(y \\mid \\theta^{\\textrm{sim}}).\n\\] By the definition of conditional densities, the simulated data and parameters constitute an independent draw from the model’s joint distribution, \\[\n(y^{\\textrm{sim}}, \\theta^{\\textrm{sim}}) \\sim p(y, \\theta).\n\\] From Bayes’s rule, it follows that for any observed (fixed) data \\(y\\), \\[\np(\\theta \\mid y) \\propto p(y, \\theta).\n\\] Therefore, the simulated parameters constitute a draw from the posterior for the simulated data, \\[\n\\theta^{\\textrm{sim}} \\sim p(\\theta \\mid y^{\\textrm{sim}}).\n\\] Now consider an algorithm that produces a sequence of draws from the posterior given this simulated data, \\[\n\\theta^{(1)}, \\ldots, \\theta^{(M)}\n\\sim p(\\theta \\mid y^{\\textrm{sim}}).\n\\] Because \\(\\theta^{\\textrm{sim}}\\) is also distributed as a draw from the posterior, the rank statistics of \\(\\theta^{\\textrm{sim}}\\) with respect to \\(\\theta^{(1)}, \\ldots \\theta^{(M)}\\) should be uniform.\nThis is one way to define calibration, because it follows that posterior intervals will have appropriate coverage (Dawid 1982; Gneiting, Balabdaoui, and Raftery 2007). If the rank of \\(\\theta^{\\textrm{sim}}\\) is uniform among the draws \\(\\theta^{(1)}, \\ldots, \\theta^{(M)},\\) then for any 90% interval selected, the probability the true value \\(\\theta^{\\textrm{sim}}\\) falls in it will also be 90%. 
The same goes for any other posterior interval.\n\n\n\nSuppose the Bayesian model to test has joint density \\[\np(y, \\theta) = p(y \\mid \\theta) \\cdot p(\\theta),\n\\] with data \\(y\\) and parameters \\(\\theta\\) (both are typically multivariate). Simulation-based calibration checking works by generating \\(N\\) simulated parameter and data pairs according to the joint density, \\[\n(y^{\\textrm{sim}(1)}, \\theta^{\\textrm{sim}(1)}),\n\\ldots, (y^{\\textrm{sim}(N)}, \\theta^{\\textrm{sim}(N)}),\n\\sim p(y, \\theta).\n\\] For each simulated data set \\(y^{\\textrm{sim}(n)}\\), use the algorithm to be tested to generate \\(M\\) posterior draws, which if everything is working properly, will be distributed marginally as \\[\n\\theta^{(n, 1)}, \\ldots, \\theta^{(n, M)}\n\\sim p(\\theta \\mid y^{\\textrm{sim}(n)}).\n\\] For a simulation \\(n\\) and parameter \\(k\\), the rank of the simulated parameter among the posterior draws is \\[\\begin{eqnarray*}\nr_{n, k}\n& = &\n\\textrm{rank}(\\theta_k^{\\textrm{sim}(n)},\n (\\theta^{(n, 1)}, \\ldots, \\theta^{(n,M)}))\n\\\\[4pt]\n& = &\n\\sum_{m = 1}^M\n \\textrm{I}[\\theta_k^{(n,m)} < \\theta_k^{\\textrm{sim}(n)}].\n\\end{eqnarray*}\\] That is, the rank is the number of posterior draws \\(\\theta^{(n,m)}_k\\) that are less than the simulated draw \\(\\theta^{\\textrm{sim}(n)}_k.\\)\nIf the algorithm generates posterior draws according to the posterior, the ranks should have uniform discrete distribution from \\(0\\) to \\(M\\), so that the ranks plus one are uniformly distributed from \\(1\\) to \\(M + 1\\), \\[\nr_{n, k} + 1\n\\sim\n\\textrm{categorical}\\! \\left(\\frac{1}{M + 1}, \\ldots, \\frac{1}{M + 1}\\right).\n\\] Simulation-based calibration checking uses this expected behavior to test the calibration of each parameter of a model on simulated data. Talts et al. 
(2018) suggest plotting binned counts of \\(r_{1:N,\nk}\\) for different parameters \\(k\\); Säilynoja, Bürkner, and Vehtari (2022) provide a graphical test for discrete uniformity testing. Before uniformity testing the Markov chains should be thinned to remove autocorrelation as these uniformity tests assume independence (Säilynoja, Bürkner, and Vehtari 2022).\n\n\n\nRunning simulation-based calibration checking in Stan will test whether Stan’s sampling algorithm can sample from the posterior associated with data generated according to the model. The data simulation and posterior fitting and rank calculation can all be done within a single Stan program. Then Stan’s posterior sampler has to be run multiple times. Each run produces a rank for each parameter being assessed for uniformity. The total set of ranks can then be tested for uniformity.\n\n\nFor illustration, a very simple model will suffice. Suppose there are two parameters \\((\\mu, \\sigma)\\) with independent priors, \\[\n\\mu \\sim \\textrm{normal}(0, 1),\n\\] and \\[\n\\sigma \\sim \\textrm{lognormal}(0, 1).\n\\] The data \\(y = y_1, \\ldots, y_N\\) is drawn conditionally independently given the parameters, \\[\ny_n \\sim \\textrm{normal}(\\mu, \\sigma).\n\\] The joint prior density is thus \\[\np(\\mu, \\sigma)\n= \\textrm{normal}(\\mu \\mid 0, 1)\n \\cdot \\textrm{lognormal}(\\sigma \\mid 0, 1),\n\\] and the data model is \\[\np(y \\mid \\mu, \\sigma)\n= \\prod_{n=1}^N \\textrm{normal}(y_n \\mid \\mu, \\sigma).\n\\]\nFor example, suppose the following two parameter values are drawn from the prior in the first simulation, \\[\n(\\mu^{\\textrm{sim(1)}}, \\sigma^{\\textrm{sim(1)}}) = (1.01, 0.23).\n\\] Then data \\(y^{\\textrm{sim}(1)} \\sim p(y \\mid \\mu^{\\textrm{sim(1)}},\n\\sigma^{\\textrm{sim(1)}})\\) is drawn according to the data model. 
Next, \\(M = 4\\) draws are taken from the posterior \\(\\mu^{(1,m)}, \\sigma^{(1,m)} \\sim p(\\mu, \\sigma \\mid y^{\\textrm{sim}(1)})\\), \\[\n\\begin{array}{r|rr}\nm & \\mu^{(1,m)} & \\sigma^{(1,m)}\n\\\\ \\hline\n1 & 1.07 & 0.33\n\\\\\n2 & -0.32 & 0.14\n\\\\\n3 & -0.99 & 0.26\n\\\\\n4 & 1.51 & 0.31\n\\end{array}\n\\] Then the comparisons on which ranks are based look as follows, \\[\n\\begin{array}{r|cc}\nm & \\textrm{I}(\\mu^{(1,m)} < \\mu^{\\textrm{sim}(1)})\n& \\textrm{I}(\\sigma^{(1,m)} < \\sigma^{\\textrm{sim}(1)})\n\\\\ \\hline\n1 & 0 & 0\n\\\\\n2 & 1 & 1\n\\\\\n3 & 1 & 0\n\\\\\n4 & 0 & 0\n\\end{array}\n\\] The ranks are the column sums, \\(r_{1,1} = 2\\) and \\(r_{1,2} = 1\\). Because the simulated parameters are distributed according to the posterior, these ranks should be distributed uniformly between \\(0\\) and \\(M\\), the number of posterior draws.\n\n\n\nTo code simulation-based calibration checking in a Stan program, the transformed data block can be used to simulate parameters and data from the model. The parameters, transformed parameters, and model block then define the model over the simulated data. Then, in the generated quantities block, the program records an indicator for whether each parameter is less than the simulated value. 
As shown above, the rank is then the sum of the simulated indicator variables.\ntransformed data {\n real mu_sim = normal_rng(0, 1);\n real<lower=0> sigma_sim = lognormal_rng(0, 1);\n int<lower=0> J = 10;\n vector[J] y_sim;\n for (j in 1:J) {\n y_sim[j] = normal_rng(mu_sim, sigma_sim);\n }\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n mu ~ normal(0, 1);\n sigma ~ lognormal(0, 1);\n y_sim ~ normal(mu, sigma);\n}\ngenerated quantities {\n array[2] int<lower=0, upper=1> lt_sim\n = { mu < mu_sim, sigma < sigma_sim };\n}\nTo avoid confusion with the number of simulated data sets used for simulation-based calibration checking, J is used for the number of simulated data points.\nThe model is implemented twice—once as a data generating process using random number generators in the transformed data block, then again in the parameters and model block. This duplication is a blessing and a curse. The curse is that it’s more work and twice the chance for errors. The blessing is that by implementing the model twice and comparing results, the chance of there being a mistake in the model is reduced.\n\n\n\nThe entire simulation-based calibration checking process is as follows, where\n\np(theta) is the prior density\np(y | theta) is the sampling density\nK is the number of parameters\nN is the total number of simulated data sets and fits\nM is the number of posterior draws per simulated data set\n\nSBC(p(theta), p(y | theta), K, N, M)\n------------------------------------\nfor (n in 1:N) {\n // simulate parameters and data\n theta(sim(n)) ~ p(theta)\n y(sim(n)) ~ p(y | theta(sim(n)))\n\n // posterior draws given simulated data\n for (m in 1:M) {\n theta(n, m) ~ p(theta | y(sim(n)))\n }\n // calculate rank of sim among posterior draws\n for (k in 1:K) {\n rank(n, k) = SUM_m I(theta[k](n,m) < theta[k](sim(n)))\n }\n}\n// test uniformity of each parameter\nfor (k in 1:K) {\n test uniformity of rank(1:N, k)\n}\n\n\n\nThe draws from the posterior are assumed to be 
roughly independent. If they are not, artifacts may arise in the uniformity tests due to correlation in the posterior draws (Säilynoja, Bürkner, and Vehtari 2022). Thus it is best to thin the posterior draws down to the point where the effective sample size is roughly the same as the number of thinned draws. This may require running the code a few times to judge the number of draws required to produce a target effective sample size. This operation can be put into a loop that doubles the number of iterations until all parameters have an effective sample size of M, then thinning down to M draws.\n\n\n\n\nA simple, though not very highly powered, \\(\\chi^2\\) test for uniformity can be formulated by binning the ranks \\(0:M\\) into \\(J\\) bins and testing that the bins all have roughly the expected number of draws in them. Many other tests for uniformity are possible. For example, Säilynoja, Bürkner, and Vehtari (2022) use a binomial model pointwise for the empirical cumulative distribution function and adjust to obtain a simultaneous envelope to be used as a graphical uniformity test.\nThe bins don’t need to be exactly the same size. In general, if \\(b_j\\) is the number of ranks that fall into bin \\(j\\) and \\(e_j\\) is the number of ranks expected to fall into bin \\(j\\) (which will be proportional to its size under uniformity), the test statistic is \\[\nX^2 = \\sum_{j = 1}^J \\frac{(b_j - e_j)^2}{e_j}.\n\\] The terms are approximately squared standard normal, so that under the null hypothesis of uniformity, \\[\nX^2 \\sim \\textrm{chiSquared}(J - 1),\n\\] with the corresponding \\(p\\)-value given by the complementary cumulative distribution function (CCDF) of \\(\\textrm{chiSquared}(J - 1)\\) applied to \\(X^2\\). 
Because this test relies on the binomial being approximately normal, the traditional advice is to make sure the expected count in each bin is at least five, i.e., \\(e_j \\geq 5.\\)\n\n\nBecause there are \\(M + 1\\) possible ranks, with \\(J\\) bins, it is easiest to have \\(M + 1\\) be divisible by \\(J\\). For instance, if \\(J = 20\\) and \\(M = 999\\), then there are \\(1000\\) possible ranks and an expected count in each bin of \\(\\frac{M + 1}{J} = 50.\\)\nDistributing the ranks into bins is another fiddly operation that can be done with integer arithmetic or the floor operation. Using floor, the following function determines the bin for a rank, \\[\n\\textrm{bin}(r_{n, m}, M, J)\n= 1 + \\left\\lfloor \\frac{r_{n, m}}{(M + 1) / J} \\right\\rfloor.\n\\] For example, with \\(M = 999\\) and \\(J = 20\\), \\((M + 1) / J = 50\\). The lowest rank checks out, \\[\n\\textrm{bin}(0, 999, 20) = 1 + \\lfloor 0 / 50 \\rfloor = 1,\n\\] as does the 50th rank, \\[\n\\textrm{bin}(49, 999, 20) = 1 + \\lfloor 49 / 50 \\rfloor = 1,\n\\] and the 51st is appropriately put in the second bin, \\[\n\\textrm{bin}(50, 999, 20) = 1 + \\lfloor 50 / 50 \\rfloor = 2.\n\\] The highest rank also checks out, with \\(\\textrm{bin}(999, 999, 20) = 20.\\)\nTo summarize, the following pseudocode computes the \\(b_j\\) values for the \\(\\chi^2\\) test or for visualization in a histogram.\nInputs: M draws, J bins, N parameters, ranks r[n, m]\nb[1:J] = 0\nfor (m in 1:M) {\n ++b[1 + floor(r[n, m] * J / (M + 1))]\n}\nwhere the ++b[n] notation is a common form of syntactic sugar for b[n] = b[n] + 1.\nIn general, a great deal of care must be taken in visualizing discrete data because it’s easy to introduce off-by-one errors and artifacts at the edges because of the way boundaries are computed by default. That’s why so much attention must be devoted to indexing and binning.\n\n\n\n\nThis section will show what the results look like when the tests pass and then when they fail. 
The passing test will compare a normal model and normal data generating process, whereas the second will compare a normal model with a Student-t data generating process. The first will produce calibrated posteriors, the second will not.\n\n\nConsider the following simple model for a normal distribution with standard normal and lognormal priors on the location and scale parameters. \\[\\begin{eqnarray*}\n\\mu & \\sim & \\textrm{normal}(0, 1)\n\\\\[4pt]\n\\sigma & \\sim & \\textrm{lognormal}(0, 1)\n\\\\[4pt]\ny_{1:10} & \\sim & \\textrm{normal}(\\mu, \\sigma).\n\\end{eqnarray*}\\] The Stan program for evaluating SBC for this model is\ntransformed data {\n real mu_sim = normal_rng(0, 1);\n real<lower=0> sigma_sim = lognormal_rng(0, 1);\n\n int<lower=0> J = 10;\n vector[J] y_sim;\n for (j in 1:J) {\n y_sim[j] = student_t_rng(4, mu_sim, sigma_sim);\n }\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n mu ~ normal(0, 1);\n sigma ~ lognormal(0, 1);\n\n y_sim ~ normal(mu, sigma);\n}\ngenerated quantities {\n array[2] int<lower=0, upper=1> I_lt_sim\n = { mu < mu_sim, sigma < sigma_sim };\n}\nAfter running this for enough iterations so that the effective sample size is larger than \\(M\\), then thinning to \\(M\\) draws (here \\(M = 999\\)), the ranks are computed and binned, and then plotted.\n\n\n\n\n\n\nFigure 1: Simulation based calibration plots for location and scale of a normal model with standard normal prior on the location, standard lognormal prior on the scale. Both histograms appear uniform, which is consistent with inference being well calibrated.\n\n\n\n\n\n\nNow consider using a Student-t data generating process with a normal model. 
Compare the apparent uniformity of the well specified model with the ill-specified situation with Student-t generative process and normal model.\n\n\n\n\n\n\nFigure 2: Simulation based calibration plots for location and scale of a normal model with standard normal prior on the location, standard lognormal prior on the scale with mismatched generative model using a Student-t data model with 4 degrees of freedom. The mean histogram appears uniform, but the scale parameter shows simulated values much smaller than fit values, clearly signaling the lack of calibration.\n\n\n\n\n\n\nThe examples in the previous sections show hard-coded pathological behavior. The usual application of SBC is to diagnose problems with a sampler.\nThis can happen in Stan with well-specified models if the posterior geometry is too difficult (usually due to extreme stiffness that varies). A simple example is the eight schools problem, the data for which consists of sample means \\(y_j\\) and standard deviations \\(\\sigma_j\\) of differences in test score after the same intervention in \\(J = 8\\) different schools. Rubin (1981) applies a hierarchical model for a meta-analysis of the results, estimating the mean intervention effect and a varying effect for each school. With a standard parameterization and weak priors, this model has very challenging posterior geometry, as shown by Talts et al. (2018); this section replicates their results.\nThe meta-analysis model has parameters for a population mean \\(\\mu\\) and standard deviation \\(\\tau > 0\\) as well as the effect \\(\\theta_j\\) of the treatment in each school. 
The model has weak normal and half-normal priors for the population-level parameters, \\[\\begin{eqnarray*}\n\\mu & \\sim & \\textrm{normal}(0, 5)\n\\\\[4pt]\n\\tau & \\sim & \\textrm{normal}_{+}(0, 5).\n\\end{eqnarray*}\\] School level effects are modeled as normal given the population parameters, \\[\n\\theta_j \\sim \\textrm{normal}(\\mu, \\tau).\n\\] The data is modeled as in a meta-analysis, given the school effect and sample standard deviation in the school, \\[\ny_j \\sim \\textrm{normal}(\\theta_j, \\sigma_j).\n\\]\nThis model can be coded in Stan with a data-generating process that simulates the parameters and then simulates data according to the parameters.\ntransformed data {\n real mu_sim = normal_rng(0, 5);\n real tau_sim = abs(normal_rng(0, 5));\n int<lower=0> J = 8;\n array[J] real theta_sim = normal_rng(rep_vector(mu_sim, J), tau_sim);\n array[J] real<lower=0> sigma = abs(normal_rng(rep_vector(0, J), 5));\n array[J] real y = normal_rng(theta_sim, sigma);\n}\nparameters {\n real mu;\n real<lower=0> tau;\n array[J] real theta;\n}\nmodel {\n tau ~ normal(0, 5);\n mu ~ normal(0, 5);\n theta ~ normal(mu, tau);\n y ~ normal(theta, sigma);\n}\ngenerated quantities {\n int<lower=0, upper=1> mu_lt_sim = mu < mu_sim;\n int<lower=0, upper=1> tau_lt_sim = tau < tau_sim;\n int<lower=0, upper=1> theta1_lt_sim = theta[1] < theta_sim[1];\n}\nAs usual for simulation-based calibration checking, the transformed data encodes the data-generating process using random number generators. Here, the population parameters \\(\\mu\\) and \\(\\tau\\) are first simulated, then the school-level effects \\(\\theta\\), and then finally the observed data \\(\\sigma_j\\) and \\(y_j.\\) The parameters and model are a direct encoding of the mathematical presentation using vectorized sampling statements. 
The generated quantities block includes indicators for parameter comparisons, saving only \\(\\theta_1\\) because the schools are exchangeable in the simulation.\nWhen fitting the model in Stan, multiple warning messages are provided that the sampler has diverged. The divergence warnings are in Stan’s sampler precisely to diagnose the sampler’s inability to follow the curvature in the posterior and provide independent confirmation that Stan’s sampler cannot fit this model as specified.\nSBC also diagnoses the problem. Here’s the rank plots for running \\(N =\n200\\) simulations with 1000 warmup iterations and \\(M = 999\\) draws per simulation used to compute the ranks.\n\n\n\n\n\n\n\n\n\n\n\n(a) \\(\\mu\\)\n\n\n\n\n\n\n\n\n\n\n\n(b) \\(\\tau\\)\n\n\n\n\n\n\n\n\n\n\n\n(c) \\({\\theta}_1\\)\n\n\n\n\n\n\n\nFigure 3: Simulation based calibration plots for the eight-schools model with centered parameterization in Stan. The geometry is too difficult for the NUTS sampler to handle, as indicated by the plot for \\(\\theta_1\\) (Figure 3 (c)).\n\n\n\nAlthough the population mean and standard deviation \\(\\mu\\) and \\(\\tau\\) appear well calibrated, \\(\\theta_1\\) tells a very different story. The simulated values are much smaller than the values fit from the data. This is because Stan’s no-U-turn sampler is unable to sample with the model formulated in the centered parameterization—the posterior geometry has regions of extremely high curvature as \\(\\tau\\) approaches zero and the \\(\\theta_j\\) become highly constrained. 
The chapter on reparameterization explains how to remedy this problem and fit this kind of hierarchical model with Stan.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Simulation-Based Calibration Checking" + ] + }, + { + "objectID": "stan-users-guide/simulation-based-calibration.html#bayes-is-calibrated-by-construction", + "href": "stan-users-guide/simulation-based-calibration.html#bayes-is-calibrated-by-construction", + "title": "Simulation-Based Calibration Checking", + "section": "", + "text": "Suppose a Bayesian model is given in the form of a prior density \\(p(\\theta)\\) and sampling density \\(p(y \\mid \\theta).\\) Now consider a process that first simulates parameters from the prior, \\[\n\\theta^{\\textrm{sim}} \\sim p(\\theta),\n\\] and then simulates data given the parameters, \\[\ny^{\\textrm{sim}} \\sim p(y \\mid \\theta^{\\textrm{sim}}).\n\\] By the definition of conditional densities, the simulated data and parameters constitute an independent draw from the model’s joint distribution, \\[\n(y^{\\textrm{sim}}, \\theta^{\\textrm{sim}}) \\sim p(y, \\theta).\n\\] From Bayes’s rule, it follows that for any observed (fixed) data \\(y\\), \\[\np(\\theta \\mid y) \\propto p(y, \\theta).\n\\] Therefore, the simulated parameters constitute a draw from the posterior for the simulated data, \\[\n\\theta^{\\textrm{sim}} \\sim p(\\theta \\mid y^{\\textrm{sim}}).\n\\] Now consider an algorithm that produces a sequence of draws from the posterior given this simulated data, \\[\n\\theta^{(1)}, \\ldots, \\theta^{(M)}\n\\sim p(\\theta \\mid y^{\\textrm{sim}}).\n\\] Because \\(\\theta^{\\textrm{sim}}\\) is also distributed as a draw from the posterior, the rank statistics of \\(\\theta^{\\textrm{sim}}\\) with respect to \\(\\theta^{(1)}, \\ldots \\theta^{(M)}\\) should be uniform.\nThis is one way to define calibration, because it follows that posterior intervals will have appropriate coverage (Dawid 1982; Gneiting, Balabdaoui, and 
Raftery 2007). If the rank of \\(\\theta^{\\textrm{sim}}\\) is uniform among the draws \\(\\theta^{(1)}, \\ldots, \\theta^{(M)},\\) then for any 90% interval selected, the probability the true value \\(\\theta^{\\textrm{sim}}\\) falls in it will also be 90%. The same goes for any other posterior interval.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Simulation-Based Calibration Checking" + ] + }, + { + "objectID": "stan-users-guide/simulation-based-calibration.html#simulation-based-calibration-checking-1", + "href": "stan-users-guide/simulation-based-calibration.html#simulation-based-calibration-checking-1", + "title": "Simulation-Based Calibration Checking", + "section": "", + "text": "Suppose the Bayesian model to test has joint density \\[\np(y, \\theta) = p(y \\mid \\theta) \\cdot p(\\theta),\n\\] with data \\(y\\) and parameters \\(\\theta\\) (both are typically multivariate). Simulation-based calibration checking works by generating \\(N\\) simulated parameter and data pairs according to the joint density, \\[\n(y^{\\textrm{sim}(1)}, \\theta^{\\textrm{sim}(1)}),\n\\ldots, (y^{\\textrm{sim}(N)}, \\theta^{\\textrm{sim}(N)}),\n\\sim p(y, \\theta).\n\\] For each simulated data set \\(y^{\\textrm{sim}(n)}\\), use the algorithm to be tested to generate \\(M\\) posterior draws, which if everything is working properly, will be distributed marginally as \\[\n\\theta^{(n, 1)}, \\ldots, \\theta^{(n, M)}\n\\sim p(\\theta \\mid y^{\\textrm{sim}(n)}).\n\\] For a simulation \\(n\\) and parameter \\(k\\), the rank of the simulated parameter among the posterior draws is \\[\\begin{eqnarray*}\nr_{n, k}\n& = &\n\\textrm{rank}(\\theta_k^{\\textrm{sim}(n)},\n (\\theta^{(n, 1)}, \\ldots, \\theta^{(n,M)}))\n\\\\[4pt]\n& = &\n\\sum_{m = 1}^M\n \\textrm{I}[\\theta_k^{(n,m)} < \\theta_k^{\\textrm{sim}(n)}].\n\\end{eqnarray*}\\] That is, the rank is the number of posterior draws \\(\\theta^{(n,m)}_k\\) that are less than the simulated draw 
\\(\\theta^{\\textrm{sim}(n)}_k.\\)\nIf the algorithm generates posterior draws according to the posterior, the ranks should have uniform discrete distribution from \\(0\\) to \\(M\\), so that the ranks plus one are uniformly distributed from \\(1\\) to \\(M + 1\\), \\[\nr_{n, k} + 1\n\\sim\n\\textrm{categorical}\\! \\left(\\frac{1}{M + 1}, \\ldots, \\frac{1}{M + 1}\\right).\n\\] Simulation-based calibration checking uses this expected behavior to test the calibration of each parameter of a model on simulated data. Talts et al. (2018) suggest plotting binned counts of \\(r_{1:N,\nk}\\) for different parameters \\(k\\); Säilynoja, Bürkner, and Vehtari (2022) provide a graphical test for discrete uniformity testing. Before uniformity testing the Markov chains should be thinned to remove autocorrelation as these uniformity tests assume independence (Säilynoja, Bürkner, and Vehtari 2022).", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Simulation-Based Calibration Checking" + ] + }, + { + "objectID": "stan-users-guide/simulation-based-calibration.html#sbc-in-stan", + "href": "stan-users-guide/simulation-based-calibration.html#sbc-in-stan", + "title": "Simulation-Based Calibration Checking", + "section": "", + "text": "Running simulation-based calibration checking in Stan will test whether Stan’s sampling algorithm can sample from the posterior associated with data generated according to the model. The data simulation and posterior fitting and rank calculation can all be done within a single Stan program. Then Stan’s posterior sampler has to be run multiple times. Each run produces a rank for each parameter being assessed for uniformity. The total set of ranks can then be tested for uniformity.\n\n\nFor illustration, a very simple model will suffice. 
Suppose there are two parameters \\((\\mu, \\sigma)\\) with independent priors, \\[\n\\mu \\sim \\textrm{normal}(0, 1),\n\\] and \\[\n\\sigma \\sim \\textrm{lognormal}(0, 1).\n\\] The data \\(y = y_1, \\ldots, y_N\\) is drawn conditionally independently given the parameters, \\[\ny_n \\sim \\textrm{normal}(\\mu, \\sigma).\n\\] The joint prior density is thus \\[\np(\\mu, \\sigma)\n= \\textrm{normal}(\\mu \\mid 0, 1)\n \\cdot \\textrm{lognormal}(\\sigma \\mid 0, 1),\n\\] and the data model is \\[\np(y \\mid \\mu, \\sigma)\n= \\prod_{n=1}^N \\textrm{normal}(y_n \\mid \\mu, \\sigma).\n\\]\nFor example, suppose the following two parameter values are drawn from the prior in the first simulation, \\[\n(\\mu^{\\textrm{sim(1)}}, \\sigma^{\\textrm{sim(1)}}) = (1.01, 0.23).\n\\] Then data \\(y^{\\textrm{sim}(1)} \\sim p(y \\mid \\mu^{\\textrm{sim(1)}},\n\\sigma^{\\textrm{sim(1)}})\\) is drawn according to the data model. Next, \\(M = 4\\) draws are taken from the posterior \\(\\mu^{(1,m)}, \\sigma^{(1,m)} \\sim p(\\mu, \\sigma \\mid y^{\\textrm{sim}(1)})\\), \\[\n\\begin{array}{r|rr}\nm & \\mu^{(1,m)} & \\sigma^{(1,m)}\n\\\\ \\hline\n1 & 1.07 & 0.33\n\\\\\n2 & -0.32 & 0.14\n\\\\\n3 & -0.99 & 0.26\n\\\\\n4 & 1.51 & 0.31\n\\end{array}\n\\] Then the comparisons on which ranks are based look as follows, \\[\n\\begin{array}{r|cc}\nm & \\textrm{I}(\\mu^{(1,m)} < \\mu^{\\textrm{sim}(1)})\n& \\textrm{I}(\\sigma^{(1,m)} < \\sigma^{\\textrm{sim}(1)})\n\\\\ \\hline\n1 & 0 & 0\n\\\\\n2 & 1 & 1\n\\\\\n3 & 1 & 0\n\\\\\n4 & 0 & 0\n\\end{array}\n\\] The ranks are the column sums, \\(r_{1,1} = 2\\) and \\(r_{1,2} = 1\\). Because the simulated parameters are distributed according to the posterior, these ranks should be distributed uniformly between \\(0\\) and \\(M\\), the number of posterior draws.\n\n\n\nTo code simulation-based calibration checking in a Stan program, the transformed data block can be used to simulate parameters and data from the model. 
The parameters, transformed parameters, and model block then define the model over the simulated data. Then, in the generated quantities block, the program records an indicator for whether each parameter is less than the simulated value. As shown above, the rank is then the sum of the simulated indicator variables.\ntransformed data {\n real mu_sim = normal_rng(0, 1);\n real<lower=0> sigma_sim = lognormal_rng(0, 1);\n int<lower=0> J = 10;\n vector[J] y_sim;\n for (j in 1:J) {\n y_sim[j] = normal_rng(mu_sim, sigma_sim);\n }\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n mu ~ normal(0, 1);\n sigma ~ lognormal(0, 1);\n y_sim ~ normal(mu, sigma);\n}\ngenerated quantities {\n array[2] int<lower=0, upper=1> lt_sim\n = { mu < mu_sim, sigma < sigma_sim };\n}\nTo avoid confusion with the number of simulated data sets used for simulation-based calibration checking, J is used for the number of simulated data points.\nThe model is implemented twice—once as a data generating process using random number generators in the transformed data block, then again in the parameters and model block. This duplication is a blessing and a curse. The curse is that it’s more work and twice the chance for errors. 
The blessing is that by implementing the model twice and comparing results, the chance of there being a mistake in the model is reduced.\n\n\n\nThe entire simulation-based calibration checking process is as follows, where\n\np(theta) is the prior density\np(y | theta) is the sampling density\nK is the number of parameters\nN is the total number of simulated data sets and fits\nM is the number of posterior draws per simulated data set\n\nSBC(p(theta), p(y | theta), K, N, M)\n------------------------------------\nfor (n in 1:N) {\n // simulate parameters and data\n theta(sim(n)) ~ p(theta)\n y(sim(n)) ~ p(y | theta(sim(n)))\n\n // posterior draws given simulated data\n for (m in 1:M) {\n theta(n, m) ~ p(theta | y(sim(n)))\n }\n // calculate rank of sim among posterior draws\n for (k in 1:K) {\n rank(n, k) = SUM_m I(theta[k](n,m) < theta[k](sim(n)))\n }\n}\n// test uniformity of each parameter\nfor (k in 1:K) {\n test uniformity of rank(1:N, k)\n}\n\n\n\nThe draws from the posterior are assumed to be roughly independent. If they are not, artifacts may arise in the uniformity tests due to correlation in the posterior draws (Säilynoja, Bürkner, and Vehtari 2022). Thus it is best to thin the posterior draws down to the point where the effective sample size is roughly the same as the number of thinned draws. This may require running the code a few times to judge the number of draws required to produce a target effective sample size. 
This operation can be put into a loop that doubles the number of iterations until all parameters have an effective sample size of M, then thinning down to M draws.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Simulation-Based Calibration Checking" + ] + }, + { + "objectID": "stan-users-guide/simulation-based-calibration.html#testing-uniformity", + "href": "stan-users-guide/simulation-based-calibration.html#testing-uniformity", + "title": "Simulation-Based Calibration Checking", + "section": "", + "text": "A simple, though not very highly powered, \\(\\chi^2\\) test for uniformity can be formulated by binning the ranks \\(0:M\\) into \\(J\\) bins and testing that the bins all have roughly the expected number of draws in them. Many other tests for uniformity are possible. For example, Säilynoja, Bürkner, and Vehtari (2022) use a binomial model pointwise for the empirical cumulative distribution function and adjust to obtain a simultaneous envelope to be used as a graphical uniformity test.\nThe bins don’t need to be exactly the same size. In general, if \\(b_j\\) is the number of ranks that fall into bin \\(j\\) and \\(e_j\\) is the number of ranks expected to fall into bin \\(j\\) (which will be proportional to its size under uniformity), the test statistic is \\[\nX^2 = \\sum_{j = 1}^J \\frac{(b_j - e_j)^2}{e_j}.\n\\] The terms are approximately squared standard normal, so that under the null hypothesis of uniformity, \\[\nX^2 \\sim \\textrm{chiSquared}(J - 1),\n\\] with the corresponding \\(p\\)-value given by the complementary cumulative distribution function (CCDF) of \\(\\textrm{chiSquared}(J - 1)\\) applied to \\(X^2\\). 
Because this test relies on the binomial being approximately normal, the traditional advice is to make sure the expected count in each bin is at least five, i.e., \\(e_j \\geq 5.\\)\n\n\nBecause there are \\(M + 1\\) possible ranks, with \\(J\\) bins, it is easiest to have \\(M + 1\\) be divisible by \\(J\\). For instance, if \\(J = 20\\) and \\(M = 999\\), then there are \\(1000\\) possible ranks and an expected count in each bin of \\(\\frac{M + 1}{J} = 50.\\)\nDistributing the ranks into bins is another fiddly operation that can be done with integer arithmetic or the floor operation. Using floor, the following function determines the bin for a rank, \\[\n\\textrm{bin}(r_{n, m}, M, J)\n= 1 + \\left\\lfloor \\frac{r_{n, m}}{(M + 1) / J} \\right\\rfloor.\n\\] For example, with \\(M = 999\\) and \\(J = 20\\), \\((M + 1) / J = 50\\). The lowest rank checks out, \\[\n\\textrm{bin}(0, 999, 20) = 1 + \\lfloor 0 / 50 \\rfloor = 1,\n\\] as does the 50th rank, \\[\n\\textrm{bin}(49, 999, 20) = 1 + \\lfloor 49 / 50 \\rfloor = 1,\n\\] and the 51st is appropriately put in the second bin, \\[\n\\textrm{bin}(50, 999, 20) = 1 + \\lfloor 50 / 50 \\rfloor = 2.\n\\] The highest rank also checks out, with \\(\\textrm{bin}(999, 999, 20) = 1 + \\lfloor 999 / 50 \\rfloor = 20.\\)\nTo summarize, the following pseudocode computes the \\(b_j\\) values for the \\(\\chi^2\\) test or for visualization in a histogram.\nInputs: M draws, J bins, N parameters, ranks r[n, m]\nb[1:J] = 0\nfor (m in 1:M) {\n ++b[1 + floor(r[n, m] * J / (M + 1))]\n}\nwhere the ++b[n] notation is a common form of syntactic sugar for b[n] = b[n] + 1.\nIn general, a great deal of care must be taken in visualizing discrete data because it’s easy to introduce off-by-one errors and artifacts at the edges because of the way boundaries are computed by default. 
That’s why so much attention must be devoted to indexing and binning.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Simulation-Based Calibration Checking" + ] + }, + { + "objectID": "stan-users-guide/simulation-based-calibration.html#examples-of-simulation-based-calibration-checking", + "href": "stan-users-guide/simulation-based-calibration.html#examples-of-simulation-based-calibration-checking", + "title": "Simulation-Based Calibration Checking", + "section": "", + "text": "This section will show what the results look like when the tests pass and then when they fail. The passing test will compare a normal model and normal data generating process, whereas the second will compare a normal model with a Student-t data generating process. The first will produce calibrated posteriors, the second will not.\n\n\nConsider the following simple model for a normal distribution with standard normal and lognormal priors on the location and scale parameters. \\[\\begin{eqnarray*}\n\\mu & \\sim & \\textrm{normal}(0, 1)\n\\\\[4pt]\n\\sigma & \\sim & \\textrm{lognormal}(0, 1)\n\\\\[4pt]\ny_{1:10} & \\sim & \\textrm{normal}(\\mu, \\sigma).\n\\end{eqnarray*}\\] The Stan program for evaluating SBC for this model is\ntransformed data {\n real mu_sim = normal_rng(0, 1);\n real<lower=0> sigma_sim = lognormal_rng(0, 1);\n\n int<lower=0> J = 10;\n vector[J] y_sim;\n for (j in 1:J) {\n y_sim[j] = student_t_rng(4, mu_sim, sigma_sim);\n }\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n mu ~ normal(0, 1);\n sigma ~ lognormal(0, 1);\n\n y_sim ~ normal(mu, sigma);\n}\ngenerated quantities {\n array[2] int<lower=0, upper=1> I_lt_sim\n = { mu < mu_sim, sigma < sigma_sim };\n}\nAfter running this for enough iterations so that the effective sample size is larger than \\(M\\), then thinning to \\(M\\) draws (here \\(M = 999\\)), the ranks are computed and binned, and then plotted.\n\n\n\n\n\n\nFigure 1: Simulation based calibration plots for 
location and scale of a normal model with standard normal prior on the location, standard lognormal prior on the scale. Both histograms appear uniform, which is consistent with inference being well calibrated.\n\n\n\n\n\n\nNow consider using a Student-t data generating process with a normal model. Compare the apparent uniformity of the well specified model with the ill-specified situation with Student-t generative process and normal model.\n\n\n\n\n\n\nFigure 2: Simulation based calibration plots for location and scale of a normal model with standard normal prior on the location, standard lognormal prior on the scale with mismatched generative model using a Student-t data model with 4 degrees of freedom. The mean histogram appears uniform, but the scale parameter shows simulated values much smaller than fit values, clearly signaling the lack of calibration.\n\n\n\n\n\n\nThe examples in the previous sections show hard-coded pathological behavior. The usual application of SBC is to diagnose problems with a sampler.\nThis can happen in Stan with well-specified models if the posterior geometry is too difficult (usually due to extreme stiffness that varies). A simple example is the eight schools problem, the data for which consists of sample means \\(y_j\\) and standard deviations \\(\\sigma_j\\) of differences in test score after the same intervention in \\(J = 8\\) different schools. Rubin (1981) applies a hierarchical model for a meta-analysis of the results, estimating the mean intervention effect and a varying effect for each school. With a standard parameterization and weak priors, this model has very challenging posterior geometry, as shown by Talts et al. (2018); this section replicates their results.\nThe meta-analysis model has parameters for a population mean \\(\\mu\\) and standard deviation \\(\\tau > 0\\) as well as the effect \\(\\theta_j\\) of the treatment in each school. 
The model has weak normal and half-normal priors for the population-level parameters, \\[\\begin{eqnarray*}\n\\mu & \\sim & \\textrm{normal}(0, 5)\n\\\\[4pt]\n\\tau & \\sim & \\textrm{normal}_{+}(0, 5).\n\\end{eqnarray*}\\] School level effects are modeled as normal given the population parameters, \\[\n\\theta_j \\sim \\textrm{normal}(\\mu, \\tau).\n\\] The data is modeled as in a meta-analysis, given the school effect and sample standard deviation in the school, \\[\ny_j \\sim \\textrm{normal}(\\theta_j, \\sigma_j).\n\\]\nThis model can be coded in Stan with a data-generating process that simulates the parameters and then simulates data according to the parameters.\ntransformed data {\n real mu_sim = normal_rng(0, 5);\n real tau_sim = abs(normal_rng(0, 5));\n int<lower=0> J = 8;\n array[J] real theta_sim = normal_rng(rep_vector(mu_sim, J), tau_sim);\n array[J] real<lower=0> sigma = abs(normal_rng(rep_vector(0, J), 5));\n array[J] real y = normal_rng(theta_sim, sigma);\n}\nparameters {\n real mu;\n real<lower=0> tau;\n array[J] real theta;\n}\nmodel {\n tau ~ normal(0, 5);\n mu ~ normal(0, 5);\n theta ~ normal(mu, tau);\n y ~ normal(theta, sigma);\n}\ngenerated quantities {\n int<lower=0, upper=1> mu_lt_sim = mu < mu_sim;\n int<lower=0, upper=1> tau_lt_sim = tau < tau_sim;\n int<lower=0, upper=1> theta1_lt_sim = theta[1] < theta_sim[1];\n}\nAs usual for simulation-based calibration checking, the transformed data encodes the data-generating process using random number generators. Here, the population parameters \\(\\mu\\) and \\(\\tau\\) are first simulated, then the school-level effects \\(\\theta\\), and then finally the observed data \\(\\sigma_j\\) and \\(y_j.\\) The parameters and model are a direct encoding of the mathematical presentation using vectorized sampling statements. 
The generated quantities block includes indicators for parameter comparisons, saving only \\(\\theta_1\\) because the schools are exchangeable in the simulation.\nWhen fitting the model in Stan, multiple warning messages are provided that the sampler has diverged. The divergence warnings are in Stan’s sampler precisely to diagnose the sampler’s inability to follow the curvature in the posterior and provide independent confirmation that Stan’s sampler cannot fit this model as specified.\nSBC also diagnoses the problem. Here’s the rank plots for running \\(N =\n200\\) simulations with 1000 warmup iterations and \\(M = 999\\) draws per simulation used to compute the ranks.\n\n\n\n\n\n\n\n\n\n\n\n(a) \\(\\mu\\)\n\n\n\n\n\n\n\n\n\n\n\n(b) \\(\\tau\\)\n\n\n\n\n\n\n\n\n\n\n\n(c) \\({\\theta}_1\\)\n\n\n\n\n\n\n\nFigure 3: Simulation based calibration plots for the eight-schools model with centered parameterization in Stan. The geometry is too difficult for the NUTS sampler to handle, as indicated by the plot for \\(\\theta_1\\) (Figure 3 (c)).\n\n\n\nAlthough the population mean and standard deviation \\(\\mu\\) and \\(\\tau\\) appear well calibrated, \\(\\theta_1\\) tells a very different story. The simulated values are much smaller than the values fit from the data. This is because Stan’s no-U-turn sampler is unable to sample with the model formulated in the centered parameterization—the posterior geometry has regions of extremely high curvature as \\(\\tau\\) approaches zero and the \\(\\theta_j\\) become highly constrained. 
The chapter on reparameterization explains how to remedy this problem and fit this kind of hierarchical model with Stan.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Simulation-Based Calibration Checking" + ] + }, + { + "objectID": "stan-users-guide/regression.html", + "href": "stan-users-guide/regression.html", + "title": "Regression Models", + "section": "", + "text": "Stan supports regression models from simple linear regressions to multilevel generalized linear models.\n\n\nThe simplest linear regression model is the following, with a single predictor and a slope and intercept coefficient, and normally distributed noise. This model can be written using standard regression notation as \\[\ny_n = \\alpha + \\beta x_n + \\epsilon_n\n\\quad\\text{where}\\quad\n\\epsilon_n \\sim \\operatorname{normal}(0,\\sigma).\n\\]\nThis is equivalent to the following sampling involving the residual, \\[\ny_n - (\\alpha + \\beta x_n) \\sim \\operatorname{normal}(0,\\sigma),\n\\] and reducing still further, to \\[\ny_n \\sim \\operatorname{normal}(\\alpha + \\beta x_n, \\, \\sigma).\n\\]\nThis latter form of the model is coded in Stan as follows.\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n}\nThere are N observations and for each observation, \\(n \\in 1:N\\), we have predictor x[n] and outcome y[n]. The intercept and slope parameters are alpha and beta. The model assumes a normally distributed noise term with scale sigma. 
This model has improper priors for the two regression coefficients.\n\n\nThe distribution statement in the previous model is vectorized, with\ny ~ normal(alpha + beta * x, sigma);\nproviding the same model as the unvectorized version,\nfor (n in 1:N) {\n y[n] ~ normal(alpha + beta * x[n], sigma);\n}\nIn addition to being more concise, the vectorized form is much faster.1\nIn general, Stan allows the arguments to distributions such as normal to be vectors. If any of the other arguments are vectors or arrays, they have to be the same size. If any of the other arguments is a scalar, it is reused for each vector entry.\nThe other reason this works is that Stan’s arithmetic operators are overloaded to perform matrix arithmetic on matrices. In this case, because x is of type vector and beta of type real, the expression beta * x is of type vector. Because Stan supports vectorization, a regression model with more than one predictor can be written directly using matrix notation.\ndata {\n int<lower=0> N; // number of data items\n int<lower=0> K; // number of predictors\n matrix[N, K] x; // predictor matrix\n vector[N] y; // outcome vector\n}\nparameters {\n real alpha; // intercept\n vector[K] beta; // coefficients for predictors\n real<lower=0> sigma; // error scale\n}\nmodel {\n y ~ normal(x * beta + alpha, sigma); // data model\n}\nThe constraint lower=0 in the declaration of sigma constrains the value to be greater than or equal to 0. With no prior in the model block, the effect is an improper prior on non-negative real numbers. Although a more informative prior may be added, improper priors are acceptable as long as they lead to proper posteriors.\nIn the model above, x is an \\(N \\times K\\) matrix of predictors and beta a \\(K\\)-vector of coefficients, so x * beta is an \\(N\\)-vector of predictions, one for each of the \\(N\\) data items. 
These predictions line up with the outcomes in the \\(N\\)-vector y, so the entire model may be written using matrix arithmetic as shown. It would be possible to include a column of ones in the data matrix x to remove the alpha parameter.\nThe distribution statement in the model above is just a more efficient, vector-based approach to coding the model with a loop, as in the following statistically equivalent model.\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(x[n] * beta, sigma);\n }\n}\nWith Stan’s matrix indexing scheme, x[n] picks out row n of the matrix x; because beta is a column vector, the product x[n] * beta is a scalar of type real.\n\n\nIn the model formulation\ny ~ normal(x * beta, sigma);\nthere is no longer an intercept coefficient alpha. Instead, we have assumed that the first column of the input matrix x is a column of 1 values. This way, beta[1] plays the role of the intercept. If the intercept gets a different prior than the slope terms, then it would be clearer to break it out. It is also slightly more efficient in its explicit form with the intercept variable singled out because there’s one fewer multiplications; it should not make that much of a difference to speed, though, so the choice should be based on clarity.\n\n\n\n\n\nIn the previous example, the linear predictor can be written as \\(\\eta\n= x \\beta\\), where \\(\\eta\\) is a \\(N\\)-vector of predictions, \\(x\\) is a \\(N\n\\times K\\) matrix, and \\(\\beta\\) is a \\(K\\)-vector of coefficients. Presuming \\(N \\geq K\\), we can exploit the fact that any design matrix \\(x\\) can be decomposed using the thin QR decomposition into an orthogonal matrix \\(Q\\) and an upper-triangular matrix \\(R\\), i.e. 
\\(x = Q\nR\\).\nThe functions qr_thin_Q and qr_thin_R implement the thin QR decomposition, which is to be preferred to the fat QR decomposition that would be obtained by using qr_Q and qr_R, as the latter would more easily run out of memory (see the Stan Functions Reference for more information on the qr_thin_Q and qr_thin_R functions). In practice, it is best to write \\(x = Q^\\ast\nR^\\ast\\) where \\(Q^\\ast = Q * \\sqrt{n - 1}\\) and \\(R^\\ast =\n\\frac{1}{\\sqrt{n - 1}} R\\). Thus, we can equivalently write \\(\\eta = x\n\\beta = Q R \\beta = Q^\\ast R^\\ast \\beta\\). If we let \\(\\theta = R^\\ast\n\\beta\\), then we have \\(\\eta = Q^\\ast \\theta\\) and \\(\\beta = R^{\\ast^{-1}}\n\\theta\\). In that case, the previous Stan program becomes\ndata {\n int<lower=0> N; // number of data items\n int<lower=0> K; // number of predictors\n matrix[N, K] x; // predictor matrix\n vector[N] y; // outcome vector\n}\ntransformed data {\n matrix[N, K] Q_ast;\n matrix[K, K] R_ast;\n matrix[K, K] R_ast_inverse;\n // thin and scale the QR decomposition\n Q_ast = qr_thin_Q(x) * sqrt(N - 1);\n R_ast = qr_thin_R(x) / sqrt(N - 1);\n R_ast_inverse = inverse(R_ast);\n}\nparameters {\n real alpha; // intercept\n vector[K] theta; // coefficients on Q_ast\n real<lower=0> sigma; // error scale\n}\nmodel {\n y ~ normal(Q_ast * theta + alpha, sigma); // data model\n}\ngenerated quantities {\n vector[K] beta;\n beta = R_ast_inverse * theta; // coefficients on x\n}\nSince this Stan program generates equivalent predictions for \\(y\\) and the same posterior distribution for \\(\\alpha\\), \\(\\beta\\), and \\(\\sigma\\) as the previous Stan program, many wonder why the version with this QR reparameterization performs so much better in practice, often both in terms of wall time and in terms of effective sample size. The reasoning is threefold:\n\nThe columns of \\(Q^\\ast\\) are orthogonal whereas the columns of \\(x\\) generally are not. 
Thus, it is easier for a Markov Chain to move around in \\(\\theta\\)-space than in \\(\\beta\\)-space.\nThe columns of \\(Q^\\ast\\) have the same scale whereas the columns of \\(x\\) generally do not. Thus, a Hamiltonian Monte Carlo algorithm can move around the parameter space with a smaller number of larger steps.\nSince the covariance matrix for the columns of \\(Q^\\ast\\) is an identity matrix, \\(\\theta\\) typically has a reasonable scale if the units of \\(y\\) are also reasonable. This also helps HMC move efficiently without compromising numerical accuracy.\n\nConsequently, this QR reparameterization is recommended for linear and generalized linear models in Stan whenever \\(K > 1\\) and you do not have an informative prior on the location of \\(\\beta\\). It can also be worthwhile to subtract the mean from each column of \\(x\\) before obtaining the QR decomposition, which does not affect the posterior distribution of \\(\\theta\\) or \\(\\beta\\) but does affect \\(\\alpha\\) and allows you to interpret \\(\\alpha\\) as the expectation of \\(y\\) in a linear model.\n\n\n\nSee our general discussion of priors for tips on priors for parameters in regression models.\nLater sections discuss univariate hierarchical priors and multivariate hierarchical priors, as well as priors used to identify models.\nHowever, as described in the QR-reparameterization section, if you do not have an informative prior on the location of the regression coefficients, then you are better off reparameterizing your model so that the regression coefficients are a generated quantity. In that case, it usually does not matter much what prior is used on the reparameterized regression coefficients and almost any weakly informative prior that scales with the outcome will do.\n\n\n\nThe standard approach to linear regression is to model the noise term \\(\\epsilon\\) as having a normal distribution. From Stan’s perspective, there is nothing special about normally distributed noise. 
For instance, robust regression can be accommodated by giving the noise term a Student-\\(t\\) distribution. To code this in Stan, the distribution statement is changed to the following.\ndata {\n // ...\n real<lower=0> nu;\n}\n// ...\nmodel {\n y ~ student_t(nu, alpha + beta * x, sigma);\n}\nThe degrees of freedom constant nu is specified as data.\n\n\n\nFor binary outcomes, either of the closely related logistic or probit regression models may be used. These generalized linear models vary only in the link function they use to map linear predictions in \\((-\\infty,\\infty)\\) to probability values in \\((0,1)\\). Their respective link functions, the logistic function and the standard normal cumulative distribution function, are both sigmoid functions (i.e., they are both S-shaped).\nA logistic regression model with one predictor and an intercept is coded as follows.\ndata {\n int<lower=0> N;\n vector[N] x;\n array[N] int<lower=0, upper=1> y;\n}\nparameters {\n real alpha;\n real beta;\n}\nmodel {\n y ~ bernoulli_logit(alpha + beta * x);\n}\nThe noise parameter is built into the Bernoulli formulation here rather than specified directly.\nLogistic regression is a kind of generalized linear model with binary outcomes and the log odds (logit) link function, defined by \\[\n\\operatorname{logit}(v) = \\log \\left( \\frac{v}{1-v} \\right).\n\\]\nThe inverse of the link function appears in the model: \\[\n\\operatorname{logit}^{-1}(u) = \\texttt{inv}\\mathtt{\\_}\\texttt{logit}(u) = \\frac{1}{1 + \\exp(-u)}.\n\\]\nThe model formulation above uses the logit-parameterized version of the Bernoulli distribution, which is defined by \\[\n\\texttt{bernoulli}\\mathtt{\\_}\\texttt{logit}\\left(y \\mid \\alpha \\right)\n=\n\\texttt{bernoulli}\\left(y \\mid \\operatorname{logit}^{-1}(\\alpha)\\right).\n\\]\nThe formulation is also vectorized in the sense that alpha and beta are scalars and x is a vector, so that alpha + beta * x is a vector. 
The vectorized formulation is equivalent to the less efficient version\nfor (n in 1:N) {\n y[n] ~ bernoulli_logit(alpha + beta * x[n]);\n}\nExpanding out the Bernoulli logit, the model is equivalent to the more explicit, but less efficient and less arithmetically stable\nfor (n in 1:N) {\n y[n] ~ bernoulli(inv_logit(alpha + beta * x[n]));\n}\nOther link functions may be used in the same way. For example, probit regression uses the cumulative normal distribution function, which is typically written as\n\\[\n\\Phi(x) = \\int_{-\\infty}^x \\textsf{normal}\\left(y \\mid 0,1 \\right) \\,\\textrm{d}y.\n\\]\nThe cumulative standard normal distribution function \\(\\Phi\\) is implemented in Stan as the function Phi. The probit regression model may be coded in Stan by replacing the logistic model’s distribution statement with the following.\ny[n] ~ bernoulli(Phi(alpha + beta * x[n]));\nA fast approximation to the cumulative standard normal distribution function \\(\\Phi\\) is implemented in Stan as the function Phi_approx.2 The approximate probit regression model may be coded with the following.\ny[n] ~ bernoulli(Phi_approx(alpha + beta * x[n]));\n\n\n\nMultiple outcome forms of logistic regression can be coded directly in Stan. For instance, suppose there are \\(K\\) possible outcomes for each output variable \\(y_n\\). Also suppose that there is a \\(D\\)-dimensional vector \\(x_n\\) of predictors for \\(y_n\\). The multi-logit model with \\(\\textsf{normal}(0,5)\\) priors on the coefficients is coded as follows.\ndata {\n int K;\n int N;\n int D;\n array[N] int y;\n matrix[N, D] x;\n}\nparameters {\n matrix[D, K] beta;\n}\nmodel {\n matrix[N, K] x_beta = x * beta;\n\n to_vector(beta) ~ normal(0, 5);\n\n for (n in 1:N) {\n y[n] ~ categorical_logit(x_beta[n]');\n\n }\n}\nwhere x_beta[n]' is the transpose of x_beta[n]. The prior on beta is coded in vectorized form. 
As of Stan 2.18, the categorical-logit distribution is not vectorized for parameter arguments, so the loop is required. The matrix multiplication is pulled out to define a local variable for all of the predictors for efficiency. Like the Bernoulli-logit, the categorical-logit distribution applies softmax internally to convert an arbitrary vector to a simplex, \\[\n\\texttt{categorical}\\mathtt{\\_}\\texttt{logit}\\left(y \\mid \\alpha\\right)\n=\n\\texttt{categorical}\\left(y \\mid \\texttt{softmax}(\\alpha)\\right),\n\\] where \\[\n\\texttt{softmax}(u) = \\exp(u) / \\operatorname{sum}\\left(\\exp(u)\\right).\n\\]\nThe categorical distribution with log-odds (logit) scaled parameters used above is equivalent to writing\ny[n] ~ categorical(softmax(x[n] * beta));\n\n\nThe data block in the above model is defined without constraints on sizes K, N, and D or on the outcome array y. Constraints on data declarations provide error checking at the point data are read (or transformed data are defined), which is before sampling begins. Constraints on data declarations also make the model author’s intentions more explicit, which can help with readability. The above model’s declarations could be tightened to\nint<lower=2> K;\nint<lower=0> N;\nint<lower=1> D;\narray[N] int<lower=1, upper=K> y;\nThese constraints arise because the number of categories, K, must be at least two in order for a categorical model to be useful. The number of data items, N, can be zero, but not negative; unlike R, Stan’s for-loops always move forward, so that a loop extent of 1:N when N is equal to zero ensures the loop’s body will not be executed. The number of predictors, D, must be at least one in order for beta * x[n] to produce an appropriate argument for softmax(). The categorical outcomes y[n] must be between 1 and K in order for the discrete sampling to be well defined.\nConstraints on data declarations are optional. 
Constraints on parameters declared in the parameters block, on the other hand, are not optional—they are required to ensure support for all parameter values satisfying their constraints. Constraints on transformed data, transformed parameters, and generated quantities are also optional.\n\n\n\nBecause softmax is invariant under adding a constant to each component of its input, the model is typically only identified if there is a suitable prior on the coefficients.\nAn alternative is to use \\((K-1)\\)-vectors by fixing one of them to be zero. The partially known parameters section discusses how to mix constants and parameters in a vector. In the multi-logit case, the parameter block would be redefined to use \\((K - 1)\\)-vectors\nparameters {\n matrix[D, K - 1] beta_raw;\n}\nand then these are transformed to parameters to use in the model. First, a transformed data block is added before the parameters block to define a vector of zero values,\ntransformed data {\n vector[D] zeros = rep_vector(0, D);\n}\nwhich can then be appended to beta_raw to produce the coefficient matrix beta,\ntransformed parameters {\n matrix[D, K] beta = append_col(beta_raw, zeros);\n}\nThe rep_vector(0, D) call creates a column vector of size D with all entries set to zero. The derived matrix beta is then defined to be the result of appending the vector zeros as a new column at the end of beta_raw; the vector zeros is defined as transformed data so that it doesn’t need to be constructed from scratch each time it is used.\nThis is not the same model as using \\(K\\)-vectors as parameters, because now the prior only applies to \\((K-1)\\)-vectors. In practice, this will cause the maximum likelihood solutions to be different and also the posteriors to be slightly different when taking priors centered around zero, as is typical for regression coefficients.\n\n\n\n\nWhen there are varying effects in a regression, the resulting likelihood is not identified unless further steps are taken. 
For example, we might have a global intercept \\(\\alpha\\) and then a varying effect \\(\\beta_k\\) for age group \\(k\\) to make a linear predictor \\(\\alpha +\n\\beta_k\\). With this predictor, we can add a constant to \\(\\alpha\\) and subtract from each \\(\\beta_k\\) and get exactly the same likelihood.\nThe traditional approach to identifying such a model is to pin the first varying effect to zero, i.e., \\(\\beta_1 = 0\\). With one of the varying effects fixed, you can no longer add a constant to all of them and the model’s likelihood is identified. In addition to the difficulty in specifying such a model in Stan, it is awkward to formulate priors because the other coefficients are all interpreted relative to \\(\\beta_1\\).\nIn a Bayesian setting, a proper prior on each of the \\(\\beta\\) is enough to identify the model. Unfortunately, this can lead to inefficiency during sampling as the model is still only weakly identified through the prior—there is a very simple example of the difference in the discussion of collinearity in the collinearity section.\nAn alternative identification strategy that allows a symmetric prior is to enforce a sum-to-zero constraint on the varying effects, i.e., \\(\\sum_{k=1}^K \\beta_k = 0.\\)\nA parameter vector constrained to sum to zero may also be used to identify a multi-logit regression parameter vector (see the multi-logit section for details), or may be used for ability or difficulty parameters (but not both) in an IRT model (see the item-response model section for details).\n\n\nAs of Stan 2.36, there is a built in sum_to_zero_vector type, which can be used as follows.\nparameters {\n sum_to_zero_vector[K] beta;\n // ...\n}\nThis produces a vector of size K such that sum(beta) = 0. 
The unconstrained representation requires only K - 1 values because the last is determined by the first K - 1.\nPlacing a prior on beta in this parameterization, for example,\n beta ~ normal(0, 1);\nleads to a subtly different posterior than what you would get with the same prior on an unconstrained size-K vector. As explained below, the variance is reduced.\nThe sum-to-zero constraint can be implemented naively by setting the last element to the negative sum of the first elements, i.e., \\(\\beta_K\n= -\\sum_{k=1}^{K-1} \\beta_k.\\) But that leads to high correlation among the \\(\\beta_k\\).\nThe transform used in Stan eliminates these correlations by constructing an orthogonal basis and applying it to the zero-sum-constraint; Seyboldt (2024) provides an explanation. The Stan Reference Manual provides the details in the chapter on transforms. Although any orthogonal basis can be used, Stan uses the inverse isometric log transform because it is convenient to describe and the transform simplifies to efficient scalar operations rather than more expensive matrix operations.\n\n\nOn the Stan forums, Aaron Goodman provided the following code to produce a prior with standard normal marginals on the components of beta,\nmodel {\n beta ~ normal(0, inv(sqrt(1 - inv(K))));\n // ...\n}\nThe scale component can be multiplied by sigma to produce a normal(0, sigma) prior marginally.\nTo generate distributions with marginals other than standard normal, the resulting beta may be scaled by some factor sigma and translated to some new location mu.\n\n\n\n\nAdding a prior such as \\(\\beta \\sim \\textsf{normal}(0,\\epsilon)\\) for a small \\(\\epsilon\\) will provide a kind of soft centering of a parameter vector \\(\\beta\\) by preferring, all else being equal, that \\(\\sum_{k=1}^K\n\\beta_k = 0\\). 
This approach is only guaranteed to roughly center if \\(\\beta\\) and the elementwise addition \\(\\beta + c\\) for a scalar constant \\(c\\) produce the same likelihood (perhaps by another vector \\(\\alpha\\) being transformed to \\(\\alpha - c\\), as in the IRT models). This is another way of achieving a symmetric prior, though it requires choosing an \\(\\epsilon\\). If \\(\\epsilon\\) is too large, there won’t be a strong enough centering effect and if it is too small, it will add high curvature to the target density and impede sampling.\n\n\n\n\nOrdered regression for an outcome \\(y_n \\in \\{ 1, \\dotsc, K \\}\\) with predictors \\(x_n \\in \\mathbb{R}^D\\) is determined by a single coefficient vector \\(\\beta \\in \\mathbb{R}^D\\) along with a sequence of cutpoints \\(c \\in\n\\mathbb{R}^{K-1}\\) sorted so that \\(c_d < c_{d+1}\\). The discrete output is \\(k\\) if the linear predictor \\(x_n \\beta\\) falls between \\(c_{k-1}\\) and \\(c_k\\), assuming \\(c_0 = -\\infty\\) and \\(c_K = \\infty\\). The noise term is fixed by the form of regression, with examples for ordered logistic and ordered probit models.\n\n\nThe ordered logistic model can be coded in Stan using the ordered data type for the cutpoints and the built-in ordered_logistic distribution.\ndata {\n int<lower=2> K;\n int<lower=0> N;\n int<lower=1> D;\n array[N] int<lower=1, upper=K> y;\n matrix[N, D] x;\n}\nparameters {\n vector[D] beta;\n ordered[K - 1] c;\n}\nmodel {\n y ~ ordered_logistic(x * beta, c);\n}\nThe vector of cutpoints c is declared as ordered[K - 1], which guarantees that c[k] is less than c[k + 1].\nIf the cutpoints were assigned independent priors, the constraint effectively truncates the joint prior to support over points that satisfy the ordering constraint. 
Luckily, Stan does not need to compute the effect of the constraint on the normalizing term because the probability is needed only up to a proportion.\nThe equivalent model can be written using the ordered_logistic_glm distribution, which can provide more efficient computation in case of higher dimensional beta.\n y ~ ordered_logistic_glm(x, beta, c);\n\n\nAn ordered probit model can be coded in exactly the same way by using the built-in ordered_probit distribution.\nmodel {\n y ~ ordered_probit(x * beta, c);\n}\nThere is not yet an ordered_probit_glm distribution in Stan.\n\n\n\n\n\nThe simplest multilevel model is a hierarchical model in which the data are grouped into \\(L\\) distinct categories (or levels). An extreme approach would be to completely pool all the data and estimate a common vector of regression coefficients \\(\\beta\\). At the other extreme, an approach with no pooling assigns each level \\(l\\) its own coefficient vector \\(\\beta_l\\) that is estimated separately from the other levels. A hierarchical model is an intermediate solution where the degree of pooling is determined by the data and a prior on the amount of pooling.\nSuppose each binary outcome \\(y_n \\in \\{ 0, 1 \\}\\) has an associated level, \\(ll_n \\in \\{ 1, \\dotsc, L \\}\\). Each outcome will also have an associated predictor vector \\(x_n \\in \\mathbb{R}^D\\). Each level \\(l\\) gets its own coefficient vector \\(\\beta_l \\in \\mathbb{R}^D\\). The hierarchical structure involves drawing the coefficients \\(\\beta_{l,d}\n\\in \\mathbb{R}\\) from a prior that is also estimated with the data. This hierarchically estimated prior determines the amount of pooling. If the data in each level are similar, strong pooling will be reflected in low hierarchical variance. 
If the data in the levels are dissimilar, weaker pooling will be reflected in higher hierarchical variance.\nThe following model encodes a hierarchical logistic regression model with a hierarchical prior on the regression coefficients.\ndata {\n int<lower=1> D;\n int<lower=0> N;\n int<lower=1> L;\n array[N] int<lower=0, upper=1> y;\n array[N] int<lower=1, upper=L> ll;\n array[N] row_vector[D] x;\n}\nparameters {\n array[D] real mu;\n array[D] real<lower=0> sigma;\n array[L] vector[D] beta;\n}\nmodel {\n for (d in 1:D) {\n mu[d] ~ normal(0, 100);\n for (l in 1:L) {\n beta[l, d] ~ normal(mu[d], sigma[d]);\n }\n }\n for (n in 1:N) {\n y[n] ~ bernoulli(inv_logit(x[n] * beta[ll[n]]));\n }\n}\nThe standard deviation parameter sigma gets an implicit uniform prior on \\((0,\\infty)\\) because of its declaration with a lower-bound constraint of zero. Stan allows improper priors as long as the posterior is proper. Nevertheless, it is usually helpful to have informative or at least weakly informative priors for all parameters; see the regression priors section for recommendations on priors for regression coefficients and scales.\n\n\nWhere possible, vectorizing distribution statements leads to faster log probability and derivative evaluations. The speed boost is not because loops are eliminated, but because vectorization allows sharing subcomputations in the log probability and gradient calculations and because it reduces the size of the expression tree required for gradient calculations.\nThe first optimization vectorizes the for-loop over D as\nmu ~ normal(0, 100);\nfor (l in 1:L) {\n beta[l] ~ normal(mu, sigma);\n}\nThe declaration of beta as an array of vectors means that the expression beta[l] denotes a vector. 
Although beta could have been declared as a matrix, an array of vectors (or a two-dimensional array) is more efficient for accessing rows; see the indexing efficiency section for more information on the efficiency tradeoffs among arrays, vectors, and matrices.\nThis model can be further sped up and at the same time made more arithmetically stable by replacing the application of inverse-logit inside the Bernoulli distribution with the logit-parameterized Bernoulli,3\nfor (n in 1:N) {\n y[n] ~ bernoulli_logit(x[n] * beta[ll[n]]);\n}\nUnlike in R or BUGS, loops, array access and assignments are fast in Stan because they are translated directly to C++. In most cases, the cost of allocating and assigning to a container is more than made up for by the increased efficiency due to vectorizing the log probability and gradient calculations. Thus the following version is faster than the original formulation as a loop over a distribution statement.\n{\n vector[N] x_beta_ll;\n for (n in 1:N) {\n x_beta_ll[n] = x[n] * beta[ll[n]];\n }\n y ~ bernoulli_logit(x_beta_ll);\n}\nThe brackets introduce a new scope for the local variable x_beta_ll; alternatively, the variable may be declared at the top of the model block.\nIn some cases, such as the above, the local variable assignment leads to models that are less readable. The recommended practice in such cases is to first develop and debug the more transparent version of the model and only work on optimizations when the simpler formulation has been debugged.\n\n\n\n\nPriors on priors, also known as “hyperpriors,” should be treated the same way as priors on lower-level parameters in that as much prior information as is available should be brought to bear. 
Because hyperpriors often apply to only a handful of lower-level parameters, care must be taken to ensure the posterior is both proper and not overly sensitive either statistically or computationally to wide tails in the priors.\n\n\nThe fundamental problem with maximum likelihood estimation (MLE) in the hierarchical model setting is that as the hierarchical variance drops and the values cluster around the hierarchical mean, the overall density grows without bound. As an illustration, consider a simple hierarchical linear regression (with fixed prior mean) of \\(y_n \\in\n\\mathbb{R}\\) on \\(x_n \\in \\mathbb{R}^K\\), formulated as \\[\\begin{align*}\ny_n & \\sim \\textsf{normal}(x_n \\beta, \\sigma) \\\\\n\\beta_k & \\sim \\textsf{normal}(0,\\tau) \\\\\n\\tau & \\sim \\textsf{Cauchy}(0,2.5) \\\\\n\\end{align*}\\]\nIn this case, as \\(\\tau \\rightarrow 0\\) and \\(\\beta_k \\rightarrow 0\\), the posterior density \\[ p(\\beta,\\tau,\\sigma|y,x) \\propto p(y|x,\\beta,\\tau,\\sigma) \\] grows without bound. See Neal’s funnel density, which has similar behavior.\nThere is obviously no MLE for \\(\\beta,\\tau,\\sigma\\) in such a case, and therefore the model must be modified if posterior modes are to be used for inference. The approach recommended by Chung et al. (2013) is to use a gamma distribution as a prior, such as \\[\n\\sigma \\sim \\textsf{Gamma}(2, 1/A),\n\\] for a reasonably large value of \\(A\\), such as \\(A = 10\\).\n\n\n\n\nItem-response theory (IRT) models the situation in which a number of students each answer one or more of a group of test questions. The model is based on parameters for the ability of the students, the difficulty of the questions, and in more articulated models, the discriminativeness of the questions and the probability of guessing correctly; see Gelman and Hill (2007, pp. 
314–320) for a textbook introduction to hierarchical IRT models and Curtis (2010) for encodings of a range of IRT models in BUGS.\n\n\nThe data provided for an IRT model may be declared as follows to account for the fact that not every student is required to answer every question.\ndata {\n int<lower=1> J; // number of students\n int<lower=1> K; // number of questions\n int<lower=1> N; // number of observations\n array[N] int<lower=1, upper=J> jj; // student for observation n\n array[N] int<lower=1, upper=K> kk; // question for observation n\n array[N] int<lower=0, upper=1> y; // correctness for observation n\n}\nThis declares a total of N student-question pairs in the data set, where each n in 1:N indexes a binary observation y[n] of the correctness of the answer of student jj[n] on question kk[n].\nThe prior hyperparameters will be hard coded in the rest of this section for simplicity, though they could be coded as data in Stan for more flexibility.\n\n\n\nThe 1PL item-response model, also known as the Rasch model, has one parameter (1P) for questions and uses the logistic link function (L).\nThe model parameters are declared as follows.\nparameters {\n real delta; // mean student ability\n array[J] real alpha; // ability of student j - mean ability\n array[K] real beta; // difficulty of question k\n}\nThe parameter alpha[j] is the ability coefficient for student j and beta[k] is the difficulty coefficient for question k. 
The non-standard parameterization used here also includes an intercept term delta, which represents the average student’s response to the average question.4\nThe model itself is as follows.\nmodel {\n alpha ~ std_normal(); // informative true prior\n beta ~ std_normal(); // informative true prior\n delta ~ normal(0.75, 1); // informative true prior\n for (n in 1:N) {\n y[n] ~ bernoulli_logit(alpha[jj[n]] - beta[kk[n]] + delta);\n }\n}\nThis model uses the logit-parameterized Bernoulli distribution, where \\[\n\\texttt{bernoulli}\\mathtt{\\_}\\texttt{logit}\\left(y \\mid \\alpha\\right)\n=\n\\texttt{bernoulli}\\left(y \\mid \\operatorname{logit}^{-1}(\\alpha)\\right).\n\\]\nThe key to understanding it is the term inside the bernoulli_logit distribution, from which it follows that \\[\n\\Pr[y_n = 1] = \\operatorname{logit}^{-1}\\left(\\alpha_{jj[n]} - \\beta_{kk[n]}\n+ \\delta\\right).\n\\]\nThe model suffers from additive identifiability issues without the priors. For example, adding a term \\(\\xi\\) to each \\(\\alpha_j\\) and \\(\\beta_k\\) results in the same predictions. The use of priors for \\(\\alpha\\) and \\(\\beta\\) located at 0 identifies the parameters; see Gelman and Hill (2007) for a discussion of identifiability issues and alternative approaches to identification.\nFor testing purposes, the IRT 1PL model distributed with Stan uses informative priors that match the actual data generation process used to simulate the data in R (the simulation code is supplied in the same directory as the models). This is unrealistic for most practical applications, but allows Stan’s inferences to be validated. A simple sensitivity analysis with fatter priors shows that the posterior is fairly sensitive to the prior even with 400 students and 100 questions and only 25% missingness at random. 
For real applications, the priors should be fit hierarchically along with the other parameters, as described in the next section.\n\n\n\nThe simple 1PL model described in the previous section is generalized in this section with the addition of a discrimination parameter to model how noisy a question is and by adding multilevel priors for the question difficulty and discrimination parameters. The model parameters are declared as follows.\nparameters {\n real mu_beta; // mean question difficulty\n vector[J] alpha; // ability for j - mean\n vector[K] beta; // difficulty for k\n vector<lower=0>[K] gamma; // discrimination of k\n real<lower=0> sigma_beta; // scale of difficulties\n real<lower=0> sigma_gamma; // scale of log discrimination\n}\nThe parameters should be clearer after the model definition.\nmodel {\n alpha ~ std_normal();\n beta ~ normal(0, sigma_beta);\n gamma ~ lognormal(0, sigma_gamma);\n mu_beta ~ cauchy(0, 5);\n sigma_beta ~ cauchy(0, 5);\n sigma_gamma ~ cauchy(0, 5);\n y ~ bernoulli_logit(gamma[kk] .* (alpha[jj] - (beta[kk] + mu_beta)));\n}\nThe std_normal function is used here, defined by \\[\n\\texttt{std}\\mathtt{\\_}\\texttt{normal}(y)\n=\n\\textsf{normal}\\left(y \\mid 0, 1\\right).\n\\]\nThe distribution statement is also vectorized using elementwise multiplication; it is equivalent to\nfor (n in 1:N) {\n y[n] ~ bernoulli_logit(gamma[kk[n]]\n * (alpha[jj[n]] - (beta[kk[n]] + mu_beta)));\n}\nThe 2PL model is similar to the 1PL model, with the additional parameter gamma[k] modeling how discriminative question k is. If gamma[k] is greater than 1, responses are more attenuated with less chance of getting a question right at random. 
The parameter gamma[k] is constrained to be positive, which prohibits there being questions that are easier for students of lesser ability; such questions are not unheard of, but they tend to be eliminated from most testing situations where an IRT model would be applied.\nThe model is parameterized here with student abilities alpha being given a standard normal prior. This is to identify both the scale and the location of the parameters, both of which would be unidentified otherwise; see the problematic posteriors chapter for further discussion of identifiability. The difficulty and discrimination parameters beta and gamma then have varying scales given hierarchically in this model. They could also be given weakly informative non-hierarchical priors, such as\nbeta ~ normal(0, 5);\ngamma ~ lognormal(0, 2);\nThe point is that the alpha determines the scale and location and beta and gamma are allowed to float.\nThe beta parameter is here given a non-centered parameterization, with parameter mu_beta serving as the mean beta location. An alternative would’ve been to take:\nbeta ~ normal(mu_beta, sigma_beta);\nand\ny[n] ~ bernoulli_logit(gamma[kk[n]] * (alpha[jj[n]] - beta[kk[n]]));\nNon-centered parameterizations tend to be more efficient in hierarchical models; see the reparameterization section for more information on non-centered reparameterizations.\nThe intercept term mu_beta can’t itself be modeled hierarchically, so it is given a weakly informative \\(\\textsf{Cauchy}(0,5)\\) prior. Similarly, the scale terms, sigma_beta, and sigma_gamma, are given half-Cauchy priors. As mentioned earlier, the scale and location for alpha are fixed to ensure identifiability. 
The truncation in the half-Cauchy prior is implicit; explicit truncation is not necessary because the log probability need only be calculated up to a proportion and the scale variables are constrained to \\((0,\\infty)\\) by their declarations.\n\n\n\n\n\n\nOne application of (hierarchical) priors is to identify the scale and/or location of a group of parameters. For example, in the IRT models discussed in the previous section, there is both a location and scale non-identifiability. With uniform priors, the posteriors will float in terms of both scale and location. See the collinearity section for a simple example of the problems this poses for estimation.\nThe non-identifiability is resolved by providing a standard normal (i.e., \\(\\textsf{normal}(0,1)\\)) prior on one group of coefficients, such as the student abilities. With a standard normal prior on the student abilities, the IRT model is identified in that the posterior will produce a group of estimates for student ability parameters that have a sample mean of close to zero and a sample variance of close to one. The difficulty and discrimination parameters for the questions should then be given a diffuse, or ideally a hierarchical prior, which will identify these parameters by scaling and locating relative to the student ability parameters.\n\n\n\nAnother case in which priors can help provide identifiability is in the case of collinearity in a linear regression. In linear regression, if two predictors are collinear (i.e, one is a linear function of the other), then their coefficients will have a correlation of 1 (or -1) in the posterior. This leads to non-identifiability. 
By placing normal priors on the coefficients, the maximum likelihood solution of two duplicated predictors (trivially collinear) will be half the value that would be obtained by only including one.\n\n\n\nIn a logistic regression, if a predictor is positive in cases of 1 outcomes and negative in cases of 0 outcomes, then the maximum likelihood estimate for the coefficient for that predictor diverges to infinity. This divergence can be controlled by providing a prior for the coefficient, which will “shrink” the estimate back toward zero and thus identify the model in the posterior.\nSimilar problems arise for sampling with improper flat priors. The sampler will try to draw large values. By providing a prior, the posterior will be concentrated around finite values, leading to well-behaved sampling.\n\n\n\n\nIn hierarchical regression models (and other situations), several individual-level variables may be assigned hierarchical priors. For example, a model with multiple varying intercepts and slopes within might assign them a multivariate prior.\nAs an example, the individuals might be people and the outcome income, with predictors such as education level and age, and the groups might be states or other geographic divisions. The effect of education level and age as well as an intercept might be allowed to vary by state. Furthermore, there might be state-level predictors, such as average state income and unemployment level.\n\n\nGelman and Hill (2007, chap. 13, Chapter 17) provide a discussion of a hierarchical model with \\(N\\) individuals organized into \\(J\\) groups. Each individual has a predictor row vector \\(x_n\\) of size \\(K\\); to unify the notation, they assume that \\(x_{n,1} = 1\\) is a fixed “intercept” predictor. To encode group membership, they assume individual \\(n\\) belongs to group \\(jj[n] \\in \\{ 1, \\dotsc, J \\}\\). 
Each individual \\(n\\) also has an observed outcome \\(y_n\\) taking on real values.\n\n\nThe model is a linear regression with slope and intercept coefficients varying by group, so that \\(\\beta_j\\) is the coefficient \\(K\\)-vector for group \\(j\\). The data model for individual \\(n\\) is then just \\[\ny_n \\sim \\textsf{normal}(x_n \\, \\beta_{jj[n]}, \\, \\sigma)\n\\quad\\text{for}\\quad n \\in \\{ 1, \\dotsc, N \\}.\n\\]\n\n\n\nGelman and Hill model the coefficient vectors \\(\\beta_j\\) as being drawn from a multivariate distribution with mean vector \\(\\mu\\) and covariance matrix \\(\\Sigma\\), \\[\n\\beta_j \\sim \\textsf{multivariate normal}(\\mu_j, \\, \\Sigma)\n\\quad\\text{for}\\quad j \\in \\{ 1, \\dotsc, J \\}.\n\\]\nBelow, we discuss the full model of Gelman and Hill, which uses group-level predictors to model \\(\\mu\\); for now, we assume \\(\\mu\\) is a simple vector parameter.\n\n\n\nFor hierarchical modeling, the group-level mean vector \\(\\mu\\) and covariance matrix \\(\\Sigma\\) must themselves be given priors. The group-level mean vector can be given a reasonable weakly-informative prior for independent coefficients, such as \\[\n\\mu_j \\sim \\textsf{normal}(0,5).\n\\] If more is known about the expected coefficient values \\(\\beta_{j, k}\\), this information can be incorporated into the prior for \\(\\mu_j\\).\nFor the prior on the covariance matrix, Gelman and Hill suggest using a scaled inverse Wishart. That choice was motivated primarily by convenience as it is conjugate to the multivariate likelihood function and thus simplifies Gibbs sampling.\nIn Stan, there is no restriction to conjugacy for multivariate priors, and we in fact recommend a slightly different approach. Like Gelman and Hill, we decompose our prior into a scale and a matrix, but are able to do so in a more natural way based on the actual variable scales and a correlation matrix. 
Specifically, we define \\[\n\\Sigma = \\texttt{diag}\\mathtt{\\_}\\texttt{matrix}(\\tau) \\times \\Omega \\times \\texttt{diag}\\mathtt{\\_}\\texttt{matrix}(\\tau),\n\\] where \\(\\Omega\\) is a correlation matrix and \\(\\tau\\) is the vector of coefficient scales. This mapping from scale vector \\(\\tau\\) and correlation matrix \\(\\Omega\\) can be inverted, using \\[\n\\tau_k = \\sqrt{\\Sigma_{k,k}}\n\\quad\\textsf{and}\\quad\n\\Omega_{i, j} = \\frac{\\Sigma_{i, j}}{\\tau_i \\, \\tau_j}.\n\\]\nThe components of the scale vector \\(\\tau\\) can be given any reasonable prior for scales, but we recommend something weakly informative like a half-Cauchy distribution with a small scale, such as \\[\n\\tau_k \\sim \\textsf{Cauchy}(0, 2.5)\n\\quad\\text{for}\\quad k \\in \\{ 1, \\dotsc, K \\}\n\\quad\\text{constrained\\ by}\\quad \\tau_k > 0.\n\\] As for the prior means, if there is information about the scale of variation of coefficients across groups, it should be incorporated into the prior for \\(\\tau\\). For large numbers of exchangeable coefficients, the components of \\(\\tau\\) itself (perhaps excluding the intercept) may themselves be given a hierarchical prior.\nOur final recommendation is to give the correlation matrix \\(\\Omega\\) an LKJ prior with shape \\(\\eta \\geq 1\\),5\n\\[\n\\Omega \\sim \\textsf{LKJCorr}(\\eta).\n\\]\nThe LKJ correlation distribution is defined by \\[\n\\textsf{LKJCorr}\\left(\\Sigma \\mid \\eta\\right)\n\\propto\n\\operatorname{det}\\left(\\Sigma\\right)^{\\eta - 1}.\n\\]\nThe basic behavior of the LKJ correlation distribution is similar to that of a beta distribution. For \\(\\eta = 1\\), the result is a uniform distribution. Despite being the identity over correlation matrices, the marginal distribution over the entries in that matrix (i.e., the correlations) is not uniform between -1 and 1. 
Rather, it concentrates around zero as the dimensionality increases due to the complex constraints.\nFor \\(\\eta > 1\\), the density increasingly concentrates mass around the unit matrix, i.e., favoring less correlation. For \\(\\eta < 1\\), it increasingly concentrates mass in the other direction, i.e., favoring more correlation.\nThe LKJ prior may thus be used to control the expected amount of correlation among the parameters \\(\\beta_j\\). For a discussion of decomposing a covariance prior into a prior on correlation matrices and an independent prior on scales, see Barnard, McCulloch, and Meng (2000).\n\n\n\nTo complete Gelman and Hill’s model, suppose each group \\(j \\in \\{ 1, \\dotsc, J \\}\\) is supplied with an \\(L\\)-dimensional row-vector of group-level predictors \\(u_j\\). The prior mean for the \\(\\beta_j\\) can then itself be modeled as a regression, using an \\(L\\)-dimensional coefficient vector \\(\\gamma\\). The prior for the group-level coefficients then becomes \\[\n\\beta_j \\sim \\textsf{multivariate normal}(u_j \\, \\gamma, \\Sigma)\n\\]\nThe group-level coefficients \\(\\gamma\\) may themselves be given independent weakly informative priors, such as \\[\n\\gamma_l \\sim \\textsf{normal}(0,5).\n\\] As usual, information about the group-level means should be incorporated into this prior.\n\n\n\nThe Stan code for the full hierarchical model with multivariate priors on the group-level coefficients and group-level prior means follows its definition.\ndata {\n int<lower=0> N; // num individuals\n int<lower=1> K; // num ind predictors\n int<lower=1> J; // num groups\n int<lower=1> L; // num group predictors\n array[N] int<lower=1, upper=J> jj; // group for individual\n matrix[N, K] x; // individual predictors\n array[J] row_vector[L] u; // group predictors\n vector[N] y; // outcomes\n}\nparameters {\n corr_matrix[K] Omega; // prior correlation\n vector<lower=0>[K] tau; // prior scale\n matrix[L, K] gamma; // group coeffs\n array[J] vector[K] 
beta; // indiv coeffs by group\n real<lower=0> sigma; // prediction error scale\n}\nmodel {\n tau ~ cauchy(0, 2.5);\n Omega ~ lkj_corr(2);\n to_vector(gamma) ~ normal(0, 5);\n {\n array[J] row_vector[K] u_gamma;\n for (j in 1:J) {\n u_gamma[j] = u[j] * gamma;\n }\n beta ~ multi_normal(u_gamma, quad_form_diag(Omega, tau));\n }\n for (n in 1:N) {\n y[n] ~ normal(x[n] * beta[jj[n]], sigma);\n }\n}\nThe hyperprior covariance matrix is defined implicitly through the quadratic form in the code because the correlation matrix Omega and scale vector tau are more natural to inspect in the output; to output Sigma, define it as a transformed parameter. The function quad_form_diag is defined so that quad_form_diag(Sigma, tau) is equivalent to diag_matrix(tau) * Sigma * diag_matrix(tau), where diag_matrix(tau) returns the matrix with tau on the diagonal and zeroes off diagonal; the version using quad_form_diag should be faster. For details on these and other matrix arithmetic operators and functions, see the function reference manual.\n\n\n\nThe code in the Stan program above can be sped up dramatically by replacing the distribution statement inside the for loop:\nfor (n in 1:N) {\n y[n] ~ normal(x[n] * beta[jj[n]], sigma);\n}\nwith the vectorized distribution statement:\n{\n vector[N] x_beta_jj;\n for (n in 1:N) {\n x_beta_jj[n] = x[n] * beta[jj[n]];\n }\n y ~ normal(x_beta_jj, sigma);\n}\nThe outer brackets create a local scope in which to define the variable x_beta_jj, which is then filled in a loop and used to define a vectorized distribution statement. 
The reason this is such a big win is that it allows us to take the log of sigma only once and it greatly reduces the size of the resulting expression graph by packing all of the work into a single distribution function.\nAlthough it is tempting to redeclare beta and include a revised model block distribution statement,\nparameters {\n matrix[J, K] beta;\n// ...\n}\nmodel {\n y ~ normal(rows_dot_product(x, beta[jj]), sigma);\n // ...\n}\nthis fails because it breaks the vectorization for beta,6\nbeta ~ multi_normal(...);\nwhich requires beta to be an array of vectors. Both vectorizations are important, so the best solution is to just use the loop above, because rows_dot_product cannot do much optimization in and of itself because there are no shared computations.\nThe code in the Stan program above also builds up an array of vectors for the outcomes and for the multivariate normal, which provides a major speedup by reducing the number of linear systems that need to be solved and differentiated.\n{\n matrix[K, K] Sigma_beta;\n Sigma_beta = quad_form_diag(Omega, tau);\n for (j in 1:J) {\n beta[j] ~ multi_normal((u[j] * gamma)', Sigma_beta);\n }\n}\nIn this example, the covariance matrix Sigma_beta is defined as a local variable so as not to have to repeat the quadratic form computation \\(J\\) times. This vectorization can be combined with the Cholesky-factor optimization in the next section.\n\n\n\nThe multivariate normal density and LKJ prior on correlation matrices both require their matrix parameters to be factored. Vectorizing, as in the previous section, ensures this is only done once for each density. An even better solution, both in terms of efficiency and numerical stability, is to parameterize the model directly in terms of Cholesky factors of correlation matrices using the multivariate version of the non-centered parameterization. 
For the model in the previous section, the program fragment to replace the full matrix prior with an equivalent Cholesky factorized prior is as follows.\ndata {\n matrix[L, J] u; // group predictors transposed\n // ...\n}\nparameters {\n matrix[K, J] z;\n cholesky_factor_corr[K] L_Omega;\n matrix[K, L] gamma;\n // ...\n}\ntransformed parameters {\n matrix[K, J] beta;\n beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;\n}\nmodel {\n to_vector(z) ~ std_normal();\n L_Omega ~ lkj_corr_cholesky(2);\n // ...\n}\nThe data variable u was originally an array of vectors, which is efficient for access; here it is redeclared as a matrix in order to use it in matrix arithmetic. Moreover, it is transposed, along with gamma and beta, to minimize the number of transposition operations. The new parameter L_Omega is the Cholesky factor of the original correlation matrix Omega, so that\nOmega = L_Omega * L_Omega'\nThe prior scale vector tau is unchanged, and furthermore, pre-multiplying the Cholesky factor by the scale produces the Cholesky factor of the final covariance matrix,\nSigma_beta\n = quad_form_diag(Omega, tau)\n = diag_pre_multiply(tau, L_Omega) * diag_pre_multiply(tau, L_Omega)'\nwhere the diagonal pre-multiply compound operation is defined by\ndiag_pre_multiply(a, b) = diag_matrix(a) * b\nThe new variable z is declared as a matrix, the entries of which are given independent standard normal priors; the to_vector operation turns the matrix into a vector so that it can be used as a vectorized argument to the univariate normal density. This results in every column of z being a \\(K\\)-variate normal random vector with the identity as covariance matrix. 
Therefore, multiplying z by the Cholesky factor of the covariance matrix and adding the mean (u * gamma)' produces a beta distributed as in the original model, where the variance is, letting \\(L = \\mathrm{diag}(\\tau)\\,\\Omega_L\\),\n\\[\n\\begin{aligned}\n\\mathbb{V}[\\beta] &= \\mathbb{E}\\big((L \\, z)(L \\, z)^\\top\\big) \\\\\n&= \\mathbb{E}\\big(L \\, z \\, z^\\top \\, L^\\top\\big) \\\\\n&= L \\, \\mathbb{E}(z \\, z^\\top) \\, L^\\top \\\\\n&= L \\, L^\\top =(\\mathrm{diag}(\\tau)\\,\\Omega_L)\\,(\\mathrm{diag}(\\tau)\\,\\Omega_L)^\\top \\\\\n&= \\mathrm{diag}(\\tau)\\,\\Omega\\,\\mathrm{diag}(\\tau) \\\\\n&= \\Sigma.\n\\end{aligned}\n\\] where we have used the linearity of expectations (line 2 to 3), the definition of \\(\\Omega = \\Omega_L \\, \\Omega_L^\\top\\), and the fact that \\(\\mathbb{E}(z \\, z^\\top) = I\\) since \\(z \\sim \\mathcal{N}(0, I)\\).\nOmitting the remaining data declarations, which are the same as before with the exception of u, the optimized model is as follows.\nparameters {\n matrix[K, J] z;\n cholesky_factor_corr[K] L_Omega;\n vector<lower=0, upper=pi() / 2>[K] tau_unif; // prior scale\n matrix[K, L] gamma; // group coeffs\n real<lower=0> sigma; // prediction error scale\n}\ntransformed parameters {\n vector<lower=0>[K] tau = 2.5 * tan(tau_unif);\n matrix[K, J] beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;\n}\nmodel {\n vector[N] mu;\n for(n in 1:N) {\n mu[n] = x[n, ] * beta[, jj[n]];\n }\n to_vector(z) ~ std_normal();\n L_Omega ~ lkj_corr_cholesky(2);\n to_vector(gamma) ~ normal(0, 5);\n y ~ normal(mu, sigma);\n}\nThis model also reparameterizes the prior scale tau to avoid potential problems with the heavy tails of the Cauchy distribution. The statement tau_unif ~ uniform(0, pi() / 2) can be omitted from the model block because Stan increments the log posterior for parameters with uniform priors without it.\n\n\n\n\n\nStan models can be used for “predicting” the values of arbitrary model unknowns. 
When predictions are about the future, they’re called “forecasts;” when they are predictions about the past, as in climate reconstruction or cosmology, they are sometimes called “backcasts” (or “aftcasts” or “hindcasts” or “antecasts,” depending on the author’s feelings about the opposite of “fore”).\n\n\nAs a simple example, the following linear regression provides the same setup for estimating the coefficients beta as in our very first example, using y for the N observations and x for the N predictor vectors. The model parameters and model for observations are exactly the same as before.\nTo make predictions, we need to be given the number of predictions, N_new, and their predictor matrix, x_new. The predictions themselves are modeled as a parameter y_new. The model statement for the predictions is exactly the same as for the observations, with the new outcome vector y_new and prediction matrix x_new.\ndata {\n int<lower=1> K;\n int<lower=0> N;\n matrix[N, K] x;\n vector[N] y;\n\n int<lower=0> N_new;\n matrix[N_new, K] x_new;\n}\nparameters {\n vector[K] beta;\n real<lower=0> sigma;\n\n vector[N_new] y_new; // predictions\n}\nmodel {\n y ~ normal(x * beta, sigma); // observed model\n\n y_new ~ normal(x_new * beta, sigma); // prediction model\n}\n\n\n\nWhere possible, the most efficient way to generate predictions is to use the generated quantities block. 
This provides proper Monte Carlo (not Markov chain Monte Carlo) inference, which can have a much higher effective sample size per iteration.\n// ...data as above...\n\nparameters {\n vector[K] beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(x * beta, sigma);\n}\ngenerated quantities {\n vector[N_new] y_new;\n for (n in 1:N_new) {\n y_new[n] = normal_rng(x_new[n] * beta, sigma);\n }\n}\nNow the data are just as before, but the parameter y_new is now declared as a generated quantity, and the prediction model is removed from the model and replaced by a pseudo-random draw from a normal distribution.\n\n\nIt is possible for values to overflow or underflow in generated quantities. The problem is that if the result is NaN, then any constraints placed on the variables will be violated. It is possible to check a value assigned by an RNG and reject it if it overflows, but this is both inefficient and leads to biased posterior estimates. Instead, the conditions causing overflow, such as trying to generate a negative binomial random variate with a mean of \\(2^{31}\\), must be intercepted and dealt with. This is typically done by reparameterizing or reimplementing the random number generator using real values rather than integers, which are upper-bounded by \\(2^{31} - 1\\) in Stan.\n\n\n\n\n\nMost regressions are set up to model univariate observations (be they scalar, boolean, categorical, ordinal, or count). Even multinomial regressions are just repeated categorical regressions. In contrast, this section discusses regression when each observed value is multivariate. 
To relate multiple outcomes in a regression setting, their error terms are provided with covariance structure.\nThis section considers two cases, seemingly unrelated regressions for continuous multivariate quantities and multivariate probit regression for boolean multivariate quantities.\n\n\nThe first model considered is the “seemingly unrelated” regressions (SUR) of econometrics where several linear regressions share predictors and use a covariance error structure rather than independent errors (Zellner 1962; Greene 2011).\nThe model is easy to write down as a regression, \\[\\begin{align*}\ny_n &= x_n \\, \\beta + \\epsilon_n \\\\\n\\epsilon_n &\\sim \\textsf{multivariate normal}(0, \\Sigma)\n\\end{align*}\\]\nwhere \\(x_n\\) is a \\(J\\)-row-vector of predictors (\\(x\\) is an \\((N \\times\nJ)\\) matrix), \\(y_n\\) is a \\(K\\)-vector of observations, \\(\\beta\\) is a \\((K\n\\times J)\\) matrix of regression coefficients (vector \\(\\beta_k\\) holds coefficients for outcome \\(k\\)), and \\(\\Sigma\\) is covariance matrix governing the error. As usual, the intercept can be rolled into \\(x\\) as a column of ones.\nThe basic Stan code is straightforward (though see below for more optimized code for use with LKJ priors on correlation).\ndata {\n int<lower=1> K;\n int<lower=1> J;\n int<lower=0> N;\n array[N] vector[J] x;\n array[N] vector[K] y;\n}\nparameters {\n matrix[K, J] beta;\n cov_matrix[K] Sigma;\n}\nmodel {\n array[N] vector[K] mu;\n for (n in 1:N) {\n mu[n] = beta * x[n];\n }\n y ~ multi_normal(mu, Sigma);\n}\nFor efficiency, the multivariate normal is vectorized by precomputing the array of mean vectors and sharing the same covariance matrix.\nFollowing the advice in the multivariate hierarchical priors section, we will place a weakly informative normal prior on the regression coefficients, an LKJ prior on the correlations and a half-Cauchy prior on standard deviations. 
The covariance structure is parameterized in terms of Cholesky factors for efficiency and arithmetic stability.\n// ...\nparameters {\n matrix[K, J] beta;\n cholesky_factor_corr[K] L_Omega;\n vector<lower=0>[K] L_sigma;\n}\nmodel {\n array[N] vector[K] mu;\n matrix[K, K] L_Sigma;\n\n for (n in 1:N) {\n mu[n] = beta * x[n];\n\n }\n\n L_Sigma = diag_pre_multiply(L_sigma, L_Omega);\n\n to_vector(beta) ~ normal(0, 5);\n L_Omega ~ lkj_corr_cholesky(4);\n L_sigma ~ cauchy(0, 2.5);\n\n y ~ multi_normal_cholesky(mu, L_Sigma);\n}\nThe Cholesky factor of the covariance matrix is then reconstructed as a local variable and used in the model by scaling the Cholesky factor of the correlation matrices. The regression coefficients get a prior all at once by converting the matrix beta to a vector.\nIf required, the full correlation or covariance matrices may be reconstructed from their Cholesky factors in the generated quantities block.\n\n\n\nThe multivariate probit model generates sequences of boolean variables by applying a step function to the output of a seemingly unrelated regression.\nThe observations \\(y_n\\) are \\(D\\)-vectors of boolean values (coded 0 for false, 1 for true). The values for the observations \\(y_n\\) are based on latent values \\(z_n\\) drawn from a seemingly unrelated regression model (see the previous section), \\[\\begin{align*}\nz_n &= x_n \\, \\beta + \\epsilon_n \\\\\n\\epsilon_n &\\sim \\textsf{multivariate normal}(0, \\Sigma)\n\\end{align*}\\]\nThese are then put through the step function to produce a \\(D\\)-vector \\(y_n\\) of boolean values with elements defined by \\[\ny_{n, d} = \\operatorname{I}\\left(z_{n, d} > 0\\right),\n\\] where \\(\\operatorname{I}()\\) is the indicator function taking the value 1 if its argument is true and 0 otherwise.\nUnlike in the seemingly unrelated regressions case, here the covariance matrix \\(\\Sigma\\) has unit standard deviations (i.e., it is a correlation matrix). 
As with ordinary probit and logistic regressions, letting the scale vary causes the model (which is defined only by a cutpoint at 0, not a scale) to be unidentified (see Greene (2011)).\nMultivariate probit regression can be coded in Stan using the trick introduced by Albert and Chib (1993), where the underlying continuous value vectors \\(z_n\\) are coded as truncated parameters. The key to coding the model in Stan is declaring the latent vector \\(z\\) in two parts, based on whether the corresponding value of \\(y\\) is 0 or 1. Otherwise, the model is identical to the seemingly unrelated regression model in the previous section.\nFirst, we introduce a sum function for two-dimensional arrays of integers; this is going to help us calculate how many total 1 values there are in \\(y\\).\nfunctions {\n int sum2d(array[,] int a) {\n int s = 0;\n for (i in 1:size(a)) {\n s += sum(a[i]);\n }\n return s;\n }\n}\nThe function is trivial, but it’s not a built-in for Stan and it’s easier to understand the rest of the model if it’s pulled into its own function so as not to create a distraction.\nThe data declaration block is much like for the seemingly unrelated regressions, but the observations y are now integers constrained to be 0 or 1.\ndata {\n int<lower=1> K;\n int<lower=1> D;\n int<lower=0> N;\n array[N, D] int<lower=0, upper=1> y;\n array[N] vector[K] x;\n}\nAfter declaring the data, there is a rather involved transformed data block whose sole purpose is to sort the data array y into positive and negative components, keeping track of indexes so that z can be easily reassembled in the transformed parameters block.\ntransformed data {\n int<lower=0> N_pos;\n array[sum2d(y)] int<lower=1, upper=N> n_pos;\n array[size(n_pos)] int<lower=1, upper=D> d_pos;\n int<lower=0> N_neg;\n array[(N * D) - size(n_pos)] int<lower=1, upper=N> n_neg;\n array[size(n_neg)] int<lower=1, upper=D> d_neg;\n\n N_pos = size(n_pos);\n N_neg = size(n_neg);\n {\n int i;\n int j;\n i = 1;\n j = 1;\n 
for (n in 1:N) {\n for (d in 1:D) {\n if (y[n, d] == 1) {\n n_pos[i] = n;\n d_pos[i] = d;\n i += 1;\n } else {\n n_neg[j] = n;\n d_neg[j] = d;\n j += 1;\n }\n }\n }\n }\n}\nThe variables N_pos and N_neg are set to the number of true (1) and number of false (0) observations in y. The loop then fills in the sequence of indexes for the positive and negative values in four arrays.\nThe parameters are declared as follows.\nparameters {\n matrix[D, K] beta;\n cholesky_factor_corr[D] L_Omega;\n vector<lower=0>[N_pos] z_pos;\n vector<upper=0>[N_neg] z_neg;\n}\nThese include the regression coefficients beta and the Cholesky factor of the correlation matrix, L_Omega. This time there is no scaling because the covariance matrix has unit scale (i.e., it is a correlation matrix; see above).\nThe critical part of the parameter declaration is that the latent real value \\(z\\) is broken into positive-constrained and negative-constrained components, whose size was conveniently calculated in the transformed data block. 
The transformed data block’s real work was to allow the transformed parameter block to reconstruct \\(z\\).\ntransformed parameters {\n array[N] vector[D] z;\n for (n in 1:N_pos) {\n z[n_pos[n], d_pos[n]] = z_pos[n];\n }\n for (n in 1:N_neg) {\n z[n_neg[n], d_neg[n]] = z_neg[n];\n }\n}\nAt this point, the model is simple, pretty much recreating the seemingly unrelated regression.\nmodel {\n L_Omega ~ lkj_corr_cholesky(4);\n to_vector(beta) ~ normal(0, 5);\n {\n array[N] vector[D] beta_x;\n for (n in 1:N) {\n beta_x[n] = beta * x[n];\n }\n z ~ multi_normal_cholesky(beta_x, L_Omega);\n }\n}\nThis simple form of model is made possible by the Albert and Chib-style constraints on z.\nFinally, the correlation matrix itself can be put back together in the generated quantities block if desired.\ngenerated quantities {\n corr_matrix[D] Omega;\n Omega = multiply_lower_tri_self_transpose(L_Omega);\n}\nThe same could be done for the seemingly unrelated regressions in the previous section.\n\n\n\n\nThe main application of pseudorandom number generators (PRNGs) is for posterior inference, including prediction and posterior predictive checks. They can also be used for pure data simulation, which is like a posterior predictive check with no conditioning. See the function reference manual for a complete description of the syntax and usage of pseudorandom number generators.\n\n\nConsider predicting unobserved outcomes using linear regression. 
Given predictors \\(x_1, \\dotsc, x_N\\) and observed outcomes \\(y_1, \\dotsc, y_N\\), and assuming a standard linear regression with intercept \\(\\alpha\\), slope \\(\\beta\\), and error scale \\(\\sigma\\), along with improper uniform priors, the posterior over the parameters given \\(x\\) and \\(y\\) is \\[\np\\left(\\alpha, \\beta, \\sigma \\mid x, y \\right)\n\\propto\n\\prod_{n=1}^N\n \\textsf{normal}\\left(y_n \\mid \\alpha + \\beta x_n, \\sigma\\right).\n\\]\nFor this model, the posterior predictive inference for a new outcome \\(\\tilde{y}_n\\) given a predictor \\(\\tilde{x}_n\\), conditioned on the observed data \\(x\\) and \\(y\\), is \\[\np\\left(\\tilde{y}_n \\mid \\tilde{x}_n, x, y\\right)\n= \\int_{(\\alpha,\\beta,\\sigma)}\n \\textsf{normal}\\left(\\tilde{y}_n \\mid \\alpha + \\beta \\tilde{x}_n, \\sigma\\right)\n \\times\n p\\left(\\alpha, \\beta, \\sigma \\mid x, y\\right)\n \\,\\textrm{d}(\\alpha,\\beta,\\sigma).\n\\]\nTo code the posterior predictive inference in Stan, a standard linear regression is combined with a random number in the generated quantities block.\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n}\ngenerated quantities {\n vector[N_tilde] y_tilde;\n for (n in 1:N_tilde) {\n y_tilde[n] = normal_rng(alpha + beta * x_tilde[n], sigma);\n }\n}\nGiven observed predictors \\(x\\) and outcomes \\(y\\), y_tilde will be drawn according to \\(p\\left(\\tilde{y} \\mid \\tilde{x}, y, x\\right)\\). This means that, for example, the posterior mean for y_tilde is the estimate of the outcome that minimizes expected square error (conditioned on the data and model).\n\n\n\nA good way to investigate the fit of a model to the data, a critical step in Bayesian data analysis, is to generate simulated data according to the parameters of the model. 
This is carried out with exactly the same procedure as before, only the observed data predictors \\(x\\) are used in place of new predictors \\(\\tilde{x}\\) for unobserved outcomes. If the model fits the data well, the predictions for \\(\\tilde{y}\\) based on \\(x\\) should match the observed data \\(y\\).\nTo code posterior predictive checks in Stan requires only a slight modification of the prediction code to use \\(x\\) and \\(N\\) in place of \\(\\tilde{x}\\) and \\(\\tilde{N}\\),\ngenerated quantities {\n vector[N] y_tilde;\n for (n in 1:N) {\n y_tilde[n] = normal_rng(alpha + beta * x[n], sigma);\n }\n}\nGelman et al. (2013) recommend choosing several posterior draws \\(\\tilde{y}^{(1)}, \\dotsc, \\tilde{y}^{(M)}\\) and plotting each of them alongside the data \\(y\\) that was actually observed. If the model fits well, the simulated \\(\\tilde{y}\\) will look like the actual data \\(y\\).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#linear-regression", + "href": "stan-users-guide/regression.html#linear-regression", + "title": "Regression Models", + "section": "", + "text": "The simplest linear regression model is the following, with a single predictor and a slope and intercept coefficient, and normally distributed noise. 
This model can be written using standard regression notation as \\[\ny_n = \\alpha + \\beta x_n + \\epsilon_n\n\\quad\\text{where}\\quad\n\\epsilon_n \\sim \\operatorname{normal}(0,\\sigma).\n\\]\nThis is equivalent to the following sampling involving the residual, \\[\ny_n - (\\alpha + \\beta X_n) \\sim \\operatorname{normal}(0,\\sigma),\n\\] and reducing still further, to \\[\ny_n \\sim \\operatorname{normal}(\\alpha + \\beta X_n, \\, \\sigma).\n\\]\nThis latter form of the model is coded in Stan as follows.\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n}\nThere are N observations and for each observation, \\(n \\in N\\), we have predictor x[n] and outcome y[n]. The intercept and slope parameters are alpha and beta. The model assumes a normally distributed noise term with scale sigma. This model has improper priors for the two regression coefficients.\n\n\nThe distribution statement in the previous model is vectorized, with\ny ~ normal(alpha + beta * x, sigma);\nproviding the same model as the unvectorized version,\nfor (n in 1:N) {\n y[n] ~ normal(alpha + beta * x[n], sigma);\n}\nIn addition to being more concise, the vectorized form is much faster.1\nIn general, Stan allows the arguments to distributions such as normal to be vectors. If any of the other arguments are vectors or arrays, they have to be the same size. If any of the other arguments is a scalar, it is reused for each vector entry.\nThe other reason this works is that Stan’s arithmetic operators are overloaded to perform matrix arithmetic on matrices. In this case, because x is of type vector and beta of type real, the expression beta * x is of type vector. 
Because Stan supports vectorization, a regression model with more than one predictor can be written directly using matrix notation.\ndata {\n int<lower=0> N; // number of data items\n int<lower=0> K; // number of predictors\n matrix[N, K] x; // predictor matrix\n vector[N] y; // outcome vector\n}\nparameters {\n real alpha; // intercept\n vector[K] beta; // coefficients for predictors\n real<lower=0> sigma; // error scale\n}\nmodel {\n y ~ normal(x * beta + alpha, sigma); // data model\n}\nThe constraint lower=0 in the declaration of sigma constrains the value to be greater than or equal to 0. With no prior in the model block, the effect is an improper prior on non-negative real numbers. Although a more informative prior may be added, improper priors are acceptable as long as they lead to proper posteriors.\nIn the model above, x is an \\(N \\times K\\) matrix of predictors and beta a \\(K\\)-vector of coefficients, so x * beta is an \\(N\\)-vector of predictions, one for each of the \\(N\\) data items. These predictions line up with the outcomes in the \\(N\\)-vector y, so the entire model may be written using matrix arithmetic as shown. It would be possible to include a column of ones in the data matrix x to remove the alpha parameter.\nThe distribution statement in the model above is just a more efficient, vector-based approach to coding the model with a loop, as in the following statistically equivalent model.\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(x[n] * beta, sigma);\n }\n}\nWith Stan’s matrix indexing scheme, x[n] picks out row n of the matrix x; because beta is a column vector, the product x[n] * beta is a scalar of type real.\n\n\nIn the model formulation\ny ~ normal(x * beta, sigma);\nthere is no longer an intercept coefficient alpha. Instead, we have assumed that the first column of the input matrix x is a column of 1 values. This way, beta[1] plays the role of the intercept. 
If the intercept gets a different prior than the slope terms, then it would be clearer to break it out. It is also slightly more efficient in its explicit form with the intercept variable singled out because there’s one fewer multiplications; it should not make that much of a difference to speed, though, so the choice should be based on clarity.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#QR-reparameterization.section", + "href": "stan-users-guide/regression.html#QR-reparameterization.section", + "title": "Regression Models", + "section": "", + "text": "In the previous example, the linear predictor can be written as \\(\\eta\n= x \\beta\\), where \\(\\eta\\) is a \\(N\\)-vector of predictions, \\(x\\) is a \\(N\n\\times K\\) matrix, and \\(\\beta\\) is a \\(K\\)-vector of coefficients. Presuming \\(N \\geq K\\), we can exploit the fact that any design matrix \\(x\\) can be decomposed using the thin QR decomposition into an orthogonal matrix \\(Q\\) and an upper-triangular matrix \\(R\\), i.e. \\(x = Q\nR\\).\nThe functions qr_thin_Q and qr_thin_R implement the thin QR decomposition, which is to be preferred to the fat QR decomposition that would be obtained by using qr_Q and qr_R, as the latter would more easily run out of memory (see the Stan Functions Reference for more information on the qr_thin_Q and qr_thin_R functions). In practice, it is best to write \\(x = Q^\\ast\nR^\\ast\\) where \\(Q^\\ast = Q * \\sqrt{n - 1}\\) and \\(R^\\ast =\n\\frac{1}{\\sqrt{n - 1}} R\\). Thus, we can equivalently write \\(\\eta = x\n\\beta = Q R \\beta = Q^\\ast R^\\ast \\beta\\). If we let \\(\\theta = R^\\ast\n\\beta\\), then we have \\(\\eta = Q^\\ast \\theta\\) and \\(\\beta = R^{\\ast^{-1}}\n\\theta\\). 
In that case, the previous Stan program becomes\ndata {\n int<lower=0> N; // number of data items\n int<lower=0> K; // number of predictors\n matrix[N, K] x; // predictor matrix\n vector[N] y; // outcome vector\n}\ntransformed data {\n matrix[N, K] Q_ast;\n matrix[K, K] R_ast;\n matrix[K, K] R_ast_inverse;\n // thin and scale the QR decomposition\n Q_ast = qr_thin_Q(x) * sqrt(N - 1);\n R_ast = qr_thin_R(x) / sqrt(N - 1);\n R_ast_inverse = inverse(R_ast);\n}\nparameters {\n real alpha; // intercept\n vector[K] theta; // coefficients on Q_ast\n real<lower=0> sigma; // error scale\n}\nmodel {\n y ~ normal(Q_ast * theta + alpha, sigma); // data model\n}\ngenerated quantities {\n vector[K] beta;\n beta = R_ast_inverse * theta; // coefficients on x\n}\nSince this Stan program generates equivalent predictions for \\(y\\) and the same posterior distribution for \\(\\alpha\\), \\(\\beta\\), and \\(\\sigma\\) as the previous Stan program, many wonder why the version with this QR reparameterization performs so much better in practice, often both in terms of wall time and in terms of effective sample size. The reasoning is threefold:\n\nThe columns of \\(Q^\\ast\\) are orthogonal whereas the columns of \\(x\\) generally are not. Thus, it is easier for a Markov Chain to move around in \\(\\theta\\)-space than in \\(\\beta\\)-space.\nThe columns of \\(Q^\\ast\\) have the same scale whereas the columns of \\(x\\) generally do not. Thus, a Hamiltonian Monte Carlo algorithm can move around the parameter space with a smaller number of larger steps\nSince the covariance matrix for the columns of \\(Q^\\ast\\) is an identity matrix, \\(\\theta\\) typically has a reasonable scale if the units of \\(y\\) are also reasonable. 
This also helps HMC move efficiently without compromising numerical accuracy.\n\nConsequently, this QR reparameterization is recommended for linear and generalized linear models in Stan whenever \\(K > 1\\) and you do not have an informative prior on the location of \\(\\beta\\). It can also be worthwhile to subtract the mean from each column of \\(x\\) before obtaining the QR decomposition, which does not affect the posterior distribution of \\(\\theta\\) or \\(\\beta\\) but does affect \\(\\alpha\\) and allows you to interpret \\(\\alpha\\) as the expectation of \\(y\\) in a linear model.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#regression-priors.section", + "href": "stan-users-guide/regression.html#regression-priors.section", + "title": "Regression Models", + "section": "", + "text": "See our general discussion of priors for tips on priors for parameters in regression models.\nLater sections discuss univariate hierarchical priors and multivariate hierarchical priors, as well as priors used to identify models.\nHowever, as described in the QR-reparameterization section, if you do not have an informative prior on the location of the regression coefficients, then you are better off reparameterizing your model so that the regression coefficients are a generated quantity. In that case, it usually does not matter much what prior is used on the reparameterized regression coefficients and almost any weakly informative prior that scales with the outcome will do.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#robust-noise-models", + "href": "stan-users-guide/regression.html#robust-noise-models", + "title": "Regression Models", + "section": "", + "text": "The standard approach to linear regression is to model the noise term \\(\\epsilon\\) as having a normal distribution. 
From Stan’s perspective, there is nothing special about normally distributed noise. For instance, robust regression can be accommodated by giving the noise term a Student-\\(t\\) distribution. To code this in Stan, the distribution statement is changed to the following.\ndata {\n // ...\n real<lower=0> nu;\n}\n// ...\nmodel {\n y ~ student_t(nu, alpha + beta * x, sigma);\n}\nThe degrees of freedom constant nu is specified as data.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#logistic-probit-regression.section", + "href": "stan-users-guide/regression.html#logistic-probit-regression.section", + "title": "Regression Models", + "section": "", + "text": "For binary outcomes, either of the closely related logistic or probit regression models may be used. These generalized linear models vary only in the link function they use to map linear predictions in \\((-\\infty,\\infty)\\) to probability values in \\((0,1)\\). 
Their respective link functions, the logistic function and the standard normal cumulative distribution function, are both sigmoid functions (i.e., they are both S-shaped).\nA logistic regression model with one predictor and an intercept is coded as follows.\ndata {\n int<lower=0> N;\n vector[N] x;\n array[N] int<lower=0, upper=1> y;\n}\nparameters {\n real alpha;\n real beta;\n}\nmodel {\n y ~ bernoulli_logit(alpha + beta * x);\n}\nThe noise parameter is built into the Bernoulli formulation here rather than specified directly.\nLogistic regression is a kind of generalized linear model with binary outcomes and the log odds (logit) link function, defined by \\[\n\\operatorname{logit}(v) = \\log \\left( \\frac{v}{1-v} \\right).\n\\]\nThe inverse of the link function appears in the model: \\[\n\\operatorname{logit}^{-1}(u) = \\texttt{inv}\\mathtt{\\_}\\texttt{logit}(u) = \\frac{1}{1 + \\exp(-u)}.\n\\]\nThe model formulation above uses the logit-parameterized version of the Bernoulli distribution, which is defined by \\[\n\\texttt{bernoulli}\\mathtt{\\_}\\texttt{logit}\\left(y \\mid \\alpha \\right)\n=\n\\texttt{bernoulli}\\left(y \\mid \\operatorname{logit}^{-1}(\\alpha)\\right).\n\\]\nThe formulation is also vectorized in the sense that alpha and beta are scalars and x is a vector, so that alpha + beta * x is a vector. The vectorized formulation is equivalent to the less efficient version\nfor (n in 1:N) {\n y[n] ~ bernoulli_logit(alpha + beta * x[n]);\n}\nExpanding out the Bernoulli logit, the model is equivalent to the more explicit, but less efficient and less arithmetically stable\nfor (n in 1:N) {\n y[n] ~ bernoulli(inv_logit(alpha + beta * x[n]));\n}\nOther link functions may be used in the same way. 
For example, probit regression uses the cumulative normal distribution function, which is typically written as\n\\[\n\\Phi(x) = \\int_{-\\infty}^x \\textsf{normal}\\left(y \\mid 0,1 \\right) \\,\\textrm{d}y.\n\\]\nThe cumulative standard normal distribution function \\(\\Phi\\) is implemented in Stan as the function Phi. The probit regression model may be coded in Stan by replacing the logistic model’s distribution statement with the following.\ny[n] ~ bernoulli(Phi(alpha + beta * x[n]));\nA fast approximation to the cumulative standard normal distribution function \\(\\Phi\\) is implemented in Stan as the function Phi_approx.2 The approximate probit regression model may be coded with the following.\ny[n] ~ bernoulli(Phi_approx(alpha + beta * x[n]));", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#multi-logit.section", + "href": "stan-users-guide/regression.html#multi-logit.section", + "title": "Regression Models", + "section": "", + "text": "Multiple outcome forms of logistic regression can be coded directly in Stan. For instance, suppose there are \\(K\\) possible outcomes for each output variable \\(y_n\\). Also suppose that there is a \\(D\\)-dimensional vector \\(x_n\\) of predictors for \\(y_n\\). The multi-logit model with \\(\\textsf{normal}(0,5)\\) priors on the coefficients is coded as follows.\ndata {\n int K;\n int N;\n int D;\n array[N] int y;\n matrix[N, D] x;\n}\nparameters {\n matrix[D, K] beta;\n}\nmodel {\n matrix[N, K] x_beta = x * beta;\n\n to_vector(beta) ~ normal(0, 5);\n\n for (n in 1:N) {\n y[n] ~ categorical_logit(x_beta[n]');\n\n }\n}\nwhere x_beta[n]' is the transpose of x_beta[n]. The prior on beta is coded in vectorized form. As of Stan 2.18, the categorical-logit distribution is not vectorized for parameter arguments, so the loop is required. 
The matrix multiplication is pulled out to define a local variable for all of the predictors for efficiency. Like the Bernoulli-logit, the categorical-logit distribution applies softmax internally to convert an arbitrary vector to a simplex, \\[\n\\texttt{categorical}\\mathtt{\\_}\\texttt{logit}\\left(y \\mid \\alpha\\right)\n=\n\\texttt{categorical}\\left(y \\mid \\texttt{softmax}(\\alpha)\\right),\n\\] where \\[\n\\texttt{softmax}(u) = \\exp(u) / \\operatorname{sum}\\left(\\exp(u)\\right).\n\\]\nThe categorical distribution with log-odds (logit) scaled parameters used above is equivalent to writing\ny[n] ~ categorical(softmax(x[n] * beta));\n\n\nThe data block in the above model is defined without constraints on sizes K, N, and D or on the outcome array y. Constraints on data declarations provide error checking at the point data are read (or transformed data are defined), which is before sampling begins. Constraints on data declarations also make the model author’s intentions more explicit, which can help with readability. The above model’s declarations could be tightened to\nint<lower=2> K;\nint<lower=0> N;\nint<lower=1> D;\narray[N] int<lower=1, upper=K> y;\nThese constraints arise because the number of categories, K, must be at least two in order for a categorical model to be useful. The number of data items, N, can be zero, but not negative; unlike R, Stan’s for-loops always move forward, so that a loop extent of 1:N when N is equal to zero ensures the loop’s body will not be executed. The number of predictors, D, must be at least one in order for beta * x[n] to produce an appropriate argument for softmax(). The categorical outcomes y[n] must be between 1 and K in order for the discrete sampling to be well defined.\nConstraints on data declarations are optional. Constraints on parameters declared in the parameters block, on the other hand, are not optional—they are required to ensure support for all parameter values satisfying their constraints. 
Constraints on transformed data, transformed parameters, and generated quantities are also optional.\n\n\n\nBecause softmax is invariant under adding a constant to each component of its input, the model is typically only identified if there is a suitable prior on the coefficients.\nAn alternative is to use \\((K-1)\\)-vectors by fixing one of them to be zero. The partially known parameters section discusses how to mix constants and parameters in a vector. In the multi-logit case, the parameter block would be redefined to use \\((K - 1)\\)-vectors\nparameters {\n matrix[D, K - 1] beta_raw;\n}\nand then these are transformed to parameters to use in the model. First, a transformed data block is added before the parameters block to define a vector of zero values,\ntransformed data {\n vector[D] zeros = rep_vector(0, D);\n}\nwhich can then be appended to beta_raw to produce the coefficient matrix beta,\ntransformed parameters {\n matrix[D, K] beta = append_col(beta_raw, zeros);\n}\nThe rep_vector(0, D) call creates a column vector of size D with all entries set to zero. The derived matrix beta is then defined to be the result of appending the vector zeros as a new column at the end of beta_raw; the vector zeros is defined as transformed data so that it doesn’t need to be constructed from scratch each time it is used.\nThis is not the same model as using \\(K\\)-vectors as parameters, because now the prior only applies to \\((K-1)\\)-vectors. 
In practice, this will cause the maximum likelihood solutions to be different and also the posteriors to be slightly different when taking priors centered around zero, as is typical for regression coefficients.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#parameterizing-centered-vectors", + "href": "stan-users-guide/regression.html#parameterizing-centered-vectors", + "title": "Regression Models", + "section": "", + "text": "When there are varying effects in a regression, the resulting likelihood is not identified unless further steps are taken. For example, we might have a global intercept \\(\\alpha\\) and then a varying effect \\(\\beta_k\\) for age group \\(k\\) to make a linear predictor \\(\\alpha +\n\\beta_k\\). With this predictor, we can add a constant to \\(\\alpha\\) and subtract from each \\(\\beta_k\\) and get exactly the same likelihood.\nThe traditional approach to identifying such a model is to pin the first varying effect to zero, i.e., \\(\\beta_1 = 0\\). With one of the varying effects fixed, you can no longer add a constant to all of them and the model’s likelihood is identified. In addition to the difficulty in specifying such a model in Stan, it is awkward to formulate priors because the other coefficients are all interpreted relative to \\(\\beta_1\\).\nIn a Bayesian setting, a proper prior on each of the \\(\\beta\\) is enough to identify the model. 
Unfortunately, this can lead to inefficiency during sampling as the model is still only weakly identified through the prior—there is a very simple example of the difference in the discussion of collinearity in the collinearity section.\nAn alternative identification strategy that allows a symmetric prior is to enforce a sum-to-zero constraint on the varying effects, i.e., \\(\\sum_{k=1}^K \\beta_k = 0.\\)\nA parameter vector constrained to sum to zero may also be used to identify a multi-logit regression parameter vector (see the multi-logit section for details), or may be used for ability or difficulty parameters (but not both) in an IRT model (see the item-response model section for details).\n\n\nAs of Stan 2.36, there is a built in sum_to_zero_vector type, which can be used as follows.\nparameters {\n sum_to_zero_vector[K] beta;\n // ...\n}\nThis produces a vector of size K such that sum(beta) = 0. The unconstrained representation requires only K - 1 values because the last is determined by the first K - 1.\nPlacing a prior on beta in this parameterization, for example,\n beta ~ normal(0, 1);\nleads to a subtly different posterior than what you would get with the same prior on an unconstrained size-K vector. As explained below, the variance is reduced.\nThe sum-to-zero constraint can be implemented naively by setting the last element to the negative sum of the first elements, i.e., \\(\\beta_K\n= -\\sum_{k=1}^{K-1} \\beta_k.\\) But that leads to high correlation among the \\(\\beta_k\\).\nThe transform used in Stan eliminates these correlations by constructing an orthogonal basis and applying it to the zero-sum-constraint; Seyboldt (2024) provides an explanation. The Stan Reference Manual provides the details in the chapter on transforms. 
Although any orthogonal basis can be used, Stan uses the inverse isometric log transform because it is convenient to describe and the transform simplifies to efficient scalar operations rather than more expensive matrix operations.\n\n\nOn the Stan forums, Aaron Goodman provided the following code to produce a prior with standard normal marginals on the components of beta,\nmodel {\n beta ~ normal(0, inv(sqrt(1 - inv(K))));\n // ...\n}\nThe scale component can be multiplied by sigma to produce a normal(0, sigma) prior marginally.\nTo generate distributions with marginals other than standard normal, the resulting beta may be scaled by some factor sigma and translated to some new location mu.\n\n\n\n\nAdding a prior such as \\(\\beta \\sim \\textsf{normal}(0,\\epsilon)\\) for a small \\(\\epsilon\\) will provide a kind of soft centering of a parameter vector \\(\\beta\\) by preferring, all else being equal, that \\(\\sum_{k=1}^K\n\\beta_k = 0\\). This approach is only guaranteed to roughly center if \\(\\beta\\) and the elementwise addition \\(\\beta + c\\) for a scalar constant \\(c\\) produce the same likelihood (perhaps by another vector \\(\\alpha\\) being transformed to \\(\\alpha - c\\), as in the IRT models). This is another way of achieving a symmetric prior, though it requires choosing an \\(\\epsilon\\). 
If \\(\\epsilon\\) is too large, there won’t be a strong enough centering effect and if it is too small, it will add high curvature to the target density and impede sampling.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#ordered-logistic.section", + "href": "stan-users-guide/regression.html#ordered-logistic.section", + "title": "Regression Models", + "section": "", + "text": "Ordered regression for an outcome \\(y_n \\in \\{ 1, \\dotsc, K \\}\\) with predictors \\(x_n \\in \\mathbb{R}^D\\) is determined by a single coefficient vector \\(\\beta \\in \\mathbb{R}^D\\) along with a sequence of cutpoints \\(c \\in\n\\mathbb{R}^{K-1}\\) sorted so that \\(c_d < c_{d+1}\\). The discrete output is \\(k\\) if the linear predictor \\(x_n \\beta\\) falls between \\(c_{k-1}\\) and \\(c_k\\), assuming \\(c_0 = -\\infty\\) and \\(c_K = \\infty\\). The noise term is fixed by the form of regression, with examples for ordered logistic and ordered probit models.\n\n\nThe ordered logistic model can be coded in Stan using the ordered data type for the cutpoints and the built-in ordered_logistic distribution.\ndata {\n int<lower=2> K;\n int<lower=0> N;\n int<lower=1> D;\n array[N] int<lower=1, upper=K> y;\n matrix[N, D] x;\n}\nparameters {\n vector[D] beta;\n ordered[K - 1] c;\n}\nmodel {\n y ~ ordered_logistic(x * beta, c);\n}\nThe vector of cutpoints c is declared as ordered[K - 1], which guarantees that c[k] is less than c[k + 1].\nIf the cutpoints were assigned independent priors, the constraint effectively truncates the joint prior to support over points that satisfy the ordering constraint. 
Luckily, Stan does not need to compute the effect of the constraint on the normalizing term because the probability is needed only up to a proportion.\nThe equivalent model can be written using ordered_logistic_glm distribution, which can provide more efficient computation in case of higher dimensional beta.\n y ~ ordered_logistic_glm(x, beta, c);\n\n\nAn ordered probit model can be coded in exactly the same way by using the built-in ordered_probit distribution.\nmodel {\n y ~ ordered_probit(x * beta, c);\n}\nThere is not yet an ordered_probit_glm distribution in Stan.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#hierarchical-regression", + "href": "stan-users-guide/regression.html#hierarchical-regression", + "title": "Regression Models", + "section": "", + "text": "The simplest multilevel model is a hierarchical model in which the data are grouped into \\(L\\) distinct categories (or levels). An extreme approach would be to completely pool all the data and estimate a common vector of regression coefficients \\(\\beta\\). At the other extreme, an approach with no pooling assigns each level \\(l\\) its own coefficient vector \\(\\beta_l\\) that is estimated separately from the other levels. A hierarchical model is an intermediate solution where the degree of pooling is determined by the data and a prior on the amount of pooling.\nSuppose each binary outcome \\(y_n \\in \\{ 0, 1 \\}\\) has an associated level, \\(ll_n \\in \\{ 1, \\dotsc, L \\}\\). Each outcome will also have an associated predictor vector \\(x_n \\in \\mathbb{R}^D\\). Each level \\(l\\) gets its own coefficient vector \\(\\beta_l \\in \\mathbb{R}^D\\). The hierarchical structure involves drawing the coefficients \\(\\beta_{l,d}\n\\in \\mathbb{R}\\) from a prior that is also estimated with the data. This hierarchically estimated prior determines the amount of pooling. 
If the data in each level are similar, strong pooling will be reflected in low hierarchical variance. If the data in the levels are dissimilar, weaker pooling will be reflected in higher hierarchical variance.\nThe following model encodes a hierarchical logistic regression model with a hierarchical prior on the regression coefficients.\ndata {\n int<lower=1> D;\n int<lower=0> N;\n int<lower=1> L;\n array[N] int<lower=0, upper=1> y;\n array[N] int<lower=1, upper=L> ll;\n array[N] row_vector[D] x;\n}\nparameters {\n array[D] real mu;\n array[D] real<lower=0> sigma;\n array[L] vector[D] beta;\n}\nmodel {\n for (d in 1:D) {\n mu[d] ~ normal(0, 100);\n for (l in 1:L) {\n beta[l, d] ~ normal(mu[d], sigma[d]);\n }\n }\n for (n in 1:N) {\n y[n] ~ bernoulli(inv_logit(x[n] * beta[ll[n]]));\n }\n}\nThe standard deviation parameter sigma gets an implicit uniform prior on \\((0,\\infty)\\) because of its declaration with a lower-bound constraint of zero. Stan allows improper priors as long as the posterior is proper. Nevertheless, it is usually helpful to have informative or at least weakly informative priors for all parameters; see the regression priors section for recommendations on priors for regression coefficients and scales.\n\n\nWhere possible, vectorizing distribution statements leads to faster log probability and derivative evaluations. The speed boost is not because loops are eliminated, but because vectorization allows sharing subcomputations in the log probability and gradient calculations and because it reduces the size of the expression tree required for gradient calculations.\nThe first optimization vectorizes the for-loop over D as\nmu ~ normal(0, 100);\nfor (l in 1:L) {\n beta[l] ~ normal(mu, sigma);\n}\nThe declaration of beta as an array of vectors means that the expression beta[l] denotes a vector. 
Although beta could have been declared as a matrix, an array of vectors (or a two-dimensional array) is more efficient for accessing rows; see the indexing efficiency section for more information on the efficiency tradeoffs among arrays, vectors, and matrices.\nThis model can be further sped up and at the same time made more arithmetically stable by replacing the application of inverse-logit inside the Bernoulli distribution with the logit-parameterized Bernoulli,3\nfor (n in 1:N) {\n y[n] ~ bernoulli_logit(x[n] * beta[ll[n]]);\n}\nUnlike in R or BUGS, loops, array access and assignments are fast in Stan because they are translated directly to C++. In most cases, the cost of allocating and assigning to a container is more than made up for by the increased efficiency due to vectorizing the log probability and gradient calculations. Thus the following version is faster than the original formulation as a loop over a distribution statement.\n{\n vector[N] x_beta_ll;\n for (n in 1:N) {\n x_beta_ll[n] = x[n] * beta[ll[n]];\n }\n y ~ bernoulli_logit(x_beta_ll);\n}\nThe brackets introduce a new scope for the local variable x_beta_ll; alternatively, the variable may be declared at the top of the model block.\nIn some cases, such as the above, the local variable assignment leads to models that are less readable. 
The recommended practice in such cases is to first develop and debug the more transparent version of the model and only work on optimizations when the simpler formulation has been debugged.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#hierarchical-priors.section", + "href": "stan-users-guide/regression.html#hierarchical-priors.section", + "title": "Regression Models", + "section": "", + "text": "Priors on priors, also known as “hyperpriors,” should be treated the same way as priors on lower-level parameters in that as much prior information as is available should be brought to bear. Because hyperpriors often apply to only a handful of lower-level parameters, care must be taken to ensure the posterior is both proper and not overly sensitive either statistically or computationally to wide tails in the priors.\n\n\nThe fundamental problem with maximum likelihood estimation (MLE) in the hierarchical model setting is that as the hierarchical variance drops and the values cluster around the hierarchical mean, the overall density grows without bound. As an illustration, consider a simple hierarchical linear regression (with fixed prior mean) of \\(y_n \\in\n\\mathbb{R}\\) on \\(x_n \\in \\mathbb{R}^K\\), formulated as \\[\\begin{align*}\ny_n & \\sim \\textsf{normal}(x_n \\beta, \\sigma) \\\\\n\\beta_k & \\sim \\textsf{normal}(0,\\tau) \\\\\n\\tau & \\sim \\textsf{Cauchy}(0,2.5) \\\\\n\\end{align*}\\]\nIn this case, as \\(\\tau \\rightarrow 0\\) and \\(\\beta_k \\rightarrow 0\\), the posterior density \\[ p(\\beta,\\tau,\\sigma|y,x) \\propto p(y|x,\\beta,\\tau,\\sigma) \\] grows without bound. See the Neal’s funnel density, which has similar behavior.\nThere is obviously no MLE estimate for \\(\\beta,\\tau,\\sigma\\) in such a case, and therefore the model must be modified if posterior modes are to be used for inference. The approach recommended by Chung et al. 
(2013) is to use a gamma distribution as a prior, such as \\[\n\\sigma \\sim \\textsf{Gamma}(2, 1/A),\n\\] for a reasonably large value of \\(A\\), such as \\(A = 10\\).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#item-response-models.section", + "href": "stan-users-guide/regression.html#item-response-models.section", + "title": "Regression Models", + "section": "", + "text": "Item-response theory (IRT) models the situation in which a number of students each answer one or more of a group of test questions. The model is based on parameters for the ability of the students, the difficulty of the questions, and in more articulated models, the discriminativeness of the questions and the probability of guessing correctly; see Gelman and Hill (2007, pps. 314–320) for a textbook introduction to hierarchical IRT models and Curtis (2010) for encodings of a range of IRT models in BUGS.\n\n\nThe data provided for an IRT model may be declared as follows to account for the fact that not every student is required to answer every question.\ndata {\n int<lower=1> J; // number of students\n int<lower=1> K; // number of questions\n int<lower=1> N; // number of observations\n array[N] int<lower=1, upper=J> jj; // student for observation n\n array[N] int<lower=1, upper=K> kk; // question for observation n\n array[N] int<lower=0, upper=1> y; // correctness for observation n\n}\nThis declares a total of N student-question pairs in the data set, where each n in 1:N indexes a binary observation y[n] of the correctness of the answer of student jj[n] on question kk[n].\nThe prior hyperparameters will be hard coded in the rest of this section for simplicity, though they could be coded as data in Stan for more flexibility.\n\n\n\nThe 1PL item-response model, also known as the Rasch model, has one parameter (1P) for questions and uses the logistic link function (L).\nThe model parameters are 
declared as follows.\nparameters {\n real delta; // mean student ability\n array[J] real alpha; // ability of student j - mean ability\n array[K] real beta; // difficulty of question k\n}\nThe parameter alpha[j] is the ability coefficient for student j and beta[k] is the difficulty coefficient for question k. The non-standard parameterization used here also includes an intercept term delta, which represents the average student’s response to the average question.4\nThe model itself is as follows.\nmodel {\n alpha ~ std_normal(); // informative true prior\n beta ~ std_normal(); // informative true prior\n delta ~ normal(0.75, 1); // informative true prior\n for (n in 1:N) {\n y[n] ~ bernoulli_logit(alpha[jj[n]] - beta[kk[n]] + delta);\n }\n}\nThis model uses the logit-parameterized Bernoulli distribution, where \\[\n\\texttt{bernoulli}\\mathtt{\\_}\\texttt{logit}\\left(y \\mid \\alpha\\right)\n=\n\\texttt{bernoulli}\\left(y \\mid \\operatorname{logit}^{-1}(\\alpha)\\right).\n\\]\nThe key to understanding it is the term inside the bernoulli_logit distribution, from which it follows that \\[\n\\Pr[y_n = 1] = \\operatorname{logit}^{-1}\\left(\\alpha_{jj[n]} - \\beta_{kk[n]}\n+ \\delta\\right).\n\\]\nThe model suffers from additive identifiability issues without the priors. For example, adding a term \\(\\xi\\) to each \\(\\alpha_j\\) and \\(\\beta_k\\) results in the same predictions. The use of priors for \\(\\alpha\\) and \\(\\beta\\) located at 0 identifies the parameters; see Gelman and Hill (2007) for a discussion of identifiability issues and alternative approaches to identification.\nFor testing purposes, the IRT 1PL model distributed with Stan uses informative priors that match the actual data generation process used to simulate the data in R (the simulation code is supplied in the same directory as the models). This is unrealistic for most practical applications, but allows Stan’s inferences to be validated. 
A simple sensitivity analysis with fatter priors shows that the posterior is fairly sensitive to the prior even with 400 students and 100 questions and only 25% missingness at random. For real applications, the priors should be fit hierarchically along with the other parameters, as described in the next section.\n\n\n\nThe simple 1PL model described in the previous section is generalized in this section with the addition of a discrimination parameter to model how noisy a question is and by adding multilevel priors for the question difficulty and discrimination parameters. The model parameters are declared as follows.\nparameters {\n real mu_beta; // mean question difficulty\n vector[J] alpha; // ability for j - mean\n vector[K] beta; // difficulty for k\n vector<lower=0>[K] gamma; // discrimination of k\n real<lower=0> sigma_beta; // scale of difficulties\n real<lower=0> sigma_gamma; // scale of log discrimination\n}\nThe parameters should be clearer after the model definition.\nmodel {\n alpha ~ std_normal();\n beta ~ normal(0, sigma_beta);\n gamma ~ lognormal(0, sigma_gamma);\n mu_beta ~ cauchy(0, 5);\n sigma_beta ~ cauchy(0, 5);\n sigma_gamma ~ cauchy(0, 5);\n y ~ bernoulli_logit(gamma[kk] .* (alpha[jj] - (beta[kk] + mu_beta)));\n}\nThe std_normal function is used here, defined by \\[\n\\texttt{std}\\mathtt{\\_}\\texttt{normal}(y)\n=\n\\textsf{normal}\\left(y \\mid 0, 1\\right).\n\\]\nThe distribution statement is also vectorized using elementwise multiplication; it is equivalent to\nfor (n in 1:N) {\n y[n] ~ bernoulli_logit(gamma[kk[n]]\n * (alpha[jj[n]] - (beta[kk[n]] + mu_beta)));\n}\nThe 2PL model is similar to the 1PL model, with the additional parameter gamma[k] modeling how discriminative question k is. If gamma[k] is greater than 1, responses are more attenuated with less chance of getting a question right at random. 
The parameter gamma[k] is constrained to be positive, which prohibits there being questions that are easier for students of lesser ability; such questions are not unheard of, but they tend to be eliminated from most testing situations where an IRT model would be applied.\nThe model is parameterized here with student abilities alpha being given a standard normal prior. This is to identify both the scale and the location of the parameters, both of which would be unidentified otherwise; see the problematic posteriors chapter for further discussion of identifiability. The difficulty and discrimination parameters beta and gamma then have varying scales given hierarchically in this model. They could also be given weakly informative non-hierarchical priors, such as\nbeta ~ normal(0, 5);\ngamma ~ lognormal(0, 2);\nThe point is that the alpha determines the scale and location and beta and gamma are allowed to float.\nThe beta parameter is here given a non-centered parameterization, with parameter mu_beta serving as the mean beta location. An alternative would’ve been to take:\nbeta ~ normal(mu_beta, sigma_beta);\nand\ny[n] ~ bernoulli_logit(gamma[kk[n]] * (alpha[jj[n]] - beta[kk[n]]));\nNon-centered parameterizations tend to be more efficient in hierarchical models; see the reparameterization section for more information on non-centered reparameterizations.\nThe intercept term mu_beta can’t itself be modeled hierarchically, so it is given a weakly informative \\(\\textsf{Cauchy}(0,5)\\) prior. Similarly, the scale terms, sigma_beta, and sigma_gamma, are given half-Cauchy priors. As mentioned earlier, the scale and location for alpha are fixed to ensure identifiability. 
The truncation in the half-Cauchy prior is implicit; explicit truncation is not necessary because the log probability need only be calculated up to a proportion and the scale variables are constrained to \\((0,\\infty)\\) by their declarations.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#priors-for-identification.section", + "href": "stan-users-guide/regression.html#priors-for-identification.section", + "title": "Regression Models", + "section": "", + "text": "One application of (hierarchical) priors is to identify the scale and/or location of a group of parameters. For example, in the IRT models discussed in the previous section, there is both a location and scale non-identifiability. With uniform priors, the posteriors will float in terms of both scale and location. See the collinearity section for a simple example of the problems this poses for estimation.\nThe non-identifiability is resolved by providing a standard normal (i.e., \\(\\textsf{normal}(0,1)\\)) prior on one group of coefficients, such as the student abilities. With a standard normal prior on the student abilities, the IRT model is identified in that the posterior will produce a group of estimates for student ability parameters that have a sample mean of close to zero and a sample variance of close to one. The difficulty and discrimination parameters for the questions should then be given a diffuse, or ideally a hierarchical prior, which will identify these parameters by scaling and locating relative to the student ability parameters.\n\n\n\nAnother case in which priors can help provide identifiability is in the case of collinearity in a linear regression. In linear regression, if two predictors are collinear (i.e, one is a linear function of the other), then their coefficients will have a correlation of 1 (or -1) in the posterior. This leads to non-identifiability. 
By placing normal priors on the coefficients, the maximum likelihood solution of two duplicated predictors (trivially collinear) will be half the value that would be obtained by only including one.\n\n\n\nIn a logistic regression, if a predictor is positive in cases of 1 outcomes and negative in cases of 0 outcomes, then the maximum likelihood estimate for the coefficient for that predictor diverges to infinity. This divergence can be controlled by providing a prior for the coefficient, which will “shrink” the estimate back toward zero and thus identify the model in the posterior.\nSimilar problems arise for sampling with improper flat priors. The sampler will try to draw large values. By providing a prior, the posterior will be concentrated around finite values, leading to well-behaved sampling.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#multivariate-hierarchical-priors.section", + "href": "stan-users-guide/regression.html#multivariate-hierarchical-priors.section", + "title": "Regression Models", + "section": "", + "text": "In hierarchical regression models (and other situations), several individual-level variables may be assigned hierarchical priors. For example, a model with multiple varying intercepts and slopes within might assign them a multivariate prior.\nAs an example, the individuals might be people and the outcome income, with predictors such as education level and age, and the groups might be states or other geographic divisions. The effect of education level and age as well as an intercept might be allowed to vary by state. Furthermore, there might be state-level predictors, such as average state income and unemployment level.\n\n\nGelman and Hill (2007, chap. 13, Chapter 17) provide a discussion of a hierarchical model with \\(N\\) individuals organized into \\(J\\) groups. 
Each individual has a predictor row vector \\(x_n\\) of size \\(K\\); to unify the notation, they assume that \\(x_{n,1} = 1\\) is a fixed “intercept” predictor. To encode group membership, they assume individual \\(n\\) belongs to group \\(jj[n] \\in \\{ 1, \\dotsc, J \\}\\). Each individual \\(n\\) also has an observed outcome \\(y_n\\) taking on real values.\n\n\nThe model is a linear regression with slope and intercept coefficients varying by group, so that \\(\\beta_j\\) is the coefficient \\(K\\)-vector for group \\(j\\). The data model for individual \\(n\\) is then just \\[\ny_n \\sim \\textsf{normal}(x_n \\, \\beta_{jj[n]}, \\, \\sigma)\n\\quad\\text{for}\\quad n \\in \\{ 1, \\dotsc, N \\}.\n\\]\n\n\n\nGelman and Hill model the coefficient vectors \\(\\beta_j\\) as being drawn from a multivariate distribution with mean vector \\(\\mu\\) and covariance matrix \\(\\Sigma\\), \\[\n\\beta_j \\sim \\textsf{multivariate normal}(\\mu_j, \\, \\Sigma)\n\\quad\\text{for}\\quad j \\in \\{ 1, \\dotsc, J \\}.\n\\]\nBelow, we discuss the full model of Gelman and Hill, which uses group-level predictors to model \\(\\mu\\); for now, we assume \\(\\mu\\) is a simple vector parameter.\n\n\n\nFor hierarchical modeling, the group-level mean vector \\(\\mu\\) and covariance matrix \\(\\Sigma\\) must themselves be given priors. The group-level mean vector can be given a reasonable weakly-informative prior for independent coefficients, such as \\[\n\\mu_j \\sim \\textsf{normal}(0,5).\n\\] If more is known about the expected coefficient values \\(\\beta_{j, k}\\), this information can be incorporated into the prior for \\(\\mu_j\\).\nFor the prior on the covariance matrix, Gelman and Hill suggest using a scaled inverse Wishart. 
That choice was motivated primarily by convenience as it is conjugate to the multivariate likelihood function and thus simplifies Gibbs sampling.\nIn Stan, there is no restriction to conjugacy for multivariate priors, and we in fact recommend a slightly different approach. Like Gelman and Hill, we decompose our prior into a scale and a matrix, but are able to do so in a more natural way based on the actual variable scales and a correlation matrix. Specifically, we define \\[\n\\Sigma = \\texttt{diag}\\mathtt{\\_}\\texttt{matrix}(\\tau) \\times \\Omega \\times \\texttt{diag}\\mathtt{\\_}\\texttt{matrix}(\\tau),\n\\] where \\(\\Omega\\) is a correlation matrix and \\(\\tau\\) is the vector of coefficient scales. This mapping from scale vector \\(\\tau\\) and correlation matrix \\(\\Omega\\) can be inverted, using \\[\n\\tau_k = \\sqrt{\\Sigma_{k,k}}\n\\quad\\textsf{and}\\quad\n\\Omega_{i, j} = \\frac{\\Sigma_{i, j}}{\\tau_i \\, \\tau_j}.\n\\]\nThe components of the scale vector \\(\\tau\\) can be given any reasonable prior for scales, but we recommend something weakly informative like a half-Cauchy distribution with a small scale, such as \\[\n\\tau_k \\sim \\textsf{Cauchy}(0, 2.5)\n\\quad\\text{for}\\quad k \\in \\{ 1, \\dotsc, K \\}\n\\quad\\text{constrained\\ by}\\quad \\tau_k > 0.\n\\] As for the prior means, if there is information about the scale of variation of coefficients across groups, it should be incorporated into the prior for \\(\\tau\\). 
For large numbers of exchangeable coefficients, the components of \\(\\tau\\) itself (perhaps excluding the intercept) may themselves be given a hierarchical prior.\nOur final recommendation is to give the correlation matrix \\(\\Omega\\) an LKJ prior with shape \\(\\eta \\geq 1\\),5\n\\[\n\\Omega \\sim \\textsf{LKJCorr}(\\eta).\n\\]\nThe LKJ correlation distribution is defined by \\[\n\\textsf{LKJCorr}\\left(\\Sigma \\mid \\eta\\right)\n\\propto\n\\operatorname{det}\\left(\\Sigma\\right)^{\\eta - 1}.\n\\]\nThe basic behavior of the LKJ correlation distribution is similar to that of a beta distribution. For \\(\\eta = 1\\), the result is a uniform distribution. Despite being the identity over correlation matrices, the marginal distribution over the entries in that matrix (i.e., the correlations) is not uniform between -1 and 1. Rather, it concentrates around zero as the dimensionality increases due to the complex constraints.\nFor \\(\\eta > 1\\), the density increasingly concentrates mass around the unit matrix, i.e., favoring less correlation. For \\(\\eta < 1\\), it increasingly concentrates mass in the other direction, i.e., favoring more correlation.\nThe LKJ prior may thus be used to control the expected amount of correlation among the parameters \\(\\beta_j\\). For a discussion of decomposing a covariance prior into a prior on correlation matrices and an independent prior on scales, see Barnard, McCulloch, and Meng (2000).\n\n\n\nTo complete Gelman and Hill’s model, suppose each group \\(j \\in \\{ 1, \\dotsc, J \\}\\) is supplied with an \\(L\\)-dimensional row-vector of group-level predictors \\(u_j\\). The prior mean for the \\(\\beta_j\\) can then itself be modeled as a regression, using an \\(L\\)-dimensional coefficient vector \\(\\gamma\\). 
The prior for the group-level coefficients then becomes \\[\n\\beta_j \\sim \\textsf{multivariate normal}(u_j \\, \\gamma, \\Sigma)\n\\]\nThe group-level coefficients \\(\\gamma\\) may themselves be given independent weakly informative priors, such as \\[\n\\gamma_l \\sim \\textsf{normal}(0,5).\n\\] As usual, information about the group-level means should be incorporated into this prior.\n\n\n\nThe Stan code for the full hierarchical model with multivariate priors on the group-level coefficients and group-level prior means follows its definition.\ndata {\n int<lower=0> N; // num individuals\n int<lower=1> K; // num ind predictors\n int<lower=1> J; // num groups\n int<lower=1> L; // num group predictors\n array[N] int<lower=1, upper=J> jj; // group for individual\n matrix[N, K] x; // individual predictors\n array[J] row_vector[L] u; // group predictors\n vector[N] y; // outcomes\n}\nparameters {\n corr_matrix[K] Omega; // prior correlation\n vector<lower=0>[K] tau; // prior scale\n matrix[L, K] gamma; // group coeffs\n array[J] vector[K] beta; // indiv coeffs by group\n real<lower=0> sigma; // prediction error scale\n}\nmodel {\n tau ~ cauchy(0, 2.5);\n Omega ~ lkj_corr(2);\n to_vector(gamma) ~ normal(0, 5);\n {\n array[J] row_vector[K] u_gamma;\n for (j in 1:J) {\n u_gamma[j] = u[j] * gamma;\n }\n beta ~ multi_normal(u_gamma, quad_form_diag(Omega, tau));\n }\n for (n in 1:N) {\n y[n] ~ normal(x[n] * beta[jj[n]], sigma);\n }\n}\nThe hyperprior covariance matrix is defined implicitly through the quadratic form in the code because the correlation matrix Omega and scale vector tau are more natural to inspect in the output; to output Sigma, define it as a transformed parameter. The function quad_form_diag is defined so that quad_form_diag(Sigma, tau) is equivalent to diag_matrix(tau) * Sigma * diag_matrix(tau), where diag_matrix(tau) returns the matrix with tau on the diagonal and zeroes off diagonal; the version using quad_form_diag should be faster. 
For details on these and other matrix arithmetic operators and functions, see the function reference manual.\n\n\n\nThe code in the Stan program above can be sped up dramatically by replacing the distribution statement inside the for loop:\nfor (n in 1:N) {\n y[n] ~ normal(x[n] * beta[jj[n]], sigma);\n}\nwith the vectorized distribution statement:\n{\n vector[N] x_beta_jj;\n for (n in 1:N) {\n x_beta_jj[n] = x[n] * beta[jj[n]];\n }\n y ~ normal(x_beta_jj, sigma);\n}\nThe outer brackets create a local scope in which to define the variable x_beta_jj, which is then filled in a loop and used to define a vectorized distribution statement. The reason this is such a big win is that it allows us to take the log of sigma only once and it greatly reduces the size of the resulting expression graph by packing all of the work into a single distribution function.\nAlthough it is tempting to redeclare beta and include a revised model block distribution statement,\nparameters {\n matrix[J, K] beta;\n// ...\n}\nmodel {\n y ~ normal(rows_dot_product(x, beta[jj]), sigma);\n // ...\n}\nthis fails because it breaks the vectorization for beta,6\nbeta ~ multi_normal(...);\nwhich requires beta to be an array of vectors. Both vectorizations are important, so the best solution is to just use the loop above, because rows_dot_product cannot do much optimization in and of itself because there are no shared computations.\nThe code in the Stan program above also builds up an array of vectors for the outcomes and for the multivariate normal, which provides a major speedup by reducing the number of linear systems that need to be solved and differentiated.\n{\n matrix[K, K] Sigma_beta;\n Sigma_beta = quad_form_diag(Omega, tau);\n for (j in 1:J) {\n beta[j] ~ multi_normal((u[j] * gamma)', Sigma_beta);\n }\n}\nIn this example, the covariance matrix Sigma_beta is defined as a local variable so as not to have to repeat the quadratic form computation \\(J\\) times. 
This vectorization can be combined with the Cholesky-factor optimization in the next section.\n\n\n\nThe multivariate normal density and LKJ prior on correlation matrices both require their matrix parameters to be factored. Vectorizing, as in the previous section, ensures this is only done once for each density. An even better solution, both in terms of efficiency and numerical stability, is to parameterize the model directly in terms of Cholesky factors of correlation matrices using the multivariate version of the non-centered parameterization. For the model in the previous section, the program fragment to replace the full matrix prior with an equivalent Cholesky factorized prior is as follows.\ndata {\n matrix[L, J] u; // group predictors transposed\n // ...\n}\nparameters {\n matrix[K, J] z;\n cholesky_factor_corr[K] L_Omega;\n matrix[K, L] gamma;\n // ...\n}\ntransformed parameters {\n matrix[K, J] beta;\n beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;\n}\nmodel {\n to_vector(z) ~ std_normal();\n L_Omega ~ lkj_corr_cholesky(2);\n // ...\n}\nThe data variable u was originally an array of vectors, which is efficient for access; here it is redeclared as a matrix in order to use it in matrix arithmetic. Moreover, it is transposed, along with gamma and beta, to minimize the number of transposition operations. 
The new parameter L_Omega is the Cholesky factor of the original correlation matrix Omega, so that\nOmega = L_Omega * L_Omega'\nThe prior scale vector tau is unchanged, and furthermore, pre-multiplying the Cholesky factor by the scale produces the Cholesky factor of the final covariance matrix,\nSigma_beta\n = quad_form_diag(Omega, tau)\n = diag_pre_multiply(tau, L_Omega) * diag_pre_multiply(tau, L_Omega)'\nwhere the diagonal pre-multiply compound operation is defined by\ndiag_pre_multiply(a, b) = diag_matrix(a) * b\nThe new variable z is declared as a matrix, the entries of which are given independent standard normal priors; the to_vector operation turns the matrix into a vector so that it can be used as a vectorized argument to the univariate normal density. This results in every column of z being a \\(K\\)-variate normal random vector with the identity as covariance matrix. Therefore, multiplying z by the Cholesky factor of the covariance matrix and adding the mean (u * gamma)' produces a beta distributed as in the original model, where the variance is, letting \\(L = \\mathrm{diag}(\\tau)\\,\\Omega_L\\),\n\\[\n\\begin{aligned}\n\\mathbb{V}[\\beta] &= \\mathbb{E}\\big((L \\, z)(L \\, z)^\\top) \\\\\n&= \\mathbb{E}\\big((L \\, z \\, z^\\top \\, L^\\top) \\\\\n&= L \\, \\mathbb{E}(z \\, z^\\top) \\, L^\\top \\\\\n&= L \\, L^\\top =(\\mathrm{diag}(\\tau)\\,\\Omega_L)\\,(\\mathrm{diag}(\\tau)\\,\\Omega_L)^\\top \\\\\n&= \\mathrm{diag}(\\tau)\\,\\Omega\\,\\mathrm{diag}(\\tau) \\\\\n&= \\Sigma.\n\\end{aligned}\n\\] Where we have used the linearity of expectations (line 2 to 3), the definition of \\(\\Omega = \\Omega_L \\, \\Omega_L^\\top\\), and the fact that \\(\\mathbb{E}(z \\, z^\\top) = I\\) since \\(z \\sim \\mathcal{N}(0, I)\\).\nOmitting the remaining data declarations, which are the same as before with the exception of u, the optimized model is as follows.\nparameters {\n matrix[K, J] z;\n cholesky_factor_corr[K] L_Omega;\n vector<lower=0, upper=pi() / 2>[K] 
tau_unif; // prior scale\n matrix[K, L] gamma; // group coeffs\n real<lower=0> sigma; // prediction error scale\n}\ntransformed parameters {\n vector<lower=0>[K] tau = 2.5 * tan(tau_unif);\n matrix[K, J] beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;\n}\nmodel {\n vector[N] mu;\n for(n in 1:N) {\n mu[n] = x[n, ] * beta[, jj[n]];\n }\n to_vector(z) ~ std_normal();\n L_Omega ~ lkj_corr_cholesky(2);\n to_vector(gamma) ~ normal(0, 5);\n y ~ normal(mu, sigma);\n}\nThis model also reparameterizes the prior scale tau to avoid potential problems with the heavy tails of the Cauchy distribution. The statement tau_unif ~ uniform(0, pi() / 2) can be omitted from the model block because Stan increments the log posterior for parameters with uniform priors without it.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#prediction-forecasting-and-backcasting", + "href": "stan-users-guide/regression.html#prediction-forecasting-and-backcasting", + "title": "Regression Models", + "section": "", + "text": "Stan models can be used for “predicting” the values of arbitrary model unknowns. When predictions are about the future, they’re called “forecasts;” when they are predictions about the past, as in climate reconstruction or cosmology, they are sometimes called “backcasts” (or “aftcasts” or “hindcasts” or “antecasts,” depending on the author’s feelings about the opposite of “fore”).\n\n\nAs a simple example, the following linear regression provides the same setup for estimating the coefficients beta as in our very first example, using y for the N observations and x for the N predictor vectors. The model parameters and model for observations are exactly the same as before.\nTo make predictions, we need to be given the number of predictions, N_new, and their predictor matrix, x_new. The predictions themselves are modeled as a parameter y_new. 
The model statement for the predictions is exactly the same as for the observations, with the new outcome vector y_new and prediction matrix x_new.\ndata {\n int<lower=1> K;\n int<lower=0> N;\n matrix[N, K] x;\n vector[N] y;\n\n int<lower=0> N_new;\n matrix[N_new, K] x_new;\n}\nparameters {\n vector[K] beta;\n real<lower=0> sigma;\n\n vector[N_new] y_new; // predictions\n}\nmodel {\n y ~ normal(x * beta, sigma); // observed model\n\n y_new ~ normal(x_new * beta, sigma); // prediction model\n}\n\n\n\nWhere possible, the most efficient way to generate predictions is to use the generated quantities block. This provides proper Monte Carlo (not Markov chain Monte Carlo) inference, which can have a much higher effective sample size per iteration.\n// ...data as above...\n\nparameters {\n vector[K] beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(x * beta, sigma);\n}\ngenerated quantities {\n vector[N_new] y_new;\n for (n in 1:N_new) {\n y_new[n] = normal_rng(x_new[n] * beta, sigma);\n }\n}\nNow the data are just as before, but the parameter y_new is now declared as a generated quantity, and the prediction model is removed from the model and replaced by a pseudo-random draw from a normal distribution.\n\n\nIt is possible for values to overflow or underflow in generated quantities. The problem is that if the result is NaN, then any constraints placed on the variables will be violated. It is possible to check a value assigned by an RNG and reject it if it overflows, but this is both inefficient and leads to biased posterior estimates. Instead, the conditions causing overflow, such as trying to generate a negative binomial random variate with a mean of \\(2^{31}\\), must be intercepted and dealt with. 
This is typically done by reparameterizing or reimplementing the random number generator using real values rather than integers, which are upper-bounded by \\(2^{31} - 1\\) in Stan.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#multivariate-outcomes", + "href": "stan-users-guide/regression.html#multivariate-outcomes", + "title": "Regression Models", + "section": "", + "text": "Most regressions are set up to model univariate observations (be they scalar, boolean, categorical, ordinal, or count). Even multinomial regressions are just repeated categorical regressions. In contrast, this section discusses regression when each observed value is multivariate. To relate multiple outcomes in a regression setting, their error terms are provided with covariance structure.\nThis section considers two cases, seemingly unrelated regressions for continuous multivariate quantities and multivariate probit regression for boolean multivariate quantities.\n\n\nThe first model considered is the “seemingly unrelated” regressions (SUR) of econometrics where several linear regressions share predictors and use a covariance error structure rather than independent errors (Zellner 1962; Greene 2011).\nThe model is easy to write down as a regression, \\[\\begin{align*}\ny_n &= x_n \\, \\beta + \\epsilon_n \\\\\n\\epsilon_n &\\sim \\textsf{multivariate normal}(0, \\Sigma)\n\\end{align*}\\]\nwhere \\(x_n\\) is a \\(J\\)-row-vector of predictors (\\(x\\) is an \\((N \\times\nJ)\\) matrix), \\(y_n\\) is a \\(K\\)-vector of observations, \\(\\beta\\) is a \\((K\n\\times J)\\) matrix of regression coefficients (vector \\(\\beta_k\\) holds coefficients for outcome \\(k\\)), and \\(\\Sigma\\) is covariance matrix governing the error. 
As usual, the intercept can be rolled into \\(x\\) as a column of ones.\nThe basic Stan code is straightforward (though see below for more optimized code for use with LKJ priors on correlation).\ndata {\n int<lower=1> K;\n int<lower=1> J;\n int<lower=0> N;\n array[N] vector[J] x;\n array[N] vector[K] y;\n}\nparameters {\n matrix[K, J] beta;\n cov_matrix[K] Sigma;\n}\nmodel {\n array[N] vector[K] mu;\n for (n in 1:N) {\n mu[n] = beta * x[n];\n }\n y ~ multi_normal(mu, Sigma);\n}\nFor efficiency, the multivariate normal is vectorized by precomputing the array of mean vectors and sharing the same covariance matrix.\nFollowing the advice in the multivariate hierarchical priors section, we will place a weakly informative normal prior on the regression coefficients, an LKJ prior on the correlations and a half-Cauchy prior on standard deviations. The covariance structure is parameterized in terms of Cholesky factors for efficiency and arithmetic stability.\n// ...\nparameters {\n matrix[K, J] beta;\n cholesky_factor_corr[K] L_Omega;\n vector<lower=0>[K] L_sigma;\n}\nmodel {\n array[N] vector[K] mu;\n matrix[K, K] L_Sigma;\n\n for (n in 1:N) {\n mu[n] = beta * x[n];\n\n }\n\n L_Sigma = diag_pre_multiply(L_sigma, L_Omega);\n\n to_vector(beta) ~ normal(0, 5);\n L_Omega ~ lkj_corr_cholesky(4);\n L_sigma ~ cauchy(0, 2.5);\n\n y ~ multi_normal_cholesky(mu, L_Sigma);\n}\nThe Cholesky factor of the covariance matrix is then reconstructed as a local variable and used in the model by scaling the Cholesky factor of the correlation matrices. 
The regression coefficients get a prior all at once by converting the matrix beta to a vector.\nIf required, the full correlation or covariance matrices may be reconstructed from their Cholesky factors in the generated quantities block.\n\n\n\nThe multivariate probit model generates sequences of boolean variables by applying a step function to the output of a seemingly unrelated regression.\nThe observations \\(y_n\\) are \\(D\\)-vectors of boolean values (coded 0 for false, 1 for true). The values for the observations \\(y_n\\) are based on latent values \\(z_n\\) drawn from a seemingly unrelated regression model (see the previous section), \\[\\begin{align*}\nz_n &= x_n \\, \\beta + \\epsilon_n \\\\\n\\epsilon_n &\\sim \\textsf{multivariate normal}(0, \\Sigma)\n\\end{align*}\\]\nThese are then put through the step function to produce a \\(D\\)-vector \\(y_n\\) of boolean values with elements defined by \\[\ny_{n, d} = \\operatorname{I}\\left(z_{n, d} > 0\\right),\n\\] where \\(\\operatorname{I}()\\) is the indicator function taking the value 1 if its argument is true and 0 otherwise.\nUnlike in the seemingly unrelated regressions case, here the covariance matrix \\(\\Sigma\\) has unit standard deviations (i.e., it is a correlation matrix). As with ordinary probit and logistic regressions, letting the scale vary causes the model (which is defined only by a cutpoint at 0, not a scale) to be unidentified (see Greene (2011)).\nMultivariate probit regression can be coded in Stan using the trick introduced by Albert and Chib (1993), where the underlying continuous value vectors \\(z_n\\) are coded as truncated parameters. The key to coding the model in Stan is declaring the latent vector \\(z\\) in two parts, based on whether the corresponding value of \\(y\\) is 0 or 1. 
Otherwise, the model is identical to the seemingly unrelated regression model in the previous section.\nFirst, we introduce a sum function for two-dimensional arrays of integers; this is going to help us calculate how many total 1 values there are in \\(y\\).\nfunctions {\n int sum2d(array[,] int a) {\n int s = 0;\n for (i in 1:size(a)) {\n s += sum(a[i]);\n }\n return s;\n }\n}\nThe function is trivial, but it’s not a built-in for Stan and it’s easier to understand the rest of the model if it’s pulled into its own function so as not to create a distraction.\nThe data declaration block is much like for the seemingly unrelated regressions, but the observations y are now integers constrained to be 0 or 1.\ndata {\n int<lower=1> K;\n int<lower=1> D;\n int<lower=0> N;\n array[N, D] int<lower=0, upper=1> y;\n array[N] vector[K] x;\n}\nAfter declaring the data, there is a rather involved transformed data block whose sole purpose is to sort the data array y into positive and negative components, keeping track of indexes so that z can be easily reassembled in the transformed parameters block.\ntransformed data {\n int<lower=0> N_pos;\n array[sum2d(y)] int<lower=1, upper=N> n_pos;\n array[size(n_pos)] int<lower=1, upper=D> d_pos;\n int<lower=0> N_neg;\n array[(N * D) - size(n_pos)] int<lower=1, upper=N> n_neg;\n array[size(n_neg)] int<lower=1, upper=D> d_neg;\n\n N_pos = size(n_pos);\n N_neg = size(n_neg);\n {\n int i;\n int j;\n i = 1;\n j = 1;\n for (n in 1:N) {\n for (d in 1:D) {\n if (y[n, d] == 1) {\n n_pos[i] = n;\n d_pos[i] = d;\n i += 1;\n } else {\n n_neg[j] = n;\n d_neg[j] = d;\n j += 1;\n }\n }\n }\n }\n}\nThe variables N_pos and N_neg are set to the number of true (1) and number of false (0) observations in y. 
The loop then fills in the sequence of indexes for the positive and negative values in four arrays.\nThe parameters are declared as follows.\nparameters {\n matrix[D, K] beta;\n cholesky_factor_corr[D] L_Omega;\n vector<lower=0>[N_pos] z_pos;\n vector<upper=0>[N_neg] z_neg;\n}\nThese include the regression coefficients beta and the Cholesky factor of the correlation matrix, L_Omega. This time there is no scaling because the covariance matrix has unit scale (i.e., it is a correlation matrix; see above).\nThe critical part of the parameter declaration is that the latent real value \\(z\\) is broken into positive-constrained and negative-constrained components, whose size was conveniently calculated in the transformed data block. The transformed data block’s real work was to allow the transformed parameter block to reconstruct \\(z\\).\ntransformed parameters {\n array[N] vector[D] z;\n for (n in 1:N_pos) {\n z[n_pos[n], d_pos[n]] = z_pos[n];\n }\n for (n in 1:N_neg) {\n z[n_neg[n], d_neg[n]] = z_neg[n];\n }\n}\nAt this point, the model is simple, pretty much recreating the seemingly unrelated regression.\nmodel {\n L_Omega ~ lkj_corr_cholesky(4);\n to_vector(beta) ~ normal(0, 5);\n {\n array[N] vector[D] beta_x;\n for (n in 1:N) {\n beta_x[n] = beta * x[n];\n }\n z ~ multi_normal_cholesky(beta_x, L_Omega);\n }\n}\nThis simple form of model is made possible by the Albert and Chib-style constraints on z.\nFinally, the correlation matrix itself can be put back together in the generated quantities block if desired.\ngenerated quantities {\n corr_matrix[D] Omega;\n Omega = multiply_lower_tri_self_transpose(L_Omega);\n}\nThe same could be done for the seemingly unrelated regressions in the previous section.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#applications-of-pseudorandom-number-generation", + "href": 
"stan-users-guide/regression.html#applications-of-pseudorandom-number-generation", + "title": "Regression Models", + "section": "", + "text": "The main application of pseudorandom number generators (PRNGs) is for posterior inference, including prediction and posterior predictive checks. They can also be used for pure data simulation, which is like a posterior predictive check with no conditioning. See the function reference manual for a complete description of the syntax and usage of pseudorandom number generators.\n\n\nConsider predicting unobserved outcomes using linear regression. Given predictors \\(x_1, \\dotsc, x_N\\) and observed outcomes \\(y_1, \\dotsc, y_N\\), and assuming a standard linear regression with intercept \\(\\alpha\\), slope \\(\\beta\\), and error scale \\(\\sigma\\), along with improper uniform priors, the posterior over the parameters given \\(x\\) and \\(y\\) is \\[\np\\left(\\alpha, \\beta, \\sigma \\mid x, y \\right)\n\\propto\n\\prod_{n=1}^N\n \\textsf{normal}\\left(y_n \\mid \\alpha + \\beta x_n, \\sigma\\right).\n\\]\nFor this model, the posterior predictive inference for a new outcome \\(\\tilde{y}_n\\) given a predictor \\(\\tilde{x}_n\\), conditioned on the observed data \\(x\\) and \\(y\\), is \\[\np\\left(\\tilde{y}_n \\mid \\tilde{x}_n, x, y\\right)\n= \\int_{(\\alpha,\\beta,\\sigma)}\n \\textsf{normal}\\left(\\tilde{y}_n \\mid \\alpha + \\beta \\tilde{x}_n, \\sigma\\right)\n \\times\n p\\left(\\alpha, \\beta, \\sigma \\mid x, y\\right)\n \\,\\textrm{d}(\\alpha,\\beta,\\sigma).\n\\]\nTo code the posterior predictive inference in Stan, a standard linear regression is combined with a random number in the generated quantities block.\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n}\ngenerated quantities {\n vector[N_tilde] y_tilde;\n for (n in 1:N_tilde) {\n 
y_tilde[n] = normal_rng(alpha + beta * x_tilde[n], sigma);\n }\n}\nGiven observed predictors \\(x\\) and outcomes \\(y\\), y_tilde will be drawn according to \\(p\\left(\\tilde{y} \\mid \\tilde{x}, y, x\\right)\\). This means that, for example, the posterior mean for y_tilde is the estimate of the outcome that minimizes expected square error (conditioned on the data and model).\n\n\n\nA good way to investigate the fit of a model to the data, a critical step in Bayesian data analysis, is to generate simulated data according to the parameters of the model. This is carried out with exactly the same procedure as before, only the observed data predictors \\(x\\) are used in place of new predictors \\(\\tilde{x}\\) for unobserved outcomes. If the model fits the data well, the predictions for \\(\\tilde{y}\\) based on \\(x\\) should match the observed data \\(y\\).\nTo code posterior predictive checks in Stan requires only a slight modification of the prediction code to use \\(x\\) and \\(N\\) in place of \\(\\tilde{x}\\) and \\(\\tilde{N}\\),\ngenerated quantities {\n vector[N] y_tilde;\n for (n in 1:N) {\n y_tilde[n] = normal_rng(alpha + beta * x[n], sigma);\n }\n}\nGelman et al. (2013) recommend choosing several posterior draws \\(\\tilde{y}^{(1)}, \\dotsc, \\tilde{y}^{(M)}\\) and plotting each of them alongside the data \\(y\\) that was actually observed. If the model fits well, the simulated \\(\\tilde{y}\\) will look like the actual data \\(y\\).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/regression.html#footnotes", + "href": "stan-users-guide/regression.html#footnotes", + "title": "Regression Models", + "section": "Footnotes", + "text": "Footnotes\n\n\nUnlike in Python and R, which are interpreted, Stan is translated to C++ and compiled, so loops and assignment statements are fast. 
Vectorized code is faster in Stan because (a) the expression tree used to compute derivatives can be simplified, leading to fewer virtual function calls, and (b) computations that would be repeated in the looping version, such as log(sigma) in the above model, will be computed once and reused.↩︎\nThe Phi_approx function is a rescaled version of the inverse logit function, so while the scale is roughly the same as \\(\\Phi\\), the tails do not match.↩︎\nThe Bernoulli-logit distribution builds in the logit link function, taking \\[\\texttt{bernoulli}\\mathtt{\\_}\\texttt{logit}\\left(y \\mid \\alpha\\right) = \\texttt{bernoulli}\\left(y \\mid \\operatorname{logit}^{-1}(\\alpha)\\right).\\]↩︎\nGelman and Hill (2007) treat the \\(\\delta\\) term equivalently as the location parameter in the distribution of student abilities.↩︎\nThe prior is named for Lewandowski, Kurowicka, and Joe, as it was derived by inverting the random correlation matrix generation strategy of Lewandowski, Kurowicka, and Joe (2009).↩︎\nThanks to Mike Lawrence for pointing this out in the GitHub issue for the manual.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Regression Models" + ] + }, + { + "objectID": "stan-users-guide/proportionality-constants.html", + "href": "stan-users-guide/proportionality-constants.html", + "title": "Proportionality Constants", + "section": "", + "text": "When evaluating a likelihood or prior as part of the log density computation in MCMC, variational inference, or optimization, it is usually only necessary to compute the functions up to a proportionality constant (or similarly compute log densities up to an additive constant). In MCMC this comes from the fact that the distribution being sampled does not need to be normalized (and so it is the normalization constant that is ignored). Similarly the distribution does not need to be normalized to perform variational inference or do optimizations. 
The advantage of working with unnormalized distributions is they can make computation quite a bit cheaper.\nThere are three different syntaxes to build the model in Stan. The way to select between them is by determining if the proportionality constants are necessary. If performance is not a problem, it is always safe to use the normalized densities.\nThe distribution statement (~) and log density increment statement (target +=) with _lupdf() use unnormalized densities for \\(x\\) (dropping proportionality constants):\nx ~ normal(0, 1);\ntarget += normal_lupdf(x | 0, 1); // the 'u' is for unnormalized\nThe log density increment statement (target +=) with _lpdf() uses the full normalized density for \\(x\\) (dropping no constants):\ntarget += normal_lpdf(x | 0, 1);\nFor discrete distributions, the target += syntax is using _lupmf and _lpmf instead:\ny ~ bernoulli(0.5);\ntarget += bernoulli_lupmf(y | 0.5);\ntarget += bernoulli_lpmf(y | 0.5);\n\n\nIf a density \\(p(\\theta)\\) can be factored into \\(K g(\\theta)\\) where \\(K\\) are all the factors that are not a function of \\(\\theta\\) and \\(g(\\theta)\\) are all the terms that are a function of \\(\\theta\\), then it is said that \\(g(\\theta)\\) is proportional to \\(p(\\theta)\\) up to a constant.\nThe advantage of all this is that sometimes \\(K\\) is expensive to compute and if it is not a function of the distribution that is to be sampled (or optimized or approximated with variational inference), there is no need to compute it because it will not affect the results.\nStan takes advantage of the proportionality constant fact with the ~ syntax. 
Take for instance the normal data model:\ndata {\n real mu;\n real<lower=0.0> sigma;\n}\nparameters {\n real x;\n}\nmodel {\n x ~ normal(mu, sigma);\n}\nSyntactically, this is just shorthand for the equivalent model that replaces the ~ syntax with a target += statement and a normal_lupdf function call:\ndata {\n real mu;\n real<lower=0.0> sigma;\n}\nparameters {\n real x;\n}\nmodel {\n target += normal_lupdf(x | mu, sigma);\n}\nThe function normal_lupdf is only guaranteed to return the log density of the normal distribution up to a proportionality constant density to be sampled. The proportionality constant itself is not defined. The full log density of the statement here is:\n\\[\n\\textsf{normal\\_lpdf}(x | \\mu, \\sigma) =\n-\\log \\left( \\sigma \\sqrt{2 \\pi} \\right)\n-\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2.\n\\]\nNow because the density here is only a function of \\(x\\), the additive terms in the log density that are not a function of \\(x\\) can be dropped. In this case it is enough to know only the quadratic term:\n\\[\n\\textsf{normal\\_lupdf}(x | \\mu, \\sigma) =\n-\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2.\n\\]\n\n\n\nIn the case that the proportionality constants were needed for a normal log density the function normal_lpdf can be used. For clarity, if there is ever a situation where it is unclear if the normalization is necessary, it should always be safe to include it. Only use the ~ or target += normal_lupdf syntaxes if it is absolutely clear that the proportionality constants are not necessary.\n\n\n\nWhen a custom _lpdf or _lpmf function is defined, the compiler will automatically make available a _lupdf or _lupmf version of the function. It is only possible to define custom distributions in the normalized form in Stan. 
Any attempt to define an unnormalized distribution directly will result in an error.\nThe difference in the normalized and unnormalized versions of custom probability functions is how probability functions are treated inside these functions. Any internal unnormalized probability function call will be replaced with its normalized equivalent if the normalized version of the parent custom distribution is called.\nThe following code demonstrates the different behaviors:\nfunctions {\n real custom1_lpdf(x) {\n return normal_lupdf(x | 0.0, 1.0);\n }\n real custom2_lpdf(x) {\n return normal_lpdf(x | 0.0, 1.0);\n }\n}\nparameters {\n real mu;\n}\nmodel {\n mu ~ custom1(); // Normalization constants dropped\n target += custom1_lupdf(mu); // Normalization constants dropped\n target += custom1_lpdf(mu); // Normalization constants kept\n\n mu ~ custom2(); // Normalization constants kept\n target += custom2_lupdf(mu); // Normalization constants kept\n target += custom2_lpdf(mu); // Normalization constants kept\n}\n\n\n\nTo avoid ambiguities in how the normalization constants work, functions ending in _lupdf and _lupmf can only be used in the model block or user-defined probability functions (functions ending in _lpdf or _lpmf).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Proportionality Constants" + ] + }, + { + "objectID": "stan-users-guide/proportionality-constants.html#dropping-proportionality-constants", + "href": "stan-users-guide/proportionality-constants.html#dropping-proportionality-constants", + "title": "Proportionality Constants", + "section": "", + "text": "If a density \\(p(\\theta)\\) can be factored into \\(K g(\\theta)\\) where \\(K\\) are all the factors that are a not a function of \\(\\theta\\) and \\(g(\\theta)\\) are all the terms that are a function of \\(\\theta\\), then it is said that \\(g(\\theta)\\) is proportional to \\(p(\\theta)\\) up to a constant.\nThe advantage of all this is that sometimes \\(K\\) is expensive to 
compute and if it is not a function of the distribution that is to be sampled (or optimized or approximated with variational inference), there is no need to compute it because it will not affect the results.\nStan takes advantage of the proportionality constant fact with the ~ syntax. Take for instance the normal data model:\ndata {\n real mu;\n real<lower=0.0> sigma;\n}\nparameters {\n real x;\n}\nmodel {\n x ~ normal(mu, sigma);\n}\nSyntactically, this is just shorthand for the equivalent model that replaces the ~ syntax with a target += statement and a normal_lupdf function call:\ndata {\n real mu;\n real<lower=0.0> sigma;\n}\nparameters {\n real x;\n}\nmodel {\n target += normal_lupdf(x | mu, sigma);\n}\nThe function normal_lupdf is only guaranteed to return the log density of the normal distribution up to a proportionality constant density to be sampled. The proportionality constant itself is not defined. The full log density of the statement here is:\n\\[\n\\textsf{normal\\_lpdf}(x | \\mu, \\sigma) =\n-\\log \\left( \\sigma \\sqrt{2 \\pi} \\right)\n-\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2.\n\\]\nNow because the density here is only a function of \\(x\\), the additive terms in the log density that are not a function of \\(x\\) can be dropped. In this case it is enough to know only the quadratic term:\n\\[\n\\textsf{normal\\_lupdf}(x | \\mu, \\sigma) =\n-\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2.\n\\]", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Proportionality Constants" + ] + }, + { + "objectID": "stan-users-guide/proportionality-constants.html#keeping-proportionality-constants", + "href": "stan-users-guide/proportionality-constants.html#keeping-proportionality-constants", + "title": "Proportionality Constants", + "section": "", + "text": "In the case that the proportionality constants were needed for a normal log density the function normal_lpdf can be used. 
For clarity, if there is ever a situation where it is unclear if the normalization is necessary, it should always be safe to include it. Only use the ~ or target += normal_lupdf syntaxes if it is absolutely clear that the proportionality constants are not necessary.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Proportionality Constants" + ] + }, + { + "objectID": "stan-users-guide/proportionality-constants.html#user-defined-distributions", + "href": "stan-users-guide/proportionality-constants.html#user-defined-distributions", + "title": "Proportionality Constants", + "section": "", + "text": "When a custom _lpdf or _lpmf function is defined, the compiler will automatically make available a _lupdf or _lupmf version of the function. It is only possible to define custom distributions in the normalized form in Stan. Any attempt to define an unnormalized distribution directly will result in an error.\nThe difference in the normalized and unnormalized versions of custom probability functions is how probability functions are treated inside these functions. 
Any internal unnormalized probability function call will be replaced with its normalized equivalent if the normalized version of the parent custom distribution is called.\nThe following code demonstrates the different behaviors:\nfunctions {\n real custom1_lpdf(x) {\n return normal_lupdf(x | 0.0, 1.0);\n }\n real custom2_lpdf(x) {\n return normal_lpdf(x | 0.0, 1.0);\n }\n}\nparameters {\n real mu;\n}\nmodel {\n mu ~ custom1(); // Normalization constants dropped\n target += custom1_lupdf(mu); // Normalization constants dropped\n target += custom1_lpdf(mu); // Normalization constants kept\n\n mu ~ custom2(); // Normalization constants kept\n target += custom2_lupdf(mu); // Normalization constants kept\n target += custom2_lpdf(mu); // Normalization constants kept\n}", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Proportionality Constants" + ] + }, + { + "objectID": "stan-users-guide/proportionality-constants.html#limitations-on-using-_lupdf-and-_lupmf-functions", + "href": "stan-users-guide/proportionality-constants.html#limitations-on-using-_lupdf-and-_lupmf-functions", + "title": "Proportionality Constants", + "section": "", + "text": "To avoid ambiguities in how the normalization constants work, functions ending in _lupdf and _lupmf can only be used in the model block or user-defined probability functions (functions ending in _lpdf or _lpmf).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Proportionality Constants" + ] + }, + { + "objectID": "stan-users-guide/poststratification.html", + "href": "stan-users-guide/poststratification.html", + "title": "Poststratification", + "section": "", + "text": "Stratification is a technique developed for survey sampling in which a population is partitioned into subgroups (i.e., stratified) and each group (i.e., stratum) is sampled independently. 
If the subgroups are more homogeneous (i.e., lower variance) than the population as a whole, this can reduce variance in the estimate of a quantity of interest at the population level.\nPoststratification is a technique for adjusting a non-representative sample (i.e., a convenience sample or other observational data) for which there are demographic predictors characterizing the strata. It is carried out after a model is fit to the observed data, hence the name poststratification (Little 1993). Poststratification can be fruitfully combined with regression modeling (or more general parametric modeling), which provides estimates based on combinations of predictors (or general parameters) rather than raw counts in each stratum. Multilevel modeling is useful in determining how much partial pooling to apply in the regressions, leading to the popularity of the combination of multilevel regression and poststratification (MRP) (Park, Gelman, and Bafumi 2004).\n\n\n\n\nStratification and poststratification can be applied to many applications beyond survey sampling (Kennedy and Gelman 2019). For example, large-scale whole-earth soil-carbon models are fit with parametric models of how soil-carbon depends on features of an area such as soil composition, flora, fauna, temperature, humidity, etc. Given a model that predicts soil-carbon concentration given these features, a whole-earth model can be created by stratifying the earth into a grid of say 10km by 10km “squares” (they can’t literally be square because the earth’s surface is topologically a sphere). Each grid area has an estimated makeup of soil type, forestation, climate, etc. The global level of soil carbon is then estimated using poststratification by simply summing the expected soil carbon estimated for each square in the grid (Paustian et al. 1997). Dynamic models can then be constructed by layering a time-series component, varying the poststratification predictors over time, or both (Field et al. 
1998).\n\n\n\nSuppose a university’s administration would like to estimate the support for a given proposal among its students. A poll is carried out in which 490 respondents are undergraduates, 112 are graduate students, and 47 are continuing education students. Now suppose that support for the issue among the poll respondents is 25% among undergraduate students (subgroup 1), 40% among graduate students (subgroup 2), and 80% among continuing education students (subgroup 3). Now suppose that the student body is made up of 20,000 undergraduates, 5,000 graduate students, and 2,000 continuing education students. It is important that our subgroups are exclusive and exhaustive, i.e., they form a partition of the population.\nThe proportion of support in the poll among students in each group provides a simple maximum likelihood estimate \\(\\theta^* = (0.25, 0.4,\n0.8)\\) of support in each group for a simple Bernoulli model where student \\(n\\)’s vote is modeled as \\[\ny_n \\sim \\textrm{bernoulli}(\\theta_{jj[n]}),\n\\] where \\(jj[n] \\in 1:3\\) is the subgroup to which the \\(n\\)-th student belongs.\nAn estimate of the population prevalence of support for the issue among students can be constructed by simply multiplying estimated support in each group by the size of each group. 
Letting \\(N = (20\\,000,\\, 5\\,000,\\, 2\\,000)\\) be the subgroup sizes, the poststratified estimate of support in the population \\(\\phi^*\\) is estimated by \\[\n\\phi^*\n= \\frac{\\displaystyle \\sum_{j = 1}^3 \\theta_j^* \\cdot N_j}\n {\\displaystyle \\sum_{j = 1}^3 N_j}.\n\\] Plugging in our estimates and population counts yields \\[\\begin{eqnarray*}\n\\phi^*\n& = & \\frac{0.25 \\cdot 20\\,000 + 0.4 \\cdot 5\\,000 + 0.8 \\cdot 2\\,000}\n {20\\,000 + 5\\,000 + 2\\,000}\n\\\\[4pt] & = & \\frac{8\\,600}{27\\,000}\n\\\\[4pt] & \\approx & 0.32.\n\\end{eqnarray*}\\]\n\n\n\n\nConsidering the same polling data from the previous section in a Bayesian setting, the uncertainty in the estimation of subgroup support is pushed through predictive inference in order to get some idea of the uncertainty of estimated support. Continuing the example of the previous section, the data model remains the same, \\[\ny_n \\sim \\textrm{bernoulli}(\\theta_{jj[n]}),\n\\] where \\(jj[n] \\in 1:J\\) is the group to which item \\(n\\) belongs and \\(\\theta_j\\) is the proportion of support in group \\(j\\).\nThis can be reformulated from a Bernoulli model to a binomial model in the usual way. 
Letting \\(A_j\\) be the number of respondents in group \\(j\\) and \\(a_j\\) be the number of positive responses in group \\(j\\), the data model may be reduced to the form \\[\na_j \\sim \\textrm{binomial}(A_j, \\theta_j).\n\\] A simple uniform prior on the proportion of support in each group completes the model, \\[\n\\theta_j \\sim \\textrm{beta}(1, 1).\n\\] A more informative prior could be used if there is prior information available about support among the student body.\nUsing sampling, draws \\(\\theta^{(m)} \\sim p(\\theta \\mid y)\\) from the posterior may be combined with the population sizes \\(N\\) to estimate \\(\\phi\\), the proportion of support in the population, \\[\n\\phi^{(m)}\n= \\frac{\\displaystyle \\sum_{j = 1}^J \\theta_j^{(m)} \\cdot N_j}\n {\\displaystyle \\sum_{j = 1}^J N_j}.\n\\] The posterior draws for \\(\\phi^{(m)}\\) characterize expected support for the issue in the entire population. These draws may be used to estimate expected support (the average of the \\(\\phi^{(m)}\\)), posterior intervals (quantiles of the \\(\\phi^{(m)}\\)), or to plot a histogram.\n\n\n\nThe maximum likelihood and Bayesian estimates can be handled with the same Stan program. The model of individual votes is collapsed to a binomial, where \\(A_j\\) is the number of voters from group \\(j\\), \\(a_j\\) is the number of positive responses from group \\(j\\), and \\(N_j\\) is the size of group \\(j\\) in the population.\ndata {\n int<lower=1> J;\n array[J] int<lower=0> A; \n array[J] int<lower=0> a;\n vector<lower=0>[J] N;\n}\nparameters {\n vector<lower=0, upper=1>[J] theta;\n}\nmodel {\n a ~ binomial(A, theta);\n}\ngenerated quantities {\n real<lower=0, upper=1> phi = dot_product(N, theta) / sum(N);\n}\nThe binomial distribution statement is vectorized, and implicitly generates the joint likelihood for the \\(J\\) terms. 
The prior is implicitly uniform on \\((0, 1),\\) the support of \\(\\theta.\\) The summation is computed using a dot product and the sum function, which is why N was declared as a vector rather than as an array of integers.\n\n\n\nIn applications to polling, there are often numerous demographic features like age, gender, income, education, state of residence, etc. If each of these demographic features induces a partition on the population, then their product also induces a partition on the population. Often sources such as the census have matching (or at least matchable) demographic data; otherwise it must be estimated.\nThe problem facing poststratification by demographic feature is that the number of strata increases exponentially as a function of the number of features. For instance, 4 age brackets, 2 sexes, 5 income brackets, and 50 states of residence leads to \\(4 \\cdot 2 \\cdot 5 \\cdot\n50 = 2000\\) strata. Adding another 5-way distinction, say for education level, leads to 10,000 strata. A simple model like the one in the previous section that takes an independent parameter \\(\\theta_j\\) for support in each stratum is unworkable in that many groups will have zero respondents and almost all groups will have very few respondents.\nA practical approach to overcoming the problem of low data size per stratum is to use a regression model. Each demographic feature will require a regression coefficient for each of its subgroups, but now the parameters add to rather than multiply the total number of parameters. For example, with 4 age brackets, 2 sexes, 5 income brackets, and 50 states of residence, there are only \\(4 + 2 + 5 + 50 =\n61\\) regression coefficients to estimate. Now suppose that item \\(n\\) has demographic features \\(\\textrm{age}_n \\in 1:4\\), \\(\\textrm{sex}_n \\in 1:2\\), \\(\\textrm{income}_n \\in 1:5,\\) and \\(\\textrm{state}_n \\in 1:50\\). 
A logistic regression may be formulated as \\[\ny_n \\sim\n\\textrm{bernoulli}(\\textrm{logit}^{-1}(\n\\alpha + \\beta_{\\textrm{age}[n]}\n+ \\gamma_{\\textrm{sex}[n]}\n+ \\delta_{\\textrm{income}[n]}\n+ \\epsilon_{\\textrm{state}[n]}\n)),\n\\] where \\(\\textrm{age}[n]\\) is the age of the \\(n\\)-th respondent, \\(\\textrm{sex}[n]\\) is their sex, \\(\\textrm{income}[n]\\) their income and \\(\\textrm{state}[n]\\) their state of residence. These coefficients can be assigned priors, resulting in a Bayesian regression model.\nTo poststratify the results, the population size for each combination of predictors must still be known. Then the population estimate is constructed as \\[\n\\sum_{i = 1}^4 \\sum_{j = 1}^2 \\sum_{k = 1}^5 \\sum_{m = 1}^{50}\n\\textrm{logit}^{-1}(\\alpha + \\beta_i + \\gamma_j + \\delta_k + \\epsilon_m)\n\\cdot \\textrm{pop}_{i, j, k, m},\n\\] where \\(\\textrm{pop}_{i, j, k, m}\\) is the size of the subpopulation with age \\(i\\), sex \\(j\\), income level \\(k\\), and state of residence \\(m\\).\nAs formulated, it should be clear that any kind of prediction could be used as a basis for poststratification. For example, a Gaussian process or neural network could be used to produce a non-parametric model of outcomes \\(y\\) given predictors \\(x\\).\n\n\n\nWith large numbers of demographic features, each cell may have very few items in it with which to estimate regression coefficients. For example, even in a national-level poll of 10,000 respondents, if they are divided by the 50 states, that’s only 200 respondents per state on average. When data sizes are small, parameter estimation can be stabilized and sharpened by providing hierarchical priors. With hierarchical priors, the data determines the amount of partial pooling among the groups. 
The only drawback is that if the number of groups is small, it can be hard to fit these models without strong hyperpriors.\nThe model introduced in the previous section had the data model \\[\ny_n \\sim\n\\textrm{bernoulli}(\\textrm{logit}^{-1}(\n\\alpha + \\beta_{\\textrm{age}[n]}\n+ \\gamma_{\\textrm{sex}[n]}\n+ \\delta_{\\textrm{income}[n]}\n+ \\epsilon_{\\textrm{state}[n]}\n)).\n\\] The overall intercept can be given a broad fixed prior, \\[\n\\alpha \\sim \\textrm{normal}(0, 5).\n\\] The other regression parameters can be given hierarchical priors, \\[\\begin{eqnarray*}\n\\beta_{1:4} & \\sim & \\textrm{normal}(0, \\sigma^{\\beta})\n\\\\[2pt]\n\\gamma_{1:2} & \\sim & \\textrm{normal}(0, \\sigma^{\\gamma})\n\\\\[2pt]\n\\delta_{1:5} & \\sim & \\textrm{normal}(0, \\sigma^{\\delta})\n\\\\[2pt]\n\\epsilon_{1:50} & \\sim & \\textrm{normal}(0, \\sigma^{\\epsilon}).\n\\end{eqnarray*}\\]\nThe hyperparameters for scale of variation within a group can be given simple standard hyperpriors, \\[\n\\sigma^{\\beta}, \\sigma^{\\gamma}, \\sigma^{\\delta}, \\sigma^{\\epsilon}\n\\sim \\textrm{normal}(0, 1).\n\\] The scales of these fixed hyperpriors need to be determined on a problem-by-problem basis, though ideally they will be close to standard (mean zero, unit variance).\n\n\nThe multilevel structure of the models used for multilevel regression and poststratification consist of a sum of intercepts that vary by demographic feature. This immediately introduces non-identifiability. A constant added to each state coefficient and subtracted from each age coefficient leads to exactly the same likelihood.\nThis is non-identifiability that is only mitigated by the (hierarchical) priors. When demographic partitions are small, as they are with several categories in the example, it can be more computationally tractable to enforce a sum-to-zero constraint on the coefficients. 
Other values than zero will by necessity be absorbed into the intercept, which is why it typically gets a broader prior even with standardized data. With a sum to zero constraint, coefficients for binary features will be negations of each other. For example, because there are only two sex categories, \\(\\gamma_2 =\n-\\gamma_1.\\)\nTo implement sum-to-zero constraints,\nparameters {\n vector[K - 1] alpha_raw;\n// ...\n}\ntransformed parameters {\n vector<multiplier=sigma_alpha>[K] alpha\n = append_row(alpha_raw, -sum(alpha_raw));\n// ... \n}\nmodel {\n alpha ~ normal(0, sigma_alpha);\n}\nThis prior is hard to interpret in that there are K normal distributions, but only K - 1 free parameters. An alternative is to put the prior only on alpha_raw, but that is also difficult to interpret.\nSoft constraints can be more computationally tractable. They are also simpler to implement.\nparameters {\n vector<multiplier=sigma_alpha>[K] alpha;\n// ...\n}\nmodel {\n alpha ~ normal(0, sigma_alpha);\n sum(alpha) ~ normal(0, 0.001);\n}\nThis leaves the regular prior, but adds a second prior that concentrates the sum near zero. The scale of the second prior will need to be established on a problem and data-set specific basis so that it doesn’t shrink the estimates beyond the shrinkage of the hierarchical scale parameters.\nNote that in the hierarchical model, the values of the coefficients when there are only two coefficients should be the same absolute value but opposite signs. Any other difference could be combined into the overall intercept \\(\\alpha.\\) Even with a wide prior on the intercept, the hyperprior on \\(\\sigma^{\\gamma}\\) may not be strong enough to enforce that, leading to a weak form of non-identifiability in the posterior. Enforcing a (hard or soft) sum-to-zero constraint can help mitigate non-identifiability. 
Whatever prior is chosen, prior predictive checks can help diagnose problems with it.\nNone of this work to manage identifiability in multilevel regressions has anything to do with the poststratification; it’s just required to fit a large multilevel regression with multiple discrete categories. Having multiple intercepts always leads to weak non-identifiability, even with the priors on the intercepts all centered at zero.\n\n\n\n\nMultilevel regression and poststratification can be coded directly in Stan. To code the non-centered parameterization for each coefficient, which will be required for sampling efficiency, the multiplier transform is used on each of the parameters. The combination of\nvector<multiplier=s>[K] a;\n// ...\na ~ normal(0, s);\nimplements a non-centered parameterization for a; a centered parameterization would drop the multiplier specification. The prior scale s is being centered here. The prior location is fixed to zero in multilevel regressions because there is an overall intercept; introducing a location parameters in the prior would introduce non-identifiability with the overall intercept. 
The centered parameterization drops the multiplier.\nHere is the full Stan model, which performs poststratification in the generated quantities using population sizes made available through data variable P.\ndata {\n int<lower=0> N;\n array[N] int<lower=1, upper=4> age;\n array[N] int<lower=1, upper=5> income;\n array[N] int<lower=1, upper=50> state;\n array[N] int<lower=0> y;\n array[4, 5, 50] int<lower=0> P;\n}\nparameters {\n real alpha;\n real<lower=0> sigma_beta;\n vector<multiplier=sigma_beta>[4] beta;\n real<lower=0> sigma_gamma;\n vector<multiplier=sigma_gamma>[5] gamma;\n real<lower=0> sigma_delta;\n vector<multiplier=sigma_delta>[50] delta;\n}\nmodel {\n y ~ bernoulli_logit(alpha + beta[age] + gamma[income] + delta[state]);\n alpha ~ normal(0, 2);\n beta ~ normal(0, sigma_beta);\n gamma ~ normal(0, sigma_gamma);\n delta ~ normal(0, sigma_delta);\n { sigma_beta, sigma_gamma, sigma_delta } ~ normal(0, 1);\n}\ngenerated quantities {\n real expect_pos = 0;\n int total = 0;\n for (b in 1:4) {\n for (c in 1:5) {\n for (d in 1:50) {\n total += P[b, c, d];\n expect_pos\n += P[b, c, d]\n * inv_logit(alpha + beta[b] + gamma[c] + delta[d]);\n }\n }\n }\n real<lower=0, upper=1> phi = expect_pos / total;\n}\nUnlike in posterior predictive inference aimed at uncertainty, there is no need to introduce binomial sampling uncertainty into the estimate of expected positive votes. Instead, generated quantities are computed as expectations. In general, it is more efficient to work in expectation if possible (the Rao-Blackwell theorem says it’s at least as efficient to work in expectation, but in practice, it can be much much more efficient, especially for discrete quantities).\n\n\nIn some cases, it can be more efficient to break the data down by group. Suppose there are \\(4 \\times 5 \\times 2 \\times 50 = 2000\\) groups. 
The data can be broken down into a size-2000 array, with entries corresponding to total vote counts in that group\nint<lower=0> G;\narray[G] int<lower=1, upper=4> age;\narray[G] int<lower=1, upper=5> income;\narray[G] int<lower=1, upper=50> state;\nThen the number of positive votes and the number of total votes are collected into two parallel arrays indexed by group.\narray[G] int<lower=0> pos_votes;\narray[G] int<lower=0> total_votes;\nFinally, the data model is converted to binomial.\npos_votes ~ binomial_logit(total_votes,\n alpha + beta[age] + ...);\nThe predictors look the same because of the way the age and other data items are coded.\n\n\n\nIn this first model, sex is not included as a predictor. With only two categories, it needs to be modeled separately, because it is not feasible to build a hierarchical model with only two cases. A sex predictor is straightforward to add to the data block; it takes on values 1 or 2 for each of the N data points.\n array[N] int<lower=1, upper=2> sex;\nThen add a single regression coefficient as a parameter,\n real epsilon;\nIn the log odds calculation, introduce a new term\n[epsilon, -epsilon][sex]';\nThat is, the data model will now look like\n y ~ bernoulli_logit(alpha + beta[age] + gamma[income] + delta[state]\n + [epsilon, -epsilon][sex]');\nFor data point n, the expression [epsilon, -epsilon][sex] takes on value [epsilon, -epsilon][sex][n], which with Stan’s multi-indexing reduces to [epsilon, -epsilon][sex[n]]. This term evaluates to epsilon if sex[n] is 1 and to -epsilon if sex[n] is 2. The result is effectively a sum-to-zero constraint on two sex coefficients. 
The ' at the end transposes [epsilon, -epsilon][sex] which is a row_vector into a vector that can be added to the other variables.\nFinally, a prior is needed for the coefficient in the model block,\nepsilon ~ normal(0, 2);\nAs with other priors in multilevel models, the posterior for epsilon should be investigated to make sure it is not unrealistically wide.\n\n\n\n\nIf there are group-level predictors, such as average income in a state, or vote share in a previous election, these may be used as predictors in the regression. They will not pose an obstacle to poststratification because they are at the group level. For example, suppose the average income level in the state is available as the data variable\narray[50] real<lower=0> income;\nthen a regression coefficient psi can be added for the effect of average state income,\nreal psi;\nwith a fixed prior,\npsi ~ normal(0, 2);\nThis prior assumes the income predictor has been standardized. Finally, a term is added to the regression for the fixed predictor,\ny ~ bernoulli_logit(alpha + beta[age] + ... + delta[state]\n + income[state] * psi);\nAnd finally, the formula in the generated quantities block is also updated,\nexpect_pos\n += P[b, c, d]\n * inv_logit(alpha + beta[b] + gamma[c] + delta[d]\n + income[d] * psi);\nHere d is the loop variable looping over states. This ensures that the poststratification formula matches the model formula.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Poststratification" + ] + }, + { + "objectID": "stan-users-guide/poststratification.html#some-examples", + "href": "stan-users-guide/poststratification.html#some-examples", + "title": "Poststratification", + "section": "", + "text": "Stratification and poststratification can be applied to many applications beyond survey sampling (Kennedy and Gelman 2019). 
For example, large-scale whole-earth soil-carbon models are fit with parametric models of how soil-carbon depends on features of an area such as soil composition, flora, fauna, temperature, humidity, etc. Given a model that predicts soil-carbon concentration given these features, a whole-earth model can be created by stratifying the earth into a grid of say 10km by 10km “squares” (they can’t literally be square because the earth’s surface is topologically a sphere). Each grid area has an estimated makeup of soil type, forestation, climate, etc. The global level of soil carbon is then estimated using poststratification by simply summing the expected soil carbon estimated for each square in the grid (Paustian et al. 1997). Dynamic models can then be constructed by layering a time-series component, varying the poststratification predictors over time, or both (Field et al. 1998).\n\n\n\nSuppose a university’s administration would like to estimate the support for a given proposal among its students. A poll is carried out in which 490 respondents are undergraduates, 112 are graduate students, and 47 are continuing education students. Now suppose that support for the issue among the poll respondents is 25% among undergraduate students (subgroup 1), 40% among graduate students (subgroup 2), and 80% among continuing education students (subgroup 3). Now suppose that the student body is made up of 20,000 undergraduates, 5,000 graduate students, and 2,000 continuing education students. 
It is important that our subgroups are exclusive and exhaustive, i.e., they form a partition of the population.\nThe proportion of support in the poll among students in each group provides a simple maximum likelihood estimate \\(\\theta^* = (0.25, 0.4,\n0.8)\\) of support in each group for a simple Bernoulli model where student \\(n\\)’s vote is modeled as \\[\ny_n \\sim \\textrm{bernoulli}(\\theta_{jj[n]}),\n\\] where \\(jj[n] \\in 1:3\\) is the subgroup to which the \\(n\\)-th student belongs.\nAn estimate of the population prevalence of support for the issue among students can be constructed by simply multiplying estimated support in each group by the size of each group. Letting \\(N = (20\\,000,\\, 5\\,000,\\, 2\\,000)\\) be the subgroup sizes, the poststratified estimate of support in the population \\(\\phi^*\\) is estimated by \\[\n\\phi^*\n= \\frac{\\displaystyle \\sum_{j = 1}^3 \\theta_j^* \\cdot N_j}\n {\\displaystyle \\sum_{j = 1}^3 N_j}.\n\\] Plugging in our estimates and population counts yields \\[\\begin{eqnarray*}\n\\phi^*\n& = & \\frac{0.25 \\cdot 20\\,000 + 0.4 \\cdot 5\\,000 + 0.8 \\cdot 2\\,000}\n {20\\,000 + 5\\,000 + 2\\,000}\n\\\\[4pt] & = & \\frac{8\\,600}{27\\,000}\n\\\\[4pt] & \\approx & 0.32.\n\\end{eqnarray*}\\]", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Poststratification" + ] + }, + { + "objectID": "stan-users-guide/poststratification.html#bayesian-poststratification", + "href": "stan-users-guide/poststratification.html#bayesian-poststratification", + "title": "Poststratification", + "section": "", + "text": "Considering the same polling data from the previous section in a Bayesian setting, the uncertainty in the estimation of subgroup support is pushed through predictive inference in order to get some idea of the uncertainty of estimated support. 
Continuing the example of the previous section, the data model remains the same, \\[\ny_n \\sim \\textrm{bernoulli}(\\theta_{jj[n]}),\n\\] where \\(jj[n] \\in 1:J\\) is the group to which item \\(n\\) belongs and \\(\\theta_j\\) is the proportion of support in group \\(j\\).\nThis can be reformulated from a Bernoulli model to a binomial model in the usual way. Letting \\(A_j\\) be the number of respondents in group \\(j\\) and \\(a_j\\) be the number of positive responses in group \\(j\\), the data model may be reduced to the form \\[\na_j \\sim \\textrm{binomial}(A_j, \\theta_j).\n\\] A simple uniform prior on the proportion of support in each group completes the model, \\[\n\\theta_j \\sim \\textrm{beta}(1, 1).\n\\] A more informative prior could be used if there is prior information available about support among the student body.\nUsing sampling, draws \\(\\theta^{(m)} \\sim p(\\theta \\mid y)\\) from the posterior may be combined with the population sizes \\(N\\) to estimate \\(\\phi\\), the proportion of support in the population, \\[\n\\phi^{(m)}\n= \\frac{\\displaystyle \\sum_{j = 1}^J \\theta_j^{(m)} \\cdot N_j}\n {\\displaystyle \\sum_{j = 1}^J N_j}.\n\\] The posterior draws for \\(\\phi^{(m)}\\) characterize expected support for the issue in the entire population. These draws may be used to estimate expected support (the average of the \\(\\phi^{(m)}\\)), posterior intervals (quantiles of the \\(\\phi^{(m)}\\)), or to plot a histogram.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Poststratification" + ] + }, + { + "objectID": "stan-users-guide/poststratification.html#poststratification-in-stan", + "href": "stan-users-guide/poststratification.html#poststratification-in-stan", + "title": "Poststratification", + "section": "", + "text": "The maximum likelihood and Bayesian estimates can be handled with the same Stan program. 
The model of individual votes is collapsed to a binomial, where \\(A_j\\) is the number of voters from group \\(j\\), \\(a_j\\) is the number of positive responses from group \\(j\\), and \\(N_j\\) is the size of group \\(j\\) in the population.\ndata {\n int<lower=1> J;\n array[J] int<lower=0> A; \n array[J] int<lower=0> a;\n vector<lower=0>[J] N;\n}\nparameters {\n vector<lower=0, upper=1>[J] theta;\n}\nmodel {\n a ~ binomial(A, theta);\n}\ngenerated quantities {\n real<lower=0, upper=1> phi = dot_product(N, theta) / sum(N);\n}\nThe binomial distribution statement is vectorized, and implicitly generates the joint likelihood for the \\(J\\) terms. The prior is implicitly uniform on \\((0, 1),\\) the support of \\(\\theta.\\) The summation is computed using a dot product and the sum function, which is why N was declared as a vector rather than as an array of integers.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Poststratification" + ] + }, + { + "objectID": "stan-users-guide/poststratification.html#regression-and-poststratification", + "href": "stan-users-guide/poststratification.html#regression-and-poststratification", + "title": "Poststratification", + "section": "", + "text": "In applications to polling, there are often numerous demographic features like age, gender, income, education, state of residence, etc. If each of these demographic features induces a partition on the population, then their product also induces a partition on the population. Often sources such as the census have matching (or at least matchable) demographic data; otherwise it must be estimated.\nThe problem facing poststratification by demographic feature is that the number of strata increases exponentially as a function of the number of features. For instance, 4 age brackets, 2 sexes, 5 income brackets, and 50 states of residence leads to \\(4 \\cdot 2 \\cdot 5 \\cdot\n50 = 2000\\) strata. 
Adding another 5-way distinction, say for education level, leads to 10,000 strata. A simple model like the one in the previous section that takes an independent parameter \\(\\theta_j\\) for support in each stratum is unworkable in that many groups will have zero respondents and almost all groups will have very few respondents.\nA practical approach to overcoming the problem of low data size per stratum is to use a regression model. Each demographic feature will require a regression coefficient for each of its subgroups, but now the parameters add to rather than multiply the total number of parameters. For example, with 4 age brackets, 2 sexes, 5 income brackets, and 50 states of residence, there are only \\(4 + 2 + 5 + 50 =\n61\\) regression coefficients to estimate. Now suppose that item \\(n\\) has demographic features \\(\\textrm{age}_n \\in 1:4\\), \\(\\textrm{sex}_n \\in 1:2\\), \\(\\textrm{income}_n \\in 1:5,\\) and \\(\\textrm{state}_n \\in 1:50\\). A logistic regression may be formulated as \\[\ny_n \\sim\n\\textrm{bernoulli}(\\textrm{logit}^{-1}(\n\\alpha + \\beta_{\\textrm{age}[n]}\n+ \\gamma_{\\textrm{sex}[n]}\n+ \\delta_{\\textrm{income}[n]}\n+ \\epsilon_{\\textrm{state}[n]}\n)),\n\\] where \\(\\textrm{age}[n]\\) is the age of the \\(n\\)-th respondent, \\(\\textrm{sex}[n]\\) is their sex, \\(\\textrm{income}[n]\\) their income and \\(\\textrm{state}[n]\\) their state of residence. These coefficients can be assigned priors, resulting in a Bayesian regression model.\nTo poststratify the results, the population size for each combination of predictors must still be known. 
Then the population estimate is constructed as \\[\n\\sum_{i = 1}^4 \\sum_{j = 1}^2 \\sum_{k = 1}^5 \\sum_{m = 1}^{50}\n\\textrm{logit}^{-1}(\\alpha + \\beta_i + \\gamma_j + \\delta_k + \\epsilon_m)\n\\cdot \\textrm{pop}_{i, j, k, m},\n\\] where \\(\\textrm{pop}_{i, j, k, m}\\) is the size of the subpopulation with age \\(i\\), sex \\(j\\), income level \\(k\\), and state of residence \\(m\\).\nAs formulated, it should be clear that any kind of prediction could be used as a basis for poststratification. For example, a Gaussian process or neural network could be used to produce a non-parametric model of outcomes \\(y\\) given predictors \\(x\\).", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Poststratification" + ] + }, + { + "objectID": "stan-users-guide/poststratification.html#multilevel-regression-and-poststratification", + "href": "stan-users-guide/poststratification.html#multilevel-regression-and-poststratification", + "title": "Poststratification", + "section": "", + "text": "With large numbers of demographic features, each cell may have very few items in it with which to estimate regression coefficients. For example, even in a national-level poll of 10,000 respondents, if they are divided by the 50 states, that’s only 200 respondents per state on average. When data sizes are small, parameter estimation can be stabilized and sharpened by providing hierarchical priors. With hierarchical priors, the data determines the amount of partial pooling among the groups. 
The only drawback is that if the number of groups is small, it can be hard to fit these models without strong hyperpriors.\nThe model introduced in the previous section had the data model \\[\ny_n \\sim\n\\textrm{bernoulli}(\\textrm{logit}^{-1}(\n\\alpha + \\beta_{\\textrm{age}[n]}\n+ \\gamma_{\\textrm{sex}[n]}\n+ \\delta_{\\textrm{income}[n]}\n+ \\epsilon_{\\textrm{state}[n]}\n)).\n\\] The overall intercept can be given a broad fixed prior, \\[\n\\alpha \\sim \\textrm{normal}(0, 5).\n\\] The other regression parameters can be given hierarchical priors, \\[\\begin{eqnarray*}\n\\beta_{1:4} & \\sim & \\textrm{normal}(0, \\sigma^{\\beta})\n\\\\[2pt]\n\\gamma_{1:2} & \\sim & \\textrm{normal}(0, \\sigma^{\\gamma})\n\\\\[2pt]\n\\delta_{1:5} & \\sim & \\textrm{normal}(0, \\sigma^{\\delta})\n\\\\[2pt]\n\\epsilon_{1:50} & \\sim & \\textrm{normal}(0, \\sigma^{\\epsilon}).\n\\end{eqnarray*}\\]\nThe hyperparameters for scale of variation within a group can be given simple standard hyperpriors, \\[\n\\sigma^{\\beta}, \\sigma^{\\gamma}, \\sigma^{\\delta}, \\sigma^{\\epsilon}\n\\sim \\textrm{normal}(0, 1).\n\\] The scales of these fixed hyperpriors need to be determined on a problem-by-problem basis, though ideally they will be close to standard (mean zero, unit variance).\n\n\nThe multilevel structure of the models used for multilevel regression and poststratification consists of a sum of intercepts that vary by demographic feature. This immediately introduces non-identifiability. A constant added to each state coefficient and subtracted from each age coefficient leads to exactly the same likelihood.\nThis is non-identifiability that is only mitigated by the (hierarchical) priors. When demographic partitions are small, as they are with several categories in the example, it can be more computationally tractable to enforce a sum-to-zero constraint on the coefficients. 
Other values than zero will by necessity be absorbed into the intercept, which is why it typically gets a broader prior even with standardized data. With a sum to zero constraint, coefficients for binary features will be negations of each other. For example, because there are only two sex categories, \\(\\gamma_2 =\n-\\gamma_1.\\)\nTo implement sum-to-zero constraints,\nparameters {\n vector[K - 1] alpha_raw;\n// ...\n}\ntransformed parameters {\n vector<multiplier=sigma_alpha>[K] alpha\n = append_row(alpha_raw, -sum(alpha_raw));\n// ... \n}\nmodel {\n alpha ~ normal(0, sigma_alpha);\n}\nThis prior is hard to interpret in that there are K normal distributions, but only K - 1 free parameters. An alternative is to put the prior only on alpha_raw, but that is also difficult to interpret.\nSoft constraints can be more computationally tractable. They are also simpler to implement.\nparameters {\n vector<multiplier=sigma_alpha>[K] alpha;\n// ...\n}\nmodel {\n alpha ~ normal(0, sigma_alpha);\n sum(alpha) ~ normal(0, 0.001);\n}\nThis leaves the regular prior, but adds a second prior that concentrates the sum near zero. The scale of the second prior will need to be established on a problem and data-set specific basis so that it doesn’t shrink the estimates beyond the shrinkage of the hierarchical scale parameters.\nNote that in the hierarchical model, the values of the coefficients when there are only two coefficients should be the same absolute value but opposite signs. Any other difference could be combined into the overall intercept \\(\\alpha.\\) Even with a wide prior on the intercept, the hyperprior on \\(\\sigma^{\\gamma}\\) may not be strong enough to enforce that, leading to a weak form of non-identifiability in the posterior. Enforcing a (hard or soft) sum-to-zero constraint can help mitigate non-identifiability. 
Whatever prior is chosen, prior predictive checks can help diagnose problems with it.\nNone of this work to manage identifiability in multilevel regressions has anything to do with the poststratification; it’s just required to fit a large multilevel regression with multiple discrete categories. Having multiple intercepts always leads to weak non-identifiability, even with the priors on the intercepts all centered at zero.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Poststratification" + ] + }, + { + "objectID": "stan-users-guide/poststratification.html#coding-mrp-in-stan", + "href": "stan-users-guide/poststratification.html#coding-mrp-in-stan", + "title": "Poststratification", + "section": "", + "text": "Multilevel regression and poststratification can be coded directly in Stan. To code the non-centered parameterization for each coefficient, which will be required for sampling efficiency, the multiplier transform is used on each of the parameters. The combination of\nvector<multiplier=s>[K] a;\n// ...\na ~ normal(0, s);\nimplements a non-centered parameterization for a; a centered parameterization would drop the multiplier specification. The prior scale s is being centered here. The prior location is fixed to zero in multilevel regressions because there is an overall intercept; introducing a location parameter in the prior would introduce non-identifiability with the overall intercept. 
The centered parameterization drops the multiplier.\nHere is the full Stan model, which performs poststratification in the generated quantities using population sizes made available through data variable P.\ndata {\n int<lower=0> N;\n array[N] int<lower=1, upper=4> age;\n array[N] int<lower=1, upper=5> income;\n array[N] int<lower=1, upper=50> state;\n array[N] int<lower=0> y;\n array[4, 5, 50] int<lower=0> P;\n}\nparameters {\n real alpha;\n real<lower=0> sigma_beta;\n vector<multiplier=sigma_beta>[4] beta;\n real<lower=0> sigma_gamma;\n vector<multiplier=sigma_gamma>[5] gamma;\n real<lower=0> sigma_delta;\n vector<multiplier=sigma_delta>[50] delta;\n}\nmodel {\n y ~ bernoulli_logit(alpha + beta[age] + gamma[income] + delta[state]);\n alpha ~ normal(0, 2);\n beta ~ normal(0, sigma_beta);\n gamma ~ normal(0, sigma_gamma);\n delta ~ normal(0, sigma_delta);\n { sigma_beta, sigma_gamma, sigma_delta } ~ normal(0, 1);\n}\ngenerated quantities {\n real expect_pos = 0;\n int total = 0;\n for (b in 1:4) {\n for (c in 1:5) {\n for (d in 1:50) {\n total += P[b, c, d];\n expect_pos\n += P[b, c, d]\n * inv_logit(alpha + beta[b] + gamma[c] + delta[d]);\n }\n }\n }\n real<lower=0, upper=1> phi = expect_pos / total;\n}\nUnlike in posterior predictive inference aimed at uncertainty, there is no need to introduce binomial sampling uncertainty into the estimate of expected positive votes. Instead, generated quantities are computed as expectations. In general, it is more efficient to work in expectation if possible (the Rao-Blackwell theorem says it’s at least as efficient to work in expectation, but in practice, it can be much much more efficient, especially for discrete quantities).\n\n\nIn some cases, it can be more efficient to break the data down by group. Suppose there are \\(4 \\times 5 \\times 2 \\times 50 = 2000\\) groups. 
The data can be broken down into a size-2000 array, with entries corresponding to total vote counts in that group\nint<lower=0> G;\narray[G] int<lower=1, upper=4> age;\narray[G] int<lower=1, upper=5> income;\narray[G] int<lower=1, upper=50> state;\nThen the number of positive votes and the number of total votes are collected into two parallel arrays indexed by group.\narray[G] int<lower=0> pos_votes;\narray[G] int<lower=0> total_votes;\nFinally, the data model is converted to binomial.\npos_votes ~ binomial_logit(total_votes,\n alpha + beta[age] + ...);\nThe predictors look the same because of the way the age and other data items are coded.\n\n\n\nIn this first model, sex is not included as a predictor. With only two categories, it needs to be modeled separately, because it is not feasible to build a hierarchical model with only two cases. A sex predictor is straightforward to add to the data block; it takes on values 1 or 2 for each of the N data points.\n array[N] int<lower=1, upper=2> sex;\nThen add a single regression coefficient as a parameter,\n real epsilon;\nIn the log odds calculation, introduce a new term\n[epsilon, -epsilon][sex]';\nThat is, the data model will now look like\n y ~ bernoulli_logit(alpha + beta[age] + gamma[income] + delta[state]\n + [epsilon, -epsilon][sex]');\nFor data point n, the expression [epsilon, -epsilon][sex] takes on value [epsilon, -epsilon][sex][n], which with Stan’s multi-indexing reduces to [epsilon, -epsilon][sex[n]]. This term evaluates to epsilon if sex[n] is 1 and to -epsilon if sex[n] is 2. The result is effectively a sum-to-zero constraint on two sex coefficients. 
The ' at the end transposes [epsilon, -epsilon][sex] which is a row_vector into a vector that can be added to the other variables.\nFinally, a prior is needed for the coefficient in the model block,\nepsilon ~ normal(0, 2);\nAs with other priors in multilevel models, the posterior for epsilon should be investigated to make sure it is not unrealistically wide.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Poststratification" + ] + }, + { + "objectID": "stan-users-guide/poststratification.html#adding-group-level-predictors", + "href": "stan-users-guide/poststratification.html#adding-group-level-predictors", + "title": "Poststratification", + "section": "", + "text": "If there are group-level predictors, such as average income in a state, or vote share in a previous election, these may be used as predictors in the regression. They will not pose an obstacle to poststratification because they are at the group level. For example, suppose the average income level in the state is available as the data variable\narray[50] real<lower=0> income;\nthen a regression coefficient psi can be added for the effect of average state income,\nreal psi;\nwith a fixed prior,\npsi ~ normal(0, 2);\nThis prior assumes the income predictor has been standardized. Finally, a term is added to the regression for the fixed predictor,\ny ~ bernoulli_logit(alpha + beta[age] + ... + delta[state]\n + income[state] * psi);\nAnd finally, the formula in the generated quantities block is also updated,\nexpect_pos\n += P[b, c, d]\n * inv_logit(alpha + beta[b] + gamma[c] + delta[d]\n + income[d] * psi);\nHere d is the loop variable looping over states. 
This ensures that the poststratification formula matches the model formula.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Poststratification" + ] + }, + { + "objectID": "stan-users-guide/posterior-prediction.html", + "href": "stan-users-guide/posterior-prediction.html", + "title": "Posterior Predictive Sampling", + "section": "", + "text": "The goal of inference is often posterior prediction, that is evaluating or sampling from the posterior predictive distribution \\(p(\\tilde{y} \\mid y),\\) where \\(y\\) is observed data and \\(\\tilde{y}\\) is yet to be observed data. Often there are unmodeled predictors \\(x\\) and \\(\\tilde{x}\\) for the observed data \\(y\\) and unobserved data \\(\\tilde{y}\\). With predictors, the posterior predictive density is \\(p(\\tilde{y} \\mid\n\\tilde{x}, x, y).\\) All of these variables may represent multivariate quantities.\nThis chapter explains how to sample from the posterior predictive distribution in Stan, including applications to posterior predictive simulation and calculating event probabilities. These techniques can be coded in Stan using random number generation in the generated quantities block. 
Further, a technique for fitting and performing inference in two stages is presented in a section on stand-alone generated quantities in Stan.\n\n\nGiven a full Bayesian model \\(p(y, \\theta)\\), the posterior predictive density for new data \\(\\tilde{y}\\) given observed data \\(y\\) is \\[\np(\\tilde{y} \\mid y)\n=\n\\int p(\\tilde{y} \\mid \\theta) \\cdot p(\\theta \\mid y)\n\\, \\textrm{d}\\theta.\n\\] The product under the integral reduces to the joint posterior density \\(p(\\tilde{y}, \\theta \\mid y),\\) so that the integral is simply marginalizing out the parameters \\(\\theta,\\) leaving the predictive density \\(p(\\tilde{y} \\mid y)\\) of future observations given past observations.\n\n\n\nThe posterior predictive density (or mass) of a prediction \\(\\tilde{y}\\) given observed data \\(y\\) can be computed using \\(M\\) Monte Carlo draws\n\\[\n\\theta^{(m)} \\sim p(\\theta \\mid y)\n\\] from the posterior as \\[\np(\\tilde{y} \\mid y)\n\\approx\n\\frac{1}{M} \\sum_{m = 1}^M p(\\tilde{y} \\mid \\theta^{(m)}).\n\\]\nComputing directly using this formula will lead to underflow in many situations, but the log posterior predictive density, \\(\\log\np(\\tilde{y} \\mid y)\\) may be computed using the stable log sum of exponentials function as \\[\\begin{eqnarray*}\n\\log p(\\tilde{y} \\mid y)\n& \\approx &\n\\log \\frac{1}{M} \\sum_{m = 1}^M p(\\tilde{y} \\mid \\theta^{(m)})\n\\\\[4pt]\n& = &\n- \\log M\n+ \\textrm{log-sum-exp}_{m = 1}^M \\log p(\\tilde{y} \\mid \\theta^{(m)}),\n\\end{eqnarray*}\\] where \\[\n\\textrm{log-sum-exp}_{m = 1}^M v_m\n= \\log \\sum_{m = 1}^M \\exp v_m\n\\] is used to maintain arithmetic precision. 
See the section on log sum of exponentials for more details.\n\n\n\nGiven draws from the posterior \\(\\theta^{(m)} \\sim p(\\theta \\mid y),\\) draws from the posterior predictive \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y}\n\\mid y)\\) can be generated by randomly generating from the sampling distribution with the parameter draw plugged in, \\[\n\\tilde{y}^{(m)} \\sim p(y \\mid \\theta^{(m)}).\n\\]\nRandomly drawing \\(\\tilde{y}\\) from the data model is critical because there are two forms of uncertainty in posterior predictive quantities, aleatoric uncertainty and epistemic uncertainty. Epistemic uncertainty arises because \\(\\theta\\) is unknown and estimated based only on a finite sample of data \\(y\\). Aleatoric uncertainty arises because even a known value of \\(\\theta\\) leads to uncertainty about new \\(\\tilde{y}\\) as described by the data model \\(p(\\tilde{y} \\mid \\theta)\\). Both forms of uncertainty show up in the factored form of the posterior predictive distribution, \\[\np(\\tilde{y} \\mid y)\n=\n\\int\n\\underbrace{p(\\tilde{y} \\mid \\theta)}_{\\begin{array}{l}\n \\textrm{aleatoric}\n \\\\[-2pt] \\textrm{uncertainty}\n \\end{array}}\n\\cdot \\underbrace{p(\\theta \\mid y)}_{\\begin{array}{l}\n \\textrm{epistemic}\n \\\\[-2pt] \\textrm{uncertainty}\n \\end{array}}\n\\, \\textrm{d}\\theta.\n\\]\n\n\n\nPosterior predictive quantities can be coded in Stan using the generated quantities block.\n\n\nFor example, consider a simple Poisson model for count data with a rate parameter \\(\\lambda > 0\\) having a gamma-distributed prior, \\[\n\\lambda \\sim \\textrm{gamma}(1, 1).\n\\] The \\(N\\) observations \\(y_1, \\ldots, y_N\\) are modeled as Poisson distributed, \\[\ny_n \\sim \\textrm{poisson}(\\lambda).\n\\]\n\n\n\nThe following Stan program defines a variable for \\(\\tilde{y}\\) by random number generation in the generated quantities block.\ndata {\n int<lower=0> N;\n array[N] int<lower=0> y;\n}\nparameters {\n real<lower=0> lambda;\n}\nmodel 
{\n lambda ~ gamma(1, 1);\n y ~ poisson(lambda);\n}\ngenerated quantities {\n int<lower=0> y_tilde = poisson_rng(lambda);\n}\nThe random draw from the data model for \\(\\tilde{y}\\) is coded using Stan’s Poisson random number generator in the generated quantities block. This accounts for the aleatoric component of the uncertainty; Stan’s posterior sampler will account for the epistemic uncertainty, generating a new \\(\\tilde{y}^{(m)} \\sim p(y \\mid\n\\lambda^{(m)})\\) for each posterior draw \\(\\lambda^{(m)} \\sim p(\\lambda\n\\mid y).\\)\nThe posterior draws \\(\\tilde{y}^{(m)}\\) may be used to estimate the expected value of \\(\\tilde{y}\\) or any of its quantiles or posterior intervals, as well as event probabilities involving \\(\\tilde{y}\\). In general, \\(\\mathbb{E}[f(\\tilde{y}, \\theta) \\mid y]\\) may be evaluated as \\[\n\\mathbb{E}[f(\\tilde{y}, \\theta) \\mid y]\n\\approx \\frac{1}{M} \\sum_{m=1}^M f(\\tilde{y}^{(m)}, \\theta^{(m)}),\n\\] which is just the posterior mean of \\(f(\\tilde{y}, \\theta).\\) This quantity is computed by Stan if the value of \\(f(\\tilde{y}, \\theta)\\) is assigned to a variable in the generated quantities block. That is, if we have\ngenerated quantities {\n real f_val = f(y_tilde, theta);\n // ...\n}\nwhere the value of \\(f(\\tilde{y}, \\theta)\\) is assigned to variable f_val, then the posterior mean of f_val will be the expectation \\(\\mathbb{E}[f(\\tilde{y}, \\theta) \\mid y]\\).\n\n\n\nThe gamma distribution is the conjugate prior distribution for the Poisson distribution, so the posterior density \\(p(\\lambda \\mid y)\\) will also follow a gamma distribution.\nBecause the posterior follows a gamma distribution and the sampling distribution is Poisson, the posterior predictive \\(p(\\tilde{y} \\mid\ny)\\) will follow a negative binomial distribution, because the negative binomial is defined as a compound gamma-Poisson. 
That is, \\(y \\sim\n\\textrm{negative-binomial}(\\alpha, \\beta)\\) if \\(\\lambda \\sim\n\\textrm{gamma}(\\alpha, \\beta)\\) and \\(y \\sim \\textrm{poisson}(\\lambda).\\) Rather than marginalizing out the rate parameter \\(\\lambda\\) analytically as can be done to define the negative binomial probability mass function, the rate \\(\\lambda^{(m)} \\sim p(\\lambda \\mid y)\\) is sampled from the posterior and then used to generate a draw of \\(\\tilde{y}^{(m)} \\sim p(y \\mid \\lambda^{(m)}).\\)\n\n\n\n\n\n\nConsider a regression with a single predictor \\(x_n\\) for the training outcome \\(y_n\\) and \\(\\tilde{x}_n\\) for the test outcome \\(\\tilde{y}_n.\\) Without considering the parametric form of any of the distributions, the posterior predictive distribution for a general regression is \\[\\begin{eqnarray}\np(\\tilde{y} \\mid \\tilde{x}, y, x)\n& = & \\int p(\\tilde{y} \\mid \\tilde{x}, \\theta) \\cdot p(\\theta \\mid y, x) \\,\n\\textrm{d}\\theta\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m=1}^M \\, p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}),\n\\end{eqnarray}\\] where \\(\\theta^{(m)} \\sim p(\\theta \\mid x, y).\\)\n\n\n\nThe following program defines a Poisson regression with a single predictor. These predictors are all coded as data, as are their sizes. Only the observed \\(y\\) values are coded as data. 
The predictive quantities \\(\\tilde{y}\\) appear in the generated quantities block, where they are generated by random number generation.\ndata {\n int<lower=0> N;\n vector[N] x;\n array[N] int<lower=0> y;\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n}\nparameters {\n real alpha;\n real beta;\n}\nmodel {\n y ~ poisson_log(alpha + beta * x);\n { alpha, beta } ~ normal(0, 1);\n}\ngenerated quantities {\n array[N_tilde] int<lower=0> y_tilde\n = poisson_log_rng(alpha + beta * x_tilde);\n}\nThe Poisson distributions in both the model and generated quantities block are coded using the log rate as a parameter (that’s poisson_log vs. 
poisson, with the suffixes defining the scale of the parameter). The regression coefficients, an intercept alpha and slope beta, are given standard normal priors.\nIn the model block, the log rate for the Poisson is a linear function of the training data \\(x\\), whereas in the generated quantities block it is a function of the test data \\(\\tilde{x}\\). Because the generated quantities block does not affect the posterior draws, the model fits \\(\\alpha\\) and \\(\\beta\\) using only the training data, reserving \\(\\tilde{x}\\) to generate \\(\\tilde{y}.\\)\nThe result from running Stan is a predictive sample \\(\\tilde{y}^{(1)},\n\\ldots \\tilde{y}^{(M)}\\) where each \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y}\n\\mid \\tilde{x}, x, y).\\)\nThe mean of the posterior predictive distribution is the expected value \\[\\begin{align}\n\\mathbb{E}[\\tilde{y} \\mid \\tilde{x}, x, y]\n& =\n\\int\n\\tilde{y}\n\\cdot p(\\tilde{y} \\mid \\tilde{x}, \\theta)\n\\cdot p(\\theta \\mid x, y)\n\\, \\textrm{d}\\theta\n\\\\[4pt]\n& \\approx \\frac{1}{M} \\sum_{m = 1}^M \\tilde{y}^{(m)},\n\\end{align}\\] where the \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y} \\mid \\tilde{x}, x, y)\\) are drawn from the posterior predictive distribution. Thus the posterior mean of y_tilde[n] after running Stan is the expected value of \\(\\tilde{y}_n\\) conditioned on the training data \\(x, y\\) and predictor \\(\\tilde{x}_n.\\) This is the Bayesian estimate for \\(\\tilde{y}\\) with minimum expected squared error. 
The posterior draws can also be used to estimate quantiles for the median and any posterior intervals of interest for \\(\\tilde{y}\\), as well as covariance of the \\(\\tilde{y_n}.\\) The posterior draws \\(\\tilde{y}^{(m)}\\) may also be used to estimate predictive event probabilities, such as \\(\\Pr[\\tilde{y}_1 > 0]\\) or \\(\\Pr[\\prod_{n =\n1}^{\\tilde{N}}(\\tilde{y_n}) > 1],\\) as expectations of indicator functions.\nAll of this can be carried out by running Stan only a single time to draw a single sample of \\(M\\) draws, \\[\n\\tilde{y}^{(1)}, \\ldots, \\tilde{y}^{(M)} \\sim p(\\tilde{y} \\mid\n\\tilde{x}, x, y).\n\\] It’s only when moving to cross-validation where multiple runs are required.\n\n\n\n\nEvent probabilities involving either parameters or predictions or both may be coded in the generated quantities block. For example, to evaluate \\(\\Pr[\\lambda > 5 \\mid y]\\) in the simple Poisson example with only a rate parameter \\(\\lambda\\), it suffices to define a generated quantity\ngenerated quantities {\n int<lower=0, upper=1> lambda_gt_5 = lambda > 5;\n // ...\n}\nThe value of the expression lambda > 5 is 1 if the condition is true and 0 otherwise. The posterior mean of this parameter is the event probability \\[\\begin{eqnarray*}\n\\Pr[\\lambda > 5 \\mid y]\n& = &\n\\int \\textrm{I}(\\lambda > 5) \\cdot p(\\lambda \\mid y)\n\\, \\textrm{d}\\lambda\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M \\textrm{I}[\\lambda^{(m)} > 5],\n\\end{eqnarray*}\\] where each \\(\\lambda^{(m)} \\sim p(\\lambda \\mid y)\\) is distributed according to the posterior. In Stan, this is recovered as the posterior mean of the parameter lambda_gt_5.\nIn general, event probabilities may be expressed as expectations of indicator functions. 
For example, \\[\\begin{eqnarray*}\n\\Pr[\\lambda > 5 \\mid y]\n& = & \\mathbb{E}[\\textrm{I}[\\lambda > 5] \\mid y]\n\\\\[4pt]\n& = &\n\\int\n\\textrm{I}(\\lambda > 5) \\cdot p(\\lambda \\mid y)\n\\, \\textrm{d}\\lambda\n\\\\[4pt]\n& \\approx & \\frac{1}{M} \\sum_{m = 1}^M \\textrm{I}(\\lambda^{(m)} > 5).\n\\end{eqnarray*}\\] The last line above is the posterior mean of the indicator function as coded in Stan.\nEvent probabilities involving posterior predictive quantities \\(\\tilde{y}\\) work exactly the same way as those for parameters. For example, if \\(\\tilde{y}_n\\) is the prediction for the \\(n\\)-th unobserved outcome (such as the score of a team in a game or a level of expression of a protein in a cell), then \\[\\begin{eqnarray*}\n\\Pr[\\tilde{y}_3 > \\tilde{y}_7 \\mid \\tilde{x}, x, y]\n& = &\n\\mathbb{E}\\!\\left[I[\\tilde{y}_3 > \\tilde{y}_7] \\mid \\tilde{x}, x, y\\right]\n\\\\[4pt]\n& = &\n\\int\n\\textrm{I}(\\tilde{y}_3 > \\tilde{y}_7)\n\\cdot p(\\tilde{y} \\mid \\tilde{x}, x, y)\n\\, \\textrm{d}\\tilde{y}\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M\n\\textrm{I}(\\tilde{y}^{(m)}_3 > \\tilde{y}^{(m)}_7),\n\\end{eqnarray*}\\] where \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y} \\mid \\tilde{x}, x, y).\\)\n\n\n\nStan’s sampling algorithms take a Stan program representing a posterior \\(p(\\theta \\mid y, x)\\) along with actual data \\(x\\) and \\(y\\) to produce a set of draws \\(\\theta^{(1)}, \\ldots, \\theta^{(M)}\\) from the posterior. Posterior predictive draws \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y} \\mid\n\\tilde{x}, x, y)\\) can be generated by drawing \\[\n\\tilde{y}^{(m)} \\sim p(y \\mid \\tilde{x}, \\theta^{(m)})\n\\] from the data model. Note that drawing \\(\\tilde{y}^{(m)}\\) only depends on the new predictors \\(\\tilde{x}\\) and the posterior draws \\(\\theta^{(m)}\\). 
Most importantly, neither the original data nor the model density is required.\nBy saving the posterior draws, predictions for new data items \\(\\tilde{x}\\) may be generated whenever needed. In Stan’s interfaces, this is done by writing a second Stan program that inputs the original program’s parameters and the new predictors. For example, for the linear regression case, the program to take posterior draws declares the data and parameters, and defines the model.\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n alpha ~ normal(0, 5);\n beta ~ normal(0, 1);\n sigma ~ lognormal(0, 0.5);\n}\nA second program can be used to generate new observations. This follow-on program need only declare the parameters as they were originally defined. This may require defining constants in the data block such as sizes and hyperparameters that are involved in parameter size or constraint declarations. Then additional data is read in corresponding to predictors for new outcomes that have yet to be observed. There is no need to repeat the model or unneeded transformed parameters or generated quantities. The complete follow-on program for prediction just declares the predictors in the data, the original parameters, and then the predictions in the generated quantities block.\ndata {\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\ngenerated quantities {\n vector[N_tilde] y_tilde\n = normal_rng(alpha + beta * x_tilde, sigma);\n}\nWhen running stand-alone generated quantities, the inputs required are the original draws for the parameters and any predictors corresponding to new predictions, and the output will be draws for \\(\\tilde{y}\\) or derived quantities such as event probabilities.\nAny posterior predictive quantities desired may be generated this way. 
For example, event probabilities are estimated in the usual way by defining indicator variables in the generated quantities block.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior Predictive Sampling" + ] + }, + { + "objectID": "stan-users-guide/posterior-prediction.html#posterior-predictive-distribution", + "href": "stan-users-guide/posterior-prediction.html#posterior-predictive-distribution", + "title": "Posterior Predictive Sampling", + "section": "", + "text": "Given a full Bayesian model \\(p(y, \\theta)\\), the posterior predictive density for new data \\(\\tilde{y}\\) given observed data \\(y\\) is \\[\np(\\tilde{y} \\mid y)\n=\n\\int p(\\tilde{y} \\mid \\theta) \\cdot p(\\theta \\mid y)\n\\, \\textrm{d}\\theta.\n\\] The product under the integral reduces to the joint posterior density \\(p(\\tilde{y}, \\theta \\mid y),\\) so that the integral is simply marginalizing out the parameters \\(\\theta,\\) leaving the predictive density \\(p(\\tilde{y} \\mid y)\\) of future observations given past observations.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior Predictive Sampling" + ] + }, + { + "objectID": "stan-users-guide/posterior-prediction.html#computing-the-posterior-predictive-distribution", + "href": "stan-users-guide/posterior-prediction.html#computing-the-posterior-predictive-distribution", + "title": "Posterior Predictive Sampling", + "section": "", + "text": "The posterior predictive density (or mass) of a prediction \\(\\tilde{y}\\) given observed data \\(y\\) can be computed using \\(M\\) Monte Carlo draws\n\\[\n\\theta^{(m)} \\sim p(\\theta \\mid y)\n\\] from the posterior as \\[\np(\\tilde{y} \\mid y)\n\\approx\n\\frac{1}{M} \\sum_{m = 1}^M p(\\tilde{y} \\mid \\theta^{(m)}).\n\\]\nComputing directly using this formula will lead to underflow in many situations, but the log posterior predictive density, \\(\\log\np(\\tilde{y} \\mid y)\\) may be computed using the 
stable log sum of exponents function as \\[\\begin{eqnarray*}\n\\log p(\\tilde{y} \\mid y)\n& \\approx &\n\\log \\frac{1}{M} \\sum_{m = 1}^M p(\\tilde{y} \\mid \\theta^{(m)}).\n\\\\[4pt]\n& = &\n- \\log M\n+ \\textrm{log-sum-exp}_{m = 1}^M \\log p(\\tilde{y} \\mid \\theta^{(m)}),\n\\end{eqnarray*}\\] where \\[\n\\textrm{log-sum-exp}_{m = 1}^M v_m\n= \\log \\sum_{m = 1}^M \\exp v_m\n\\] is used to maintain arithmetic precision. See the section on log sum of exponentials for more details.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior Predictive Sampling" + ] + }, + { + "objectID": "stan-users-guide/posterior-prediction.html#sampling-from-the-posterior-predictive-distribution", + "href": "stan-users-guide/posterior-prediction.html#sampling-from-the-posterior-predictive-distribution", + "title": "Posterior Predictive Sampling", + "section": "", + "text": "Given draws from the posterior \\(\\theta^{(m)} \\sim p(\\theta \\mid y),\\) draws from the posterior predictive \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y}\n\\mid y)\\) can be generated by randomly generating from the sampling distribution with the parameter draw plugged in, \\[\n\\tilde{y}^{(m)} \\sim p(y \\mid \\theta^{(m)}).\n\\]\nRandomly drawing \\(\\tilde{y}\\) from the data model is critical because there are two forms of uncertainty in posterior predictive quantities, aleatoric uncertainty and epistemic uncertainty. Epistemic uncertainty arises because \\(\\theta\\) is unknown and estimated based only on a finite sample of data \\(y\\). Aleatoric uncertainty arises because even a known value of \\(\\theta\\) leads to uncertainty about new \\(\\tilde{y}\\) as described by the data model \\(p(\\tilde{y} \\mid \\theta)\\). 
Both forms of uncertainty show up in the factored form of the posterior predictive distribution, \\[\np(\\tilde{y} \\mid y)\n=\n\\int\n\\underbrace{p(\\tilde{y} \\mid \\theta)}_{\\begin{array}{l}\n \\textrm{aleatoric}\n \\\\[-2pt] \\textrm{uncertainty}\n \\end{array}}\n\\cdot \\underbrace{p(\\theta \\mid y)}_{\\begin{array}{l}\n \\textrm{epistemic}\n \\\\[-2pt] \\textrm{uncertainty}\n \\end{array}}\n\\, \\textrm{d}\\theta.\n\\]", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior Predictive Sampling" + ] + }, + { + "objectID": "stan-users-guide/posterior-prediction.html#posterior-predictive-simulation-in-stan", + "href": "stan-users-guide/posterior-prediction.html#posterior-predictive-simulation-in-stan", + "title": "Posterior Predictive Sampling", + "section": "", + "text": "Posterior predictive quantities can be coded in Stan using the generated quantities block.\n\n\nFor example, consider a simple Poisson model for count data with a rate parameter \\(\\lambda > 0\\) having a gamma-distributed prior, \\[\n\\lambda \\sim \\textrm{gamma}(1, 1).\n\\] The \\(N\\) observations \\(y_1, \\ldots, y_N\\) are modeled as Poisson distributed, \\[\ny_n \\sim \\textrm{poisson}(\\lambda).\n\\]\n\n\n\nThe following Stan program defines a variable for \\(\\tilde{y}\\) by random number generation in the generated quantities block.\ndata {\n int<lower=0> N;\n array[N] int<lower=0> y;\n}\nparameters {\n real<lower=0> lambda;\n}\nmodel {\n lambda ~ gamma(1, 1);\n y ~ poisson(lambda);\n}\ngenerated quantities {\n int<lower=0> y_tilde = poisson_rng(lambda);\n}\nThe random draw from the data model for \\(\\tilde{y}\\) is coded using Stan’s Poisson random number generator in the generated quantities block. 
This accounts for the aleatoric component of the uncertainty; Stan’s posterior sampler will account for the epistemic uncertainty, generating a new \\(\\tilde{y}^{(m)} \\sim p(y \\mid\n\\lambda^{(m)})\\) for each posterior draw \\(\\lambda^{(m)} \\sim p(\\theta\n\\mid y).\\)\nThe posterior draws \\(\\tilde{y}^{(m)}\\) may be used to estimate the expected value of \\(\\tilde{y}\\) or any of its quantiles or posterior intervals, as well as event probabilities involving \\(\\tilde{y}\\). In general, \\(\\mathbb{E}[f(\\tilde{y}, \\theta) \\mid y]\\) may be evaluated as \\[\n\\mathbb{E}[f(\\tilde{y}, \\theta) \\mid y]\n\\approx \\frac{1}{M} \\sum_{m=1}^M f(\\tilde{y}^{(m)}, \\theta^{(m)}),\n\\] which is just the posterior mean of \\(f(\\tilde{y}, \\theta).\\) This quantity is computed by Stan if the value of \\(f(\\tilde{y}, \\theta)\\) is assigned to a variable in the generated quantities block. That is, if we have\ngenerated quantities {\n real f_val = f(y_tilde, theta);\n // ...\n}\nwhere the value of \\(f(\\tilde{y}, \\theta)\\) is assigned to variable f_val, then the posterior mean of f_val will be the expectation \\(\\mathbb{E}[f(\\tilde{y}, \\theta) \\mid y]\\).\n\n\n\nThe gamma distribution is the conjugate prior distribution for the Poisson distribution, so the posterior density \\(p(\\lambda \\mid y)\\) will also follow a gamma distribution.\nBecause the posterior follows a gamma distribution and the sampling distribution is Poisson, the posterior predictive \\(p(\\tilde{y} \\mid\ny)\\) will follow a negative binomial distribution, because the negative binomial is defined as a compound gamma-Poisson. 
That is, \\(y \\sim\n\\textrm{negative-binomial}(\\alpha, \\beta)\\) if \\(\\lambda \\sim\n\\textrm{gamma}(\\alpha, \\beta)\\) and \\(y \\sim \\textrm{poisson}(\\lambda).\\) Rather than marginalizing out the rate parameter \\(\\lambda\\) analytically as can be done to define the negative binomial probability mass function, the rate \\(\\lambda^{(m)} \\sim p(\\lambda \\mid y)\\) is sampled from the posterior and then used to generate a draw of \\(\\tilde{y}^{(m)} \\sim p(y \\mid \\lambda^{(m)}).\\)", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior Predictive Sampling" + ] + }, + { + "objectID": "stan-users-guide/posterior-prediction.html#posterior-prediction-for-regressions", + "href": "stan-users-guide/posterior-prediction.html#posterior-prediction-for-regressions", + "title": "Posterior Predictive Sampling", + "section": "", + "text": "Consider a regression with a single predictor \\(x_n\\) for the training outcome \\(y_n\\) and \\(\\tilde{x}_n\\) for the test outcome \\(\\tilde{y}_n.\\) Without considering the parametric form of any of the distributions, the posterior predictive distribution for a general regression is \\[\\begin{eqnarray}\np(\\tilde{y} \\mid \\tilde{x}, y, x)\n& = & \\int p(\\tilde{y} \\mid \\tilde{x}, \\theta) \\cdot p(\\theta \\mid y, x) \\,\n\\textrm{d}\\theta\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m=1}^M \\, p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}),\n\\end{eqnarray}\\] where \\(\\theta^{(m)} \\sim p(\\theta \\mid x, y).\\)\n\n\n\nThe following program defines a Poisson regression with a single predictor. These predictors are all coded as data, as are their sizes. Only the observed \\(y\\) values are coded as data. 
The predictive quantities \\(\\tilde{y}\\) appear in the generated quantities block, where they are generated by random number generation.\ndata {\n int<lower=0> N;\n vector[N] x;\n array[N] int<lower=0> y;\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n}\nparameters {\n real alpha;\n real beta;\n}\nmodel {\n y ~ poisson_log(alpha + beta * x);\n { alpha, beta } ~ normal(0, 1);\n}\ngenerated quantities {\n array[N_tilde] int<lower=0> y_tilde\n = poisson_log_rng(alpha + beta * x_tilde);\n}\nThe Poisson distributions in both the model and generated quantities block are coded using the log rate as a parameter (that’s poisson_log vs. poisson, with the suffixes defining the scale of the parameter). The regression coefficients, an intercept alpha and slope beta, are given standard normal priors.\nIn the model block, the log rate for the Poisson is a linear function of the training data \\(x\\), whereas in the generated quantities block it is a function of the test data \\(\\tilde{x}\\). Because the generated quantities block does not affect the posterior draws, the model fits \\(\\alpha\\) and \\(\\beta\\) using only the training data, reserving \\(\\tilde{x}\\) to generate \\(\\tilde{y}.\\)\nThe result from running Stan is a predictive sample \\(\\tilde{y}^{(1)},\n\\ldots \\tilde{y}^{(M)}\\) where each \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y}\n\\mid \\tilde{x}, x, y).\\)\nThe mean of the posterior predictive distribution is the expected value \\[\\begin{align}\n\\mathbb{E}[\\tilde{y} \\mid \\tilde{x}, x, y]\n& =\n\\int\n\\tilde{y}\n\\cdot p(\\tilde{y} \\mid \\tilde{x}, \\theta)\n\\cdot p(\\theta \\mid x, y)\n\\, \\textrm{d}\\theta\n\\\\[4pt]\n& \\approx \\frac{1}{M} \\sum_{m = 1}^M \\tilde{y}^{(m)},\n\\end{align}\\] where the \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y} \\mid \\tilde{x}, x, y)\\) are drawn from the posterior predictive distribution. 
Thus the posterior mean of y_tilde[n] after running Stan is the expected value of \\(\\tilde{y}_n\\) conditioned on the training data \\(x, y\\) and predictor \\(\\tilde{x}_n.\\) This is the Bayesian estimate for \\(\\tilde{y}\\) with minimum expected squared error. The posterior draws can also be used to estimate quantiles for the median and any posterior intervals of interest for \\(\\tilde{y}\\), as well as covariance of the \\(\\tilde{y_n}.\\) The posterior draws \\(\\tilde{y}^{(m)}\\) may also be used to estimate predictive event probabilities, such as \\(\\Pr[\\tilde{y}_1 > 0]\\) or \\(\\Pr[\\prod_{n =\n1}^{\\tilde{N}}(\\tilde{y_n}) > 1],\\) as expectations of indicator functions.\nAll of this can be carried out by running Stan only a single time to draw a single sample of \\(M\\) draws, \\[\n\\tilde{y}^{(1)}, \\ldots, \\tilde{y}^{(M)} \\sim p(\\tilde{y} \\mid\n\\tilde{x}, x, y).\n\\] It’s only when moving to cross-validation that multiple runs are required.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior Predictive Sampling" + ] + }, + { + "objectID": "stan-users-guide/posterior-prediction.html#estimating-event-probabilities", + "href": "stan-users-guide/posterior-prediction.html#estimating-event-probabilities", + "title": "Posterior Predictive Sampling", + "section": "", + "text": "Event probabilities involving either parameters or predictions or both may be coded in the generated quantities block. For example, to evaluate \\(\\Pr[\\lambda > 5 \\mid y]\\) in the simple Poisson example with only a rate parameter \\(\\lambda\\), it suffices to define a generated quantity\ngenerated quantities {\n int<lower=0, upper=1> lambda_gt_5 = lambda > 5;\n // ...\n}\nThe value of the expression lambda > 5 is 1 if the condition is true and 0 otherwise. 
The posterior mean of this parameter is the event probability \\[\\begin{eqnarray*}\n\\Pr[\\lambda > 5 \\mid y]\n& = &\n\\int \\textrm{I}(\\lambda > 5) \\cdot p(\\lambda \\mid y)\n\\, \\textrm{d}\\lambda\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M \\textrm{I}[\\lambda^{(m)} > 5],\n\\end{eqnarray*}\\] where each \\(\\lambda^{(m)} \\sim p(\\lambda \\mid y)\\) is distributed according to the posterior. In Stan, this is recovered as the posterior mean of the parameter lambda_gt_5.\nIn general, event probabilities may be expressed as expectations of indicator functions. For example, \\[\\begin{eqnarray*}\n\\Pr[\\lambda > 5 \\mid y]\n& = & \\mathbb{E}[\\textrm{I}[\\lambda > 5] \\mid y]\n\\\\[4pt]\n& = &\n\\int\n\\textrm{I}(\\lambda > 5) \\cdot p(\\lambda \\mid y)\n\\, \\textrm{d}\\lambda\n\\\\[4pt]\n& \\approx & \\frac{1}{M} \\sum_{m = 1}^M \\textrm{I}(\\lambda^{(m)} > 5).\n\\end{eqnarray*}\\] The last line above is the posterior mean of the indicator function as coded in Stan.\nEvent probabilities involving posterior predictive quantities \\(\\tilde{y}\\) work exactly the same way as those for parameters. 
For example, if \\(\\tilde{y}_n\\) is the prediction for the \\(n\\)-th unobserved outcome (such as the score of a team in a game or a level of expression of a protein in a cell), then \\[\\begin{eqnarray*}\n\\Pr[\\tilde{y}_3 > \\tilde{y}_7 \\mid \\tilde{x}, x, y]\n& = &\n\\mathbb{E}\\!\\left[I[\\tilde{y}_3 > \\tilde{y}_7] \\mid \\tilde{x}, x, y\\right]\n\\\\[4pt]\n& = &\n\\int\n\\textrm{I}(\\tilde{y}_3 > \\tilde{y}_7)\n\\cdot p(\\tilde{y} \\mid \\tilde{x}, x, y)\n\\, \\textrm{d}\\tilde{y}\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M\n\\textrm{I}(\\tilde{y}^{(m)}_3 > \\tilde{y}^{(m)}_7),\n\\end{eqnarray*}\\] where \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y} \\mid \\tilde{x}, x, y).\\)", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior Predictive Sampling" + ] + }, + { + "objectID": "stan-users-guide/posterior-prediction.html#stand-alone-generated-quantities-and-ongoing-prediction", + "href": "stan-users-guide/posterior-prediction.html#stand-alone-generated-quantities-and-ongoing-prediction", + "title": "Posterior Predictive Sampling", + "section": "", + "text": "Stan’s sampling algorithms take a Stan program representing a posterior \\(p(\\theta \\mid y, x)\\) along with actual data \\(x\\) and \\(y\\) to produce a set of draws \\(\\theta^{(1)}, \\ldots, \\theta^{(M)}\\) from the posterior. Posterior predictive draws \\(\\tilde{y}^{(m)} \\sim p(\\tilde{y} \\mid\n\\tilde{x}, x, y)\\) can be generated by drawing \\[\n\\tilde{y}^{(m)} \\sim p(y \\mid \\tilde{x}, \\theta^{(m)})\n\\] from the data model. Note that drawing \\(\\tilde{y}^{(m)}\\) only depends on the new predictors \\(\\tilde{x}\\) and the posterior draws \\(\\theta^{(m)}\\). Most importantly, neither the original data nor the model density is required.\nBy saving the posterior draws, predictions for new data items \\(\\tilde{x}\\) may be generated whenever needed. 
In Stan’s interfaces, this is done by writing a second Stan program that inputs the original program’s parameters and the new predictors. For example, for the linear regression case, the program to take posterior draws declares the data and parameters, and defines the model.\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n alpha ~ normal(0, 5);\n beta ~ normal(0, 1);\n sigma ~ lognormal(0, 0.5);\n}\nA second program can be used to generate new observations. This follow-on program need only declare the parameters as they were originally defined. This may require defining constants in the data block such as sizes and hyperparameters that are involved in parameter size or constraint declarations. Then additional data is read in corresponding to predictors for new outcomes that have yet to be observed. There is no need to repeat the model or unneeded transformed parameters or generated quantities. The complete follow-on program for prediction just declares the predictors in the data, the original parameters, and then the predictions in the generated quantities block.\ndata {\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\ngenerated quantities {\n vector[N_tilde] y_tilde\n = normal_rng(alpha + beta * x_tilde, sigma);\n}\nWhen running stand-alone generated quantities, the inputs required are the original draws for the parameters and any predictors corresponding to new predictions, and the output will be draws for \\(\\tilde{y}\\) or derived quantities such as event probabilities.\nAny posterior predictive quantities desired may be generated this way. 
For example, event probabilities are estimated in the usual way by defining indicator variables in the generated quantities block.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior Predictive Sampling" + ] + }, + { + "objectID": "stan-users-guide/one-dimensional-integrals.html", + "href": "stan-users-guide/one-dimensional-integrals.html", + "title": "Computing One Dimensional Integrals", + "section": "", + "text": "Definite and indefinite one dimensional integrals can be performed in Stan using the integrate_1d function\nAs an example, the normalizing constant of a left-truncated normal distribution is\n\\[\n \\int_a^\\infty \\frac{1}{\\sqrt{2 \\pi \\sigma^2}} e^{-\\frac{1}{2}\\frac{(x - \\mu)^2}{\\sigma^2}}\n\\]\nTo compute this integral in Stan, the integrand must first be defined as a Stan function (see the Stan Reference Manual chapter on User-Defined Functions for more information on coding user-defined functions).\nreal normal_density(real x, // Function argument\n real xc, // Complement of function argument\n // on the domain (defined later)\n array[] real theta, // parameters\n array[] real x_r, // data (real)\n array[] int x_i) { // data (integer)\n real mu = theta[1];\n real sigma = theta[2];\n\n return 1 / (sqrt(2 * pi()) * sigma) * exp(-0.5 * ((x - mu) / sigma)^2);\n}\nThis function is expected to return the value of the integrand evaluated at point x. The argument xc is used in definite integrals to avoid loss of precision near the limits of integration and is set to NaN when either limit is infinite (see the section on precision/loss in the chapter on Higher-Order Functions of the Stan Functions Reference for details on how to use this). The argument theta is used to pass in arguments of the integral that are a function of the parameters in our model. 
The arguments x_r and x_i are used to pass in real and integer arguments of the integral that are not a function of our parameters.\nThe function defining the integrand must have exactly the argument types and return type of normal_density above, though argument naming is not important. Even if x_r and x_i are unused in the integrand, they must be included in the function signature. Even if the integral does not involve some of these, they must still be supplied some value. The most efficient will be a zero-length array or vector, which can be created with rep_array(0, 0) and rep_vector(0, 0), respectively. Other options include an uninitialized variable declared with size 0, which is equivalent to the above, or any easy value, such as size 1 array created with {0}.\n\n\nSuppose that our model requires evaluating the lpdf of a left-truncated normal, but the truncation limit is to be estimated as a parameter. Because the truncation point is a parameter, we must include the normalization term of the truncated pdf when computing our model’s log density. Note this is just an example of how to use the 1D integrator. The more efficient way to perform the correct normalization in Stan is described in the chapter on Truncated or Censored Data of this guide.\nSuch a model might look like (include the function defined at the beginning of this chapter to make this code compile):\ndata {\n int N;\n array[N] real y;\n}\n\ntransformed data {\n array[0] real x_r;\n array[0] int x_i;\n}\n\nparameters {\n real mu;\n real<lower=0.0> sigma;\n real left_limit;\n}\n\nmodel {\n mu ~ normal(0, 1);\n sigma ~ normal(0, 1);\n left_limit ~ normal(0, 1);\n target += normal_lpdf(y | mu, sigma);\n target += -log(integrate_1d(normal_density,\n left_limit,\n positive_infinity(),\n { mu, sigma }, x_r, x_i));\n}\n\n\nThe limits of integration can be finite or infinite. 
The infinite limits are made available via the Stan calls negative_infinity() and positive_infinity().\nIf both limits are either negative_infinity() or positive_infinity(), the integral and its gradients are set to zero.\n\n\n\nThe arguments for the real data x_r and the integer data x_i must be expressions that only involve data or transformed data variables. theta, on the other hand, can be a function of data, transformed data, parameters, or transformed parameters.\nThe endpoints of integration can be data or parameters (and internally the derivatives of the integral with respect to the endpoints are handled with the Leibniz integral rule).\n\n\n\n\nThe integral is performed with the iterative 1D double exponential quadrature methods implemented in the Boost library (Agrawal et al. 2017). If the \\(n\\)th estimate of the integral is denoted \\(I_n\\) and the \\(n\\)th estimate of the norm of the integral is denoted \\(|I|_n\\), the iteration is terminated when\n\\[\n \\frac{{|I_{n + 1} - I_n|}}{{|I|_{n + 1}}} < \\text{relative tolerance}.\n\\]\nThe relative_tolerance parameter can be optionally specified as the last argument to integrate_1d. By default, integrate_1d follows the Boost library recommendation of setting relative_tolerance to the square root of the machine epsilon of double precision floating point numbers (about 1e-8). If the Boost integrator is not able to reach the relative tolerance an exception is raised with a message something like “Exception: integrate: error estimate of integral 4.25366e-13 exceeds the given relative tolerance times norm of integral”. If integrate_1d causes an exception in transformed parameters block or model block, the result has the same effect as assigning a \\(-\\infty\\) log probability, which causes rejection of the current proposal in MCMC samplers and adjustment of search parameters in optimization. If integrate_1d causes an exception in generated quantities block, the returned output from integrate_1d is NaN. 
In these cases, a bigger relative_tolerance value can be specified.\n\n\nIntegrals on the (possibly infinite) interval \\((a, b)\\) that cross zero are split into two integrals, one from \\((a, 0)\\) and one from \\((0, b)\\). This is because the quadrature methods employed internally can have difficulty near zero.\nIn this case, each integral is separately integrated to the given relative_tolerance.\n\n\n\nIf care is not taken, the quadrature can suffer from numerical loss of precision near the endpoints of definite integrals.\nFor instance, in integrating the pdf of a beta distribution when the values of \\(\\alpha\\) and \\(\\beta\\) are small, most of the probability mass is lumped near zero and one.\nThe pdf of a beta distribution is proportional to\n\\[\np(x) \\propto x^{\\alpha - 1}(1 - x)^{\\beta - 1}\n\\]\nNormalizing this distribution requires computing the integral of \\(p(x)\\) from zero to one. In Stan code, the integrand might look like:\nreal beta(real x, real xc, array[] real theta, array[] real x_r, array[] int x_i) {\n real alpha = theta[1];\n real beta = theta[2];\n\n return x^(alpha - 1.0) * (1.0 - x)^(beta - 1.0);\n}\nThe issue is that there will be numerical breakdown in the precision of 1.0 - x as x gets close to one. This is because of the limited precision of double precision floating numbers. This integral will fail to converge for values of alpha and beta much less than one.\nThis is where xc is useful. It is defined, for definite integrals, as a high precision version of the distance from x to the nearest endpoint — a - x or b - x for a lower endpoint a and an upper endpoint b. 
To make use of this for the beta integral, the integrand can be re-coded:\nreal beta(real x, real xc, array[] real theta, array[] real x_r, array[] int x_i) {\n real alpha = theta[1];\n real beta = theta[2];\n real v;\n\n if(x > 0.5) {\n v = x^(alpha - 1.0) * xc^(beta - 1.0);\n } else {\n v = x^(alpha - 1.0) * (1.0 - x)^(beta - 1.0);\n }\n\n return v;\n}\nIn this case, as we approach the upper limit of integration \\(a = 1\\), xc will take on the value of \\(a - x = 1 - x\\). This version of the integrand will converge for much smaller values of alpha and beta than otherwise possible.\nConsider another example: let’s say we have a log-normal distribution that is both shifted away from zero by some amount \\(\\delta\\), and truncated at some value \\(b\\). If we were interested in calculating the expectation of a variable \\(X\\) distributed in this way, we would need to calculate \\[\n\\int_a^b xf(x)\\,dx = \\int_{\\delta}^b xf(x)\\,dx\n\\] in the numerator, where \\(f(x)\\) is the probability density function for the shifted log-normal distribution. This probability density function can be coded in Stan as:\nreal shift_lognormal_pdf(real x,\n real mu,\n real sigma,\n real delta) {\n real p;\n\n p = (1.0 / ((x - delta) * sigma * sqrt(2 * pi()))) *\n exp(-1 * (log(x - delta) - mu)^2 / (2 * sigma^2));\n\n return p;\n}\nTherefore, the function that we want to integrate is:\nreal integrand(real x,\n real xc,\n array[] real theta,\n array[] real x_r,\n array[] int x_i) {\n real numerator;\n real p;\n\n real mu = theta[1];\n real sigma = theta[2];\n real delta = theta[3];\n real b = theta[4];\n\n p = shift_lognormal_pdf(x, mu, sigma, delta);\n\n numerator = x * p;\n\n return numerator;\n}\nWhat happens here is that, given that the log-normal distribution is shifted by \\(\\delta\\), when we then try to integrate the numerator, our x starts at values just above delta. 
This, in turn, causes the x - delta term to be near zero, leading to a breakdown.\nWe can use xc, and define the integrand as:\nreal integrand(real x,\n real xc,\n array[] real theta,\n array[] real x_r,\n array[] int x_i) {\n real numerator;\n real p;\n\n real mu = theta[1];\n real sigma = theta[2];\n real delta = theta[3];\n real b = theta[4];\n\n if (x < delta + 1) {\n p = shift_lognormal_pdf(xc, mu, sigma, delta);\n } else {\n p = shift_lognormal_pdf(x, mu, sigma, delta);\n }\n\n numerator = x * p;\n\n return numerator;\n}\nWhy does this work? When our values of x are less than delta + 1 (so, when they’re near delta, given that our lower bound of integration is equal to \\(\\delta\\)), we pass xc as an argument to our shift_lognormal_pdf function. This way, instead of dealing with x - delta in shift_lognormal_pdf, we are working with xc - delta which is equal to delta - x - delta, as delta is the lower endpoint in that case. The delta terms cancel out, and we are left with a high-precision version of x. We don’t encounter the same problem at the upper limit \\(b\\) so we don’t adjust the code for that case.\nNote, xc is only used for definite integrals. If either the left endpoint is at negative infinity or the right endpoint is at positive infinity, xc will be NaN.\nFor zero-crossing definite integrals (see section Zero Crossing) the integrals are broken into two pieces (\\((a, 0)\\) and \\((0, b)\\) for endpoints \\(a < 0\\) and \\(b > 0\\)) and xc is a high precision version of the distance to the limits of each of the two integrals separately. 
This means xc will be a high precision version of a - x, x, or b - x, depending on the value of x and the endpoints.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Computing One Dimensional Integrals" + ] + }, + { + "objectID": "stan-users-guide/one-dimensional-integrals.html#calling-the-integrator", + "href": "stan-users-guide/one-dimensional-integrals.html#calling-the-integrator", + "title": "Computing One Dimensional Integrals", + "section": "", + "text": "Suppose that our model requires evaluating the lpdf of a left-truncated normal, but the truncation limit is to be estimated as a parameter. Because the truncation point is a parameter, we must include the normalization term of the truncated pdf when computing our model’s log density. Note this is just an example of how to use the 1D integrator. The more efficient way to perform the correct normalization in Stan is described in the chapter on Truncated or Censored Data of this guide.\nSuch a model might look like (include the function defined at the beginning of this chapter to make this code compile):\ndata {\n int N;\n array[N] real y;\n}\n\ntransformed data {\n array[0] real x_r;\n array[0] int x_i;\n}\n\nparameters {\n real mu;\n real<lower=0.0> sigma;\n real left_limit;\n}\n\nmodel {\n mu ~ normal(0, 1);\n sigma ~ normal(0, 1);\n left_limit ~ normal(0, 1);\n target += normal_lpdf(y | mu, sigma);\n target += -log(integrate_1d(normal_density,\n left_limit,\n positive_infinity(),\n { mu, sigma }, x_r, x_i));\n}\n\n\nThe limits of integration can be finite or infinite. The infinite limits are made available via the Stan calls negative_infinity() and positive_infinity().\nIf both limits are either negative_infinity() or positive_infinity(), the integral and its gradients are set to zero.\n\n\n\nThe arguments for the real data x_r and the integer data x_i must be expressions that only involve data or transformed data variables. 
theta, on the other hand, can be a function of data, transformed data, parameters, or transformed parameters.\nThe endpoints of integration can be data or parameters (and internally the derivatives of the integral with respect to the endpoints are handled with the Leibniz integral rule).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Computing One Dimensional Integrals" + ] + }, + { + "objectID": "stan-users-guide/one-dimensional-integrals.html#integrator-convergence", + "href": "stan-users-guide/one-dimensional-integrals.html#integrator-convergence", + "title": "Computing One Dimensional Integrals", + "section": "", + "text": "The integral is performed with the iterative 1D double exponential quadrature methods implemented in the Boost library (Agrawal et al. 2017). If the \\(n\\)th estimate of the integral is denoted \\(I_n\\) and the \\(n\\)th estimate of the norm of the integral is denoted \\(|I|_n\\), the iteration is terminated when\n\\[\n \\frac{{|I_{n + 1} - I_n|}}{{|I|_{n + 1}}} < \\text{relative tolerance}.\n\\]\nThe relative_tolerance parameter can be optionally specified as the last argument to integrate_1d. By default, integrate_1d follows the Boost library recommendation of setting relative_tolerance to the square root of the machine epsilon of double precision floating point numbers (about 1e-8). If the Boost integrator is not able to reach the relative tolerance an exception is raised with a message something like “Exception: integrate: error estimate of integral 4.25366e-13 exceeds the given relative tolerance times norm of integral”. If integrate_1d causes an exception in transformed parameters block or model block, the result has the same effect as assigning a \\(-\\infty\\) log probability, which causes rejection of the current proposal in MCMC samplers and adjustment of search parameters in optimization. If integrate_1d causes an exception in generated quantities block, the returned output from integrate_1d is NaN. 
In these cases, a bigger relative_tolerance value can be specified.\n\n\nIntegrals on the (possibly infinite) interval \\((a, b)\\) that cross zero are split into two integrals, one from \\((a, 0)\\) and one from \\((0, b)\\). This is because the quadrature methods employed internally can have difficulty near zero.\nIn this case, each integral is separately integrated to the given relative_tolerance.\n\n\n\nIf care is not taken, the quadrature can suffer from numerical loss of precision near the endpoints of definite integrals.\nFor instance, in integrating the pdf of a beta distribution when the values of \\(\\alpha\\) and \\(\\beta\\) are small, most of the probability mass is lumped near zero and one.\nThe pdf of a beta distribution is proportional to\n\\[\np(x) \\propto x^{\\alpha - 1}(1 - x)^{\\beta - 1}\n\\]\nNormalizing this distribution requires computing the integral of \\(p(x)\\) from zero to one. In Stan code, the integrand might look like:\nreal beta(real x, real xc, array[] real theta, array[] real x_r, array[] int x_i) {\n real alpha = theta[1];\n real beta = theta[2];\n\n return x^(alpha - 1.0) * (1.0 - x)^(beta - 1.0);\n}\nThe issue is that there will be numerical breakdown in the precision of 1.0 - x as x gets close to one. This is because of the limited precision of double precision floating numbers. This integral will fail to converge for values of alpha and beta much less than one.\nThis is where xc is useful. It is defined, for definite integrals, as a high precision version of the distance from x to the nearest endpoint — a - x or b - x for a lower endpoint a and an upper endpoint b. 
To make use of this for the beta integral, the integrand can be re-coded:\nreal beta(real x, real xc, array[] real theta, array[] real x_r, array[] int x_i) {\n real alpha = theta[1];\n real beta = theta[2];\n real v;\n\n if(x > 0.5) {\n v = x^(alpha - 1.0) * xc^(beta - 1.0);\n } else {\n v = x^(alpha - 1.0) * (1.0 - x)^(beta - 1.0);\n }\n\n return v;\n}\nIn this case, as we approach the upper limit of integration \\(a = 1\\), xc will take on the value of \\(a - x = 1 - x\\). This version of the integrand will converge for much smaller values of alpha and beta than otherwise possible.\nConsider another example: let’s say we have a log-normal distribution that is both shifted away from zero by some amount \\(\\delta\\), and truncated at some value \\(b\\). If we were interested in calculating the expectation of a variable \\(X\\) distributed in this way, we would need to calculate \\[\n\\int_a^b xf(x)\\,dx = \\int_{\\delta}^b xf(x)\\,dx\n\\] in the numerator, where \\(f(x)\\) is the probability density function for the shifted log-normal distribution. This probability density function can be coded in Stan as:\nreal shift_lognormal_pdf(real x,\n real mu,\n real sigma,\n real delta) {\n real p;\n\n p = (1.0 / ((x - delta) * sigma * sqrt(2 * pi()))) *\n exp(-1 * (log(x - delta) - mu)^2 / (2 * sigma^2));\n\n return p;\n}\nTherefore, the function that we want to integrate is:\nreal integrand(real x,\n real xc,\n array[] real theta,\n array[] real x_r,\n array[] int x_i) {\n real numerator;\n real p;\n\n real mu = theta[1];\n real sigma = theta[2];\n real delta = theta[3];\n real b = theta[4];\n\n p = shift_lognormal_pdf(x, mu, sigma, delta);\n\n numerator = x * p;\n\n return numerator;\n}\nWhat happens here is that, given that the log-normal distribution is shifted by \\(\\delta\\), when we then try to integrate the numerator, our x starts at values just above delta. 
This, in turn, causes the x - delta term to be near zero, leading to a breakdown.\nWe can use xc, and define the integrand as:\nreal integrand(real x,\n real xc,\n array[] real theta,\n array[] real x_r,\n array[] int x_i) {\n real numerator;\n real p;\n\n real mu = theta[1];\n real sigma = theta[2];\n real delta = theta[3];\n real b = theta[4];\n\n if (x < delta + 1) {\n p = shift_lognormal_pdf(xc, mu, sigma, delta);\n } else {\n p = shift_lognormal_pdf(x, mu, sigma, delta);\n }\n\n numerator = x * p;\n\n return numerator;\n}\nWhy does this work? When our values of x are less than delta + 1 (so, when they’re near delta, given that our lower bound of integration is equal to \\(\\delta\\)), we pass xc as an argument to our shift_lognormal_pdf function. This way, instead of dealing with x - delta in shift_lognormal_pdf, we are working with xc - delta which is equal to delta - x - delta, as delta is the lower endpoint in that case. The delta terms cancel out, and we are left with a high-precision version of x. We don’t encounter the same problem at the upper limit \\(b\\) so we don’t adjust the code for that case.\nNote, xc is only used for definite integrals. If either the left endpoint is at negative infinity or the right endpoint is at positive infinity, xc will be NaN.\nFor zero-crossing definite integrals (see section Zero Crossing) the integrals are broken into two pieces (\\((a, 0)\\) and \\((0, b)\\) for endpoints \\(a < 0\\) and \\(b > 0\\)) and xc is a high precision version of the distance to the limits of each of the two integrals separately. 
This means xc will be a high precision version of a - x, x, or b - x, depending on the value of x and the endpoints.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Computing One Dimensional Integrals" + ] + }, + { + "objectID": "stan-users-guide/multiple-imputation.html", + "href": "stan-users-guide/multiple-imputation.html", + "title": "Multiple imputation", + "section": "", + "text": "Missing data is common in applied data analysis. Ignoring this missingness can distort posterior inferences and reduce their precision, and modeling it can improve inference quality. There are several ways to model missing data. Chapter 18 in Gelman et al. (2013) offers an approximately Bayesian perspective, and the chapter Missing Data and Partially Known Parameters in the Reference Manual shows an approach that handles missing data as parameters.\nAnother way to model missing data is through multiple imputation, which replaces missing values with sampled values to obtain different versions of a complete data set. Then, the model of interest can be fitted to each of these complete versions separately. Combining the resulting draws from these different fits produces a sample that accounts for the uncertainty in the missing values.\n\n\nSuppose that we have a data set \\(x\\) with columns \\(x_{\\cdot, 1}, \\ldots, x_{\\cdot, K}\\) that make up covariates in a regression with quantities of interest \\(\\theta\\). With the completely observed data, \\(x^\\text{comp}\\), we could estimate the posterior distribution of \\(\\theta\\) as \\(p(\\theta \\mid x^\\text{comp})\\). But with missing data, our matrix is split into \\(x^{\\text{obs}}\\) (the observed values of \\(x\\)) and \\(x^{\\text{mis}}\\) (the missing values of \\(x\\)).\nFortunately, we can treat \\(x^{\\text{mis}}\\) as additional, nuisance parameters that we can estimate along with \\(\\theta\\) as \\(p(\\theta, x^{\\text{mis}} \\mid x^{\\text{obs}})\\). 
So, we can express the marginal distribution of \\(\\theta\\) given only the observed values \\(x^{\\text{obs}}\\) as \\[\\begin{align*}\np(\\theta \\mid x^{\\text{obs}}) =&\n\\int\n p(\\theta, x^{\\text{mis}} \\mid x^{\\text{obs}})\n \\mathrm{d}x^{\\text{mis}} \\\\\n=&\n\\int\n p(\\theta \\mid x^{\\text{obs}}, x^{\\text{mis}})\n p(x^{\\text{mis}} \\mid x^{\\text{obs}})\n \\mathrm{d}x^{\\text{mis}} \\\\\n=& \\int\n p(\\theta \\mid x^{\\text{imp}})\n p(x^{\\text{mis}} \\mid x^{\\text{obs}})\n \\mathrm{d}x^{\\text{mis}},\n\\end{align*}\\] where \\(x^{\\text{imp}}\\) is a data set that includes imputed values of \\(x^{\\text{mis}}\\).\nThe equations above show that we do not need to describe \\(p(\\theta \\mid x^{\\text{obs}})\\) directly. Instead, we can first find a way to sample from \\(x^{\\text{mis}}\\) based on \\(x^{\\text{obs}}\\), and then use these samples to fit the model \\(p(\\theta \\mid x^\\text{imp})\\), treating \\(x^\\text{imp}\\) as if it was \\(x^\\text{comp}\\).\nNote that the model for \\(p(x^{\\text{mis}} \\mid x^{\\text{obs}})\\) needs to be explicit. In a typical regression setting with covariates \\(x\\) and outcome \\(y\\), this model for \\(x\\) is not needed because inferences for the regression parameters are independent of the model of \\(x\\) when \\(x\\) is fully observed. 
But with missing data and multiple imputation, we need to generate values for \\(x^{\\text{mis}}\\) based on \\(x^{\\text{obs}}\\), which in turn needs an explicit model for \\(x^{\\text{mis}}\\).\nThus, the general outline for multiple imputation with a number \\(M\\) of imputations is:\n\nDraw \\(M\\) random samples \\(x^{\\text{mis}}_1, \\ldots,\nx^{\\text{mis}}_M\\) from the posterior predictive distribution \\(p(x^{\\text{mis}} \\mid x^{\\text{obs}})\\).\nUse these samples to get \\(M\\) different complete data sets \\(x_1^\\text{imp}, \\ldots, x_M^\\text{imp}\\).\nFor each of the \\(M\\) imputed data sets, sample from the model of interest \\(p(\\theta \\mid x_m^\\text{imp})\\), each time treating \\(x_m^\\text{imp}\\) as if it was \\(x^\\text{comp}\\).\nCombine the draws for \\(\\theta\\) from all \\(M\\) fits.\n\n\n\nFollowing the notation above, we can express the outline with the pseudocode below:\ntheta_estimates = []\nfor (m in 1:M) {\n x_mis <- get_imputations(x_obs)\n x_imp <- add_imputations(x_obs, x_mis)\n imputation_fits <- my_model.fit(x_imp).draws()\n theta_estimates <- theta_estimates.append(imputation_fits)\n}\nEach of the functions in this pseudocode can encapsulate simple or complex procedures. get_imputations(), for example, may impute missing data by taking values directly from x_obs; or it may model a regression where an \\(x_{\\cdot, k}\\) with missing values is the outcome and the \\(x_{\\cdot, j}\\) without missing observations are predictors. Below we illustrate how to implement this second approach in Stan.\n\n\n\n\nImagine our data set is composed of two numerical, continuous variables \\(x\\) and \\(y\\), and one binary variable \\(z\\). We want to estimate the parameters that govern the conditional association \\(p(y \\mid x, z)\\), but several values of \\(x\\) are missing. 
So, before we fit this model for \\(y\\), we impute the missing values in \\(x\\).\nWe first find all the fully observed data points and identify their values as \\(x^{\\text{obs}}\\), \\(y^{\\text{obs}}\\), and \\(z^{\\text{obs}}\\). Then we use these data to fit the model \\[\nx^{\\text{obs}} \\sim \\text{normal}(\\gamma_0 + \\gamma_1 y^{\\text{obs}} +\n \\gamma_2 z^{\\text{obs}}, \\lambda).\n\\]\nThis model can then give us samples for the missing values \\(x^{\\text{mis}}\\) conditional on the corresponding values of \\(y\\) and \\(z\\), which we call \\(y^{\\text{aux}}\\) and \\(z^{\\text{aux}}\\). The Stan code for this imputation is:\ndata {\n int<lower=0> N_obs;\n vector[N_obs] x_obs;\n vector[N_obs] y_obs;\n array[N_obs] int<lower=0, upper=1> z_obs; \n int<lower=0> N_mis; \n vector[N_mis] y_aux; \n array[N_mis] int<lower=0, upper=1> z_aux; \n}\nparameters {\n vector[3] gamma;\n real<lower=0> lambda;\n}\nmodel {\n gamma ~ normal(0, 1);\n lambda ~ exponential(1);\n x_obs ~ normal(gamma[1] + gamma[2] * y_obs + gamma[3] * z_obs,\n lambda);\n}\ngenerated quantities {\n array[N_mis] x_imp = normal_rng(gamma[1] + gamma[2] * y_aux[n]\n + gamma[3] * z_aux[n], lambda);\n}\nThe generated quantities block automatically samples \\(\\gamma_0\\), \\(\\gamma_1\\), \\(\\gamma_2\\), and \\(\\lambda\\) from their posterior distributions. So, the random draws of x_imp incorporate uncertainty from the estimated parameters and from the sampling variation in \\(x^{\\text{mis}}\\).\nMultiple posterior draws of \\(x^{\\text{mis}}\\) give us multiple imputed data sets. With these data sets we can model \\(p(y \\mid \\beta, \\sigma, x, z)\\) as \\[\ny \\sim \\text{normal}(\\beta_0 + \\beta_1 x^\\text{comp} + \\beta_2 z,\n \\sigma),\n\\] where \\(x^\\text{comp}\\) contains observed and imputed values. 
The Stan code for this model is:\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n vector[N] z;\n}\nparameters {\n vector[3] beta;\n real<lower=0> sigma;\n}\nmodel {\n beta ~ normal(0, 1);\n sigma ~ exponential(1);\n y ~ normal(beta[1] + beta[2] * x + beta[3] * z, sigma);\n}\nWith multiple imputation, the model for \\(p(y \\mid \\beta, \\sigma, x, z)\\) does not need to distinguish between observed and imputed values of \\(x\\). Instead, this model can treat all imputed data sets as complete because we can combine all the posterior draws from multiple fits.\n\n\n\nA more general scenario involves an outcome variable \\(y\\) and several explanatory variables \\(x_1, \\ldots, x_K\\), each of which can have missing values that we want to impute.\nOne solution is to do multiple imputation through chained equations, a procedure often called “MICE”1. The MICE procedure in this scenario is:\n\nInitialize missing values in all \\(x_i\\). For each variable \\(x_i\\) with missing entries, fill its missing values with random samples from its observed values (or use another simple initialization rule).\nUpdate each \\(x_i\\) given \\(y\\) and the other \\(x\\)’s. For \\(i=1, \\ldots, K\\), fit a model for the observed \\(x_i\\) conditional on the current versions (with observed and imputed values) of \\(y\\) and \\(x_{-i} = x_1, \\ldots, x_{i-1}, x_{i+1}, \\ldots,\nx_{K}\\). Use this model to draw imputations from the predictive distribution of the missing \\(x_i\\). Completing this step for all \\(i = 1, \\ldots, K\\) constitutes a single imputation cycle.\nWarmup period. Repeat the imputation cycle in step 1 several times as warmup to let the imputations stabilize.\nCreate M imputed datasets. After the warmup, record the current complete dataset as one imputed dataset. 
Then either restart from step 0 and repeat steps 1–3 until you have \\(M\\) imputed datasets; or do a single long run, i.e., continue iterating steps 1–2 and save the imputed dataset at \\(M\\) well-spaced iterations (e.g., every \\(S\\) cycles) to obtain \\(M\\) imputed datasets, without restarting from step 0 each time.\nFit the target model \\(p(y \\mid \\beta, \\sigma, x_1, \\ldots,\nx_K)\\) separately to each of the \\(M\\) imputed datasets and save the posterior draws for the parameters of interest.\nCombine the draws (or other summaries) from all \\(M\\) fits.\n\nNote that, as described here, the MICE procedure does not guarantee that the conditional distributions of all variables will be compatible. Compatibility means that there is a joint distribution for all the variables used in the imputation that can be decomposed as the conditional distributions we used. An incompatible imputation model will technically not sample from any well-defined probability distribution. But the consequences of this are not always serious. See section 6.4 in Carpenter et al. (2023) for a more detailed explanation of compatibility and of the related concept of congeniality.\n\n\nImagine that we again want to use numerical variables \\(x\\) and \\(y\\), and dichotomic variable \\(z\\) to estimate the parameters in \\(p(y \\mid \\beta, \\sigma, x, z)\\). But now both \\(x\\) and \\(z\\) have missing values that we want to impute.\nTo apply the MICE procedure in this example, we can reuse the models for \\(x\\) and \\(y\\) that we defined above. 
We also need a new model to impute \\(z^{\\text{mis}}\\), so we use the logistic regression \\[\nz^{\\text{obs}} \\sim \\text{Bernoulli}(\n \\text{logit}^{-1}(\n \\alpha_0 + \\alpha_1 y^{\\text{obs}} + \\alpha_2 x^{\\text{obs}})\n ).\n\\] Here, \\(z^{\\text{obs}}\\) contains only the completely observed values from our original data set, while \\(x^{\\text{obs}}\\) and \\(y^{\\text{obs}}\\) contain all the values that correspond to \\(z^{\\text{obs}}\\). Thus, \\(x^{\\text{obs}}\\) can include observed and imputed values, and \\(y^{\\text{obs}}\\) need not be the same as in the model for \\(x^{\\text{obs}}\\).\nWith this model we can sample values for \\(z^{\\text{mis}}\\) conditional on its corresponding values \\(y^{\\text{aux}}\\) and \\(x^{\\text{aux}}\\).\nThe Stan code to impute \\(z^{\\text{mis}}\\) is:\ndata {\n int<lower=0> N_obs;\n array[N_obs] int<lower=0, upper=1> z_obs;\n vector[N_obs] y_obs;\n vector[N_obs] x_obs;\n int<lower=0> N_mis;\n vector[N_mis] y_aux;\n vector[N_mis] x_aux;\n}\nparameters {\n vector[3] gamma;\n}\nmodel {\n gamma ~ normal(0, 1);\n z_obs ~ bernoulli_logit(gamma[1] + gamma[2] * y_obs \n + gamma[3] * x_obs);\n}\ngenerated quantities {\n array[N_mis] int z_imp = bernoulli_logit_rng(gamma[1]\n + gamma[2] * y_aux\n + gamma[3] * x_aux);\n}\nThe pseudocode for this example of MICE, restarting the imputations after each imputation cycle, is shown below. 
Note that we do not need to initialize \\(x^{\\text{mis}}\\) in step 0 because we can impute it directly with the model for \\(x^{\\text{obs}}\\).\ncompleted_datasets <- []\nfor (m in 1:M) {\n data_m <- copy(data_orig)\n data_m.z[missing_idz] <- random_sample(data_m.z[observed_idz])\n for (cycle in 1:n_warm) {\n stanmod_x <- build_model(\"model_for_x.stan\")\n fit_x <- stanmod_x.fit(\n obs_data=data_m[observed_idx],\n mis_data=data_m[missing_idx]\n )\n data_m.x[missing_idx] <- get_imputations(fit_x, \"x_imp\")\n stanmod_z <- build_model(\"model_for_z.stan\")\n fit_z <- stanmod_z.fit(\n obs_data=data_m[observed_idz],\n mis_data=data_m[missing_idz]\n )\n data_m.z[missing_idz] <- get_imputations(fit_z, \"z_imp\")\n }\n completed_datasets.append(data_m)\n}\nall_draws <- []\nfor (dataset in completed_datasets) {\n stanmod_y <- build_model(\"model_for_y.stan\")\n fit_y <- stanmod_y.fit(dataset)\n all_draws.append(extract_draws(fit_y))\n}\nall_results <- combine_results(all_draws)\n\n\n\n\nWith Stan’s MCMC sampler, we can treat posterior draw chains from imputed data sets as if they were chains based on complete data. There is one important difference. Multiple imputation expresses uncertainty in the missing values as consistent differences in the estimates obtained from different imputed data sets. This means that chains obtained from the same imputed data set should converge, but chains obtained from different data sets do not have to. So, we need not worry if diagnostics2 signal that the chains from different imputed data sets are not converging properly. See an example in Bürkner (2025).\n\n\n\nA full Bayesian probability model includes a feedback flow of information between all parameters and all data. Cut models separate some parts of this feedback flow so that different subsets of data influence only some parameters in the model (see Plummer (2015)).\nMultiple imputation interrupts the flow of information from data to parameters. 
In our regression above, for example, the imputations influence the distribution of the parameters in \\(p(y \\mid x_1, \\ldots, x_K)\\), but these parameters do not influence the imputations. So, we could use multiple imputation to implement a cut model.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Multiple imputation" + ] + }, + { + "objectID": "stan-users-guide/multiple-imputation.html#motivation-and-outline", + "href": "stan-users-guide/multiple-imputation.html#motivation-and-outline", + "title": "Multiple imputation", + "section": "", + "text": "Suppose that we have a data set \\(x\\) with columns \\(x_{\\cdot, 1}, \\ldots, x_{\\cdot, K}\\) that make up covariates in a regression with quantities of interest \\(\\theta\\). With the completely observed data, \\(x^\\text{comp}\\), we could estimate the posterior distribution of \\(\\theta\\) as \\(p(\\theta \\mid x^\\text{comp})\\). But with missing data, our matrix is split into \\(x^{\\text{obs}}\\) (the observed values of \\(x\\)) and \\(x^{\\text{mis}}\\) (the missing values of \\(x\\)).\nFortunately, we can treat \\(x^{\\text{mis}}\\) as additional, nuisance parameters that we can estimate along with \\(\\theta\\) as \\(p(\\theta, x^{\\text{mis}} \\mid x^{\\text{obs}})\\). 
So, we can express the marginal distribution of \\(\\theta\\) given only the observed values \\(x^{\\text{obs}}\\) as \\[\\begin{align*}\np(\\theta \\mid x^{\\text{obs}}) =&\n\\int\n p(\\theta, x^{\\text{mis}} \\mid x^{\\text{obs}})\n \\mathrm{d}x^{\\text{mis}} \\\\\n=&\n\\int\n p(\\theta \\mid x^{\\text{obs}}, x^{\\text{mis}})\n p(x^{\\text{mis}} \\mid x^{\\text{obs}})\n \\mathrm{d}x^{\\text{mis}} \\\\\n=& \\int\n p(\\theta \\mid x^{\\text{imp}})\n p(x^{\\text{mis}} \\mid x^{\\text{obs}})\n \\mathrm{d}x^{\\text{mis}},\n\\end{align*}\\] where \\(x^{\\text{imp}}\\) is a data set that includes imputed values of \\(x^{\\text{mis}}\\).\nThe equations above show that we do not need to describe \\(p(\\theta \\mid x^{\\text{obs}})\\) directly. Instead, we can first find a way to sample from \\(x^{\\text{mis}}\\) based on \\(x^{\\text{obs}}\\), and then use these samples to fit the model \\(p(\\theta \\mid x^\\text{imp})\\), treating \\(x^\\text{imp}\\) as if it was \\(x^\\text{comp}\\).\nNote that the model for \\(p(x^{\\text{mis}} \\mid x^{\\text{obs}})\\) needs to be explicit. In a typical regression setting with covariates \\(x\\) and outcome \\(y\\), this model for \\(x\\) is not needed because inferences for the regression parameters are independent of the model of \\(x\\) when \\(x\\) is fully observed. 
But with missing data and multiple imputation, we need to generate values for \\(x^{\\text{mis}}\\) based on \\(x^{\\text{obs}}\\), which in turn needs an explicit model for \\(x^{\\text{mis}}\\).\nThus, the general outline for multiple imputation with a number \\(M\\) of imputations is:\n\nDraw \\(M\\) random samples \\(x^{\\text{mis}}_1, \\ldots,\nx^{\\text{mis}}_M\\) from the posterior predictive distribution \\(p(x^{\\text{mis}} \\mid x^{\\text{obs}})\\).\nUse these samples to get \\(M\\) different complete data sets \\(x_1^\\text{imp}, \\ldots, x_M^\\text{imp}\\).\nFor each of the \\(M\\) imputed data sets, sample from the model of interest \\(p(\\theta \\mid x_m^\\text{imp})\\), each time treating \\(x_m^\\text{imp}\\) as if it was \\(x^\\text{comp}\\).\nCombine the draws for \\(\\theta\\) from all \\(M\\) fits.\n\n\n\nFollowing the notation above, we can express the outline with the pseudocode below:\ntheta_estimates = []\nfor (m in 1:M) {\n x_mis <- get_imputations(x_obs)\n x_imp <- add_imputations(x_obs, x_mis)\n imputation_fits <- my_model.fit(x_imp).draws()\n theta_estimates <- theta_estimates.append(imputation_fits)\n}\nEach of the functions in this pseudocode can encapsulate simple or complex procedures. get_imputations(), for example, may impute missing data by taking values directly from x_obs; or it may model a regression where an \\(x_{\\cdot, k}\\) with missing values is the outcome and the \\(x_{\\cdot, j}\\) without missing observations are predictors. 
Below we illustrate how to implement this second approach in Stan.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Multiple imputation" + ] + }, + { + "objectID": "stan-users-guide/multiple-imputation.html#imputing-one-variable-in-stan", + "href": "stan-users-guide/multiple-imputation.html#imputing-one-variable-in-stan", + "title": "Multiple imputation", + "section": "", + "text": "Imagine our data set is composed of two numerical, continuous variables \\(x\\) and \\(y\\), and one binary variable \\(z\\). We want to estimate the parameters that govern the conditional association \\(p(y \\mid x, z)\\), but several values of \\(x\\) are missing. So, before we fit this model for \\(y\\), we impute the missing values in \\(x\\).\nWe first find all the fully observed data points and identify their values as \\(x^{\\text{obs}}\\), \\(y^{\\text{obs}}\\), and \\(z^{\\text{obs}}\\). Then we use these data to fit the model \\[\nx^{\\text{obs}} \\sim \\text{normal}(\\gamma_0 + \\gamma_1 y^{\\text{obs}} +\n \\gamma_2 z^{\\text{obs}}, \\lambda).\n\\]\nThis model can then give us samples for the missing values \\(x^{\\text{mis}}\\) conditional on the corresponding values of \\(y\\) and \\(z\\), which we call \\(y^{\\text{aux}}\\) and \\(z^{\\text{aux}}\\). 
The Stan code for this imputation is:\ndata {\n int<lower=0> N_obs;\n vector[N_obs] x_obs;\n vector[N_obs] y_obs;\n array[N_obs] int<lower=0, upper=1> z_obs; \n int<lower=0> N_mis; \n vector[N_mis] y_aux; \n array[N_mis] int<lower=0, upper=1> z_aux; \n}\nparameters {\n vector[3] gamma;\n real<lower=0> lambda;\n}\nmodel {\n gamma ~ normal(0, 1);\n lambda ~ exponential(1);\n x_obs ~ normal(gamma[1] + gamma[2] * y_obs + gamma[3] * z_obs,\n lambda);\n}\ngenerated quantities {\n array[N_mis] x_imp = normal_rng(gamma[1] + gamma[2] * y_aux[n]\n + gamma[3] * z_aux[n], lambda);\n}\nThe generated quantities block automatically samples \\(\\gamma_0\\), \\(\\gamma_1\\), \\(\\gamma_2\\), and \\(\\lambda\\) from their posterior distributions. So, the random draws of x_imp incorporate uncertainty from the estimated parameters and from the sampling variation in \\(x^{\\text{mis}}\\).\nMultiple posterior draws of \\(x^{\\text{mis}}\\) give us multiple imputed data sets. With these data sets we can model \\(p(y \\mid \\beta, \\sigma, x, z)\\) as \\[\ny \\sim \\text{normal}(\\beta_0 + \\beta_1 x^\\text{comp} + \\beta_2 z,\n \\sigma),\n\\] where \\(x^\\text{comp}\\) contains observed and imputed values. The Stan code for this model is:\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n vector[N] z;\n}\nparameters {\n vector[3] beta;\n real<lower=0> sigma;\n}\nmodel {\n beta ~ normal(0, 1);\n sigma ~ exponential(1);\n y ~ normal(beta[1] + beta[2] * x + beta[3] * z, sigma);\n}\nWith multiple imputation, the model for \\(p(y \\mid \\beta, \\sigma, x, z)\\) does not need to distinguish between observed and imputed values of \\(x\\). 
Instead, this model can treat all imputed data sets as complete because we can combine all the posterior draws from multiple fits.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Multiple imputation" + ] + }, + { + "objectID": "stan-users-guide/multiple-imputation.html#imputing-two-or-more-variables", + "href": "stan-users-guide/multiple-imputation.html#imputing-two-or-more-variables", + "title": "Multiple imputation", + "section": "", + "text": "A more general scenario involves an outcome variable \\(y\\) and several explanatory variables \\(x_1, \\ldots, x_K\\), each of which can have missing values that we want to impute.\nOne solution is to do multiple imputation through chained equations, a procedure often called “MICE”1. The MICE procedure in this scenario is:\n\nInitialize missing values in all \\(x_i\\). For each variable \\(x_i\\) with missing entries, fill its missing values with random samples from its observed values (or use another simple initialization rule).\nUpdate each \\(x_i\\) given \\(y\\) and the other \\(x\\)’s. For \\(i=1, \\ldots, K\\), fit a model for the observed \\(x_i\\) conditional on the current versions (with observed and imputed values) of \\(y\\) and \\(x_{-i} = x_1, \\ldots, x_{i-1}, x_{i+1}, \\ldots,\nx_{K}\\). Use this model to draw imputations from the predictive distribution of the missing \\(x_i\\). Completing this step for all \\(i = 1, \\ldots, K\\) constitutes a single imputation cycle.\nWarmup period. Repeat the imputation cycle in step 1 several times as warmup to let the imputations stabilize.\nCreate M imputed datasets. After the warmup, record the current complete dataset as one imputed dataset. 
Then either restart from step 0 and repeat steps 1–3 until you have \\(M\\) imputed datasets; or do a single long run, i.e., continue iterating steps 1–2 and save the imputed dataset at \\(M\\) well-spaced iterations (e.g., every \\(S\\) cycles) to obtain \\(M\\) imputed datasets, without restarting from step 0 each time.\nFit the target model \\(p(y \\mid \\beta, \\sigma, x_1, \\ldots,\nx_K)\\) separately to each of the \\(M\\) imputed datasets and save the posterior draws for the parameters of interest.\nCombine the draws (or other summaries) from all \\(M\\) fits.\n\nNote that, as described here, the MICE procedure does not guarantee that the conditional distributions of all variables will be compatible. Compatibility means that there is a joint distribution for all the variables used in the imputation that can be decomposed as the conditional distributions we used. An incompatible imputation model will technically not sample from any well-defined probability distribution. But the consequences of this are not always serious. See section 6.4 in Carpenter et al. (2023) for a more detailed explanation of compatibility and of the related concept of congeniality.\n\n\nImagine that we again want to use numerical variables \\(x\\) and \\(y\\), and dichotomic variable \\(z\\) to estimate the parameters in \\(p(y \\mid \\beta, \\sigma, x, z)\\). But now both \\(x\\) and \\(z\\) have missing values that we want to impute.\nTo apply the MICE procedure in this example, we can reuse the models for \\(x\\) and \\(y\\) that we defined above. 
We also need a new model to impute \\(z^{\\text{mis}}\\), so we use the logistic regression \\[\nz^{\\text{obs}} \\sim \\text{Bernoulli}(\n \\text{logit}^{-1}(\n \\alpha_0 + \\alpha_1 y^{\\text{obs}} + \\alpha_2 x^{\\text{obs}})\n ).\n\\] Here, \\(z^{\\text{obs}}\\) contains only the completely observed values from our original data set, while \\(x^{\\text{obs}}\\) and \\(y^{\\text{obs}}\\) contain all the values that correspond to \\(z^{\\text{obs}}\\). Thus, \\(x^{\\text{obs}}\\) can include observed and imputed values, and \\(y^{\\text{obs}}\\) need not be the same as in the model for \\(x^{\\text{obs}}\\).\nWith this model we can sample values for \\(z^{\\text{mis}}\\) conditional on its corresponding values \\(y^{\\text{aux}}\\) and \\(x^{\\text{aux}}\\).\nThe Stan code to impute \\(z^{\\text{mis}}\\) is:\ndata {\n int<lower=0> N_obs;\n array[N_obs] int<lower=0, upper=1> z_obs;\n vector[N_obs] y_obs;\n vector[N_obs] x_obs;\n int<lower=0> N_mis;\n vector[N_mis] y_aux;\n vector[N_mis] x_aux;\n}\nparameters {\n vector[3] gamma;\n}\nmodel {\n gamma ~ normal(0, 1);\n z_obs ~ bernoulli_logit(gamma[1] + gamma[2] * y_obs \n + gamma[3] * x_obs);\n}\ngenerated quantities {\n array[N_mis] int z_imp = bernoulli_logit_rng(gamma[1]\n + gamma[2] * y_aux\n + gamma[3] * x_aux);\n}\nThe pseudocode for this example of MICE, restarting the imputations after each imputation cycle, is shown below. 
Note that we do not need to initialize \\(x^{\\text{mis}}\\) in step 0 because we can impute it directly with the model for \\(x^{\\text{obs}}\\).\ncompleted_datasets <- []\nfor (m in 1:M) {\n data_m <- copy(data_orig)\n data_m.z[missing_idz] <- random_sample(data_m.z[observed_idz])\n for (cycle in 1:n_warm) {\n stanmod_x <- build_model(\"model_for_x.stan\")\n fit_x <- stanmod_x.fit(\n obs_data=data_m[observed_idx],\n mis_data=data_m[missing_idx]\n )\n data_m.x[missing_idx] <- get_imputations(fit_x, \"x_imp\")\n stanmod_z <- build_model(\"model_for_z.stan\")\n fit_z <- stanmod_z.fit(\n obs_data=data_m[observed_idz],\n mis_data=data_m[missing_idz]\n )\n data_m.z[missing_idz] <- get_imputations(fit_z, \"z_imp\")\n }\n completed_datasets.append(data_m)\n}\nall_draws <- []\nfor (dataset in completed_datasets) {\n stanmod_y <- build_model(\"model_for_y.stan\")\n fit_y <- stanmod_y.fit(dataset)\n all_draws.append(extract_draws(fit_y))\n}\nall_results <- combine_results(all_draws)", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Multiple imputation" + ] + }, + { + "objectID": "stan-users-guide/multiple-imputation.html#combining-posterior-draws", + "href": "stan-users-guide/multiple-imputation.html#combining-posterior-draws", + "title": "Multiple imputation", + "section": "", + "text": "With Stan’s MCMC sampler, we can treat posterior draw chains from imputed data sets as if they were chains based on complete data. There is one important difference. Multiple imputation expresses uncertainty in the missing values as consistent differences in the estimates obtained from different imputed data sets. This means that chains obtained from the same imputed data set should converge, but chains obtained from different data sets do not have to. So, we need not worry if diagnostics2 signal that the chains from different imputed data sets are not converging properly. 
See an example in Bürkner (2025).", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Multiple imputation" + ] + }, + { + "objectID": "stan-users-guide/multiple-imputation.html#cut-models", + "href": "stan-users-guide/multiple-imputation.html#cut-models", + "title": "Multiple imputation", + "section": "", + "text": "A full Bayesian probability model includes a feedback flow of information between all parameters and all data. Cut models separate some parts of this feedback flow so that different subsets of data influence only some parameters in the model (see Plummer (2015)).\nMultiple imputation interrupts the flow of information from data to parameters. In our regression above, for example, the imputations influence the distribution of the parameters in \\(p(y \\mid x_1, \\ldots, x_K)\\), but these parameters do not influence the imputations. So, we could use multiple imputation to implement a cut model.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Multiple imputation" + ] + }, + { + "objectID": "stan-users-guide/multiple-imputation.html#footnotes", + "href": "stan-users-guide/multiple-imputation.html#footnotes", + "title": "Multiple imputation", + "section": "Footnotes", + "text": "Footnotes\n\n\nSection 5 of chapter 4 in van Buuren (2018) details the MICE procedure in a frequentist context.↩︎\nSuch as \\(\\hat{R}\\). See Split R-hat for detecting non-stationarity in the Reference Manual.↩︎", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Multiple imputation" + ] + }, + { + "objectID": "stan-users-guide/missing-data.html", + "href": "stan-users-guide/missing-data.html", + "title": "Missing Data and Partially Known Parameters", + "section": "", + "text": "Bayesian inference supports a general approach to missing data in which any missing data item is represented as a parameter that is estimated in the posterior (Gelman et al. 2013). 
If the missing data are not explicitly modeled, as in the predictors for most regression models, then the result is an improper prior on the parameter representing the missing predictor.\nMixing arrays of observed and missing data can be difficult to include in Stan, partly because it can be tricky to model discrete unknowns in Stan and partly because unlike some other statistical languages (for example, R and Bugs), Stan requires observed and unknown quantities to be defined in separate places in the model. Thus it can be necessary to include code in a Stan program to splice together observed and missing parts of a data structure. Examples are provided later in the chapter.\n\n\nStan treats variables declared in the data and transformed data blocks as known and the variables in the parameters block as unknown.\nAn example involving missing normal observations could be coded as follows.1\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_mis;\n array[N_obs] real y_obs;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n array[N_mis] real y_mis;\n}\nmodel {\n y_obs ~ normal(mu, sigma);\n y_mis ~ normal(mu, sigma);\n}\nThe number of observed and missing data points are coded as data with non-negative integer variables N_obs and N_mis. The observed data are provided as an array data variable y_obs. The missing data are coded as an array parameter, y_mis. The ordinary parameters being estimated, the location mu and scale sigma, are also coded as parameters. The model is vectorized on the observed and missing data; combining them in this case would be less efficient because the data observations would be promoted and have needless derivatives calculated.\n\n\n\nIn some situations, such as when a multivariate probability function has partially observed outcomes or parameters, it will be necessary to create a vector mixing known (data) and unknown (parameter) values. 
This can be done in Stan by creating a vector or array in the transformed parameters block and assigning to it.\nThe following example involves a bivariate covariance matrix in which the variances are known, but the covariance is not.\ndata {\n int<lower=0> N;\n array[N] vector[2] y;\n real<lower=0> var1;\n real<lower=0> var2;\n}\ntransformed data {\n real<lower=0> max_cov = sqrt(var1 * var2);\n real<upper=0> min_cov = -max_cov;\n}\nparameters {\n vector[2] mu;\n real<lower=min_cov, upper=max_cov> cov;\n}\ntransformed parameters {\n matrix[2, 2] Sigma;\n Sigma[1, 1] = var1;\n Sigma[1, 2] = cov;\n Sigma[2, 1] = cov;\n Sigma[2, 2] = var2;\n}\nmodel {\n y ~ multi_normal(mu, Sigma);\n}\nThe variances are defined as data in variables var1 and var2, whereas the covariance is defined as a parameter in variable cov. The \\(2 \\times 2\\) covariance matrix Sigma is defined as a transformed parameter, with the variances assigned to the two diagonal elements and the covariance to the two off-diagonal elements.\nThe constraint on the covariance declaration ensures that the resulting covariance matrix Sigma is positive definite. The bound, plus or minus the square root of the product of the variances, is defined as transformed data so that it is only calculated once.\nThe vectorization of the multivariate normal is critical for efficiency here. The transformed parameter Sigma could be defined as a local variable within the model block if it does not need to be included in the sampler’s output.\n\n\n\nIf the missing data are part of some larger data structure, then it can often be effectively reassembled using index arrays and slicing. 
Here’s an example for time-series data, where only some entries in the series are observed.\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_mis;\n array[N_obs] int<lower=1, upper=N_obs + N_mis> ii_obs;\n array[N_mis] int<lower=1, upper=N_obs + N_mis> ii_mis;\n array[N_obs] real y_obs;\n}\ntransformed data {\n int<lower=0> N = N_obs + N_mis;\n}\nparameters {\n array[N_mis] real y_mis;\n real<lower=0> sigma;\n}\ntransformed parameters {\n array[N] real y;\n y[ii_obs] = y_obs;\n y[ii_mis] = y_mis;\n}\nmodel {\n sigma ~ gamma(1, 1);\n y[1] ~ normal(0, 100);\n y[2:N] ~ normal(y[1:(N - 1)], sigma);\n}\nThe index arrays ii_obs and ii_mis contain the indexes into the final array y of the observed data (coded as a data vector y_obs) and the missing data (coded as a parameter vector y_mis). See the time series chapter for further discussion of time-series model and specifically the autoregression section for an explanation of the vectorization for y as well as an explanation of how to convert this example to a full AR(1) model. To ensure y[1] has a proper posterior in case it is missing, we have given it an explicit, albeit broad, prior.\nAnother potential application would be filling the columns of a data matrix of predictors for which some predictors are missing; matrix columns can be accessed as vectors and assigned the same way, as in\nx[N_obs_2, 2] = x_obs_2;\nx[N_mis_2, 2] = x_mis_2;\nwhere the relevant variables are all hard coded with index 2 because Stan doesn’t support ragged arrays. These could all be packed into a single array with more fiddly indexing that slices out vectors from longer vectors (see the ragged data structures section for a general discussion of coding ragged data structures in Stan).\n\n\n\nRick Farouni, on the Stan users group, inquired as to how to build a Cholesky factor for a covariance matrix with a unit diagonal, as used in Bayesian factor analysis (Aguilar and West 2000). 
This can be accomplished by declaring the below-diagonal elements as parameters, then filling the full matrix as a transformed parameter.\ndata {\n int<lower=2> K;\n}\ntransformed data {\n int<lower=1> K_choose_2;\n K_choose_2 = (K * (K - 1)) / 2;\n}\nparameters {\n vector[K_choose_2] L_lower;\n}\ntransformed parameters {\n cholesky_factor_cov[K] L;\n for (k in 1:K) {\n L[k, k] = 1;\n }\n {\n int i;\n for (m in 2:K) {\n for (n in 1:(m - 1)) {\n L[m, n] = L_lower[i];\n L[n, m] = 0;\n i += 1;\n }\n }\n }\n}\nIt is most convenient to place a prior directly on L_lower. An alternative would be a prior for the full Cholesky factor L, because the transform from L_lower to L is just the identity and thus does not require a Jacobian adjustment (despite the warning from the parser, which is not smart enough to do the code analysis to infer that the transform is linear). It would not be at all convenient to place a prior on the full covariance matrix L * L', because that would require a Jacobian adjustment; the exact adjustment is detailed in the reference manual.\n\n\n\nIt’s often the case that one or more components of a multivariate outcome are missing.2\nAs an example, we’ll consider the bivariate distribution, which is easily marginalized. 
The coding here is brute force, representing both an array of vector observations y and a boolean array y_observed to indicate which values were observed (others can have dummy values in the input).\narray[N] vector[2] y;\narray[N, 2] int<lower=0, upper=1> y_observed;\nIf both components are observed, we model them using the full multi-normal, otherwise we model the marginal distribution of the component that is observed.\nfor (n in 1:N) {\n if (y_observed[n, 1] && y_observed[n, 2]) {\n y[n] ~ multi_normal(mu, Sigma);\n } else if (y_observed[n, 1]) {\n y[n, 1] ~ normal(mu[1], sqrt(Sigma[1, 1]));\n } else if (y_observed[n, 2]) {\n y[n, 2] ~ normal(mu[2], sqrt(Sigma[2, 2]));\n }\n}\nIt’s a bit more work, but much more efficient to vectorize these distribution statements. In transformed data, build up three vectors of indices, for the three cases above:\ntransformed data {\n array[observed_12(y_observed)] int ns12;\n array[observed_1(y_observed)] int ns1;\n array[observed_2(y_observed)] int ns2;\n}\nYou will need to write functions that pull out the count of observations in each of the three situations. This must be done with functions because the result needs to go in top-level block variable size declaration. Then the rest of transformed data just fills in the values using three counters.\nint n12 = 1;\nint n1 = 1;\nint n2 = 1;\nfor (n in 1:N) {\n if (y_observed[n, 1] && y_observed[n, 2]) {\n ns12[n12] = n;\n n12 += 1;\n } else if (y_observed[n, 1]) {\n ns1[n1] = n;\n n1 += 1;\n } else if (y_observed[n, 2]) {\n ns2[n2] = n;\n n2 += 1;\n }\n}\nThen, in the model block, everything is vectorizable using those indexes constructed once in transformed data:\ny[ns12] ~ multi_normal(mu, Sigma);\ny[ns1] ~ normal(mu[1], sqrt(Sigma[1, 1]));\ny[ns2] ~ normal(mu[2], sqrt(Sigma[2, 2]));\nThe result will be much more efficient than using latent variables for the missing data, but it requires the multivariate distribution to be marginalized analytically. 
It’d be more efficient still to precompute the three arrays in the transformed data block, though the efficiency improvement will be relatively minor compared to vectorizing the probability functions.\nThis approach can easily be generalized with some index fiddling to the general multivariate case. The trick is to pull out entries in the covariance matrix for the missing components. It can also be used in situations such as multivariate differential equation solutions where only one component is observed, as in a phase-space experiment recording only time and position of a pendulum (and not recording momentum).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Missing Data and Partially Known Parameters" + ] + }, + { + "objectID": "stan-users-guide/missing-data.html#missing-data", + "href": "stan-users-guide/missing-data.html#missing-data", + "title": "Missing Data and Partially Known Parameters", + "section": "", + "text": "Stan treats variables declared in the data and transformed data blocks as known and the variables in the parameters block as unknown.\nAn example involving missing normal observations could be coded as follows.1\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_mis;\n array[N_obs] real y_obs;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n array[N_mis] real y_mis;\n}\nmodel {\n y_obs ~ normal(mu, sigma);\n y_mis ~ normal(mu, sigma);\n}\nThe number of observed and missing data points are coded as data with non-negative integer variables N_obs and N_mis. The observed data are provided as an array data variable y_obs. The missing data are coded as an array parameter, y_mis. The ordinary parameters being estimated, the location mu and scale sigma, are also coded as parameters. 
The model is vectorized on the observed and missing data; combining them in this case would be less efficient because the data observations would be promoted and have needless derivatives calculated.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Missing Data and Partially Known Parameters" + ] + }, + { + "objectID": "stan-users-guide/missing-data.html#partially-known-parameters.section", + "href": "stan-users-guide/missing-data.html#partially-known-parameters.section", + "title": "Missing Data and Partially Known Parameters", + "section": "", + "text": "In some situations, such as when a multivariate probability function has partially observed outcomes or parameters, it will be necessary to create a vector mixing known (data) and unknown (parameter) values. This can be done in Stan by creating a vector or array in the transformed parameters block and assigning to it.\nThe following example involves a bivariate covariance matrix in which the variances are known, but the covariance is not.\ndata {\n int<lower=0> N;\n array[N] vector[2] y;\n real<lower=0> var1;\n real<lower=0> var2;\n}\ntransformed data {\n real<lower=0> max_cov = sqrt(var1 * var2);\n real<upper=0> min_cov = -max_cov;\n}\nparameters {\n vector[2] mu;\n real<lower=min_cov, upper=max_cov> cov;\n}\ntransformed parameters {\n matrix[2, 2] Sigma;\n Sigma[1, 1] = var1;\n Sigma[1, 2] = cov;\n Sigma[2, 1] = cov;\n Sigma[2, 2] = var2;\n}\nmodel {\n y ~ multi_normal(mu, Sigma);\n}\nThe variances are defined as data in variables var1 and var2, whereas the covariance is defined as a parameter in variable cov. The \\(2 \\times 2\\) covariance matrix Sigma is defined as a transformed parameter, with the variances assigned to the two diagonal elements and the covariance to the two off-diagonal elements.\nThe constraint on the covariance declaration ensures that the resulting covariance matrix Sigma is positive definite. 
The bound, plus or minus the square root of the product of the variances, is defined as transformed data so that it is only calculated once.\nThe vectorization of the multivariate normal is critical for efficiency here. The transformed parameter Sigma could be defined as a local variable within the model block if it does not need to be included in the sampler’s output.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Missing Data and Partially Known Parameters" + ] + }, + { + "objectID": "stan-users-guide/missing-data.html#sliced-missing-data", + "href": "stan-users-guide/missing-data.html#sliced-missing-data", + "title": "Missing Data and Partially Known Parameters", + "section": "", + "text": "If the missing data are part of some larger data structure, then it can often be effectively reassembled using index arrays and slicing. Here’s an example for time-series data, where only some entries in the series are observed.\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_mis;\n array[N_obs] int<lower=1, upper=N_obs + N_mis> ii_obs;\n array[N_mis] int<lower=1, upper=N_obs + N_mis> ii_mis;\n array[N_obs] real y_obs;\n}\ntransformed data {\n int<lower=0> N = N_obs + N_mis;\n}\nparameters {\n array[N_mis] real y_mis;\n real<lower=0> sigma;\n}\ntransformed parameters {\n array[N] real y;\n y[ii_obs] = y_obs;\n y[ii_mis] = y_mis;\n}\nmodel {\n sigma ~ gamma(1, 1);\n y[1] ~ normal(0, 100);\n y[2:N] ~ normal(y[1:(N - 1)], sigma);\n}\nThe index arrays ii_obs and ii_mis contain the indexes into the final array y of the observed data (coded as a data vector y_obs) and the missing data (coded as a parameter vector y_mis). See the time series chapter for further discussion of time-series model and specifically the autoregression section for an explanation of the vectorization for y as well as an explanation of how to convert this example to a full AR(1) model. 
To ensure y[1] has a proper posterior in case it is missing, we have given it an explicit, albeit broad, prior.\nAnother potential application would be filling the columns of a data matrix of predictors for which some predictors are missing; matrix columns can be accessed as vectors and assigned the same way, as in\nx[N_obs_2, 2] = x_obs_2;\nx[N_mis_2, 2] = x_mis_2;\nwhere the relevant variables are all hard coded with index 2 because Stan doesn’t support ragged arrays. These could all be packed into a single array with more fiddly indexing that slices out vectors from longer vectors (see the ragged data structures section for a general discussion of coding ragged data structures in Stan).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Missing Data and Partially Known Parameters" + ] + }, + { + "objectID": "stan-users-guide/missing-data.html#loading-matrix-for-factor-analysis", + "href": "stan-users-guide/missing-data.html#loading-matrix-for-factor-analysis", + "title": "Missing Data and Partially Known Parameters", + "section": "", + "text": "Rick Farouni, on the Stan users group, inquired as to how to build a Cholesky factor for a covariance matrix with a unit diagonal, as used in Bayesian factor analysis (Aguilar and West 2000). This can be accomplished by declaring the below-diagonal elements as parameters, then filling the full matrix as a transformed parameter.\ndata {\n int<lower=2> K;\n}\ntransformed data {\n int<lower=1> K_choose_2;\n K_choose_2 = (K * (K - 1)) / 2;\n}\nparameters {\n vector[K_choose_2] L_lower;\n}\ntransformed parameters {\n cholesky_factor_cov[K] L;\n for (k in 1:K) {\n L[k, k] = 1;\n }\n {\n int i;\n for (m in 2:K) {\n for (n in 1:(m - 1)) {\n L[m, n] = L_lower[i];\n L[n, m] = 0;\n i += 1;\n }\n }\n }\n}\nIt is most convenient to place a prior directly on L_lower. 
An alternative would be a prior for the full Cholesky factor L, because the transform from L_lower to L is just the identity and thus does not require a Jacobian adjustment (despite the warning from the parser, which is not smart enough to do the code analysis to infer that the transform is linear). It would not be at all convenient to place a prior on the full covariance matrix L * L', because that would require a Jacobian adjustment; the exact adjustment is detailed in the reference manual.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Missing Data and Partially Known Parameters" + ] + }, + { + "objectID": "stan-users-guide/missing-data.html#missing-multivariate-data", + "href": "stan-users-guide/missing-data.html#missing-multivariate-data", + "title": "Missing Data and Partially Known Parameters", + "section": "", + "text": "It’s often the case that one or more components of a multivariate outcome are missing.2\nAs an example, we’ll consider the bivariate distribution, which is easily marginalized. The coding here is brute force, representing both an array of vector observations y and a boolean array y_observed to indicate which values were observed (others can have dummy values in the input).\narray[N] vector[2] y;\narray[N, 2] int<lower=0, upper=1> y_observed;\nIf both components are observed, we model them using the full multi-normal, otherwise we model the marginal distribution of the component that is observed.\nfor (n in 1:N) {\n if (y_observed[n, 1] && y_observed[n, 2]) {\n y[n] ~ multi_normal(mu, Sigma);\n } else if (y_observed[n, 1]) {\n y[n, 1] ~ normal(mu[1], sqrt(Sigma[1, 1]));\n } else if (y_observed[n, 2]) {\n y[n, 2] ~ normal(mu[2], sqrt(Sigma[2, 2]));\n }\n}\nIt’s a bit more work, but much more efficient to vectorize these distribution statements. 
In transformed data, build up three vectors of indices, for the three cases above:\ntransformed data {\n array[observed_12(y_observed)] int ns12;\n array[observed_1(y_observed)] int ns1;\n array[observed_2(y_observed)] int ns2;\n}\nYou will need to write functions that pull out the count of observations in each of the three situations. This must be done with functions because the result needs to go in top-level block variable size declaration. Then the rest of transformed data just fills in the values using three counters.\nint n12 = 1;\nint n1 = 1;\nint n2 = 1;\nfor (n in 1:N) {\n if (y_observed[n, 1] && y_observed[n, 2]) {\n ns12[n12] = n;\n n12 += 1;\n } else if (y_observed[n, 1]) {\n ns1[n1] = n;\n n1 += 1;\n } else if (y_observed[n, 2]) {\n ns2[n2] = n;\n n2 += 1;\n }\n}\nThen, in the model block, everything is vectorizable using those indexes constructed once in transformed data:\ny[ns12] ~ multi_normal(mu, Sigma);\ny[ns1] ~ normal(mu[1], sqrt(Sigma[1, 1]));\ny[ns2] ~ normal(mu[2], sqrt(Sigma[2, 2]));\nThe result will be much more efficient than using latent variables for the missing data, but it requires the multivariate distribution to be marginalized analytically. It’d be more efficient still to precompute the three arrays in the transformed data block, though the efficiency improvement will be relatively minor compared to vectorizing the probability functions.\nThis approach can easily be generalized with some index fiddling to the general multivariate case. The trick is to pull out entries in the covariance matrix for the missing components. 
It can also be used in situations such as multivariate differential equation solutions where only one component is observed, as in a phase-space experiment recording only time and position of a pendulum (and not recording momentum).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Missing Data and Partially Known Parameters" + ] + }, + { + "objectID": "stan-users-guide/missing-data.html#footnotes", + "href": "stan-users-guide/missing-data.html#footnotes", + "title": "Missing Data and Partially Known Parameters", + "section": "Footnotes", + "text": "Footnotes\n\n\nA more meaningful estimation example would involve a regression of the observed and missing observations using predictors that were known for each and specified in the data block.↩︎\nThis is not the same as missing components of a multivariate predictor in a regression problem; in that case, you will need to represent the missing data as a parameter and impute missing values in order to feed them into the regression.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Missing Data and Partially Known Parameters" + ] + }, + { + "objectID": "stan-users-guide/matrices-arrays.html", + "href": "stan-users-guide/matrices-arrays.html", + "title": "Matrices, Vectors, Arrays, and Tuples", + "section": "", + "text": "This chapter provides pointers as to how to choose among the various container types (matrix, vector, array, and tuple) provided by Stan.\n\n\nStan provides three basic scalar types, int, real, and complex, as well as three basic linear algebra types, vector, row_vector, and matrix. Stan allows arrays of any dimensionality, containing any type of element (though that type must be declared and must be the same for all elements).\nThis leaves us in the awkward situation of having three one-dimensional containers, as exemplified by the following declarations.\narray[N] real a;\nvector[N] a;\nrow_vector[N] a;\nThese distinctions matter. 
Matrix types, like vector and row vector, are required for linear algebra operations. There is no automatic promotion of arrays to vectors because the target, row vector or column vector, is ambiguous. Similarly, row vectors are separated from column vectors because multiplying a row vector by a column vector produces a scalar, whereas multiplying in the opposite order produces a matrix.\nThe following code fragment shows all four ways to declare a two-dimensional container of size \\(M \\times N\\).\narray[M, N] real b; // b[m] : array[] real (efficient)\narray[M] vector[N] b; // b[m] : vector (efficient)\narray[M] row_vector[N] b; // b[m] : row_vector (efficient)\nmatrix[M, N] b; // b[m] : row_vector (inefficient)\nThe main differences among these choices involve efficiency for various purposes and the type of b[m], which is shown in comments to the right of the declarations. Thus the only way to efficiently iterate over row vectors is to use the third declaration, but if you need linear algebra on matrices, the only way to use matrix operations is to use the fourth declaration.\nThe inefficiencies due to any manual reshaping of containers are usually slight compared to what else is going on in a Stan program (typically a lot of gradient calculations).\n\n\n\nArrays may contain entries of any type, but the types must be the same for all entries. Matrices and vectors contain either real numbers or complex numbers, but all the contained types are the same (e.g., if a vector has a single complex typed entry, all the entries are complex).\nWith arrays or vectors, we can represent pairs of real numbers or pairs of complex numbers. For example, a complex_vector[3] holds exactly three complex numbers. With arrays and vectors, there is no way to represent a pair consisting of an integer and a real number.\nTuples provide a way to represent a sequence of values of heterogeneous types. 
For example, tuple(int, real) is the type of a pair consisting of an integer and a real number and tuple(array[5] int, vector[6]) is the type of pairs where the first element is a five-element array of integers, and the second is a six-element vector.\n\n\nTuples are declared using the keyword tuple followed by a sequence of type declarations in parentheses. Tuples are constructed using only parentheses. The following example illustrates both declaration and construction.\ntuple(int, vector[3]) ny = (5, [3, 2.9, 1.8]');\nThe elements of a tuple are accessed by position, starting from 1. For example, we can extract the elements of the tuple above using\nint n = ny.1;\nvector[3] y = ny.2;\nWe can also assign into the elements of a tuple.\ntuple(int, vector[3], complex) abc;\nabc.1 = 5;\nabc.2[1] = 3;\nabc.2[2] = 2.9;\nabc.2[3] = 1.4798;\nabc.3 = 2 + 1.9j;\nAs the cascaded indexing example shows, the result of abc.2 is an lvalue (i.e., something to which values may be assigned), and we can further index into it to create new lvalues (e.g., abc.2[1] pulls out the first element of the vector value of the second element of the tuple.)\nThere are two efficiency considerations for tuples. First, like the other container types, tuples are passed to functions by constant reference, which means only a pointer gets passed rather than copying the data. Second, like the array types, creating a tuple requires copying the data for all of its elements. For example, in the following code, the matrix is copied, entailing 1000 copies of scalar values.\nint a = 5;\nmatrix[10, 100] b = ...;\ntuple(int, matrix[10, 100]) ab = (a, b); // COPIES b\nb[1,1] = 10.3; // does NOT change ab\n\n\n\nTuples are primarily useful for two things. First, they provide a way to encapsulate a group of heterogeneous items so that they may be passed as a group. This lets us define arrays of structures as well as structures of arrays. 
For example, array[N] tuple(int, real, vector[5]) is an array of tuples, each of which has an integer, real, and vector component. Alternatively, we can represent the same information using a tuple of parallel arrays as tuple(array[N] int, array[N] real, array[N] vector[5]).\nThe second use is for function return values. Here, if a function computes two different things with different types, and the computation shares work, it’s best to write one function that returns both things. For example, an eigendecomposition returns a pair consisting of a vector of eigenvalues and a matrix of eigenvectors, whereas a singular value decomposition returns three matrices of different shapes. As another example, consider the QR decomposition of a matrix \\(A = Q \\cdot R\\), where \\(Q\\) is orthonormal and \\(R\\) is upper triangular. Before tuples were introduced in version 2.33, computing it required two function calls.\nmatrix[M, N] A = ...;\nmatrix[M, M] Q = qr_Q(A);\nmatrix[M, N] R = qr_R(A);\nWith tuples, this can be simplified to the following,\ntuple(matrix[M, M], matrix[M, N]) QR = qr(A);\nwith QR.1 being Q and QR.2 giving R.\n\n\n\n\nStan’s matrices, vectors, and array variables are sized when they are declared and may not be dynamically resized. Function arguments do not have sizes, but these sizes are fixed when the function is called and the container is instantiated. Also, declarations may be inside loops and thus may change over the course of running a program, but each time a declaration is visited, it declares a fixed size object.\nWhen an index is provided that is out of bounds, Stan throws a rejection error and computation on the current log density and gradient evaluation is halted and the algorithm is left to clean up the error. All of Stan’s containers check the sizes of all indexes.\n\n\n\nThe underlying matrix and linear algebra operations are implemented in terms of data types from the Eigen C++ library. 
By having vectors and matrices as basic types, no conversion is necessary when invoking matrix operations or calling linear algebra functions.\nArrays, on the other hand, are implemented as instances of the C++\nstd::vector class (not to be confused with Eigen’s Eigen::Vector class or Stan vectors). By implementing arrays this way, indexing is efficient because values can be returned by reference rather than copied by value.\n\n\nIn Stan models, there are a few minor efficiency considerations in deciding between a two-dimensional array and a matrix, which may seem interchangeable at first glance.\nFirst, matrices use a bit less memory than two-dimensional arrays. This is because they don’t store a sequence of arrays, but just the data and the two dimensions.\nSecond, matrices store their data in column-major order. Furthermore, all of the data in a matrix is guaranteed to be contiguous in memory. This is an important consideration for optimized code because bringing in data from memory to cache is much more expensive than performing arithmetic operations with contemporary CPUs. Arrays, on the other hand, only guarantee that the values of primitive types are contiguous in memory; otherwise, they hold copies of their values (which are returned by reference wherever possible).\nThird, both data structures are best traversed in the order in which they are stored. This also helps with memory locality. This is column-major for matrices, so the following order is appropriate.\nmatrix[M, N] a;\n//...\nfor (n in 1:N) {\n for (m in 1:M) {\n // ... do something with a[m, n] ...\n }\n}\nArrays, on the other hand, should be traversed in row-major order (i.e., last index fastest), as in the following example.\narray[M, N] real a;\n// ...\nfor (m in 1:M) {\n for (n in 1:N) {\n // ... do something with a[m, n] ...\n }\n}\nThe first use of a[m ,n] should bring a[m] into memory. 
Overall, traversing matrices is more efficient than traversing arrays.\nThis is true even for arrays of matrices. For example, the ideal order in which to traverse a two-dimensional array of matrices is\narray[I, J] matrix[M, N] b;\n// ...\nfor (i in 1:I) {\n for (j in 1:J) {\n for (n in 1:N) {\n for (m in 1:M) {\n // ... do something with b[i, j, m, n] ...\n }\n }\n }\n}\nIf a is a matrix, the notation a[m] picks out row m of that matrix. This is a rather inefficient operation for matrices. If indexing of vectors is needed, it is much better to declare an array of vectors. That is, this\narray[M] row_vector[N] b;\n// ...\nfor (m in 1:M) {\n // ... do something with row vector b[m] ...\n}\nis much more efficient than the pure matrix version\nmatrix[M, N] b;\n// ...\nfor (m in 1:M) {\n // ... do something with row vector b[m] ...\n}\nSimilarly, indexing an array of column vectors is more efficient than using the col function to pick out a column of a matrix.\nIn contrast, whatever can be done as pure matrix algebra will be the fastest. So if I want to create a vector of predictor-coefficient dot-products, it’s more efficient to do this\nmatrix[N, K] x; // predictors (aka covariates)\n// ...\nvector[K] beta; // coeffs\n// ...\nvector[N] y_hat; // linear prediction\n// ...\ny_hat = x * beta;\nthan it is to do this\narray[N] row_vector[K] x; // predictors (aka covariates)\n// ...\nvector[K] beta; // coeffs\n// ...\nvector[N] y_hat; // linear prediction\n// ...\nfor (n in 1:N) {\n y_hat[n] = x[n] * beta;\n}\n\n\n\nFor use purely as a container, there is really nothing to decide among vectors, row vectors and one-dimensional arrays. The Eigen::Vector template specialization and the std::vector template class are implemented similarly as containers of double values (the type real in Stan). 
Only arrays in Stan are allowed to store integer values.\n\n\n\n\nThe key to understanding efficiency of matrix and vector representations is memory locality and reference passing versus copying.\n\n\nCPUs on computers bring in memory in blocks through layers of caches. Fetching from memory is much slower than performing arithmetic operations. The only way to make container operations fast is to respect memory locality and access elements that are close together in memory sequentially in the program.\n\n\n\nMatrices are stored internally in column-major order. That is, an \\(M\n\\times N\\) matrix stores its elements in the order \\[\n(1,1), (2, 1), \\dotsc, (M, 1), (1, 2), \\dotsc, (M, 2), \\dotsc, (1, N),\n\\dotsc, (M, N).\n\\]\nThis means that it’s much more efficient to write loops over matrices column by column, as in the following example.\nmatrix[M, N] a;\n// ...\nfor (n in 1:N) {\n for (m in 1:M) {\n // ... do something with a[m, n] ...\n }\n}\nIt also follows that pulling a row out of a matrix is not memory local, as it has to stride over the whole sequence of values. It also requires a copy operation into a new data structure as it is not stored internally as a unit in a matrix. For sequential access to row vectors in a matrix, it is much better to use an array of row vectors, as in the following example.\narray[M] row_vector[N] a;\n// ...\nfor (m in 1:M) {\n // ... do something with row vector a[m] ...\n}\nEven if what is done involves a function call, the row vector a[m] will not have to be copied.\n\n\n\nArrays are stored internally following their data structure. That means a two dimensional array is stored in row-major order. Thus it is efficient to pull out a “row” of a two-dimensional array.\narray[M, N] real a;\n// ...\nfor (m in 1:M) {\n // ... 
do something with a[m] ...\n}\nA difference with matrices is that the entries a[m] in the two dimensional array are not necessarily adjacent in memory, so there is no guarantee that iterating over all the elements in a two-dimensional array will provide memory locality across the “rows.”\n\n\n\n\nThere is no automatic conversion among matrices, vectors, and arrays in Stan. But there are a wide range of conversion functions to convert a matrix into a vector, or a multi-dimensional array into a one-dimensional array, or convert a vector to an array. See the section on mixed matrix and array operations in the functions reference manual for a complete list of conversion operators and the multi-indexing chapter for some reshaping operations involving multiple indexing and range indexing.\n\n\n\nStan expressions are all evaluated before assignment happens, so there is no danger of so-called aliasing in array, vector, or matrix operations. In the following Stan program, contrast the behavior of the loop assignment to u and the compound slicing assignment to x, where u and x start with the same values.\ntransformed data {\n vector[4] x = [ 1, 2, 3, 4 ]';\n vector[4] u = [ 1, 2, 3, 4 ]';\n\n for (t in 2:4) {\n u[t] = u[t - 1] * 3;\n }\n\n x[2:4] = x[1:3] * 3;\n\n print(\"u = \", u);\n print(\"x = \", x);\n}\nThe output it produces is,\nu = [1, 3, 9, 27]\nx = [1, 3, 6, 9]\nIn the loop version assigning to u, the values are updated before being used to define subsequent values; in the sliced expression assigning to x, the entire right-hand side is evaluated before assigning to the left-hand side.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Matrices, Vectors, Arrays, and Tuples" + ] + }, + { + "objectID": "stan-users-guide/matrices-arrays.html#basic-motivation", + "href": "stan-users-guide/matrices-arrays.html#basic-motivation", + "title": "Matrices, Vectors, Arrays, and Tuples", + "section": "", + "text": "Stan provides three basic scalar types, int, 
real, and complex, as well as three basic linear algebra types, vector, row_vector, and matrix. Stan allows arrays of any dimensionality, containing any type of element (though that type must be declared and must be the same for all elements).\nThis leaves us in the awkward situation of having three one-dimensional containers, as exemplified by the following declarations.\narray[N] real a;\nvector[N] a;\nrow_vector[N] a;\nThese distinctions matter. Matrix types, like vector and row vector, are required for linear algebra operations. There is no automatic promotion of arrays to vectors because the target, row vector or column vector, is ambiguous. Similarly, row vectors are separated from column vectors because multiplying a row vector by a column vector produces a scalar, whereas multiplying in the opposite order produces a matrix.\nThe following code fragment shows all four ways to declare a two-dimensional container of size \\(M \\times N\\).\narray[M, N] real b; // b[m] : array[] real (efficient)\narray[M] vector[N] b; // b[m] : vector (efficient)\narray[M] row_vector[N] b; // b[m] : row_vector (efficient)\nmatrix[M, N] b; // b[m] : row_vector (inefficient)\nThe main differences among these choices involve efficiency for various purposes and the type of b[m], which is shown in comments to the right of the declarations. 
Thus the only way to efficiently iterate over row vectors is to use the third declaration, but if you need linear algebra on matrices, the only way to use matrix operations is to use the fourth declaration.\nThe inefficiencies due to any manual reshaping of containers are usually slight compared to what else is going on in a Stan program (typically a lot of gradient calculations).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Matrices, Vectors, Arrays, and Tuples" + ] + }, + { + "objectID": "stan-users-guide/matrices-arrays.html#tuple-types", + "href": "stan-users-guide/matrices-arrays.html#tuple-types", + "title": "Matrices, Vectors, Arrays, and Tuples", + "section": "", + "text": "Arrays may contain entries of any type, but the types must be the same for all entries. Matrices and vectors contain either real numbers or complex numbers, but all the contained types are the same (e.g., if a vector has a single complex typed entry, all the entries are complex).\nWith arrays or vectors, we can represent pairs of real numbers or pairs of complex numbers. For example, a complex_vector[3] holds exactly three complex numbers. With arrays and vectors, there is no way to represent a pair consisting of an integer and a real number.\nTuples provide a way to represent a sequence of values of heterogeneous types. For example, tuple(int, real) is the type of a pair consisting of an integer and a real number and tuple(array[5] int, vector[6]) is the type of pairs where the first element is a five-element array of integers, and the second is a six-element vector.\n\n\nTuples are declared using the keyword tuple followed by a sequence of type declarations in parentheses. Tuples are constructed using only parentheses. The following example illustrates both declaration and construction.\ntuple(int, vector[3]) ny = (5, [3, 2.9, 1.8]');\nThe elements of a tuple are accessed by position, starting from 1. 
For example, we can extract the elements of the tuple above using\nint n = ny.1;\nvector[3] y = ny.2;\nWe can also assign into the elements of a tuple.\ntuple(int, vector[3], complex) abc;\nabc.1 = 5;\nabc.2[1] = 3;\nabc.2[2] = 2.9;\nabc.2[3] = 1.4798;\nabc.3 = 2 + 1.9j;\nAs the cascaded indexing example shows, the result of abc.1 is an lvalue (i.e., something to which values may be assigned), and we can further index into it to create new lvalues (e.g., abc.2[1] pulls out the first element of the vector value of the second element of the tuple.)\nThere are two efficiency considerations for tuples. First, like the other container types, tuples are passed to functions by constant reference, which means only a pointer gets passed rather than copying the data. Second, like the array types, creating a tuple requires copying the data for all of its elements. For example, in the following code, the matrix is copied, entailing 1000 copies of scalar values.\nint a = 5;\nmatrix[10, 100] b = ...;\ntuple(int, matrix[10, 100]) ab = (a, b); // COPIES b\nb[1,1] = 10.3; // does NOT change ab\n\n\n\nTuples are primarily useful for two things. First, they provide a way to encapsulate a group of heterogeneous items so that they may be passed as a group. This lets us define arrays of structures as well as structures of arrays. For example, array[N] tuple(int, real, vector[5]) is an array of tuples, each of which has an integer, real, and vector component. Alternatively, we can represent the same information using a tuple of parallel arrays as tuple(array[N] int, array[N] real, array[N] vector[5]).\nThe second use is for function return values. Here, if a function computes two different things with different types, and the computation shares work, it’s best to write one function that returns both things. 
For example, an eigendecomposition returns a pair consisting of a vector of eigenvalues and a matrix of eigenvectors, whereas a singular value decomposition returns three matrices of different shapes. Consider the QR decomposition of a matrix \\(A = Q \\cdot R\\), where \\(Q\\) is orthonormal and \\(R\\) is upper triangular. Before tuples were introduced in version 2.33, this required two function calls.\nmatrix[M, N] A = ...;\nmatrix[M, M] Q = qr_Q(A);\nmatrix[M, N] R = qr_R(A);\nWith tuples, this can be simplified to the following,\ntuple(matrix[M, M], matrix[M, N]) QR = qr(A);\nwith QR.1 being Q and QR.2 giving R.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Matrices, Vectors, Arrays, and Tuples" + ] + }, + { + "objectID": "stan-users-guide/matrices-arrays.html#fixed-sizes-and-indexing-out-of-bounds", + "href": "stan-users-guide/matrices-arrays.html#fixed-sizes-and-indexing-out-of-bounds", + "title": "Matrices, Vectors, Arrays, and Tuples", + "section": "", + "text": "Stan’s matrices, vectors, and array variables are sized when they are declared and may not be dynamically resized. Function arguments do not have sizes, but these sizes are fixed when the function is called and the container is instantiated. Also, declarations may be inside loops and thus may change over the course of running a program, but each time a declaration is visited, it declares a fixed size object.\nWhen an index is provided that is out of bounds, Stan throws a rejection error and computation on the current log density and gradient evaluation is halted and the algorithm is left to clean up the error. 
All of Stan’s containers check the sizes of all indexes.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Matrices, Vectors, Arrays, and Tuples" + ] + }, + { + "objectID": "stan-users-guide/matrices-arrays.html#indexing-efficiency.section", + "href": "stan-users-guide/matrices-arrays.html#indexing-efficiency.section", + "title": "Matrices, Vectors, Arrays, and Tuples", + "section": "", + "text": "The underlying matrix and linear algebra operations are implemented in terms of data types from the Eigen C++ library. By having vectors and matrices as basic types, no conversion is necessary when invoking matrix operations or calling linear algebra functions.\nArrays, on the other hand, are implemented as instances of the C++\nstd::vector class (not to be confused with Eigen’s Eigen::Vector class or Stan vectors). By implementing arrays this way, indexing is efficient because values can be returned by reference rather than copied by value.\n\n\nIn Stan models, there are a few minor efficiency considerations in deciding between a two-dimensional array and a matrix, which may seem interchangeable at first glance.\nFirst, matrices use a bit less memory than two-dimensional arrays. This is because they don’t store a sequence of arrays, but just the data and the two dimensions.\nSecond, matrices store their data in column-major order. Furthermore, all of the data in a matrix is guaranteed to be contiguous in memory. This is an important consideration for optimized code because bringing in data from memory to cache is much more expensive than performing arithmetic operations with contemporary CPUs. Arrays, on the other hand, only guarantee that the values of primitive types are contiguous in memory; otherwise, they hold copies of their values (which are returned by reference wherever possible).\nThird, both data structures are best traversed in the order in which they are stored. This also helps with memory locality. 
This is column-major for matrices, so the following order is appropriate.\nmatrix[M, N] a;\n//...\nfor (n in 1:N) {\n for (m in 1:M) {\n // ... do something with a[m, n] ...\n }\n}\nArrays, on the other hand, should be traversed in row-major order (i.e., last index fastest), as in the following example.\narray[M, N] real a;\n// ...\nfor (m in 1:M) {\n for (n in 1:N) {\n // ... do something with a[m, n] ...\n }\n}\nThe first use of a[m ,n] should bring a[m] into memory. Overall, traversing matrices is more efficient than traversing arrays.\nThis is true even for arrays of matrices. For example, the ideal order in which to traverse a two-dimensional array of matrices is\narray[I, J] matrix[M, N] b;\n// ...\nfor (i in 1:I) {\n for (j in 1:J) {\n for (n in 1:N) {\n for (m in 1:M) {\n // ... do something with b[i, j, m, n] ...\n }\n }\n }\n}\nIf a is a matrix, the notation a[m] picks out row m of that matrix. This is a rather inefficient operation for matrices. If indexing of vectors is needed, it is much better to declare an array of vectors. That is, this\narray[M] row_vector[N] b;\n// ...\nfor (m in 1:M) {\n // ... do something with row vector b[m] ...\n}\nis much more efficient than the pure matrix version\nmatrix[M, N] b;\n// ...\nfor (m in 1:M) {\n // ... do something with row vector b[m] ...\n}\nSimilarly, indexing an array of column vectors is more efficient than using the col function to pick out a column of a matrix.\nIn contrast, whatever can be done as pure matrix algebra will be the fastest. 
So if I want to create a row of predictor-coefficient dot-products, it’s more efficient to do this\nmatrix[N, K] x; // predictors (aka covariates)\n// ...\nvector[K] beta; // coeffs\n// ...\nvector[N] y_hat; // linear prediction\n// ...\ny_hat = x * beta;\nthan it is to do this\narray[N] row_vector[K] x; // predictors (aka covariates)\n// ...\nvector[K] beta; // coeffs\n// ...\nvector[N] y_hat; // linear prediction\n// ...\nfor (n in 1:N) {\n y_hat[n] = x[n] * beta;\n}\n\n\n\nFor use purely as a container, there is really nothing to decide among vectors, row vectors and one-dimensional arrays. The Eigen::Vector template specialization and the std::vector template class are implemented similarly as containers of double values (the type real in Stan). Only arrays in Stan are allowed to store integer values.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Matrices, Vectors, Arrays, and Tuples" + ] + }, + { + "objectID": "stan-users-guide/matrices-arrays.html#memory-locality", + "href": "stan-users-guide/matrices-arrays.html#memory-locality", + "title": "Matrices, Vectors, Arrays, and Tuples", + "section": "", + "text": "The key to understanding efficiency of matrix and vector representations is memory locality and reference passing versus copying.\n\n\nCPUs on computers bring in memory in blocks through layers of caches. Fetching from memory is much slower than performing arithmetic operations. The only way to make container operations fast is to respect memory locality and access elements that are close together in memory sequentially in the program.\n\n\n\nMatrices are stored internally in column-major order. 
That is, an \\(M\n\\times N\\) matrix stores its elements in the order \\[\n(1,1), (2, 1), \\dotsc, (M, 1), (1, 2), \\dotsc, (M, 2), \\dotsc, (1, N),\n\\dotsc, (M, N).\n\\]\nThis means that it’s much more efficient to write loops over matrices column by column, as in the following example.\nmatrix[M, N] a;\n// ...\nfor (n in 1:N) {\n for (m in 1:M) {\n // ... do something with a[m, n] ...\n }\n}\nIt also follows that pulling a row out of a matrix is not memory local, as it has to stride over the whole sequence of values. It also requires a copy operation into a new data structure as it is not stored internally as a unit in a matrix. For sequential access to row vectors in a matrix, it is much better to use an array of row vectors, as in the following example.\narray[M] row_vector[N] a;\n// ...\nfor (m in 1:M) {\n // ... do something with row vector a[m] ...\n}\nEven if what is done involves a function call, the row vector a[m] will not have to be copied.\n\n\n\nArrays are stored internally following their data structure. That means a two dimensional array is stored in row-major order. Thus it is efficient to pull out a “row” of a two-dimensional array.\narray[M, N] real a;\n// ...\nfor (m in 1:M) {\n // ... do something with a[m] ...\n}\nA difference with matrices is that the entries a[m] in the two dimensional array are not necessarily adjacent in memory, so there is no guarantee that iterating over all the elements in a two-dimensional array will provide memory locality across the “rows.”", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Matrices, Vectors, Arrays, and Tuples" + ] + }, + { + "objectID": "stan-users-guide/matrices-arrays.html#converting-among-matrix-vector-and-array-types", + "href": "stan-users-guide/matrices-arrays.html#converting-among-matrix-vector-and-array-types", + "title": "Matrices, Vectors, Arrays, and Tuples", + "section": "", + "text": "There is no automatic conversion among matrices, vectors, and arrays in Stan. 
But there are a wide range of conversion functions to convert a matrix into a vector, or a multi-dimensional array into a one-dimensional array, or convert a vector to an array. See the section on mixed matrix and array operations in the functions reference manual for a complete list of conversion operators and the multi-indexing chapter for some reshaping operations involving multiple indexing and range indexing.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Matrices, Vectors, Arrays, and Tuples" + ] + }, + { + "objectID": "stan-users-guide/matrices-arrays.html#aliasing-in-stan-containers", + "href": "stan-users-guide/matrices-arrays.html#aliasing-in-stan-containers", + "title": "Matrices, Vectors, Arrays, and Tuples", + "section": "", + "text": "Stan expressions are all evaluated before assignment happens, so there is no danger of so-called aliasing in array, vector, or matrix operations. In the following Stan program, contrast the behavior of the loop assignment to u and the compound slicing assignment to x, where u and x start with the same values.\ntransformed data {\n vector[4] x = [ 1, 2, 3, 4 ]';\n vector[4] u = [ 1, 2, 3, 4 ]';\n\n for (t in 2:4) {\n u[t] = u[t - 1] * 3;\n }\n\n x[2:4] = x[1:3] * 3;\n\n print(\"u = \", u);\n print(\"x = \", x);\n}\nThe output it produces is,\nu = [1, 3, 9, 27]\nx = [1, 3, 6, 9]\nIn the loop version assigning to u, the values are updated before being used to define subsequent values; in the sliced expression assigning to x, the entire right-hand side is evaluated before assigning to the left-hand side.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Matrices, Vectors, Arrays, and Tuples" + ] + }, + { + "objectID": "stan-users-guide/index.html", + "href": "stan-users-guide/index.html", + "title": "Stan User’s Guide", + "section": "", + "text": "This is the official user’s guide for Stan. 
It provides example models and programming techniques for coding statistical models in Stan.\n\nPart 1 gives Stan code and discussions for several important classes of models.\nPart 2 discusses various general Stan programming techniques that are not tied to any particular model.\nPart 3 introduces algorithms for calibration and model checking that require multiple runs of Stan.\nThe appendices provide an introduction to the stanc3 compiler used in the various interfaces to Stan, a style guide, and advice for users of BUGS and JAGS.\n\nWe recommend working through this guide using the textbooks Bayesian Data Analysis and Statistical Rethinking: A Bayesian Course with Examples in R and Stan as references on the concepts, and using the Stan Reference Manual when necessary to clarify programming issues.\nDownload the pdf version of this manual.\n\nCopyright and trademark\n\nCopyright 2011–2025, Stan Development Team and their assignees.\nThe Stan name and logo are registered trademarks of NumFOCUS.\n\n\n\nLicensing\n\nText content: CC-BY ND 4.0 license\nComputer code: BSD 3-clause license\nLogo: Stan logo usage guidelines\n\n\n\n\n\n Back to top", + "crumbs": [ + "Stan Users Guide", + "Stan User's Guide" + ] + }, + { + "objectID": "stan-users-guide/gaussian-processes.html", + "href": "stan-users-guide/gaussian-processes.html", + "title": "Gaussian Processes", + "section": "", + "text": "Gaussian processes are continuous stochastic processes and thus may be interpreted as providing a probability distribution over functions. A probability distribution over continuous functions may be viewed, roughly, as an uncountably infinite collection of random variables, one for each valid input. 
The generality of the supported functions makes Gaussian priors popular choices for priors in general multivariate (non-linear) regression problems.\nThe defining feature of a Gaussian process is that the joint distribution of the function’s value at a finite number of input points is a multivariate normal distribution. This makes it tractable to both fit models from finite amounts of observed data and make predictions for finitely many new data points.\nUnlike a simple multivariate normal distribution, which is parameterized by a mean vector and covariance matrix, a Gaussian process is parameterized by a mean function and covariance function. The mean and covariance functions apply to vectors of inputs and return a mean vector and covariance matrix which provide the mean and covariance of the outputs corresponding to those input points in the functions drawn from the process.\nGaussian processes can be encoded in Stan by implementing their mean and covariance functions or by using the specialized covariance functions outlined below, and plugging the result into the Gaussian model. This form of model is straightforward and may be used for simulation, model fitting, or posterior predictive inference. A more efficient Stan implementation for the GP with a normally distributed outcome marginalizes over the latent Gaussian process, and applies a Cholesky-factor reparameterization of the Gaussian to compute the likelihood and the posterior predictive distribution analytically.\nAfter defining Gaussian processes, this chapter covers the basic implementations for simulation, hyperparameter estimation, and posterior predictive inference for univariate regressions, multivariate regressions, and multivariate logistic regressions. Gaussian processes are general, and by necessity this chapter only touches on some basic models. 
For more information, see Rasmussen and Williams (2006).\nNote that fitting Gaussian processes as described below using exact inference by computing the Cholesky decomposition of the covariance matrix scales cubically with the size of data. Due to how Stan autodiff is implemented, Stan is also slower than Gaussian process specialized software. It is likely that Gaussian processes using exact inference by computing the Cholesky decomposition of the covariance matrix with \\(N>1000\\) are too slow for practical purposes in Stan. There are many approximations to speed up Gaussian process computation, of which the basis function approaches for 1-3 dimensional \\(x\\) are easiest to implement in Stan (see, e.g., Riutort-Mayol et al. (2023)).\n\n\nThe data for a multivariate Gaussian process regression consists of a series of \\(N\\) inputs \\(x_1,\\dotsc,x_N \\in \\mathbb{R}^D\\) paired with outputs \\(y_1,\\dotsc,y_N \\in \\mathbb{R}\\). The defining feature of Gaussian processes is that the probability of a finite number of outputs \\(y\\) conditioned on their inputs \\(x\\) is Gaussian: \\[\ny \\sim \\textsf{multivariate normal}(m(x), K(x \\mid \\theta)),\n\\] where \\(m(x)\\) is an \\(N\\)-vector and \\(K(x \\mid \\theta)\\) is an \\(N \\times N\\) covariance matrix. 
The mean function \\(m : \\mathbb{R}^{N \\times D}\n\\rightarrow \\mathbb{R}^{N}\\) can be anything, but the covariance function \\(K : \\mathbb{R}^{N \\times D} \\rightarrow \\mathbb{R}^{N \\times N}\\) must produce a positive-definite matrix for any input \\(x\\).1\nA popular covariance function, which will be used in the implementations later in this chapter, is an exponentiated quadratic function, \\[\n K(x \\mid \\alpha, \\rho, \\sigma)_{i, j}\n= \\alpha^2\n\\exp \\left(\n- \\dfrac{1}{2 \\rho^2} \\sum_{d=1}^D (x_{i,d} - x_{j,d})^2\n\\right)\n+ \\delta_{i, j} \\sigma^2,\n\\] where \\(\\alpha\\), \\(\\rho\\), and \\(\\sigma\\) are hyperparameters defining the covariance function and where \\(\\delta_{i, j}\\) is the Kronecker delta function with value 1 if \\(i = j\\) and value 0 otherwise; this test is between the indexes \\(i\\) and \\(j\\), not between values \\(x_i\\) and \\(x_j\\). This kernel is obtained through a convolution of two independent Gaussian processes, \\(f_1\\) and \\(f_2\\), with kernels \\[\n K_1(x \\mid \\alpha, \\rho)_{i, j}\n= \\alpha^2\n\\exp \\left(\n- \\dfrac{1}{2 \\rho^2} \\sum_{d=1}^D (x_{i,d} - x_{j,d})^2\n\\right)\n\\] and \\[\n K_2(x \\mid \\sigma)_{i, j}\n=\n\\delta_{i, j} \\sigma^2.\n\\]\nThe addition of \\(\\sigma^2\\) on the diagonal is important to ensure the positive definiteness of the resulting matrix in the case of two identical inputs \\(x_i = x_j\\). In statistical terms, \\(\\sigma\\) is the scale of the noise term in the regression.\nThe hyperparameter \\(\\rho\\) is the length-scale, and corresponds to the frequency of the functions represented by the Gaussian process prior with respect to the domain. Values of \\(\\rho\\) closer to zero lead the GP to represent high-frequency functions, whereas larger values of \\(\\rho\\) lead to low-frequency functions. The hyperparameter \\(\\alpha\\) is the marginal standard deviation. It controls the magnitude of the range of the function represented by the GP. 
If you were to take the standard deviation of many draws from the GP \\(f_1\\) prior at a single input \\(x\\) conditional on one value of \\(\\alpha\\) one would recover \\(\\alpha\\).\nThe only term in the squared exponential covariance function involving the inputs \\(x_i\\) and \\(x_j\\) is their vector difference, \\(x_i - x_j\\). This produces a process with stationary covariance in the sense that if an input vector \\(x\\) is translated by a vector \\(\\epsilon\\) to \\(x +\n\\epsilon\\), the covariance at any pair of outputs is unchanged, because \\(K(x \\mid \\theta) = K(x + \\epsilon \\mid \\theta)\\).\nThe summation involved is just the squared Euclidean distance between \\(x_i\\) and \\(x_j\\) (i.e., the squared \\(L_2\\) norm of their difference, \\(x_i -\nx_j\\)). This results in support for smooth functions in the process. The amount of variation in the function is controlled by the free hyperparameters \\(\\alpha\\), \\(\\rho\\), and \\(\\sigma\\).\nChanging the notion of distance from Euclidean to taxicab distance (i.e., an \\(L_1\\) norm) changes the support to functions which are continuous but not smooth.\n\n\n\nIt is simplest to start with a Stan model that does nothing more than simulate draws of functions \\(f\\) from a Gaussian process. In practical terms, the model will draw values \\(y_n = f(x_n)\\) for finitely many input points \\(x_n\\).\nThe Stan model defines the mean and covariance functions in a transformed data block and then samples outputs \\(y\\) in the model using a multivariate normal distribution. To make the model concrete, the squared exponential covariance function described in the previous section will be used with hyperparameters set to \\(\\alpha^2 = 1\\), \\(\\rho^2 = 1\\), and \\(\\sigma^2 = 0.1\\), and the mean function \\(m\\) is defined to always return the zero vector, \\(m(x) = \\textbf{0}\\). 
Consider the following implementation of a Gaussian process simulator.\ndata {\n int<lower=1> N;\n array[N] real x;\n}\ntransformed data {\n matrix[N, N] K;\n vector[N] mu = rep_vector(0, N);\n for (i in 1:(N - 1)) {\n K[i, i] = 1 + 0.1;\n for (j in (i + 1):N) {\n K[i, j] = exp(-0.5 * square(x[i] - x[j]));\n K[j, i] = K[i, j];\n }\n }\n K[N, N] = 1 + 0.1;\n}\nparameters {\n vector[N] y;\n}\nmodel {\n y ~ multi_normal(mu, K);\n}\nThe above model can also be written more compactly using the specialized covariance function that implements the exponentiated quadratic kernel.\ndata {\n int<lower=1> N;\n array[N] real x;\n}\ntransformed data {\n matrix[N, N] K = gp_exp_quad_cov(x, 1.0, 1.0);\n vector[N] mu = rep_vector(0, N);\n for (n in 1:N) {\n K[n, n] = K[n, n] + 0.1;\n }\n}\nparameters {\n vector[N] y;\n}\nmodel {\n y ~ multi_normal(mu, K);\n}\nThe input data are just the vector of inputs x and its size N. Such a model can be used with values of x evenly spaced over some interval in order to plot sample draws of functions from a Gaussian process.\n\n\nOnly the input data needs to change in moving from a univariate model to a multivariate model.\nThe only lines that change from the univariate model above are as follows.\ndata {\n int<lower=1> N;\n int<lower=1> D;\n array[N] vector[D] x;\n}\ntransformed data {\n // ...\n}\nThe data are now declared as an array of vectors instead of an array of scalars; the dimensionality D is also declared.\nIn the remainder of the chapter, univariate models will be used for simplicity, but any of the models could be changed to multivariate in the same way as the simple sampling model. The only extra computational overhead from a multivariate model is in the distance calculation.\n\n\n\nA more efficient implementation of the simulation model can be coded in Stan by relocating, rescaling and rotating an isotropic standard normal variate. 
Suppose \\(\\eta\\) is an isotropic standard normal variate \\[\n\\eta \\sim \\textsf{normal}(\\textbf{0}, \\textrm{I}),\n\\] where \\(\\textbf{0}\\) is an \\(N\\)-vector of 0 values and \\(\\textrm{I}\\) is the \\(N\n\\times N\\) identity matrix. Let \\(L\\) be the Cholesky decomposition of \\(K(x \\mid \\theta)\\), i.e., the lower-triangular matrix \\(L\\) such that \\(LL^{\\top} =\nK(x \\mid \\theta)\\). Then the transformed variable \\(\\mu + L\\eta\\) has the intended target distribution, \\[\n \\mu + L\\eta \\sim \\textsf{multivariate normal}(\\mu(x), K(x \\mid \\theta)).\n\\]\nThis transform can be applied directly to Gaussian process simulation.\nThis model has the same data declarations for N and x, and the same transformed data definitions of mu and K as the previous model, with the addition of a transformed data variable for the Cholesky decomposition. The parameters change to the raw parameters sampled from an isotropic standard normal, and the actual parameters are defined in generated quantities.\n// ...\ntransformed data {\n matrix[N, N] L;\n // ...\n L = cholesky_decompose(K);\n}\nparameters {\n vector[N] eta;\n}\nmodel {\n eta ~ std_normal();\n}\ngenerated quantities {\n vector[N] y;\n y = mu + L * eta;\n}\nThe Cholesky decomposition is only computed once, after the data are loaded and the covariance matrix K computed. The isotropic normal distribution for eta is specified as a vectorized univariate distribution for efficiency; this specifies that each eta[n] has an independent standard normal distribution. 
The sampled vector y is then defined as a generated quantity using a direct encoding of the transform described above.\n\n\n\n\n\n\nThe full generative model for a GP with a normal outcome, \\(y \\in \\mathbb{R}^N\\), with inputs \\(x \\in \\mathbb{R}^N\\), for a finite \\(N\\): \\[\\begin{align*}\n\\rho &\\sim \\textsf{InvGamma}(5, 5) \\\\\n\\alpha &\\sim \\textsf{normal}(0, 1) \\\\\n\\sigma &\\sim \\textsf{normal}(0, 1) \\\\\nf &\\sim \\textsf{multivariate normal}\\left(0, K(x \\mid \\alpha, \\rho)\\right) \\\\\ny_i &\\sim \\textsf{normal}(f_i, \\sigma) \\, \\forall i \\in \\{1, \\dots, N\\}\n\\end{align*}\\] With a normal outcome, it is possible to integrate out the Gaussian process \\(f\\), yielding the more parsimonious model: \\[\\begin{align*}\n\\rho &\\sim \\textsf{InvGamma}(5, 5) \\\\\n\\alpha &\\sim \\textsf{normal}(0, 1) \\\\\n\\sigma &\\sim \\textsf{normal}(0, 1) \\\\\ny &\\sim \\textsf{multivariate normal}\n \\left(0, K(x \\mid \\alpha, \\rho) + \\textbf{I}_N \\sigma^2\\right) \\\\\n\\end{align*}\\]\nIt can be more computationally efficient when dealing with a normal outcome to integrate out the Gaussian process, because this yields a lower-dimensional parameter space over which to do inference. We’ll fit both models in Stan. The former model will be referred to as the latent variable GP, while the latter will be called the marginal likelihood GP.\nThe hyperparameters controlling the covariance function of a Gaussian process can be fit by assigning them priors, like we have in the generative models above, and then computing the posterior distribution of the hyperparameters given observed data. The priors on the parameters should be defined based on prior knowledge of the scale of the output values (\\(\\alpha\\)), the scale of the output noise (\\(\\sigma\\)), and the scale at which distances are measured among inputs (\\(\\rho\\)). 
See the Gaussian process priors section for more information about how to specify appropriate priors for the hyperparameters.\nThe Stan program implementing the marginal likelihood GP is shown below. The program is similar to the Stan programs that implement the simulation GPs above, but because we are doing inference on the hyperparameters, we need to calculate the covariance matrix K in the model block, rather than the transformed data block.\ndata {\n int<lower=1> N;\n array[N] real x;\n vector[N] y;\n}\ntransformed data {\n vector[N] mu = rep_vector(0, N);\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n}\nmodel {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n real sq_sigma = square(sigma);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + sq_sigma;\n }\n\n L_K = cholesky_decompose(K);\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n\n y ~ multi_normal_cholesky(mu, L_K);\n}\nThe data block declares a vector y of observed values y[n] for inputs x[n]. The transformed data block now only defines the mean vector to be zero. The three hyperparameters are defined as parameters constrained to be non-negative. The computation of the covariance matrix K is now in the model block because it involves unknown parameters and thus can’t simply be precomputed as transformed data. The rest of the model consists of the priors for the hyperparameters and the multivariate Cholesky-parameterized normal distribution, only now the value y is known and the covariance matrix K is an unknown dependent on the hyperparameters, allowing us to learn the hyperparameters.\nWe have used the Cholesky parameterized multivariate normal rather than the standard parameterization because it allows us to use the cholesky_decompose function which has been optimized for both small and large matrices. 
When working with small matrices the differences in computational speed between the two approaches will not be noticeable, but for larger matrices (\\(N \\gtrsim 100\\)) the Cholesky decomposition version will be faster.\nHamiltonian Monte Carlo sampling is fast and effective for hyperparameter inference in this model (Neal 1997). If the posterior is well-concentrated for the hyperparameters the Stan implementation will fit hyperparameters in models with a few hundred data points in seconds.\n\n\nWe can also explicitly code the latent variable formulation of a GP in Stan. This will be useful for when the outcome is not normal. We’ll need to add a small positive term, \\(\\delta\\) to the diagonal of the covariance matrix in order to ensure that our covariance matrix remains positive definite.\ndata {\n int<lower=1> N;\n array[N] real x;\n vector[N] y;\n}\ntransformed data {\n real delta = 1e-9;\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n vector[N] eta;\n}\nmodel {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n eta ~ std_normal();\n\n y ~ normal(f, sigma);\n}\nTwo differences between the latent variable GP and the marginal likelihood GP are worth noting. The first is that we have augmented our parameter block with a new parameter vector of length \\(N\\) called eta. This is used in the model block to generate a multivariate normal vector called \\(f\\), corresponding to the latent GP. We put a \\(\\textsf{normal}(0,1)\\) prior on eta like we did in the Cholesky-parameterized GP in the simulation section. 
The second difference is that although we could code the distribution statement for \\(y\\) with one \\(N\\)-dimensional multivariate normal with an identity covariance matrix multiplied by \\(\\sigma^2\\), we instead use vectorized univariate normal distribution, which is equivalent but more efficient to use.\n\n\n\n\nGaussian processes can be generalized the same way as standard linear models by introducing a link function. This allows them to be used as discrete data models.\n\n\nIf we want to model count data, we can remove the \\(\\sigma\\) parameter, and use poisson_log, which implements a Poisson distribution with log link function, rather than normal. We can also add an overall mean parameter, \\(a\\), which will account for the marginal expected value for \\(y\\). We do this because we cannot center count data like we would for normally distributed data.\ndata {\n // ...\n array[N] int<lower=0> y;\n // ...\n}\n// ...\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real a;\n vector[N] eta;\n}\nmodel {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n a ~ std_normal();\n eta ~ std_normal();\n\n y ~ poisson_log(a + f);\n}\n\n\n\nFor computational reasons, we may want to integrate out the latent variable \\(f\\), as was done in the normal output model. Unfortunately, exact marginalization over \\(f\\) is not possible when the outcome model is not normal. Instead, we may perform approximate marginalization with an embedded Laplace approximation (Rasmussen and Williams 2006; Rue, Martino, and Chopin 2009; Margossian et al. 2020). To do so, we first use the function laplace_marginal to approximate the marginal likelihood \\(p(y \\mid \\rho, \\alpha, a)\\) and sample the hyperparameters with Hamiltonian Monte Carlo sampling. 
Then, we recover the integrated out \\(f\\) in the generated quantities block using laplace_latent_rng.\nThe embedded Laplace approximation computes a Gaussian approximation of the conditional posterior, \\[\n \\hat p_\\mathcal{L}(f \\mid \\rho, \\alpha, a, y) \\approx p(f \\mid \\rho, \\alpha, a, y),\n\\] where \\(\\hat p_\\mathcal{L}\\) is a Gaussian that matches the mode and curvature of \\(p(f \\mid \\rho, \\alpha, a, y)\\). We then obtain an approximation of the marginal likelihood as follows: \\[\n \\hat p_\\mathcal{L}(y \\mid \\rho, \\alpha, a)\n = \\frac{p(f^* \\mid \\alpha, \\rho) p(y \\mid f^*, a)}{\n \\hat p_\\mathcal{L}(f^* \\mid \\rho, \\alpha, a, y)},\n\\] where \\(f^*\\) is the mode of \\(p(f \\mid \\rho, \\alpha, a, y)\\), obtained via numerical optimization.\nTo use Stan’s embedded Laplace approximation, we must define the prior covariance function and the log likelihood function in the functions block.\nfunctions {\n // log likelihood function\n real ll_function(vector f, real a, array[] int y) {\n return poisson_log_lpmf(y | a + f);\n }\n\n // covariance function\n matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n return add_diag(K, delta)\n }\n\n}\nThe embedded Laplace relies on calculations of the log likelihood’s Hessian, \\(\\partial^2 \\log p(y \\mid f, a, \\rho, \\alpha) / \\partial f^2\\), and these calculations can be much faster when the Hessian is sparse. In particular, it is expected that the Hessian is block diagonal. In the transformed data block we can specify the block size of the Hessian.\ntransformed data {\n int hessian_block_size = 1;\n}\nFor example, if \\(y_i\\) depends only on \\(f_i\\), then the Hessian of the log likelihood is diagonal and the block size is 1. On the other hand, if the Hessian is not sparse, then we set the hessian block size to \\(N\\), where \\(N\\) is the dimension of \\(f\\). 
Currently, Stan does not check the block size of the Hessian and so the user is responsible for correctly specifying the block size.\nFinally, we increment target in the model block with the approximation to \\(\\log p(y \\mid \\rho, \\alpha, a)\\).\nmodel {\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n\n target += laplace_marginal(ll_function, (a, y), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta));\n}\nNotice that we do not need to construct \\(f\\) explicitly, since it is marginalized out. Instead, we can recover the latent variables in generated quantities:\ngenerated quantities {\n vector[N] f = laplace_latent_rng(ll_function, (a, y), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta));\n}\nUsers can set the control parameters of the embedded Laplace approximation, via laplace_marginal_tol and laplace_latent_tol_rng. When using these functions, the user must set all the control options and store them in a tuple. These control parameters mostly concern the numerical optimizer used to find the mode \\(f^*\\) of \\(p(f \\mid \\rho, \\alpha, a)\\).\ntransformed data {\n tuple(vector[N], real, int, int, int, int) laplace_ops;\n laplace_ops.1 = rep_vector(0, N); // starting point for Laplace optimizer\n laplace_ops.2 = 1.49e-8; // tolerance for optimizer\n laplace_ops.3 = 500; // maximum number of steps for optimizer.\n laplace_ops.4 = 1; // solver type being used.\n laplace_ops.5 = 1000; // max number of steps for linesearch.\n laplace_ops.6 = 1; // allow_fallback (1: TRUE, 0: FALSE)\nIf users want to depart from the defaults for only some of the control parameters, a tuple with the default values (as above) can be created with the helper callable generate_laplace_options(), and the specific control parameter can then be modified,\ntransformed data {\n tuple(vector[N], real, int, int, int, int, int) laplace_ops =\n generate_laplace_options(N);\n\n laplace_ops.2 = 1e-6; // make tolerance of the optimizer less 
strict.\n}\nThe tuple laplace_ops is then passed to laplace_marginal_tol and laplace_latent_tol_rng.\nmodel {\n// ...\n\n target += laplace_marginal_tol(ll_function, (a, y), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta),\n laplace_ops);\n}\n\ngenerated quantities {\n vector[N] f = laplace_latent_rng(ll_function, (a, y), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta),\n laplace_ops);\n}\nStan also provides support for a limited menu of built-in functions, including the Poisson distribution with a log link and a prior mean \\(m\\). When using such a built-in function, the user does not need to specify a likelihood in the functions block. However, the user must strictly follow the signature of the likelihood: in this case, \\(m\\) must be a vector of length \\(N\\) (to allow for different offsets for each observation \\(y_i\\)) and we must indicate which element of \\(f\\) each component of \\(y\\) matches using the variable \\(y_\\text{index}\\). In our example, there is a simple pairing \\((y_i, f_i)\\), however we could imagine a scenario where multiple observations \\((y_{j1}, y_{j2}, ...)\\) are observed for a single \\(f_j\\).\ntransformed data {\n // ...\n array[n_obs] int y_index;\n for (i in 1:n_obs) y_index[i] = i - 1;\n}\n\n// ...\n\ntransformed parameter {\n vector[N] m = rep_vector(a, N);\n}\n\nmodel {\n // ...\n target += laplace_marginal_poisson_log_lpmf(y | y_index, m,\n cov_function, (rho, alpha, x, N, delta));\n}\n\ngenerated quantities {\n vector[N] f = laplace_latent_poisson_log_rng(y, y_index, m,\n cov_function, (rho, alpha, x, N, delta));\n}\nAs before, we could specify the control parameters for the embedded Laplace approximation using laplace_marginal_tol_poisson_log_lpmf and laplace_latent_tol_poisson_log_rng.\nMarginalization with a Laplace approximation can lead to faster inference, however it also introduces an approximation error. 
In practice, this error is negligible when using a Poisson likelihood and the approximation works well for log concave likelihoods (Kuss and Rasmussen 2005; Vanhatalo, Pietiläinen, and Vehtari 2010; Cseke and Heskes 2011; Vehtari et al. 2016). Still, users should exercise caution, especially when trying unconventional likelihoods.\n\n\n\nFor binary classification problems, the observed outputs \\(z_n \\in\n\\{ 0,1 \\}\\) are binary. These outputs are modeled using a Gaussian process with (unobserved) outputs \\(y_n\\) through the logistic link, \\[\nz_n \\sim \\textsf{Bernoulli}(\\operatorname{logit}^{-1}(y_n)),\n\\] or in other words, \\[\n\\Pr[z_n = 1] = \\operatorname{logit}^{-1}(y_n).\n\\]\nWe can extend our latent variable GP Stan program to deal with classification problems. Below a is the bias term, which can help account for imbalanced classes in the training data:\ndata {\n // ...\n array[N] int<lower=0, upper=1> z;\n // ...\n}\n// ...\nmodel {\n // ...\n z ~ bernoulli_logit(a + f);\n}\n\n\n\nAs with the Poisson GP, we cannot marginalize the latent variables exactly, however we can resort to an embedded Laplace approximation.\nfunctions {\n // log likelihood function\n real ll_function(vector f, real a, array[] int z) {\n return bernoulli_logit_lpmf(z | a + f);\n }\n\n // covariance function\n matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n return add_diag(K, delta)\n }\n}\n\n// ...\n\ntransformed data {\n int hessian_block_size = 1;\n}\n\n// ...\n\nmodel {\n target += laplace_marginal(ll_function, (a, z), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta));\n}\n\ngenerated quantities {\n vector[N] f = laplace_latent_rng(ll_function, (a, z), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta));\n}\nWhile marginalization with a Laplace approximation can lead to faster inference, it also introduces an approximation error. 
In practice, this error may not be negligible with a Bernoulli likelihood; for more discussion see, e.g. (Vehtari et al. 2016; Margossian et al. 2020).\n\n\n\n\nIf we have multivariate inputs \\(x \\in \\mathbb{R}^D\\), the squared exponential covariance function can be further generalized by fitting a scale parameter \\(\\rho_d\\) for each dimension \\(d\\), \\[\n k(x \\mid \\alpha, \\vec{\\rho}, \\sigma)_{i, j} = \\alpha^2 \\exp\n\\left(-\\dfrac{1}{2}\n\\sum_{d=1}^D \\dfrac{1}{\\rho_d^2} (x_{i,d} - x_{j,d})^2\n\\right)\n+ \\delta_{i, j}\\sigma^2.\n\\] The estimation of \\(\\rho\\) was termed “automatic relevance determination” by Neal (1996), but this is misleading, because the magnitude of the scale of the posterior for each \\(\\rho_d\\) is dependent on the scaling of the input data along dimension \\(d\\). Moreover, the scale of the parameters \\(\\rho_d\\) measures non-linearity along the \\(d\\)-th dimension, rather than “relevance” (Piironen and Vehtari 2016).\nA priori, the closer \\(\\rho_d\\) is to zero, the more nonlinear the conditional mean in dimension \\(d\\) is. A posteriori, the actual dependencies between \\(x\\) and \\(y\\) play a role. With one covariate \\(x_1\\) having a linear effect and another covariate \\(x_2\\) having a nonlinear effect, it is possible that \\(\\rho_1 > \\rho_2\\) even if the predictive relevance of \\(x_1\\) is higher (Rasmussen and Williams 2006, 80). 
The collection of \\(\\rho_d\\) (or \\(1/\\rho_d\\)) parameters can also be modeled hierarchically.\nThe implementation of automatic relevance determination is a straightforward extension of the one-dimensional case by modifying rho to be an array.\ndata {\n int<lower=1> N;\n int<lower=1> D;\n array[N] vector[D] x;\n vector[N] y;\n}\ntransformed data {\n real delta = 1e-9;\n}\nparameters {\n array[D] real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n vector[N] eta;\n}\nmodel {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n eta ~ std_normal();\n\n y ~ normal(f, sigma);\n}\n\n\n\nFormulating priors for GP hyperparameters requires the analyst to consider the inherent statistical properties of a GP, the GP’s purpose in the model, and the numerical issues that may arise in Stan when estimating a GP.\nPerhaps most importantly, the parameters \\(\\rho\\) and \\(\\alpha\\) are weakly identified (Zhang 2004). The ratio of the two parameters is well-identified, but in practice we put independent priors on the two hyperparameters because these two quantities are more interpretable than their ratio.\n\n\nGPs are a flexible class of priors and, as such, can represent a wide spectrum of functions. For length scales below the minimum spacing of the covariates the GP likelihood plateaus. Unless regularized by a prior, this flat likelihood induces considerable posterior mass at small length scales where the observation variance drops to zero and the functions supported by the GP begin to exactly interpolate between the input data. 
The resulting posterior not only significantly overfits to the input data, it also becomes hard to accurately sample using Euclidean HMC.\nWe may wish to put further soft constraints on the length-scale, but these are dependent on how the GP is used in our statistical model.\nIf our model consists of only the GP, i.e.: \\[\\begin{align*}\nf &\\sim \\textsf{multivariate normal}\\left(0, K(x \\mid \\alpha, \\rho)\\right) \\\\\ny_i &\\sim \\textsf{normal}(f_i, \\sigma) \\, \\forall i \\in \\{1, \\dots, N\\} \\\\\n & x \\in \\mathbb{R}^{N \\times D}, \\quad\n f \\in \\mathbb{R}^N\n\\end{align*}\\]\nwe likely don’t need constraints beyond penalizing small length-scales. We’d like to allow the GP prior to represent both high-frequency and low-frequency functions, so our prior should put non-negligible mass on both sets of functions. In this case, an inverse gamma, inv_gamma_lpdf in Stan’s language, will work well as it has a sharp left tail that puts negligible mass on infinitesimal length-scales, but a generous right tail, allowing for large length-scales. Inverse gamma priors will avoid infinitesimal length-scales because the density is zero at zero, so the posterior for length-scale will be pushed away from zero. An inverse gamma distribution is one of many zero-avoiding or boundary-avoiding distributions.2.\nIf we’re using the GP as a component in a larger model that includes an overall mean and fixed effects for the same variables we’re using as the domain for the GP, i.e.: \\[\\begin{align*}\nf &\\sim \\textsf{multivariate normal}\\big(0, K(x \\mid \\alpha, \\rho)\\big) \\\\\ny_i &\\sim \\textsf{normal}\\left(\\beta_0 + x_i \\beta_{[1:D]} + f_i, \\sigma\\right) \\, \\forall i\n \\in \\{1, \\dots, N\\} \\\\\n & x_i^T, \\beta_{[1:D]} \\in \\mathbb{R}^D,\\quad\n x \\in \\mathbb{R}^{N \\times D},\\quad\n f \\in \\mathbb{R}^N\n\\end{align*}\\]\nwe’ll likely want to constrain large length-scales as well. 
A length scale that is larger than the scale of the data yields a GP posterior that is practically linear (with respect to the particular covariate) and increasing the length scale has little impact on the likelihood. This will introduce nonidentifiability in our model, as both the fixed effects and the GP will explain similar variation. In order to limit the amount of overlap between the GP and the linear regression, we should use a prior with a sharper right tail to limit the GP to higher-frequency functions. We can use a generalized inverse Gaussian distribution: \\[\\begin{align*}\nf(x \\mid a, b, p) &= \\dfrac{\\left(a/b\\right)^{p/2}}{2K_p\\left(\\sqrt{ab}\\right)} x^{p - 1}\\exp\\big(-(ax + b\n / x)/2\\big) \\\\\n & x, a, b \\in \\mathbb{R}^{+},\\quad\n p \\in \\mathbb{Z}\n\\end{align*}\\]\nwhich has an inverse gamma left tail if \\(p \\leq 0\\) and an inverse Gaussian right tail. This has not yet been implemented in Stan’s math library, but it is possible to implement as a user defined function:\nfunctions {\n real generalized_inverse_gaussian_lpdf(real x, int p,\n real a, real b) {\n return p * 0.5 * log(a / b)\n - log(2 * modified_bessel_second_kind(p, sqrt(a * b)))\n + (p - 1) * log(x)\n - (a * x + b / x) * 0.5;\n }\n}\ndata {\n // ...\n}\nIf we have high-frequency covariates in our fixed effects, we may wish to further regularize the GP away from high-frequency functions, which means we’ll need to penalize smaller length-scales. Luckily, we have a useful way of thinking about how length-scale affects the frequency of the functions supported by the GP. If we were to repeatedly draw from a zero-mean GP with a length-scale of \\(\\rho\\) in a fixed-domain \\([0,T]\\), we would get a distribution for the number of times each draw of the GP crossed the zero axis. The expectation of this random variable, the number of zero crossings, is \\(T / \\pi\n\\rho\\). 
You can see that as \\(\\rho\\) decreases, the expectation of the number of upcrossings increases as the GP is representing higher-frequency functions. Thus, this is a good statistic to keep in mind when setting a lower-bound for our prior on length-scale in the presence of high-frequency covariates. However, this statistic is only valid for one-dimensional inputs.\n\n\n\nThe parameter \\(\\alpha\\) corresponds to how much of the variation is explained by the regression function and has a similar role to the prior variance for linear model weights. This means the prior can be the same as used in linear models, such as a half-\\(t\\) prior on \\(\\alpha\\).\nA half-\\(t\\) or half-Gaussian prior on alpha also has the benefit of putting nontrivial prior mass around zero. This allows the GP to support the zero function and allows the possibility that the GP won’t contribute to the conditional mean of the total output.\n\n\n\n\nSuppose for a given sequence of inputs \\(x\\) that the corresponding outputs \\(y\\) are observed. 
Given a new sequence of inputs \\(\\tilde{x}\\), the posterior predictive distribution of their labels is computed by sampling outputs \\(\\tilde{y}\\) according to \\[\np\\left(\\tilde{y} \\mid \\tilde{x},x,y\\right)\n\\ = \\\n\\frac{p\\left(\\tilde{y}, y \\mid \\tilde{x},x\\right)}\n {p(y \\mid x)}\n\\ \\propto \\\np\\left(\\tilde{y}, y \\mid \\tilde{x},x\\right).\n\\]\nA direct implementation in Stan defines a model in terms of the joint distribution of the observed \\(y\\) and unobserved \\(\\tilde{y}\\).\ndata {\n int<lower=1> N1;\n array[N1] real x1;\n vector[N1] y1;\n int<lower=1> N2;\n array[N2] real x2;\n}\ntransformed data {\n real delta = 1e-9;\n int<lower=1> N = N1 + N2;\n array[N] real x;\n for (n1 in 1:N1) {\n x[n1] = x1[n1];\n }\n for (n2 in 1:N2) {\n x[N1 + n2] = x2[n2];\n }\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n vector[N] eta;\n}\ntransformed parameters {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n}\nmodel {\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n eta ~ std_normal();\n\n y1 ~ normal(f[1:N1], sigma);\n}\ngenerated quantities {\n vector[N2] y2;\n for (n2 in 1:N2) {\n y2[n2] = normal_rng(f[N1 + n2], sigma);\n }\n}\nThe input vectors x1 and x2 are declared as data, as is the observed output vector y1. The unknown output vector y2, which corresponds to input vector x2, is declared in the generated quantities block and will be sampled when the model is executed.\nA transformed data block is used to combine the input vectors x1 and x2 into a single vector x.\nThe model block declares and defines a local variable for the combined output vector f, which consists of the concatenation of the conditional mean for known outputs y1 and unknown outputs y2. 
Thus the combined output vector f is aligned with the combined input vector x. All that is left is to define the univariate normal distribution statement for y.\nThe generated quantities block defines the quantity y2. We generate y2 by randomly generating N2 values from univariate normals with each mean corresponding to the appropriate element in f.\n\n\nWe can do predictive inference in non-Gaussian GPs in much the same way as we do with Gaussian GPs.\nConsider the following full model for prediction using logistic Gaussian process regression.\ndata {\n int<lower=1> N1;\n array[N1] real x1;\n array[N1] int<lower=0, upper=1> z1;\n int<lower=1> N2;\n array[N2] real x2;\n}\ntransformed data {\n real delta = 1e-9;\n int<lower=1> N = N1 + N2;\n array[N] real x;\n for (n1 in 1:N1) {\n x[n1] = x1[n1];\n }\n for (n2 in 1:N2) {\n x[N1 + n2] = x2[n2];\n }\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real a;\n vector[N] eta;\n}\ntransformed parameters {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n}\nmodel {\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n a ~ std_normal();\n eta ~ std_normal();\n\n z1 ~ bernoulli_logit(a + f[1:N1]);\n}\ngenerated quantities {\n array[N2] int z2;\n for (n2 in 1:N2) {\n z2[n2] = bernoulli_logit_rng(a + f[N1 + n2]);\n }\n}\n\n\n\nBayesian predictive inference for Gaussian processes with Gaussian observations can be sped up by deriving the posterior analytically, then directly sampling from it.\nJumping straight to the result, \\[\np\\left(\\tilde{y} \\mid \\tilde{x},y,x\\right)\n=\n\\textsf{normal}\\left(K^{\\top}\\Sigma^{-1}y,\\\n \\Omega - K^{\\top}\\Sigma^{-1}K\\right),\n\\] where \\(\\Sigma = K(x \\mid \\alpha, \\rho, \\sigma)\\) is the result of applying the covariance function to the inputs \\(x\\) with observed outputs \\(y\\), \\(\\Omega 
=\nK(\\tilde{x} \\mid \\alpha, \\rho)\\) is the result of applying the covariance function to the inputs \\(\\tilde{x}\\) for which predictions are to be inferred, and \\(K\\) is the matrix of covariances between inputs \\(x\\) and \\(\\tilde{x}\\), which in the case of the exponentiated quadratic covariance function would be \\[\nK(x \\mid \\alpha, \\rho)_{i, j} = \\alpha^2 \\exp\\left(-\\dfrac{1}{2 \\rho^2}\n\\sum_{d=1}^D \\left(x_{i,d} - \\tilde{x}_{j,d}\\right)^2\\right).\n\\]\nThere is no noise term including \\(\\sigma^2\\) because the indexes of elements in \\(x\\) and \\(\\tilde{x}\\) are never the same.\nThis Stan code below uses the analytic form of the posterior and provides sampling of the resulting multivariate normal through the Cholesky decomposition. The data declaration is the same as for the latent variable example, but we’ve defined a function called gp_pred_rng which will generate a draw from the posterior predictive mean conditioned on observed data y1. The code uses a Cholesky decomposition in triangular solves in order to cut down on the number of matrix-matrix multiplications when computing the conditional mean and the conditional covariance of \\(p(\\tilde{y})\\).\nfunctions {\n vector gp_pred_rng(array[] real x2,\n vector y1,\n array[] real x1,\n real alpha,\n real rho,\n real sigma,\n real delta) {\n int N1 = rows(y1);\n int N2 = size(x2);\n vector[N2] f2;\n {\n matrix[N1, N1] L_K;\n vector[N1] K_div_y1;\n matrix[N1, N2] k_x1_x2;\n matrix[N1, N2] v_pred;\n vector[N2] f2_mu;\n matrix[N2, N2] cov_f2;\n matrix[N2, N2] diag_delta;\n matrix[N1, N1] K;\n K = gp_exp_quad_cov(x1, alpha, rho);\n for (n in 1:N1) {\n K[n, n] = K[n, n] + square(sigma);\n }\n L_K = cholesky_decompose(K);\n K_div_y1 = mdivide_left_tri_low(L_K, y1);\n K_div_y1 = mdivide_right_tri_low(K_div_y1', L_K)';\n k_x1_x2 = gp_exp_quad_cov(x1, x2, alpha, rho);\n f2_mu = (k_x1_x2' * K_div_y1);\n v_pred = mdivide_left_tri_low(L_K, k_x1_x2);\n cov_f2 = gp_exp_quad_cov(x2, alpha, rho) 
- v_pred' * v_pred;\n diag_delta = diag_matrix(rep_vector(delta, N2));\n\n f2 = multi_normal_rng(f2_mu, cov_f2 + diag_delta);\n }\n return f2;\n }\n}\ndata {\n int<lower=1> N1;\n array[N1] real x1;\n vector[N1] y1;\n int<lower=1> N2;\n array[N2] real x2;\n}\ntransformed data {\n vector[N1] mu = rep_vector(0, N1);\n real delta = 1e-9;\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n}\nmodel {\n matrix[N1, N1] L_K;\n {\n matrix[N1, N1] K = gp_exp_quad_cov(x1, alpha, rho);\n real sq_sigma = square(sigma);\n\n // diagonal elements\n for (n1 in 1:N1) {\n K[n1, n1] = K[n1, n1] + sq_sigma;\n }\n\n L_K = cholesky_decompose(K);\n }\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n\n y1 ~ multi_normal_cholesky(mu, L_K);\n}\ngenerated quantities {\n vector[N2] f2;\n vector[N2] y2;\n\n f2 = gp_pred_rng(x2, y1, x1, alpha, rho, sigma, delta);\n for (n2 in 1:N2) {\n y2[n2] = normal_rng(f2[n2], sigma);\n }\n}\n\n\n\n\nSuppose we have observations \\(y_i \\in \\mathbb{R}^M\\) observed at \\(x_i \\in \\mathbb{R}^K\\). One can model the data like so: \\[\\begin{align*}\ny_i &\\sim \\textsf{multivariate normal}\\left(f(x_i), \\textbf{I}_M \\sigma^2\\right) \\\\\nf(x) &\\sim \\textsf{GP}\\big(m(x), K(x \\mid \\theta, \\phi)\\big) \\\\\n & K(x \\mid \\theta) \\in \\mathbb{R}^{M \\times M}, \\quad\n f(x), m(x) \\in \\mathbb{R}^M\n\\end{align*}\\] where the \\(K(x, x^\\prime \\mid \\theta, \\phi)_{[m, m^\\prime]}\\) entry defines the covariance between \\(f_m(x)\\) and \\(f_{m^\\prime}(x^\\prime)(x)\\). This construction of Gaussian processes allows us to learn the covariance between the output dimensions of \\(f(x)\\). 
If we parameterize our kernel \\(K\\): \\[\nK(x, x^\\prime \\mid \\theta, \\phi)_{[m, m^\\prime]} = k\\left(x, x^\\prime \\mid\n\\theta\\right) k\\left(m, m^\\prime \\mid \\phi\\right)\n\\] then our finite dimensional generative model for the above is: \\[\\begin{align*}\nf &\\sim \\textsf{matrixnormal}\\big(m(x), K(x \\mid \\alpha, \\rho), C(\\phi)\\big) \\\\\ny_{i, m} &\\sim \\textsf{normal}(f_{i,m}, \\sigma) \\\\\nf &\\in \\mathbb{R}^{N \\times M}\n\\end{align*}\\] where \\(K(x \\mid \\alpha, \\rho)\\) is the exponentiated quadratic kernel we’ve used throughout this chapter, and \\(C(\\phi)\\) is a positive-definite matrix, parameterized by some vector \\(\\phi\\).\nThe matrix normal distribution has two covariance matrices: \\(K(x \\mid\n\\alpha, \\rho)\\) to encode column covariance, and \\(C(\\phi)\\) to define row covariance. The salient features of the matrix normal are that the rows of the matrix \\(f\\) are distributed: \\[\nf_{[n,]} \\sim \\textsf{multivariate normal}\\big(m(x)_{[n,]}, K(x \\mid \\alpha,\n\\rho)_{[n,n]} C(\\phi)\\big)\n\\] and that the columns of the matrix \\(f\\) are distributed: \\[\nf_{[,m]} \\sim \\textsf{multivariate normal}\\big(m(x)_{[,m]}, K(x\n \\mid \\alpha, \\rho) C(\\phi)_{[m,m]}\\big)\n\\] This also means that \\(\\mathbb{E}\\left[f^T f\\right]\\) is equal to \\(\\operatorname{trace}\\!\\big(K(x \\mid \\alpha, \\rho)\\big) \\times C\\), whereas \\(\\mathbb{E}\\left[ff^T\\right]\\) is \\(\\operatorname{trace}(C) \\times K(x \\mid \\alpha, \\rho)\\). We can derive this using properties of expectation and the matrix normal density.\nWe should set \\(\\alpha\\) to \\(1.0\\) because the parameter is not identified unless we constrain \\(\\operatorname{trace}(C) = 1\\). 
Otherwise, we can multiply \\(\\alpha\\) by a scalar \\(d\\) and \\(C\\) by \\(1/d\\) and our likelihood will not change.\nWe can generate a random variable \\(f\\) from a matrix normal density in \\(\\mathbb{R}^{N \\times M}\\) using the following algorithm: \\[\\begin{align*}\n\\eta_{i,j} &\\sim \\textsf{normal}(0, 1) \\, \\forall i,j \\\\\nf &= L_{K(x \\mid 1.0, \\rho)} \\, \\eta \\, L_C(\\phi)^T \\\\\nf &\\sim \\textsf{matrixnormal}\\big(0, K(x \\mid 1.0, \\rho), C(\\phi)\\big) \\\\\n\\eta &\\in \\mathbb{R}^{N \\times M} \\\\\nL_C(\\phi) &= \\texttt{cholesky}\\mathtt{\\_}\\texttt{decompose}\\big(C(\\phi)\\big) \\\\\nL_{K(x \\mid 1.0, \\rho)} &= \\texttt{cholesky}\\mathtt{\\_}\\texttt{decompose}\\big(K(x \\mid 1.0, \\rho)\\big)\n\\end{align*}\\]\nThis can be implemented in Stan using a latent-variable GP formulation. We’ve used \\(\\textsf{LKJCorr}\\) for \\(C(\\phi)\\), but any positive-definite matrix will do.\ndata {\n int<lower=1> N;\n int<lower=1> D;\n array[N] real x;\n matrix[N, D] y;\n}\ntransformed data {\n real delta = 1e-9;\n}\nparameters {\n real<lower=0> rho;\n vector<lower=0>[D] alpha;\n real<lower=0> sigma;\n cholesky_factor_corr[D] L_Omega;\n matrix[N, D] eta;\n}\nmodel {\n matrix[N, D] f;\n {\n matrix[N, N] K = gp_exp_quad_cov(x, 1.0, rho);\n matrix[N, N] L_K;\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta\n * diag_pre_multiply(alpha, L_Omega)';\n }\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n L_Omega ~ lkj_corr_cholesky(3);\n to_vector(eta) ~ std_normal();\n\n to_vector(y) ~ normal(to_vector(f), sigma);\n}\ngenerated quantities {\n matrix[D, D] Omega;\n Omega = L_Omega * L_Omega';\n}", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Gaussian Processes" + ] + }, + { + "objectID": "stan-users-guide/gaussian-processes.html#gaussian-process-regression", + "href": 
"stan-users-guide/gaussian-processes.html#gaussian-process-regression", + "title": "Gaussian Processes", + "section": "", + "text": "The data for a multivariate Gaussian process regression consists of a series of \\(N\\) inputs \\(x_1,\\dotsc,x_N \\in \\mathbb{R}^D\\) paired with outputs \\(y_1,\\dotsc,y_N \\in \\mathbb{R}\\). The defining feature of Gaussian processes is that the probability of a finite number of outputs \\(y\\) conditioned on their inputs \\(x\\) is Gaussian: \\[\ny \\sim \\textsf{multivariate normal}(m(x), K(x \\mid \\theta)),\n\\] where \\(m(x)\\) is an \\(N\\)-vector and \\(K(x \\mid \\theta)\\) is an \\(N \\times N\\) covariance matrix. The mean function \\(m : \\mathbb{R}^{N \\times D}\n\\rightarrow \\mathbb{R}^{N}\\) can be anything, but the covariance function \\(K : \\mathbb{R}^{N \\times D} \\rightarrow \\mathbb{R}^{N \\times N}\\) must produce a positive-definite matrix for any input \\(x\\).1\nA popular covariance function, which will be used in the implementations later in this chapter, is an exponentiated quadratic function, \\[\n K(x \\mid \\alpha, \\rho, \\sigma)_{i, j}\n= \\alpha^2\n\\exp \\left(\n- \\dfrac{1}{2 \\rho^2} \\sum_{d=1}^D (x_{i,d} - x_{j,d})^2\n\\right)\n+ \\delta_{i, j} \\sigma^2,\n\\] where \\(\\alpha\\), \\(\\rho\\), and \\(\\sigma\\) are hyperparameters defining the covariance function and where \\(\\delta_{i, j}\\) is the Kronecker delta function with value 1 if \\(i = j\\) and value 0 otherwise; this test is between the indexes \\(i\\) and \\(j\\), not between values \\(x_i\\) and \\(x_j\\). 
This kernel is obtained through a convolution of two independent Gaussian processes, \\(f_1\\) and \\(f_2\\), with kernels \\[\n K_1(x \\mid \\alpha, \\rho)_{i, j}\n= \\alpha^2\n\\exp \\left(\n- \\dfrac{1}{2 \\rho^2} \\sum_{d=1}^D (x_{i,d} - x_{j,d})^2\n\\right)\n\\] and \\[\n K_2(x \\mid \\sigma)_{i, j}\n=\n\\delta_{i, j} \\sigma^2.\n\\]\nThe addition of \\(\\sigma^2\\) on the diagonal is important to ensure the positive definiteness of the resulting matrix in the case of two identical inputs \\(x_i = x_j\\). In statistical terms, \\(\\sigma\\) is the scale of the noise term in the regression.\nThe hyperparameter \\(\\rho\\) is the length-scale, and corresponds to the frequency of the functions represented by the Gaussian process prior with respect to the domain. Values of \\(\\rho\\) closer to zero lead the GP to represent high-frequency functions, whereas larger values of \\(\\rho\\) lead to low-frequency functions. The hyperparameter \\(\\alpha\\) is the marginal standard deviation. It controls the magnitude of the range of the function represented by the GP. If you were to take the standard deviation of many draws from the GP \\(f_1\\) prior at a single input \\(x\\) conditional on one value of \\(\\alpha\\) one would recover \\(\\alpha\\).\nThe only term in the squared exponential covariance function involving the inputs \\(x_i\\) and \\(x_j\\) is their vector difference, \\(x_i - x_j\\). This produces a process with stationary covariance in the sense that if an input vector \\(x\\) is translated by a vector \\(\\epsilon\\) to \\(x +\n\\epsilon\\), the covariance at any pair of outputs is unchanged, because \\(K(x \\mid \\theta) = K(x + \\epsilon \\mid \\theta)\\).\nThe summation involved is just the squared Euclidean distance between \\(x_i\\) and \\(x_j\\) (i.e., the squared \\(L_2\\) norm of their difference, \\(x_i -\nx_j\\)). This results in support for smooth functions in the process. 
The amount of variation in the function is controlled by the free hyperparameters \\(\\alpha\\), \\(\\rho\\), and \\(\\sigma\\).\nChanging the notion of distance from Euclidean to taxicab distance (i.e., an \\(L_1\\) norm) changes the support to functions which are continuous but not smooth.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Gaussian Processes" + ] + }, + { + "objectID": "stan-users-guide/gaussian-processes.html#simulating-from-a-gaussian-process", + "href": "stan-users-guide/gaussian-processes.html#simulating-from-a-gaussian-process", + "title": "Gaussian Processes", + "section": "", + "text": "It is simplest to start with a Stan model that does nothing more than simulate draws of functions \\(f\\) from a Gaussian process. In practical terms, the model will draw values \\(y_n = f(x_n)\\) for finitely many input points \\(x_n\\).\nThe Stan model defines the mean and covariance functions in a transformed data block and then samples outputs \\(y\\) in the model using a multivariate normal distribution. To make the model concrete, the squared exponential covariance function described in the previous section will be used with hyperparameters set to \\(\\alpha^2 = 1\\), \\(\\rho^2 = 1\\), and \\(\\sigma^2 = 0.1\\), and the mean function \\(m\\) is defined to always return the zero vector, \\(m(x) = \\textbf{0}\\). 
Consider the following implementation of a Gaussian process simulator.\ndata {\n int<lower=1> N;\n array[N] real x;\n}\ntransformed data {\n matrix[N, N] K;\n vector[N] mu = rep_vector(0, N);\n for (i in 1:(N - 1)) {\n K[i, i] = 1 + 0.1;\n for (j in (i + 1):N) {\n K[i, j] = exp(-0.5 * square(x[i] - x[j]));\n K[j, i] = K[i, j];\n }\n }\n K[N, N] = 1 + 0.1;\n}\nparameters {\n vector[N] y;\n}\nmodel {\n y ~ multi_normal(mu, K);\n}\nThe above model can also be written more compactly using the specialized covariance function that implements the exponentiated quadratic kernel.\ndata {\n int<lower=1> N;\n array[N] real x;\n}\ntransformed data {\n matrix[N, N] K = gp_exp_quad_cov(x, 1.0, 1.0);\n vector[N] mu = rep_vector(0, N);\n for (n in 1:N) {\n K[n, n] = K[n, n] + 0.1;\n }\n}\nparameters {\n vector[N] y;\n}\nmodel {\n y ~ multi_normal(mu, K);\n}\nThe input data are just the vector of inputs x and its size N. Such a model can be used with values of x evenly spaced over some interval in order to plot sample draws of functions from a Gaussian process.\n\n\nOnly the input data needs to change in moving from a univariate model to a multivariate model.\nThe only lines that change from the univariate model above are as follows.\ndata {\n int<lower=1> N;\n int<lower=1> D;\n array[N] vector[D] x;\n}\ntransformed data {\n // ...\n}\nThe data are now declared as an array of vectors instead of an array of scalars; the dimensionality D is also declared.\nIn the remainder of the chapter, univariate models will be used for simplicity, but any of the models could be changed to multivariate in the same way as the simple sampling model. The only extra computational overhead from a multivariate model is in the distance calculation.\n\n\n\nA more efficient implementation of the simulation model can be coded in Stan by relocating, rescaling and rotating an isotropic standard normal variate. 
Suppose \\(\\eta\\) is an isotropic standard normal variate \\[\n\\eta \\sim \\textsf{normal}(\\textbf{0}, \\textrm{I}),\n\\] where \\(\\textbf{0}\\) is an \\(N\\)-vector of 0 values and \\(\\textrm{I}\\) is the \\(N\n\\times N\\) identity matrix. Let \\(L\\) be the Cholesky decomposition of \\(K(x \\mid \\theta)\\), i.e., the lower-triangular matrix \\(L\\) such that \\(LL^{\\top} =\nK(x \\mid \\theta)\\). Then the transformed variable \\(\\mu + L\\eta\\) has the intended target distribution, \\[\n \\mu + L\\eta \\sim \\textsf{multivariate normal}(\\mu(x), K(x \\mid \\theta)).\n\\]\nThis transform can be applied directly to Gaussian process simulation.\nThis model has the same data declarations for N and x, and the same transformed data definitions of mu and K as the previous model, with the addition of a transformed data variable for the Cholesky decomposition. The parameters change to the raw parameters sampled from an isotropic standard normal, and the actual parameters are defined in generated quantities.\n// ...\ntransformed data {\n matrix[N, N] L;\n // ...\n L = cholesky_decompose(K);\n}\nparameters {\n vector[N] eta;\n}\nmodel {\n eta ~ std_normal();\n}\ngenerated quantities {\n vector[N] y;\n y = mu + L * eta;\n}\nThe Cholesky decomposition is only computed once, after the data are loaded and the covariance matrix K computed. The isotropic normal distribution for eta is specified as a vectorized univariate distribution for efficiency; this specifies that each eta[n] has an independent standard normal distribution. 
The sampled vector y is then defined as a generated quantity using a direct encoding of the transform described above.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Gaussian Processes" + ] + }, + { + "objectID": "stan-users-guide/gaussian-processes.html#fit-gp.section", + "href": "stan-users-guide/gaussian-processes.html#fit-gp.section", + "title": "Gaussian Processes", + "section": "", + "text": "The full generative model for a GP with a normal outcome, \\(y \\in \\mathbb{R}^N\\), with inputs \\(x \\in \\mathbb{R}^N\\), for a finite \\(N\\): \\[\\begin{align*}\n\\rho &\\sim \\textsf{InvGamma}(5, 5) \\\\\n\\alpha &\\sim \\textsf{normal}(0, 1) \\\\\n\\sigma &\\sim \\textsf{normal}(0, 1) \\\\\nf &\\sim \\textsf{multivariate normal}\\left(0, K(x \\mid \\alpha, \\rho)\\right) \\\\\ny_i &\\sim \\textsf{normal}(f_i, \\sigma) \\, \\forall i \\in \\{1, \\dots, N\\}\n\\end{align*}\\] With a normal outcome, it is possible to integrate out the Gaussian process \\(f\\), yielding the more parsimonious model: \\[\\begin{align*}\n\\rho &\\sim \\textsf{InvGamma}(5, 5) \\\\\n\\alpha &\\sim \\textsf{normal}(0, 1) \\\\\n\\sigma &\\sim \\textsf{normal}(0, 1) \\\\\ny &\\sim \\textsf{multivariate normal}\n \\left(0, K(x \\mid \\alpha, \\rho) + \\textbf{I}_N \\sigma^2\\right) \\\\\n\\end{align*}\\]\nIt can be more computationally efficient when dealing with a normal outcome to integrate out the Gaussian process, because this yields a lower-dimensional parameter space over which to do inference. We’ll fit both models in Stan. The former model will be referred to as the latent variable GP, while the latter will be called the marginal likelihood GP.\nThe hyperparameters controlling the covariance function of a Gaussian process can be fit by assigning them priors, like we have in the generative models above, and then computing the posterior distribution of the hyperparameters given observed data. 
The priors on the parameters should be defined based on prior knowledge of the scale of the output values (\\(\\alpha\\)), the scale of the output noise (\\(\\sigma\\)), and the scale at which distances are measured among inputs (\\(\\rho\\)). See the Gaussian process priors section for more information about how to specify appropriate priors for the hyperparameters.\nThe Stan program implementing the marginal likelihood GP is shown below. The program is similar to the Stan programs that implement the simulation GPs above, but because we are doing inference on the hyperparameters, we need to calculate the covariance matrix K in the model block, rather than the transformed data block.\ndata {\n int<lower=1> N;\n array[N] real x;\n vector[N] y;\n}\ntransformed data {\n vector[N] mu = rep_vector(0, N);\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n}\nmodel {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n real sq_sigma = square(sigma);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + sq_sigma;\n }\n\n L_K = cholesky_decompose(K);\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n\n y ~ multi_normal_cholesky(mu, L_K);\n}\nThe data block declares a vector y of observed values y[n] for inputs x[n]. The transformed data block now only defines the mean vector to be zero. The three hyperparameters are defined as parameters constrained to be non-negative. The computation of the covariance matrix K is now in the model block because it involves unknown parameters and thus can’t simply be precomputed as transformed data. 
The rest of the model consists of the priors for the hyperparameters and the multivariate Cholesky-parameterized normal distribution, only now the value y is known and the covariance matrix K is an unknown dependent on the hyperparameters, allowing us to learn the hyperparameters.\nWe have used the Cholesky parameterized multivariate normal rather than the standard parameterization because it allows us to use the cholesky_decompose function which has been optimized for both small and large matrices. When working with small matrices the differences in computational speed between the two approaches will not be noticeable, but for larger matrices (\\(N \\gtrsim 100\\)) the Cholesky decomposition version will be faster.\nHamiltonian Monte Carlo sampling is fast and effective for hyperparameter inference in this model (Neal 1997). If the posterior is well-concentrated for the hyperparameters the Stan implementation will fit hyperparameters in models with a few hundred data points in seconds.\n\n\nWe can also explicitly code the latent variable formulation of a GP in Stan. This will be useful for when the outcome is not normal. We’ll need to add a small positive term, \\(\\delta\\) to the diagonal of the covariance matrix in order to ensure that our covariance matrix remains positive definite.\ndata {\n int<lower=1> N;\n array[N] real x;\n vector[N] y;\n}\ntransformed data {\n real delta = 1e-9;\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n vector[N] eta;\n}\nmodel {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n eta ~ std_normal();\n\n y ~ normal(f, sigma);\n}\nTwo differences between the latent variable GP and the marginal likelihood GP are worth noting. 
The first is that we have augmented our parameter block with a new parameter vector of length \\(N\\) called eta. This is used in the model block to generate a multivariate normal vector called \\(f\\), corresponding to the latent GP. We put a \\(\\textsf{normal}(0,1)\\) prior on eta like we did in the Cholesky-parameterized GP in the simulation section. The second difference is that although we could code the distribution statement for \\(y\\) with one \\(N\\)-dimensional multivariate normal with an identity covariance matrix multiplied by \\(\\sigma^2\\), we instead use vectorized univariate normal distribution, which is equivalent but more efficient to use.\n\n\n\n\nGaussian processes can be generalized the same way as standard linear models by introducing a link function. This allows them to be used as discrete data models.\n\n\nIf we want to model count data, we can remove the \\(\\sigma\\) parameter, and use poisson_log, which implements a Poisson distribution with log link function, rather than normal. We can also add an overall mean parameter, \\(a\\), which will account for the marginal expected value for \\(y\\). We do this because we cannot center count data like we would for normally distributed data.\ndata {\n // ...\n array[N] int<lower=0> y;\n // ...\n}\n// ...\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real a;\n vector[N] eta;\n}\nmodel {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n a ~ std_normal();\n eta ~ std_normal();\n\n y ~ poisson_log(a + f);\n}\n\n\n\nFor computational reasons, we may want to integrate out the latent variable \\(f\\), as was done in the normal output model. Unfortunately, exact marginalization over \\(f\\) is not possible when the outcome model is not normal. 
Instead, we may perform approximate marginalization with an embedded Laplace approximation (Rasmussen and Williams 2006; Rue, Martino, and Chopin 2009; Margossian et al. 2020). To do so, we first use the function laplace_marginal to approximate the marginal likelihood \\(p(y \\mid \\rho, \\alpha, a)\\) and sample the hyperparameters with Hamiltonian Monte Carlo sampling. Then, we recover the integrated out \\(f\\) in the generated quantities block using laplace_latent_rng.\nThe embedded Laplace approximation computes a Gaussian approximation of the conditional posterior, \\[\n \\hat p_\\mathcal{L}(f \\mid \\rho, \\alpha, a, y) \\approx p(f \\mid \\rho, \\alpha, a, y),\n\\] where \\(\\hat p_\\mathcal{L}\\) is a Gaussian that matches the mode and curvature of \\(p(f \\mid \\rho, \\alpha, a, y)\\). We then obtain an approximation of the marginal likelihood as follows: \\[\n \\hat p_\\mathcal{L}(y \\mid \\rho, \\alpha, a)\n = \\frac{p(f^* \\mid \\alpha, \\rho) p(y \\mid f^*, a)}{\n \\hat p_\\mathcal{L}(f^* \\mid \\rho, \\alpha, a, y)},\n\\] where \\(f^*\\) is the mode of \\(p(f \\mid \\rho, \\alpha, a, y)\\), obtained via numerical optimization.\nTo use Stan’s embedded Laplace approximation, we must define the prior covariance function and the log likelihood function in the functions block.\nfunctions {\n // log likelihood function\n real ll_function(vector f, real a, array[] int y) {\n return poisson_log_lpmf(y | a + f);\n }\n\n // covariance function\n matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n return add_diag(K, delta)\n }\n\n}\nThe embedded Laplace relies on calculations of the log likelihood’s Hessian, \\(\\partial^2 \\log p(y \\mid f, a, \\rho, \\alpha) / \\partial f^2\\), and these calculations can be much faster when the Hessian is sparse. In particular, it is expected that the Hessian is block diagonal. 
In the transformed data block we can specify the block size of the Hessian.\ntransformed data {\n int hessian_block_size = 1;\n}\nFor example, if \\(y_i\\) depends only on \\(f_i\\), then the Hessian of the log likelihood is diagonal and the block size is 1. On the other hand, if the Hessian is not sparse, then we set the hessian block size to \\(N\\), where \\(N\\) is the dimension of \\(f\\). Currently, Stan does not check the block size of the Hessian and so the user is responsible for correctly specifying the block size.\nFinally, we increment target in the model block with the approximation to \\(\\log p(y \\mid \\rho, \\alpha, a)\\).\nmodel {\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n\n target += laplace_marginal(ll_function, (a, y), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta));\n}\nNotice that we do not need to construct \\(f\\) explicitly, since it is marginalized out. Instead, we can recover the latent variables in generated quantities:\ngenerated quantities {\n vector[N] f = laplace_latent_rng(ll_function, (a, y), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta));\n}\nUsers can set the control parameters of the embedded Laplace approximation, via laplace_marginal_tol and laplace_latent_tol_rng. When using these functions, the user must set all the control options and store them in a tuple. 
These control parameters mostly concern the numerical optimizer used to find the mode \\(f^*\\) of \\(p(f \\mid \\rho, \\alpha, a)\\).\ntransformed data {\n tuple(vector[N], real, int, int, int, int) laplace_ops;\n laplace_ops.1 = rep_vector(0, N); // starting point for Laplace optimizer\n laplace_ops.2 = 1.49e-8; // tolerance for optimizer\n laplace_ops.3 = 500; // maximum number of steps for optimizer.\n laplace_ops.4 = 1; // solver type being used.\n laplace_ops.5 = 1000; // max number of steps for linesearch.\n laplace_ops.6 = 1; // allow_fallback (1: TRUE, 0: FALSE)\nIf users want to depart from the defaults for only some of the control parameters, a tuple with the default values (as above) can be created with the helper callable generate_laplace_options(), and the specific control parameter can then be modified,\ntransformed data {\n tuple(vector[N], real, int, int, int, int, int) laplace_ops =\n generate_laplace_options(N);\n\n laplace_ops.2 = 1e-6; // make tolerance of the optimizer less strict.\n}\nThe tuple laplace_ops is then passed to laplace_marginal_tol and laplace_latent_tol_rng.\nmodel {\n// ...\n\n target += laplace_marginal_tol(ll_function, (a, y), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta),\n laplace_ops);\n}\n\ngenerated quantities {\n vector[N] f = laplace_latent_rng(ll_function, (a, y), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta),\n laplace_ops);\n}\nStan also provides support for a limited menu of built-in functions, including the Poisson distribution with a log link and prior mean \\(m\\). When using such a built-in function, the user does not need to specify a likelihood in the functions block. However, the user must strictly follow the signature of the likelihood: in this case, \\(m\\) must be a vector of length \\(N\\) (to allow for different offsets for each observation \\(y_i\\)) and we must indicate which element of \\(f\\) each component of \\(y\\) matches using the variable \\(y_\\text{index}\\). 
In our example, there is a simple pairing \\((y_i, f_i)\\), however we could imagine a scenario where multiple observations \\((y_{j1}, y_{j2}, ...)\\) are observed for a single \\(f_j\\).\ntransformed data {\n // ...\n array[n_obs] int y_index;\n for (i in 1:n_obs) y_index[i] = i - 1;\n}\n\n// ...\n\ntransformed parameter {\n vector[N] m = rep_vector(a, N);\n}\n\nmodel {\n // ...\n target += laplace_marginal_poisson_log_lpmf(y | y_index, m,\n cov_function, (rho, alpha, x, N, delta));\n}\n\ngenerated quantities {\n vector[N] f = laplace_latent_poisson_log_rng(y, y_index, m,\n cov_function, (rho, alpha, x, N, delta));\n}\nAs before, we could specify the control parameters for the embedded Laplace approximation using laplace_marginal_tol_poisson_log_lpmf and laplace_latent_tol_poisson_log_rng.\nMarginalization with a Laplace approximation can lead to faster inference, however it also introduces an approximation error. In practice, this error is negligible when using a Poisson likelihood and the approximation works well for log concave likelihoods (Kuss and Rasmussen 2005; Vanhatalo, Pietiläinen, and Vehtari 2010; Cseke and Heskes 2011; Vehtari et al. 2016). Still, users should exercise caution, especially when trying unconventional likelihoods.\n\n\n\nFor binary classification problems, the observed outputs \\(z_n \\in\n\\{ 0,1 \\}\\) are binary. These outputs are modeled using a Gaussian process with (unobserved) outputs \\(y_n\\) through the logistic link, \\[\nz_n \\sim \\textsf{Bernoulli}(\\operatorname{logit}^{-1}(y_n)),\n\\] or in other words, \\[\n\\Pr[z_n = 1] = \\operatorname{logit}^{-1}(y_n).\n\\]\nWe can extend our latent variable GP Stan program to deal with classification problems. 
Below a is the bias term, which can help account for imbalanced classes in the training data:\ndata {\n // ...\n array[N] int<lower=0, upper=1> z;\n // ...\n}\n// ...\nmodel {\n // ...\n z ~ bernoulli_logit(a + f);\n}\n\n\n\nAs with the Poisson GP, we cannot marginalize the latent variables exactly, however we can resort to an embedded Laplace approximation.\nfunctions {\n // log likelihood function\n real ll_function(vector f, real a, array[] int z) {\n return bernoulli_logit_lpmf(z | a + f);\n }\n\n // covariance function\n matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n return add_diag(K, delta)\n }\n}\n\n// ...\n\ntransformed data {\n int hessian_block_size = 1;\n}\n\n// ...\n\nmodel {\n target += laplace_marginal(ll_function, (a, z), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta));\n}\n\ngenerated quantities {\n vector[N] f = laplace_latent_rng(ll_function, (a, z), hessian_block_size,\n cov_function, (rho, alpha, x, N, delta));\n}\nWhile marginalization with a Laplace approximation can lead to faster inference, it also introduces an approximation error. In practice, this error may not be negligible with a Bernoulli likelihood; for more discussion see, e.g. (Vehtari et al. 2016; Margossian et al. 2020).\n\n\n\n\nIf we have multivariate inputs \\(x \\in \\mathbb{R}^D\\), the squared exponential covariance function can be further generalized by fitting a scale parameter \\(\\rho_d\\) for each dimension \\(d\\), \\[\n k(x \\mid \\alpha, \\vec{\\rho}, \\sigma)_{i, j} = \\alpha^2 \\exp\n\\left(-\\dfrac{1}{2}\n\\sum_{d=1}^D \\dfrac{1}{\\rho_d^2} (x_{i,d} - x_{j,d})^2\n\\right)\n+ \\delta_{i, j}\\sigma^2.\n\\] The estimation of \\(\\rho\\) was termed “automatic relevance determination” by Neal (1996), but this is misleading, because the magnitude of the scale of the posterior for each \\(\\rho_d\\) is dependent on the scaling of the input data along dimension \\(d\\). 
Moreover, the scale of the parameters \\(\\rho_d\\) measures non-linearity along the \\(d\\)-th dimension, rather than “relevance” (Piironen and Vehtari 2016).\nA priori, the closer \\(\\rho_d\\) is to zero, the more nonlinear the conditional mean in dimension \\(d\\) is. A posteriori, the actual dependencies between \\(x\\) and \\(y\\) play a role. With one covariate \\(x_1\\) having a linear effect and another covariate \\(x_2\\) having a nonlinear effect, it is possible that \\(\\rho_1 > \\rho_2\\) even if the predictive relevance of \\(x_1\\) is higher (Rasmussen and Williams 2006, 80). The collection of \\(\\rho_d\\) (or \\(1/\\rho_d\\)) parameters can also be modeled hierarchically.\nThe implementation of automatic relevance determination is a straightforward extension of the one-dimensional case by modifying rho to be an array.\ndata {\n int<lower=1> N;\n int<lower=1> D;\n array[N] vector[D] x;\n vector[N] y;\n}\ntransformed data {\n real delta = 1e-9;\n}\nparameters {\n array[D] real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n vector[N] eta;\n}\nmodel {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n eta ~ std_normal();\n\n y ~ normal(f, sigma);\n}\n\n\n\nFormulating priors for GP hyperparameters requires the analyst to consider the inherent statistical properties of a GP, the GP’s purpose in the model, and the numerical issues that may arise in Stan when estimating a GP.\nPerhaps most importantly, the parameters \\(\\rho\\) and \\(\\alpha\\) are weakly identified (Zhang 2004). 
The ratio of the two parameters is well-identified, but in practice we put independent priors on the two hyperparameters because these two quantities are more interpretable than their ratio.\n\n\nGPs are a flexible class of priors and, as such, can represent a wide spectrum of functions. For length scales below the minimum spacing of the covariates the GP likelihood plateaus. Unless regularized by a prior, this flat likelihood induces considerable posterior mass at small length scales where the observation variance drops to zero and the functions supported by the GP begin to exactly interpolate between the input data. The resulting posterior not only significantly overfits to the input data, it also becomes hard to accurately sample using Euclidean HMC.\nWe may wish to put further soft constraints on the length-scale, but these are dependent on how the GP is used in our statistical model.\nIf our model consists of only the GP, i.e.: \\[\\begin{align*}\nf &\\sim \\textsf{multivariate normal}\\left(0, K(x \\mid \\alpha, \\rho)\\right) \\\\\ny_i &\\sim \\textsf{normal}(f_i, \\sigma) \\, \\forall i \\in \\{1, \\dots, N\\} \\\\\n & x \\in \\mathbb{R}^{N \\times D}, \\quad\n f \\in \\mathbb{R}^N\n\\end{align*}\\]\nwe likely don’t need constraints beyond penalizing small length-scales. We’d like to allow the GP prior to represent both high-frequency and low-frequency functions, so our prior should put non-negligible mass on both sets of functions. In this case, an inverse gamma, inv_gamma_lpdf in Stan’s language, will work well as it has a sharp left tail that puts negligible mass on infinitesimal length-scales, but a generous right tail, allowing for large length-scales. Inverse gamma priors will avoid infinitesimal length-scales because the density is zero at zero, so the posterior for length-scale will be pushed away from zero. 
An inverse gamma distribution is one of many zero-avoiding or boundary-avoiding distributions.2.\nIf we’re using the GP as a component in a larger model that includes an overall mean and fixed effects for the same variables we’re using as the domain for the GP, i.e.: \\[\\begin{align*}\nf &\\sim \\textsf{multivariate normal}\\big(0, K(x \\mid \\alpha, \\rho)\\big) \\\\\ny_i &\\sim \\textsf{normal}\\left(\\beta_0 + x_i \\beta_{[1:D]} + f_i, \\sigma\\right) \\, \\forall i\n \\in \\{1, \\dots, N\\} \\\\\n & x_i^T, \\beta_{[1:D]} \\in \\mathbb{R}^D,\\quad\n x \\in \\mathbb{R}^{N \\times D},\\quad\n f \\in \\mathbb{R}^N\n\\end{align*}\\]\nwe’ll likely want to constrain large length-scales as well. A length scale that is larger than the scale of the data yields a GP posterior that is practically linear (with respect to the particular covariate) and increasing the length scale has little impact on the likelihood. This will introduce nonidentifiability in our model, as both the fixed effects and the GP will explain similar variation. In order to limit the amount of overlap between the GP and the linear regression, we should use a prior with a sharper right tail to limit the GP to higher-frequency functions. We can use a generalized inverse Gaussian distribution: \\[\\begin{align*}\nf(x \\mid a, b, p) &= \\dfrac{\\left(a/b\\right)^{p/2}}{2K_p\\left(\\sqrt{ab}\\right)} x^{p - 1}\\exp\\big(-(ax + b\n / x)/2\\big) \\\\\n & x, a, b \\in \\mathbb{R}^{+},\\quad\n p \\in \\mathbb{Z}\n\\end{align*}\\]\nwhich has an inverse gamma left tail if \\(p \\leq 0\\) and an inverse Gaussian right tail. 
This has not yet been implemented in Stan’s math library, but it is possible to implement as a user defined function:\nfunctions {\n real generalized_inverse_gaussian_lpdf(real x, int p,\n real a, real b) {\n return p * 0.5 * log(a / b)\n - log(2 * modified_bessel_second_kind(p, sqrt(a * b)))\n + (p - 1) * log(x)\n - (a * x + b / x) * 0.5;\n }\n}\ndata {\n // ...\n}\nIf we have high-frequency covariates in our fixed effects, we may wish to further regularize the GP away from high-frequency functions, which means we’ll need to penalize smaller length-scales. Luckily, we have a useful way of thinking about how length-scale affects the frequency of the functions supported by the GP. If we were to repeatedly draw from a zero-mean GP with a length-scale of \\(\\rho\\) in a fixed-domain \\([0,T]\\), we would get a distribution for the number of times each draw of the GP crossed the zero axis. The expectation of this random variable, the number of zero crossings, is \\(T / \\pi\n\\rho\\). You can see that as \\(\\rho\\) decreases, the expectation of the number of upcrossings increases as the GP is representing higher-frequency functions. Thus, this is a good statistic to keep in mind when setting a lower-bound for our prior on length-scale in the presence of high-frequency covariates. However, this statistic is only valid for one-dimensional inputs.\n\n\n\nThe parameter \\(\\alpha\\) corresponds to how much of the variation is explained by the regression function and has a similar role to the prior variance for linear model weights. This means the prior can be the same as used in linear models, such as a half-\\(t\\) prior on \\(\\alpha\\).\nA half-\\(t\\) or half-Gaussian prior on alpha also has the benefit of putting nontrivial prior mass around zero. 
This allows the GP to support the zero function and allows the possibility that the GP won’t contribute to the conditional mean of the total output.\n\n\n\n\nSuppose for a given sequence of inputs \\(x\\) that the corresponding outputs \\(y\\) are observed. Given a new sequence of inputs \\(\\tilde{x}\\), the posterior predictive distribution of their labels is computed by sampling outputs \\(\\tilde{y}\\) according to \\[\np\\left(\\tilde{y} \\mid \\tilde{x},x,y\\right)\n\\ = \\\n\\frac{p\\left(\\tilde{y}, y \\mid \\tilde{x},x\\right)}\n {p(y \\mid x)}\n\\ \\propto \\\np\\left(\\tilde{y}, y \\mid \\tilde{x},x\\right).\n\\]\nA direct implementation in Stan defines a model in terms of the joint distribution of the observed \\(y\\) and unobserved \\(\\tilde{y}\\).\ndata {\n int<lower=1> N1;\n array[N1] real x1;\n vector[N1] y1;\n int<lower=1> N2;\n array[N2] real x2;\n}\ntransformed data {\n real delta = 1e-9;\n int<lower=1> N = N1 + N2;\n array[N] real x;\n for (n1 in 1:N1) {\n x[n1] = x1[n1];\n }\n for (n2 in 1:N2) {\n x[N1 + n2] = x2[n2];\n }\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n vector[N] eta;\n}\ntransformed parameters {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n}\nmodel {\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n eta ~ std_normal();\n\n y1 ~ normal(f[1:N1], sigma);\n}\ngenerated quantities {\n vector[N2] y2;\n for (n2 in 1:N2) {\n y2[n2] = normal_rng(f[N1 + n2], sigma);\n }\n}\nThe input vectors x1 and x2 are declared as data, as is the observed output vector y1. 
The unknown output vector y2, which corresponds to input vector x2, is declared in the generated quantities block and will be sampled when the model is executed.\nA transformed data block is used to combine the input vectors x1 and x2 into a single vector x.\nThe model block declares and defines a local variable for the combined output vector f, which consists of the concatenation of the conditional mean for known outputs y1 and unknown outputs y2. Thus the combined output vector f is aligned with the combined input vector x. All that is left is to define the univariate normal distribution statement for y.\nThe generated quantities block defines the quantity y2. We generate y2 by randomly generating N2 values from univariate normals with each mean corresponding to the appropriate element in f.\n\n\nWe can do predictive inference in non-Gaussian GPs in much the same way as we do with Gaussian GPs.\nConsider the following full model for prediction using logistic Gaussian process regression.\ndata {\n int<lower=1> N1;\n array[N1] real x1;\n array[N1] int<lower=0, upper=1> z1;\n int<lower=1> N2;\n array[N2] real x2;\n}\ntransformed data {\n real delta = 1e-9;\n int<lower=1> N = N1 + N2;\n array[N] real x;\n for (n1 in 1:N1) {\n x[n1] = x1[n1];\n }\n for (n2 in 1:N2) {\n x[N1 + n2] = x2[n2];\n }\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real a;\n vector[N] eta;\n}\ntransformed parameters {\n vector[N] f;\n {\n matrix[N, N] L_K;\n matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta;\n }\n}\nmodel {\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n a ~ std_normal();\n eta ~ std_normal();\n\n z1 ~ bernoulli_logit(a + f[1:N1]);\n}\ngenerated quantities {\n array[N2] int z2;\n for (n2 in 1:N2) {\n z2[n2] = bernoulli_logit_rng(a + f[N1 + n2]);\n }\n}\n\n\n\nBayesian predictive inference for Gaussian processes with Gaussian 
observations can be sped up by deriving the posterior analytically, then directly sampling from it.\nJumping straight to the result, \\[\np\\left(\\tilde{y} \\mid \\tilde{x},y,x\\right)\n=\n\\textsf{normal}\\left(K^{\\top}\\Sigma^{-1}y,\\\n \\Omega - K^{\\top}\\Sigma^{-1}K\\right),\n\\] where \\(\\Sigma = K(x \\mid \\alpha, \\rho, \\sigma)\\) is the result of applying the covariance function to the inputs \\(x\\) with observed outputs \\(y\\), \\(\\Omega =\nK(\\tilde{x} \\mid \\alpha, \\rho)\\) is the result of applying the covariance function to the inputs \\(\\tilde{x}\\) for which predictions are to be inferred, and \\(K\\) is the matrix of covariances between inputs \\(x\\) and \\(\\tilde{x}\\), which in the case of the exponentiated quadratic covariance function would be \\[\nK(x \\mid \\alpha, \\rho)_{i, j} = \\alpha^2 \\exp\\left(-\\dfrac{1}{2 \\rho^2}\n\\sum_{d=1}^D \\left(x_{i,d} - \\tilde{x}_{j,d}\\right)^2\\right).\n\\]\nThere is no noise term including \\(\\sigma^2\\) because the indexes of elements in \\(x\\) and \\(\\tilde{x}\\) are never the same.\nThis Stan code below uses the analytic form of the posterior and provides sampling of the resulting multivariate normal through the Cholesky decomposition. The data declaration is the same as for the latent variable example, but we’ve defined a function called gp_pred_rng which will generate a draw from the posterior predictive mean conditioned on observed data y1. 
The code uses a Cholesky decomposition in triangular solves in order to cut down on the number of matrix-matrix multiplications when computing the conditional mean and the conditional covariance of \\(p(\\tilde{y})\\).\nfunctions {\n vector gp_pred_rng(array[] real x2,\n vector y1,\n array[] real x1,\n real alpha,\n real rho,\n real sigma,\n real delta) {\n int N1 = rows(y1);\n int N2 = size(x2);\n vector[N2] f2;\n {\n matrix[N1, N1] L_K;\n vector[N1] K_div_y1;\n matrix[N1, N2] k_x1_x2;\n matrix[N1, N2] v_pred;\n vector[N2] f2_mu;\n matrix[N2, N2] cov_f2;\n matrix[N2, N2] diag_delta;\n matrix[N1, N1] K;\n K = gp_exp_quad_cov(x1, alpha, rho);\n for (n in 1:N1) {\n K[n, n] = K[n, n] + square(sigma);\n }\n L_K = cholesky_decompose(K);\n K_div_y1 = mdivide_left_tri_low(L_K, y1);\n K_div_y1 = mdivide_right_tri_low(K_div_y1', L_K)';\n k_x1_x2 = gp_exp_quad_cov(x1, x2, alpha, rho);\n f2_mu = (k_x1_x2' * K_div_y1);\n v_pred = mdivide_left_tri_low(L_K, k_x1_x2);\n cov_f2 = gp_exp_quad_cov(x2, alpha, rho) - v_pred' * v_pred;\n diag_delta = diag_matrix(rep_vector(delta, N2));\n\n f2 = multi_normal_rng(f2_mu, cov_f2 + diag_delta);\n }\n return f2;\n }\n}\ndata {\n int<lower=1> N1;\n array[N1] real x1;\n vector[N1] y1;\n int<lower=1> N2;\n array[N2] real x2;\n}\ntransformed data {\n vector[N1] mu = rep_vector(0, N1);\n real delta = 1e-9;\n}\nparameters {\n real<lower=0> rho;\n real<lower=0> alpha;\n real<lower=0> sigma;\n}\nmodel {\n matrix[N1, N1] L_K;\n {\n matrix[N1, N1] K = gp_exp_quad_cov(x1, alpha, rho);\n real sq_sigma = square(sigma);\n\n // diagonal elements\n for (n1 in 1:N1) {\n K[n1, n1] = K[n1, n1] + sq_sigma;\n }\n\n L_K = cholesky_decompose(K);\n }\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n\n y1 ~ multi_normal_cholesky(mu, L_K);\n}\ngenerated quantities {\n vector[N2] f2;\n vector[N2] y2;\n\n f2 = gp_pred_rng(x2, y1, x1, alpha, rho, sigma, delta);\n for (n2 in 1:N2) {\n y2[n2] = normal_rng(f2[n2], sigma);\n }\n}\n\n\n\n\nSuppose 
we have observations \\(y_i \\in \\mathbb{R}^M\\) observed at \\(x_i \\in \\mathbb{R}^K\\). One can model the data like so: \\[\\begin{align*}\ny_i &\\sim \\textsf{multivariate normal}\\left(f(x_i), \\textbf{I}_M \\sigma^2\\right) \\\\\nf(x) &\\sim \\textsf{GP}\\big(m(x), K(x \\mid \\theta, \\phi)\\big) \\\\\n & K(x \\mid \\theta, \\phi) \\in \\mathbb{R}^{M \\times M}, \\quad\n f(x), m(x) \\in \\mathbb{R}^M\n\\end{align*}\\] where the \\(K(x, x^\\prime \\mid \\theta, \\phi)_{[m, m^\\prime]}\\) entry defines the covariance between \\(f_m(x)\\) and \\(f_{m^\\prime}(x^\\prime)\\). This construction of Gaussian processes allows us to learn the covariance between the output dimensions of \\(f(x)\\). If we parameterize our kernel \\(K\\): \\[\nK(x, x^\\prime \\mid \\theta, \\phi)_{[m, m^\\prime]} = k\\left(x, x^\\prime \\mid\n\\theta\\right) k\\left(m, m^\\prime \\mid \\phi\\right)\n\\] then our finite dimensional generative model for the above is: \\[\\begin{align*}\nf &\\sim \\textsf{matrixnormal}\\big(m(x), K(x \\mid \\alpha, \\rho), C(\\phi)\\big) \\\\\ny_{i, m} &\\sim \\textsf{normal}(f_{i,m}, \\sigma) \\\\\nf &\\in \\mathbb{R}^{N \\times M}\n\\end{align*}\\] where \\(K(x \\mid \\alpha, \\rho)\\) is the exponentiated quadratic kernel we’ve used throughout this chapter, and \\(C(\\phi)\\) is a positive-definite matrix, parameterized by some vector \\(\\phi\\).\nThe matrix normal distribution has two covariance matrices: \\(K(x \\mid\n\\alpha, \\rho)\\) to encode column covariance, and \\(C(\\phi)\\) to define row covariance. 
The salient features of the matrix normal are that the rows of the matrix \\(f\\) are distributed: \\[\nf_{[n,]} \\sim \\textsf{multivariate normal}\\big(m(x)_{[n,]}, K(x \\mid \\alpha,\n\\rho)_{[n,n]} C(\\phi)\\big)\n\\] and that the columns of the matrix \\(f\\) are distributed: \\[\nf_{[,m]} \\sim \\textsf{multivariate normal}\\big(m(x)_{[,m]}, K(x\n \\mid \\alpha, \\rho) C(\\phi)_{[m,m]}\\big)\n\\] This also means that \\(\\mathbb{E}\\left[f^T f\\right]\\) is equal to \\(\\operatorname{trace}\\!\\big(K(x \\mid \\alpha, \\rho)\\big) \\times C\\), whereas \\(\\mathbb{E}\\left[ff^T\\right]\\) is \\(\\operatorname{trace}(C) \\times K(x \\mid \\alpha, \\rho)\\). We can derive this using properties of expectation and the matrix normal density.\nWe should set \\(\\alpha\\) to \\(1.0\\) because the parameter is not identified unless we constrain \\(\\operatorname{trace}(C) = 1\\). Otherwise, we can multiply \\(\\alpha\\) by a scalar \\(d\\) and \\(C\\) by \\(1/d\\) and our likelihood will not change.\nWe can generate a random variable \\(f\\) from a matrix normal density in \\(\\mathbb{R}^{N \\times M}\\) using the following algorithm: \\[\\begin{align*}\n\\eta_{i,j} &\\sim \\textsf{normal}(0, 1) \\, \\forall i,j \\\\\nf &= L_{K(x \\mid 1.0, \\rho)} \\, \\eta \\, L_C(\\phi)^T \\\\\nf &\\sim \\textsf{matrixnormal}\\big(0, K(x \\mid 1.0, \\rho), C(\\phi)\\big) \\\\\n\\eta &\\in \\mathbb{R}^{N \\times M} \\\\\nL_C(\\phi) &= \\texttt{cholesky}\\mathtt{\\_}\\texttt{decompose}\\big(C(\\phi)\\big) \\\\\nL_{K(x \\mid 1.0, \\rho)} &= \\texttt{cholesky}\\mathtt{\\_}\\texttt{decompose}\\big(K(x \\mid 1.0, \\rho)\\big)\n\\end{align*}\\]\nThis can be implemented in Stan using a latent-variable GP formulation. 
We’ve used \\(\\textsf{LKJCorr}\\) for \\(C(\\phi)\\), but any positive-definite matrix will do.\ndata {\n int<lower=1> N;\n int<lower=1> D;\n array[N] real x;\n matrix[N, D] y;\n}\ntransformed data {\n real delta = 1e-9;\n}\nparameters {\n real<lower=0> rho;\n vector<lower=0>[D] alpha;\n real<lower=0> sigma;\n cholesky_factor_corr[D] L_Omega;\n matrix[N, D] eta;\n}\nmodel {\n matrix[N, D] f;\n {\n matrix[N, N] K = gp_exp_quad_cov(x, 1.0, rho);\n matrix[N, N] L_K;\n\n // diagonal elements\n for (n in 1:N) {\n K[n, n] = K[n, n] + delta;\n }\n\n L_K = cholesky_decompose(K);\n f = L_K * eta\n * diag_pre_multiply(alpha, L_Omega)';\n }\n\n rho ~ inv_gamma(5, 5);\n alpha ~ std_normal();\n sigma ~ std_normal();\n L_Omega ~ lkj_corr_cholesky(3);\n to_vector(eta) ~ std_normal();\n\n to_vector(y) ~ normal(to_vector(f), sigma);\n}\ngenerated quantities {\n matrix[D, D] Omega;\n Omega = L_Omega * L_Omega';\n}", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Gaussian Processes" + ] + }, + { + "objectID": "stan-users-guide/gaussian-processes.html#footnotes", + "href": "stan-users-guide/gaussian-processes.html#footnotes", + "title": "Gaussian Processes", + "section": "Footnotes", + "text": "Footnotes\n\n\nGaussian processes can be extended to covariance functions producing positive semi-definite matrices, but Stan does not support inference in the resulting models because the resulting distribution does not have unconstrained support.↩︎\nA boundary-avoiding prior is just one where the limit of the density is zero at the boundary, the result of which is estimates that are pushed away from the boundary.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Gaussian Processes" + ] + }, + { + "objectID": "stan-users-guide/floating-point.html", + "href": "stan-users-guide/floating-point.html", + "title": "Floating Point Arithmetic", + "section": "", + "text": "Computers approximate real values in \\(\\mathbb{R}\\) using a fixed number of bits. 
This chapter explains how this is done and why it is important for writing robust Stan (and other numerical) programs. The subfield of computer science devoted to studying how real arithmetic works on computers is called numerical analysis.\n\n\nStan’s arithmetic is implemented using double-precision arithmetic. The behavior of most1 modern computers follows the floating-point arithmetic, IEEE Standard for Floating-Point Arithmetic (IEEE 754).\n\n\nThe double-precision component of the IEEE 754 standard specifies the representation of real values using a fixed pattern of 64 bits (8 bytes). All values are represented in base two (i.e., binary). The representation is divided into two signed components:\n\nsignificand (53 bits): base value representing significant digits\nexponent (11 bits): power of two multiplied by the base\n\nThe value of a finite floating point number is\n\\[\nv = (-1)^s \\times c \\, 2^q\n\\]\n\n\n\nA normal floating-point value does not use any leading zeros in its significand; subnormal numbers may use leading zeros. Not all I/O systems support subnormal numbers.\n\n\n\nThere are some reserved exponent values so that legal exponent values range between \\(-(2^{10}) + 2 = -1022\\) and \\(2^{10} - 1 = 1023\\). Legal significand values are between \\(-2^{52}\\) and \\(2^{52} - 1\\). Floating point allows the representation of both really big and really small values. Some extreme values are\n\nlargest normal finite number: \\(\\approx 1.8 \\times 10^{308}\\)\nlargest subnormal finite number: \\(\\approx 2.2 \\times 10^{-308}\\)\nsmallest positive normal number: \\(\\approx 2.2 \\times 10^{-308}\\)\nsmallest positive subnormal number: \\(\\approx 4.9 \\times 10^{-324}\\)\n\n\n\n\nBecause of the sign bit, there are two ways to represent zero, often called “positive zero” and “negative zero”. 
This distinction is irrelevant in Stan (as it is in R), because the two values are equal (i.e., 0 == -0 evaluates to true).\n\n\n\nA specially chosen bit pattern is used for the not-a-number value (often written as NaN in programming language output, including Stan’s).\nStan provides a value function not_a_number() that returns this special not-a-number value. It is meant to represent error conditions, not missing values. Usually when not-a-number is an argument to a function, the result will be not-a-number if an exception (a rejection in Stan) is not raised.\nStan also provides a test function is_nan(x) that returns 1 if x is not-a-number and 0 otherwise.\nNot-a-number values propagate under almost all mathematical operations. For example, all of the built-in binary arithmetic operations (addition, subtraction, multiplication, division, negation) return not-a-number if any of their arguments are not-a-number. The built-in functions such as log and exp have the same behavior, propagating not-a-number values.\nMost of Stan’s built-in functions will throw exceptions (i.e., reject) when any of their arguments is not-a-number.\nComparisons with not-a-number always return false, up to and including comparison with itself. That is, not_a_number() == not_a_number() somewhat confusingly returns false. That is why there is a built-in is_nan() function in Stan (and in C++). The only exception is negation, which remains coherent. This means not_a_number() != not_a_number() returns true.\nUndefined operations often return not-a-number values. For example, sqrt(-1) will evaluate to not-a-number.\n\n\n\nThere are also two special values representing positive infinity (\\(\\infty)\\) and negative infinity (\\(-\\infty\\)). These are not as pathological as not-a-number, but are often used to represent error conditions such as overflow and underflow. For example, rather than raising an error or returning not-a-number, log(0) evaluates to negative infinity. 
Exponentiating negative infinity leads back to zero, so that 0 == exp(log(0)). Nevertheless, this should not be done in Stan because the chain rule used to calculate the derivatives will attempt illegal operations and return not-a-number.\nThere are value functions positive_infinity() and negative_infinity() as well as a test function is_inf().\nPositive and negative infinity have the expected comparison behavior, so that negative_infinity() < 0 evaluates to true (represented with 1 in Stan). Also, negating positive infinity leads to negative infinity and vice-versa.\nPositive infinity added to either itself or a finite value produces positive infinity. Negative infinity behaves the same way. However, attempts to subtract positive infinity from itself produce not-a-number, not zero. Similarly, attempts to divide infinite values results in a not-a-number value.\n\n\n\n\nIn programming languages such as Stan, numbers may be represented in standard decimal (base 10) notation. For example, 2.39 or -1567846.276452. Remember there is no point in writing more than 16 significant digits as they cannot be represented. A number may be coded in Stan using scientific notation, which consists of a signed decimal representation of a base and a signed integer decimal exponent. For example, 36.29e-3 represents the number \\(36.29 \\times\n10^{-3}\\), which is the same number as is represented by 0.03629.\n\n\n\nThe choice of significand provides \\(\\log_{10} 2^{53} \\approx 15.95\\) decimal (base 10) digits of arithmetic precision. This is just the precision of the floating-point representation. After several operations are chained together, the realized arithmetic precision is often much lower.\n\n\nIn practice, the finite amount of arithmetic precision leads to rounding, whereby a number is represented by the closest floating-point number. 
For example, with only 16 decimal digits of accuracy,\n1 + 1e-20 == 1\nThe closest floating point number to \\(1 + 10^{-20}\\) turns out to be \\(1\\) itself. By contrast,\n0 + 1e-20 == 1e-20\nThis highlights the fact that precision depends on scale. Even though 1 + 1e-20 == 1, we have 1e-20 + 1e-20 == 2e-20, as expected.\nRounding also manifests itself in a lack of associativity. In particular, it does not usually hold for three floating point numbers \\(a, b, c\\) that \\((a + b) + c = a + (b + c)\\).\nIn statistical applications, problems often manifest in situations where users expect the usual rules of real-valued arithmetic to hold. Suppose we have a lower triangular matrix \\(L\\) with strictly positive diagonal, so that it is the Cholesky factor of a positive-definite matrix \\(L \\, L^{\\top}\\). In practice, rounding and loss of precision may render the result \\(L \\, L^{\\top}\\) neither symmetric nor positive definite.\nIn practice, care must be taken to defend against rounding. For example, symmetry may be produced by adding \\(L \\, L^{\\top}\\) with its transpose and dividing by two, or by copying the lower triangular portion into the upper portion. Positive definiteness may be maintained by adding a small quantity to the diagonal.\n\n\n\nThe smallest number greater than zero is roughly \\(0 + 10^{-323}\\). The largest number less than one is roughly \\(1 - 10^{-15.95}\\). The asymmetry is apparent when considering the representation of that largest number smaller than one—the exponent is of no help, and the number is represented as the binary equivalent of \\(0.9999999999999999\\).\nFor this reason, the machine precision is said to be roughly \\(10^{-15.95}\\). This constant is available as machine_precision() in Stan.\n\n\n\nSpecial operations are available to mitigate this problem with numbers rounding when they get close to one. For example, consider the operation log(1 + x) for positive x. 
When x is small (less than \\(10^{-16}\\) for double-precision floating point), the sum in the argument will round to 1 and the result will round to zero. To allow more granularity, programming languages provide a library function directly implementing \\(f(x) = \\log (1 + x)\\). In Stan (as in C++), this operation is written as log1p(x). Because x itself may be close to zero, the function log1p(x) can take the logarithm of values very close to one, the results of which are close to zero.\nSimilarly, the complementary cumulative distribution functions (CCDF), defined by \\(F^{\\complement}_Y(y) = 1 - F_Y(y)\\), where \\(F_Y\\) is the cumulative distribution function (CDF) for the random variable \\(Y\\). This allows values very close to one to be represented in complementary form.\n\n\n\nAnother downside to floating point representations is that subtraction of two numbers close to each other results in a loss of precision that depends on how close they are. This is easy to see in practice. Consider \\[\\begin{align*}\n 1&.23456789012345 \\\\\n- 1&.23456789012344 \\\\\n= 0&.00000000000001\n\\end{align*}\\] We start with fifteen decimal places of accuracy in the arguments and are left with a single decimal place of accuracy in the result.\nCatastrophic cancellation arises in statistical computations whenever we calculate variance for a distribution with small standard deviations relative to its location. When calculating summary statistics, Stan uses Welford’s algorithm for computing variances. This avoids catastrophic cancellation and may also be carried out in a single pass.\n\n\n\nEven though 1e200 may be represented as a double precision floating point value, there is no finite value large enough to represent 1e200 * 1e200. The result of 1e200 * 1e200 is said to overflow. The IEEE 754 standard requires the result to be positive infinity.\nOverflow is rarely a problem in statistical computations. 
If it is, it’s possible to work on the log scale, just as for underflow as described below.\n\n\n\nWhen there is no number small enough to represent a result, it is said to underflow. For instance, 1e-200 may be represented, but 1e-200 * 1e-200 underflows so that the result is zero.\nUnderflow is a ubiquitous problem in likelihood calculations. For example, if \\(p(y_n \\mid \\theta) < 0.1\\), then \\[\np(y \\mid \\theta) = \\prod_{n=1}^N p(y_n \\mid \\theta)\n\\] will underflow as soon as \\(N > 350\\) or so.\nTo deal with underflow, work on the log scale. Even though \\(p(y \\mid\n\\theta)\\) can’t be represented, there is no problem representing \\[\n\\begin{array}{rcl}\n\\log p(y \\mid \\theta)\n& = & \\log \\prod_{n=1}^N p(y_n \\mid \\theta)\n\\\\[4pt]\n& = & \\sum_{n = 1}^N \\log p(y_n \\mid \\theta)\n\\end{array}\n\\]\nThis is why all of Stan’s probability functions operate on the log scale.\n\n\n\n\nWorking on the log scale, multiplication is converted to addition, \\[\n\\log (a \\cdot b) = \\log a + \\log b.\n\\] Thus sequences of multiplication operations can remain on the log scale. But what about addition? Given \\(\\log a\\) and \\(\\log b\\), how do we get \\(\\log (a + b)\\)? Working out the algebra, \\[\n\\log (a + b)\n=\n\\log (\\exp(\\log a) + \\exp(\\log b)).\n\\]\n\n\nThe nested log of sum of exponentials is so common, it has its own name, “log-sum-exp”, \\[\n\\textrm{log-sum-exp}(u, v)\n=\n\\log (\\exp(u) + \\exp(v)).\n\\] so that \\[\n\\log (a + b)\n=\n\\textrm{log-sum-exp}(\\log a, \\log b).\n\\]\nAlthough it appears this might overflow as soon as exponentiation is introduced, evaluation does not proceed by evaluating the terms as written. 
Instead, with a little algebra, the terms are rearranged into a stable form, \\[\n\\textrm{log-sum-exp}(u, v)\n=\n\\max(u, v) + \\log\\big( \\exp(u - \\max(u, v)) + \\exp(v - \\max(u, v)) \\big).\n\\]\nBecause the terms inside the exponentiations are \\(u - \\max(u, v)\\) and \\(v - \\max(u, v)\\), one will be zero and the other will be negative. Because the operation is symmetric, it may be assumed without loss of generality that \\(u \\geq v\\), so that \\[\n\\textrm{log-sum-exp}(u, v) = u + \\log\\big(1 + \\exp(v - u)\\big).\n\\]\nAlthough the inner term may itself be evaluated using the built-in function log1p, there is only limited gain because \\(\\exp(v - u)\\) is only near zero when \\(u\\) is much larger than \\(v\\), meaning the final result is likely to round to \\(u\\) anyway.\nTo conclude, when evaluating \\(\\log (a + b)\\) given \\(\\log a\\) and \\(\\log\nb\\), and assuming \\(\\log a > \\log b\\), return\n\\[\n\\log (a + b) =\n\\log a + \\textrm{log1p}\\big(\\exp(\\log b - \\log a)\\big).\n\\]\n\n\n\nThe log sum of exponentials function may be generalized to sequences in the obvious way, so that if \\(v = v_1, \\ldots, v_N\\), then \\[\\begin{eqnarray*}\n\\textrm{log-sum-exp}(v)\n& = & \\log \\sum_{n = 1}^N \\exp(v_n)\n\\\\[4pt]\n& = & \\max(v) + \\log \\sum_{n = 1}^N \\exp(v_n - \\max(v)).\n\\end{eqnarray*}\\] The exponent cannot overflow because its argument is either zero or negative. This form makes it easy to calculate \\(\\log (u_1 + \\cdots + u_N)\\) given only \\(\\log u_n\\).\n\n\n\nAn immediate application is to computing the mean of a vector \\(u\\) entirely on the log scale. That is, given \\(\\log u\\) and returning \\(\\log \\textrm{mean}(u)\\). 
\\[\\begin{eqnarray*}\n\\log \\left( \\frac{1}{N} \\sum_{n = 1}^N u_n \\right)\n& = & \\log \\frac{1}{N} + \\log \\sum_{n = 1}^N \\exp(\\log u_n)\n\\\\[4pt]\n& = & -\\log N + \\textrm{log-sum-exp}(\\log u).\n\\end{eqnarray*}\\] where \\(\\log u = (\\log u_1, \\ldots, \\log u_N)\\) is understood elementwise.\n\n\n\n\nBecause floating-point representations are inexact, it is rarely a good idea to test exact equality. The general recommendation is that rather than testing x == y, an approximate test may be used given an absolute or relative tolerance.\nGiven a positive absolute tolerance of epsilon, x can be compared to y using the conditional\nabs(x - y) <= epsilon.\nAbsolute tolerances work when the scale of x and y and the relevant comparison is known.\nGiven a positive relative tolerance of epsilon, a typical comparison is\n2 * abs(x - y) / (abs(x) + abs(y)) <= epsilon.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Floating Point Arithmetic" + ] + }, + { + "objectID": "stan-users-guide/floating-point.html#floating-point-representations", + "href": "stan-users-guide/floating-point.html#floating-point-representations", + "title": "Floating Point Arithmetic", + "section": "", + "text": "Stan’s arithmetic is implemented using double-precision arithmetic. The behavior of most1 modern computers follows the floating-point arithmetic, IEEE Standard for Floating-Point Arithmetic (IEEE 754).\n\n\nThe double-precision component of the IEEE 754 standard specifies the representation of real values using a fixed pattern of 64 bits (8 bytes). All values are represented in base two (i.e., binary). 
The representation is divided into two signed components:\n\nsignificand (53 bits): base value representing significant digits\nexponent (11 bits): power of two multiplied by the base\n\nThe value of a finite floating point number is\n\\[\nv = (-1)^s \\times c \\, 2^q\n\\]\n\n\n\nA normal floating-point value does not use any leading zeros in its significand; subnormal numbers may use leading zeros. Not all I/O systems support subnormal numbers.\n\n\n\nThere are some reserved exponent values so that legal exponent values range between \\(-(2^{10}) + 2 = -1022\\) and \\(2^{10} - 1 = 1023\\). Legal significand values are between \\(-2^{52}\\) and \\(2^{52} - 1\\). Floating point allows the representation of both really big and really small values. Some extreme values are\n\nlargest normal finite number: \\(\\approx 1.8 \\times 10^{308}\\)\nlargest subnormal finite number: \\(\\approx 2.2 \\times 10^{-308}\\)\nsmallest positive normal number: \\(\\approx 2.2 \\times 10^{-308}\\)\nsmallest positive subnormal number: \\(\\approx 4.9 \\times 10^{-324}\\)\n\n\n\n\nBecause of the sign bit, there are two ways to represent zero, often called “positive zero” and “negative zero”. This distinction is irrelevant in Stan (as it is in R), because the two values are equal (i.e., 0 == -0 evaluates to true).\n\n\n\nA specially chosen bit pattern is used for the not-a-number value (often written as NaN in programming language output, including Stan’s).\nStan provides a value function not_a_number() that returns this special not-a-number value. It is meant to represent error conditions, not missing values. Usually when not-a-number is an argument to a function, the result will be not-a-number if an exception (a rejection in Stan) is not raised.\nStan also provides a test function is_nan(x) that returns 1 if x is not-a-number and 0 otherwise.\nNot-a-number values propagate under almost all mathematical operations. 
For example, all of the built-in binary arithmetic operations (addition, subtraction, multiplication, division, negation) return not-a-number if any of their arguments are not-a-number. The built-in functions such as log and exp have the same behavior, propagating not-a-number values.\nMost of Stan’s built-in functions will throw exceptions (i.e., reject) when any of their arguments is not-a-number.\nComparisons with not-a-number always return false, up to and including comparison with itself. That is, not_a_number() == not_a_number() somewhat confusingly returns false. That is why there is a built-in is_nan() function in Stan (and in C++). The only exception is negation, which remains coherent. This means not_a_number() != not_a_number() returns true.\nUndefined operations often return not-a-number values. For example, sqrt(-1) will evaluate to not-a-number.\n\n\n\nThere are also two special values representing positive infinity (\\(\\infty)\\) and negative infinity (\\(-\\infty\\)). These are not as pathological as not-a-number, but are often used to represent error conditions such as overflow and underflow. For example, rather than raising an error or returning not-a-number, log(0) evaluates to negative infinity. Exponentiating negative infinity leads back to zero, so that 0 == exp(log(0)). Nevertheless, this should not be done in Stan because the chain rule used to calculate the derivatives will attempt illegal operations and return not-a-number.\nThere are value functions positive_infinity() and negative_infinity() as well as a test function is_inf().\nPositive and negative infinity have the expected comparison behavior, so that negative_infinity() < 0 evaluates to true (represented with 1 in Stan). Also, negating positive infinity leads to negative infinity and vice-versa.\nPositive infinity added to either itself or a finite value produces positive infinity. Negative infinity behaves the same way. 
However, attempts to subtract positive infinity from itself produce not-a-number, not zero. Similarly, attempts to divide infinite values results in a not-a-number value.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Floating Point Arithmetic" + ] + }, + { + "objectID": "stan-users-guide/floating-point.html#literals-decimal-and-scientific-notation", + "href": "stan-users-guide/floating-point.html#literals-decimal-and-scientific-notation", + "title": "Floating Point Arithmetic", + "section": "", + "text": "In programming languages such as Stan, numbers may be represented in standard decimal (base 10) notation. For example, 2.39 or -1567846.276452. Remember there is no point in writing more than 16 significant digits as they cannot be represented. A number may be coded in Stan using scientific notation, which consists of a signed decimal representation of a base and a signed integer decimal exponent. For example, 36.29e-3 represents the number \\(36.29 \\times\n10^{-3}\\), which is the same number as is represented by 0.03629.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Floating Point Arithmetic" + ] + }, + { + "objectID": "stan-users-guide/floating-point.html#arithmetic-precision", + "href": "stan-users-guide/floating-point.html#arithmetic-precision", + "title": "Floating Point Arithmetic", + "section": "", + "text": "The choice of significand provides \\(\\log_{10} 2^{53} \\approx 15.95\\) decimal (base 10) digits of arithmetic precision. This is just the precision of the floating-point representation. After several operations are chained together, the realized arithmetic precision is often much lower.\n\n\nIn practice, the finite amount of arithmetic precision leads to rounding, whereby a number is represented by the closest floating-point number. For example, with only 16 decimal digits of accuracy,\n1 + 1e-20 == 1\nThe closest floating point number to \\(1 + 10^{-20}\\) turns out to be \\(1\\) itself. 
By contrast,\n0 + 1e-20 == 1e-20\nThis highlights the fact that precision depends on scale. Even though 1 + 1e-20 == 1, we have 1e-20 + 1e-20 == 2e-20, as expected.\nRounding also manifests itself in a lack of associativity. In particular, it does not usually hold for three floating point numbers \\(a, b, c\\) that \\((a + b) + c = a + (b + c)\\).\nIn statistical applications, problems often manifest in situations where users expect the usual rules of real-valued arithmetic to hold. Suppose we have a lower triangular matrix \\(L\\) with strictly positive diagonal, so that it is the Cholesky factor of a positive-definite matrix \\(L \\, L^{\\top}\\). In practice, rounding and loss of precision may render the result \\(L \\, L^{\\top}\\) neither symmetric nor positive definite.\nIn practice, care must be taken to defend against rounding. For example, symmetry may be produced by adding \\(L \\, L^{\\top}\\) with its transpose and dividing by two, or by copying the lower triangular portion into the upper portion. Positive definiteness may be maintained by adding a small quantity to the diagonal.\n\n\n\nThe smallest number greater than zero is roughly \\(0 + 10^{-323}\\). The largest number less than one is roughly \\(1 - 10^{-15.95}\\). The asymmetry is apparent when considering the representation of that largest number smaller than one—the exponent is of no help, and the number is represented as the binary equivalent of \\(0.9999999999999999\\).\nFor this reason, the machine precision is said to be roughly \\(10^{-15.95}\\). This constant is available as machine_precision() in Stan.\n\n\n\nSpecial operations are available to mitigate this problem with numbers rounding when they get close to one. For example, consider the operation log(1 + x) for positive x. When x is small (less than \\(10^{-16}\\) for double-precision floating point), the sum in the argument will round to 1 and the result will round to zero. 
To allow more granularity, programming languages provide a library function directly implementing \\(f(x) = \\log (1 + x)\\). In Stan (as in C++), this operation is written as log1p(x). Because x itself may be close to zero, the function log1p(x) can take the logarithm of values very close to one, the results of which are close to zero.\nSimilarly, the complementary cumulative distribution function (CCDF) is defined by \\(F^{\\complement}_Y(y) = 1 - F_Y(y)\\), where \\(F_Y\\) is the cumulative distribution function (CDF) for the random variable \\(Y\\). This allows values very close to one to be represented in complementary form.\n\n\n\nAnother downside to floating point representations is that subtraction of two numbers close to each other results in a loss of precision that depends on how close they are. This is easy to see in practice. Consider \\[\\begin{align*}\n 1&.23456789012345 \\\\\n- 1&.23456789012344 \\\\\n= 0&.00000000000001\n\\end{align*}\\] We start with fifteen decimal places of accuracy in the arguments and are left with a single decimal place of accuracy in the result.\nCatastrophic cancellation arises in statistical computations whenever we calculate variance for a distribution with small standard deviations relative to its location. When calculating summary statistics, Stan uses Welford’s algorithm for computing variances. This avoids catastrophic cancellation and may also be carried out in a single pass.\n\n\n\nEven though 1e200 may be represented as a double precision floating point value, there is no finite value large enough to represent 1e200 * 1e200. The result of 1e200 * 1e200 is said to overflow. The IEEE 754 standard requires the result to be positive infinity.\nOverflow is rarely a problem in statistical computations. If it is, it’s possible to work on the log scale, just as for underflow as described below.\n\n\n\nWhen there is no number small enough to represent a result, it is said to underflow. 
For instance, 1e-200 may be represented, but 1e-200 * 1e-200 underflows so that the result is zero.\nUnderflow is a ubiquitous problem in likelihood calculations. For example, if \\(p(y_n \\mid \\theta) < 0.1\\), then \\[\np(y \\mid \\theta) = \\prod_{n=1}^N p(y_n \\mid \\theta)\n\\] will underflow as soon as \\(N > 350\\) or so.\nTo deal with underflow, work on the log scale. Even though \\(p(y \\mid\n\\theta)\\) can’t be represented, there is no problem representing \\[\n\\begin{array}{rcl}\n\\log p(y \\mid \\theta)\n& = & \\log \\prod_{n=1}^N p(y_n \\mid \\theta)\n\\\\[4pt]\n& = & \\sum_{n = 1}^N \\log p(y_n \\mid \\theta)\n\\end{array}\n\\]\nThis is why all of Stan’s probability functions operate on the log scale.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Floating Point Arithmetic" + ] + }, + { + "objectID": "stan-users-guide/floating-point.html#log-sum-of-exponentials", + "href": "stan-users-guide/floating-point.html#log-sum-of-exponentials", + "title": "Floating Point Arithmetic", + "section": "", + "text": "Working on the log scale, multiplication is converted to addition, \\[\n\\log (a \\cdot b) = \\log a + \\log b.\n\\] Thus sequences of multiplication operations can remain on the log scale. But what about addition? Given \\(\\log a\\) and \\(\\log b\\), how do we get \\(\\log (a + b)\\)? Working out the algebra, \\[\n\\log (a + b)\n=\n\\log (\\exp(\\log a) + \\exp(\\log b)).\n\\]\n\n\nThe nested log of sum of exponentials is so common, it has its own name, “log-sum-exp”, \\[\n\\textrm{log-sum-exp}(u, v)\n=\n\\log (\\exp(u) + \\exp(v)).\n\\] so that \\[\n\\log (a + b)\n=\n\\textrm{log-sum-exp}(\\log a, \\log b).\n\\]\nAlthough it appears this might overflow as soon as exponentiation is introduced, evaluation does not proceed by evaluating the terms as written. 
Instead, with a little algebra, the terms are rearranged into a stable form, \\[\n\\textrm{log-sum-exp}(u, v)\n=\n\\max(u, v) + \\log\\big( \\exp(u - \\max(u, v)) + \\exp(v - \\max(u, v)) \\big).\n\\]\nBecause the terms inside the exponentiations are \\(u - \\max(u, v)\\) and \\(v - \\max(u, v)\\), one will be zero and the other will be negative. Because the operation is symmetric, it may be assumed without loss of generality that \\(u \\geq v\\), so that \\[\n\\textrm{log-sum-exp}(u, v) = u + \\log\\big(1 + \\exp(v - u)\\big).\n\\]\nAlthough the inner term may itself be evaluated using the built-in function log1p, there is only limited gain because \\(\\exp(v - u)\\) is only near zero when \\(u\\) is much larger than \\(v\\), meaning the final result is likely to round to \\(u\\) anyway.\nTo conclude, when evaluating \\(\\log (a + b)\\) given \\(\\log a\\) and \\(\\log\nb\\), and assuming \\(\\log a > \\log b\\), return\n\\[\n\\log (a + b) =\n\\log a + \\textrm{log1p}\\big(\\exp(\\log b - \\log a)\\big).\n\\]\n\n\n\nThe log sum of exponentials function may be generalized to sequences in the obvious way, so that if \\(v = v_1, \\ldots, v_N\\), then \\[\\begin{eqnarray*}\n\\textrm{log-sum-exp}(v)\n& = & \\log \\sum_{n = 1}^N \\exp(v_n)\n\\\\[4pt]\n& = & \\max(v) + \\log \\sum_{n = 1}^N \\exp(v_n - \\max(v)).\n\\end{eqnarray*}\\] The exponent cannot overflow because its argument is either zero or negative. This form makes it easy to calculate \\(\\log (u_1 + \\cdots + u_N)\\) given only \\(\\log u_n\\).\n\n\n\nAn immediate application is to computing the mean of a vector \\(u\\) entirely on the log scale. That is, given \\(\\log u\\) and returning \\(\\log \\textrm{mean}(u)\\). 
\\[\\begin{eqnarray*}\n\\log \\left( \\frac{1}{N} \\sum_{n = 1}^N u_n \\right)\n& = & \\log \\frac{1}{N} + \\log \\sum_{n = 1}^N \\exp(\\log u_n)\n\\\\[4pt]\n& = & -\\log N + \\textrm{log-sum-exp}(\\log u).\n\\end{eqnarray*}\\] where \\(\\log u = (\\log u_1, \\ldots, \\log u_N)\\) is understood elementwise.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Floating Point Arithmetic" + ] + }, + { + "objectID": "stan-users-guide/floating-point.html#comparing-floating-point-numbers", + "href": "stan-users-guide/floating-point.html#comparing-floating-point-numbers", + "title": "Floating Point Arithmetic", + "section": "", + "text": "Because floating-point representations are inexact, it is rarely a good idea to test exact equality. The general recommendation is that rather than testing x == y, an approximate test may be used given an absolute or relative tolerance.\nGiven a positive absolute tolerance of epsilon, x can be compared to y using the conditional\nabs(x - y) <= epsilon.\nAbsolute tolerances work when the scale of x and y and the relevant comparison is known.\nGiven a positive relative tolerance of epsilon, a typical comparison is\n2 * abs(x - y) / (abs(x) + abs(y)) <= epsilon.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Floating Point Arithmetic" + ] + }, + { + "objectID": "stan-users-guide/floating-point.html#footnotes", + "href": "stan-users-guide/floating-point.html#footnotes", + "title": "Floating Point Arithmetic", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe notable exception is Intel’s optimizing compilers under certain optimization settings.↩︎", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Floating Point Arithmetic" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html", + "href": "stan-users-guide/efficiency-tuning.html", + "title": "Efficiency Tuning", + "section": "", + "text": "This chapter provides a grab bag of techniques for optimizing Stan code, 
including vectorization, sufficient statistics, and conjugacy. At a coarse level, efficiency involves both the amount of time required for a computation and the amount of memory required. For practical applied statistical modeling, we are mainly concerned with reducing wall time (how long a program takes as measured by a clock on the wall) and keeping memory requirements within available bounds.\n\n\nThe standard algorithm analyses in computer science measure efficiency asymptotically as a function of problem size (such as data, number of parameters, etc.) and typically do not consider constant additive factors like startup times or multiplicative factors like speed of operations. In practice, the constant factors are important; if run time can be cut in half or more, that’s a huge gain. This chapter focuses on both the constant factors involved in efficiency (such as using built-in matrix operations as opposed to naive loops) and on asymptotic efficiency factors (such as using linear algorithms instead of quadratic algorithms in loops).\n\n\n\nStan programs express models which are intrinsically statistical in nature. The algorithms applied to these models may or may not themselves be probabilistic. For example, given an initial value for parameters (which may itself be given deterministically or generated randomly), Stan’s optimization algorithm (L-BFGS) for penalized maximum likelihood estimation is purely deterministic. Stan’s sampling algorithms are based on Markov chain Monte Carlo algorithms, which are probabilistic by nature at every step. Stan’s variational inference algorithm (ADVI) is probabilistic despite being an optimization algorithm; the randomization lies in a nested Monte Carlo calculation for an expected gradient.\nWith probabilistic algorithms, there will be variation in run times (and maybe memory usage) based on the randomization involved. 
For example, by starting too far out in the tail, iterative algorithms underneath the hood, such as the solvers for ordinary differential equations, may take different numbers of steps. Ideally this variation will be limited; when there is a lot of variation it can be a sign that there is a problem with the model’s parameterization in a Stan program or with initialization.\nA well-behaved Stan program will have low variance between runs with different random initializations and differently seeded random number generators. But sometimes an algorithm can get stuck in one part of the posterior, typically due to high curvature. Such sticking almost always indicates the need to reparameterize the model. Just throwing away Markov chains with apparently poor behavior (slow, or stuck) can lead to bias in posterior estimates. This problem with getting stuck can often be overcome by lowering the initial step size to avoid getting stuck during adaptation and increasing the target acceptance rate in order to target a lower step size. This is because smaller step sizes allow Stan’s gradient-based algorithms to better follow the curvature in the density or penalized maximum likelihood being fit.\n\n\n\nThere is a difference between pure computational efficiency and statistical efficiency for Stan programs fit with sampling-based algorithms. Computational efficiency measures the amount of time or memory required for a given step in a calculation, such as an evaluation of a log posterior or penalized likelihood.\nStatistical efficiency typically involves requiring fewer steps in algorithms by making the statistical formulation of a model better behaved. 
The typical way to do this is by applying a change of variables (i.e., reparameterization) so that sampling algorithms mix better or optimization algorithms require less adaptation.\n\n\n\nBecause Stan’s algorithms rely on step-based gradient-based approximations of the density (or penalized maximum likelihood) being fitted, posterior curvature not captured by this first-order approximation plays a central role in determining the statistical efficiency of Stan’s algorithms.\nA second-order approximation to curvature is provided by the Hessian, the matrix of second derivatives of the log density \\(\\log\np(\\theta)\\) with respect to the parameter vector \\(\\theta\\), defined as \\[\nH(\\theta) = \\nabla \\, \\nabla \\, \\log p(\\theta \\mid y),\n\\] so that \\[\nH_{i, j}(\\theta) = \\frac{\\partial^2 \\log p(\\theta \\mid y)}\n {\\partial \\theta_i \\ \\partial \\theta_j}.\n\\] For pure penalized maximum likelihood problems, the posterior log density \\(\\log p(\\theta \\mid y)\\) is replaced by the penalized likelihood function \\(\\mathcal{L}(\\theta) = \\log p(y \\mid \\theta) - \\lambda(\\theta)\\).\n\n\nA good gauge of how difficult a problem the curvature presents is given by the condition number of the Hessian matrix \\(H\\), which is the ratio of the largest to the smallest eigenvalue of \\(H\\) (assuming the Hessian is positive definite). This essentially measures the difference between the flattest direction of movement and the most curved. Typically, the step size of a gradient-based algorithm is bounded by the most sharply curved direction. 
With better conditioned log densities or penalized likelihood functions, it is easier for Stan’s adaptation, especially the diagonal adaptations that are used as defaults.\n\n\n\nIdeally, all parameters should be programmed so that they have unit scale and so that posterior correlation is reduced; together, these properties mean that there is no rotation or scaling required for optimal performance of Stan’s algorithms. For Hamiltonian Monte Carlo, this implies a unit mass matrix, which requires no adaptation as it is where the algorithm initializes.\n\n\n\nIn all but very simple models (such as multivariate normals), the Hessian will vary as \\(\\theta\\) varies (an extreme example is Neal’s funnel, as naturally arises in hierarchical models with little or no data). The more the curvature varies, the harder it is for all of the algorithms with fixed adaptation parameters to find adaptations that cover the entire density well. Many of the variable transforms proposed are aimed at improving the conditioning of the Hessian and/or making it more consistent across the relevant portions of the density (or penalized maximum likelihood function) being fit.\nFor all of Stan’s algorithms, the curvature along the path from the initial values of the parameters to the solution is relevant. For penalized maximum likelihood and variational inference, the solution of the iterative algorithm will be a single point, so this is all that matters. For sampling, the relevant “solution” is the typical set, which is the posterior volume where almost all draws from the posterior lie; thus, the typical set contains almost all of the posterior probability mass.\nWith sampling, the curvature may vary dramatically between the points on the path from the initialization point to the typical set and within the typical set. This is why adaptation needs to run long enough to visit enough points in the typical set to get a good first-order estimate of the curvature within the typical set. 
If adaptation is not run long enough, sampling within the typical set after adaptation will not be efficient. We generally recommend at least one hundred iterations after the typical set is reached (and the first effective draw is ready to be realized). Whether adaptation has run long enough can be measured by comparing the adaptation parameters derived from a set of diffuse initial parameter values.\n\n\n\nImproving statistical efficiency is achieved by reparameterizing the model so that the same result may be calculated using a density or penalized maximum likelihood that is better conditioned. Again, see the example of reparameterizing Neal’s funnel for an example, and also the examples in the change of variables chapter.\nOne has to be careful in using change-of-variables reparameterizations when using maximum likelihood estimation, because they can change the result if the Jacobian term is inadvertently included in the revised likelihood model.\n\n\n\n\nModel misspecification, which roughly speaking means using a model that doesn’t match the data, can be a major source of slow code. This can be seen in cases where simulated data according to the model runs robustly and efficiently, whereas the real data for which it was intended runs slowly or may even have convergence and mixing issues. While some of the techniques recommended in the remaining sections of this chapter may mitigate the problem, the best remedy is a better model specification.\nCounterintuitively, more complicated models often run faster than simpler models. One common pattern is with a group of parameters with a wide fixed prior such as normal(0, 1000). This can fit slowly due to the mismatch between prior and posterior (the prior has support for values in the hundreds or even thousands, whereas the posterior may be concentrated near zero). 
In such cases, replacing the fixed prior with a hierarchical prior such as normal(mu, sigma), where mu and sigma are new parameters with their own hyperpriors, can be beneficial.\n\n\n\nStan validates all of its data structure constraints. For example, consider a transformed parameter defined to be a covariance matrix and then used as a covariance parameter in the model block.\ntransformed parameters {\n cov_matrix[K] Sigma;\n // ...\n} // first validation\nmodel {\n y ~ multi_normal(mu, Sigma); // second validation\n // ...\n}\nBecause Sigma is declared to be a covariance matrix, it will be factored at the end of the transformed parameter block to ensure that it is positive definite. The multivariate normal log density function also validates that Sigma is positive definite. This test is expensive, having cubic run time (i.e., \\(\\mathcal{O}(N^3)\\) for \\(N \\times N\\) matrices), so it should not be done twice.\nThe test may be avoided by simply declaring Sigma to be a simple unconstrained matrix.\ntransformed parameters {\n matrix[K, K] Sigma;\n // ...\n}\nmodel {\n y ~ multi_normal(mu, Sigma); // only validation\n}\nNow the only validation is carried out by the multivariate normal.\n\n\n\nStan’s sampler can be slow in sampling from distributions with difficult posterior geometries. One way to speed up such models is through reparameterization. In some cases, reparameterization can dramatically increase effective sample size for the same number of iterations or even make programs that would not converge well behaved.\n\n\nIn this section, we discuss a general transform from a centered to a non-centered parameterization (Papaspiliopoulos, Roberts, and Sköld 2007).1\nThis reparameterization is helpful when there is not much data, because it separates the hierarchical parameters and lower-level parameters in the prior.\nNeal (2003) defines a distribution that exemplifies the difficulties of sampling from some hierarchical models. 
Neal’s example is fairly extreme, but can be trivially reparameterized in such a way as to make sampling straightforward. Neal’s example has support for \\(y \\in\n\\mathbb{R}\\) and \\(x \\in \\mathbb{R}^9\\) with density\n\\[\np(y,x) = \\textsf{normal}(y \\mid 0,3) \\times \\prod_{n=1}^9\n\\textsf{normal}(x_n \\mid 0,\\exp(y/2)).\n\\]\nThe probability contours are shaped like ten-dimensional funnels. The funnel’s neck is particularly sharp because of the exponential function applied to \\(y\\). A plot of the log marginal density of \\(y\\) and the first dimension \\(x_1\\) is shown in the following plot.\nThe funnel can be implemented directly in Stan as follows.\nparameters {\n real y;\n vector[9] x;\n}\nmodel {\n y ~ normal(0, 3);\n x ~ normal(0, exp(y/2));\n}\nWhen the model is expressed this way, Stan has trouble sampling from the neck of the funnel, where \\(y\\) is small and thus \\(x\\) is constrained to be near 0. This is due to the fact that the density’s scale changes with \\(y\\), so that a step size that works well in the body will be too large for the neck, and a step size that works in the neck will be inefficient in the body. This can be seen in the following plots.\n\n\n\n\n\n\n\n\n\n\n\n\n\nFigure 1: Neal’s funnel. (Left) The marginal density of Neal’s funnel for the upper-level variable \\(y\\) and one lower-level variable \\(x_1\\) (see the text for the formula). The blue region has log density greater than -8, the yellow region density greater than -16, and the gray background a density less than -16. (Right) 4000 draws are taken from a run of Stan’s sampler with default settings. Both plots are restricted to the shown window of \\(x_1\\) and \\(y\\) values; some draws fell outside of the displayed area as would be expected given the density. 
The draws are consistent with the marginal density \\(p(y) = \\textsf{normal}(y \\mid 0,3)\\), which has mean 0 and standard deviation 3.\n\n\n\nIn this particular instance, because the analytic form of the density is known, the model can be converted to the following more efficient form.\nparameters {\n real y_raw;\n vector[9] x_raw;\n}\ntransformed parameters {\n real y;\n vector[9] x;\n\n y = 3.0 * y_raw;\n x = exp(y/2) * x_raw;\n}\nmodel {\n y_raw ~ std_normal(); // implies y ~ normal(0, 3)\n x_raw ~ std_normal(); // implies x ~ normal(0, exp(y/2))\n}\nIn this second model, the parameters x_raw and y_raw are sampled as independent standard normals, which is easy for Stan. These are then transformed into draws from the funnel. In this case, the same transform may be used to define Monte Carlo directly based on independent standard normal draws; Markov chain Monte Carlo methods are not necessary. If such a reparameterization were used in Stan code, it is useful to provide a comment indicating what the distribution for the parameter implies for the distribution of the transformed parameter.\n\n\n\nSampling from heavy tailed distributions such as the Cauchy is difficult for Hamiltonian Monte Carlo, which operates within a Euclidean geometry.\nThe practical problem is that the tail of the Cauchy requires a relatively large step size compared to the trunk. With a small step size, the No-U-Turn sampler requires many steps when starting in the tail of the distribution; with a large step size, there will be too much rejection in the central portion of the distribution. This problem may be mitigated by defining the Cauchy-distributed variable as the transform of a uniformly distributed variable using the Cauchy inverse cumulative distribution function.\nSuppose a random variable of interest \\(X\\) has a Cauchy distribution with location \\(\\mu\\) and scale \\(\\tau\\), so that \\(X \\sim\n\\textsf{Cauchy}(\\mu,\\tau)\\). 
The variable \\(X\\) has a cumulative distribution function \\(F_X:\\mathbb{R} \\rightarrow (0,1)\\) defined by \\[\nF_X(x) = \\frac{1}{\\pi} \\arctan \\left( \\frac{x - \\mu}{\\tau} \\right) +\n\\frac{1}{2}.\n\\] The inverse of the cumulative distribution function, \\(F_X^{-1}:(0,1) \\rightarrow \\mathbb{R}\\), is thus\n\\[\nF^{-1}_X(y) = \\mu + \\tau \\tan \\left( \\pi \\left( y - \\frac{1}{2} \\right) \\right).\n\\] Thus if the random variable \\(Y\\) has a unit uniform distribution, \\(Y\n\\sim \\textsf{uniform}(0,1)\\), then \\(F^{-1}_X(Y)\\) has a Cauchy distribution with location \\(\\mu\\) and scale \\(\\tau\\), i.e., \\(F^{-1}_X(Y) \\sim\n\\textsf{Cauchy}(\\mu,\\tau)\\).\nConsider a Stan program involving a Cauchy-distributed parameter beta.\nparameters {\n real beta;\n // ...\n}\nmodel {\n beta ~ cauchy(mu, tau);\n // ...\n}\nThis declaration of beta as a parameter may be replaced with a transformed parameter beta defined in terms of a uniform-distributed parameter beta_unif.\nparameters {\n real<lower=-pi() / 2, upper=pi() / 2> beta_unif;\n // ...\n}\ntransformed parameters {\n real beta;\n beta = mu + tau * tan(beta_unif); // beta ~ cauchy(mu, tau)\n}\nmodel {\n beta_unif ~ uniform(-pi() / 2, pi() / 2); // not necessary\n // ...\n}\nIt is more convenient in Stan to transform a uniform variable on \\((-\\pi/2, \\pi/2)\\) than one on \\((0,1)\\). The Cauchy location and scale parameters, mu and tau, may be defined as data or may themselves be parameters. The variable beta could also be defined as a local variable if it does not need to be included in the sampler’s output.\nThe uniform distribution on beta_unif is defined explicitly in the model block, but it could be safely removed from the program without changing sampling behavior. This is because \\(\\log\n\\textsf{uniform}(\\beta_{\\textsf{unif}} \\mid -\\pi/2,\\pi/2) =\n-\\log \\pi\\) is a constant and Stan only needs the total log probability up to an additive constant. 
Stan will spend some time checking that beta_unif is between -pi() / 2 and pi() / 2, but this condition is guaranteed by the constraints in the declaration of beta_unif.\n\n\n\nOne thing that sometimes works when you’re having trouble with the heavy-tailedness of Student-t distributions is to use the gamma-mixture representation, which says that you can generate a Student-t distributed variable \\(\\beta\\), \\[\n\\beta \\sim \\textsf{Student-t}(\\nu, 0, 1),\n\\] by first generating a gamma-distributed precision (inverse variance) \\(\\tau\\) according to \\[\n\\tau \\sim \\textsf{Gamma}(\\nu/2, \\nu/2),\n\\] and then generating \\(\\beta\\) from the normal distribution, \\[\n\\beta \\sim \\textsf{normal}\\left(0,\\tau^{-\\frac{1}{2}}\\right).\n\\]\nBecause \\(\\tau\\) is precision, \\(\\tau^{-\\frac{1}{2}}\\) is the scale (standard deviation), which is the parameterization used by Stan.\nThe marginal distribution of \\(\\beta\\) when you integrate out \\(\\tau\\) is \\(\\textsf{Student-t}(\\nu, 0, 1)\\), i.e., \\[\n\\textsf{Student-t}(\\beta \\mid \\nu, 0, 1)\n=\n\\int_0^{\\infty}\n\\,\n\\textsf{normal}\\left(\\beta \\middle| 0, \\tau^{-0.5}\\right)\n\\times\n\\textsf{Gamma}\\left(\\tau \\middle| \\nu/2, \\nu/2\\right)\n\\\n\\text{d} \\tau.\n\\]\nTo go one step further, instead of defining a \\(\\beta\\) drawn from a normal with precision \\(\\tau\\), define \\(\\alpha\\) to be drawn from a unit normal, \\[\n\\alpha \\sim \\textsf{normal}(0,1)\n\\] and rescale by defining \\[\n\\beta = \\alpha \\, \\tau^{-\\frac{1}{2}}.\n\\]\nNow suppose \\(\\mu = \\beta x\\) is the product of \\(\\beta\\) with a regression predictor \\(x\\). 
Then the reparameterization \\(\\mu = \\alpha\n\\tau^{-\\frac{1}{2}} x\\) has the same distribution, but in the original, direct parameterization, \\(\\beta\\) has (potentially) heavy tails, whereas in the second, neither \\(\\tau\\) nor \\(\\alpha\\) have heavy tails.\nTo translate into Stan notation, this reparameterization replaces\nparameters {\n real<lower=0> nu;\n real beta;\n // ...\n}\nmodel {\n beta ~ student_t(nu, 0, 1);\n // ...\n}\nwith\nparameters {\n real<lower=0> nu;\n real<lower=0> tau;\n real alpha;\n // ...\n}\ntransformed parameters {\n real beta;\n beta = alpha / sqrt(tau);\n // ...\n}\nmodel {\n real half_nu;\n half_nu = 0.5 * nu;\n tau ~ gamma(half_nu, half_nu);\n alpha ~ std_normal();\n // ...\n}\nAlthough set to 0 here, in most cases, the lower bound for the degrees of freedom parameter nu can be set to 1 or higher; when nu is 1, the result is a Cauchy distribution with fat tails and as nu approaches infinity, the Student-t distribution approaches a normal distribution. Thus the parameter nu characterizes the heaviness of the tails of the model.\n\n\n\nUnfortunately, the usual situation in applied Bayesian modeling involves complex geometries and interactions that are not known analytically. Nevertheless, the non-centered parameterization can still be effective for separating parameters.\n\n\nFor example, a vectorized hierarchical model might draw a vector of coefficients \\(\\beta\\) with definitions as follows. The so-called centered parameterization is as follows.\nparameters {\n real mu_beta;\n real<lower=0> sigma_beta;\n vector[K] beta;\n // ...\n}\nmodel {\n beta ~ normal(mu_beta, sigma_beta);\n // ...\n}\nAlthough not shown, a full model will have priors on both mu_beta and sigma_beta along with data modeled based on these coefficients. 
For instance, a standard binary logistic regression with data matrix x and binary outcome vector y would include a likelihood statement of the form y ~ bernoulli_logit(x * beta), leading to an analytically intractable posterior.\nA hierarchical model such as the above will suffer from the same kind of inefficiencies as Neal’s funnel, because the values of beta, mu_beta and sigma_beta are highly correlated in the posterior. The extremity of the correlation depends on the amount of data, with Neal’s funnel being the extreme with no data. In these cases, the non-centered parameterization, discussed in the next section, is preferable; when there is a lot of data, the centered parameterization is more efficient. See Betancourt and Girolami (2013) for more information on the effects of centering in hierarchical models fit with Hamiltonian Monte Carlo.\n\n\n\n\nSometimes the group-level effects do not constrain the hierarchical distribution tightly. Examples arise when there are not many groups, or when the inter-group variation is high. In such cases, hierarchical models can be made much more efficient by shifting the data’s correlation with the parameters to the hyperparameters. 
Similar to the funnel example, this will be much more efficient in terms of effective sample size when there is not much data (see Betancourt and Girolami (2013)), and in more extreme cases will be necessary to achieve convergence.\nparameters {\n real mu_beta;\n real<lower=0> sigma_beta;\n vector[K] beta_raw;\n // ...\n}\ntransformed parameters {\n vector[K] beta;\n // implies: beta ~ normal(mu_beta, sigma_beta)\n beta = mu_beta + sigma_beta * beta_raw;\n}\nmodel {\n beta_raw ~ std_normal();\n // ...\n}\nAny priors defined for mu_beta and sigma_beta remain as defined in the original model.\nAlternatively, Stan’s affine transform can be used to decouple sigma and beta:\nparameters {\n real mu_beta;\n real<lower=0> sigma_beta;\n vector<offset=mu_beta, multiplier=sigma_beta>[K] beta;\n // ...\n}\nmodel {\n beta ~ normal(mu_beta, sigma_beta);\n // ...\n}\nReparameterization of hierarchical models is not limited to the normal distribution, although the normal distribution is the best candidate for doing so. In general, any distribution of parameters in the location-scale family is a good candidate for reparameterization. Let \\(\\beta = l + s\\alpha\\) where \\(l\\) is a location parameter and \\(s\\) is a scale parameter. The parameter \\(l\\) need not be the mean, \\(s\\) need not be the standard deviation, and neither the mean nor the standard deviation need to exist. If \\(\\alpha\\) and \\(\\beta\\) are from the same distributional family but \\(\\alpha\\) has location zero and unit scale, while \\(\\beta\\) has location \\(l\\) and scale \\(s\\), then that distribution is a location-scale distribution. Thus, if \\(\\alpha\\) were a parameter and \\(\\beta\\) were a transformed parameter, then a prior distribution from the location-scale family on \\(\\alpha\\) with location zero and unit scale implies a prior distribution on \\(\\beta\\) with location \\(l\\) and scale \\(s\\). 
Doing so would reduce the dependence between \\(\\alpha\\), \\(l\\), and \\(s\\).\nThere are several univariate distributions in the location-scale family, such as the Student t distribution, including its special cases of the Cauchy distribution (with one degree of freedom) and the normal distribution (with infinite degrees of freedom). As shown above, if \\(\\alpha\\) is distributed standard normal, then \\(\\beta\\) is distributed normal with mean \\(\\mu = l\\) and standard deviation \\(\\sigma = s\\). The logistic, the double exponential, the generalized extreme value distributions, and the stable distribution are also in the location-scale family.\nAlso, if \\(z\\) is distributed standard normal, then \\(z^2\\) is distributed chi-squared with one degree of freedom. By summing the squares of \\(K\\) independent standard normal variates, one can obtain a single variate that is distributed chi-squared with \\(K\\) degrees of freedom. However, for large \\(K\\), the computational gains of this reparameterization may be overwhelmed by the computational cost of specifying \\(K\\) primitive parameters just to obtain one transformed parameter to use in a model.\n\n\n\nThe benefits of reparameterization are not limited to univariate distributions. A parameter with a multivariate normal prior distribution is also an excellent candidate for reparameterization. Suppose you intend the prior for \\(\\beta\\) to be multivariate normal with mean vector \\(\\mu\\) and covariance matrix \\(\\Sigma\\). 
Such a belief is reflected by the following code.\ndata {\n int<lower=2> K;\n vector[K] mu;\n cov_matrix[K] Sigma;\n // ...\n}\nparameters {\n vector[K] beta;\n // ...\n}\nmodel {\n beta ~ multi_normal(mu, Sigma);\n // ...\n}\nIn this case mu and Sigma are fixed data, but they could be unknown parameters, in which case their priors would be unaffected by a reparameterization of beta.\nIf \\(\\alpha\\) has the same dimensions as \\(\\beta\\) but the elements of \\(\\alpha\\) are independently and identically distributed standard normal such that \\(\\beta = \\mu + L\\alpha\\), where \\(LL^\\top = \\Sigma\\), then \\(\\beta\\) is distributed multivariate normal with mean vector \\(\\mu\\) and covariance matrix \\(\\Sigma\\). One choice for \\(L\\) is the Cholesky factor of \\(\\Sigma\\). Thus, the model above could be reparameterized as follows.\ndata {\n int<lower=2> K;\n vector[K] mu;\n cov_matrix[K] Sigma;\n // ...\n}\ntransformed data {\n matrix[K, K] L;\n L = cholesky_decompose(Sigma);\n}\nparameters {\n vector[K] alpha;\n // ...\n}\ntransformed parameters {\n vector[K] beta;\n beta = mu + L * alpha;\n}\nmodel {\n alpha ~ std_normal();\n // implies: beta ~ multi_normal(mu, Sigma)\n // ...\n}\nThis reparameterization is more efficient for two reasons. First, it reduces dependence among the elements of alpha and second, it avoids the need to invert Sigma every time multi_normal is evaluated.\nThe Cholesky factor is also useful when a covariance matrix is decomposed into a correlation matrix that is multiplied from both sides by a diagonal matrix of standard deviations, where either the standard deviations or the correlations are unknown parameters. The Cholesky factor of the covariance matrix is equal to the product of a diagonal matrix of standard deviations and the Cholesky factor of the correlation matrix. 
Furthermore, the product of a diagonal matrix of standard deviations and a vector is equal to the elementwise product between the standard deviations and that vector. Thus, if for example the correlation matrix Tau were fixed data but the vector of standard deviations sigma were unknown parameters, then a reparameterization of beta in terms of alpha could be implemented as follows.\ndata {\n int<lower=2> K;\n vector[K] mu;\n corr_matrix[K] Tau;\n // ...\n}\ntransformed data {\n matrix[K, K] L;\n L = cholesky_decompose(Tau);\n}\nparameters {\n vector[K] alpha;\n vector<lower=0>[K] sigma;\n // ...\n}\ntransformed parameters {\n vector[K] beta;\n // This equals mu + diag_matrix(sigma) * L * alpha;\n beta = mu + sigma .* (L * alpha);\n}\nmodel {\n sigma ~ cauchy(0, 5);\n alpha ~ std_normal();\n // implies: beta ~ multi_normal(mu,\n // diag_matrix(sigma) * L * L' * diag_matrix(sigma)))\n // ...\n}\nThis reparameterization of a multivariate normal distribution in terms of standard normal variates can be extended to other multivariate distributions that can be conceptualized as contaminations of the multivariate normal, such as the multivariate Student t and the skew multivariate normal distribution.\nA Wishart distribution can also be reparameterized in terms of standard normal variates and chi-squared variates. Let \\(L\\) be the Cholesky factor of a \\(K \\times K\\) positive definite scale matrix \\(S\\) and let \\(\\nu\\) be the degrees of freedom. If \\[\nA = \\begin{pmatrix}\n\\sqrt{c_{1}} & 0 & \\cdots & 0 \\\\\nz_{21} & \\sqrt{c_{2}} & \\ddots & \\vdots \\\\\n\\vdots & \\ddots & \\ddots & 0 \\\\\nz_{K1} & \\cdots & z_{K\\left(K-1\\right)} & \\sqrt{c_{K}}\n\\end{pmatrix},\n\\] where each \\(c_i\\) is distributed chi-squared with \\(\\nu - i + 1\\) degrees of freedom and each \\(z_{ij}\\) is distributed standard normal, then \\(W = LAA^{\\top}L^{\\top}\\) is distributed Wishart with scale matrix \\(S = LL^{\\top}\\) and degrees of freedom \\(\\nu\\). 
Such a reparameterization can be implemented by the following Stan code:\ndata {\n int<lower=1> N;\n int<lower=1> K;\n int<lower=K + 2> nu\n matrix[K, K] L; // Cholesky factor of scale matrix\n vector[K] mu;\n matrix[N, K] y;\n // ...\n}\nparameters {\n vector<lower=0>[K] c;\n vector[0.5 * K * (K - 1)] z;\n // ...\n}\nmodel {\n matrix[K, K] A;\n int count = 1;\n for (j in 1:(K - 1)) {\n for (i in (j + 1):K) {\n A[i, j] = z[count];\n count += 1;\n }\n for (i in 1:(j - 1)) {\n A[i, j] = 0.0;\n }\n A[j, j] = sqrt(c[j]);\n }\n for (i in 1:(K - 1)) {\n A[i, K] = 0;\n }\n A[K, K] = sqrt(c[K]);\n\n for (i in 1:K) {\n c[i] ~ chi_square(nu - i + 1);\n }\n\n z ~ std_normal();\n // implies: L * A * A' * L' ~ wishart(nu, L * L')\n y ~ multi_normal_cholesky(mu, L * A);\n // ...\n}\nThis reparameterization is more efficient for three reasons. First, it reduces dependence among the elements of z and second, it avoids the need to invert the covariance matrix, \\(W\\) every time wishart is evaluated. Third, if \\(W\\) is to be used with a multivariate normal distribution, you can pass \\(L A\\) to the more efficient multi_normal_cholesky function, rather than passing \\(W\\) to multi_normal.\nIf \\(W\\) is distributed Wishart with scale matrix \\(S\\) and degrees of freedom \\(\\nu\\), then \\(W^{-1}\\) is distributed inverse Wishart with inverse scale matrix \\(S^{-1}\\) and degrees of freedom \\(\\nu\\). Thus, the previous result can be used to reparameterize the inverse Wishart distribution. Since \\(W = L A A^{\\top} L^{\\top}\\), \\(W^{-1} = L^{{\\top}^{-1}} A^{{\\top}^{-1}} A^{-1} L^{-1}\\), where all four inverses exist, but \\(L^{{-1}^{\\top}} = L^{{\\top}^{-1}}\\) and \\(A^{{-1}^{\\top}} = A^{{\\top}^{-1}}\\). 
We can slightly modify the above Stan code for this case:\ndata {\n int<lower=1> K;\n int<lower=K + 2> nu\n matrix[K, K] L; // Cholesky factor of scale matrix\n // ...\n}\ntransformed data {\n matrix[K, K] eye;\n matrix[K, K] L_inv;\n for (j in 1:K) {\n for (i in 1:K) {\n eye[i, j] = 0.0;\n }\n eye[j, j] = 1.0;\n }\n L_inv = mdivide_left_tri_low(L, eye);\n}\nparameters {\n vector<lower=0>[K] c;\n vector[0.5 * K * (K - 1)] z;\n // ...\n}\nmodel {\n matrix[K, K] A;\n matrix[K, K] A_inv_L_inv;\n int count;\n count = 1;\n for (j in 1:(K - 1)) {\n for (i in (j + 1):K) {\n A[i, j] = z[count];\n count += 1;\n }\n for (i in 1:(j - 1)) {\n A[i, j] = 0.0;\n }\n A[j, j] = sqrt(c[j]);\n }\n for (i in 1:(K - 1)) {\n A[i, K] = 0;\n }\n A[K, K] = sqrt(c[K]);\n\n A_inv_L_inv = mdivide_left_tri_low(A, L_inv);\n for (i in 1:K) {\n c[i] ~ chi_square(nu - i + 1);\n }\n\n z ~ std_normal(); // implies: crossprod(A_inv_L_inv) ~\n // inv_wishart(nu, L_inv' * L_inv)\n // ...\n}\nAnother candidate for reparameterization is the Dirichlet distribution with all \\(K\\) shape parameters equal. Zyczkowski and Sommers (2001) shows that if \\(\\theta_i\\) is equal to the sum of \\(\\beta\\) independent squared standard normal variates and \\(\\rho_i = \\frac{\\theta_i}{\\sum \\theta_i}\\), then the \\(K\\)-vector \\(\\rho\\) is distributed Dirichlet with all shape parameters equal to \\(\\frac{\\beta}{2}\\). In particular, if \\(\\beta = 2\\), then \\(\\rho\\) is uniformly distributed on the unit simplex. 
Thus, we can make \\(\\rho\\) be a transformed parameter to reduce dependence, as in:\ndata {\n int<lower=1> beta;\n // ...\n}\nparameters {\n array[K] vector[beta] z;\n // ...\n}\ntransformed parameters {\n simplex[K] rho;\n for (k in 1:K) {\n rho[k] = dot_self(z[k]); // sum-of-squares\n }\n rho = rho / sum(rho);\n}\nmodel {\n for (k in 1:K) {\n z[k] ~ std_normal();\n }\n // implies: rho ~ dirichlet(0.5 * beta * ones)\n // ...\n}\n\n\n\n\n\n\nStan spends the vast majority of its time computing the gradient of the log probability function, making gradients the obvious target for optimization. Stan’s gradient calculations with algorithmic differentiation require a template expression to be allocated and constructed for each subexpression of a Stan program involving parameters or transformed parameters.2 This section defines optimization strategies based on vectorizing these subexpressions to reduce the work done during algorithmic differentiation.\n\n\n\nBecause of the gradient bottleneck described in the previous section, it is more efficient to collect a sequence of summands into a vector or array and then apply the sum() operation than it is to continually increment a variable by assignment and addition. For example, consider the following code snippet, where foo() is some operation that depends on n.\nfor (n in 1:N) {\n total += foo(n,...);\n}\nThis code has to create intermediate representations for each of the N summands.\nA faster alternative is to copy the values into a vector, then apply the sum() operator, as in the following refactoring.\n{\n vector[N] summands;\n for (n in 1:N) {\n summands[n] = foo(n,...);\n }\n total = sum(summands);\n}\nSyntactically, the replacement is a statement block delineated by curly brackets ({, }), starting with the definition of the local variable summands.\nEven though it involves extra work to allocate the summands vector and copy N values into it, the savings in differentiation more than make up for it. 
Perhaps surprisingly, it will also use substantially less memory overall than incrementing total within the loop.\n\n\n\nThe following program directly encodes a linear regression with fixed unit noise using a two-dimensional array x of predictors, an array y of outcomes, and an array beta of regression coefficients.\ndata {\n int<lower=1> K;\n int<lower=1> N;\n array[K, N] real x;\n array[N] real y;\n}\nparameters {\n array[K] real beta;\n}\nmodel {\n for (n in 1:N) {\n real gamma = 0;\n for (k in 1:K) {\n gamma += x[n, k] * beta[k];\n }\n y[n] ~ normal(gamma, 1);\n }\n}\nThe following model computes the same log probability function as the previous model, even supporting the same input files for data and initialization.\ndata {\n int<lower=1> K;\n int<lower=1> N;\n array[N] vector[K] x;\n array[N] real y;\n}\nparameters {\n vector[K] beta;\n}\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(dot_product(x[n], beta), 1);\n }\n}\nAlthough it produces equivalent results, the dot product should not be replaced with a transpose and multiply, as in\ny[n] ~ normal(x[n]' * beta, 1);\nThe relative inefficiency of the transpose and multiply approach is that the transposition operator allocates a new vector into which the result of the transposition is copied. 
This consumes both time and memory.3\nThe inefficiency of transposition could itself be mitigated by reordering the product and pulling the transposition out of the loop, as follows.\n// ...\ntransformed parameters {\n row_vector[K] beta_t;\n beta_t = beta';\n}\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(beta_t * x[n], 1);\n }\n}\nThe problem with transposition could be completely solved by directly encoding the x as a row vector, as in the following example.\ndata {\n // ...\n array[N] row_vector[K] x;\n // ...\n}\nparameters {\n vector[K] beta;\n}\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(x[n] * beta, 1);\n }\n}\nDeclaring the data as a matrix and then computing all the predictors at once using matrix multiplication is more efficient still, as in the example discussed in the next section.\nHaving said all this, the most efficient way to code this model is with direct matrix multiplication, as in\ndata {\n matrix[N, K] x;\n vector[N] y;\n}\nparameters {\n vector[K] beta;\n}\nmodel {\n y ~ normal(x * beta, 1);\n}\nIn general, encapsulated single operations that do the work of loops will be more efficient in their encapsulated forms. Rather than performing a sequence of row-vector/vector multiplications, it is better to encapsulate it as a single matrix/vector multiplication.\n\n\n\nThe final and most efficient version replaces the loops and transformed parameters by using the vectorized form of the normal probability function, as in the following example.\ndata {\n int<lower=1> K;\n int<lower=1> N;\n matrix[N, K] x;\n vector[N] y;\n}\nparameters {\n vector[K] beta;\n}\nmodel {\n y ~ normal(x * beta, 1);\n}\nThe variables are all declared as either matrix or vector types. 
The result of the matrix-vector multiplication x * beta in the model block is a vector of the same length as y.\nThe probability function documentation in the function reference manual indicates which of Stan’s probability functions support vectorization; see the function reference manual for full details. Vectorized probability functions accept either vector or scalar inputs for all arguments, with the only restriction being that all vector arguments are the same dimensionality. In the example above, y is a vector of size N, x * beta is a vector of size N, and 1 is a scalar.\n\n\n\nSometimes data does not arrive in a shape that is ideal for vectorization, but can be put into such shape with some munging (either inside Stan’s transformed data block or outside).\nJohn Hall provided a simple example on the Stan users group. Simplifying notation a bit, the original model had a sampling statement in a loop, as follows.\nfor (n in 1:N) {\n y[n] ~ normal(mu[ii[n]], sigma);\n}\nThe brute force vectorization would build up a mean vector and then vectorize all at once.\n{\n vector[N] mu_ii;\n for (n in 1:N) {\n mu_ii[n] = mu[ii[n]];\n }\n y ~ normal(mu_ii, sigma);\n}\nIf there aren’t many levels (values ii[n] can take), then it behooves us to reorganize the data by group in a case like this. Rather than having a single observation vector y, there are K of them. And because Stan doesn’t support ragged arrays, it means K declarations. For instance, with 5 levels, we have\ny_1 ~ normal(mu[1], sigma);\n// ...\ny_5 ~ normal(mu[5], sigma);\nThis way, both the mu and sigma parameters are shared. Which way works out to be more efficient will depend on the shape of the data; if the sizes are small, the simple vectorization may be faster, but for moderate to large sized groups, the full expansion should be faster.\n\n\n\n\nIn some cases, models can be recoded to exploit sufficient statistics in estimation. This can lead to large efficiency gains compared to an expanded model. 
This section provides examples for Bernoulli and normal distributions, but the same approach can be applied to other members of the exponential family.\n\n\nConsider the following Bernoulli sampling model.\ndata {\n int<lower=0> N;\n array[N] int<lower=0, upper=1> y;\n real<lower=0> alpha;\n real<lower=0> beta;\n}\nparameters {\n real<lower=0, upper=1> theta;\n}\nmodel {\n theta ~ beta(alpha, beta);\n for (n in 1:N) {\n y[n] ~ bernoulli(theta);\n }\n}\nIn this model, the sum of positive outcomes in y is a sufficient statistic for the chance of success theta. The model may be recoded using the binomial distribution as follows.\ntheta ~ beta(alpha, beta);\nsum(y) ~ binomial(N, theta);\nBecause truth is represented as one and falsehood as zero, the sum sum(y) of a binary vector y is equal to the number of positive outcomes out of a total of N trials.\nThis can be generalized to other discrete cases (one wouldn’t expect continuous observations to be duplicated if they are random). Suppose there are only \\(K\\) possible discrete outcomes, \\(z_1, \\dotsc, z_K\\), but there are \\(N\\) observations, where \\(N\\) is much larger than \\(K\\). If \\(f_k\\) is the frequency of outcome \\(z_k\\), then the entire likelihood with distribution foo can be coded as follows.\nfor (k in 1:K) {\n target += f[k] * foo_lpmf(z[k] | ...);\n}\nwhere the ellipses are the parameters of the log probability mass function for distribution foo (there’s no distribution called “foo”; this is just a placeholder for any discrete distribution name).\nThe resulting program looks like a “weighted” regression, but here the weights f[k] are counts and thus sufficient statistics for the PMF and simply amount to an alternative, more efficient coding of the same likelihood. 
For efficiency, the frequencies f[k] should be counted once in the transformed data block and stored.\nThe same trick works for combining multiple binomial observations.\n\n\n\nConsider the following Stan model for fitting a normal distribution to data.\ndata {\n int N;\n vector[N] y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(mu, sigma);\n}\nWith the vectorized form used for y, Stan is clever enough to only evaluate log(sigma) once, but it still has to evaluate the normal for all of y[1] to y[N], which involves adding up all the squared differences from the mean and then dividing by sigma squared.\nAn equivalent density to the one above (up to normalizing constants that do not depend on parameters), is given in the following Stan program.\ndata {\n int N;\n vector[N] y;\n}\ntransformed data {\n real mean_y = mean(y);\n real<lower=0> var_y = variance(y);\n real nm1_over2 = 0.5 * (N - 1);\n real sqrt_N = sqrt(N);\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n mean_y ~ normal(mu, sigma / sqrt_N);\n var_y ~ gamma(nm1_over2, nm1_over2 / sigma^2);\n}\nThe data and parameters are the same in this program as in the first. The second version adds a transformed data block to compute the mean and variance of the data, which are the sufficient statistics here. These are stored along with two other useful constants. Then the program can define distributions over the mean and variance, both of which are scalars here.\nThe original Stan program and this one define the same model in the sense that they define the same log density up to a constant additive term that does not depend on the parameters. The priors on mu and sigma are both improper, but proper priors could be added as additional statements in the model block without affecting the sufficiency.\nThis transform explicitly relies on aggregating the data. 
Using this trick on parameters leads to more computation than just computing the normal log density, even before accounting for the non-linear change of variables in the variance.\n\n\n\nThe Poisson distribution is the easiest case, because the sum of observations is sufficient. Specifically, we can replace\ny ~ poisson(lambda);\nwith\nsum(y) ~ poisson(size(y) * lambda);\nThis will work even if y is a parameter vector because no Jacobian adjustment is required for summation.\n\n\n\n\nIf an expression is calculated once, the value should be saved and reused wherever possible. That is, rather than using exp(theta) in multiple places, declare a local variable to store its value and reuse the local variable.\nAnother case that may not be so obvious is with two multilevel parameters, say a[ii[n]] + b[jj[n]]. If a and b are small (i.e., do not have many levels), then a table a_b of their sums can be created, with\nmatrix[size(a), size(b)] a_b;\nfor (i in 1:size(a)) {\n for (j in 1:size(b)) {\n a_b[i, j] = a[i] + b[j];\n }\n}\nThen the sum can be replaced with a_b[ii[n], jj[n]].\n\n\n\nContinuing the model from the previous section, the conjugacy of the beta prior and binomial distribution allow the model to be further optimized to the following equivalent form.\ntheta ~ beta(alpha + sum(y), beta + N - sum(y));\nTo make the model even more efficient, a transformed data variable defined to be sum(y) could be used in the place of sum(y).\n\n\n\nStandardizing the data so that all predictors have a zero sample mean and unit sample variance has the following potential benefits:\n\nIt helps in faster convergence of MCMC chains.\nIt makes the model less sensitive to the specifics of the parameterization.\nIt aids in the interpretation and comparison of the importance of coefficients across different predictors.\n\nWhen there are large differences between the units and scales of the predictors, standardizing the predictors is especially useful. 
This section illustrates the principle for a simple linear regression.\nSuppose that \\(y = (y_1,\\dotsc,y_N)\\) is a vector of \\(N\\) outcomes and \\(x = (x_1,\\dotsc,x_N)\\) the corresponding vector of \\(N\\) predictors. A simple linear regression involving an intercept coefficient \\(\\alpha\\) and slope coefficient \\(\\beta\\) can be expressed as \\[\ny_n = \\alpha + \\beta x_n + \\epsilon_n,\n\\] where \\[\n\\epsilon_n \\sim \\textsf{normal}(0,\\sigma).\n\\]\nIf \\(x\\) has very large or very small values or if the mean of the values is far away from 0 (on the scale of the values), then it can be more efficient to standardize the predictor values \\(x_n\\). First the elements of \\(x\\) are zero-centered by subtracting the mean, then scaled by dividing by the standard deviation.\nThe mean of \\(x\\) is given by:\n\\[\nmean_x = \\frac{1}{N} \\sum_{n=1}^{N} x_n\n\\]\nThe standard deviation of \\(x\\) is calculated as: \\[\nsd_x = {\\left({\\frac{1}{N} \\sum_{n=1}^{N} (x_n - mean_x)^2}\\right)}^{1/2}\n\\]\nWith these, we compute the \\(z\\), the standardized predictors\n\\[\nz_n = \\frac{x_n - mean_x}{sd_x}\n\\]\nwhere \\(z_n\\) is the standardized value corresponding to \\(x_n\\).\nThe inverse transform is defined by reversing the two normalization steps, first rescaling by the same deviation and relocating by the sample mean.\n\\[\nx_n = z_n sd_x + mean_x\n\\]\nStandardizing the predictors standardizes the scale of the variables, and hence the scale of the priors.\nConsider the following initial model.\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n // priors\n alpha ~ normal(0, 10);\n beta ~ normal(0, 10);\n sigma ~ normal(0, 5);\n // likelihood\n y ~ normal(x * beta + alpha, sigma);\n}\nThe data block for the standardized model is identical. 
The mean and standard deviation of the data are defined in the transformed data block, along with the standardized predictors.\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n}\ntransformed data {\n real mean_x = mean(x);\n real sd_x = sd(x);\n vector[N] x_std = (x - mean_x) / sd_x;\n}\nparameters {\n real alpha_std;\n real beta_std;\n real<lower=0> sigma_std;\n}\nmodel {\n alpha_std ~ normal(0, 10);\n beta_std ~ normal(0, 10);\n sigma_std ~ normal(0, 5);\n y ~ normal(x_std * beta_std + alpha_std, sigma_std);\n}\nThe parameters are renamed to indicate that they aren’t the “natural” parameters. The transformed data x_std is defined in terms of variables mean_x and sd_x; by declaring these variables in the transformed data block, they will be available in all following blocks, and therefore can be used in the generated quantities block to record the “natural” parameters alpha and beta.\nThe fairly diffuse priors on the coefficients are the same. These could have been transformed as well, but here they are left as is, because the scales make sense as diffuse priors for standardized data.\nThe original regression \\[\ny_n = \\alpha + \\beta x_n + \\epsilon_n\n\\] has been transformed to a regression on the standardized data variable \\(z\\),\n\\[\ny_n = \\alpha' + \\beta' z_n + \\epsilon_n.\n\\]\nThe likelihood is specified in terms of the standardized parameters. The original slope \\(\\beta\\) is the standardized slope \\(\\beta'\\) scaled by the inverse of the standard deviation of \\(x\\). The original intercept \\(\\alpha\\) is the intercept from the standardized model \\(\\alpha'\\), corrected for the effect of scaling and centering \\(x\\). 
Thus, the formulas to retrieve \\(\\alpha\\) and \\(\\beta\\) from \\(\\alpha'\\) and \\(\\beta'\\) are:\n\\[\\begin{align*}\n\\beta = \\frac{\\beta'}{\\sigma_x} \\\\\n\\alpha = \\alpha' - \\beta' \\frac{\\mu_x}{\\sigma_x}\n\\end{align*}\\]\nThese recovered parameter values on the original scales can be calculated within Stan using a generated quantities block following the model block,\ngenerated quantities {\n real beta = beta_std / sd_x;\n real alpha = alpha_std - beta_std * mean_x / sd_x;\n\n}\nWhen there are multiple real-valued predictors, i.e., when K is the number of predictors, x is an \\(N \\times K\\) matrix, and beta is a \\(K\\)-vector of coefficients, then x * beta is an \\(N\\)-vector of predictions, one for each of the \\(N\\) data items. When \\(K \\ll N\\) the QR reparameterization is recommended for linear and generalized linear models unless there is an informative prior on the location of \\(\\beta\\).\n\n\nFor many applications on the standard scale, normal distributions with location zero and scale one will be used. In these cases, it is more efficient to use\ny ~ std_normal();\nthan to use\ny ~ normal(0, 1);\nbecause the subtraction of the location and division by the scale cancel, as does subtracting the log of the scale.\n\n\n\n\nThe map-reduce operation, even without multi-core MPI support, can be used to make programs more scalable and also more efficient. See the map-reduce chapter for more information on implementing map-reduce operations.\nMap-reduce allows greater scalability because only the Jacobian of the mapped function for each shard is stored. The Jacobian consists of all of the derivatives of the outputs with respect to the parameters. During execution, the derivatives of the shard are evaluated using nested automatic differentiation. As often happens with modern CPUs, reduced memory overhead leads to increased memory locality and faster execution. 
The Jacobians are all computed with local memory and their outputs stored contiguously in memory.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#what-is-efficiency", + "href": "stan-users-guide/efficiency-tuning.html#what-is-efficiency", + "title": "Efficiency Tuning", + "section": "", + "text": "The standard algorithm analyses in computer science measure efficiency asymptotically as a function of problem size (such as data, number of parameters, etc.) and typically do not consider constant additive factors like startup times or multiplicative factors like speed of operations. In practice, the constant factors are important; if run time can be cut in half or more, that’s a huge gain. This chapter focuses on both the constant factors involved in efficiency (such as using built-in matrix operations as opposed to naive loops) and on asymptotic efficiency factors (such as using linear algorithms instead of quadratic algorithms in loops).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#efficiency-for-probabilistic-models-and-algorithms", + "href": "stan-users-guide/efficiency-tuning.html#efficiency-for-probabilistic-models-and-algorithms", + "title": "Efficiency Tuning", + "section": "", + "text": "Stan programs express models which are intrinsically statistical in nature. The algorithms applied to these models may or may not themselves be probabilistic. For example, given an initial value for parameters (which may itself be given deterministically or generated randomly), Stan’s optimization algorithm (L-BFGS) for penalized maximum likelihood estimation is purely deterministic. Stan’s sampling algorithms are based on Markov chain Monte Carlo algorithms, which are probabilistic by nature at every step. 
Stan’s variational inference algorithm (ADVI) is probabilistic despite being an optimization algorithm; the randomization lies in a nested Monte Carlo calculation for an expected gradient.\nWith probabilistic algorithms, there will be variation in run times (and maybe memory usage) based on the randomization involved. For example, by starting too far out in the tail, iterative algorithms underneath the hood, such as the solvers for ordinary differential equations, may take different numbers of steps. Ideally this variation will be limited; when there is a lot of variation it can be a sign that there is a problem with the model’s parameterization in a Stan program or with initialization.\nA well-behaved Stan program will have low variance between runs with different random initializations and differently seeded random number generators. But sometimes an algorithm can get stuck in one part of the posterior, typically due to high curvature. Such sticking almost always indicates the need to reparameterize the model. Just throwing away Markov chains with apparently poor behavior (slow, or stuck) can lead to bias in posterior estimates. This problem with getting stuck can often be overcome by lowering the initial step size to avoid getting stuck during adaptation and increasing the target acceptance rate in order to target a lower step size. 
This is because smaller step sizes allow Stan’s gradient-based algorithms to better follow the curvature in the density or penalized maximum likelihood being fit.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#statistical-vs.-computational-efficiency", + "href": "stan-users-guide/efficiency-tuning.html#statistical-vs.-computational-efficiency", + "title": "Efficiency Tuning", + "section": "", + "text": "There is a difference between pure computational efficiency and statistical efficiency for Stan programs fit with sampling-based algorithms. Computational efficiency measures the amount of time or memory required for a given step in a calculation, such as an evaluation of a log posterior or penalized likelihood.\nStatistical efficiency typically involves requiring fewer steps in algorithms by making the statistical formulation of a model better behaved. The typical way to do this is by applying a change of variables (i.e., reparameterization) so that sampling algorithms mix better or optimization algorithms require less adaptation.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#model-conditioning-and-curvature", + "href": "stan-users-guide/efficiency-tuning.html#model-conditioning-and-curvature", + "title": "Efficiency Tuning", + "section": "", + "text": "Because Stan’s algorithms rely on step-based gradient-based approximations of the density (or penalized maximum likelihood) being fitted, posterior curvature not captured by this first-order approximation plays a central role in determining the statistical efficiency of Stan’s algorithms.\nA second-order approximation to curvature is provided by the Hessian, the matrix of second derivatives of the log density \\(\\log\np(\\theta)\\) with respect to the parameter vector \\(\\theta\\), defined as 
\\[\nH(\\theta) = \\nabla \\, \\nabla \\, \\log p(\\theta \\mid y),\n\\] so that \\[\nH_{i, j}(\\theta) = \\frac{\\partial^2 \\log p(\\theta \\mid y)}\n {\\partial \\theta_i \\ \\partial \\theta_j}.\n\\] For pure penalized maximum likelihood problems, the posterior log density \\(\\log p(\\theta \\mid y)\\) is replaced by the penalized likelihood function \\(\\mathcal{L}(\\theta) = \\log p(y \\mid \\theta) - \\lambda(\\theta)\\).\n\n\nA good gauge of how difficult a problem the curvature presents is given by the condition number of the Hessian matrix \\(H\\), which is the ratio of the largest to the smallest eigenvalue of \\(H\\) (assuming the Hessian is positive definite). This essentially measures the difference between the flattest direction of movement and the most curved. Typically, the step size of a gradient-based algorithm is bounded by the most sharply curved direction. With better conditioned log densities or penalized likelihood functions, it is easier for Stan’s adaptation, especially the diagonal adaptations that are used as defaults.\n\n\n\nIdeally, all parameters should be programmed so that they have unit scale and so that posterior correlation is reduced; together, these properties mean that there is no rotation or scaling required for optimal performance of Stan’s algorithms. For Hamiltonian Monte Carlo, this implies a unit mass matrix, which requires no adaptation as it is where the algorithm initializes.\n\n\n\nIn all but very simple models (such as multivariate normals), the Hessian will vary as \\(\\theta\\) varies (an extreme example is Neal’s funnel, as naturally arises in hierarchical models with little or no data). The more the curvature varies, the harder it is for all of the algorithms with fixed adaptation parameters to find adaptations that cover the entire density well. 
Many of the variable transforms proposed are aimed at improving the conditioning of the Hessian and/or making it more consistent across the relevant portions of the density (or penalized maximum likelihood function) being fit.\nFor all of Stan’s algorithms, the curvature along the path from the initial values of the parameters to the solution is relevant. For penalized maximum likelihood and variational inference, the solution of the iterative algorithm will be a single point, so this is all that matters. For sampling, the relevant “solution” is the typical set, which is the posterior volume where almost all draws from the posterior lie; thus, the typical set contains almost all of the posterior probability mass.\nWith sampling, the curvature may vary dramatically between the points on the path from the initialization point to the typical set and within the typical set. This is why adaptation needs to run long enough to visit enough points in the typical set to get a good first-order estimate of the curvature within the typical set. If adaptation is not run long enough, sampling within the typical set after adaptation will not be efficient. We generally recommend at least one hundred iterations after the typical set is reached (and the first effective draw is ready to be realized). Whether adaptation has run long enough can be measured by comparing the adaptation parameters derived from a set of diffuse initial parameter values.\n\n\n\nImproving statistical efficiency is achieved by reparameterizing the model so that the same result may be calculated using a density or penalized maximum likelihood that is better conditioned. 
Again, see the reparameterization of Neal’s funnel for an example, and also the examples in the change of variables chapter.\nOne has to be careful in using change-of-variables reparameterizations when using maximum likelihood estimation, because they can change the result if the Jacobian term is inadvertently included in the revised likelihood model.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#well-specified-models", + "href": "stan-users-guide/efficiency-tuning.html#well-specified-models", + "title": "Efficiency Tuning", + "section": "", + "text": "Model misspecification, which roughly speaking means using a model that doesn’t match the data, can be a major source of slow code. This can be seen in cases where simulated data according to the model runs robustly and efficiently, whereas the real data for which it was intended runs slowly or may even have convergence and mixing issues. While some of the techniques recommended in the remaining sections of this chapter may mitigate the problem, the best remedy is a better model specification.\nCounterintuitively, more complicated models often run faster than simpler models. One common pattern is with a group of parameters with a wide fixed prior such as normal(0, 1000). This can fit slowly due to the mismatch between prior and posterior (the prior has support for values in the hundreds or even thousands, whereas the posterior may be concentrated near zero). 
In such cases, replacing the fixed prior with a hierarchical prior such as normal(mu, sigma), where mu and sigma are new parameters with their own hyperpriors, can be beneficial.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#avoiding-validation", + "href": "stan-users-guide/efficiency-tuning.html#avoiding-validation", + "title": "Efficiency Tuning", + "section": "", + "text": "Stan validates all of its data structure constraints. For example, consider a transformed parameter defined to be a covariance matrix and then used as a covariance parameter in the model block.\ntransformed parameters {\n cov_matrix[K] Sigma;\n // ...\n} // first validation\nmodel {\n y ~ multi_normal(mu, Sigma); // second validation\n // ...\n}\nBecause Sigma is declared to be a covariance matrix, it will be factored at the end of the transformed parameter block to ensure that it is positive definite. The multivariate normal log density function also validates that Sigma is positive definite. This test is expensive, having cubic run time (i.e., \\(\\mathcal{O}(N^3)\\) for \\(N \\times N\\) matrices), so it should not be done twice.\nThe test may be avoided by simply declaring Sigma to be a simple unconstrained matrix.\ntransformed parameters {\n matrix[K, K] Sigma;\n // ...\n}\nmodel {\n y ~ multi_normal(mu, Sigma); // only validation\n}\nNow the only validation is carried out by the multivariate normal.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#reparameterization.section", + "href": "stan-users-guide/efficiency-tuning.html#reparameterization.section", + "title": "Efficiency Tuning", + "section": "", + "text": "Stan’s sampler can be slow in sampling from distributions with difficult posterior geometries. One way to speed up such models is through reparameterization. 
In some cases, reparameterization can dramatically increase effective sample size for the same number of iterations or even make programs that would not converge well behaved.\n\n\nIn this section, we discuss a general transform from a centered to a non-centered parameterization (Papaspiliopoulos, Roberts, and Sköld 2007).1\nThis reparameterization is helpful when there is not much data, because it separates the hierarchical parameters and lower-level parameters in the prior.\nNeal (2003) defines a distribution that exemplifies the difficulties of sampling from some hierarchical models. Neal’s example is fairly extreme, but can be trivially reparameterized in such a way as to make sampling straightforward. Neal’s example has support for \\(y \\in\n\\mathbb{R}\\) and \\(x \\in \\mathbb{R}^9\\) with density\n\\[\np(y,x) = \\textsf{normal}(y \\mid 0,3) \\times \\prod_{n=1}^9\n\\textsf{normal}(x_n \\mid 0,\\exp(y/2)).\n\\]\nThe probability contours are shaped like ten-dimensional funnels. The funnel’s neck is particularly sharp because of the exponential function applied to \\(y\\). A plot of the log marginal density of \\(y\\) and the first dimension \\(x_1\\) is shown in the following plot.\nThe funnel can be implemented directly in Stan as follows.\nparameters {\n real y;\n vector[9] x;\n}\nmodel {\n y ~ normal(0, 3);\n x ~ normal(0, exp(y/2));\n}\nWhen the model is expressed this way, Stan has trouble sampling from the neck of the funnel, where \\(y\\) is small and thus \\(x\\) is constrained to be near 0. This is due to the fact that the density’s scale changes with \\(y\\), so that a step size that works well in the body will be too large for the neck, and a step size that works in the neck will be inefficient in the body. This can be seen in the following plots.\n\n\n\n\n\n\n\n\n\n\n\n\n\nFigure 1: Neal’s funnel. 
(Left) The marginal density of Neal’s funnel for the upper-level variable \\(y\\) and one lower-level variable \\(x_1\\) (see the text for the formula). The blue region has log density greater than -8, the yellow region log density greater than -16, and the gray background a log density less than -16. (Right) 4000 draws are taken from a run of Stan’s sampler with default settings. Both plots are restricted to the shown window of \\(x_1\\) and \\(y\\) values; some draws fell outside of the displayed area as would be expected given the density. The draws are consistent with the marginal density \\(p(y) = \\textsf{normal}(y \\mid 0,3)\\), which has mean 0 and standard deviation 3.\n\n\n\nIn this particular instance, because the analytic form of the density is known, the model can be converted to the following more efficient form.\nparameters {\n real y_raw;\n vector[9] x_raw;\n}\ntransformed parameters {\n real y;\n vector[9] x;\n\n y = 3.0 * y_raw;\n x = exp(y/2) * x_raw;\n}\nmodel {\n y_raw ~ std_normal(); // implies y ~ normal(0, 3)\n x_raw ~ std_normal(); // implies x ~ normal(0, exp(y/2))\n}\nIn this second model, the parameters x_raw and y_raw are sampled as independent standard normals, which is easy for Stan. These are then transformed into draws from the funnel. In this case, the same transform may be used to define Monte Carlo directly based on independent standard normal draws; Markov chain Monte Carlo methods are not necessary. If such a reparameterization were used in Stan code, it is useful to provide a comment indicating what the distribution for the parameter implies for the distribution of the transformed parameter.\n\n\n\nSampling from heavy tailed distributions such as the Cauchy is difficult for Hamiltonian Monte Carlo, which operates within a Euclidean geometry.\nThe practical problem is that the tail of the Cauchy requires a relatively large step size compared to the trunk. 
With a small step size, the No-U-Turn sampler requires many steps when starting in the tail of the distribution; with a large step size, there will be too much rejection in the central portion of the distribution. This problem may be mitigated by defining the Cauchy-distributed variable as the transform of a uniformly distributed variable using the Cauchy inverse cumulative distribution function.\nSuppose a random variable of interest \\(X\\) has a Cauchy distribution with location \\(\\mu\\) and scale \\(\\tau\\), so that \\(X \\sim\n\\textsf{Cauchy}(\\mu,\\tau)\\). The variable \\(X\\) has a cumulative distribution function \\(F_X:\\mathbb{R} \\rightarrow (0,1)\\) defined by \\[\nF_X(x) = \\frac{1}{\\pi} \\arctan \\left( \\frac{x - \\mu}{\\tau} \\right) +\n\\frac{1}{2}.\n\\] The inverse of the cumulative distribution function, \\(F_X^{-1}:(0,1) \\rightarrow \\mathbb{R}\\), is thus\n\\[\nF^{-1}_X(y) = \\mu + \\tau \\tan \\left( \\pi \\left( y - \\frac{1}{2} \\right) \\right).\n\\] Thus if the random variable \\(Y\\) has a unit uniform distribution, \\(Y\n\\sim \\textsf{uniform}(0,1)\\), then \\(F^{-1}_X(Y)\\) has a Cauchy distribution with location \\(\\mu\\) and scale \\(\\tau\\), i.e., \\(F^{-1}_X(Y) \\sim\n\\textsf{Cauchy}(\\mu,\\tau)\\).\nConsider a Stan program involving a Cauchy-distributed parameter beta.\nparameters {\n real beta;\n // ...\n}\nmodel {\n beta ~ cauchy(mu, tau);\n // ...\n}\nThis declaration of beta as a parameter may be replaced with a transformed parameter beta defined in terms of a uniform-distributed parameter beta_unif.\nparameters {\n real<lower=-pi() / 2, upper=pi() / 2> beta_unif;\n // ...\n}\ntransformed parameters {\n real beta;\n beta = mu + tau * tan(beta_unif); // beta ~ cauchy(mu, tau)\n}\nmodel {\n beta_unif ~ uniform(-pi() / 2, pi() / 2); // not necessary\n // ...\n}\nIt is more convenient in Stan to transform a uniform variable on \\((-\\pi/2, \\pi/2)\\) than one on \\((0,1)\\). 
The Cauchy location and scale parameters, mu and tau, may be defined as data or may themselves be parameters. The variable beta could also be defined as a local variable if it does not need to be included in the sampler’s output.\nThe uniform distribution on beta_unif is defined explicitly in the model block, but it could be safely removed from the program without changing sampling behavior. This is because \\(\\log\n\\textsf{uniform}(\\beta_{\\textsf{unif}} \\mid -\\pi/2,\\pi/2) =\n-\\log \\pi\\) is a constant and Stan only needs the total log probability up to an additive constant. Stan will spend some time checking that beta_unif is between -pi() / 2 and pi() / 2, but this condition is guaranteed by the constraints in the declaration of beta_unif.\n\n\n\nOne thing that sometimes works when you’re having trouble with the heavy-tailedness of Student-t distributions is to use the gamma-mixture representation, which says that you can generate a Student-t distributed variable \\(\\beta\\), \\[\n\\beta \\sim \\textsf{Student-t}(\\nu, 0, 1),\n\\] by first generating a gamma-distributed precision (inverse variance) \\(\\tau\\) according to \\[\n\\tau \\sim \\textsf{Gamma}(\\nu/2, \\nu/2),\n\\] and then generating \\(\\beta\\) from the normal distribution, \\[\n\\beta \\sim \\textsf{normal}\\left(0,\\tau^{-\\frac{1}{2}}\\right).\n\\]\nBecause \\(\\tau\\) is precision, \\(\\tau^{-\\frac{1}{2}}\\) is the scale (standard deviation), which is the parameterization used by Stan.\nThe marginal distribution of \\(\\beta\\) when you integrate out \\(\\tau\\) is \\(\\textsf{Student-t}(\\nu, 0, 1)\\), i.e., \\[\n\\textsf{Student-t}(\\beta \\mid \\nu, 0, 1)\n=\n\\int_0^{\\infty}\n\\,\n\\textsf{normal}\\left(\\beta \\middle| 0, \\tau^{-0.5}\\right)\n\\times\n\\textsf{Gamma}\\left(\\tau \\middle| \\nu/2, \\nu/2\\right)\n\\\n\\text{d} \\tau.\n\\]\nTo go one step further, instead of defining a \\(\\beta\\) drawn from a normal with precision \\(\\tau\\), define \\(\\alpha\\) to be 
drawn from a unit normal, \\[\n\\alpha \\sim \\textsf{normal}(0,1)\n\\] and rescale by defining \\[\n\\beta = \\alpha \\, \\tau^{-\\frac{1}{2}}.\n\\]\nNow suppose \\(\\mu = \\beta x\\) is the product of \\(\\beta\\) with a regression predictor \\(x\\). Then the reparameterization \\(\\mu = \\alpha\n\\tau^{-\\frac{1}{2}} x\\) has the same distribution, but in the original, direct parameterization, \\(\\beta\\) has (potentially) heavy tails, whereas in the second, neither \\(\\tau\\) nor \\(\\alpha\\) have heavy tails.\nTo translate into Stan notation, this reparameterization replaces\nparameters {\n real<lower=0> nu;\n real beta;\n // ...\n}\nmodel {\n beta ~ student_t(nu, 0, 1);\n // ...\n}\nwith\nparameters {\n real<lower=0> nu;\n real<lower=0> tau;\n real alpha;\n // ...\n}\ntransformed parameters {\n real beta;\n beta = alpha / sqrt(tau);\n // ...\n}\nmodel {\n real half_nu;\n half_nu = 0.5 * nu;\n tau ~ gamma(half_nu, half_nu);\n alpha ~ std_normal();\n // ...\n}\nAlthough set to 0 here, in most cases, the lower bound for the degrees of freedom parameter nu can be set to 1 or higher; when nu is 1, the result is a Cauchy distribution with fat tails and as nu approaches infinity, the Student-t distribution approaches a normal distribution. Thus the parameter nu characterizes the heaviness of the tails of the model.\n\n\n\nUnfortunately, the usual situation in applied Bayesian modeling involves complex geometries and interactions that are not known analytically. Nevertheless, the non-centered parameterization can still be effective for separating parameters.\n\n\nFor example, a vectorized hierarchical model might draw a vector of coefficients \\(\\beta\\) with definitions as follows. 
The so-called centered parameterization is as follows.\nparameters {\n real mu_beta;\n real<lower=0> sigma_beta;\n vector[K] beta;\n // ...\n}\nmodel {\n beta ~ normal(mu_beta, sigma_beta);\n // ...\n}\nAlthough not shown, a full model will have priors on both mu_beta and sigma_beta along with data modeled based on these coefficients. For instance, a standard binary logistic regression with data matrix x and binary outcome vector y would include a likelihood statement of the form y ~ bernoulli_logit(x * beta), leading to an analytically intractable posterior.\nA hierarchical model such as the above will suffer from the same kind of inefficiencies as Neal’s funnel, because the values of beta, mu_beta and sigma_beta are highly correlated in the posterior. The extremity of the correlation depends on the amount of data, with Neal’s funnel being the extreme with no data. In these cases, the non-centered parameterization, discussed in the next section, is preferable; when there is a lot of data, the centered parameterization is more efficient. See Betancourt and Girolami (2013) for more information on the effects of centering in hierarchical models fit with Hamiltonian Monte Carlo.\n\n\n\n\nSometimes the group-level effects do not constrain the hierarchical distribution tightly. Examples arise when there are not many groups, or when the inter-group variation is high. In such cases, hierarchical models can be made much more efficient by shifting the data’s correlation with the parameters to the hyperparameters. 
Similar to the funnel example, this will be much more efficient in terms of effective sample size when there is not much data (see Betancourt and Girolami (2013)), and in more extreme cases will be necessary to achieve convergence.\nparameters {\n real mu_beta;\n real<lower=0> sigma_beta;\n vector[K] beta_raw;\n // ...\n}\ntransformed parameters {\n vector[K] beta;\n // implies: beta ~ normal(mu_beta, sigma_beta)\n beta = mu_beta + sigma_beta * beta_raw;\n}\nmodel {\n beta_raw ~ std_normal();\n // ...\n}\nAny priors defined for mu_beta and sigma_beta remain as defined in the original model.\nAlternatively, Stan’s affine transform can be used to decouple sigma and beta:\nparameters {\n real mu_beta;\n real<lower=0> sigma_beta;\n vector<offset=mu_beta, multiplier=sigma_beta>[K] beta;\n // ...\n}\nmodel {\n beta ~ normal(mu_beta, sigma_beta);\n // ...\n}\nReparameterization of hierarchical models is not limited to the normal distribution, although the normal distribution is the best candidate for doing so. In general, any distribution of parameters in the location-scale family is a good candidate for reparameterization. Let \\(\\beta = l + s\\alpha\\) where \\(l\\) is a location parameter and \\(s\\) is a scale parameter. The parameter \\(l\\) need not be the mean, \\(s\\) need not be the standard deviation, and neither the mean nor the standard deviation need to exist. If \\(\\alpha\\) and \\(\\beta\\) are from the same distributional family but \\(\\alpha\\) has location zero and unit scale, while \\(\\beta\\) has location \\(l\\) and scale \\(s\\), then that distribution is a location-scale distribution. Thus, if \\(\\alpha\\) were a parameter and \\(\\beta\\) were a transformed parameter, then a prior distribution from the location-scale family on \\(\\alpha\\) with location zero and unit scale implies a prior distribution on \\(\\beta\\) with location \\(l\\) and scale \\(s\\). 
Doing so would reduce the dependence between \\(\\alpha\\), \\(l\\), and \\(s\\).\nThere are several univariate distributions in the location-scale family, such as the Student t distribution, including its special cases of the Cauchy distribution (with one degree of freedom) and the normal distribution (with infinite degrees of freedom). As shown above, if \\(\\alpha\\) is distributed standard normal, then \\(\\beta\\) is distributed normal with mean \\(\\mu = l\\) and standard deviation \\(\\sigma = s\\). The logistic, the double exponential, the generalized extreme value distributions, and the stable distribution are also in the location-scale family.\nAlso, if \\(z\\) is distributed standard normal, then \\(z^2\\) is distributed chi-squared with one degree of freedom. By summing the squares of \\(K\\) independent standard normal variates, one can obtain a single variate that is distributed chi-squared with \\(K\\) degrees of freedom. However, for large \\(K\\), the computational gains of this reparameterization may be overwhelmed by the computational cost of specifying \\(K\\) primitive parameters just to obtain one transformed parameter to use in a model.\n\n\n\nThe benefits of reparameterization are not limited to univariate distributions. A parameter with a multivariate normal prior distribution is also an excellent candidate for reparameterization. Suppose you intend the prior for \\(\\beta\\) to be multivariate normal with mean vector \\(\\mu\\) and covariance matrix \\(\\Sigma\\). 
Such a belief is reflected by the following code.\ndata {\n int<lower=2> K;\n vector[K] mu;\n cov_matrix[K] Sigma;\n // ...\n}\nparameters {\n vector[K] beta;\n // ...\n}\nmodel {\n beta ~ multi_normal(mu, Sigma);\n // ...\n}\nIn this case mu and Sigma are fixed data, but they could be unknown parameters, in which case their priors would be unaffected by a reparameterization of beta.\nIf \\(\\alpha\\) has the same dimensions as \\(\\beta\\) but the elements of \\(\\alpha\\) are independently and identically distributed standard normal such that \\(\\beta = \\mu + L\\alpha\\), where \\(LL^\\top = \\Sigma\\), then \\(\\beta\\) is distributed multivariate normal with mean vector \\(\\mu\\) and covariance matrix \\(\\Sigma\\). One choice for \\(L\\) is the Cholesky factor of \\(\\Sigma\\). Thus, the model above could be reparameterized as follows.\ndata {\n int<lower=2> K;\n vector[K] mu;\n cov_matrix[K] Sigma;\n // ...\n}\ntransformed data {\n matrix[K, K] L;\n L = cholesky_decompose(Sigma);\n}\nparameters {\n vector[K] alpha;\n // ...\n}\ntransformed parameters {\n vector[K] beta;\n beta = mu + L * alpha;\n}\nmodel {\n alpha ~ std_normal();\n // implies: beta ~ multi_normal(mu, Sigma)\n // ...\n}\nThis reparameterization is more efficient for two reasons. First, it reduces dependence among the elements of alpha and second, it avoids the need to invert Sigma every time multi_normal is evaluated.\nThe Cholesky factor is also useful when a covariance matrix is decomposed into a correlation matrix that is multiplied from both sides by a diagonal matrix of standard deviations, where either the standard deviations or the correlations are unknown parameters. The Cholesky factor of the covariance matrix is equal to the product of a diagonal matrix of standard deviations and the Cholesky factor of the correlation matrix. 
Furthermore, the product of a diagonal matrix of standard deviations and a vector is equal to the elementwise product between the standard deviations and that vector. Thus, if for example the correlation matrix Tau were fixed data but the vector of standard deviations sigma were unknown parameters, then a reparameterization of beta in terms of alpha could be implemented as follows.\ndata {\n int<lower=2> K;\n vector[K] mu;\n corr_matrix[K] Tau;\n // ...\n}\ntransformed data {\n matrix[K, K] L;\n L = cholesky_decompose(Tau);\n}\nparameters {\n vector[K] alpha;\n vector<lower=0>[K] sigma;\n // ...\n}\ntransformed parameters {\n vector[K] beta;\n // This equals mu + diag_matrix(sigma) * L * alpha;\n beta = mu + sigma .* (L * alpha);\n}\nmodel {\n sigma ~ cauchy(0, 5);\n alpha ~ std_normal();\n // implies: beta ~ multi_normal(mu,\n // diag_matrix(sigma) * L * L' * diag_matrix(sigma)))\n // ...\n}\nThis reparameterization of a multivariate normal distribution in terms of standard normal variates can be extended to other multivariate distributions that can be conceptualized as contaminations of the multivariate normal, such as the multivariate Student t and the skew multivariate normal distribution.\nA Wishart distribution can also be reparameterized in terms of standard normal variates and chi-squared variates. Let \\(L\\) be the Cholesky factor of a \\(K \\times K\\) positive definite scale matrix \\(S\\) and let \\(\\nu\\) be the degrees of freedom. If \\[\nA = \\begin{pmatrix}\n\\sqrt{c_{1}} & 0 & \\cdots & 0 \\\\\nz_{21} & \\sqrt{c_{2}} & \\ddots & \\vdots \\\\\n\\vdots & \\ddots & \\ddots & 0 \\\\\nz_{K1} & \\cdots & z_{K\\left(K-1\\right)} & \\sqrt{c_{K}}\n\\end{pmatrix},\n\\] where each \\(c_i\\) is distributed chi-squared with \\(\\nu - i + 1\\) degrees of freedom and each \\(z_{ij}\\) is distributed standard normal, then \\(W = LAA^{\\top}L^{\\top}\\) is distributed Wishart with scale matrix \\(S = LL^{\\top}\\) and degrees of freedom \\(\\nu\\). 
Such a reparameterization can be implemented by the following Stan code:\ndata {\n int<lower=1> N;\n int<lower=1> K;\n int<lower=K + 2> nu\n matrix[K, K] L; // Cholesky factor of scale matrix\n vector[K] mu;\n matrix[N, K] y;\n // ...\n}\nparameters {\n vector<lower=0>[K] c;\n vector[0.5 * K * (K - 1)] z;\n // ...\n}\nmodel {\n matrix[K, K] A;\n int count = 1;\n for (j in 1:(K - 1)) {\n for (i in (j + 1):K) {\n A[i, j] = z[count];\n count += 1;\n }\n for (i in 1:(j - 1)) {\n A[i, j] = 0.0;\n }\n A[j, j] = sqrt(c[j]);\n }\n for (i in 1:(K - 1)) {\n A[i, K] = 0;\n }\n A[K, K] = sqrt(c[K]);\n\n for (i in 1:K) {\n c[i] ~ chi_square(nu - i + 1);\n }\n\n z ~ std_normal();\n // implies: L * A * A' * L' ~ wishart(nu, L * L')\n y ~ multi_normal_cholesky(mu, L * A);\n // ...\n}\nThis reparameterization is more efficient for three reasons. First, it reduces dependence among the elements of z and second, it avoids the need to invert the covariance matrix, \\(W\\) every time wishart is evaluated. Third, if \\(W\\) is to be used with a multivariate normal distribution, you can pass \\(L A\\) to the more efficient multi_normal_cholesky function, rather than passing \\(W\\) to multi_normal.\nIf \\(W\\) is distributed Wishart with scale matrix \\(S\\) and degrees of freedom \\(\\nu\\), then \\(W^{-1}\\) is distributed inverse Wishart with inverse scale matrix \\(S^{-1}\\) and degrees of freedom \\(\\nu\\). Thus, the previous result can be used to reparameterize the inverse Wishart distribution. Since \\(W = L A A^{\\top} L^{\\top}\\), \\(W^{-1} = L^{{\\top}^{-1}} A^{{\\top}^{-1}} A^{-1} L^{-1}\\), where all four inverses exist, but \\(L^{{-1}^{\\top}} = L^{{\\top}^{-1}}\\) and \\(A^{{-1}^{\\top}} = A^{{\\top}^{-1}}\\). 
We can slightly modify the above Stan code for this case:\ndata {\n int<lower=1> K;\n int<lower=K + 2> nu\n matrix[K, K] L; // Cholesky factor of scale matrix\n // ...\n}\ntransformed data {\n matrix[K, K] eye;\n matrix[K, K] L_inv;\n for (j in 1:K) {\n for (i in 1:K) {\n eye[i, j] = 0.0;\n }\n eye[j, j] = 1.0;\n }\n L_inv = mdivide_left_tri_low(L, eye);\n}\nparameters {\n vector<lower=0>[K] c;\n vector[0.5 * K * (K - 1)] z;\n // ...\n}\nmodel {\n matrix[K, K] A;\n matrix[K, K] A_inv_L_inv;\n int count;\n count = 1;\n for (j in 1:(K - 1)) {\n for (i in (j + 1):K) {\n A[i, j] = z[count];\n count += 1;\n }\n for (i in 1:(j - 1)) {\n A[i, j] = 0.0;\n }\n A[j, j] = sqrt(c[j]);\n }\n for (i in 1:(K - 1)) {\n A[i, K] = 0;\n }\n A[K, K] = sqrt(c[K]);\n\n A_inv_L_inv = mdivide_left_tri_low(A, L_inv);\n for (i in 1:K) {\n c[i] ~ chi_square(nu - i + 1);\n }\n\n z ~ std_normal(); // implies: crossprod(A_inv_L_inv) ~\n // inv_wishart(nu, L_inv' * L_inv)\n // ...\n}\nAnother candidate for reparameterization is the Dirichlet distribution with all \\(K\\) shape parameters equal. Zyczkowski and Sommers (2001) shows that if \\(\\theta_i\\) is equal to the sum of \\(\\beta\\) independent squared standard normal variates and \\(\\rho_i = \\frac{\\theta_i}{\\sum \\theta_i}\\), then the \\(K\\)-vector \\(\\rho\\) is distributed Dirichlet with all shape parameters equal to \\(\\frac{\\beta}{2}\\). In particular, if \\(\\beta = 2\\), then \\(\\rho\\) is uniformly distributed on the unit simplex. 
Thus, we can make \\(\\rho\\) be a transformed parameter to reduce dependence, as in:\ndata {\n int<lower=1> beta;\n // ...\n}\nparameters {\n array[K] vector[beta] z;\n // ...\n}\ntransformed parameters {\n simplex[K] rho;\n for (k in 1:K) {\n rho[k] = dot_self(z[k]); // sum-of-squares\n }\n rho = rho / sum(rho);\n}\nmodel {\n for (k in 1:K) {\n z[k] ~ std_normal();\n }\n // implies: rho ~ dirichlet(0.5 * beta * ones)\n // ...\n}", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#vectorization", + "href": "stan-users-guide/efficiency-tuning.html#vectorization", + "title": "Efficiency Tuning", + "section": "", + "text": "Stan spends the vast majority of its time computing the gradient of the log probability function, making gradients the obvious target for optimization. Stan’s gradient calculations with algorithmic differentiation require a template expression to be allocated and constructed for each subexpression of a Stan program involving parameters or transformed parameters.2 This section defines optimization strategies based on vectorizing these subexpressions to reduce the work done during algorithmic differentiation.\n\n\n\nBecause of the gradient bottleneck described in the previous section, it is more efficient to collect a sequence of summands into a vector or array and then apply the sum() operation than it is to continually increment a variable by assignment and addition. 
For example, consider the following code snippet, where foo() is some operation that depends on n.\nfor (n in 1:N) {\n total += foo(n,...);\n}\nThis code has to create intermediate representations for each of the N summands.\nA faster alternative is to copy the values into a vector, then apply the sum() operator, as in the following refactoring.\n{\n vector[N] summands;\n for (n in 1:N) {\n summands[n] = foo(n,...);\n }\n total = sum(summands);\n}\nSyntactically, the replacement is a statement block delineated by curly brackets ({, }), starting with the definition of the local variable summands.\nEven though it involves extra work to allocate the summands vector and copy N values into it, the savings in differentiation more than make up for it. Perhaps surprisingly, it will also use substantially less memory overall than incrementing total within the loop.\n\n\n\nThe following program directly encodes a linear regression with fixed unit noise using a two-dimensional array x of predictors, an array y of outcomes, and an array beta of regression coefficients.\ndata {\n int<lower=1> K;\n int<lower=1> N;\n array[K, N] real x;\n array[N] real y;\n}\nparameters {\n array[K] real beta;\n}\nmodel {\n for (n in 1:N) {\n real gamma = 0;\n for (k in 1:K) {\n gamma += x[n, k] * beta[k];\n }\n y[n] ~ normal(gamma, 1);\n }\n}\nThe following model computes the same log probability function as the previous model, even supporting the same input files for data and initialization.\ndata {\n int<lower=1> K;\n int<lower=1> N;\n array[N] vector[K] x;\n array[N] real y;\n}\nparameters {\n vector[K] beta;\n}\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(dot_product(x[n], beta), 1);\n }\n}\nAlthough it produces equivalent results, the dot product should not be replaced with a transpose and multiply, as in\ny[n] ~ normal(x[n]' * beta, 1);\nThe relative inefficiency of the transpose and multiply approach is that the transposition operator allocates a new vector into which the result of the 
transposition is copied. This consumes both time and memory.3\nThe inefficiency of transposition could itself be mitigated by reordering the product and pulling the transposition out of the loop, as follows.\n// ...\ntransformed parameters {\n row_vector[K] beta_t;\n beta_t = beta';\n}\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(beta_t * x[n], 1);\n }\n}\nThe problem with transposition could be completely solved by directly encoding the x as a row vector, as in the following example.\ndata {\n // ...\n array[N] row_vector[K] x;\n // ...\n}\nparameters {\n vector[K] beta;\n}\nmodel {\n for (n in 1:N) {\n y[n] ~ normal(x[n] * beta, 1);\n }\n}\nDeclaring the data as a matrix and then computing all the predictors at once using matrix multiplication is more efficient still, as in the example discussed in the next section.\nHaving said all this, the most efficient way to code this model is with direct matrix multiplication, as in\ndata {\n matrix[N, K] x;\n vector[N] y;\n}\nparameters {\n vector[K] beta;\n}\nmodel {\n y ~ normal(x * beta, 1);\n}\nIn general, encapsulated single operations that do the work of loops will be more efficient in their encapsulated forms. Rather than performing a sequence of row-vector/vector multiplications, it is better to encapsulate it as a single matrix/vector multiplication.\n\n\n\nThe final and most efficient version replaces the loops and transformed parameters by using the vectorized form of the normal probability function, as in the following example.\ndata {\n int<lower=1> K;\n int<lower=1> N;\n matrix[N, K] x;\n vector[N] y;\n}\nparameters {\n vector[K] beta;\n}\nmodel {\n y ~ normal(x * beta, 1);\n}\nThe variables are all declared as either matrix or vector types. 
The result of the matrix-vector multiplication x * beta in the model block is a vector of the same length as y.\nThe probability function documentation in the function reference manual indicates which of Stan’s probability functions support vectorization; see the function reference manual for full details. Vectorized probability functions accept either vector or scalar inputs for all arguments, with the only restriction being that all vector arguments are the same dimensionality. In the example above, y is a vector of size N, x * beta is a vector of size N, and 1 is a scalar.\n\n\n\nSometimes data does not arrive in a shape that is ideal for vectorization, but can be put into such shape with some munging (either inside Stan’s transformed data block or outside).\nJohn Hall provided a simple example on the Stan users group. Simplifying notation a bit, the original model had a sampling statement in a loop, as follows.\nfor (n in 1:N) {\n y[n] ~ normal(mu[ii[n]], sigma);\n}\nThe brute force vectorization would build up a mean vector and then vectorize all at once.\n{\n vector[N] mu_ii;\n for (n in 1:N) {\n mu_ii[n] = mu[ii[n]];\n }\n y ~ normal(mu_ii, sigma);\n}\nIf there aren’t many levels (values ii[n] can take), then it behooves us to reorganize the data by group in a case like this. Rather than having a single observation vector y, there are K of them. And because Stan doesn’t support ragged arrays, it means K declarations. For instance, with 5 levels, we have\ny_1 ~ normal(mu[1], sigma);\n// ...\ny_5 ~ normal(mu[5], sigma);\nThis way, both the mu and sigma parameters are shared. 
Which way works out to be more efficient will depend on the shape of the data; if the sizes are small, the simple vectorization may be faster, but for moderate to large sized groups, the full expansion should be faster.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#exploiting-sufficient-statistics", + "href": "stan-users-guide/efficiency-tuning.html#exploiting-sufficient-statistics", + "title": "Efficiency Tuning", + "section": "", + "text": "In some cases, models can be recoded to exploit sufficient statistics in estimation. This can lead to large efficiency gains compared to an expanded model. This section provides examples for Bernoulli and normal distributions, but the same approach can be applied to other members of the exponential family.\n\n\nConsider the following Bernoulli sampling model.\ndata {\n int<lower=0> N;\n array[N] int<lower=0, upper=1> y;\n real<lower=0> alpha;\n real<lower=0> beta;\n}\nparameters {\n real<lower=0, upper=1> theta;\n}\nmodel {\n theta ~ beta(alpha, beta);\n for (n in 1:N) {\n y[n] ~ bernoulli(theta);\n }\n}\nIn this model, the sum of positive outcomes in y is a sufficient statistic for the chance of success theta. The model may be recoded using the binomial distribution as follows.\ntheta ~ beta(alpha, beta);\nsum(y) ~ binomial(N, theta);\nBecause truth is represented as one and falsehood as zero, the sum sum(y) of a binary vector y is equal to the number of positive outcomes out of a total of N trials.\nThis can be generalized to other discrete cases (one wouldn’t expect continuous observations to be duplicated if they are random). Suppose there are only \\(K\\) possible discrete outcomes, \\(z_1, \\dotsc, z_K\\), but there are \\(N\\) observations, where \\(N\\) is much larger than \\(K\\). 
If \\(f_k\\) is the frequency of outcome \\(z_k\\), then the entire likelihood with distribution foo can be coded as follows.\nfor (k in 1:K) {\n target += f[k] * foo_lpmf(z[k] | ...);\n}\nwhere the ellipses are the parameters of the log probability mass function for distribution foo (there’s no distribution called “foo”; this is just a placeholder for any discrete distribution name).\nThe resulting program looks like a “weighted” regression, but here the weights f[k] are counts and thus sufficient statistics for the PMF and simply amount to an alternative, more efficient coding of the same likelihood. For efficiency, the frequencies f[k] should be counted once in the transformed data block and stored.\nThe same trick works for combining multiple binomial observations.\n\n\n\nConsider the following Stan model for fitting a normal distribution to data.\ndata {\n int N;\n vector[N] y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(mu, sigma);\n}\nWith the vectorized form used for y, Stan is clever enough to only evaluate log(sigma) once, but it still has to evaluate the normal for all of y[1] to y[N], which involves adding up all the squared differences from the mean and then dividing by sigma squared.\nAn equivalent density to the one above (up to normalizing constants that do not depend on parameters), is given in the following Stan program.\ndata {\n int N;\n vector[N] y;\n}\ntransformed data {\n real mean_y = mean(y);\n real<lower=0> var_y = variance(y);\n real nm1_over2 = 0.5 * (N - 1);\n real sqrt_N = sqrt(N);\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n mean_y ~ normal(mu, sigma / sqrt_N);\n var_y ~ gamma(nm1_over2, nm1_over2 / sigma^2);\n}\nThe data and parameters are the same in this program as in the first. The second version adds a transformed data block to compute the mean and variance of the data, which are the sufficient statistics here. These are stored along with two other useful constants. 
Then the program can define distributions over the mean and variance, both of which are scalars here.\nThe original Stan program and this one define the same model in the sense that they define the same log density up to a constant additive term that does not depend on the parameters. The priors on mu and sigma are both improper, but proper priors could be added as additional statements in the model block without affecting the sufficiency.\nThis transform explicitly relies on aggregating the data. Using this trick on parameters leads to more computation than just computing the normal log density, even before accounting for the non-linear change of variables in the variance.\n\n\n\nThe Poisson distribution is the easiest case, because the sum of observations is sufficient. Specifically, we can replace\ny ~ poisson(lambda);\nwith\nsum(y) ~ poisson(size(y) * lambda);\nThis will work even if y is a parameter vector because no Jacobian adjustment is required for summation.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#aggregating-common-subexpressions", + "href": "stan-users-guide/efficiency-tuning.html#aggregating-common-subexpressions", + "title": "Efficiency Tuning", + "section": "", + "text": "If an expression is calculated once, the value should be saved and reused wherever possible. That is, rather than using exp(theta) in multiple places, declare a local variable to store its value and reuse the local variable.\nAnother case that may not be so obvious is with two multilevel parameters, say a[ii[n]] + b[jj[n]]. 
If a and b are small (i.e., do not have many levels), then a table a_b of their sums can be created, with\nmatrix[size(a), size(b)] a_b;\nfor (i in 1:size(a)) {\n for (j in 1:size(b)) {\n a_b[i, j] = a[i] + b[j];\n }\n}\nThen the sum can be replaced with a_b[ii[n], jj[n]].", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#exploiting-conjugacy", + "href": "stan-users-guide/efficiency-tuning.html#exploiting-conjugacy", + "title": "Efficiency Tuning", + "section": "", + "text": "Continuing the model from the previous section, the conjugacy of the beta prior and binomial distribution allow the model to be further optimized to the following equivalent form.\ntheta ~ beta(alpha + sum(y), beta + N - sum(y));\nTo make the model even more efficient, a transformed data variable defined to be sum(y) could be used in the place of sum(y).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#standardizing-predictors", + "href": "stan-users-guide/efficiency-tuning.html#standardizing-predictors", + "title": "Efficiency Tuning", + "section": "", + "text": "Standardizing the data so that all predictors have a zero sample mean and unit sample variance has the following potential benefits:\n\nIt helps in faster convergence of MCMC chains.\nIt makes the model less sensitive to the specifics of the parameterization.\nIt aids in the interpretation and comparison of the importance of coefficients across different predictors.\n\nWhen there are large differences between the units and scales of the predictors, standardizing the predictors is especially useful. This section illustrates the principle for a simple linear regression.\nSuppose that \\(y = (y_1,\\dotsc,y_N)\\) is a vector of \\(N\\) outcomes and \\(x = (x_1,\\dotsc,x_N)\\) the corresponding vector of \\(N\\) predictors. 
A simple linear regression involving an intercept coefficient \\(\\alpha\\) and slope coefficient \\(\\beta\\) can be expressed as \\[\ny_n = \\alpha + \\beta x_n + \\epsilon_n,\n\\] where \\[\n\\epsilon_n \\sim \\textsf{normal}(0,\\sigma).\n\\]\nIf \\(x\\) has very large or very small values or if the mean of the values is far away from 0 (on the scale of the values), then it can be more efficient to standardize the predictor values \\(x_n\\). First the elements of \\(x\\) are zero-centered by subtracting the mean, then scaled by dividing by the standard deviation.\nThe mean of \\(x\\) is given by:\n\\[\nmean_x = \\frac{1}{N} \\sum_{n=1}^{N} x_n\n\\]\nThe standard deviation of \\(x\\) is calculated as: \\[\nsd_x = {\\left({\\frac{1}{N} \\sum_{n=1}^{N} (x_n - mean_x)^2}\\right)}^{1/2}\n\\]\nWith these, we compute the \\(z\\), the standardized predictors\n\\[\nz_n = \\frac{x_n - mean_x}{sd_x}\n\\]\nwhere \\(z_n\\) is the standardized value corresponding to \\(x_n\\).\nThe inverse transform is defined by reversing the two normalization steps, first rescaling by the same deviation and relocating by the sample mean.\n\\[\nx_n = z_n sd_x + mean_x\n\\]\nStandardizing the predictors standardizes the scale of the variables, and hence the scale of the priors.\nConsider the following initial model.\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n // priors\n alpha ~ normal(0, 10);\n beta ~ normal(0, 10);\n sigma ~ normal(0, 5);\n // likelihood\n y ~ normal(x * beta + alpha, sigma);\n}\nThe data block for the standardized model is identical. 
The mean and standard deviation of the data are defined in the transformed data block, along with the standardized predictors.\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n}\ntransformed data {\n real mean_x = mean(x);\n real sd_x = sd(x);\n vector[N] x_std = (x - mean_x) / sd_x;\n}\nparameters {\n real alpha_std;\n real beta_std;\n real<lower=0> sigma_std;\n}\nmodel {\n alpha_std ~ normal(0, 10);\n beta_std ~ normal(0, 10);\n sigma_std ~ normal(0, 5);\n y ~ normal(x_std * beta_std + alpha_std, sigma_std);\n}\nThe parameters are renamed to indicate that they aren’t the “natural” parameters. The transformed data x_std is defined in terms of variables mean_x and sd_x; by declaring these variables in the transformed data block, they will be available in all following blocks, and therefore can be used in the generated quantities block to record the “natural” parameters alpha and beta.\nThe fairly diffuse priors on the coefficients are the same. These could have been transformed as well, but here they are left as is, because the scales make sense as diffuse priors for standardized data.\nThe original regression \\[\ny_n = \\alpha + \\beta x_n + \\epsilon_n\n\\] has been transformed to a regression on the standardized data variable \\(z\\),\n\\[\ny_n = \\alpha' + \\beta' z_n + \\epsilon_n.\n\\]\nThe likelihood is specified in terms of the standardized parameters. The original slope \\(\\beta\\) is the standardized slope \\(\\beta'\\) scaled by the inverse of the standard deviation of \\(x\\). The original intercept \\(\\alpha\\) is the intercept from the standardized model \\(\\alpha'\\), corrected for the effect of scaling and centering \\(x\\). 
Thus, the formulas to retrieve \\(\\alpha\\) and \\(\\beta\\) from \\(\\alpha'\\) and \\(\\beta'\\) are:\n\\[\\begin{align*}\n\\beta = \\frac{\\beta'}{\\sigma_x} \\\\\n\\alpha = \\alpha' - \\beta' \\frac{\\mu_x}{\\sigma_x}\n\\end{align*}\\]\nThese recovered parameter values on the original scales can be calculated within Stan using a generated quantities block following the model block,\ngenerated quantities {\n real beta = beta_std / sd_x;\n real alpha = alpha_std - beta_std * mean_x / sd_x;\n\n}\nWhen there are multiple real-valued predictors, i.e., when K is the number of predictors, x is an \\(N \\times K\\) matrix, and beta is a \\(K\\)-vector of coefficients, then x * beta is an \\(N\\)-vector of predictions, one for each of the \\(N\\) data items. When \\(K \\ll N\\) the QR reparameterization is recommended for linear and generalized linear models unless there is an informative prior on the location of \\(\\beta\\).\n\n\nFor many applications on the standard scale, normal distributions with location zero and scale one will be used. In these cases, it is more efficient to use\ny ~ std_normal();\nthan to use\ny ~ normal(0, 1);\nbecause the subtraction of the location and division by the scale cancel, as does subtracting the log of the scale.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#using-map-reduce", + "href": "stan-users-guide/efficiency-tuning.html#using-map-reduce", + "title": "Efficiency Tuning", + "section": "", + "text": "The map-reduce operation, even without multi-core MPI support, can be used to make programs more scalable and also more efficient. See the map-reduce chapter for more information on implementing map-reduce operations.\nMap-reduce allows greater scalability because only the Jacobian of the mapped function for each shard is stored. The Jacobian consists of all of the derivatives of the outputs with respect to the parameters. 
During execution, the derivatives of the shard are evaluated using nested automatic differentiation. As often happens with modern CPUs, reduced memory overhead leads to increased memory locality and faster execution. The Jacobians are all computed with local memory and their outputs stored contiguously in memory.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/efficiency-tuning.html#footnotes", + "href": "stan-users-guide/efficiency-tuning.html#footnotes", + "title": "Efficiency Tuning", + "section": "Footnotes", + "text": "Footnotes\n\n\nThis parameterization came to be known on our mailing lists as the “Matt trick” after Matt Hoffman, who independently came up with it while fitting hierarchical models in Stan.↩︎\nStan uses its own arena-based allocation, so allocation and deallocation are faster than with a raw call to new.↩︎\nFuture versions of Stan may remove this inefficiency by more fully exploiting expression templates inside the Eigen C++ matrix library. This will require enhancing Eigen to deal with mixed-type arguments, such as the type double used for constants and the algorithmic differentiation type stan::math::var used for variables.↩︎", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Efficiency Tuning" + ] + }, + { + "objectID": "stan-users-guide/dae.html", + "href": "stan-users-guide/dae.html", + "title": "Differential-Algebraic Equations", + "section": "", + "text": "Stan supports solving systems of differential-algebraic equations (DAEs) of index 1 (Serban et al. 2021). The solver adaptively refines the solutions in order to satisfy given tolerances.\nOne can think of a differential-algebraic system of equations as ODEs with additional algebraic constraints applied to some of the variables. 
In such a system, the variable derivatives may not be expressed explicitly with a right-hand-side as in ODEs, but implicitly constrained.\nSimilar to ODE solvers, the DAE solvers must not only provide the solution to the DAE itself, but also the gradient of the DAE solution with respect to parameters (the sensitivities). Stan’s DAE solver uses the forward sensitivity calculation to expand the base DAE system with additional DAE equations for the gradients of the solution. For each parameter, an additional full set of \\(N\\) sensitivity states are added meaning that the full DAE solved has \\(N \\, + N \\cdot M\\) states.\nTwo interfaces are provided for the forward sensitivity solver: one with default tolerances and default max number of steps, and one that allows these controls to be modified. Choosing tolerances is important for making any of the solvers work well – the defaults will not work everywhere. The tolerances should be chosen primarily with consideration to the scales of the solutions, the accuracy needed for the solutions, and how the solutions are used in the model. The same principles in the control parameters section apply here.\nInternally Stan’s DAE solver uses a variable-step, variable-order, backward-differentiation formula implementation (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005).\n\n\nA DAE is defined by a set of expressions for the residuals of differential equations and algebraic equations \\(r(y', y, t, \\theta)\\), and consistent initial conditions \\(y(t_0, \\theta) = y_0, y'(t_0, \\theta)=y'_0\\). The DAE is define by residual function as \\(r(y', y, t, \\theta)=0\\). The \\(\\theta\\) dependence is included in the notation to highlight that the solution \\(y(t)\\) is a function of any parameters used in the computation.\n\n\n\nAs an example of a system of DAEs, consider following chemical kinetics problem(Robertson 1966). The nondimensionalized DAE consists of two differential equations and one algebraic constraint. 
The differential equations describe the reactions from reactants \\(y_1\\) and \\(y_2\\) to the product \\(y_3\\), and the algebraic equation describes the mass conservation. (Serban and Hindmarsh 2021).\n\\[\\begin{align*}\n\\frac{dy_1}{dt} + \\alpha y_1 - \\beta y_2 y_3 = 0 \\\\\n\\frac{dy_2}{dt} - \\alpha y_1 + \\beta y_2 y_3 + \\gamma y_2^2 = 0 \\\\\ny_1 + y_2 + y_3 - 1.0 = 0\n\\end{align*}\\]\nThe state equations implicitly defines the state \\((y_1(t), y_2(t), y_3(t))\\) at future times as a function of an initial state and the system parameters, in this example the reaction rate coefficients \\((\\alpha, \\beta, \\gamma)\\).\nUnlike solving ODEs, solving DAEs requires a consistent initial condition. That is, one must specify both \\(y(t_0)\\) and \\(y'(t_0)\\) so that residual function becomes zero at initial time \\(t_0\\) \\[\\begin{equation*}\nr(y'(t_0), y(t_0), t_0) = 0\n\\end{equation*}\\]\n\n\n\nThe index along a DAE solution \\(y(t)\\) is the minimum number of differentiations of some of the components of the system required to solve for \\(y'\\) uniquely in terms of \\(y\\) and \\(t\\), so that the DAE is converted into an ODE for \\(y\\). Thus an ODE system is of index 0. The above chemical kinetics DAE is of index 1, as we can perform differentiation of the third equation followed by introducing the first two equations in order to obtain the ODE for \\(y_3\\).\nMost DAE solvers, including the one in Stan, support only index-1 DAEs. For a high index DAE problem the user must first convert it to a lower index system. This often can be done by carrying out differentiations analytically (Ascher and Petzold 1998).\n\n\n\nThe first step in coding an DAE system in Stan is defining the DAE residual function. 
The system functions require a specific signature so that the solvers know how to use them properly.\nThe first argument to the residual function is time, passed as a real; the second argument to the residual function is the system state \\(y\\), passed as a vector, the third argument to the residual function is the state derivative \\(y'\\), also passed as a vector. The residual function’s return value is a vector of the same size as state and state derivatives. Additional arguments can be included in the residual function to pass other information into the solve (these will be passed through the function that starts the DAE solution). These argument can be parameters (in our example, the reaction rate coefficient \\(\\alpha\\), \\(\\beta\\), and \\(\\gamma\\)), data, or any quantities that are needed to define the DAE.\nThe above reaction be coded using the following function in Stan (see the user-defined functions chapter for more information on coding user-defined functions).\nvector chem(real t, vector yy, vector yp,\n real alpha, real beta, real gamma) {\n vector[3] res;\n res[1] = yp[1] + alpha * yy[1] - beta * yy[2] * yy[3];\n res[2] = yp[2] - alpha * yy[1] + beta * yy[2] * yy[3] + gamma * yy[2] * yy[2];\n res[3] = yy[1] + yy[2] + yy[3] - 1.0;\n return res;\n}\nThe function takes in a time t (a real), the system state yy (a vector), state derivative yp (a vector), as well as parameter alpha (a real), beta (a real), and gamma (a real). The function returns a vector of the residuals at time t. The DAE coded here does not explicitly depend on t, however one still needs to specify t as an argument.\n\n\nThe types in the DAE residual function are strict. The first argument is the time passed as a real, the second argument is the state passed as a vector, the third argument is the state derivative passed as a vector, and the return type is a vector. A model that does not have this signature will fail to compile. 
The fourth argument onwards can be any type, granted all the argument types match the types of the respective arguments in the solver call.\nAll of these are possible DAE signatures:\nvector my_dae1(real t, vector y, vector yp, real a0);\nvector my_dae2(real t, vector y, vector yp, array[] int a0, vector a1);\nvector my_dae3(real t, vector y, vector yp, matrix a0, array[] real a1, row_vector a2);\nbut these are not allowed:\nvector my_dae1(real t, array[] real y, vector yp);\n// Second argument is not a vector\narray[] real my_dae2(real t, vector y, vector yp);\n// Return type is not a vector\nvector my_dae3(real t, vector y);\n// First argument is not a real and missing the third argument\n\n\n\n\nStan provides a dae function for solving DAEs, so that the above chemical reaction equation can be solved in the following code.\ndata {\n int N;\n vector[3] yy0;\n vector[3] yp0;\n real t0;\n real alpha;\n real beta;\n array[N] real ts;\n array[N] vector[3] y;\n}\nparameters {\n real gamma;\n}\ntransformed parameters {\n vector[3] y_hat[N] = dae(chem, yy0, yp0, t0, ts, alpha, beta, gamma);\n}\nSince gamma is a parameter, the DAE solver is called in the transformed parameters block.\n\n\n\nUsing dae_tol one can specify the relative_tolerance, absolute_tolerance, and max_num_steps parameters in order to control the DAE solution.\nvector[3] y_hat[N] = dae_tol(chem, yy0, yp0, t0, ts,\n relative_tolerance,\n absolute_tolerance,\n max_num_steps,\n alpha, beta, gamma);\nrelative_tolerance and absolute_tolerance control accuracy the solver tries to achieve, and max_num_steps specifies the maximum number of steps the solver will take between output time points before throwing an error.\nThe control parameters must be data variables – they cannot be parameters or expressions that depend on parameters, including local variables in any block other than transformed data and generated quantities. 
User-defined function arguments may be qualified as only allowing data arguments using the data qualifier.\nThe default values of the relative and absolute tolerances are \\(10^{-10}\\) and the maximum number of steps between outputs is one hundred million. We suggest the user choose the control parameters according to the problem at hand, and resort to the defaults only when no knowledge of the DAE system or the physics it models is available.\n\n\nThe maximum number of steps can be used to stop a runaway simulation. This can arise when MCMC moves to a part of parameter space very far from where a differential equation would typically be solved. In particular this can happen during warmup. With the non-stiff solver, this may happen when the sampler moves to stiff regions of parameter space, which will require small step sizes.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Differential-Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/dae.html#notation", + "href": "stan-users-guide/dae.html#notation", + "title": "Differential-Algebraic Equations", + "section": "", + "text": "A DAE is defined by a set of expressions for the residuals of differential equations and algebraic equations \\(r(y', y, t, \\theta)\\), and consistent initial conditions \\(y(t_0, \\theta) = y_0, y'(t_0, \\theta)=y'_0\\). The DAE is defined by the residual function as \\(r(y', y, t, \\theta)=0\\). The \\(\\theta\\) dependence is included in the notation to highlight that the solution \\(y(t)\\) is a function of any parameters used in the computation.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Differential-Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/dae.html#example-chemical-kinetics", + "href": "stan-users-guide/dae.html#example-chemical-kinetics", + "title": "Differential-Algebraic Equations", + "section": "", + "text": "As an example of a system of DAEs, consider the following chemical kinetics problem (Robertson 1966). 
The nondimensionalized DAE consists of two differential equations and one algebraic constraint. The differential equations describe the reactions from reactants \\(y_1\\) and \\(y_2\\) to the product \\(y_3\\), and the algebraic equation describes the mass conservation. (Serban and Hindmarsh 2021).\n\\[\\begin{align*}\n\\frac{dy_1}{dt} + \\alpha y_1 - \\beta y_2 y_3 = 0 \\\\\n\\frac{dy_2}{dt} - \\alpha y_1 + \\beta y_2 y_3 + \\gamma y_2^2 = 0 \\\\\ny_1 + y_2 + y_3 - 1.0 = 0\n\\end{align*}\\]\nThe state equations implicitly defines the state \\((y_1(t), y_2(t), y_3(t))\\) at future times as a function of an initial state and the system parameters, in this example the reaction rate coefficients \\((\\alpha, \\beta, \\gamma)\\).\nUnlike solving ODEs, solving DAEs requires a consistent initial condition. That is, one must specify both \\(y(t_0)\\) and \\(y'(t_0)\\) so that residual function becomes zero at initial time \\(t_0\\) \\[\\begin{equation*}\nr(y'(t_0), y(t_0), t_0) = 0\n\\end{equation*}\\]", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Differential-Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/dae.html#index-of-daes", + "href": "stan-users-guide/dae.html#index-of-daes", + "title": "Differential-Algebraic Equations", + "section": "", + "text": "The index along a DAE solution \\(y(t)\\) is the minimum number of differentiations of some of the components of the system required to solve for \\(y'\\) uniquely in terms of \\(y\\) and \\(t\\), so that the DAE is converted into an ODE for \\(y\\). Thus an ODE system is of index 0. The above chemical kinetics DAE is of index 1, as we can perform differentiation of the third equation followed by introducing the first two equations in order to obtain the ODE for \\(y_3\\).\nMost DAE solvers, including the one in Stan, support only index-1 DAEs. For a high index DAE problem the user must first convert it to a lower index system. 
This often can be done by carrying out differentiations analytically (Ascher and Petzold 1998).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Differential-Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/dae.html#coding-the-dae-system-function", + "href": "stan-users-guide/dae.html#coding-the-dae-system-function", + "title": "Differential-Algebraic Equations", + "section": "", + "text": "The first step in coding a DAE system in Stan is defining the DAE residual function. The system functions require a specific signature so that the solvers know how to use them properly.\nThe first argument to the residual function is time, passed as a real; the second argument to the residual function is the system state \\(y\\), passed as a vector, the third argument to the residual function is the state derivative \\(y'\\), also passed as a vector. The residual function’s return value is a vector of the same size as state and state derivatives. Additional arguments can be included in the residual function to pass other information into the solver (these will be passed through the function that starts the DAE solution). These arguments can be parameters (in our example, the reaction rate coefficients \\(\\alpha\\), \\(\\beta\\), and \\(\\gamma\\)), data, or any quantities that are needed to define the DAE.\nThe above reaction can be coded using the following function in Stan (see the user-defined functions chapter for more information on coding user-defined functions).\nvector chem(real t, vector yy, vector yp,\n real alpha, real beta, real gamma) {\n vector[3] res;\n res[1] = yp[1] + alpha * yy[1] - beta * yy[2] * yy[3];\n res[2] = yp[2] - alpha * yy[1] + beta * yy[2] * yy[3] + gamma * yy[2] * yy[2];\n res[3] = yy[1] + yy[2] + yy[3] - 1.0;\n return res;\n}\nThe function takes in a time t (a real), the system state yy (a vector), state derivative yp (a vector), as well as parameters alpha (a real), beta (a real), and gamma (a real). 
The function returns a vector of the residuals at time t. The DAE coded here does not explicitly depend on t, however one still needs to specify t as an argument.\n\n\nThe types in the DAE residual function are strict. The first argument is the time passed as a real, the second argument is the state passed as a vector, the third argument is the state derivative passed as a vector, and the return type is a vector. A model that does not have this signature will fail to compile. The fourth argument onwards can be any type, granted all the argument types match the types of the respective arguments in the solver call.\nAll of these are possible DAE signatures:\nvector my_dae1(real t, vector y, vector yp, real a0);\nvector my_dae2(real t, vector y, vector yp, array[] int a0, vector a1);\nvector my_dae3(real t, vector y, vector yp, matrix a0, array[] real a1, row_vector a2);\nbut these are not allowed:\nvector my_dae1(real t, array[] real y, vector yp);\n// Second argument is not a vector\narray[] real my_dae2(real t, vector y, vector yp);\n// Return type is not a vector\nvector my_dae3(real t, vector y);\n// First argument is not a real and missing the third argument", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Differential-Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/dae.html#solving-daes", + "href": "stan-users-guide/dae.html#solving-daes", + "title": "Differential-Algebraic Equations", + "section": "", + "text": "Stan provides a dae function for solving DAEs, so that the above chemical reaction equation can be solved in the following code.\ndata {\n int N;\n vector[3] yy0;\n vector[3] yp0;\n real t0;\n real alpha;\n real beta;\n array[N] real ts;\n array[N] vector[3] y;\n}\nparameters {\n real gamma;\n}\ntransformed parameters {\n vector[3] y_hat[N] = dae(chem, yy0, yp0, t0, ts, alpha, beta, gamma);\n}\nSince gamma is a parameter, the DAE solver is called in the transformed parameters block.", + "crumbs": [ + "Stan Users Guide", + 
"Example Models", + "Differential-Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/dae.html#control-dae.section", + "href": "stan-users-guide/dae.html#control-dae.section", + "title": "Differential-Algebraic Equations", + "section": "", + "text": "Using dae_tol one can specify the relative_tolerance, absolute_tolerance, and max_num_steps parameters in order to control the DAE solution.\nvector[3] y_hat[N] = dae_tol(chem, yy0, yp0, t0, ts,\n relative_tolerance,\n absolute_tolerance,\n max_num_steps,\n alpha, beta, gamma);\nrelative_tolerance and absolute_tolerance control the accuracy the solver tries to achieve, and max_num_steps specifies the maximum number of steps the solver will take between output time points before throwing an error.\nThe control parameters must be data variables – they cannot be parameters or expressions that depend on parameters, including local variables in any block other than transformed data and generated quantities. User-defined function arguments may be qualified as only allowing data arguments using the data qualifier.\nThe default values of the relative and absolute tolerances are \\(10^{-10}\\) and the maximum number of steps between outputs is one hundred million. We suggest the user choose the control parameters according to the problem at hand, and resort to the defaults only when no knowledge of the DAE system or the physics it models is available.\n\n\nThe maximum number of steps can be used to stop a runaway simulation. This can arise when MCMC moves to a part of parameter space very far from where a differential equation would typically be solved. In particular this can happen during warmup. 
With the non-stiff solver, this may happen when the sampler moves to stiff regions of parameter space, which will require small step sizes.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Differential-Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/cross-validation.html", + "href": "stan-users-guide/cross-validation.html", + "title": "Held-Out Evaluation and Cross-Validation", + "section": "", + "text": "Held-out evaluation involves splitting a data set into two parts, a training data set and a test data set. The training data is used to estimate the model and the test data is used for evaluation. Held-out evaluation is commonly used to declare winners in predictive modeling competitions such as those run by Kaggle.\nCross-validation involves repeated held-out evaluations performed by partitioning a single data set in different ways. The training/test split can be done either by randomly selecting the test set, or by partitioning the data set into several equally-sized subsets and then using each subset in turn as the test data with the other folds as training data.\nHeld-out evaluation and cross-validation may involve any kind of predictive statistics, with common choices being the predictive log density on test data, squared error of parameter estimates, or accuracy in a classification task.\n\n\nGiven training data \\((x, y)\\) consisting of parallel sequences of predictors and observations and test data \\((\\tilde{x}, \\tilde{y})\\) of the same structure, the posterior predictive density is \\[\np(\\tilde{y} \\mid \\tilde{x}, x, y)\n=\n\\int\n p(\\tilde{y} \\mid \\tilde{x}, \\theta)\n \\cdot p(\\theta \\mid x, y)\n\\, \\textrm{d}\\theta,\n\\]\nwhere \\(\\theta\\) is the vector of model parameters. 
This predictive density is the density of the test observations, conditioned on both the test predictors \\(\\tilde{x}\\) and the training data \\((x, y).\\)\nThis integral may be calculated with Monte Carlo methods as usual, \\[\np(\\tilde{y} \\mid \\tilde{x}, x, y)\n\\approx\n\\frac{1}{M} \\sum_{m = 1}^M p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}),\n\\] where the \\(\\theta^{(m)} \\sim p(\\theta \\mid x, y)\\) are draws from the posterior given only the training data \\((x, y).\\)\nTo avoid underflow in calculations, it will be more stable to compute densities on the log scale. Taking the logarithm and pushing it through results in a stable computation, \\[\\begin{eqnarray*}\n\\log p(\\tilde{y} \\mid \\tilde{x}, x, y)\n& \\approx &\n\\log \\frac{1}{M} \\sum_{m = 1}^M p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}),\n\\\\[4pt]\n& = & -\\log M + \\log \\sum_{m = 1}^M p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}),\n\\\\[4pt]\n& = & -\\log M + \\log \\sum_{m = 1}^M \\exp(\\log p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}))\n\\\\[4pt]\n& = & -\\log M + \\textrm{log-sum-exp}_{m = 1}^M \\log p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)})\n\\end{eqnarray*}\\] where the log sum of exponentials function is defined so as to make the above equation hold, \\[\n\\textrm{log-sum-exp}_{m = 1}^M \\, \\mu_m\n= \\log \\sum_{m=1}^M \\exp(\\mu_m).\n\\] The log sum of exponentials function can be implemented so as to avoid underflow and maintain high arithmetic precision as \\[\n\\textrm{log-sum-exp}_{m = 1}^M \\mu_m\n= \\textrm{max}(\\mu)\n+ \\log \\sum_{m = 1}^M \\exp(\\mu_m - \\textrm{max}(\\mu)).\n\\] Pulling the maximum out preserves all of its precision. By subtracting the maximum, the terms \\(\\mu_m - \\textrm{max}(\\mu) \\leq 0\\), and thus will not overflow.\n\n\nTo evaluate the log predictive density of a model, it suffices to implement the log predictive density of the test data in the generated quantities block. 
The log sum of exponentials calculation must be done on the outside of Stan using the posterior draws of \\(\\log p(\\tilde{y} \\mid \\tilde{x},\n\\theta^{(m)}).\\)\nHere is the code for evaluating the log posterior predictive density in a simple linear regression of the test data \\(\\tilde{y}\\) given predictors \\(\\tilde{x}\\) and training data \\((x, y).\\)\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n vector[N_tilde] y_tilde;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n}\ngenerated quantities {\n real log_p = normal_lpdf(y_tilde | alpha + beta * x_tilde, sigma);\n}\nOnly the training data x and y are used in the model block. The test data y_tilde and test predictors x_tilde appear in only the generated quantities block. Thus the program is not cheating by using the test data during training. Although this model does not do so, it would be fair to use x_tilde in the model block—only the test observations y_tilde are unknown before they are predicted.\nGiven \\(M\\) posterior draws from Stan, the sequence log_p[1:M] will be available, so that the log posterior predictive density of the test data given training data and predictors is just log_sum_exp(log_p) - log(M).\n\n\n\n\n\n\nEstimation is usually considered for unknown parameters. If the data from which the parameters were estimated came from simulated data, the true value of the parameters may be known. If \\(\\theta\\) is the true value and \\(\\hat{\\theta}\\) the estimate, then error is just the difference between the prediction and the true value, \\[\n\\textrm{err} = \\hat{\\theta} - \\theta.\n\\]\nIf the estimate is larger than the true value, the error is positive, and if it’s smaller, then error is negative. If an estimator’s unbiased, then expected error is zero. 
So typically, absolute error or squared error are used, which will always have positive expectations for an imperfect estimator. Absolute error is defined as \\[\n\\textrm{abs-err} = \\left| \\hat{\\theta} - \\theta \\right|\n\\] and squared error as \\[\n\\textrm{sq-err} = \\left( \\hat{\\theta} - \\theta \\right)^2.\n\\] Gneiting and Raftery (2007) provide a thorough overview of such scoring rules and their properties.\nBayesian posterior means minimize expected square error, whereas posterior medians minimize expected absolute error. Estimates based on modes rather than probability, such as (penalized) maximum likelihood estimates or maximum a posteriori estimates, do not have these properties.\n\n\n\nIn addition to parameters, other unknown quantities may be estimated, such as the score of a football match or the effect of a medical treatment given to a subject. In these cases, square error is defined in the same way. If there are multiple exchangeable outcomes being estimated, \\(z_1, \\ldots, z_N,\\) then it is common to report mean square error (MSE), \\[\n\\textrm{mse}\n= \\frac{1}{N} \\sum_{n = 1}^N \\left( \\hat{z}_n - z_n\\right)^2.\n\\] To put the error back on the scale of the original value, the square root may be applied, resulting in what is known prosaically as root mean square error (RMSE), \\[\n\\textrm{rmse} = \\sqrt{\\textrm{mse}}.\n\\]\n\n\n\nConsider a simple linear regression model with parameters for the intercept \\(\\alpha\\) and slope \\(\\beta\\), along with predictors \\(\\tilde{x}_n\\). 
The standard Bayesian estimate is the expected value of \\(\\tilde{y}\\) given the predictors and training data, \\[\\begin{eqnarray*}\n\\hat{\\tilde{y}}_n\n& = & \\mathbb{E}[\\tilde{y}_n \\mid \\tilde{x}_n, x, y]\n\\\\[4pt]\n& \\approx & \\frac{1}{M} \\sum_{m = 1}^M \\tilde{y}_n^{(m)}\n\\end{eqnarray*}\\] where \\(\\tilde{y}_n^{(m)}\\) is drawn from the data model \\[\n\\tilde{y}_n^{(m)}\n\\sim p(\\tilde{y}_n \\mid \\tilde{x}_n, \\alpha^{(m)}, \\beta^{(m)}),\n\\] for parameters \\(\\alpha^{(m)}\\) and \\(\\beta^{(m)}\\) drawn from the posterior, \\[\n(\\alpha^{(m)}, \\beta^{(m)}) \\sim p(\\alpha, \\beta \\mid x, y).\n\\]\nIn the linear regression case, two stages of simplification can be carried out, the first of which helpfully reduces the variance of the estimator. First, rather than averaging draws \\(\\tilde{y}_n^{(m)}\\), the same result is obtained by averaging linear predictions, \\[\\begin{eqnarray*}\n\\hat{\\tilde{y}}_n\n& = & \\mathbb{E}\\left[\n \\alpha + \\beta \\cdot \\tilde{x}_n\n \\mid \\tilde{x}_n, x, y\n \\right]\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M\n \\alpha^{(m)} + \\beta^{(m)} \\cdot \\tilde{x}_n.\n\\end{eqnarray*}\\] This is possible because \\[\n\\tilde{y}_n^{(m)} \\sim \\textrm{normal}(\\tilde{y}_n \\mid \\alpha^{(m)} +\n\\beta^{(m)} \\cdot \\tilde{x}_n, \\sigma^{(m)}),\n\\] and the normal distribution has symmetric error so that the expectation of \\(\\tilde{y}_n^{(m)}\\) is the same as \\(\\alpha^{(m)} + \\beta^{(m)} \\cdot\n\\tilde{x}_n\\). 
Replacing the sampled quantity \\(\\tilde{y}_n^{(m)}\\) with its expectation is a general variance reduction technique for Monte Carlo estimates known as Rao-Blackwellization (Rao 1945; Blackwell 1947).\nIn the linear case, because the predictor is linear in the coefficients, the estimate can be further simplified to use the estimated coefficients, \\[\\begin{eqnarray*}\n\\hat{\\tilde{y}}_n\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M\n \\left( \\alpha^{(m)} + \\beta^{(m)} \\cdot \\tilde{x}_n \\right)\n\\\\[4pt]\n& = & \\frac{1}{M} \\sum_{m = 1}^M \\alpha^{(m)}\n + \\frac{1}{M} \\sum_{m = 1}^M (\\beta^{(m)} \\cdot \\tilde{x}_n)\n\\\\[4pt]\n& = & \\frac{1}{M} \\sum_{m = 1}^M \\alpha^{(m)}\n + \\left( \\frac{1}{M} \\sum_{m = 1}^M \\beta^{(m)}\\right) \\cdot \\tilde{x}_n\n\\\\[4pt]\n& = & \\hat{\\alpha} + \\hat{\\beta} \\cdot \\tilde{x}_n.\n\\end{eqnarray*}\\]\nIn Stan, only the first of the two steps (the important variance reduction step) can be coded in the object model. The linear predictor is defined in the generated quantities block.\ndata {\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n // ...\n}\n// ...\ngenerated quantities {\n vector[N_tilde] tilde_y = alpha + beta * x_tilde;\n}\nThe posterior mean of tilde_y calculated by Stan is the Bayesian estimate \\(\\hat{\\tilde{y}}.\\) The posterior median may also be calculated and used as an estimate, though square error and the posterior mean are more commonly reported.\n\n\n\n\nCross-validation involves choosing multiple subsets of a data set as the test set and using the other data as training. This can be done by partitioning the data and using each subset in turn as the test set with the remaining subsets as training data. A partition into ten subsets is common to reduce computational overhead. 
In the limit, when the test set is just a single item, the result is known as leave-one-out (LOO) cross-validation (Vehtari, Gelman, and Gabry 2017).\nPartitioning the data and reusing the partitions is very fiddly in the indexes and may not lead to even divisions of the data. It’s far easier to use random partitions, which support arbitrarily sized test/training splits and can be easily implemented in Stan. The drawback is that the variance of the resulting estimate is higher than with a balanced block partition.\n\n\nFor the simple linear regression model, randomized cross-validation can be implemented in a single model. To randomly permute a vector in Stan, the simplest approach is the following.\nfunctions {\n array[] int permutation_rng(int N) {\n array[N] int y;\n for (n in 1 : N) {\n y[n] = n;\n }\n vector[N] theta = rep_vector(1.0 / N, N);\n for (n in 1 : size(y)) {\n int i = categorical_rng(theta);\n int temp = y[n];\n y[n] = y[i];\n y[i] = temp;\n }\n return y;\n }\n}\nThe name of the function must end in _rng because it uses other random functions internally. This will restrict its usage to the transformed data and generated quantities block. 
The code walks through an array of integers exchanging each item with another randomly chosen item, resulting in a uniformly drawn permutation of the integers 1:N.1\nThe transformed data block uses the permutation RNG to generate training data and test data by taking prefixes and suffixes of the permuted data.\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n int<lower=0, upper=N> N_test;\n}\ntransformed data {\n int N_train = N - N_test;\n array[N] int permutation = permutation_rng(N);\n vector[N_train] x_train = x[permutation[1 : N_train]];\n vector[N_train] y_train = y[permutation[1 : N_train]];\n vector[N_test] x_test = x[permutation[N_train + 1 : N]];\n vector[N_test] y_test = y[permutation[N_train + 1 : N]];\n}\nRecall that in Stan, permutation[1:N_train] is an array of integers, so that x[permutation[1 : N_train]] is a vector defined for i in 1:N_train by\nx[permutation[1 : N_train]][i] = x[permutation[1:N_train][i]]\n = x[permutation[i]]\nGiven the test/train split, the rest of the model is straightforward.\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y_train ~ normal(alpha + beta * x_train, sigma);\n { alpha, beta, sigma } ~ normal(0, 1);\n}\ngenerated quantities {\n vector[N] y_test_hat = normal_rng(alpha + beta * x_test, sigma);\n vector[N] err = y_test_sim - y_hat;\n}\nThe prediction y_test_hat is defined in the generated quantities block using the general form involving all uncertainty. 
The posterior of this quantity corresponds to using a posterior mean estimator, \\[\\begin{eqnarray*}\n\\hat{y}^{\\textrm{test}}\n& = & \\mathbb{E}\\left[ y^{\\textrm{test}} \\mid x^{\\textrm{test}}, x^{\\textrm{train}}, y^{\\textrm{train}} \\right]\n\\\\[4pt]\n& \\approx & \\frac{1}{M} \\sum_{m = 1}^M \\hat{y}^{\\textrm{test}(m)}.\n\\end{eqnarray*}\\]\nBecause the test set is constant and the expectation operator is linear, the posterior mean of err as defined in the Stan program will be the error of the posterior mean estimate, \\[\\begin{eqnarray*}\n \\hat{y}^{\\textrm{test}} - y^{\\textrm{test}}\n& = &\n\\mathbb{E}\\left[\n \\hat{y}^{\\textrm{test}}\n \\mid x^{\\textrm{test}}, x^{\\textrm{train}}, y^{\\textrm{train}}\n\\right]\n - y^{\\textrm{test}}\n\\\\[4pt]\n& = &\n\\mathbb{E}\\left[\n \\hat{y}^{\\textrm{test}} - y^{\\textrm{test}}\n \\mid x^{\\textrm{test}}, x^{\\textrm{train}}, y^{\\textrm{train}}\n\\right]\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M \\hat{y}^{\\textrm{test}(m)} - y^{\\textrm{test}},\n\\end{eqnarray*}\\] where \\[\n\\hat{y}^{\\textrm{test}(m)}\n\\sim p(y \\mid x^{\\textrm{test}}, x^{\\textrm{train}},\ny^{\\textrm{train}}).\n\\] This just calculates error; taking absolute value or squaring will compute absolute error and mean square error. Note that the absolute value and square operation should not be done within the Stan program because neither is a linear function and the result of averaging squares is not the same as squaring an average in general.\nBecause the test set size is chosen for convenience in cross-validation, results should be presented on a per-item scale, such as average absolute error or root mean square error, not on the scale of error in the fold being evaluated.\n\n\n\nIt is straightforward to declare the variable permutation in the data block instead of the transformed data block and read it in as data. 
This allows an external program to control the blocking, allowing non-random partitions to be evaluated.\n\n\n\nCross-validation must be done with care if the data is inherently structured. For example, in a simple natural language application, data might be structured by document. For cross-validation, one needs to cross-validate at the document level, not at the individual word level. This is related to mixed replication in posterior predictive checking, where there is a choice to simulate new elements of existing groups or generate entirely new groups.\nEducation testing applications are typically grouped by school district, by school, by classroom, and by demographic features of the individual students or the school as a whole. Depending on the variables of interest, different structured subsets should be evaluated. For example, the focus of interest may be on the performance of entire classrooms, so it would make sense to cross-validate at the class or school level on classroom performance.\n\n\n\nOften data measurements have spatial or temporal properties. For example, home energy consumption varies by time of day, day of week, on holidays, by season, and by ambient temperature (e.g., a hot spell or a cold snap). Cross-validation must be tailored to the predictive goal. For example, in predicting energy consumption, the quantity of interest may be the prediction for next week’s energy consumption given historical data and current weather covariates. This suggests an alternative to cross-validation, wherein individual weeks are each tested given previous data. This often allows comparing how well prediction performs with more or less historical data.\n\n\n\nVehtari, Gelman, and Gabry (2017) introduce a method that approximates the evaluation of leave-one-out cross validation inexpensively using only the data point log likelihoods from a single model fit. This method is documented and implemented in the R package loo (Gabry et al. 
2019).", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Held-Out Evaluation and Cross-Validation" + ] + }, + { + "objectID": "stan-users-guide/cross-validation.html#evaluating-posterior-predictive.section", + "href": "stan-users-guide/cross-validation.html#evaluating-posterior-predictive.section", + "title": "Held-Out Evaluation and Cross-Validation", + "section": "", + "text": "Given training data \\((x, y)\\) consisting of parallel sequences of predictors and observations and test data \\((\\tilde{x}, \\tilde{y})\\) of the same structure, the posterior predictive density is \\[\np(\\tilde{y} \\mid \\tilde{x}, x, y)\n=\n\\int\n p(\\tilde{y} \\mid \\tilde{x}, \\theta)\n \\cdot p(\\theta \\mid x, y)\n\\, \\textrm{d}\\theta,\n\\]\nwhere \\(\\theta\\) is the vector of model parameters. This predictive density is the density of the test observations, conditioned on both the test predictors \\(\\tilde{x}\\) and the training data \\((x, y).\\)\nThis integral may be calculated with Monte Carlo methods as usual, \\[\np(\\tilde{y} \\mid \\tilde{x}, x, y)\n\\approx\n\\frac{1}{M} \\sum_{m = 1}^M p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}),\n\\] where the \\(\\theta^{(m)} \\sim p(\\theta \\mid x, y)\\) are draws from the posterior given only the training data \\((x, y).\\)\nTo avoid underflow in calculations, it will be more stable to compute densities on the log scale. 
Taking the logarithm and pushing it through results in a stable computation, \\[\\begin{eqnarray*}\n\\log p(\\tilde{y} \\mid \\tilde{x}, x, y)\n& \\approx &\n\\log \\frac{1}{M} \\sum_{m = 1}^M p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}),\n\\\\[4pt]\n& = & -\\log M + \\log \\sum_{m = 1}^M p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}),\n\\\\[4pt]\n& = & -\\log M + \\log \\sum_{m = 1}^M \\exp(\\log p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)}))\n\\\\[4pt]\n& = & -\\log M + \\textrm{log-sum-exp}_{m = 1}^M \\log p(\\tilde{y} \\mid \\tilde{x}, \\theta^{(m)})\n\\end{eqnarray*}\\] where the log sum of exponentials function is defined so as to make the above equation hold, \\[\n\\textrm{log-sum-exp}_{m = 1}^M \\, \\mu_m\n= \\log \\sum_{m=1}^M \\exp(\\mu_m).\n\\] The log sum of exponentials function can be implemented so as to avoid underflow and maintain high arithmetic precision as \\[\n\\textrm{log-sum-exp}_{m = 1}^M \\mu_m\n= \\textrm{max}(\\mu)\n+ \\log \\sum_{m = 1}^M \\exp(\\mu_m - \\textrm{max}(\\mu)).\n\\] Pulling the maximum out preserves all of its precision. By subtracting the maximum, the terms \\(\\mu_m - \\textrm{max}(\\mu) \\leq 0\\), and thus will not overflow.\n\n\nTo evaluate the log predictive density of a model, it suffices to implement the log predictive density of the test data in the generated quantities block. 
The log sum of exponentials calculation must be done on the outside of Stan using the posterior draws of \\(\\log p(\\tilde{y} \\mid \\tilde{x},\n\\theta^{(m)}).\\)\nHere is the code for evaluating the log posterior predictive density in a simple linear regression of the test data \\(\\tilde{y}\\) given predictors \\(\\tilde{x}\\) and training data \\((x, y).\\)\ndata {\n int<lower=0> N;\n vector[N] y;\n vector[N] x;\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n vector[N_tilde] y_tilde;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n}\ngenerated quantities {\n real log_p = normal_lpdf(y_tilde | alpha + beta * x_tilde, sigma);\n}\nOnly the training data x and y are used in the model block. The test data y_tilde and test predictors x_tilde appear in only the generated quantities block. Thus the program is not cheating by using the test data during training. Although this model does not do so, it would be fair to use x_tilde in the model block—only the test observations y_tilde are unknown before they are predicted.\nGiven \\(M\\) posterior draws from Stan, the sequence log_p[1:M] will be available, so that the log posterior predictive density of the test data given training data and predictors is just log_sum_exp(log_p) - log(M).", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Held-Out Evaluation and Cross-Validation" + ] + }, + { + "objectID": "stan-users-guide/cross-validation.html#estimation-error", + "href": "stan-users-guide/cross-validation.html#estimation-error", + "title": "Held-Out Evaluation and Cross-Validation", + "section": "", + "text": "Estimation is usually considered for unknown parameters. If the data from which the parameters were estimated came from simulated data, the true value of the parameters may be known. 
If \\(\\theta\\) is the true value and \\(\\hat{\\theta}\\) the estimate, then error is just the difference between the prediction and the true value, \\[\n\\textrm{err} = \\hat{\\theta} - \\theta.\n\\]\nIf the estimate is larger than the true value, the error is positive, and if it’s smaller, then error is negative. If an estimator’s unbiased, then expected error is zero. So typically, absolute error or squared error are used, which will always have positive expectations for an imperfect estimator. Absolute error is defined as \\[\n\\textrm{abs-err} = \\left| \\hat{\\theta} - \\theta \\right|\n\\] and squared error as \\[\n\\textrm{sq-err} = \\left( \\hat{\\theta} - \\theta \\right)^2.\n\\] Gneiting and Raftery (2007) provide a thorough overview of such scoring rules and their properties.\nBayesian posterior means minimize expected square error, whereas posterior medians minimize expected absolute error. Estimates based on modes rather than probability, such as (penalized) maximum likelihood estimates or maximum a posteriori estimates, do not have these properties.\n\n\n\nIn addition to parameters, other unknown quantities may be estimated, such as the score of a football match or the effect of a medical treatment given to a subject. In these cases, square error is defined in the same way. If there are multiple exchangeable outcomes being estimated, \\(z_1, \\ldots, z_N,\\) then it is common to report mean square error (MSE), \\[\n\\textrm{mse}\n= \\frac{1}{N} \\sum_{n = 1}^N \\left( \\hat{z}_n - z_n\\right)^2.\n\\] To put the error back on the scale of the original value, the square root may be applied, resulting in what is known prosaically as root mean square error (RMSE), \\[\n\\textrm{rmse} = \\sqrt{\\textrm{mse}}.\n\\]\n\n\n\nConsider a simple linear regression model with parameters for the intercept \\(\\alpha\\) and slope \\(\\beta\\), along with predictors \\(\\tilde{x}_n\\). 
The standard Bayesian estimate is the expected value of \\(\\tilde{y}\\) given the predictors and training data, \\[\\begin{eqnarray*}\n\\hat{\\tilde{y}}_n\n& = & \\mathbb{E}[\\tilde{y}_n \\mid \\tilde{x}_n, x, y]\n\\\\[4pt]\n& \\approx & \\frac{1}{M} \\sum_{m = 1}^M \\tilde{y}_n^{(m)}\n\\end{eqnarray*}\\] where \\(\\tilde{y}_n^{(m)}\\) is drawn from the data model \\[\n\\tilde{y}_n^{(m)}\n\\sim p(\\tilde{y}_n \\mid \\tilde{x}_n, \\alpha^{(m)}, \\beta^{(m)}),\n\\] for parameters \\(\\alpha^{(m)}\\) and \\(\\beta^{(m)}\\) drawn from the posterior, \\[\n(\\alpha^{(m)}, \\beta^{(m)}) \\sim p(\\alpha, \\beta \\mid x, y).\n\\]\nIn the linear regression case, two stages of simplification can be carried out, the first of which helpfully reduces the variance of the estimator. First, rather than averaging draws \\(\\tilde{y}_n^{(m)}\\), the same result is obtained by averaging linear predictions, \\[\\begin{eqnarray*}\n\\hat{\\tilde{y}}_n\n& = & \\mathbb{E}\\left[\n \\alpha + \\beta \\cdot \\tilde{x}_n\n \\mid \\tilde{x}_n, x, y\n \\right]\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M\n \\alpha^{(m)} + \\beta^{(m)} \\cdot \\tilde{x}_n.\n\\end{eqnarray*}\\] This is possible because \\[\n\\tilde{y}_n^{(m)} \\sim \\textrm{normal}(\\tilde{y}_n \\mid \\alpha^{(m)} +\n\\beta^{(m)} \\cdot \\tilde{x}_n, \\sigma^{(m)}),\n\\] and the normal distribution has symmetric error so that the expectation of \\(\\tilde{y}_n^{(m)}\\) is the same as \\(\\alpha^{(m)} + \\beta^{(m)} \\cdot\n\\tilde{x}_n\\). 
Replacing the sampled quantity \\(\\tilde{y}_n^{(m)}\\) with its expectation is a general variance reduction technique for Monte Carlo estimates known as Rao-Blackwellization (Rao 1945; Blackwell 1947).\nIn the linear case, because the predictor is linear in the coefficients, the estimate can be further simplified to use the estimated coefficients, \\[\\begin{eqnarray*}\n\\hat{\\tilde{y}}_n\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M\n \\left( \\alpha^{(m)} + \\beta^{(m)} \\cdot \\tilde{x}_n \\right)\n\\\\[4pt]\n& = & \\frac{1}{M} \\sum_{m = 1}^M \\alpha^{(m)}\n + \\frac{1}{M} \\sum_{m = 1}^M (\\beta^{(m)} \\cdot \\tilde{x}_n)\n\\\\[4pt]\n& = & \\frac{1}{M} \\sum_{m = 1}^M \\alpha^{(m)}\n + \\left( \\frac{1}{M} \\sum_{m = 1}^M \\beta^{(m)}\\right) \\cdot \\tilde{x}_n\n\\\\[4pt]\n& = & \\hat{\\alpha} + \\hat{\\beta} \\cdot \\tilde{x}_n.\n\\end{eqnarray*}\\]\nIn Stan, only the first of the two steps (the important variance reduction step) can be coded in the object model. 
The linear predictor is defined in the generated quantities block.\ndata {\n int<lower=0> N_tilde;\n vector[N_tilde] x_tilde;\n // ...\n}\n// ...\ngenerated quantities {\n vector[N_tilde] tilde_y = alpha + beta * x_tilde;\n}\nThe posterior mean of tilde_y calculated by Stan is the Bayesian estimate \\(\\hat{\\tilde{y}}.\\) The posterior median may also be calculated and used as an estimate, though square error and the posterior mean are more commonly reported.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Held-Out Evaluation and Cross-Validation" + ] + }, + { + "objectID": "stan-users-guide/cross-validation.html#cross-validation", + "href": "stan-users-guide/cross-validation.html#cross-validation", + "title": "Held-Out Evaluation and Cross-Validation", + "section": "", + "text": "Cross-validation involves choosing multiple subsets of a data set as the test set and using the other data as training. 
This can be done by partitioning the data and using each subset in turn as the test set with the remaining subsets as training data. A partition into ten subsets is common to reduce computational overhead. In the limit, when the test set is just a single item, the result is known as leave-one-out (LOO) cross-validation (Vehtari, Gelman, and Gabry 2017).\nPartitioning the data and reusing the partitions is very fiddly in the indexes and may not lead to even divisions of the data. It’s far easier to use random partitions, which support arbitrarily sized test/training splits and can be easily implemented in Stan. The drawback is that the variance of the resulting estimate is higher than with a balanced block partition.\n\n\nFor the simple linear regression model, randomized cross-validation can be implemented in a single model. To randomly permute a vector in Stan, the simplest approach is the following.\nfunctions {\n array[] int permutation_rng(int N) {\n array[N] int y;\n for (n in 1 : N) {\n y[n] = n;\n }\n vector[N] theta = rep_vector(1.0 / N, N);\n for (n in 1 : size(y)) {\n int i = categorical_rng(theta);\n int temp = y[n];\n y[n] = y[i];\n y[i] = temp;\n }\n return y;\n }\n}\nThe name of the function must end in _rng because it uses other random functions internally. This will restrict its usage to the transformed data and generated quantities block. 
The code walks through an array of integers exchanging each item with another randomly chosen item, resulting in a uniformly drawn permutation of the integers 1:N.1\nThe transformed data block uses the permutation RNG to generate training data and test data by taking prefixes and suffixes of the permuted data.\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n int<lower=0, upper=N> N_test;\n}\ntransformed data {\n int N_train = N - N_test;\n array[N] int permutation = permutation_rng(N);\n vector[N_train] x_train = x[permutation[1 : N_train]];\n vector[N_train] y_train = y[permutation[1 : N_train]];\n vector[N_test] x_test = x[permutation[N_train + 1 : N]];\n vector[N_test] y_test = y[permutation[N_train + 1 : N]];\n}\nRecall that in Stan, permutation[1:N_train] is an array of integers, so that x[permutation[1 : N_train]] is a vector defined for i in 1:N_train by\nx[permutation[1 : N_train]][i] = x[permutation[1:N_train][i]]\n = x[permutation[i]]\nGiven the test/train split, the rest of the model is straightforward.\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y_train ~ normal(alpha + beta * x_train, sigma);\n { alpha, beta, sigma } ~ normal(0, 1);\n}\ngenerated quantities {\n vector[N] y_test_hat = normal_rng(alpha + beta * x_test, sigma);\n vector[N] err = y_test_sim - y_hat;\n}\nThe prediction y_test_hat is defined in the generated quantities block using the general form involving all uncertainty. 
The posterior of this quantity corresponds to using a posterior mean estimator, \\[\\begin{eqnarray*}\n\\hat{y}^{\\textrm{test}}\n& = & \\mathbb{E}\\left[ y^{\\textrm{test}} \\mid x^{\\textrm{test}}, x^{\\textrm{train}}, y^{\\textrm{train}} \\right]\n\\\\[4pt]\n& \\approx & \\frac{1}{M} \\sum_{m = 1}^M \\hat{y}^{\\textrm{test}(m)}.\n\\end{eqnarray*}\\]\nBecause the test set is constant and the expectation operator is linear, the posterior mean of err as defined in the Stan program will be the error of the posterior mean estimate, \\[\\begin{eqnarray*}\n \\hat{y}^{\\textrm{test}} - y^{\\textrm{test}}\n& = &\n\\mathbb{E}\\left[\n \\hat{y}^{\\textrm{test}}\n \\mid x^{\\textrm{test}}, x^{\\textrm{train}}, y^{\\textrm{train}}\n\\right]\n - y^{\\textrm{test}}\n\\\\[4pt]\n& = &\n\\mathbb{E}\\left[\n \\hat{y}^{\\textrm{test}} - y^{\\textrm{test}}\n \\mid x^{\\textrm{test}}, x^{\\textrm{train}}, y^{\\textrm{train}}\n\\right]\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M \\hat{y}^{\\textrm{test}(m)} - y^{\\textrm{test}},\n\\end{eqnarray*}\\] where \\[\n\\hat{y}^{\\textrm{test}(m)}\n\\sim p(y \\mid x^{\\textrm{test}}, x^{\\textrm{train}},\ny^{\\textrm{train}}).\n\\] This just calculates error; taking absolute value or squaring will compute absolute error and mean square error. Note that the absolute value and square operation should not be done within the Stan program because neither is a linear function and the result of averaging squares is not the same as squaring an average in general.\nBecause the test set size is chosen for convenience in cross-validation, results should be presented on a per-item scale, such as average absolute error or root mean square error, not on the scale of error in the fold being evaluated.\n\n\n\nIt is straightforward to declare the variable permutation in the data block instead of the transformed data block and read it in as data. 
This allows an external program to control the blocking, allowing non-random partitions to be evaluated.\n\n\n\nCross-validation must be done with care if the data is inherently structured. For example, in a simple natural language application, data might be structured by document. For cross-validation, one needs to cross-validate at the document level, not at the individual word level. This is related to mixed replication in posterior predictive checking, where there is a choice to simulate new elements of existing groups or generate entirely new groups.\nEducation testing applications are typically grouped by school district, by school, by classroom, and by demographic features of the individual students or the school as a whole. Depending on the variables of interest, different structured subsets should be evaluated. For example, the focus of interest may be on the performance of entire classrooms, so it would make sense to cross-validate at the class or school level on classroom performance.\n\n\n\nOften data measurements have spatial or temporal properties. For example, home energy consumption varies by time of day, day of week, on holidays, by season, and by ambient temperature (e.g., a hot spell or a cold snap). Cross-validation must be tailored to the predictive goal. For example, in predicting energy consumption, the quantity of interest may be the prediction for next week’s energy consumption given historical data and current weather covariates. This suggests an alternative to cross-validation, wherein individual weeks are each tested given previous data. This often allows comparing how well prediction performs with more or less historical data.\n\n\n\nVehtari, Gelman, and Gabry (2017) introduce a method that approximates the evaluation of leave-one-out cross validation inexpensively using only the data point log likelihoods from a single model fit. This method is documented and implemented in the R package loo (Gabry et al. 
2019).", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Held-Out Evaluation and Cross-Validation" + ] + }, + { + "objectID": "stan-users-guide/cross-validation.html#footnotes", + "href": "stan-users-guide/cross-validation.html#footnotes", + "title": "Held-Out Evaluation and Cross-Validation", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe traditional approach is to walk through a vector and replace each item with a random element from the remaining elements, which is guaranteed to only move each item once. This was not done here as it’d require new categorical theta because Stan does not have a uniform discrete RNG built in.↩︎", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Held-Out Evaluation and Cross-Validation" + ] + }, + { + "objectID": "stan-users-guide/complex-numbers.html", + "href": "stan-users-guide/complex-numbers.html", + "title": "Complex Numbers", + "section": "", + "text": "Stan supports complex scalars, matrices, and vectors as well as real-based ones.\n\n\nThis section describes the complex scalar type, including how to build complex numbers, assign them, and use them in arrays and functions.\n\n\nComplex numbers can be constructed using imaginary literals. For example,\ncomplex z = -1.1 + 2.3i;\nproduces the complex number \\(-1.1 + 2.3i\\). This only works if the real and imaginary components are literal numerals. To construct a complex number out of arbitrary real variables, the to_complex() function may be used. 
For example, the following code will work if x and y are parameters, transformed data, or local variables in a function or model block.\nreal x = // ...\nreal y = // ...\ncomplex z = to_complex(x, y);\nThe real and imaginary parts of the complex number can be accessed with getters as follows.\nreal x = get_real(z); // x = -1.1\nreal y = get_imag(z); // y = 2.3\nComplex numbers can be compared using equality (or inequality), but not with greater than or less than operators. For example, after running the code above, the following code snippet will print “hello”.\ncomplex a = 3.2 + 2i;\ncomplex b = to_complex(3.2, 2);\nif (a == b) print(\"hello\");\n\n\n\nInteger- or real-valued expressions may be assigned to complex numbers, with the corresponding imaginary component set to zero.\ncomplex z1 = 3; // int promoted to 3 + 0i\ncomplex z2 = 3.2; // real promoted to 3.2 + 0.0i\n\n\n\nArrays of complex numbers work as usual and allow the usual curly bracket constructors.\ncomplex z1; complex z2; complex z3;\n// ...\narray[3] complex zs = { z1, z2, z3 };\nfor (z in zs) {\n print(z);\n}\nComplex arrays allow assignment into their elements, with integer or real assigned values being promoted to complex.\n\n\n\nAll of the standard complex functions are available, including natural logarithm log(z), natural exponentiation exp(z), and powers pow(z1, z2), as well as all of the trig and hyperbolic trigonometric functions and their inverse, such as sin(z), acos(z), tanh(z) and asinh(z).\nPromotion also works for complex-valued function arguments, which may be passed integer or real values, which will be promoted before the function is evaluated. 
For example, the following user-defined complex function will accept integer, real, or complex arguments.\ncomplex times_i(complex z) {\n complex i = to_complex(0, 1);\n return i * z;\n}\nFor instance, times_i(1) evaluates to the imaginary base \\(i\\), as does times_i(1.0).\n\n\n\n\nThe simplest way to model a distribution over a complex random number \\(z = x + yi\\) is to consider its real part \\(x\\) and imaginary part \\(y\\) to have a bivariate normal distribution. For example, a complex prior can be expressed as follows.\ncomplex z;\nvector[2] mu;\ncholesky_factor_cov[2] L_Sigma;\n// ...\n[get_real(z), get_imag(z)]' ~ multi_normal_cholesky(mu, L_Sigma);\nFor example, if z is data, this can be used to estimate mu and the covariance Cholesky factor L_Sigma. Alternatively, if z is a parameter, mu and L_Sigma may constants defining a prior or further parameters defining a hierarchical model.\n\n\n\nStan supports complex matrices, vectors, and row vectors. Variables of these types are declared with sizes in the same way as their real-based counterparts.\ncomplex_vector[3] v;\ncomplex_row_vector[2] rv;\ncomplex_matrix[3, 2] m;\nWe can construct vectors and matrices using brackets in the same way as for real-valued vectors and matrices. For example, given the declaration of rv above, we could assign it to a constructed row vector.\nrv = [2 + 3i, 1.9 - 2.3i];\nComplex matrices and vectors support all of the standard arithmetic operations including negation, addition, subtraction, and multiplication (division involves a solve, and isn’t a simple arithmetic operation for matrices). They also support transposition.\nFurthermore, it is possible to convert back and forth between arrays and matrices using the to_array functions.\n\n\n\nComplex valued linear regression with complex predictors and regression coefficients looks just like standard regression. For example, if we take x to be predictors, y to be an array of outcomes. 
For example, consider the following complete Stan program for an intercept and slope.\ndata {\n int<lower=0> N;\n complex_vector[N] x;\n complex_vector[N] y;\n}\nparameters {\n complex alpha;\n complex beta;\n}\nmodel {\n complex_vector[N] eps = y - (alpha + beta * x);\n eps ~ // ...error distribution...\n}\nThe question remains of how to fill in the error distribution and there are several alternatives. We consider only two simple alternatives, and do not consider penalizing the absolute value of the error.\n\n\nThe simplest approach to error in complex regression is to give the real and imaginary parts of eps_n independent independent normal distributions, as follows.\nparameters {\n // ...\n vector[2] sigma;\n}\n// ...\nmodel {\n // ...\n get_real(eps) ~ normal(0, sigma[1]);\n get_imag(eps) ~ normal(0, sigma[2]);\n sigma ~ //...hyperprior...\n}\nA new error scale vector sigma is introduced, and it should itself get a prior based on the expected scale of error for the problem.\n\n\n\nThe next simplest approach is to treat the real and imaginary parts of the complex number as having a multivariate normal prior. This can be done by adding a parameter for correlation to the above, or just working with a multivariate covariance matrix, as we do here.\nparameters {\n cholesky_factor_corr[2] L_Omega; // correlation matrix\n vector[2] sigma; // real, imag error scales\n // ...\n}\n// ...\nmodel {\n array[N] vector[2] eps_arr;\n for (n in 1:N) {\n eps_arr[n] = { to_real(eps[n]), to_imag(eps[n]) };\n }\n eps_arr ~ multi_normal_cholesky([0, 0]',\n diag_pre_multiply(sigma, L_Omega));\n L_Omega ~ lkj_cholesky(4); // shrink toward diagonal correlation\n sigma ~ // ... hyperprior ...\n}\nHere, the real and imaginary components of the error get a joint distribution with correlation and independent scales. 
The error gets a multivariate normal distribution with zero mean and a Cholesky factor representation of covariance, consisting of a scale vector sigma and a Cholesky factor or a correlation matrix, L_Omega. The prior on the correlations is concentrated loosely around diagonal covariance, and the prior on the scales is left open. In order to vectorize the call to multi_normal_cholesky, the vector of complex numbers needs to be converted to an array of size 2 vectors.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Complex Numbers" + ] + }, + { + "objectID": "stan-users-guide/complex-numbers.html#working-with-complex-numbers", + "href": "stan-users-guide/complex-numbers.html#working-with-complex-numbers", + "title": "Complex Numbers", + "section": "", + "text": "This section describes the complex scalar type, including how to build complex numbers, assign them, and use them in arrays and functions.\n\n\nComplex numbers can be constructed using imaginary literals. For example,\ncomplex z = -1.1 + 2.3i;\nproduces the complex number \\(-1.1 + 2.3i\\). This only works if the real and imaginary components are literal numerals. To construct a complex number out of arbitrary real variables, the to_complex() function may be used. For example, the following code will work if x and y are parameters, transformed data, or local variables in a function or model block.\nreal x = // ...\nreal y = // ...\ncomplex z = to_complex(x, y);\nThe real and imaginary parts of the complex number can be accessed with getters as follows.\nreal x = get_real(z); // x = -1.1\nreal y = get_imag(z); // y = 2.3\nComplex numbers can be compared using equality (or inequality), but not with greater than or less than operators. 
For example, after running the code above, the following code snippet will print “hello”.\ncomplex a = 3.2 + 2i;\ncomplex b = to_complex(3.2, 2);\nif (a == b) print(\"hello\");\n\n\n\nInteger- or real-valued expressions may be assigned to complex numbers, with the corresponding imaginary component set to zero.\ncomplex z1 = 3; // int promoted to 3 + 0i\ncomplex z2 = 3.2; // real promoted to 3.2 + 0.0i\n\n\n\nArrays of complex numbers work as usual and allow the usual curly bracket constructors.\ncomplex z1; complex z2; complex z3;\n// ...\narray[3] complex zs = { z1, z2, z3 };\nfor (z in zs) {\n print(z);\n}\nComplex arrays allow assignment into their elements, with integer or real assigned values being promoted to complex.\n\n\n\nAll of the standard complex functions are available, including natural logarithm log(z), natural exponentiation exp(z), and powers pow(z1, z2), as well as all of the trig and hyperbolic trigonometric functions and their inverse, such as sin(z), acos(z), tanh(z) and asinh(z).\nPromotion also works for complex-valued function arguments, which may be passed integer or real values, which will be promoted before the function is evaluated. For example, the following user-defined complex function will accept integer, real, or complex arguments.\ncomplex times_i(complex z) {\n complex i = to_complex(0, 1);\n return i * z;\n}\nFor instance, times_i(1) evaluates to the imaginary base \\(i\\), as does times_i(1.0).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Complex Numbers" + ] + }, + { + "objectID": "stan-users-guide/complex-numbers.html#complex-random-variables", + "href": "stan-users-guide/complex-numbers.html#complex-random-variables", + "title": "Complex Numbers", + "section": "", + "text": "The simplest way to model a distribution over a complex random number \\(z = x + yi\\) is to consider its real part \\(x\\) and imaginary part \\(y\\) to have a bivariate normal distribution. 
For example, a complex prior can be expressed as follows.\ncomplex z;\nvector[2] mu;\ncholesky_factor_cov[2] L_Sigma;\n// ...\n[get_real(z), get_imag(z)]' ~ multi_normal_cholesky(mu, L_Sigma);\nFor example, if z is data, this can be used to estimate mu and the covariance Cholesky factor L_Sigma. Alternatively, if z is a parameter, mu and L_Sigma may be constants defining a prior or further parameters defining a hierarchical model.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Complex Numbers" + ] + }, + { + "objectID": "stan-users-guide/complex-numbers.html#complex-matrices-and-vectors", + "href": "stan-users-guide/complex-numbers.html#complex-matrices-and-vectors", + "title": "Complex Numbers", + "section": "", + "text": "Stan supports complex matrices, vectors, and row vectors. Variables of these types are declared with sizes in the same way as their real-based counterparts.\ncomplex_vector[3] v;\ncomplex_row_vector[2] rv;\ncomplex_matrix[3, 2] m;\nWe can construct vectors and matrices using brackets in the same way as for real-valued vectors and matrices. For example, given the declaration of rv above, we could assign it to a constructed row vector.\nrv = [2 + 3i, 1.9 - 2.3i];\nComplex matrices and vectors support all of the standard arithmetic operations including negation, addition, subtraction, and multiplication (division involves a solve, and isn’t a simple arithmetic operation for matrices). 
They also support transposition.\nFurthermore, it is possible to convert back and forth between arrays and matrices using the to_array functions.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Complex Numbers" + ] + }, + { + "objectID": "stan-users-guide/complex-numbers.html#complex-linear-regression", + "href": "stan-users-guide/complex-numbers.html#complex-linear-regression", + "title": "Complex Numbers", + "section": "", + "text": "Complex valued linear regression with complex predictors and regression coefficients looks just like standard regression. For example, if we take x to be a vector of predictors and y to be a vector of outcomes, we can write the following complete Stan program for an intercept and slope.\ndata {\n int<lower=0> N;\n complex_vector[N] x;\n complex_vector[N] y;\n}\nparameters {\n complex alpha;\n complex beta;\n}\nmodel {\n complex_vector[N] eps = y - (alpha + beta * x);\n eps ~ // ...error distribution...\n}\nThe question remains of how to fill in the error distribution and there are several alternatives. We consider only two simple alternatives, and do not consider penalizing the absolute value of the error.\n\n\nThe simplest approach to error in complex regression is to give the real and imaginary parts of eps_n independent normal distributions, as follows.\nparameters {\n // ...\n vector[2] sigma;\n}\n// ...\nmodel {\n // ...\n get_real(eps) ~ normal(0, sigma[1]);\n get_imag(eps) ~ normal(0, sigma[2]);\n sigma ~ //...hyperprior...\n}\nA new error scale vector sigma is introduced, and it should itself get a prior based on the expected scale of error for the problem.\n\n\n\nThe next simplest approach is to treat the real and imaginary parts of the complex number as having a multivariate normal prior. 
This can be done by adding a parameter for correlation to the above, or just working with a multivariate covariance matrix, as we do here.\nparameters {\n cholesky_factor_corr[2] L_Omega; // correlation matrix\n vector[2] sigma; // real, imag error scales\n // ...\n}\n// ...\nmodel {\n array[N] vector[2] eps_arr;\n for (n in 1:N) {\n eps_arr[n] = { to_real(eps[n]), to_imag(eps[n]) };\n }\n eps_arr ~ multi_normal_cholesky([0, 0]',\n diag_pre_multiply(sigma, L_Omega));\n L_Omega ~ lkj_cholesky(4); // shrink toward diagonal correlation\n sigma ~ // ... hyperprior ...\n}\nHere, the real and imaginary components of the error get a joint distribution with correlation and independent scales. The error gets a multivariate normal distribution with zero mean and a Cholesky factor representation of covariance, consisting of a scale vector sigma and a Cholesky factor or a correlation matrix, L_Omega. The prior on the correlations is concentrated loosely around diagonal covariance, and the prior on the scales is left open. In order to vectorize the call to multi_normal_cholesky, the vector of complex numbers needs to be converted to an array of size 2 vectors.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Complex Numbers" + ] + }, + { + "objectID": "stan-users-guide/bootstrap.html", + "href": "stan-users-guide/bootstrap.html", + "title": "The Bootstrap and Bagging", + "section": "", + "text": "The bootstrap is a technique for approximately sampling from the error distribution for an estimator. Thus it can be used as a Monte Carlo method to estimate standard errors and confidence intervals for point estimates (Efron and Tibshirani 1986; 1994). It works by subsampling the original data and computing sample estimates from the subsample. 
Like other Monte Carlo methods, the bootstrap is plug-and-play, allowing great flexibility in both model choice and estimator.\nBagging is a technique for combining bootstrapped estimators for model criticism and more robust inference (Breiman 1996; Huggins and Miller 2019).\n\n\n\n\nAn estimator is nothing more than a function mapping a data set to one or more numbers, which are called “estimates”. For example, the mean function maps a data set \\(y_{1,\\ldots, N}\\) to a number by \\[\n\\textrm{mean}(y) = \\frac{1}{N} \\sum_{n=1}^N y_n,\n\\] and hence meets the definition of an estimator. Given the likelihood function \\[\np(y \\mid \\mu) = \\prod_{n=1}^N \\textrm{normal}(y_n \\mid \\mu, 1),\n\\] the mean is the maximum likelihood estimator,\n\\[\n\\textrm{mean}(y) = \\textrm{arg max}_{\\mu} \\ p(y \\mid \\mu, 1)\n\\] A Bayesian approach to point estimation would be to add a prior and use the posterior mean or median as an estimator. Alternatively, a penalty function could be added to the likelihood so that optimization produces a penalized maximum likelihood estimate. With any of these approaches, the estimator is just a function from data to a number.\nIn analyzing estimators, the data set is being modeled as a random variable. It is assumed that the observed data is just one of many possible random samples of data that may have been produced. If the data is modeled a random variable, then the estimator applied to the data is also a random variable. The simulations being done for the bootstrap are attempts to randomly sample replicated data sets and compute the random properties of the estimators using standard Monte Carlo methods.\n\n\n\nThe bootstrap works by applying an estimator to replicated data sets. These replicates are created by subsampling the original data with replacement. 
The sample quantiles may then be used to estimate standard errors and confidence intervals.\nThe following pseudocode estimates 95% confidence intervals and standard errors for a generic estimate \\(\\hat{\\theta}\\) that is a function of data \\(y\\).\nfor (m in 1:M) {\n y_rep[m] <- sample_uniform(y)\n theta_hat[m] <- estimate_theta(y_rep[m])\n}\nstd_error = sd(theta_hat)\nconf_95pct = [ quantile(theta_hat, 0.025),\n quantile(theta_hat, 0.975) ]\nThe sample_uniform function works by independently assigning each element of y_rep an element of y drawn uniformly at random. This produces a sample with replacement. That is, some elements of y may show up more than once in y_rep and some may not appear at all.\n\n\n\n\nThe bootstrap procedure can be coded quite generally in Stan models. The following code illustrates a Stan model coding the likelihood for a simple linear regression. There is a parallel vector x of predictors in addition to outcomes y. To allow a single program to fit both the original data and random subsamples, the variable resample is set to 1 to resample and 0 to use the original data.\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n int<lower=0, upper=1> resample;\n}\ntransformed data {\n simplex[N] uniform = rep_vector(1.0 / N, N);\n array[N] int<lower=1, upper=N> boot_idxs;\n for (n in 1:N) {\n boot_idxs[n] = resample ? categorical_rng(uniform) : n;\n }\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y[boot_idxs] ~ normal(alpha + beta * x[boot_idxs], sigma);\n}\nThe model accepts data in the usual form for a linear regression as a number of observations \\(N\\) with a size \\(N\\) vector \\(x\\) of predictors and a size \\(N\\) vector of outcomes. The transformed data block generates a set of indexes into the data that is the same size as the data. 
This is done by independently sampling each entry of boot_idxs from 1:N, using a discrete uniform distribution coded as a categorical random number generator with an equal chance for each outcome. If resampling is not done, the array boot_idxs is defined to be the sequence 1:N, because x == x[1:N] and y = y[1:N].\nFor example, when resample == 1, if \\(N = 4,\\) the value of boot_idxs might be {2, 1, 1, 3}, resulting in a bootstrap sample {y[2], y[1], y[1], y[3]} with the first element repeated twice and the fourth element not sampled at all.\nThe parameters are the usual regression coefficients for the intercept alpha, slope beta, and error scale sigma. The model uses the bootstrap index variable boot_idx to index the predictors as x[boot_idx] and outcomes as y[boot_idx]. This generates a new size-\\(N\\) vector whose entries are defined by x[boot_idx][n] = x[boot_idx[n]] and similarly for y. For example, if \\(N = 4\\) and boot_idxs = {2, 1, 1, 3}, then x[boot_idxs] = [x[2], x[1], x[1], x[3]]' and y[boot_idxs] = [y[2], y[1], y[1], y[3]]'. The predictor and outcome vectors remain aligned, with both elements of the pair x[1] and y[1] repeated twice.\nWith the model defined this way, if resample is 1, the model is fit to a bootstrap subsample of the data. If resample is 0, the model is fit to the original data as given. By running the bootstrap fit multiple times, confidence intervals can be generated from quantiles of the results.\n\n\n\nRunning the model multiple times produces a Monte Carlo sample of estimates from multiple alternative data sets subsampled from the original data set. The error distribution is just the distribution of the bootstrap estimates minus the estimate for the original data set.\nTo estimate standard errors and confidence intervals for maximum likelihood estimates the Stan program is executed multiple times using optimization (which turns off Jacobian adjustments for constraints and finds maximum likelihood estimates). 
On the order of one hundred replicates is typically enough to get a good sense of standard error; more will be needed to accurate estimate the boundaries of a 95% confidence interval. On the other hand, given that there is inherent variance due to sampling the original data \\(y\\), it is usually not worth calculating bootstrap estimates to high precision.\n\n\nHere’s the result of calculating standard errors for the linear regression model above with \\(N = 50\\) data points, \\(\\alpha = 1.2, \\beta\n= -0.5,\\) and \\(\\sigma = 1.5.\\) With a total of \\(M = 100\\) bootstrap samples, there are 100 estimates of \\(\\alpha\\), 100 of \\(\\beta\\), and 100 of \\(\\sigma\\). These are then treated like Monte Carlo draws. For example, the sample standard deviation of the draws for \\(\\alpha\\) provide the bootstrap estimate of the standard error in the estimate for \\(\\alpha\\). Here’s what it looks like for the above model with \\(M =\n100\\)\n parameter estimate std err\n --------- -------- -------\n alpha 1.359 0.218\n beta -0.610 0.204\n sigma 1.537 0.142\nWith the data set fixed, these estimates of standard error will display some Monte Carlo error. For example, here are the standard error estimates from five more runs holding the data the same, but allowing the subsampling to vary within Stan:\n parameter estimate std err\n --------- -------- -------\n alpha 1.359 0.206\n alpha 1.359 0.240\n alpha 1.359 0.234\n alpha 1.359 0.249\n alpha 1.359 0.227\nIncreasing \\(M\\) will reduce Monte Carlo error, but this is not usually worth the extra computation time as there is so much other uncertainty due to the original data sample \\(y\\).\n\n\n\nAs usual with Monte Carlo methods, confidence intervals are estimated using quantiles of the draws. 
That is, if there are \\(M = 1000\\) estimates of \\(\\hat{\\alpha}\\) in different subsamples, the 2.5% quantile and 97.5% quantile pick out the boundaries of the 95% confidence interval around the estimate for the actual data set \\(y\\). To get accurate 97.5% quantile estimates requires a much larger number of Monte Carlo simulations (roughly twenty times as large as needed for the median).\n\n\n\n\nWhen bootstrapping is carried through inference it is known as bootstrap aggregation, or bagging, in the machine-learning literature (Breiman 1996). In the simplest case, this involves bootstrapping the original data, fitting a model to each bootstrapped data set, then averaging the predictions. For instance, rather than using an estimate \\(\\hat{\\sigma}\\) from the original data set, bootstrapped data sets \\(y^{\\textrm{boot}(1)}, \\ldots,\ny^{\\textrm{boot}(N)}\\) are generated. Each is used to generate an estimate \\(\\hat{\\sigma}^{\\textrm{boot}(n)}.\\) The final estimate is \\[\n\\hat{\\sigma} = \\frac{1}{N} \\sum_{n = 1}^N \\hat{\\sigma}^{\\textrm{boot}(n)}.\n\\] The same would be done to estimate a predictive quantity \\(\\tilde{y}\\) for as yet unseen data. \\[\n\\hat{\\tilde{y}} = \\frac{1}{N} \\sum_{n = 1}^N\n\\hat{\\tilde{y}}^{\\textrm{boot}(n)}.\n\\] For discrete parameters, voting is used to select the outcome.\nOne way of viewing bagging is as a classical attempt to get something like averaging over parameter estimation uncertainty.\n\n\n\nA Bayesian estimator may be analyzed with the bootstrap in exactly the same way as a (penalized) maximum likelihood estimate. For example, the posterior mean and posterior median are two different Bayesian estimators. 
The bootstrap may be used to estimate standard errors and confidence intervals, just as for any other estimator.\n(Huggins and Miller 2019) use the bootstrap to assess model calibration and fitting in a Bayesian framework and further suggest using bagged estimators as a guard against model misspecification. Bagged posteriors will typically have wider posterior intervals than those fit with just the original data, showing that the method is not a pure Bayesian approach to updating, and indicating it would not be calibrated if the model were well specified. The hope is that it can guard against over-certainty in a poorly specified model.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "The Bootstrap and Bagging" + ] + }, + { + "objectID": "stan-users-guide/bootstrap.html#the-bootstrap", + "href": "stan-users-guide/bootstrap.html#the-bootstrap", + "title": "The Bootstrap and Bagging", + "section": "", + "text": "An estimator is nothing more than a function mapping a data set to one or more numbers, which are called “estimates”. For example, the mean function maps a data set \\(y_{1,\\ldots, N}\\) to a number by \\[\n\\textrm{mean}(y) = \\frac{1}{N} \\sum_{n=1}^N y_n,\n\\] and hence meets the definition of an estimator. Given the likelihood function \\[\np(y \\mid \\mu) = \\prod_{n=1}^N \\textrm{normal}(y_n \\mid \\mu, 1),\n\\] the mean is the maximum likelihood estimator,\n\\[\n\\textrm{mean}(y) = \\textrm{arg max}_{\\mu} \\ p(y \\mid \\mu, 1)\n\\] A Bayesian approach to point estimation would be to add a prior and use the posterior mean or median as an estimator. Alternatively, a penalty function could be added to the likelihood so that optimization produces a penalized maximum likelihood estimate. With any of these approaches, the estimator is just a function from data to a number.\nIn analyzing estimators, the data set is being modeled as a random variable. 
It is assumed that the observed data is just one of many possible random samples of data that may have been produced. If the data is modeled as a random variable, then the estimator applied to the data is also a random variable. The simulations being done for the bootstrap are attempts to randomly sample replicated data sets and compute the random properties of the estimators using standard Monte Carlo methods.\n\n\n\nThe bootstrap works by applying an estimator to replicated data sets. These replicates are created by subsampling the original data with replacement. The sample quantiles may then be used to estimate standard errors and confidence intervals.\nThe following pseudocode estimates 95% confidence intervals and standard errors for a generic estimate \\(\\hat{\\theta}\\) that is a function of data \\(y\\).\nfor (m in 1:M) {\n y_rep[m] <- sample_uniform(y)\n theta_hat[m] <- estimate_theta(y_rep[m])\n}\nstd_error = sd(theta_hat)\nconf_95pct = [ quantile(theta_hat, 0.025),\n quantile(theta_hat, 0.975) ]\nThe sample_uniform function works by independently assigning each element of y_rep an element of y drawn uniformly at random. This produces a sample with replacement. That is, some elements of y may show up more than once in y_rep and some may not appear at all.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "The Bootstrap and Bagging" + ] + }, + { + "objectID": "stan-users-guide/bootstrap.html#coding-the-bootstrap-in-stan", + "href": "stan-users-guide/bootstrap.html#coding-the-bootstrap-in-stan", + "title": "The Bootstrap and Bagging", + "section": "", + "text": "The bootstrap procedure can be coded quite generally in Stan models. The following code illustrates a Stan model coding the likelihood for a simple linear regression. There is a parallel vector x of predictors in addition to outcomes y. 
To allow a single program to fit both the original data and random subsamples, the variable resample is set to 1 to resample and 0 to use the original data.\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n int<lower=0, upper=1> resample;\n}\ntransformed data {\n simplex[N] uniform = rep_vector(1.0 / N, N);\n array[N] int<lower=1, upper=N> boot_idxs;\n for (n in 1:N) {\n boot_idxs[n] = resample ? categorical_rng(uniform) : n;\n }\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n y[boot_idxs] ~ normal(alpha + beta * x[boot_idxs], sigma);\n}\nThe model accepts data in the usual form for a linear regression as a number of observations \\(N\\) with a size \\(N\\) vector \\(x\\) of predictors and a size \\(N\\) vector of outcomes. The transformed data block generates a set of indexes into the data that is the same size as the data. This is done by independently sampling each entry of boot_idxs from 1:N, using a discrete uniform distribution coded as a categorical random number generator with an equal chance for each outcome. If resampling is not done, the array boot_idxs is defined to be the sequence 1:N, because x == x[1:N] and y = y[1:N].\nFor example, when resample == 1, if \\(N = 4,\\) the value of boot_idxs might be {2, 1, 1, 3}, resulting in a bootstrap sample {y[2], y[1], y[1], y[3]} with the first element repeated twice and the fourth element not sampled at all.\nThe parameters are the usual regression coefficients for the intercept alpha, slope beta, and error scale sigma. The model uses the bootstrap index variable boot_idx to index the predictors as x[boot_idx] and outcomes as y[boot_idx]. This generates a new size-\\(N\\) vector whose entries are defined by x[boot_idx][n] = x[boot_idx[n]] and similarly for y. For example, if \\(N = 4\\) and boot_idxs = {2, 1, 1, 3}, then x[boot_idxs] = [x[2], x[1], x[1], x[3]]' and y[boot_idxs] = [y[2], y[1], y[1], y[3]]'. 
The predictor and outcome vectors remain aligned, with both elements of the pair x[1] and y[1] repeated twice.\nWith the model defined this way, if resample is 1, the model is fit to a bootstrap subsample of the data. If resample is 0, the model is fit to the original data as given. By running the bootstrap fit multiple times, confidence intervals can be generated from quantiles of the results.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "The Bootstrap and Bagging" + ] + }, + { + "objectID": "stan-users-guide/bootstrap.html#error-statistics-from-the-bootstrap", + "href": "stan-users-guide/bootstrap.html#error-statistics-from-the-bootstrap", + "title": "The Bootstrap and Bagging", + "section": "", + "text": "Running the model multiple times produces a Monte Carlo sample of estimates from multiple alternative data sets subsampled from the original data set. The error distribution is just the distribution of the bootstrap estimates minus the estimate for the original data set.\nTo estimate standard errors and confidence intervals for maximum likelihood estimates the Stan program is executed multiple times using optimization (which turns off Jacobian adjustments for constraints and finds maximum likelihood estimates). On the order of one hundred replicates is typically enough to get a good sense of standard error; more will be needed to accurately estimate the boundaries of a 95% confidence interval. On the other hand, given that there is inherent variance due to sampling the original data \\(y\\), it is usually not worth calculating bootstrap estimates to high precision.\n\n\nHere’s the result of calculating standard errors for the linear regression model above with \\(N = 50\\) data points, \\(\\alpha = 1.2, \\beta\n= -0.5,\\) and \\(\\sigma = 1.5.\\) With a total of \\(M = 100\\) bootstrap samples, there are 100 estimates of \\(\\alpha\\), 100 of \\(\\beta\\), and 100 of \\(\\sigma\\). 
These are then treated like Monte Carlo draws. For example, the sample standard deviation of the draws for \\(\\alpha\\) provide the bootstrap estimate of the standard error in the estimate for \\(\\alpha\\). Here’s what it looks like for the above model with \\(M =\n100\\)\n parameter estimate std err\n --------- -------- -------\n alpha 1.359 0.218\n beta -0.610 0.204\n sigma 1.537 0.142\nWith the data set fixed, these estimates of standard error will display some Monte Carlo error. For example, here are the standard error estimates from five more runs holding the data the same, but allowing the subsampling to vary within Stan:\n parameter estimate std err\n --------- -------- -------\n alpha 1.359 0.206\n alpha 1.359 0.240\n alpha 1.359 0.234\n alpha 1.359 0.249\n alpha 1.359 0.227\nIncreasing \\(M\\) will reduce Monte Carlo error, but this is not usually worth the extra computation time as there is so much other uncertainty due to the original data sample \\(y\\).\n\n\n\nAs usual with Monte Carlo methods, confidence intervals are estimated using quantiles of the draws. That is, if there are \\(M = 1000\\) estimates of \\(\\hat{\\alpha}\\) in different subsamples, the 2.5% quantile and 97.5% quantile pick out the boundaries of the 95% confidence interval around the estimate for the actual data set \\(y\\). To get accurate 97.5% quantile estimates requires a much larger number of Monte Carlo simulations (roughly twenty times as large as needed for the median).", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "The Bootstrap and Bagging" + ] + }, + { + "objectID": "stan-users-guide/bootstrap.html#bagging", + "href": "stan-users-guide/bootstrap.html#bagging", + "title": "The Bootstrap and Bagging", + "section": "", + "text": "When bootstrapping is carried through inference it is known as bootstrap aggregation, or bagging, in the machine-learning literature (Breiman 1996). 
In the simplest case, this involves bootstrapping the original data, fitting a model to each bootstrapped data set, then averaging the predictions. For instance, rather than using an estimate \\(\\hat{\\sigma}\\) from the original data set, bootstrapped data sets \\(y^{\\textrm{boot}(1)}, \\ldots,\ny^{\\textrm{boot}(N)}\\) are generated. Each is used to generate an estimate \\(\\hat{\\sigma}^{\\textrm{boot}(n)}.\\) The final estimate is \\[\n\\hat{\\sigma} = \\frac{1}{N} \\sum_{n = 1}^N \\hat{\\sigma}^{\\textrm{boot}(n)}.\n\\] The same would be done to estimate a predictive quantity \\(\\tilde{y}\\) for as yet unseen data. \\[\n\\hat{\\tilde{y}} = \\frac{1}{N} \\sum_{n = 1}^N\n\\hat{\\tilde{y}}^{\\textrm{boot}(n)}.\n\\] For discrete parameters, voting is used to select the outcome.\nOne way of viewing bagging is as a classical attempt to get something like averaging over parameter estimation uncertainty.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "The Bootstrap and Bagging" + ] + }, + { + "objectID": "stan-users-guide/bootstrap.html#bayesian-bootstrap-and-bagging", + "href": "stan-users-guide/bootstrap.html#bayesian-bootstrap-and-bagging", + "title": "The Bootstrap and Bagging", + "section": "", + "text": "A Bayesian estimator may be analyzed with the bootstrap in exactly the same way as a (penalized) maximum likelihood estimate. For example, the posterior mean and posterior median are two different Bayesian estimators. The bootstrap may be used estimate standard errors and confidence intervals, just as for any other estimator.\n(Huggins and Miller 2019) use the bootstrap to assess model calibration and fitting in a Bayesian framework and further suggest using bagged estimators as a guard against model misspecification. 
Bagged posteriors will typically have wider posterior intervals than those fit with just the original data, showing that the method is not a pure Bayesian approach to updating, and indicating it would not be calibrated if the model were well specified. The hope is that it can guard against over-certainty in a poorly specified model.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "The Bootstrap and Bagging" + ] + }, + { + "objectID": "reference-manual/whitespace.html", + "href": "reference-manual/whitespace.html", + "title": "Whitespace", + "section": "", + "text": "The whitespace characters (and their ASCII code points) are the space (0x20), tab (0x09), carriage return (0x0D), and line feed (0x0A).\n\n\n\nStan treats all whitespace characters identically. Specifically, there is no significance to indentation, to tabs, to carriage returns or line feeds, or to any vertical alignment of text. Any whitespace character is exchangeable with any other.\nOther than for readability, the number of whitespaces is also irrelevant. One or more whitespace characters of any type are treated identically by the parser.\n\n\n\nZero or more whitespace characters may be placed between symbols in a Stan program. For example, zero or more whitespace characters of any variety may be included before and after a binary operation such as a * b, before a statement-ending semicolon, around parentheses or brackets, before or after commas separating function arguments, etc.\nIdentifiers and literals may not be separated by whitespace. 
Thus it is not legal to write the number 10000 as 10 000 or to write the identifier normal_lpdf as normal _ lpdf.", + "crumbs": [ + "Reference Manual", + "Language", + "Whitespace" + ] + }, + { + "objectID": "reference-manual/whitespace.html#whitespace-characters", + "href": "reference-manual/whitespace.html#whitespace-characters", + "title": "Whitespace", + "section": "", + "text": "The whitespace characters (and their ASCII code points) are the space (0x20), tab (0x09), carriage return (0x0D), and line feed (0x0A).", + "crumbs": [ + "Reference Manual", + "Language", + "Whitespace" + ] + }, + { + "objectID": "reference-manual/whitespace.html#whitespace-neutrality", + "href": "reference-manual/whitespace.html#whitespace-neutrality", + "title": "Whitespace", + "section": "", + "text": "Stan treats all whitespace characters identically. Specifically, there is no significance to indentation, to tabs, to carriage returns or line feeds, or to any vertical alignment of text. Any whitespace character is exchangeable with any other.\nOther than for readability, the number of whitespaces is also irrelevant. One or more whitespace characters of any type are treated identically by the parser.", + "crumbs": [ + "Reference Manual", + "Language", + "Whitespace" + ] + }, + { + "objectID": "reference-manual/whitespace.html#whitespace-location", + "href": "reference-manual/whitespace.html#whitespace-location", + "title": "Whitespace", + "section": "", + "text": "Zero or more whitespace characters may be placed between symbols in a Stan program. For example, zero or more whitespace characters of any variety may be included before and after a binary operation such as a * b, before a statement-ending semicolon, around parentheses or brackets, before or after commas separating function arguments, etc.\nIdentifiers and literals may not be separated by whitespace. 
Thus it is not legal to write the number 10000 as 10 000 or to write the identifier normal_lpdf as normal _ lpdf.", + "crumbs": [ + "Reference Manual", + "Language", + "Whitespace" + ] + }, + { + "objectID": "reference-manual/user-functions.html", + "href": "reference-manual/user-functions.html", + "title": "User-Defined Functions", + "section": "", + "text": "Stan allows users to define their own functions. The basic syntax is a simplified version of that used in C and C++. This chapter specifies how functions are declared, defined, and used in Stan.\n\n\nUser-defined functions appear in a special function-definition block before all of the other program blocks.\nfunctions {\n // ... function declarations and definitions ...\n}\ndata {\n // ...\nFunction definitions and declarations may appear in any order. Forward declarations are allowed but not required.\n\n\n\nThe rules for function naming and function-argument naming are the same as for other variables; see the section on variables for more information on valid identifiers. For example,\nreal foo(real mu, real sigma);\ndeclares a function named foo with two argument variables of types real and real. The arguments are named mu and sigma, but that is not part of the declaration.\n\n\nMultiple user-defined functions may have the same name if they have different sequences of argument types. This is known as function overloading.\nFor example, the following two functions are both defined with the name add_up\nreal add_up(real a, real b){\n return a + b;\n}\n\nreal add_up(real a, real b, real c){\n return a + b + c;\n}\nThe return types of overloaded functions do not need to be the same. 
One could define an additional add_up function as follows\nint add_up(int a, int b){\n return a + b;\n}\nThat being said, functions may not use the same name if their signature only differs by the return type.\nFor example, the following is not permitted\n// illegal\nreal baz(int x);\nint baz(int x);\nFunction names used in the Stan standard library may be overloaded by user-defined functions. Exceptions to this are the reduce_sum family of functions and ODE integrators, which cannot be overloaded.\n\n\n\n\nAll function arguments are mandatory—there are no default values.\n\n\nFunctions with non-void return types are called just like any other built-in function in Stan—they are applied to appropriately typed arguments to produce an expression, which has a value when executed.\n\n\n\nFunctions with void return types may be applied to arguments and used as statements. These act like distribution statements or print statements. Such uses are only appropriate for functions that act through side effects, such as incrementing the log probability accumulator, printing, or raising exceptions.\n\n\n\nOverloaded functions alongside type promotion can result in situations where there are multiple valid interpretations of a function call. Stan requires that there be a unique signature which minimizes the number of promotions required.\nConsider the following two overloaded functions\nreal foo(int a, real b);\nreal foo(real a, int b);\nThese functions do not have a unique minimum when called with two integer arguments foo(1,2), and therefore cannot be called as such.\nPromotion of integers to complex numbers is considered as two separate promotions, one from int to real and a second from real to complex. 
Consider the following functions with real and complex signatures\nreal bar(real x);\nreal bar(complex z);\nA call bar(5) with an integer argument will be resolved to bar(real) because it only requires a single promotion, whereas the promotion to a complex number requires two promotions.\n\n\n\nThe rules for calling functions work the same way as assignment as far as promotion goes. This means that we can promote arguments to the type expected by function arguments. For example, the following will work.\nreal foo(real x) { return ... };\n...\nint a = 5;\nreal b = foo(a); // a promoted to type real\nIn addition to promoting int to real, Stan also promotes real to complex, and by transitivity, int to complex. This also works for containers, so an array of int may be assigned to an array of real of the same shape. And we can also promote vector to complex_vector and similarly for row vectors and matrices.\n\n\n\nFunctions whose name ends in _lpdf or _lpmf (log density and mass functions) may be used as probability functions and may be used in place of parameterized distributions on the right side of statements.qmd#distribution-statements.section.\n\n\n\nFunctions of certain types are restricted on scope of usage. Functions whose names end in _lp assume access to the log probability accumulator and are only available in the transformed parameters and model blocks.\nFunctions whose name end in _jacobian assume access to the log probability accumulator may only be used within the transformed parameters block.\nFunctions whose names end in _rng assume access to the random number generator and may only be used within the generated quantities block, transformed data block, and within user-defined functions ending in _rng.\nFunctions whose names end in _lpdf and _lpmf can be used anywhere. 
However, _lupdf and _lupmf functions can only be used in the model block or user-defined probability functions.\nSee the section on function bodies for more information on these special types of function.\n\n\n\n\nStan’s functions all have declared types for both arguments and returned value. As with built-in functions, user-defined functions are only declared for base argument type and dimensionality. This requires a different syntax than for declaring other variables. The choice of language was made so that return types and argument types could use the same declaration syntax.\nThe type void may not be used as an argument type, only a return type for a function with side effects.\n\n\nThe base variable types are integer, real, complex, vector, row_vector, and matrix. No lower-bound or upper-bound constraints are allowed (e.g., real<lower=0> is illegal). Specialized constrained types are also not allowed (e.g., simplex is illegal).\nTuple types of the form tuple(T1, ..., TN) are also allowed, with all of the types T1 to TN being function argument types (i.e., no constraints and no sizes).\n\n\n\nArguments and return types may be arrays, and these are indicated with optional brackets and commas as would be used for indexing. For example, int denotes a single integer argument or return, whereas array[] real indicates a one-dimensional array of reals, array[,] real a two-dimensional array and array[,,] real a three-dimensional array; whitespace is optional, as usual.\nThe dimensions for vectors and matrices are not included, so that matrix is the type of a single matrix argument or return type. Thus if a variable is declared as matrix a, then a has two indexing dimensions, so that a[1] is a row vector and a[1, 1] a real value. Matrices implicitly have two indexing dimensions. 
The type declaration matrix[ , ] b specifies that b is a two-dimensional array of matrices, for a total of four indexing dimensions, with b[1, 1, 1, 1] picking out a real value.\n\n\n\nFunction argument and return types are not themselves checked for dimensionality. A matrix of any size may be passed in as a matrix argument. Nevertheless, a user-defined function might call a function (such as a multivariate normal density) that itself does dimensionality checks.\nDimensions of function return values will be checked if they’re assigned to a previously declared variable. They may also be checked if they are used as the argument to a function.\nAny errors raised by calls to functions inside user functions or return type mismatches are simply passed on; this typically results in a warning message and rejection of a proposal during sampling or optimization.\n\n\n\nSome of Stan’s built-in functions, like the differential equation solvers, have arguments that must be data. Such data-only arguments must be expressions involving only data, transformed data, and generated quantity variables.\nIn user-defined functions, the qualifier data may be placed before an argument type declaration to indicate that the argument must be data only. For example,\nreal foo(data real x) {\n return x^2;\n}\nrequires the argument x to be data only.\nDeclaring an argument data only allows type inference to proceed in the body of the function so that, for example, the variable may be used as a data-only argument to a built-in function.\n\n\n\n\nThe body of a function is between an open curly brace ({) and close curly brace (}). The body may contain local variable declarations at the top of the function body’s block and these scope the same way as local variables used in any other statement block.\nAny user-defined function may be used in the function body regardless of the order in which the function definitions appear in the file. 
Self-recursive and mutually recursive functions are possible without any additional declarations.\nThe only restrictions on statements in function bodies are external, and determine whether the log probability accumulator or random number generators are available; see the rest of this section for details.\n\n\nFunctions that call random number generating functions in their bodies must have a name that ends in _rng; attempts to use random-number generators in other functions lead to a compile-time error.\nLike other random number generating functions, user-defined functions with names that end in _rng may be used only in the generated quantities block and transformed data block, or within the bodies of user-defined functions ending in _rng. An attempt to use such a function elsewhere results in a compile-time error.\n\n\n\nFunctions that include distribution statements or log probability increment statements must have a name that ends in _lp. Attempts to use distribution statements or increment log probability statements in other functions lead to a compile-time error.\nLike the target log density increment statement and distribution statements, user-defined functions with names that end in _lp may only be used in blocks where the log probability accumulator is accessible, namely the transformed parameters and model blocks. An attempt to use such a function elsewhere results in a compile-time error.\n\n\n\nFunctions whose names end in _lpdf and _lpmf (density and mass functions) can be used as probability functions in distribution statements. As with the built-in functions, the first argument will appear on the left of the distribution statement operator (~) in the distribution statement and the other arguments follow. For example, suppose a function returning the log of the density of y given parameter theta allows the use of the distribution statement is defined as follows.\nreal foo_lpdf(real y, vector theta) { ... 
}\nNote that for function definitions, the comma is used rather than the vertical bar.\nFor every custom _lpdf and _lpmf defined there is a corresponding _lupdf and _lupmf defined automatically. The _lupdf and _lupmf versions of the functions cannot be defined directly (to do so will produce an error). The difference in the _lpdf and _lpmf and the corresponding _lupdf and _lupmf functions is that if any other unnormalized density functions are used inside the user-defined function, the _lpdf and _lpmf forms of the user-defined function will change these densities to be normalized. The _lupdf and _lupmf forms of the user-defined functions will instead allow other unnormalized density functions to drop additive constants.\nThe distribution statement shorthand\nz ~ foo(phi);\nwill have the same effect as incrementing the target with the log of the unnormalized density:\ntarget += foo_lupdf(z | phi);\nOther _lupdf and _lupmf functions used in the definition of foo_lpdf will drop additive constants when foo_lupdf is called and will not drop additive constants when foo_lpdf is called.\nIf there are _lupdf and _lupmf functions used inside the following call to foo_lpdf, they will be forced to normalize (return the equivalent of their _lpdf and _lpmf forms):\ntarget += foo_lpdf(z | phi);\nIf there are no _lupdf or _lupmf functions used in the definition of foo_lpdf, then there will be no difference between a foo_lpdf or foo_lupdf call.\nThe unnormalized _lupdf and _lupmf functions can only be used in the model block or in user-defined probability functions (those ending in _lpdf or _lpmf).\nThe same syntax and shorthand that works for _lpdf also works for log probability mass functions with suffixes _lpmf.\nA function that is going to be accessed as distributions must return the log of the density or mass function it defines.\n\n\n\n\nWithin function definition bodies, the parameters may be used like any other variable. 
But the parameters are constant in the sense that they can’t be assigned to (i.e., can’t appear on the left side of an assignment (=) statement). In other words, their value remains constant throughout the function body. Attempting to assign a value to a function parameter value will raise a compile-time error.1\nLocal variables may be declared at the top of the function block and scope as usual.\n\n\n\nNon-void functions must have a return statement that returns an appropriately typed expression. If the expression in a return statement does not have the same type as the return type declared for the function, a compile-time error is raised.\nVoid functions may use return only without an argument, but return statements are not mandatory.\n\n\nUnlike C++, Stan enforces a syntactic guarantee for non-void functions that ensures control will leave a non-void function through an appropriately typed return statement or because an exception is raised in the execution of the function. To enforce this condition, functions must have a return statement as the last statement in their body. This notion of last is defined recursively in terms of statements that qualify as bodies for functions. The base case is that\n\na return statement qualifies,\n\nand the recursive cases are that\n\na sequence of statements qualifies if its last statement qualifies,\na for loop or while loop qualifies if its body qualifies, and\na conditional statement qualifies if it has a default else clause and all of its body statements qualify.\n\nAn exception is made for “obviously infinite” loops like while (1), which contain a return statement and no break statements. 
The only way to exit such a loop is to return, so they are considered as returning statements.\nThese rules disqualify\nreal foo(real x) {\n if (x > 2) {\n return 1.0;\n } else if (x <= 2) {\n return -1.0;\n }\n}\nbecause there is no default else clause, and disqualify\nreal foo(real x) {\n real y;\n y = x;\n while (x < 10) {\n if (x > 0) {\n return x;\n }\n y = x / 2;\n }\n}\nbecause the return statement is not the last statement in the while loop. A bogus dummy return could be placed after the while loop in this case. The rules for returns allow\nreal log_fancy(real x) {\n if (x < 1e-30) {\n return x;\n } else if (x < 1e-14) {\n return x * x;\n } else {\n return log(x);\n }\n}\nbecause there’s a default else clause and each condition body has return as its final statement.\n\n\n\n\n\n\nA function can be declared without a return value by using void in place of a return type. Note that the type void may only be used as a return type—arguments may not be declared to be of type void.\n\n\n\nA void function may be used as a statement.\nBecause there is no return, such a usage is only for side effects, such as incrementing the log probability function, printing, or raising an error.\n\n\n\nIn a return statement within a void function’s definition, the return keyword is followed immediately by a semicolon (;) rather than by the expression whose value is returned.\n\n\n\n\nStan supports forward declarations, which look like function definitions without bodies. For example,\nreal unit_normal_lpdf(real y);\ndeclares a function named unit_normal_lpdf that consumes a single real-valued input and produces a real-valued output. Declaring a function without a definition is only really useful when using an extension which supplies the definition in C++ rather than in the Stan code itself. 
How exactly this can be accomplished will differ depending on your Stan interface.\nA function definition with a body simultaneously declares and defines the named function, as in\nreal unit_normal_lpdf(real y) {\n return -0.5 * square(y);\n}\nA function can be declared and (perhaps separately) defined at most once. However, functions with different argument types are considered distinct even if they have the same name; see the section on function overloading.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#function-definition-block", + "href": "reference-manual/user-functions.html#function-definition-block", + "title": "User-Defined Functions", + "section": "", + "text": "User-defined functions appear in a special function-definition block before all of the other program blocks.\nfunctions {\n // ... function declarations and definitions ...\n}\ndata {\n // ...\nFunction definitions and declarations may appear in any order. Forward declarations are allowed but not required.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#function-names", + "href": "reference-manual/user-functions.html#function-names", + "title": "User-Defined Functions", + "section": "", + "text": "The rules for function naming and function-argument naming are the same as for other variables; see the section on variables for more information on valid identifiers. For example,\nreal foo(real mu, real sigma);\ndeclares a function named foo with two argument variables of types real and real. The arguments are named mu and sigma, but that is not part of the declaration.\n\n\nMultiple user-defined functions may have the same name if they have different sequences of argument types. 
This is known as function overloading.\nFor example, the following two functions are both defined with the name add_up\nreal add_up(real a, real b){\n return a + b;\n}\n\nreal add_up(real a, real b, real c){\n return a + b + c;\n}\nThe return types of overloaded functions do not need to be the same. One could define an additional add_up function as follows\nint add_up(int a, int b){\n return a + b;\n}\nThat being said, functions may not use the same name if their signature only differs by the return type.\nFor example, the following is not permitted\n// illegal\nreal baz(int x);\nint baz(int x);\nFunction names used in the Stan standard library may be overloaded by user-defined functions. Exceptions to this are the reduce_sum family of functions and ODE integrators, which cannot be overloaded.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#calling-functions", + "href": "reference-manual/user-functions.html#calling-functions", + "title": "User-Defined Functions", + "section": "", + "text": "All function arguments are mandatory—there are no default values.\n\n\nFunctions with non-void return types are called just like any other built-in function in Stan—they are applied to appropriately typed arguments to produce an expression, which has a value when executed.\n\n\n\nFunctions with void return types may be applied to arguments and used as statements. These act like distribution statements or print statements. Such uses are only appropriate for functions that act through side effects, such as incrementing the log probability accumulator, printing, or raising exceptions.\n\n\n\nOverloaded functions alongside type promotion can result in situations where there are multiple valid interpretations of a function call. 
Stan requires that there be a unique signature which minimizes the number of promotions required.\nConsider the following two overloaded functions\nreal foo(int a, real b);\nreal foo(real a, int b);\nThese functions do not have a unique minimum when called with two integer arguments foo(1,2), and therefore cannot be called as such.\nPromotion of integers to complex numbers is considered as two separate promotions, one from int to real and a second from real to complex. Consider the following functions with real and complex signatures\nreal bar(real x);\nreal bar(complex z);\nA call bar(5) with an integer argument will be resolved to bar(real) because it only requires a single promotion, whereas the promotion to a complex number requires two promotions.\n\n\n\nThe rules for calling functions work the same way as assignment as far as promotion goes. This means that we can promote arguments to the type expected by function arguments. For example, the following will work.\nreal foo(real x) { return ... };\n...\nint a = 5;\nreal b = foo(a); // a promoted to type real\nIn addition to promoting int to real, Stan also promotes real to complex, and by transitivity, int to complex. This also works for containers, so an array of int may be assigned to an array of real of the same shape. And we can also promote vector to complex_vector and similarly for row vectors and matrices.\n\n\n\nFunctions whose name ends in _lpdf or _lpmf (log density and mass functions) may be used as probability functions and may be used in place of parameterized distributions on the right side of statements.qmd#distribution-statements.section.\n\n\n\nFunctions of certain types are restricted on scope of usage. 
Functions whose names end in _lp assume access to the log probability accumulator and are only available in the transformed parameters and model blocks.\nFunctions whose name end in _jacobian assume access to the log probability accumulator may only be used within the transformed parameters block.\nFunctions whose names end in _rng assume access to the random number generator and may only be used within the generated quantities block, transformed data block, and within user-defined functions ending in _rng.\nFunctions whose names end in _lpdf and _lpmf can be used anywhere. However, _lupdf and _lupmf functions can only be used in the model block or user-defined probability functions.\nSee the section on function bodies for more information on these special types of function.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#argument-types-and-qualifiers", + "href": "reference-manual/user-functions.html#argument-types-and-qualifiers", + "title": "User-Defined Functions", + "section": "", + "text": "Stan’s functions all have declared types for both arguments and returned value. As with built-in functions, user-defined functions are only declared for base argument type and dimensionality. This requires a different syntax than for declaring other variables. The choice of language was made so that return types and argument types could use the same declaration syntax.\nThe type void may not be used as an argument type, only a return type for a function with side effects.\n\n\nThe base variable types are integer, real, complex, vector, row_vector, and matrix. No lower-bound or upper-bound constraints are allowed (e.g., real<lower=0> is illegal). 
Specialized constrained types are also not allowed (e.g., simplex is illegal).\nTuple types of the form tuple(T1, ..., TN) are also allowed, with all of the types T1 to TN being function argument types (i.e., no constraints and no sizes).\n\n\n\nArguments and return types may be arrays, and these are indicated with optional brackets and commas as would be used for indexing. For example, int denotes a single integer argument or return, whereas array[] real indicates a one-dimensional array of reals, array[,] real a two-dimensional array and array[,,] real a three-dimensional array; whitespace is optional, as usual.\nThe dimensions for vectors and matrices are not included, so that matrix is the type of a single matrix argument or return type. Thus if a variable is declared as matrix a, then a has two indexing dimensions, so that a[1] is a row vector and a[1, 1] a real value. Matrices implicitly have two indexing dimensions. The type declaration matrix[ , ] b specifies that b is a two-dimensional array of matrices, for a total of four indexing dimensions, with b[1, 1, 1, 1] picking out a real value.\n\n\n\nFunction argument and return types are not themselves checked for dimensionality. A matrix of any size may be passed in as a matrix argument. Nevertheless, a user-defined function might call a function (such as a multivariate normal density) that itself does dimensionality checks.\nDimensions of function return values will be checked if they’re assigned to a previously declared variable. They may also be checked if they are used as the argument to a function.\nAny errors raised by calls to functions inside user functions or return type mismatches are simply passed on; this typically results in a warning message and rejection of a proposal during sampling or optimization.\n\n\n\nSome of Stan’s built-in functions, like the differential equation solvers, have arguments that must be data. 
Such data-only arguments must be expressions involving only data, transformed data, and generated quantity variables.\nIn user-defined functions, the qualifier data may be placed before an argument type declaration to indicate that the argument must be data only. For example,\nreal foo(data real x) {\n return x^2;\n}\nrequires the argument x to be data only.\nDeclaring an argument data only allows type inference to proceed in the body of the function so that, for example, the variable may be used as a data-only argument to a built-in function.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#function-bodies.section", + "href": "reference-manual/user-functions.html#function-bodies.section", + "title": "User-Defined Functions", + "section": "", + "text": "The body of a function is between an open curly brace ({) and close curly brace (}). The body may contain local variable declarations at the top of the function body’s block and these scope the same way as local variables used in any other statement block.\nAny user-defined function may be used in the function body regardless of the order in which the function definitions appear in the file. 
Self-recursive and mutually recursive functions are possible without any additional declarations.\nThe only restrictions on statements in function bodies are external, and determine whether the log probability accumulator or random number generators are available; see the rest of this section for details.\n\n\nFunctions that call random number generating functions in their bodies must have a name that ends in _rng; attempts to use random-number generators in other functions lead to a compile-time error.\nLike other random number generating functions, user-defined functions with names that end in _rng may be used only in the generated quantities block and transformed data block, or within the bodies of user-defined functions ending in _rng. An attempt to use such a function elsewhere results in a compile-time error.\n\n\n\nFunctions that include distribution statements or log probability increment statements must have a name that ends in _lp. Attempts to use distribution statements or increment log probability statements in other functions lead to a compile-time error.\nLike the target log density increment statement and distribution statements, user-defined functions with names that end in _lp may only be used in blocks where the log probability accumulator is accessible, namely the transformed parameters and model blocks. An attempt to use such a function elsewhere results in a compile-time error.\n\n\n\nFunctions whose names end in _lpdf and _lpmf (density and mass functions) can be used as probability functions in distribution statements. As with the built-in functions, the first argument will appear on the left of the distribution statement operator (~) in the distribution statement and the other arguments follow. For example, suppose a function returning the log of the density of y given parameter theta allows the use of the distribution statement is defined as follows.\nreal foo_lpdf(real y, vector theta) { ... 
}\nNote that for function definitions, the comma is used rather than the vertical bar.\nFor every custom _lpdf and _lpmf defined there is a corresponding _lupdf and _lupmf defined automatically. The _lupdf and _lupmf versions of the functions cannot be defined directly (to do so will produce an error). The difference in the _lpdf and _lpmf and the corresponding _lupdf and _lupmf functions is that if any other unnormalized density functions are used inside the user-defined function, the _lpdf and _lpmf forms of the user-defined function will change these densities to be normalized. The _lupdf and _lupmf forms of the user-defined functions will instead allow other unnormalized density functions to drop additive constants.\nThe distribution statement shorthand\nz ~ foo(phi);\nwill have the same effect as incrementing the target with the log of the unnormalized density:\ntarget += foo_lupdf(z | phi);\nOther _lupdf and _lupmf functions used in the definition of foo_lpdf will drop additive constants when foo_lupdf is called and will not drop additive constants when foo_lpdf is called.\nIf there are _lupdf and _lupmf functions used inside the following call to foo_lpdf, they will be forced to normalize (return the equivalent of their _lpdf and _lpmf forms):\ntarget += foo_lpdf(z | phi);\nIf there are no _lupdf or _lupmf functions used in the definition of foo_lpdf, then there will be no difference between a foo_lpdf or foo_lupdf call.\nThe unnormalized _lupdf and _lupmf functions can only be used in the model block or in user-defined probability functions (those ending in _lpdf or _lpmf).\nThe same syntax and shorthand that works for _lpdf also works for log probability mass functions with suffixes _lpmf.\nA function that is going to be accessed as distributions must return the log of the density or mass function it defines.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": 
"reference-manual/user-functions.html#parameters-are-constant", + "href": "reference-manual/user-functions.html#parameters-are-constant", + "title": "User-Defined Functions", + "section": "", + "text": "Within function definition bodies, the parameters may be used like any other variable. But the parameters are constant in the sense that they can’t be assigned to (i.e., can’t appear on the left side of an assignment (=) statement). In other words, their value remains constant throughout the function body. Attempting to assign a value to a function parameter value will raise a compile-time error.1\nLocal variables may be declared at the top of the function block and scope as usual.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#return-value", + "href": "reference-manual/user-functions.html#return-value", + "title": "User-Defined Functions", + "section": "", + "text": "Non-void functions must have a return statement that returns an appropriately typed expression. If the expression in a return statement does not have the same type as the return type declared for the function, a compile-time error is raised.\nVoid functions may use return only without an argument, but return statements are not mandatory.\n\n\nUnlike C++, Stan enforces a syntactic guarantee for non-void functions that ensures control will leave a non-void function through an appropriately typed return statement or because an exception is raised in the execution of the function. To enforce this condition, functions must have a return statement as the last statement in their body. This notion of last is defined recursively in terms of statements that qualify as bodies for functions. 
The base case is that\n\na return statement qualifies,\n\nand the recursive cases are that\n\na sequence of statements qualifies if its last statement qualifies,\na for loop or while loop qualifies if its body qualifies, and\na conditional statement qualifies if it has a default else clause and all of its body statements qualify.\n\nAn exception is made for “obviously infinite” loops like while (1), which contain a return statement and no break statements. The only way to exit such a loop is to return, so they are considered as returning statements.\nThese rules disqualify\nreal foo(real x) {\n if (x > 2) {\n return 1.0;\n } else if (x <= 2) {\n return -1.0;\n }\n}\nbecause there is no default else clause, and disqualify\nreal foo(real x) {\n real y;\n y = x;\n while (x < 10) {\n if (x > 0) {\n return x;\n }\n y = x / 2;\n }\n}\nbecause the return statement is not the last statement in the while loop. A bogus dummy return could be placed after the while loop in this case. The rules for returns allow\nreal log_fancy(real x) {\n if (x < 1e-30) {\n return x;\n } else if (x < 1e-14) {\n return x * x;\n } else {\n return log(x);\n }\n}\nbecause there’s a default else clause and each condition body has return as its final statement.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#void-functions-as-statements", + "href": "reference-manual/user-functions.html#void-functions-as-statements", + "title": "User-Defined Functions", + "section": "", + "text": "A function can be declared without a return value by using void in place of a return type. 
Note that the type void may only be used as a return type—arguments may not be declared to be of type void.\n\n\n\nA void function may be used as a statement.\nBecause there is no return, such a usage is only for side effects, such as incrementing the log probability function, printing, or raising an error.\n\n\n\nIn a return statement within a void function’s definition, the return keyword is followed immediately by a semicolon (;) rather than by the expression whose value is returned.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#forward-declarations.section", + "href": "reference-manual/user-functions.html#forward-declarations.section", + "title": "User-Defined Functions", + "section": "", + "text": "Stan supports forward declarations, which look like function definitions without bodies. For example,\nreal unit_normal_lpdf(real y);\ndeclares a function named unit_normal_lpdf that consumes a single real-valued input and produces a real-valued output. Declaring a function without a definition is only really useful when using an extension which supplies the definition in C++ rather than in the Stan code itself. How exactly this can be accomplished will differ depending on your Stan interface.\nA function definition with a body simultaneously declares and defines the named function, as in\nreal unit_normal_lpdf(real y) {\n return -0.5 * square(y);\n}\nA function can be declared and (perhaps separately) defined at most once. 
However, functions with different argument types are considered distinct even if they have the same name; see the section on function overloading.", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/user-functions.html#footnotes", + "href": "reference-manual/user-functions.html#footnotes", + "title": "User-Defined Functions", + "section": "Footnotes", + "text": "Footnotes\n\n\nDespite being declared constant and appearing to have a pass-by-value syntax in Stan, the implementation of the language passes function arguments by constant reference in C++.↩︎", + "crumbs": [ + "Reference Manual", + "Language", + "User-Defined Functions" + ] + }, + { + "objectID": "reference-manual/transforms.html", + "href": "reference-manual/transforms.html", + "title": "Constraint Transforms", + "section": "", + "text": "To avoid having to deal with constraints while simulating the Hamiltonian dynamics during sampling, every (multivariate) parameter in a Stan model is transformed to an unconstrained variable behind the scenes by the model compiler. The transform is based on the constraints, if any, in the parameter’s definition. Scalars or the scalar values in vectors, row vectors or matrices may be constrained with lower and/or upper bounds. Vectors may alternatively be constrained to be ordered, positive ordered, or simplexes. Matrices may be constrained to be correlation matrices or covariance matrices. This chapter provides a definition of the transforms used for each type of variable. For examples of how to declare and define these variables in a Stan program, see section Variable declaration. 
To directly access the functional form of these transformations from inside the Stan language, see Variable Transformation Functions in the Functions Reference.\nStan converts models to C++ classes which define probability functions with support on all of \\(\\mathbb{R}^K\\), where \\(K\\) is the number of unconstrained parameters needed to define the constrained parameters defined in the program. The C++ classes also include code to transform the parameters from unconstrained to constrained and apply the appropriate Jacobians.\n\n\nIn this section the transformations are described mathematically. However, observed behavior can be different from the exact arithmetic.\nStan’s arithmetic is implemented using double-precision floating-point numbers, which may cause computation to behave differently than mathematics. For example, the lower bound constraint is defined above by an exponential inverse transform which mathematically excludes the lower bound, but if the closest floating-point number for the inverse transformed value is the boundary, then the value is rounded to the boundary. This may cause unexpected warnings or errors, if in other parts of the code the boundary value is invalid. For example, we may observe floating-point value 0 for a variance parameter that has been declared with lower=0. In general, double-precision floating-point numbers cannot reliably store more than 16 digits of a number in decimal. See more about floating point arithmetic in the Stan User’s Guide.\nThese issues are exacerbated by the fact that CmdStan stores the output to CSV files with 8 digits precision by default. 
More digits can be requested by the user at the cost of additional disk usage, as discussed in the CmdStan Command-Line Interface Overview.\n\n\n\nThe support of a random variable \\(X\\) with density \\(p_X(x)\\) is that subset of values for which it has non-zero density,\n\\[\n\\mathrm{supp}(X) = \\{ x | p_X(x) > 0 \\}.\n\\]\nIf \\(f\\) is a total function defined on the support of \\(X\\), then \\(Y =\nf(X)\\) is a new random variable. This section shows how to compute the probability density function of \\(Y\\) for well-behaved transforms \\(f\\). The rest of the chapter details the transforms used by Stan.\n\n\nSuppose \\(X\\) is one dimensional and \\(f: \\mathrm{supp}(X) \\rightarrow\n\\mathbb{R}\\) is a one-to-one, monotonic function with a differentiable inverse \\(f^{-1}\\). Then the density of \\(Y\\) is given by\n\\[\np_Y(y) = p_X(f^{-1}(y))\n \\,\n \\left| \\, \\frac{d}{dy} f^{-1}(y)\\, \\right|.\n\\]\nThe absolute derivative of the inverse transform measures how the scale of the transformed variable changes with respect to the underlying variable.\n\n\n\nThe multivariate generalization of an absolute derivative is a Jacobian, or more fully the absolute value of the determinant of the Jacobian matrix of the transform. The Jacobian matrix measures the change of each output variable relative to every input variable and the absolute determinant uses that to determine the differential change in volume at a given point in the parameter space.\nSuppose \\(X\\) is a \\(K\\)-dimensional random variable with probability density function \\(p_X(x)\\). A new random variable \\(Y = f(X)\\) may be defined by transforming \\(X\\) with a suitably well-behaved function \\(f\\). 
It suffices for what follows to note that if \\(f\\) is one-to-one and its inverse \\(f^{-1}\\) has a well-defined Jacobian, then the density of \\(Y\\) is\n\\[\np_Y(y) = p_X(f^{-1}(y)) \\, \\left| \\, \\det \\, J_{f^{-1}}(y) \\, \\right|,\n\\]\nwhere \\(\\det{}\\) is the matrix determinant operation and \\(J_{f^{-1}}(y)\\) is the Jacobian matrix of \\(f^{-1}\\) evaluated at \\(y\\). Taking \\(x =\nf^{-1}(y)\\), the Jacobian matrix is defined by\n\\[\nJ_{f^{-1}}(y) =\n\\left[\n\\begin{array}{ccc}\\displaystyle\n\\frac{\\partial x_1}{\\partial y_1}\n& \\cdots\n& \\displaystyle \\frac{\\partial x_1}{\\partial y_{K}}\n\\\\\n\\vdots & \\vdots & \\vdots\n\\\\\n\\displaystyle\\frac{\\partial x_{K}}{\\partial y_1}\n& \\cdots\n& \\displaystyle\\frac{\\partial x_{K}}{\\partial y_{K}}\n\\end{array}\n\\right].\n\\]\nIf the Jacobian matrix is triangular, the determinant reduces to the product of the diagonal entries,\n\\[\n\\det \\, J_{f^{-1}}(y)\n= \\prod_{k=1}^K \\frac{\\partial x_k}{\\partial y_k}.\n\\]\nTriangular matrices naturally arise in situations where the variables are ordered, for instance by dimension, and each variable’s transformed value depends on the previous variable’s transformed values. 
Diagonal matrices, a simple form of triangular matrix, arise if each transformed variable only depends on a single untransformed variable.\n\n\n\n\nStan uses a logarithmic transform for lower and upper bounds.\n\n\nIf a variable \\(X\\) is declared to have lower bound \\(a\\), it is transformed to an unbounded variable \\(Y\\), where\n\\[\nY = \\log(X - a).\n\\]\n\n\n\nThe inverse of the lower-bound transform maps an unbounded variable \\(Y\\) to a variable \\(X\\) that is bounded below by \\(a\\) by\n\\[\nX = \\exp(Y) + a.\n\\]\n\n\n\nThe absolute derivative of the inverse transform is\n\\[\n\\left| \\,\n\\frac{d}{dy} \\left( \\exp(y) + a \\right)\n\\, \\right|\n= \\exp(y).\n\\]\nTherefore, given the density \\(p_X\\) of \\(X\\), the density of \\(Y\\) is\n\\[\np_Y(y)\n= p_X\\!\\left( \\exp(y) + a \\right) \\cdot \\exp(y).\n\\]\n\n\n\n\nStan uses a negated logarithmic transform for upper bounds.\n\n\nIf a variable \\(X\\) is declared to have an upper bound \\(b\\), it is transformed to the unbounded variable \\(Y\\) by\n\\[\nY = \\log(b - X).\n\\]\n\n\n\nThe inverse of the upper bound transform converts the unbounded variable \\(Y\\) to the variable \\(X\\) bounded above by \\(b\\) through\n\\[\nX = b - \\exp(Y).\n\\]\n\n\n\nThe absolute derivative of the inverse of the upper bound transform is\n\\[\n\\left| \\,\n\\frac{d}{dy} \\left( b - \\exp(y) \\right)\n\\, \\right|\n= \\exp(y).\n\\]\nTherefore, the density of the unconstrained variable \\(Y\\) is defined in terms of the density of the variable \\(X\\) with an upper bound of \\(b\\) by\n\\[\np_Y(y)\n= p_X \\!\\left( b - \\exp(y) \\right) \\cdot \\exp(y).\n\\]\n\n\n\n\nFor lower and upper-bounded variables, Stan uses a scaled and translated log-odds transform.\n\n\nThe log-odds function is defined for \\(u \\in (0,1)\\) by\n\\[\n\\mathrm{logit}(u) = \\log \\frac{u}{1 - u}.\n\\]\nThe inverse of the log odds function is the logistic sigmoid, defined for \\(v \\in (-\\infty,\\infty)\\) 
by\n\\[\n\\mathrm{logit}^{-1}(v) = \\frac{1}{1 + \\exp(-v)}.\n\\]\nThe derivative of the logistic sigmoid is\n\\[\n\\frac{d}{dy} \\mathrm{logit}^{-1}(y)\n= \\mathrm{logit}^{-1}(y) \\cdot \\left( 1 - \\mathrm{logit}^{-1}(y) \\right).\n\\]\n\n\n\nFor variables constrained to be in the open interval \\((a, b)\\), Stan uses a scaled and translated log-odds transform. If variable \\(X\\) is declared to have lower bound \\(a\\) and upper bound \\(b\\), then it is transformed to a new variable \\(Y\\), where\n\\[\nY = \\mathrm{logit} \\left( \\frac{X - a}{b - a} \\right).\n\\]\n\n\n\nThe inverse of this transform is\n\\[\nX = a + (b - a) \\cdot \\mathrm{logit}^{-1}(Y).\n\\]\n\n\n\nThe absolute derivative of the inverse transform is given by\n\\[\n\\left|\n \\frac{d}{dy}\n \\left(\n a + (b - a) \\cdot \\mathrm{logit}^{-1}(y)\n \\right)\n \\right|\n= (b - a)\n \\cdot \\mathrm{logit}^{-1}(y)\n \\cdot \\left( 1 - \\mathrm{logit}^{-1}(y) \\right).\n\\]\nTherefore, the density of the transformed variable \\(Y\\) is\n\\[\np_Y(y)\n=\np_X \\! \\left( a + (b - a) \\cdot \\mathrm{logit}^{-1}(y) \\right)\n \\cdot (b - a)\n \\cdot \\mathrm{logit}^{-1}(y)\n \\cdot \\left( 1 - \\mathrm{logit}^{-1}(y) \\right).\n\\]\nDespite the apparent complexity of this expression, most of the terms are repeated and thus only need to be evaluated once. Most importantly, \\(\\mathrm{logit}^{-1}(y)\\) only needs to be evaluated once, so there is only one call to \\(\\exp(-y)\\).\n\n\n\n\nStan uses an affine transform to be able to specify parameters with a given offset and multiplier.\n\n\nFor variables with expected offset \\(\\mu\\) and/or (positive) multiplier \\(\\sigma\\), Stan uses an affine transform. 
Such a variable \\(X\\) is transformed to a new variable \\(Y\\), where\n\\[\nY = \\frac{X - \\mu}{\\sigma}.\n\\]\nThe default value for the offset \\(\\mu\\) is \\(0\\) and for the multiplier \\(\\sigma\\) is \\(1\\) in case not both are specified.\n\n\n\nThe inverse of this transform is\n\\[\nX = \\mu + \\sigma \\cdot Y.\n\\]\n\n\n\nThe absolute derivative of the affine inverse transform is\n\\[\n\\left|\n \\frac{d}{dy}\n \\left(\n \\mu + \\sigma \\cdot y\n \\right)\n \\right|\n= \\sigma.\n\\]\nTherefore, the density of the transformed variable \\(Y\\) is\n\\[\np_Y(y)\n=\np_X \\! \\left( \\mu + \\sigma \\cdot y \\right)\n \\cdot \\sigma.\n\\]\nFor an example of how to code this in Stan, see section Affinely Transformed Real.\n\n\n\n\nFor some modeling tasks, a vector-valued random variable \\(X\\) is required with support on ordered sequences. One example is the set of cut points in ordered logistic regression.\nIn constraint terms, an ordered \\(K\\)-vector \\(x \\in \\mathbb{R}^K\\) satisfies\n\\[\nx_k < x_{k+1}\n\\]\nfor \\(k \\in \\{ 1, \\ldots, K-1 \\}\\).\n\n\nStan’s transform follows the constraint directly. 
It maps an increasing vector \\(x \\in \\mathbb{R}^{K}\\) to an unconstrained vector \\(y \\in\n\\mathbb{R}^K\\) by setting\n\\[\ny_k\n=\n\\left\\{\n\\begin{array}{ll}\nx_1 & \\mbox{if } k = 1, \\mbox{ and}\n\\\\\n\\log \\left( x_{k} - x_{k-1} \\right) & \\mbox{if } 1 < k \\leq K.\n\\end{array}\n\\right.\n\\]\n\n\n\nThe inverse transform for an unconstrained \\(y \\in \\mathbb{R}^K\\) to an ordered sequence \\(x \\in \\mathbb{R}^K\\) is defined by the recursion\n\\[\nx_k\n=\n\\left\\{\n\\begin{array}{ll}\ny_1 & \\mbox{if } k = 1, \\mbox{ and}\n\\\\\nx_{k-1} + \\exp(y_k) & \\mbox{if } 1 < k \\leq K.\n\\end{array}\n\\right.\n\\]\n\\(x_k\\) can also be expressed iteratively as\n\\[\nx_k = y_1 + \\sum_{k'=2}^k \\exp(y_{k'}).\n\\]\n\n\n\nThe Jacobian of the inverse transform \\(f^{-1}\\) is lower triangular, with diagonal elements for \\(1 \\leq k \\leq K\\) of\n\\[\nJ_{k,k} =\n\\left\\{\n\\begin{array}{ll}\n1 & \\mbox{if } k = 1, \\mbox{ and}\n\\\\\n\\exp(y_k) & \\mbox{if } 1 < k \\leq K.\n\\end{array}\n\\right.\n\\]\nBecause \\(J\\) is triangular, the absolute Jacobian determinant is\n\\[\n\\left| \\, \\det \\, J \\, \\right|\n\\ = \\\n\\left| \\, \\prod_{k=1}^K J_{k,k} \\, \\right|\n\\ = \\\n\\prod_{k=2}^K \\exp(y_k).\n\\]\nPutting this all together, if \\(p_X\\) is the density of \\(X\\), then the transformed variable \\(Y\\) has density \\(p_Y\\) given by\n\\[\np_Y(y)\n= p_X(f^{-1}(y))\n\\\n\\prod_{k=2}^K \\exp(y_k).\n\\]\n\n\n\n\nPlaceholder.\nThe positive ordered transformation is defined in the same style as the ordered transformation above, but with the first element being exponentiated to ensure positivity.\n\n\n\nStan provides built-in constraint transforms for sum-to-zero vectors and sum-to-zero matrices. The sum-to-zero vector is a vector of length \\(N\\) with real values and the sum of the vector equals zero. 
The sum-to-zero matrix is an \\(N \\times M\\) matrix where both the rows and columns sum-to-zero.\nStan uses an orthogonal basis as the initial point of construction. The orthogonal basis balances the constraint across each unconstrained value. The basis is a matrix \\(H\\) such that \\(H\\in\\mathbb R^{N\\times (N-1)}\\) and \\(H^{\\mathsf T}H=I_{N-1},\\;H^{\\mathsf T}\\mathbf 1=0\\). The sum-to-zero vector lies in the subspace where the vector sums to zero. Although this seems tautological, the orthogonal basis construction allows all the marginal variances of the constrained vector to be the same; see, e.g., (Seyboldt 2024). Simpler alternatives, such as setting the final element to the negative sum of the first elements, do not result in equal variances. It is worth noting that even with marginal variances being equal each value in the sum-to-zero constrained space is negatively correlated.\nIn many cases one wishes to model the sum-to-zero vector as normally distributed and the induced covariance matrix is fully known:\n\\[\n \\sigma^2 \\begin{pmatrix}\n 1-\\tfrac{1}{N} & -\\tfrac{1}{N} & \\cdots&-\\tfrac{1}{N} \\\\\n -\\tfrac{1}{N} & 1-\\tfrac{1}{N} & \\cdots & -\\tfrac{1}{N} \\\\\n \\vdots & \\vdots & \\ddots & \\vdots \\\\\n -\\tfrac{1}{N} & -\\tfrac{1}{N} & \\cdots &1-\\tfrac{1}{N}\n \\end{pmatrix}.\n\\]\nThe marginal standard deviation no longer corresponds to \\(\\sigma\\) but is \\(\\sigma \\sqrt{1-\\tfrac{1}{N}}\\). The properties of the normal distribution allow multiplying the sum-to-zero vector by the reciprocal of \\(\\sqrt{1-\\tfrac{1}{N}}\\) to adjust the variance to the intended \\(\\sigma\\):\nsum_to_zero_vector[N] x;\nx ~ normal(0, sqrt(N * inv(N - 1)));\nWhen \\(\\sigma\\) is a parameter there is an additional adjustment when using the centered version of the sum-to-zero constraint. 
If we let \\(y\\) be the unconstrained \\(N - 1\\) vector and \\(y\\) is implicitly given a standard normal prior, then the sum-to-zero vector distributed as normal with a mean of zero and a standard deviation of \\(\\sigma\\) is given by \\[\nx = \\underbrace{\\sigma\\sqrt{\\frac{N}{N-1}}\\;H}_{\\text{ size of } N \\times (N - 1)}\\,y.\n\\] This is the classic non-centered model. The crucial detail from above is that the operation of multiplying \\(\\sigma\\) to \\(y\\) is on \\(N - 1\\) dimensions, not \\(N\\) dimensions. When writing the centered model using \\[\nx \\sim \\mathcal N \\bigg(0,\\, \\sigma\\sqrt{\\frac{N}{N-1}} \\bigg),\n\\] we are incrementing the log density by an additional \\(-\\log(\\sigma)\\) and so must increment the log density by\ntarget += log(sigma * sqrt(N * inv(N - 1)));\n\n// or\n// because `sqrt(N * inv(N - 1))` is constant\n\ntarget += log(sigma);\nwhich correctly adjusts for the \\(N - 1\\) free variables.\nMore details of the Helmert matrix are in (Lancaster 1965), for the basic definitions of the isometric log ratio transform see (Egozcue et al. 2003) and Chapter 3 of (Filzmoser, Hron, and Templ 2018) for the pivot coordinate version used here.\n\n\nVectors that are constrained to sum-to-zero are useful for, among other things, additive varying effects, such as varying slopes or intercepts in a regression model (e.g., for income deciles).\nA sum-to-zero \\(K\\)-vector \\(x \\in \\mathbb{R}^K\\) satisfies the constraint \\[\n\\sum_{k=1}^K x_k = 0.\n\\]\n\n\n\nThe transform is defined iteratively. Given an \\(x \\in \\mathbb{R}^{N + 1}\\) that sums to zero (i.e., \\(\\sum_{n=1}^{N+1} x_n = 0\\)), the transform proceeds as follows to produce an unconstrained \\(y \\in \\mathbb{R}^N\\). This is mathematically equivalent to pre-multiplying the unconstrained \\(y\\) by an orthogonal standard basis, e.g. constructing orthogonal vectors from the standard basis using the Gram-Schmidt process. 
The single loop version below achieves low computational and memory costs as no matrices are created or multiplied.\nThe transform is initialized by setting \\[\nS_N = 0\n\\] and \\[\ny_N = -x_{N + 1} \\sqrt{1 + \\frac{1}{N}}.\n\\] Then for each \\(n\\) from \\(N - 1\\) down to \\(1\\), let \\[\nw_{n + 1} = \\frac{y_{n + 1}}{\\sqrt{(n + 1)(n + 2)}},\n\\] \\[\nS_n = S_{n + 1} + w_{n + 1},\n\\] and \\[\ny_n = (S_n - x_{n + 1}) \\frac{\\sqrt{n (n + 1)}}{n}.\n\\]\nThis transform is expressed in Stan code as:\n vector manual_sum_to_zero_jacobian(vector x) {\n int N = size(x) - 1;\n vector[N] y;\n y[N] = -x[N+1] * sqrt(1 + 1. / N);\n real sum_w = 0;\n for (n in 1:(N-1)) {\n int i = N - n;\n int i_p_1 = i + 1;\n real w = y[i_p_1] * inv_sqrt(i_p_1 * (i_p_1 + 1));\n sum_w += w;\n y[i] = (sum_w - x[i_p_1]) * sqrt(i_p_1 * i) / i;\n }\n return y;\n }\n\n\n\nThe inverse transform follows the isometric logratio transform. It maps an unconstrained vector \\(y \\in \\mathbb{R}^N\\) to a sum-to-zero vector \\(x \\in \\mathbb{R}^{N + 1}\\) such that \\[\n\\sum_{n=1}^{N + 1} x_n = 0.\n\\] The values are defined inductively, starting with \\[\nx_1 = \\sum_{n=1}^N \\frac{y_n}{\\sqrt{n (n + 1)}}\n\\] and then setting \\[\nx_{n + 1} = \\sum_{i = n + 1}^N \\frac{y_i}{\\sqrt{i (i + 1)}}\n- n \\cdot \\frac{y_n}{\\sqrt{n (n + 1)}}\n\\] for \\(n \\in 1{:}N\\).\nThe definition is such that \\[\n\\sum_{n = 1}^{N + 1} x_n = 0\n\\] by construction, because each of the terms added to \\(x_{n}\\) is then subtracted from \\(x_{n + 1}\\) the number of times it shows up in earlier terms.\n\n\n\nThe inverse transform is a linear operation, leading to a constant Jacobian determinant which is therefore not included.\nThe sum-to-zero inverse transform is expressed within Stan as:\n vector manual_sum_to_zero_inverse_jacobian(vector y) {\n int N = num_elements(y);\n vector[N + 1] x = zeros_vector(N + 1);\n real sum_w = 0;\n for (n in 1:N) {\n int i = N - n + 1;\n real w = y[i] * inv_sqrt(i * (i + 1));\n sum_w 
+= w;\n x[i] += sum_w;\n x[i + 1] -= w * i;\n }\n return x;\n }\nNote that there is no target += increment because the Jacobian determinant is constant.\n\n\n\nThe matrix case of the sum-to-zero transform generalizes the vector case to ensure that every row of the matrix sums to zero and every column of the matrix sums to zero. In fact, any \\(N\\)-dimensional array can be constructed into a sum-to-zero N-dimensional array using the sum-to-zero vector. This is because the vector transform is a linear bijection and produces an orthogonally constructed sum-to-zero object by applying the one dimensional transform across each array slice. For the matrix case, there are two slices present, the rows and columns, to perform the transform over. The sum-to-zero vector is applied over the vectorized slice of either the row or column slice and subsequently to the other slice.\nLet the unconstrained matrix be \\[\n\\mathcal Y \\in \\mathbb R^{n_1 \\times n_2}\n\\] and the zero sum vector transform as \\[\n\\mathbf z = \\mathcal C_n(\\mathbf y)\\;=\\;\n\\begin{bmatrix}H_n\\\\[2pt]-\\mathbf 1_{1\\times d}\\end{bmatrix}\\mathbf y\n\\;\\in\\mathbb R^{n+1},\n\\] where \\(H_n\\in\\mathbb R^{n \\times n}\\) is the orthogonal Helmert matrix and satisfies \\(\\mathbf 1_{1\\times d}A_d^{\\!\\top}=0\\).\nApplying \\(C_n\\) to each slice results in\n\\[\n\\mathcal Z \\;=\\;\n\\mathcal Y\n\\times_1 \\bigl[\\mathcal C_{n_1}\\bigr]\n\\times_2 \\bigl[\\mathcal C_{n_2}\\bigr]\n\\] where \\[\n\\mathcal Z \\in\n\\mathbb R^{(n_1+1)\\times\\cdots\\times(n_2+1)}.\n\\]\nBecause each \\(\\mathbb R^{d_1\\times\\cdots\\times d_N}\\) is invertible on the \\(\\mathbf 1^\\perp\\) subspace the composite map applied to \\(\\mathcal Z\\) is a linear bijection between \\(\\mathbb R^{n_1\\times n_2}\\) and the codomain \\(\\mathbb R^{(n_1+1)\\times\\cdots\\times(n_2+1)}\\).\n\n\n\n\nVariables constrained to the unit simplex show up in multivariate discrete models as both parameters (categorical and multinomial) and as variates 
generated by their priors (Dirichlet and multivariate logistic).\nThe unit \\(K\\)-simplex is the set of points \\(x \\in \\mathbb{R}^K\\) such that for \\(1 \\leq k \\leq K\\),\n\\[\nx_k > 0,\n\\]\nand\n\\[\n\\sum_{k=1}^K x_k = 1.\n\\]\nAn alternative definition is to take the convex closure of the vertices. For instance, in 2-dimensions, the simplex vertices are the extreme values \\((0,1)\\), and \\((1,0)\\) and the unit 2-simplex is the line connecting these two points; values such as \\((0.3,0.7)\\) and \\((0.99,0.01)\\) lie on the line. In 3-dimensions, the basis is \\((0,0,1)\\), \\((0,1,0)\\) and \\((1,0,0)\\) and the unit 3-simplex is the boundary and interior of the triangle with these vertices. Points in the 3-simplex include \\((0.5,0.5,0)\\), \\((0.2,0.7,0.1)\\) and all other triplets of non-negative values summing to 1.\nAs these examples illustrate, the simplex always picks out a subspace of \\(K-1\\) dimensions from \\(\\mathbb{R}^K\\). Therefore a point \\(x\\) in the \\(K\\)-simplex is fully determined by its first \\(K-1\\) elements \\(x_1, x_2,\n\\ldots, x_{K-1}\\), with\n\\[\nx_K = 1 - \\sum_{k=1}^{K-1} x_k.\n\\]\n\n\nThe length-\\(K\\) unit simplex inverse transform is given by the softmax of a sum-to-zero vector of length \\(K\\).\nLet \\(y\\) represent the unconstrained \\(K - 1\\) values in \\((-\\infty, \\infty)\\). The intermediate sum-to-zero vector \\(z = \\text{sum\\_to\\_zero\\_transform}(y)\\) is length \\(K\\). The unit simplex is then given by \\[\nx_i = \\text{softmax}(z) = \\frac{\\exp(z_i)}{\\sum_{i = 1}^K \\exp(z_i)}\n\\]\nThe sum-to-zero vector transform is described in further detail at the sum-to-zero vector section of the Reference Manual.\n\n\n\n\n\n\nNote\n\n\n\nAll versions of Stan pre-2.37 used the stick-breaking transform. 
This is documented at Stan 2.36 Reference Manual: Simplex Transform.\n\n\n\n\nThe Jacobian \\(J\\) of the inverse unit-simplex transform is found by restricting \\(J\\) to the subspace spanned by the sum-to-zero vector \\(z\\). The Jacobian is given as the \\((K - 1) \\times (K - 1)\\) matrix \\(J\\) where\n\\[\nJ_{ij} = \\frac{\\partial x_i}{\\partial z_j} =\n\\frac{\\partial}{\\partial z_j} \\left( \\frac{\\exp(z_i)}{{\\sum_{k = 1}^K \\exp(z_k)}} \\right)\n\\] and \\(i,j \\in 1, \\ldots, K - 1\\).\nThe diagonal and off-diagonal derivatives are found using the derivative quotient rule and algebraic simplification\n\\[\nJ_{ij} =\n\\begin{cases}\nx_i (1 - x_i), & \\text{if } i = j, \\\\\n-x_i x_j, & \\text{if } i \\neq j.\n\\end{cases}\n\\]\nIn matrix form this can be expressed as\n\\[\nJ = \\text{diag}(x) - x x^\\top\n\\]\nThe determinant of this matrix can be found using the Matrix Determinant Lemma:\n\\[\n\\det\\bigl(A + u v^{\\top}\\bigr)\n=\n\\det(A)\\,\\bigl(1 + v^{\\top}A^{-1}u\\bigr).\n\\]\nHere,\n\\[\nA \\;=\\; \\operatorname{diag}(x_{1},\\ldots, x_{K-1}),\n\\quad\nu \\;=\\; -\\bigl(x_1,\\ldots, x_{K-1}\\bigr)^{\\!\\top},\n\\quad\nv \\;=\\; \\bigl(x_{1}, \\ldots, x_{K-1}\\bigr)^{\\!\\top}.\n\\] Therefore,\n\\[\n\\begin{aligned}\n\\det(J)\n&=\n\\bigg(\\prod_{i=1}^{K-1} x_i \\bigg)\n\\bigg(1 + (x_{1},\\ldots, x_{K-1})\\,\\mathrm{diag}\\bigl(x_{1}^{-1},\\ldots,x_{K-1}^{-1}\\bigr)\\,\n\\big(-x_{1},\\ldots,-x_{K-1}\\big)^{\\top}\n\\bigg) \\\\\n&=\n\\bigg(\\prod_{i=1}^{K-1} x_{i}\\bigg)\n\\bigg(1 - \\sum_{i=1}^{K-1} x_{i}\\bigg)\n=\n\\bigg(\\prod_{i=1}^{K-1} x_{i}\\bigg) x_{K} \\\\\n&=\n\\prod_{i=1}^{K} x_{i}.\n\\end{aligned}\n\\]\n\n\n\n\nThe transform \\(Y = f(X)\\) can be derived by reversing the stages of the inverse transform,\n\\[\ny_k\n= H^\\top \\bigg(\\log(x_k)\n- \\frac{1}{K}\\sum_{i=1}^K\\log(x_i) \\bigg)\n.\n\\]\nThe matrix \\(H\\) is the orthogonal basis matrix the sum-to-zero vector uses. 
Since the matrix is orthonormal, the transpose is the same as the inverse.\n\n\n\n\nThe column_stochastic_matrix[N, M] and row_stochastic_matrix[M, N] types in Stan represent an \\(N \\times M\\) matrix where each column (row) is a unit simplex of dimension \\(N\\). In other words, each column (row) of the matrix is a vector constrained to have non-negative entries that sum to one.\n\n\nA column stochastic matrix \\(X \\in \\mathbb{R}^{N \\times M}\\) is defined such that each column is a simplex. For column \\(m\\) (where \\(1 \\leq m \\leq M\\)):\n\\[\nX_{n, m} \\geq 0 \\quad \\text{for } 1 \\leq n \\leq N,\n\\]\nand\n\\[\n\\sum_{n=1}^N X_{n, m} = 1.\n\\]\nA row stochastic matrix is any matrix whose transpose is a column stochastic matrix (i.e. the rows of the matrix are simplexes)\n\\[\nX_{n, m} \\geq 0 \\quad \\text{for } 1 \\leq n \\leq N,\n\\]\nand\n\\[\n\\sum_{m=1}^N X_{n, m} = 1.\n\\]\nThis definition ensures that each column (row) of the matrix \\(X\\) lies on the \\(N-1\\) dimensional unit simplex, similar to the simplex[N] type, but extended across multiple columns (rows).\n\n\n\nFor the column and row stochastic matrices the inverse transform is the same as simplex, but applied to each column (row).\n\n\n\nThe Jacobian determinant of the inverse transform for each column \\(m\\) in the matrix is given by the product of the diagonal entries \\(J_{n, m}\\) of the lower-triangular Jacobian matrix. 
This determinant is calculated as:\n\\[\n\\left| \\det J_m \\right| = \\prod_{n=1}^{N-1} \\left( z_{n, m} (1 - z_{n, m}) \\left( 1 - \\sum_{n'=1}^{n-1} X_{n', m} \\right) \\right).\n\\]\nThus, the overall Jacobian determinant for the entire column_stochastic_matrix and row_stochastic_matrix is the product of the determinants for each column (row):\n\\[\n\\left| \\det J \\right| = \\prod_{m=1}^{M} \\left| \\det J_m \\right|.\n\\]\n\n\n\nFor the column and row stochastic matrices the transform is the same as simplex, but applied to each column (row).\n\n\n\n\nAn \\(n\\)-dimensional vector \\(x \\in \\mathbb{R}^n\\) is said to be a unit vector if it has unit Euclidean length, so that\n\\[\n\\Vert x \\Vert\n\\ = \\ \\sqrt{x^{\\top}\\,x}\n\\ = \\ \\sqrt{x_1^2 + x_2^2 + \\cdots + x_n^2}\n\\ = \\ 1\\ .\n\\]\n\n\nStan divides an unconstrained vector \\(y \\in \\mathbb{R}^{n}\\) by its norm, \\(\\Vert y \\Vert = \\sqrt{y^\\top y}\\), to obtain a unit vector \\(x\\),\n\\[\nx = \\frac{y}{\\Vert y \\Vert}.\n\\]\nTo generate a unit vector, Stan generates points at random in \\(\\mathbb{R}^n\\) with independent unit normal distributions, which are then standardized by dividing by their Euclidean length. Muller (1959) showed this generates points uniformly at random on \\(S^{n-1}\\). That is, if we draw \\(y_i \\sim \\mathsf{Normal}(0, 1)\\) for \\(i \\in 1{:}n\\), then \\(x = \\frac{y}{\\Vert y \\Vert}\\) has a uniform distribution over \\(S^{n-1}\\). This allows us to use an \\(n\\)-dimensional basis for \\(S^{n-1}\\) that preserves local neighborhoods in that points that are close to each other in \\(\\mathbb{R}^n\\) map to points near each other in \\(S^{n-1}\\). The mapping is not perfectly distance preserving, because there are points arbitrarily far away from each other in \\(\\mathbb{R}^n\\) that map to identical points in \\(S^{n-1}\\).\n\n\nThe above mapping from \\(\\mathbb{R}^n\\) to \\(S^{n-1}\\) is not defined at zero. 
While this point outcome has measure zero during sampling, and may thus be ignored, it is the default initialization point and thus unit vector parameters cannot be initialized at zero. A simple workaround is to initialize from a very small interval around zero, which is an option built into all of the Stan interfaces.\n\n\n\n\nThe Jacobian matrix relating the input vector \\(y\\) to the output vector \\(x\\) is singular because \\(x^\\top x = 1\\) for any non-zero input vector \\(y\\). Thus, there technically is no unique transformation from \\(x\\) to \\(y\\). To circumvent this issue, let \\(r = \\sqrt{y^\\top y}\\) so that \\(y = r\nx\\). The transformation from \\(\\left(r, x_{-n}\\right)\\) to \\(y\\) is well-defined but \\(r\\) is arbitrary, so we set \\(r = 1\\). In this case, the determinant of the Jacobian is proportional to \\(e^{-\\frac{1}{2} y^\\top y}\\), which is the kernel of a standard multivariate normal distribution with \\(n\\) independent dimensions.\n\n\n\n\nA \\(K \\times K\\) correlation matrix \\(x\\) must be symmetric, so that\n\\[\nx_{k,k'} = x_{k',k}\n\\]\nfor all \\(k,k' \\in \\{ 1, \\ldots, K \\}\\), it must have a unit diagonal, so that\n\\[\nx_{k,k} = 1\n\\]\nfor all \\(k \\in \\{ 1, \\ldots, K \\}\\), and it must be positive definite, so that for every non-zero \\(K\\)-vector \\(a\\),\n\\[\na^{\\top} x a > 0.\n\\]\nThe number of free parameters required to specify a \\(K \\times K\\) correlation matrix is \\(\\binom{K}{2}\\).\nThere is more than one way to map from \\(\\binom{K}{2}\\) unconstrained parameters to a \\(K \\times K\\) correlation matrix. Stan implements the Lewandowski-Kurowicka-Joe (LKJ) transform Lewandowski, Kurowicka, and Joe (2009).\n\n\nIt is easiest to specify the inverse, going from its \\(\\binom{K}{2}\\) parameter basis to a correlation matrix. The basis will actually be broken down into two steps. To start, suppose \\(y\\) is a vector containing \\(\\binom{K}{2}\\) unconstrained values. 
These are first transformed via the bijective function \\(\\tanh : \\mathbb{R} \\rightarrow\n(-1, 1)\\)\n\\[\n\\tanh y = \\frac{\\exp(2y) - 1}{\\exp(2y) + 1}.\n\\]\nThen, define a \\(K \\times K\\) matrix \\(z\\), the upper triangular values of which are filled by row with the transformed values, and the diagonal entries are set to one. For example, in the \\(4 \\times 4\\) case, there are \\(\\binom{4}{2}\\) values arranged as\n\\[\nz\n=\n\\left[\n\\begin{array}{cccc}\n1 & \\tanh y_1 & \\tanh y_2 & \\tanh y_4\n\\\\\n0 & 1 & \\tanh y_3 & \\tanh y_5\n\\\\\n0 & 0 & 1 & \\tanh y_6\n\\\\\n0 & 0 & 0 & 1\n\\end{array}\n\\right]\n.\n\\]\nLewandowski, Kurowicka and Joe (LKJ) show how to bijectively map the array \\(z\\) to a correlation matrix \\(x\\). The entry \\(z_{i,j}\\) for \\(i <\nj\\) is interpreted as the canonical partial correlation (CPC) between \\(i\\) and \\(j\\), which is the correlation between \\(i\\)’s residuals and \\(j\\)’s residuals when both \\(i\\) and \\(j\\) are regressed on all variables \\(i'\\) such that \\(i'< i\\). 
In the case of \\(i=1\\), there are no earlier variables, so \\(z_{1,j}\\) is just the Pearson correlation between \\(i\\) and \\(j\\).\nIn Stan, the LKJ transform is reformulated in terms of a Cholesky factor \\(w\\) of the final correlation matrix, defined for \\(1 \\leq i,j \\leq K\\) by\n\\[\nw_{i,j} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } i > j,\n\\\\\n1 & \\mbox{if } 1 = i = j,\n\\\\\n\\prod_{i'=1}^{i - 1} \\left( 1 - z_{i'\\!,\\,j}^2 \\right)^{1/2}\n& \\mbox{if } 1 < i = j,\n\\\\\nz_{i,j} & \\mbox{if } 1 = i < j, \\mbox{ and}\n\\\\\\\nz_{i,j} \\, \\prod_{i'=1}^{i-1} \\left( 1 - z_{i'\\!,\\,j}^2 \\right)^{1/2}\n& \\mbox{ if } 1 < i < j.\n\\end{array}\n\\right.\n\\]\nThis does not require as much computation per matrix entry as it may appear; calculating the rows in terms of earlier rows yields the more manageable expression\n\\[\nw_{i,j} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } i > j,\n\\\\\n1 & \\mbox{if } 1 = i = j,\n\\\\\nz_{i,j} & \\mbox{if } 1 = i < j, \\mbox{ and}\n\\\\\n\\frac{z_{i,j}}{z_{i-1,j}} \\ w_{i-1,j} \\left( 1 - z_{i-1,j}^2 \\right)^{1/2}\n& \\mbox{ if } 1 < i \\leq j.\n\\end{array}\n\\right.\n\\]\nGiven the upper-triangular Cholesky factor \\(w\\), the final correlation matrix is\n\\[\nx = w^{\\top} w.\n\\]\nLewandowski, Kurowicka, and Joe (2009) show that the determinant of the correlation matrix can be defined in terms of the canonical partial correlations as\n\\[\n\\mbox{det} \\, x = \\prod_{i=1}^{K-1} \\ \\prod_{j=i+1}^K \\ (1 - z_{i,j}^2)\n= \\prod_{1 \\leq i < j \\leq K} (1 - z_{i,j}^2),\n\\]\n\n\n\nFrom the inverse of equation 11 in (Lewandowski, Kurowicka, and Joe 2009), the absolute Jacobian determinant is\n\\[\n\\sqrt{\\prod_{i=1}^{K-1}\\prod_{j=i+1}^K \\left(1-z_{i,j}^2\\right)^{K-i-1}} \\\n\\times \\prod_{i=1}^{K-1}\\prod_{j=i+1}^K\n\\frac{\\partial z_{i,j}}{\\partial y_{i,j}}\n\\]\n\n\n\nThe correlation transform is defined by reversing the steps of the inverse transform defined in the previous 
section.\nStarting with a correlation matrix \\(x\\), the first step is to find the unique upper triangular \\(w\\) such that \\(x = w^{\\top} w\\). Because \\(x\\) is positive definite, this can be done by applying the Cholesky decomposition,\n\\[\nw = \\mbox{chol}(x).\n\\]\nThe next step from the Cholesky factor \\(w\\) back to the array \\(z\\) of canonical partial correlations (CPCs) is simplified by the ordering of the elements in the definition of \\(w\\), which when inverted yields\n\\[\nz_{i,j} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } i > j,\n\\\\\n1 & \\mbox{if } i = j,\n\\\\\nw_{i,j} & \\mbox{if } 1 = i < j, \\mbox{ and}\n\\\\\n{w_{i,j}}\n\\\n\\prod_{i'=1}^{i-1} \\left( 1 - z_{i'\\!,j}^2 \\right)^{-1/2}\n& \\mbox{if } 1 < i < j.\n\\end{array}\n\\right.\n\\]\nThe final stage of the transform reverses the hyperbolic tangent transform, which is defined by\n\\[\ny = \\tanh^{-1} z = \\frac{1}{2} \\log \\left( \\frac{1 + z}{1 - z} \\right).\n\\]\nThe inverse hyperbolic tangent function, \\(\\tanh^{-1}\\), is also called the Fisher transformation.\n\n\n\n\nA \\(K \\times K\\) matrix is a covariance matrix if it is symmetric and positive definite (see the previous section for definitions). 
It requires \\(K + \\binom{K}{2}\\) free parameters to specify a \\(K \\times K\\) covariance matrix.\n\n\nStan’s covariance transform is based on a Cholesky decomposition composed with a log transform of the positive-constrained diagonal elements.1\nIf \\(x\\) is a covariance matrix (i.e., a symmetric, positive definite matrix), then there is a unique lower-triangular matrix \\(z =\n\\mathrm{chol}(x)\\) with positive diagonal entries, called a Cholesky factor, such that\n\\[\nx = z \\, z^{\\top}.\n\\]\nThe off-diagonal entries of the Cholesky factor \\(z\\) are unconstrained, but the diagonal entries \\(z_{k,k}\\) must be positive for \\(1 \\leq k\n\\leq K\\).\nTo complete the transform, the diagonal is log-transformed to produce a fully unconstrained lower-triangular matrix \\(y\\) defined by\n\\[\ny_{m,n} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } m < n,\n\\\\\n\\log z_{m,m} & \\mbox{if } m = n, \\mbox{ and}\n\\\\\nz_{m,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\n\n\n\nThe inverse transform reverses the two steps of the transform. 
Given an unconstrained lower-triangular \\(K \\times K\\) matrix \\(y\\), the first step is to recover the intermediate matrix \\(z\\) by reversing the log transform,\n\\[\nz_{m,n} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } m < n,\n\\\\\n\\exp(y_{m,m}) & \\mbox{if } m = n, \\mbox{ and}\n\\\\\ny_{m,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\nThe covariance matrix \\(x\\) is recovered from its Cholesky factor \\(z\\) by taking\n\\[\nx = z \\, z^{\\top}.\n\\]\n\n\n\nThe Jacobian is the product of the Jacobians of the exponential transform from the unconstrained lower-triangular matrix \\(y\\) to matrix \\(z\\) with positive diagonals and the product transform from the Cholesky factor \\(z\\) to \\(x\\).\nThe transform from unconstrained \\(y\\) to Cholesky factor \\(z\\) has a diagonal Jacobian matrix, the absolute determinant of which is thus\n\\[\n\\prod_{k=1}^K \\frac{\\partial}{\\partial y_{k,k}} \\, \\exp(y_{k,k})\n\\ = \\\n\\prod_{k=1}^K \\exp(y_{k,k})\n\\ = \\\n\\prod_{k=1}^K z_{k,k}.\n\\]\nThe Jacobian matrix of the second transform from the Cholesky factor \\(z\\) to the covariance matrix \\(x\\) is also triangular, with diagonal entries corresponding to pairs \\((m,n)\\) with \\(m \\geq n\\), defined by\n\\[\n\\frac{\\partial}{\\partial z_{m,n}}\n\\left( z \\, z^{\\top} \\right)_{m,n}\n\\ = \\\n\\frac{\\partial}{\\partial z_{m,n}}\n\\left( \\sum_{k=1}^K z_{m,k} \\, z_{n,k} \\right)\n\\ = \\\n\\left\\{\n\\begin{array}{cl}\n2 \\, z_{n,n} & \\mbox{if } m = n \\mbox{ and }\n\\\\\nz_{n,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\nThe absolute Jacobian determinant of the second transform is thus\n\\[\n2^{K} \\ \\prod_{m = 1}^{K} \\ \\prod_{n=1}^{m} z_{n,n}\n\\ = \\\n2^{K} \\ \\prod_{n=1}^K \\ \\prod_{m=n}^K z_{n,n}\n\\ = \\\n2^{K} \\ \\prod_{k=1}^K z_{k,k}^{K - k + 1}.\n\\]\nFinally, the full absolute Jacobian determinant of the inverse of the covariance matrix transform from the unconstrained lower-triangular \\(y\\) to a symmetric, positive 
definite matrix \\(x\\) is the product of the Jacobian determinants of the exponentiation and product transforms,\n\\[\n\\left( \\prod_{k=1}^K z_{k,k} \\right)\n\\left(\n2^{K} \\ \\prod_{k=1}^K z_{k,k}^{K - k + 1}\n\\right)\n\\ = \\\n2^K\n\\, \\prod_{k=1}^K z_{k,k}^{K-k+2}.\n\\]\nLet \\(f^{-1}\\) be the inverse transform from a \\(K + \\binom{K}{2}\\)-vector \\(y\\) to the \\(K \\times K\\) covariance matrix \\(x\\). A density function \\(p_X(x)\\) defined on \\(K \\times K\\) covariance matrices is transformed to the density \\(p_Y(y)\\) over \\(K + \\binom{K}{2}\\) vectors \\(y\\) by\n\\[\np_Y(y) = p_X(f^{-1}(y)) \\ 2^K \\ \\prod_{k=1}^K z_{k,k}^{K-k+2}.\n\\]\n\n\n\n\nAn \\(M \\times M\\) covariance matrix \\(\\Sigma\\) can be Cholesky factored to a lower triangular matrix \\(L\\) such that \\(L\\,L^{\\top} = \\Sigma\\). If \\(\\Sigma\\) is positive definite, then \\(L\\) will be \\(M \\times M\\). If \\(\\Sigma\\) is only positive semi-definite, then \\(L\\) will be \\(M \\times N\\), with \\(N < M\\).\nA matrix is a Cholesky factor for a covariance matrix if and only if it is lower triangular, the diagonal entries are positive, and \\(M \\geq\nN\\). A matrix satisfying these conditions ensures that \\(L \\,\nL^{\\top}\\) is positive semi-definite if \\(M > N\\) and positive definite if \\(M = N\\).\nA Cholesky factor of a covariance matrix requires \\(N + \\binom{N}{2} +\n(M - N)N\\) unconstrained parameters.\n\n\nStan’s Cholesky factor transform only requires the first step of the covariance matrix transform, namely log transforming the positive diagonal elements. Suppose \\(x\\) is an \\(M \\times N\\) Cholesky factor. The above-diagonal entries are zero, the diagonal entries are positive, and the below-diagonal entries are unconstrained. 
The transform required is thus\n\\[\ny_{m,n} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } m < n,\n\\\\\n\\log x_{m,m} & \\mbox{if } m = n, \\mbox{ and}\n\\\\\nx_{m,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\n\n\n\nThe inverse transform need only invert the logarithm with an exponentiation. If \\(y\\) is the unconstrained matrix representation, then the elements of the constrained matrix \\(x\\) are defined by\n\\[\nx_{m,n} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } m < n,\n\\\\\n\\exp(y_{m,m}) & \\mbox{if } m = n, \\mbox{ and}\n\\\\\ny_{m,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\n\n\n\nThe transform has a diagonal Jacobian matrix, the absolute determinant of which is\n\\[\n\\prod_{n=1}^N \\frac{\\partial}{\\partial y_{n,n}} \\, \\exp(y_{n,n})\n\\ = \\\n\\prod_{n=1}^N \\exp(y_{n,n})\n\\ = \\\n\\prod_{n=1}^N x_{n,n}.\n\\]\nLet \\(x = f^{-1}(y)\\) be the inverse transform from a \\(N + \\binom{N}{2}\n+ (M - N)N\\) vector to an \\(M \\times N\\) Cholesky factor for a covariance matrix \\(x\\) defined in the previous section. A density function \\(p_X(x)\\) defined on \\(M \\times N\\) Cholesky factors of covariance matrices is transformed to the density \\(p_Y(y)\\) over \\(N + \\binom{N}{2}\n+ (M - N)N\\) vectors \\(y\\) by\n\\[\np_Y(y) = p_X(f^{-1}(y)) \\prod_{n=1}^N x_{n,n}.\n\\]\n\n\n\n\nA \\(K \\times K\\) correlation matrix \\(\\Omega\\) is positive definite and has a unit diagonal. Because it is positive definite, it can be Cholesky factored to a \\(K \\times K\\) lower-triangular matrix \\(L\\) with positive diagonal elements such that \\(\\Omega = L\\,L^{\\top}\\). Because the correlation matrix has a unit diagonal,\n\\[\n\\Omega_{k,k} = L_k\\,L_k^{\\top} = 1,\n\\]\neach row vector \\(L_k\\) of the Cholesky factor is of unit length. 
The length and positivity constraint allow the diagonal elements of \\(L\\) to be calculated from the off-diagonal elements, so that a Cholesky factor for a \\(K \\times K\\) correlation matrix requires only \\(\\binom{K}{2}\\) unconstrained parameters.\n\n\nIt is easiest to start with the inverse transform from the \\(\\binom{K}{2}\\) unconstrained parameters \\(y\\) to the \\(K \\times K\\) lower-triangular Cholesky factor \\(x\\). The inverse transform is based on the hyperbolic tangent function, \\(\\tanh\\), which satisfies \\(\\tanh(x) \\in (-1,1)\\). Here it will function like an inverse logit with a sign to pick out the direction of an underlying canonical partial correlation; see the section on correlation matrix transforms for more information on the relation between canonical partial correlations and the Cholesky factors of correlation matrices.\nSuppose \\(y\\) is a vector of \\(\\binom{K}{2}\\) unconstrained values. Let \\(z\\) be a lower-triangular matrix with zero diagonal and below diagonal entries filled by row. 
For example, in the \\(3 \\times 3\\) case,\n\\[\nz =\n\\left[\n\\begin{array}{ccc}\n0 & 0 & 0\n\\\\\n\\tanh y_1 & 0 & 0\n\\\\\n\\tanh y_2 & \\tanh y_3 & 0\n\\end{array}\n\\right]\n\\]\nThe matrix \\(z\\), with entries in the range \\((-1, 1)\\), is then transformed to the Cholesky factor \\(x\\), by taking2\n\\[\nx_{i,j}\n=\n\\left\\{\n\\begin{array}{lll}\n0 & \\mbox{ if } i < j & \\mbox{ [above diagonal]}\n\\\\\n\\sqrt{1 - \\sum_{j' < j} x_{i,j'}^2}\n & \\mbox{ if } i = j & \\mbox{ [on diagonal]}\n\\\\\nz_{i,j} \\ \\sqrt{1 - \\sum_{j' < j} x_{i,j'}^2}\n & \\mbox{ if } i > j & \\mbox{ [below diagonal]}\n\\end{array}\n\\right.\n\\]\nIn the \\(3 \\times 3\\) case, this yields\n\\[\nx =\n\\left[\n\\begin{array}{ccc}\n1 & 0 & 0\n\\\\\nz_{2,1} & \\sqrt{1 - x_{2,1}^2} & 0\n\\\\\nz_{3,1} & z_{3,2} \\sqrt{1 - x_{3,1}^2}\n & \\sqrt{1 - (x_{3,1}^2 + x_{3,2}^2)}\n\\end{array}\n\\right],\n\\]\nwhere the \\(z_{i,j} \\in (-1,1)\\) are the \\(\\tanh\\)-transformed \\(y\\).\nThe approach is a signed stick-breaking process on the quadratic (Euclidean length) scale. Starting from length 1 at \\(j=1\\), each below-diagonal entry \\(x_{i,j}\\) is determined by the (signed) fraction \\(z_{i,j}\\) of the remaining length for the row that it consumes. The diagonal entries \\(x_{i,i}\\) get any leftover length from earlier entries in their row. The above-diagonal entries are zero.\n\n\n\nSuppose \\(x\\) is a \\(K \\times K\\) Cholesky factor for some correlation matrix. 
The first step of the transform reconstructs the intermediate values \\(z\\) from \\(x\\),\n\\[\nz_{i,j} = \\frac{x_{i,j}}{\\sqrt{1 - \\sum_{j' < j}x_{i,j'}^2}}.\n\\]\nThe mapping from the resulting \\(z\\) to \\(y\\) inverts \\(\\tanh\\),\n\\[\ny\n\\ = \\\n\\tanh^{-1} z\n\\ = \\\n\\frac{1}{2} \\left( \\log (1 + z) - \\log (1 - z) \\right).\n\\]\n\n\n\nThe Jacobian of the full transform is the product of the Jacobians of its component transforms.\nFirst, for the inverse transform \\(z = \\tanh y\\), the derivative is\n\\[\n\\frac{d}{dy} \\tanh y = \\frac{1}{(\\cosh y)^2}.\n\\]\nSecond, for the inverse transform of \\(z\\) to \\(x\\), the resulting Jacobian matrix \\(J\\) is of dimension \\(\\binom{K}{2} \\times\n\\binom{K}{2}\\), with indexes \\((i,j)\\) for \\((i > j)\\). The Jacobian matrix is lower triangular, so that its determinant is the product of its diagonal entries, of which there is one for each \\((i,j)\\) pair,\n\\[\n\\left| \\, \\mbox{det} \\, J \\, \\right|\n \\ = \\ \\prod_{i > j} \\left| \\frac{d}{dz_{i,j}} x_{i,j} \\right|,\n\\]\nwhere\n\\[\n\\frac{d}{dz_{i,j}} x_{i,j}\n= \\sqrt{1 - \\sum_{j' < j} x^2_{i,j'}}.\n\\]\nSo the combined density for unconstrained \\(y\\) is\n\\[\np_Y(y)\n= p_X(f^{-1}(y))\n \\ \\\n \\prod_{n \\leq \\binom{K}{2}} \\frac{1}{(\\cosh y_n)^2}\n \\ \\\n \\prod_{i > j} \\left( 1 - \\sum_{j' < j} x_{i,j'}^2\n \\right)^{1/2},\n\\]\nwhere \\(x = f^{-1}(y)\\) is used for notational convenience. 
The log Jacobian determinant of the complete inverse transform \\(x = f^{-1}(y)\\) is given by\n\\[\n\\log \\left| \\, \\det J \\, \\right|\n=\n-2 \\sum_{n \\leq \\binom{K}{2}}\n\\log \\cosh y\n\\\n+\n\\\n\\frac{1}{2} \\\n\\sum_{i > j}\n\\log \\left( 1 - \\sum_{j' < j} x_{i,j'}^2 \\right)\n.\n\\]", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#limitations-due-to-finite-accuracy-presentation", + "href": "reference-manual/transforms.html#limitations-due-to-finite-accuracy-presentation", + "title": "Constraint Transforms", + "section": "", + "text": "In this section the transformations are described mathematically. However, observed behavior can be different from the exact arithmetic.\nStan’s arithmetic is implemented using double-precision floating-point numbers, which may cause computation to behave differently than mathematics. For example, the lower bound constraint is defined above by an exponential inverse transform which mathematically excludes the lower bound, but if the closest floating-point number for the inverse transformed value is the boundary, then the value is rounded to the boundary. This may cause unexpected warnings or errors, if in other parts of the code the boundary value is invalid. For example, we may observe floating-point value 0 for a variance parameter that has been declared with lower=0. In general, double-precision floating-point numbers cannot reliably store more than 16 digits of a number in decimal. See more about floating point arithmetic in the Stan User’s Guide.\nThese issues are exacerbated by the fact that CmdStan stores the output to CSV files with 8 digits precision by default. 
More digits can be requested by the user at the cost of additional disk usage, as discussed in the CmdStan Command-Line Interface Overview.", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#change-of-variables.section", + "href": "reference-manual/transforms.html#change-of-variables.section", + "title": "Constraint Transforms", + "section": "", + "text": "The support of a random variable \\(X\\) with density \\(p_X(x)\\) is that subset of values for which it has non-zero density,\n\\[\n\\mathrm{supp}(X) = \\{ x | p_X(x) > 0 \\}.\n\\]\nIf \\(f\\) is a total function defined on the support of \\(X\\), then \\(Y =\nf(X)\\) is a new random variable. This section shows how to compute the probability density function of \\(Y\\) for well-behaved transforms \\(f\\). The rest of the chapter details the transforms used by Stan.\n\n\nSuppose \\(X\\) is one dimensional and \\(f: \\mathrm{supp}(X) \\rightarrow\n\\mathbb{R}\\) is a one-to-one, monotonic function with a differentiable inverse \\(f^{-1}\\). Then the density of \\(Y\\) is given by\n\\[\np_Y(y) = p_X(f^{-1}(y))\n \\,\n \\left| \\, \\frac{d}{dy} f^{-1}(y)\\, \\right|.\n\\]\nThe absolute derivative of the inverse transform measures how the scale of the transformed variable changes with respect to the underlying variable.\n\n\n\nThe multivariate generalization of an absolute derivative is a Jacobian, or more fully the absolute value of the determinant of the Jacobian matrix of the transform. The Jacobian matrix measures the change of each output variable relative to every input variable and the absolute determinant uses that to determine the differential change in volume at a given point in the parameter space.\nSuppose \\(X\\) is a \\(K\\)-dimensional random variable with probability density function \\(p_X(x)\\). 
A new random variable \\(Y = f(X)\\) may be defined by transforming \\(X\\) with a suitably well-behaved function \\(f\\). It suffices for what follows to note that if \\(f\\) is one-to-one and its inverse \\(f^{-1}\\) has a well-defined Jacobian, then the density of \\(Y\\) is\n\\[\np_Y(y) = p_X(f^{-1}(y)) \\, \\left| \\, \\det \\, J_{f^{-1}}(y) \\, \\right|,\n\\]\nwhere \\(\\det{}\\) is the matrix determinant operation and \\(J_{f^{-1}}(y)\\) is the Jacobian matrix of \\(f^{-1}\\) evaluated at \\(y\\). Taking \\(x =\nf^{-1}(y)\\), the Jacobian matrix is defined by\n\\[\nJ_{f^{-1}}(y) =\n\\left[\n\\begin{array}{ccc}\\displaystyle\n\\frac{\\partial x_1}{\\partial y_1}\n& \\cdots\n& \\displaystyle \\frac{\\partial x_1}{\\partial y_{K}}\n\\\\\n\\vdots & \\vdots & \\vdots\n\\\\\n\\displaystyle\\frac{\\partial x_{K}}{\\partial y_1}\n& \\cdots\n& \\displaystyle\\frac{\\partial x_{K}}{\\partial y_{K}}\n\\end{array}\n\\right].\n\\]\nIf the Jacobian matrix is triangular, the determinant reduces to the product of the diagonal entries,\n\\[\n\\det \\, J_{f^{-1}}(y)\n= \\prod_{k=1}^K \\frac{\\partial x_k}{\\partial y_k}.\n\\]\nTriangular matrices naturally arise in situations where the variables are ordered, for instance by dimension, and each variable’s transformed value depends on the previous variable’s transformed values. 
Diagonal matrices, a simple form of triangular matrix, arise if each transformed variable only depends on a single untransformed variable.", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#lower-bound-transform.section", + "href": "reference-manual/transforms.html#lower-bound-transform.section", + "title": "Constraint Transforms", + "section": "", + "text": "Stan uses a logarithmic transform for lower and upper bounds.\n\n\nIf a variable \\(X\\) is declared to have lower bound \\(a\\), it is transformed to an unbounded variable \\(Y\\), where\n\\[\nY = \\log(X - a).\n\\]\n\n\n\nThe inverse of the lower-bound transform maps an unbounded variable \\(Y\\) to a variable \\(X\\) that is bounded below by \\(a\\) by\n\\[\nX = \\exp(Y) + a.\n\\]\n\n\n\nThe absolute derivative of the inverse transform is\n\\[\n\\left| \\,\n\\frac{d}{dy} \\left( \\exp(y) + a \\right)\n\\, \\right|\n= \\exp(y).\n\\]\nTherefore, given the density \\(p_X\\) of \\(X\\), the density of \\(Y\\) is\n\\[\np_Y(y)\n= p_X\\!\\left( \\exp(y) + a \\right) \\cdot \\exp(y).\n\\]", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#upper-bounded-scalar", + "href": "reference-manual/transforms.html#upper-bounded-scalar", + "title": "Constraint Transforms", + "section": "", + "text": "Stan uses a negated logarithmic transform for upper bounds.\n\n\nIf a variable \\(X\\) is declared to have an upper bound \\(b\\), it is transformed to the unbounded variable \\(Y\\) by\n\\[\nY = \\log(b - X).\n\\]\n\n\n\nThe inverse of the upper bound transform converts the unbounded variable \\(Y\\) to the variable \\(X\\) bounded above by \\(b\\) through\n\\[\nX = b - \\exp(Y).\n\\]\n\n\n\nThe absolute derivative of the inverse of the upper bound transform is\n\\[\n\\left| \\,\n\\frac{d}{dy} \\left( b - \\exp(y) \\right)\n\\, \\right|\n= 
\\exp(y).\n\\]\nTherefore, the density of the unconstrained variable \\(Y\\) is defined in terms of the density of the variable \\(X\\) with an upper bound of \\(b\\) by\n\\[\np_Y(y)\n= p_X \\!\\left( b - \\exp(y) \\right) \\cdot \\exp(y).\n\\]", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#logit-transform-jacobian.section", + "href": "reference-manual/transforms.html#logit-transform-jacobian.section", + "title": "Constraint Transforms", + "section": "", + "text": "For lower and upper-bounded variables, Stan uses a scaled and translated log-odds transform.\n\n\nThe log-odds function is defined for \\(u \\in (0,1)\\) by\n\\[\n\\mathrm{logit}(u) = \\log \\frac{u}{1 - u}.\n\\]\nThe inverse of the log odds function is the logistic sigmoid, defined for \\(v \\in (-\\infty,\\infty)\\) by\n\\[\n\\mathrm{logit}^{-1}(v) = \\frac{1}{1 + \\exp(-v)}.\n\\]\nThe derivative of the logistic sigmoid is\n\\[\n\\frac{d}{dy} \\mathrm{logit}^{-1}(y)\n= \\mathrm{logit}^{-1}(y) \\cdot \\left( 1 - \\mathrm{logit}^{-1}(y) \\right).\n\\]\n\n\n\nFor variables constrained to be in the open interval \\((a, b)\\), Stan uses a scaled and translated log-odds transform. If variable \\(X\\) is declared to have lower bound \\(a\\) and upper bound \\(b\\), then it is transformed to a new variable \\(Y\\), where\n\\[\nY = \\mathrm{logit} \\left( \\frac{X - a}{b - a} \\right).\n\\]\n\n\n\nThe inverse of this transform is\n\\[\nX = a + (b - a) \\cdot \\mathrm{logit}^{-1}(Y).\n\\]\n\n\n\nThe absolute derivative of the inverse transform is given by\n\\[\n\\left|\n \\frac{d}{dy}\n \\left(\n a + (b - a) \\cdot \\mathrm{logit}^{-1}(y)\n \\right)\n \\right|\n= (b - a)\n \\cdot \\mathrm{logit}^{-1}(y)\n \\cdot \\left( 1 - \\mathrm{logit}^{-1}(y) \\right).\n\\]\nTherefore, the density of the transformed variable \\(Y\\) is\n\\[\np_Y(y)\n=\np_X \\! 
\\left( a + (b - a) \\cdot \\mathrm{logit}^{-1}(y) \\right)\n \\cdot (b - a)\n \\cdot \\mathrm{logit}^{-1}(y)\n \\cdot \\left( 1 - \\mathrm{logit}^{-1}(y) \\right).\n\\]\nDespite the apparent complexity of this expression, most of the terms are repeated and thus only need to be evaluated once. Most importantly, \\(\\mathrm{logit}^{-1}(y)\\) only needs to be evaluated once, so there is only one call to \\(\\exp(-y)\\).", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#affinely-transformed-scalar", + "href": "reference-manual/transforms.html#affinely-transformed-scalar", + "title": "Constraint Transforms", + "section": "", + "text": "Stan uses an affine transform to be able to specify parameters with a given offset and multiplier.\n\n\nFor variables with expected offset \\(\\mu\\) and/or (positive) multiplier \\(\\sigma\\), Stan uses an affine transform. Such a variable \\(X\\) is transformed to a new variable \\(Y\\), where\n\\[\nY = \\frac{X - \\mu}{\\sigma}.\n\\]\nThe default value for the offset \\(\\mu\\) is \\(0\\) and for the multiplier \\(\\sigma\\) is \\(1\\) in case not both are specified.\n\n\n\nThe inverse of this transform is\n\\[\nX = \\mu + \\sigma \\cdot Y.\n\\]\n\n\n\nThe absolute derivative of the affine inverse transform is\n\\[\n\\left|\n \\frac{d}{dy}\n \\left(\n \\mu + \\sigma \\cdot y\n \\right)\n \\right|\n= \\sigma.\n\\]\nTherefore, the density of the transformed variable \\(Y\\) is\n\\[\np_Y(y)\n=\np_X \\! 
\\left( \\mu + \\sigma \\cdot y \\right)\n \\cdot \\sigma.\n\\]\nFor an example of how to code this in Stan, see section Affinely Transformed Real.", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#ordered-vector", + "href": "reference-manual/transforms.html#ordered-vector", + "title": "Constraint Transforms", + "section": "", + "text": "For some modeling tasks, a vector-valued random variable \\(X\\) is required with support on ordered sequences. One example is the set of cut points in ordered logistic regression.\nIn constraint terms, an ordered \\(K\\)-vector \\(x \\in \\mathbb{R}^K\\) satisfies\n\\[\nx_k < x_{k+1}\n\\]\nfor \\(k \\in \\{ 1, \\ldots, K-1 \\}\\).\n\n\nStan’s transform follows the constraint directly. It maps an increasing vector \\(x \\in \\mathbb{R}^{K}\\) to an unconstrained vector \\(y \\in\n\\mathbb{R}^K\\) by setting\n\\[\ny_k\n=\n\\left\\{\n\\begin{array}{ll}\nx_1 & \\mbox{if } k = 1, \\mbox{ and}\n\\\\\n\\log \\left( x_{k} - x_{k-1} \\right) & \\mbox{if } 1 < k \\leq K.\n\\end{array}\n\\right.\n\\]\n\n\n\nThe inverse transform for an unconstrained \\(y \\in \\mathbb{R}^K\\) to an ordered sequence \\(x \\in \\mathbb{R}^K\\) is defined by the recursion\n\\[\nx_k\n=\n\\left\\{\n\\begin{array}{ll}\ny_1 & \\mbox{if } k = 1, \\mbox{ and}\n\\\\\nx_{k-1} + \\exp(y_k) & \\mbox{if } 1 < k \\leq K.\n\\end{array}\n\\right.\n\\]\n\\(x_k\\) can also be expressed iteratively as\n\\[\nx_k = y_1 + \\sum_{k'=2}^k \\exp(y_{k'}).\n\\]\n\n\n\nThe Jacobian of the inverse transform \\(f^{-1}\\) is lower triangular, with diagonal elements for \\(1 \\leq k \\leq K\\) of\n\\[\nJ_{k,k} =\n\\left\\{\n\\begin{array}{ll}\n1 & \\mbox{if } k = 1, \\mbox{ and}\n\\\\\n\\exp(y_k) & \\mbox{if } 1 < k \\leq K.\n\\end{array}\n\\right.\n\\]\nBecause \\(J\\) is triangular, the absolute Jacobian determinant is\n\\[\n\\left| \\, \\det \\, J \\, \\right|\n\\ = \\\n\\left| \\, \\prod_{k=1}^K 
J_{k,k} \\, \\right|\n\\ = \\\n\\prod_{k=2}^K \\exp(y_k).\n\\]\nPutting this all together, if \\(p_X\\) is the density of \\(X\\), then the transformed variable \\(Y\\) has density \\(p_Y\\) given by\n\\[\np_Y(y)\n= p_X(f^{-1}(y))\n\\\n\\prod_{k=2}^K \\exp(y_k).\n\\]", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#positive-ordered-vector", + "href": "reference-manual/transforms.html#positive-ordered-vector", + "title": "Constraint Transforms", + "section": "", + "text": "Placeholder.\nThe positive ordered transformation is defined in the same style as the ordered transformation above, but with the first element being exponentiated to ensure positivity.", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#sum-to-zero-transforms", + "href": "reference-manual/transforms.html#sum-to-zero-transforms", + "title": "Constraint Transforms", + "section": "", + "text": "Stan provides built-in constraint transforms for sum-to-zero vectors and sum-to-zero matrices. The sum-to-zero vector is a vector of length \\(N\\) with real values and the sum of the vector equals zero. The sum-to-zero matrix is an \\(N \\times M\\) matrix where both the rows and columns sum to zero.\nStan uses an orthogonal basis as the initial point of construction. The orthogonal basis balances the constraint across each unconstrained value. The basis is a matrix \\(H\\) such that \\(H\\in\\mathbb R^{N\\times (N-1)}\\) and \\(H^{\\mathsf T}H=I_{N-1},\\;H^{\\mathsf T}\\mathbf 1=0\\). The sum-to-zero vector lies in the subspace where the vector sums to zero. Although this seems tautological, the orthogonal basis construction allows all the marginal variances of the constrained vector to be the same, see, e.g., (Seyboldt 2024). 
Simpler alternatives, such as setting the final element to the negative sum of the first elements, do not result in equal variances. It is worth noting that even with marginal variances being equal each value in the sum-to-zero constrained space is negatively correlated.\nIn many cases one wishes to model the sum-to-zero vector as normally distributed and the induced covariance matrix is fully known:\n\\[\n \\sigma^2 \\begin{pmatrix}\n 1-\\tfrac{1}{N} & -\\tfrac{1}{N} & \\cdots&-\\tfrac{1}{N} \\\\\n -\\tfrac{1}{N} & 1-\\tfrac{1}{N} & \\cdots & -\\tfrac{1}{N} \\\\\n \\vdots & \\vdots & \\ddots & \\vdots \\\\\n -\\tfrac{1}{N} & -\\tfrac{1}{N} & \\cdots &1-\\tfrac{1}{N}\n \\end{pmatrix}.\n\\]\nThe marginal standard deviation no longer corresponds to \\(\\sigma\\) but is \\(\\sigma \\sqrt{1-\\tfrac{1}{N}}\\). The properties of the normal distribution allow multiplying the sum-to-zero vector by the reciprocal of \\(\\sqrt{1-\\tfrac{1}{N}}\\) to adjust the variance to the intended \\(\\sigma\\):\nsum_to_zero_vector[N] x;\nx ~ normal(0, sqrt(N * inv(N - 1)));\nWhen \\(\\sigma\\) is a parameter there is an additional adjustment when using the centered version of the sum-to-zero constraint. If we let \\(y\\) be the unconstrained \\(N - 1\\) vector and \\(y\\) is implicitly given a standard normal prior, then the sum-to-zero vector distributed as normal with a mean of zero and a standard deviation of \\(\\sigma\\) is given by \\[\nx = \\underbrace{\\sigma\\sqrt{\\frac{N}{N-1}}\\;H}_{\\text{ size of } N \\times (N - 1)}\\,y.\n\\] This is the classic non-centered model. The crucial detail from above is that the operation of multiplying \\(\\sigma\\) to \\(y\\) is on \\(N - 1\\) dimensions, not \\(N\\) dimensions. 
When writing the centered model using \\[\nx \\sim \\mathcal N \\bigg(0,\\, \\sigma\\sqrt{\\frac{N}{N-1}} \\bigg),\n\\] we are incrementing the log density by an additional \\(-\\log(\\sigma)\\) and so must increment the log density by\ntarget += log(sigma * sqrt(N * inv(N - 1)));\n\n// or\n// because `sqrt(N * inv(N - 1))` is constant\n\ntarget += log(sigma);\nwhich correctly adjusts for the \\(N - 1\\) free variables.\nMore details of the Helmert matrix are in (Lancaster 1965), for the basic definitions of the isometric log ratio transform see (Egozcue et al. 2003) and Chapter 3 of (Filzmoser, Hron, and Templ 2018) for the pivot coordinate version used here.\n\n\nVectors that are constrained to sum-to-zero are useful for, among other things, additive varying effects, such as varying slopes or intercepts in a regression model (e.g., for income deciles).\nA sum-to-zero \\(K\\)-vector \\(x \\in \\mathbb{R}^K\\) satisfies the constraint \\[\n\\sum_{k=1}^K x_k = 0.\n\\]\n\n\n\nThe transform is defined iteratively. Given an \\(x \\in \\mathbb{R}^{N + 1}\\) that sums to zero (i.e., \\(\\sum_{n=1}^{N+1} x_n = 0\\)), the transform proceeds as follows to produce an unconstrained \\(y \\in \\mathbb{R}^N\\). This is mathematically equivalent to pre-multiplying the unconstrained \\(y\\) by an orthogonal standard basis, e.g. constructing orthogonal vectors from the standard basis using the Gram-Schmidt process. 
The single loop version below achieves low computational and memory costs as no matrices are created or multiplied.\nThe transform is initialized by setting \\[\nS_N = 0\n\\] and \\[\ny_N = -x_{N + 1} \\sqrt{1 + \\frac{1}{N}}.\n\\] Then for each \\(n\\) from \\(N - 1\\) down to \\(1\\), let \\[\nw_{n + 1} = \\frac{y_{n + 1}}{\\sqrt{(n + 1)(n + 2)}},\n\\] \\[\nS_n = S_{n + 1} + w_{n + 1},\n\\] and \\[\ny_n = (S_n - x_{n + 1}) \\frac{\\sqrt{n (n + 1)}}{n}.\n\\]\nThis transform is expressed in Stan code as:\n vector manual_sum_to_zero_jacobian(vector x) {\n int N = size(x) - 1;\n vector[N] y;\n y[N] = -x[N+1] * sqrt(1 + 1. / N);\n real sum_w = 0;\n for (n in 1:(N-1)) {\n int i = N - n;\n int i_p_1 = i + 1;\n real w = y[i_p_1] * inv_sqrt(i_p_1 * (i_p_1 + 1));\n sum_w += w;\n y[i] = (sum_w - x[i_p_1]) * sqrt(i_p_1 * i) / i;\n }\n return y;\n }\n\n\n\nThe inverse transform follows the isometric logratio transform. It maps an unconstrained vector \\(y \\in \\mathbb{R}^N\\) to a sum-to-zero vector \\(x \\in \\mathbb{R}^{N + 1}\\) such that \\[\n\\sum_{n=1}^{N + 1} x_n = 0.\n\\] The values are defined inductively, starting with \\[\nx_1 = \\sum_{n=1}^N \\frac{y_n}{\\sqrt{n (n + 1)}}\n\\] and then setting \\[\nx_{n + 1} = \\sum_{i = n + 1}^N \\frac{y_i}{\\sqrt{i (i + 1)}}\n- n \\cdot \\frac{y_n}{\\sqrt{n (n + 1)}}.\n\\] for \\(n \\in 1{:}N\\).\nThe definition is such that \\[\n\\sum_{n = 1}^{N + 1} x_n = 0\n\\] by construction, because each of the terms added to \\(x_{n}\\) is then subtracted from \\(x_{n + 1}\\) the number of times it shows up in earlier terms.\n\n\n\nThe inverse transform is a linear operation, leading to a constant Jacobian determinant which is therefore not included.\nThe sum-to-zero inverse transform is expressed within Stan as:\n vector manual_sum_to_zero_inverse_jacobian(vector y) {\n int N = num_elements(y);\n vector[N + 1] x = zeros_vector(N + 1);\n real sum_w = 0;\n for (n in 1:N) {\n int i = N - n + 1;\n real w = y[i] * inv_sqrt(i * (i + 1));\n sum_w 
+= w;\n x[i] += sum_w;\n x[i + 1] -= w * i;\n }\n return x;\n }\nNote that there is no target += increment because the Jacobian determinant is constant.\n\n\n\nThe matrix case of the sum-to-zero transform generalizes the vector case to ensure that every row of the matrix sums to zero and every column of the matrix sums to zero. In fact, any \\(N\\)-dimensional array can be constructed into a sum-to-zero N-dimensional array using the sum-to-zero vector. This is because the vector transform is a linear bijection and produces an orthogonally constructed sum-to-zero object by applying the one dimensional transform across each array slice. For the matrix case, there are two slices present, the rows and columns, to perform the transform over. The sum-to-zero vector is applied over the vectorized slice of either the row or column slice and subsequently to the other slice.\nLet the unconstrained matrix be \\[\n\\mathcal Y \\in \\mathbb R^{n_1 \\times n_2}\n\\] and the zero sum vector transform as \\[\n\\mathbf z = \\mathcal C_n(\\mathbf y)\\;=\\;\n\\begin{bmatrix}H_n\\\\[2pt]-\\mathbf 1_{1\\times d}\\end{bmatrix}\\mathbf y\n\\;\\in\\mathbb R^{n+1},\n\\] where \\(H_n\\in\\mathbb R^{n \\times n}\\) is the orthogonal Helmert matrix and satisfies \\(\\mathbf 1_{1\\times d}A_d^{\\!\\top}=0\\).\nApplying \\(C_n\\) to each slice results in\n\\[\n\\mathcal Z \\;=\\;\n\\mathcal Y\n\\times_1 \\bigl[\\mathcal C_{n_1}\\bigr]\n\\times_2 \\bigl[\\mathcal C_{n_2}\\bigr]\n\\] where \\[\n\\mathcal Z \\in\n\\mathbb R^{(n_1+1)\\times\\cdots\\times(n_2+1)}.\n\\]\nBecause each \\(\\mathbb R^{d_1\\times\\cdots\\times d_N}\\) is invertible on the \\(\\mathbf 1^\\perp\\) subspace the composite map applied to \\(\\mathcal Z\\) is a linear bijection between \\(\\mathbb R^{n_1\\times n_2}\\) and the codomain \\(\\mathbb R^{(n_1+1)\\times\\cdots\\times(n_2+1)}\\).", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": 
"reference-manual/transforms.html#simplex-transform.section", + "href": "reference-manual/transforms.html#simplex-transform.section", + "title": "Constraint Transforms", + "section": "", + "text": "Variables constrained to the unit simplex show up in multivariate discrete models as both parameters (categorical and multinomial) and as variates generated by their priors (Dirichlet and multivariate logistic).\nThe unit \\(K\\)-simplex is the set of points \\(x \\in \\mathbb{R}^K\\) such that for \\(1 \\leq k \\leq K\\),\n\\[\nx_k > 0,\n\\]\nand\n\\[\n\\sum_{k=1}^K x_k = 1.\n\\]\nAn alternative definition is to take the convex closure of the vertices. For instance, in 2-dimensions, the simplex vertices are the extreme values \\((0,1)\\), and \\((1,0)\\) and the unit 2-simplex is the line connecting these two points; values such as \\((0.3,0.7)\\) and \\((0.99,0.01)\\) lie on the line. In 3-dimensions, the basis is \\((0,0,1)\\), \\((0,1,0)\\) and \\((1,0,0)\\) and the unit 3-simplex is the boundary and interior of the triangle with these vertices. Points in the 3-simplex include \\((0.5,0.5,0)\\), \\((0.2,0.7,0.1)\\) and all other triplets of non-negative values summing to 1.\nAs these examples illustrate, the simplex always picks out a subspace of \\(K-1\\) dimensions from \\(\\mathbb{R}^K\\). Therefore a point \\(x\\) in the \\(K\\)-simplex is fully determined by its first \\(K-1\\) elements \\(x_1, x_2,\n\\ldots, x_{K-1}\\), with\n\\[\nx_K = 1 - \\sum_{k=1}^{K-1} x_k.\n\\]\n\n\nThe length-\\(K\\) unit simplex inverse transform is given by the softmax of a sum-to-zero vector of length \\(K\\).\nLet \\(y\\) represent the unconstrained \\(K - 1\\) values in \\((-\\infty, \\infty)\\). The intermediate sum-to-zero vector \\(z = \\text{sum\\_to\\_zero\\_transform}(y)\\) is length \\(K\\). 
The unit simplex is then given by \\[\nx_i = \\text{softmax}(z) = \\frac{\\exp(z_i)}{\\sum_{k = 1}^K \\exp(z_k)}\n\\]\nThe sum-to-zero vector transform is described in further detail at the sum-to-zero vector section of the Reference Manual.\n\n\n\n\n\n\nNote\n\n\n\nAll versions of Stan pre-2.37 used the stick-breaking transform. This is documented at Stan 2.36 Reference Manual: Simplex Transform.\n\n\n\n\nThe Jacobian \\(J\\) of the inverse unit-simplex transform is found by restricting \\(J\\) to the subspace spanned by the sum-to-zero vector \\(z\\). The Jacobian is given as the \\((K - 1) \\times (K - 1)\\) matrix \\(J\\) where\n\\[\nJ_{ij} = \\frac{\\partial x_i}{\\partial z_j} =\n\\frac{\\partial}{\\partial z_j} \\left( \\frac{\\exp(z_i)}{{\\sum_{k = 1}^K \\exp(z_k)}} \\right)\n\\] and \\(i,j \\in 1, \\ldots, K - 1\\).\nThe diagonal and off-diagonal derivatives are found using the derivative quotient rule and algebraic simplification\n\\[\nJ_{ij} =\n\\begin{cases}\nx_i (1 - x_i), & \\text{if } i = j, \\\\\n-x_i x_j, & \\text{if } i \\neq j.\n\\end{cases}\n\\]\nIn matrix form this can be expressed as\n\\[\nJ = \\text{diag}(x) - x x^\\top\n\\]\nThe determinant of this matrix can be found using the Matrix Determinant Lemma:\n\\[\n\\det\\bigl(A + u v^{\\top}\\bigr)\n=\n\\det(A)\\,\\bigl(1 + v^{\\top}A^{-1}u\\bigr).\n\\]\nHere,\n\\[\nA \\;=\\; \\operatorname{diag}(x_{1},\\ldots, x_{K-1}),\n\\quad\nu \\;=\\; -\\bigl(x_1,\\ldots, x_{K-1}\\bigr)^{\\!\\top},\n\\quad\nv \\;=\\; \\bigl(x_{1}, \\ldots, x_{K-1}\\bigr)^{\\!\\top}.\n\\] Therefore,\n\\[\n\\begin{aligned}\n\\det(J)\n&=\n\\bigg(\\prod_{i=1}^{K-1} x_i \\bigg)\n\\bigg(1 + (x_{1},\\ldots, x_{K-1})\\,\\mathrm{diag}\\bigl(x_{1}^{-1},\\ldots,x_{K-1}^{-1}\\bigr)\\,\n\\big(-x_{1},\\ldots,-x_{K-1}\\big)^{\\top}\n\\bigg) \\\\\n&=\n\\bigg(\\prod_{i=1}^{K-1} x_{i}\\bigg)\n\\bigg(1 - \\sum_{i=1}^{K-1} x_{i}\\bigg)\n=\n\\bigg(\\prod_{i=1}^{K-1} x_{i}\\bigg) x_{K} \\\\\n&=\n\\prod_{i=1}^{K} 
x_{i}.\n\\end{aligned}\n\\]\n\n\n\n\nThe transform \\(Y = f(X)\\) can be derived by reversing the stages of the inverse transform,\n\\[\ny_k\n= H^\\top \\bigg(\\log(x_k)\n- \\frac{1}{K}\\sum_{i=1}^K\\log(x_i) \\bigg)\n.\n\\]\nThe matrix \\(H\\) is the orthogonal basis matrix the sum-to-zero vector uses. Since the matrix is orthonormal, the transpose is the same as the inverse.", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#stochastic-matrix-transform.section", + "href": "reference-manual/transforms.html#stochastic-matrix-transform.section", + "title": "Constraint Transforms", + "section": "", + "text": "The column_stochastic_matrix[N, M] and row_stochastic_matrix[M, N] type in Stan represents an \\(N \\times M\\) matrix where each column (row) is a unit simplex of dimension \\(N\\). In other words, each column (row) of the matrix is a vector constrained to have non-negative entries that sum to one.\n\n\nA column stochastic matrix \\(X \\in \\mathbb{R}^{N \\times M}\\) is defined such that each column is a simplex. For column \\(m\\) (where \\(1 \\leq m \\leq M\\)):\n\\[\nX_{n, m} \\geq 0 \\quad \\text{for } 1 \\leq n \\leq N,\n\\]\nand\n\\[\n\\sum_{n=1}^N X_{n, m} = 1.\n\\]\nA row stochastic matrix is any matrix whose transpose is a column stochastic matrix (i.e. 
the rows of the matrix are simplexes)\n\\[\nX_{n, m} \\geq 0 \\quad \\text{for } 1 \\leq n \\leq N,\n\\]\nand\n\\[\n\\sum_{m=1}^N X_{n, m} = 1.\n\\]\nThis definition ensures that each column (row) of the matrix \\(X\\) lies on the \\(N-1\\) dimensional unit simplex, similar to the simplex[N] type, but extended across multiple columns(rows).\n\n\n\nFor the column and row stochastic matrices the inverse transform is the same as simplex, but applied to each column (row).\n\n\n\nThe Jacobian determinant of the inverse transform for each column \\(m\\) in the matrix is given by the product of the diagonal entries \\(J_{n, m}\\) of the lower-triangular Jacobian matrix. This determinant is calculated as:\n\\[\n\\left| \\det J_m \\right| = \\prod_{n=1}^{N-1} \\left( z_{n, m} (1 - z_{n, m}) \\left( 1 - \\sum_{n'=1}^{n-1} X_{n', m} \\right) \\right).\n\\]\nThus, the overall Jacobian determinant for the entire column_stochastic_matrix and row_stochastic_matrix is the product of the determinants for each column (row):\n\\[\n\\left| \\det J \\right| = \\prod_{m=1}^{M} \\left| \\det J_m \\right|.\n\\]\n\n\n\nFor the column and row stochastic matrices the transform is the same as simplex, but applied to each column (row).", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#unit-vector.section", + "href": "reference-manual/transforms.html#unit-vector.section", + "title": "Constraint Transforms", + "section": "", + "text": "An \\(n\\)-dimensional vector \\(x \\in \\mathbb{R}^n\\) is said to be a unit vector if it has unit Euclidean length, so that\n\\[\n\\Vert x \\Vert\n\\ = \\ \\sqrt{x^{\\top}\\,x}\n\\ = \\ \\sqrt{x_1^2 + x_2^2 + \\cdots + x_n^2}\n\\ = \\ 1\\ .\n\\]\n\n\nStan divides an unconstrained vector \\(y \\in \\mathbb{R}^{n}\\) by its norm, \\(\\Vert y \\Vert = \\sqrt{y^\\top y}\\), to obtain a unit vector \\(x\\),\n\\[\nx = \\frac{y}{\\Vert y \\Vert}.\n\\]\nTo generate a unit vector, 
Stan generates points at random in \\(\\mathbb{R}^n\\) with independent unit normal distributions, which are then standardized by dividing by their Euclidean length. Muller (1959) showed this generates points uniformly at random on \\(S^{n-1}\\). That is, if we draw \\(y_n \\sim \\mathsf{Normal}(0, 1)\\) for \\(n \\in 1{:}n\\), then \\(x = \\frac{y}{\\Vert y \\Vert}\\) has a uniform distribution over \\(S^{n-1}\\). This allows us to use an \\(n\\)-dimensional basis for \\(S^{n-1}\\) that preserves local neighborhoods in that points that are close to each other in \\(\\mathbb{R}^n\\) map to points near each other in \\(S^{n-1}\\). The mapping is not perfectly distance preserving, because there are points arbitrarily far away from each other in \\(\\mathbb{R}^n\\) that map to identical points in \\(S^{n-1}\\).\n\n\nThe above mapping from \\(\\mathbb{R}^n\\) to \\(S^n\\) is not defined at zero. While this point outcome has measure zero during sampling, and may thus be ignored, it is the default initialization point and thus unit vector parameters cannot be initialized at zero. A simple workaround is to initialize from a very small interval around zero, which is an option built into all of the Stan interfaces.\n\n\n\n\nThe Jacobian matrix relating the input vector \\(y\\) to the output vector \\(x\\) is singular because \\(x^\\top x = 1\\) for any non-zero input vector \\(y\\). Thus, there technically is no unique transformation from \\(x\\) to \\(y\\). To circumvent this issue, let \\(r = \\sqrt{y^\\top y}\\) so that \\(y = r\nx\\). The transformation from \\(\\left(r, x_{-n}\\right)\\) to \\(y\\) is well-defined but \\(r\\) is arbitrary, so we set \\(r = 1\\). 
In this case, the determinant of the Jacobian is proportional to \\(e^{-\\frac{1}{2} y^\\top y}\\), which is the kernel of a standard multivariate normal distribution with \\(n\\) independent dimensions.", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#correlation-matrix-transform.section", + "href": "reference-manual/transforms.html#correlation-matrix-transform.section", + "title": "Constraint Transforms", + "section": "", + "text": "A \\(K \\times K\\) correlation matrix \\(x\\) must be symmetric, so that\n\\[\nx_{k,k'} = x_{k',k}\n\\]\nfor all \\(k,k' \\in \\{ 1, \\ldots, K \\}\\), it must have a unit diagonal, so that\n\\[\nx_{k,k} = 1\n\\]\nfor all \\(k \\in \\{ 1, \\ldots, K \\}\\), and it must be positive definite, so that for every non-zero \\(K\\)-vector \\(a\\),\n\\[\na^{\\top} x a > 0.\n\\]\nThe number of free parameters required to specify a \\(K \\times K\\) correlation matrix is \\(\\binom{K}{2}\\).\nThere is more than one way to map from \\(\\binom{K}{2}\\) unconstrained parameters to a \\(K \\times K\\) correlation matrix. Stan implements the Lewandowski-Kurowicka-Joe (LKJ) transform Lewandowski, Kurowicka, and Joe (2009).\n\n\nIt is easiest to specify the inverse, going from its \\(\\binom{K}{2}\\) parameter basis to a correlation matrix. The basis will actually be broken down into two steps. To start, suppose \\(y\\) is a vector containing \\(\\binom{K}{2}\\) unconstrained values. These are first transformed via the bijective function \\(\\tanh : \\mathbb{R} \\rightarrow\n(-1, 1)\\)\n\\[\n\\tanh y = \\frac{\\exp(2y) - 1}{\\exp(2y) + 1}.\n\\]\nThen, define a \\(K \\times K\\) matrix \\(z\\), the upper triangular values of which are filled by row with the transformed values, and the diagonal entries are set to one. 
For example, in the \\(4 \\times 4\\) case, there are \\(\\binom{4}{2}\\) values arranged as\n\\[\nz\n=\n\\left[\n\\begin{array}{cccc}\n1 & \\tanh y_1 & \\tanh y_2 & \\tanh y_4\n\\\\\n0 & 1 & \\tanh y_3 & \\tanh y_5\n\\\\\n0 & 0 & 1 & \\tanh y_6\n\\\\\n0 & 0 & 0 & 1\n\\end{array}\n\\right]\n.\n\\]\nLewandowski, Kurowicka and Joe (LKJ) show how to bijectively map the array \\(z\\) to a correlation matrix \\(x\\). The entry \\(z_{i,j}\\) for \\(i <\nj\\) is interpreted as the canonical partial correlation (CPC) between \\(i\\) and \\(j\\), which is the correlation between \\(i\\)’s residuals and \\(j\\)’s residuals when both \\(i\\) and \\(j\\) are regressed on all variables \\(i'\\) such that \\(i'< i\\). In the case of \\(i=1\\), there are no earlier variables, so \\(z_{1,j}\\) is just the Pearson correlation between \\(i\\) and \\(j\\).\nIn Stan, the LKJ transform is reformulated in terms of a Cholesky factor \\(w\\) of the final correlation matrix, defined for \\(1 \\leq i,j \\leq K\\) by\n\\[\nw_{i,j} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } i > j,\n\\\\\n1 & \\mbox{if } 1 = i = j,\n\\\\\n\\prod_{i'=1}^{i - 1} \\left( 1 - z_{i'\\!,\\,j}^2 \\right)^{1/2}\n& \\mbox{if } 1 < i = j,\n\\\\\nz_{i,j} & \\mbox{if } 1 = i < j, \\mbox{ and}\n\\\\\\\nz_{i,j} \\, \\prod_{i'=1}^{i-1} \\left( 1 - z_{i'\\!,\\,j}^2 \\right)^{1/2}\n& \\mbox{ if } 1 < i < j.\n\\end{array}\n\\right.\n\\]\nThis does not require as much computation per matrix entry as it may appear; calculating the rows in terms of earlier rows yields the more manageable expression\n\\[\nw_{i,j} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } i > j,\n\\\\\n1 & \\mbox{if } 1 = i = j,\n\\\\\nz_{i,j} & \\mbox{if } 1 = i < j, \\mbox{ and}\n\\\\\n\\frac{z_{i,j}}{z_{i-1,j}} \\ w_{i-1,j} \\left( 1 - z_{i-1,j}^2 \\right)^{1/2}\n& \\mbox{ if } 1 < i \\leq j.\n\\end{array}\n\\right.\n\\]\nGiven the upper-triangular Cholesky factor \\(w\\), the final correlation matrix is\n\\[\nx = w^{\\top} w.\n\\]\nLewandowski, 
Kurowicka, and Joe (2009) show that the determinant of the correlation matrix can be defined in terms of the canonical partial correlations as\n\\[\n\\mbox{det} \\, x = \\prod_{i=1}^{K-1} \\ \\prod_{j=i+1}^K \\ (1 - z_{i,j}^2)\n= \\prod_{1 \\leq i < j \\leq K} (1 - z_{i,j}^2),\n\\]\n\n\n\nFrom the inverse of equation 11 in (Lewandowski, Kurowicka, and Joe 2009), the absolute Jacobian determinant is\n\\[\n\\sqrt{\\prod_{i=1}^{K-1}\\prod_{j=i+1}^K \\left(1-z_{i,j}^2\\right)^{K-i-1}} \\\n\\times \\prod_{i=1}^{K-1}\\prod_{j=i+1}^K\n\\frac{\\partial z_{i,j}}{\\partial y_{i,j}}\n\\]\n\n\n\nThe correlation transform is defined by reversing the steps of the inverse transform defined in the previous section.\nStarting with a correlation matrix \\(x\\), the first step is to find the unique upper triangular \\(w\\) such that \\(x = w w^{\\top}\\). Because \\(x\\) is positive definite, this can be done by applying the Cholesky decomposition,\n\\[\nw = \\mbox{chol}(x).\n\\]\nThe next step from the Cholesky factor \\(w\\) back to the array \\(z\\) of canonical partial correlations (CPCs) is simplified by the ordering of the elements in the definition of \\(w\\), which when inverted yields\n\\[\nz_{i,j} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } i \\leq j,\n\\\\\nw_{i,j} & \\mbox{if } 1 = i < j, \\mbox{ and}\n\\\\\n{w_{i,j}}\n\\\n\\prod_{i'=1}^{i-1} \\left( 1 - z_{i'\\!,j}^2 \\right)^{-1/2}\n& \\mbox{if } 1 < i < j.\n\\end{array}\n\\right.\n\\]\nThe final stage of the transform reverses the hyperbolic tangent transform, which is defined by\n\\[\ny = \\tanh^{-1} z = \\frac{1}{2} \\log \\left( \\frac{1 + z}{1 - z} \\right).\n\\]\nThe inverse hyperbolic tangent function, \\(\\tanh^{-1}\\), is also called the Fisher transformation.", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#covariance-matrices", + "href": "reference-manual/transforms.html#covariance-matrices", + "title": "Constraint 
Transforms", + "section": "", + "text": "A \\(K \\times K\\) matrix is a covariance matrix if it is symmetric and positive definite (see the previous section for definitions). It requires \\(K + \\binom{K}{2}\\) free parameters to specify a \\(K \\times K\\) covariance matrix.\n\n\nStan’s covariance transform is based on a Cholesky decomposition composed with a log transform of the positive-constrained diagonal elements.1\nIf \\(x\\) is a covariance matrix (i.e., a symmetric, positive definite matrix), then there is a unique lower-triangular matrix \\(z =\n\\mathrm{chol}(x)\\) with positive diagonal entries, called a Cholesky factor, such that\n\\[\nx = z \\, z^{\\top}.\n\\]\nThe off-diagonal entries of the Cholesky factor \\(z\\) are unconstrained, but the diagonal entries \\(z_{k,k}\\) must be positive for \\(1 \\leq k\n\\leq K\\).\nTo complete the transform, the diagonal is log-transformed to produce a fully unconstrained lower-triangular matrix \\(y\\) defined by\n\\[\ny_{m,n} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } m < n,\n\\\\\n\\log z_{m,m} & \\mbox{if } m = n, \\mbox{ and}\n\\\\\nz_{m,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\n\n\n\nThe inverse transform reverses the two steps of the transform. 
Given an unconstrained lower-triangular \\(K \\times K\\) matrix \\(y\\), the first step is to recover the intermediate matrix \\(z\\) by reversing the log transform,\n\\[\nz_{m,n} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } m < n,\n\\\\\n\\exp(y_{m,m}) & \\mbox{if } m = n, \\mbox{ and}\n\\\\\ny_{m,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\nThe covariance matrix \\(x\\) is recovered from its Cholesky factor \\(z\\) by taking\n\\[\nx = z \\, z^{\\top}.\n\\]\n\n\n\nThe Jacobian is the product of the Jacobians of the exponential transform from the unconstrained lower-triangular matrix \\(y\\) to matrix \\(z\\) with positive diagonals and the product transform from the Cholesky factor \\(z\\) to \\(x\\).\nThe transform from unconstrained \\(y\\) to Cholesky factor \\(z\\) has a diagonal Jacobian matrix, the absolute determinant of which is thus\n\\[\n\\prod_{k=1}^K \\frac{\\partial}{\\partial_{y_{k,k}}} \\, \\exp(y_{k,k})\n\\ = \\\n\\prod_{k=1}^K \\exp(y_{k,k})\n\\ = \\\n\\prod_{k=1}^K z_{k,k}.\n\\]\nThe Jacobian matrix of the second transform from the Cholesky factor \\(z\\) to the covariance matrix \\(x\\) is also triangular, with diagonal entries corresponding to pairs \\((m,n)\\) with \\(m \\geq n\\), defined by\n\\[\n\\frac{\\partial}{\\partial z_{m,n}}\n\\left( z \\, z^{\\top} \\right)_{m,n}\n\\ = \\\n\\frac{\\partial}{\\partial z_{m,n}}\n\\left( \\sum_{k=1}^K z_{m,k} \\, z_{n,k} \\right)\n\\ = \\\n\\left\\{\n\\begin{array}{cl}\n2 \\, z_{n,n} & \\mbox{if } m = n \\mbox{ and }\n\\\\\nz_{n,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\nThe absolute Jacobian determinant of the second transform is thus\n\\[\n2^{K} \\ \\prod_{m = 1}^{K} \\ \\prod_{n=1}^{m} z_{n,n}\n\\ = \\\n\\prod_{n=1}^K \\ \\prod_{m=n}^K z_{n,n}\n\\ = \\\n2^{K} \\ \\prod_{k=1}^K z_{k,k}^{K - k + 1}.\n\\]\nFinally, the full absolute Jacobian determinant of the inverse of the covariance matrix transform from the unconstrained lower-triangular \\(y\\) to a symmetric, positive 
definite matrix \\(x\\) is the product of the Jacobian determinants of the exponentiation and product transforms,\n\\[\n\\left( \\prod_{k=1}^K z_{k,k} \\right)\n\\left(\n2^{K} \\ \\prod_{k=1}^K z_{k,k}^{K - k + 1}\n\\right)\n\\ = \\\n2^K\n\\, \\prod_{k=1}^K z_{k,k}^{K-k+2}.\n\\]\nLet \\(f^{-1}\\) be the inverse transform from a \\(K + \\binom{K}{2}\\)-vector \\(y\\) to the \\(K \\times K\\) covariance matrix \\(x\\). A density function \\(p_X(x)\\) defined on \\(K \\times K\\) covariance matrices is transformed to the density \\(p_Y(y)\\) over \\(K + \\binom{K}{2}\\) vectors \\(y\\) by\n\\[\np_Y(y) = p_X(f^{-1}(y)) \\ 2^K \\ \\prod_{k=1}^K z_{k,k}^{K-k+2}.\n\\]", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#cholesky-factors-of-covariance-matrices", + "href": "reference-manual/transforms.html#cholesky-factors-of-covariance-matrices", + "title": "Constraint Transforms", + "section": "", + "text": "An \\(M \\times M\\) covariance matrix \\(\\Sigma\\) can be Cholesky factored to a lower triangular matrix \\(L\\) such that \\(L\\,L^{\\top} = \\Sigma\\). If \\(\\Sigma\\) is positive definite, then \\(L\\) will be \\(M \\times M\\). If \\(\\Sigma\\) is only positive semi-definite, then \\(L\\) will be \\(M \\times N\\), with \\(N < M\\).\nA matrix is a Cholesky factor for a covariance matrix if and only if it is lower triangular, the diagonal entries are positive, and \\(M \\geq\nN\\). A matrix satisfying these conditions ensures that \\(L \\,\nL^{\\top}\\) is positive semi-definite if \\(M > N\\) and positive definite if \\(M = N\\).\nA Cholesky factor of a covariance matrix requires \\(N + \\binom{N}{2} +\n(M - N)N\\) unconstrained parameters.\n\n\nStan’s Cholesky factor transform only requires the first step of the covariance matrix transform, namely log transforming the positive diagonal elements. Suppose \\(x\\) is an \\(M \\times N\\) Cholesky factor. 
The above-diagonal entries are zero, the diagonal entries are positive, and the below-diagonal entries are unconstrained. The transform required is thus\n\\[\ny_{m,n} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } m < n,\n\\\\\n\\log x_{m,m} & \\mbox{if } m = n, \\mbox{ and}\n\\\\\nx_{m,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\n\n\n\nThe inverse transform need only invert the logarithm with an exponentiation. If \\(y\\) is the unconstrained matrix representation, then the elements of the constrained matrix \\(x\\) are defined by\n\\[\nx_{m,n} =\n\\left\\{\n\\begin{array}{cl}\n0 & \\mbox{if } m < n,\n\\\\\n\\exp(y_{m,m}) & \\mbox{if } m = n, \\mbox{ and}\n\\\\\ny_{m,n} & \\mbox{if } m > n.\n\\end{array}\n\\right.\n\\]\n\n\n\nThe transform has a diagonal Jacobian matrix, the absolute determinant of which is\n\\[\n\\prod_{n=1}^N \\frac{\\partial}{\\partial_{y_{n,n}}} \\, \\exp(y_{n,n})\n\\ = \\\n\\prod_{n=1}^N \\exp(y_{n,n})\n\\ = \\\n\\prod_{n=1}^N x_{n,n}.\n\\]\nLet \\(x = f^{-1}(y)\\) be the inverse transform from a \\(N + \\binom{N}{2}\n+ (M - N)N\\) vector to an \\(M \\times N\\) Cholesky factor for a covariance matrix \\(x\\) defined in the previous section. A density function \\(p_X(x)\\) defined on \\(M \\times N\\) Cholesky factors of covariance matrices is transformed to the density \\(p_Y(y)\\) over \\(N + \\binom{N}{2}\n+ (M - N)N\\) vectors \\(y\\) by\n\\[\np_Y(y) = p_X(f^{-1}(y)) \\prod_{n=1}^N x_{n,n}.\n\\]", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#cholesky-factors-of-correlation-matrices", + "href": "reference-manual/transforms.html#cholesky-factors-of-correlation-matrices", + "title": "Constraint Transforms", + "section": "", + "text": "A \\(K \\times K\\) correlation matrix \\(\\Omega\\) is positive definite and has a unit diagonal. 
Because it is positive definite, it can be Cholesky factored to a \\(K \\times K\\) lower-triangular matrix \\(L\\) with positive diagonal elements such that \\(\\Omega = L\\,L^{\\top}\\). Because the correlation matrix has a unit diagonal,\n\\[\n\\Omega_{k,k} = L_k\\,L_k^{\\top} = 1,\n\\]\neach row vector \\(L_k\\) of the Cholesky factor is of unit length. The length and positivity constraint allow the diagonal elements of \\(L\\) to be calculated from the off-diagonal elements, so that a Cholesky factor for a \\(K \\times K\\) correlation matrix requires only \\(\\binom{K}{2}\\) unconstrained parameters.\n\n\nIt is easiest to start with the inverse transform from the \\(\\binom{K}{2}\\) unconstrained parameters \\(y\\) to the \\(K \\times K\\) lower-triangular Cholesky factor \\(x\\). The inverse transform is based on the hyperbolic tangent function, \\(\\tanh\\), which satisfies \\(\\tanh(x) \\in (-1,1)\\). Here it will function like an inverse logit with a sign to pick out the direction of an underlying canonical partial correlation; see the section on correlation matrix transforms for more information on the relation between canonical partial correlations and the Cholesky factors of correlation matrices.\nSuppose \\(y\\) is a vector of \\(\\binom{K}{2}\\) unconstrained values. Let \\(z\\) be a lower-triangular matrix with zero diagonal and below diagonal entries filled by row. 
For example, in the \\(3 \\times 3\\) case,\n\\[\nz =\n\\left[\n\\begin{array}{ccc}\n0 & 0 & 0\n\\\\\n\\tanh y_1 & 0 & 0\n\\\\\n\\tanh y_2 & \\tanh y_3 & 0\n\\end{array}\n\\right]\n\\]\nThe matrix \\(z\\), with entries in the range \\((-1, 1)\\), is then transformed to the Cholesky factor \\(x\\), by taking2\n\\[\nx_{i,j}\n=\n\\left\\{\n\\begin{array}{lll}\n0 & \\mbox{ if } i < j & \\mbox{ [above diagonal]}\n\\\\\n\\sqrt{1 - \\sum_{j' < j} x_{i,j'}^2}\n & \\mbox{ if } i = j & \\mbox{ [on diagonal]}\n\\\\\nz_{i,j} \\ \\sqrt{1 - \\sum_{j' < j} x_{i,j'}^2}\n & \\mbox{ if } i > j & \\mbox{ [below diagonal]}\n\\end{array}\n\\right.\n\\]\nIn the \\(3 \\times 3\\) case, this yields\n\\[\nx =\n\\left[\n\\begin{array}{ccc}\n1 & 0 & 0\n\\\\\nz_{2,1} & \\sqrt{1 - x_{2,1}^2} & 0\n\\\\\nz_{3,1} & z_{3,2} \\sqrt{1 - x_{3,1}^2}\n & \\sqrt{1 - (x_{3,1}^2 + x_{3,2}^2)}\n\\end{array}\n\\right],\n\\]\nwhere the \\(z_{i,j} \\in (-1,1)\\) are the \\(\\tanh\\)-transformed \\(y\\).\nThe approach is a signed stick-breaking process on the quadratic (Euclidean length) scale. Starting from length 1 at \\(j=1\\), each below-diagonal entry \\(x_{i,j}\\) is determined by the (signed) fraction \\(z_{i,j}\\) of the remaining length for the row that it consumes. The diagonal entries \\(x_{i,i}\\) get any leftover length from earlier entries in their row. The above-diagonal entries are zero.\n\n\n\nSuppose \\(x\\) is a \\(K \\times K\\) Cholesky factor for some correlation matrix. 
The first step of the transform reconstructs the intermediate values \\(z\\) from \\(x\\),\n\\[\nz_{i,j} = \\frac{x_{i,j}}{\\sqrt{1 - \\sum_{j' < j}x_{i,j'}^2}}.\n\\]\nThe mapping from the resulting \\(z\\) to \\(y\\) inverts \\(\\tanh\\),\n\\[\ny\n\\ = \\\n\\tanh^{-1} z\n\\ = \\\n\\frac{1}{2} \\left( \\log (1 + z) - \\log (1 - z) \\right).\n\\]\n\n\n\nThe Jacobian of the full transform is the product of the Jacobians of its component transforms.\nFirst, for the inverse transform \\(z = \\tanh y\\), the derivative is\n\\[\n\\frac{d}{dy} \\tanh y = \\frac{1}{(\\cosh y)^2}.\n\\]\nSecond, for the inverse transform of \\(z\\) to \\(x\\), the resulting Jacobian matrix \\(J\\) is of dimension \\(\\binom{K}{2} \\times\n\\binom{K}{2}\\), with indexes \\((i,j)\\) for \\((i > j)\\). The Jacobian matrix is lower triangular, so that its determinant is the product of its diagonal entries, of which there is one for each \\((i,j)\\) pair,\n\\[\n\\left| \\, \\mbox{det} \\, J \\, \\right|\n \\ = \\ \\prod_{i > j} \\left| \\frac{d}{dz_{i,j}} x_{i,j} \\right|,\n\\]\nwhere\n\\[\n\\frac{d}{dz_{i,j}} x_{i,j}\n= \\sqrt{1 - \\sum_{j' < j} x^2_{i,j'}}.\n\\]\nSo the combined density for unconstrained \\(y\\) is\n\\[\np_Y(y)\n= p_X(f^{-1}(y))\n \\ \\\n \\prod_{n \\leq \\binom{K}{2}} \\frac{1}{(\\cosh y)^2}\n \\ \\\n \\prod_{i > j} \\left( 1 - \\sum_{j' < j} x_{i,j'}^2\n \\right)^{1/2},\n\\]\nwhere \\(x = f^{-1}(y)\\) is used for notational convenience. 
The log Jacobian determinant of the complete inverse transform \\(x = f^{-1}(y)\\) is given by\n\\[\n\\log \\left| \\, \\det J \\, \\right|\n=\n-2 \\sum_{n \\leq \\binom{K}{2}}\n\\log \\cosh y\n\\\n+\n\\\n\\frac{1}{2} \\\n\\sum_{i > j}\n\\log \\left( 1 - \\sum_{j' < j} x_{i,j'}^2 \\right)\n.\n\\]", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/transforms.html#footnotes", + "href": "reference-manual/transforms.html#footnotes", + "title": "Constraint Transforms", + "section": "Footnotes", + "text": "Footnotes\n\n\nAn alternative to the transform in this section, which can be coded directly in Stan, is to parameterize a covariance matrix as a scaled correlation matrix. An arbitrary \\(K \\times K\\) covariance matrix \\(\\Sigma\\) can be expressed in terms of a \\(K\\)-vector \\(\\sigma\\) and correlation matrix \\(\\Omega\\) as \\[\\Sigma = \\mbox{diag}(\\sigma) \\times \\Omega \\times \\mbox{diag}(\\sigma),\\] so that each entry is just a deviation-scaled correlation, \\[\\Sigma_{m,n} = \\sigma_m \\times \\sigma_n \\times \\Omega_{m,n}.\\]↩︎\nFor convenience, a summation with no terms, such as \\(\\sum_{j' < 1} x_{i,j'}\\), is defined to be 0. This implies \\(x_{1,1} = 1\\) and that \\(x_{i,1} = z_{i,1}\\) for \\(i > 1\\).↩︎", + "crumbs": [ + "Reference Manual", + "Language", + "Constraint Transforms" + ] + }, + { + "objectID": "reference-manual/statements.html", + "href": "reference-manual/statements.html", + "title": "Statements", + "section": "", + "text": "The blocks of a Stan program are made up of variable declarations and statements; see the blocks chapter for details. Unlike programs in BUGS, the declarations and statements making up a Stan program are executed in the order in which they are written. 
Variables must be defined to have some value (as well as declared to have some type) before they are used — if they do not, the behavior is undefined.\nThe basis of Stan’s execution is the evaluation of a log probability function (specifically, a log probability density function) for a given set of (real-valued) parameters. Log probability functions can be constructed by using distribution statements and log probability increment statements. Statements may be grouped into sequences and into for-each loops. In addition, Stan allows local variables to be declared in blocks and also allows an empty statement consisting only of a semicolon.\n\n\nThe data and parameters blocks do not allow statements of any kind because these blocks are solely used to declare the data variables for input and the parameter variables for sampling. All other blocks allow statements. In these blocks, both variable declarations and statements are allowed. All top-level variables in a block are considered block variables. See the blocks chapter for more information about the block structure of Stan programs.\n\n\n\nAn assignment statement consists of a variable (possibly multivariate with indexing information) and an expression. Executing an assignment statement evaluates the expression on the right-hand side and assigns it to the (indexed) variable on the left-hand side. An example of a simple assignment is as follows.\nn = 0;\nExecuting this statement assigns the value of the expression 0, which is the integer zero, to the variable n. For an assignment to be well formed, the type of the expression on the right-hand side should be compatible with the type of the (indexed) variable on the left-hand side. For the above example, because 0 is an expression of type int, the variable n must be declared as being of type int or of type real. If the variable is of type real, the integer zero is promoted to a floating-point zero and assigned to the variable. 
After the assignment statement executes, the variable n will have the value zero (either as an integer or a floating-point value, depending on its type).\nSyntactically, every assignment statement must be followed by a semicolon. Otherwise, whitespace between the tokens does not matter (the tokens here being the left-hand-side (indexed) variable, the assignment operator, the right-hand-side expression and the semicolon).\nBecause the right-hand side is evaluated first, it is possible to increment a variable in Stan just as in C++ and other programming languages by writing\nn = n + 1;\nSuch self assignments are not allowed in BUGS, because they induce a cycle into the directed graphical model.\nThe left-hand side of an assignment may contain indices for array, matrix, or vector data structures. For instance, if Sigma is of type matrix, then\nSigma[1, 1] = 1.0;\nsets the value in the first column of the first row of Sigma to one.\nAssignments to subcomponents of larger multi-variate data structures are supported by Stan. For example, if a is an array of type array[,] real and b is an array of type array[] real, then the following two statements are both well-formed.\na[3] = b;\nb = a[4];\nSimilarly, if x is a variable declared to have type row_vector and Y is a variable declared as type matrix, then the following sequence of statements to swap the first two rows of Y is well formed.\nx = Y[1];\nY[1] = Y[2];\nY[2] = x;\n\n\nStan allows assignment of lower types to higher types, but not vice-versa. That is, we can assign an expression of type int to an lvalue of type real, and we can assign an expression of type real to an lvalue of type complex. Furthermore, promotion is transitive, so that we can assign an expression of type int to an lvalue of type complex.\nPromotion extends to containers, so that arrays of int can be promoted to arrays of real during assignment, and arrays of real can be assigned to an lvalue of type array of complex. 
Similarly, an expression of type vector may be assigned to an lvalue of type complex_vector, and similarly for row vectors and matrices.\n\n\n\nThe expressions that are legal left-hand sides of assignment statements are known as “lvalues.” In Stan, there are three kinds of legal lvalues,\n\na variable, or\na variable with one or more indices, or\na comma separated list of lvalues surrounded by ( and )\n\nTo be used as an lvalue, an indexed variable must have at least as many dimensions as the number of indices provided. An array of real or integer types has as many dimensions as it is declared for. A matrix has two dimensions and a vector or row vector one dimension; this also holds for the constrained types, covariance and correlation matrices and their Cholesky factors and ordered, positive ordered, and simplex vectors. An array of matrices has two more dimensions than the array and an array of vectors or row vectors has one more dimension than the array. Note that the number of indices can be less than the number of dimensions of the variable, meaning that the right hand side must itself be multidimensional to match the remaining dimensions.\n\n\n\nMultiple indexes, as described in the multi-indexing section, are also permitted on the left-hand side of assignments. Indexing on the left side works exactly as it does for expressions, with multiple indexes preserving index positions and single indexes reducing them. The type on the left side must still match the type on the right side.\n\n\nAll assignment is carried out as if the right-hand side is copied before the assignment. This resolves any potential aliasing issues arising from the right-hand side changing in the middle of an assignment statement’s execution.\n\n\n\n\nStan’s arithmetic operators may be used in compound arithmetic and assignment operations. 
For example, consider the following example of compound addition and assignment.\nreal x = 5;\nx += 7; // value of x is now 12\nThe compound arithmetic and assignment statement above is equivalent to the following long form.\nx = x + 7;\nIn general, the compound form\nx op= y\nwill be equivalent to\nx = x op y;\nThe compound statement will be legal whenever the long form is legal. This requires that the operation x op y must itself be well formed and that the result of the operation be assignable to x. For the expression x to be assignable, it must be an indexed variable where the variable is defined in the current block. For example, the following compound addition and assignment statement will increment a single element of a vector by two.\nvector[N] x;\nx[3] += 2;\nAs a further example, consider\nmatrix[M, M] x;\nvector[M] y;\nreal z;\nx *= x; // OK, (x * x) is a matrix\nx *= z; // OK, (x * z) is a matrix\nx *= y; // BAD, (x * y) is a vector\nThe supported compound arithmetic and assignment operations are listed in the compound arithmetic/assignment table; they are also listed in the index prefaced by operator, e.g., operator+=.\nCompound Arithmetic/Assignment Table. Stan allows compound arithmetic and assignment statements of the forms listed in the table. The compound form is legal whenever the corresponding long form would be legal and it has the same effect.\n\n\n\noperation\ncompound\nunfolded\n\n\n\n\naddition\nx += y\nx = x + y\n\n\nsubtraction\nx -= y\nx = x - y\n\n\nmultiplication\nx *= y\nx = x * y\n\n\ndivision\nx /= y\nx = x / y\n\n\nelementwise multiplication\nx .*= y\nx = x .* y\n\n\nelementwise division\nx ./= y\nx = x ./ y\n\n\n\n\n\n\n\nThe basis of Stan’s execution is the evaluation of a log probability function (specifically, a log probability density function) for a given set of (real-valued) parameters; this function returns the log density of the posterior up to an additive constant. 
Data and transformed data are fixed before the log density is evaluated. The total log probability is initialized to zero. Next, any log Jacobian adjustments accrued by the variable constraints are added to the log density (the Jacobian adjustment may be skipped for maximum likelihood estimation via optimization). Distribution statements and log probability increment statements may add to the log density in the model block. A log probability increment statement directly increments the log density with the value of an expression as follows.1\ntarget += -0.5 * y * y;\nThe keyword target here is actually not a variable, and may not be accessed as such (though see below on how to access the value of target through a special function).\nIn this example, the unnormalized log probability of a unit normal variable \\(y\\) is added to the total log probability. In the general case, the argument can be any expression.2\nAn entire Stan model can be implemented this way. For instance, the following model has a single variable according to a unit normal probability.\nparameters {\n real y;\n}\nmodel {\n target += -0.5 * y * y;\n}\nThis model defines a log probability function\n\\[\n\\log p(y) = - \\, \\frac{y^2}{2} - \\log Z\n\\]\nwhere \\(Z\\) is a normalizing constant that does not depend on \\(y\\). The constant \\(Z\\) is conventionally written this way because on the linear scale, \\[\np(y) = \\frac{1}{Z} \\exp\\left(-\\frac{y^2}{2}\\right).\n\\] which is typically written without reference to \\(Z\\) as \\[\np(y) \\propto \\exp\\left(-\\frac{y^2}{2}\\right).\n\\]\nStan only requires models to be defined up to a constant that does not depend on the parameters. This is convenient because often the normalizing constant \\(Z\\) is either time-consuming to compute or intractable to evaluate.\n\n\nThe built in distribution functions in Stan are all available in normalized and unnormalized form. 
The normalized forms include all of the terms in the log density, and the unnormalized forms drop terms which are not directly or indirectly a function of the model parameters.\nFor instance, the normal_lpdf function returns the log density of a normal distribution:\n\\[\n\\textsf{normal\\_lpdf}(x | \\mu, \\sigma) =\n-\\log \\left( \\sigma \\sqrt{2 \\pi} \\right)\n-\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2\n\\]\nThe normal_lupdf function returns the log density of an unnormalized distribution. With the unnormalized version of the function, Stan does not define what the normalization constant will be, though usually as many terms as possible are dropped to make the calculation fast. Dropping a constant sigma term, normal_lupdf would be equivalent to:\n\\[\n\\textsf{normal\\_lupdf}(x | \\mu, \\sigma) =\n-\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2\n\\]\nAll functions ending in _lpdf have a corresponding _lupdf version which evaluates and returns the unnormalized density. The same is true for _lpmf and _lupmf.\n\n\n\nAlthough the increment log density statement looks syntactically like compound addition and assignment (see the compound arithmetic/assignment section), it is treated as a primitive statement because target is not itself a variable. So, even though\ntarget += lp;\nis a legal statement, the corresponding long form is not legal.\ntarget = target + lp; // BAD, target is not a variable\n\n\n\nThe target += ... statement accepts an argument in place of ... for any expression type, including integers, reals, vectors, row vectors, matrices, and arrays of any dimensionality, including arrays of vectors and matrices. For container arguments, their sum will be added to the total log density.\n\n\n\n\nA variant of the target += statement described above is the jacobian += statement. 
This can be used in the transformed parameters block or in functions ending with _jacobian to mimic the log Jacobian adjustments accrued by built-in variable transforms.\nSimilarly to those implemented for the built-in transforms, these Jacobian adjustments may be skipped for maximum likelihood estimation via optimization.\nFor example, here is a program which recreates the existing <upper=x> transform on real numbers:\nfunctions {\n real my_upper_bound_jacobian(real x, real ub) {\n jacobian += x;\n return ub - exp(x);\n }\n}\ndata {\n real ub;\n}\nparameters {\n real b_raw;\n}\ntransformed parameters {\n real b = my_upper_bound_jacobian(b_raw, ub);\n}\nmodel {\n // use b as if it was declared `real<upper=ub> b;` in parameters\n // e.g.\n // b ~ lognormal(0, 1);\n}\n\n\nTo access the accumulated log density up to the current execution point, the function target() may be used.\n\n\n\n\nThe term “sampling statement” has been replaced with distribution statement.\n\n\n\nStan supports writing probability statements also using distribution statements, for example\ny ~ normal(mu, sigma);\nmu ~ normal(0, 10);\nsigma ~ normal(0, 1);\nThe symbol \\(\\sim\\) is called tilde. 
Due to historical reasons, the distribution statements used to be called “sampling statements” in Stan, but that term is not recommended anymore as it is a less accurate description.\nIn general, we can read \\(\\sim\\) as “is distributed as,” and overall this notation is used as a shorthand for defining distributions, so that the above example can be written also as \\[\n\\begin{aligned}\n p(y| \\mu, \\sigma) & = \\mathrm{normal}(y | \\mu, \\sigma)\\\\\n p(\\mu) & = \\mathrm{normal}(\\mu | 0, 10)\\\\\n p(\\sigma) & = \\mathrm{normal}^+(\\sigma | 0, 1).\n\\end{aligned}\n\\] A collection of distribution statements define a joint distribution as the product of component distributions \\[\np(y,\\mu,\\sigma) = p(y| \\mu, \\sigma )p(\\mu) p(\\sigma).\n\\]\nThis works even if the model is not constructed generatively. For example, suppose you include the following code in a Stan model:\n a ~ normal(0, 1);\n a ~ normal(0, 1);\nThis is translated to \\[\n p(a) = \\mathrm{normal}(a | 0, 1)\\mathrm{normal}(a | 0, 1),\n\\] which in this case is \\(\\mathrm{normal}(a|0,1/\\sqrt{2})\\). One might expect that the above two lines of code would represent a redundant expression of a \\(\\mathrm{normal}(a|0,1)\\) prior, but, no, each line of code corresponds to an additional term in the target, or log posterior density. You can think of each line as representing an additional piece of information.\nWhen the joint distribution is considered as a function of parameters (e.g. \\(\\mu\\), \\(\\sigma\\)) given fixed data, it is proportional to the posterior distribution. In general, the posterior distribution is not a normalized probability density function—that is, it will be positive but will not in general integrate to 1—but the proportionality is sufficient for the Stan algorithms.\nStan always constructs the target function—in Bayesian terms, the log posterior density function of the parameter vector—by adding terms in the model block. 
Equivalently, each \\(\\sim\\) statement corresponds to a multiplicative factor in the unnormalized posterior density.\nDistribution statements (~) accept only built-in or user-defined distributions on the right side. The left side of a distribution statement may be data, parameter, or a complex expression, but the evaluated type needs to match one of the allowed types of the distribution on the right (see more below).\nIn Stan, a distribution statement is merely a notational convenience following the typical notation used to present models in the literature. The above model defined with distribution statements could be expressed as a direct increment on the total log probability density as\ntarget += normal_lpdf(y | mu, sigma);\ntarget += normal_lpdf(mu | 0, 10);\ntarget += normal_lpdf(sigma | 0, 1);\nStan models can mix distribution statements and log probability increment statements. Although statistical models are usually defined with distributions in the literature, there are several scenarios in which we may want to code the log likelihood or parts of it directly, for example, due to computational efficiency (e.g. censored data model) or coding language limitations (e.g. mixture models in Stan). This is possible with log probability increment statements. See also the discussion below about Jacobians.\nIn general, a distribution statement of the form\ny ~ dist(theta1, ..., thetaN);\ninvolving subexpressions y and theta1 through thetaN (including the case where N is zero) will be well formed if and only if the corresponding log probability increment statement is well-formed. 
For densities allowing real y values, the log probability density function is used,\ntarget += dist_lpdf(y | theta1, ..., thetaN);\nFor those restricted to integer y values, the log probability mass function is used,\ntarget += dist_lpmf(y | theta1, ..., thetaN);\nThis will be well formed if and only if dist_lpdf(y | theta1, ..., thetaN) or dist_lpmf(y | theta1, ..., thetaN) is a well-formed expression of type real. User defined distributions can be defined in functions block by using function names ending with _lpdf.\n\n\nAlthough both lead to the same inference algorithm behavior in Stan, there is one critical difference between using the distribution statement, as in\ny ~ normal(mu, sigma);\nand explicitly incrementing the log probability function, as in\ntarget += normal_lpdf(y | mu, sigma);\nThe distribution statement drops all the terms in the log probability function that are constant, whereas the explicit call to normal_lpdf adds all of the terms in the definition of the log normal probability function, including all of the constant normalizing terms. Therefore, the explicit increment form can be used to recreate the exact log probability values for the model. Otherwise, the distribution statement form will be faster if any of the input expressions, y, mu, or sigma, involve only constants, data variables, and transformed data variables. See the section Built in distributions above discussing _lupdf and _lupmf functions that also drop all the constant terms.\n\n\n\nThe left-hand side of a distribution statement may be an arbitrary expression (of compatible type). For instance, it is legal syntactically to write\nparameters {\n real<lower=0> beta;\n}\n// ...\nmodel {\n log(beta) ~ normal(mu, sigma);\n}\nUnfortunately, this is not enough to properly model beta as having a lognormal distribution. 
Whenever a nonlinear transform is applied to a parameter, such as the logarithm function being applied to beta here, and then used on the left-hand side of a distribution statement or on the left of a vertical bar in a log pdf function, an adjustment must be made to account for the differential change in scale and ensure beta gets the correct distribution. The correction required is to add the log Jacobian of the transform to the target log density; see the change of variables section for full definitions. For the case above, the following adjustment will account for the log transform.3\ntarget += - log(abs(beta));\n\n\n\nStan supports truncating distributions with lower bounds, upper bounds, or both.\n\n\nA probability density function \\(p(x)\\) for a continuous distribution may be truncated to an interval \\([a, b]\\) to define a new density \\(p_{[a, b]}(x)\\) with support \\([a, b]\\) by setting\n\\[\np_{[a, b]}(x)\n= \\frac{p(x)}\n {\\int_a^b p(u) \\, du}.\n\\]\nA probability mass function \\(p(x)\\) for a discrete distribution may be truncated to the closed interval \\([a, b]\\) by\n\\[\np_{[a, b]}(x) = \\frac{p(x)}\n {\\sum_{u = a}^b p(u)}.\n\\]\n\n\n\nA probability density function \\(p(x)\\) can be truncated to \\([a, \\infty]\\) by defining\n\\[\np_{[a, \\infty]}(x)\n= \\frac{p(x)}\n {\\int_a^{\\infty} p(u) \\, du}.\n\\]\nA probability mass function \\(p(x)\\) is truncated to \\([a, \\infty]\\) by defining\n\\[\np_{[a, \\infty]}(x) = \\frac{p(x)}\n {\\sum_{a \\leq u} p(u)}.\n\\]\n\n\n\nA probability density function \\(p(x)\\) can be truncated to \\([-\\infty, b]\\) by defining\n\\[\np_{[-\\infty, b]}(x)\n= \\frac{p(x)}\n {\\int_{-\\infty}^b p(u) \\, du}.\n\\]\nA probability mass function \\(p(x)\\) is truncated to \\([-\\infty, b]\\) by defining\n\\[\np_{[-\\infty,b]}(x) = \\frac{p(x)}\n {\\sum_{u \\leq b} p(u)}.\n\\]\n\n\n\nGiven a probability function \\(p_X(x)\\) for a random variable \\(X\\), its cumulative distribution function (cdf) \\(F_X(x)\\) is defined 
to be the probability that \\(X \\leq x\\),\n\\[\nF_X(x) = \\Pr[X \\leq x].\n\\]\nThe upper-case variable \\(X\\) is the random variable whereas the lower-case variable \\(x\\) is just an ordinary bound variable. For continuous random variables, the definition of the cdf works out to\n\\[\nF_X(x) \\ = \\ \\int_{-\\infty}^{x} p_X(u) \\, du,\n\\]\nFor discrete variables, the cdf is defined to include the upper bound given by the argument,\n\\[\nF_X(x) = \\sum_{u \\leq x} p_X(u).\n\\]\n\n\n\nThe complementary cumulative distribution function (ccdf) in both the continuous and discrete cases is given by\n\\[\nF^C_X(x)\n\\ = \\ \\Pr[X > x]\n\\ = \\ 1 - F_X(x).\n\\]\nUnlike the cdf, the ccdf is exclusive of the bound, hence the event \\(X > x\\) rather than the cdf’s event \\(X \\leq x\\).\nFor continuous distributions, the ccdf works out to\n\\[\nF^C_X(x)\n\\ = \\ 1 - \\int_{-\\infty}^x p_X(u) \\, du\n\\ = \\ \\int_x^{\\infty} p_X(u) \\, du.\n\\]\nThe lower boundary can be included in the integration bounds because it is a single point on a line and hence has no probability mass. For the discrete case, the lower bound must be excluded in the summation explicitly by summing over \\(u > x\\),\n\\[\nF^C_X(x)\n\\ = \\ 1 - \\sum_{u \\leq x} p_X(u)\n\\ = \\ \\sum_{u > x} p_X(u).\n\\]\nCumulative distribution functions provide the necessary integral calculations to define truncated distributions. For truncation with lower and upper bounds, the denominator is defined by \\[\n\\int_a^b p(u) \\, du = F_X(b) - F_X(a).\n\\] This allows truncated distributions to be defined as \\[\np_{[a,b]}(x) = \\frac{p_X(x)}\n {F_X(b) - F_X(a)}.\n\\]\nFor discrete distributions, a slightly more complicated form is required to explicitly insert the lower truncation point, which is otherwise excluded from \\(F_X(b) - F_X(a)\\),\n\\[\np_{[a,b]}(x) = \\frac{p_X(x)}\n {F_X(b) - F_X(a) + p_X(a)}.\n\\]\n\n\n\nStan allows probability functions to be truncated. 
For example, a truncated unit normal distribution restricted to \\([-0.5, 2.1]\\) can be coded with the following distribution statement.\ny ~ normal(0, 1) T[-0.5, 2.1];\nTruncated distributions are translated as an additional term in the accumulated log density function plus error checking to make sure the variate in the distribution statement is within the bounds of the truncation.\nIn general, the truncation bounds and parameters may be parameters or local variables.\nBecause the example above involves a continuous distribution, it behaves the same way as the following more verbose form.\ny ~ normal(0, 1);\nif (y < -0.5 || y > 2.1) {\n target += negative_infinity();\n} else {\n target += -log_diff_exp(normal_lcdf(2.1 | 0, 1),\n normal_lcdf(-0.5 | 0, 1));\n}\nBecause a Stan program defines a log density function, all calculations are on the log scale. The function normal_lcdf is the log of the cumulative normal distribution function and the function log_diff_exp(a, b) is a more arithmetically stable form of log(exp(a) - exp(b)).\nFor a discrete distribution, another term is necessary in the denominator to account for the excluded boundary. 
The truncated discrete distribution\ny ~ poisson(3.7) T[2, 10];\nbehaves in the same way as the following code.\ny ~ poisson(3.7);\nif (y < 2 || y > 10) {\n target += negative_infinity();\n} else {\n target += -log_sum_exp(poisson_lpmf(2 | 3.7),\n log_diff_exp(poisson_lcdf(10 | 3.7),\n poisson_lcdf(2 | 3.7)));\n}\nRecall that log_sum_exp(a, b) is just the arithmetically stable form of log(exp(a) + exp(b)).\n\n\n\nFor truncating with only a lower bound, the upper limit is left blank.\ny ~ normal(0, 1) T[-0.5, ];\nThis truncated distribution statement has the same behavior as the following code.\ny ~ normal(0, 1);\nif (y < -0.5) {\n target += negative_infinity();\n} else {\n target += -normal_lccdf(-0.5 | 0, 1);\n}\nThe normal_lccdf function is the normal complementary cumulative distribution function.\nAs with lower and upper truncation, the discrete case requires a more complicated denominator to add back in the probability mass for the lower bound. Thus\ny ~ poisson(3.7) T[2, ];\nbehaves the same way as\ny ~ poisson(3.7);\nif (y < 2) {\n target += negative_infinity();\n} else {\n target += -log_sum_exp(poisson_lpmf(2 | 3.7),\n poisson_lccdf(2 | 3.7));\n}\n\n\n\nTo truncate with only an upper bound, the lower bound is left blank. The upper truncated distribution statement\ny ~ normal(0, 1) T[ , 2.1];\nproduces the same result as the following code.\ntarget += normal_lpdf(y | 0, 1);\nif (y > 2.1) {\n target += negative_infinity();\n} else {\n target += -normal_lcdf(2.1 | 0, 1);\n}\nWith only an upper bound, the discrete case does not need a boundary adjustment. The upper-truncated distribution statement\ny ~ poisson(3.7) T[ , 10];\nbehaves the same way as the following code.\ny ~ poisson(3.7);\nif (y > 10) {\n target += negative_infinity();\n} else {\n target += -poisson_lcdf(10 | 3.7);\n}\n\n\n\nIn all cases, the truncation is only well formed if the appropriate log density or mass function and necessary log cumulative distribution functions are defined. 
Not every distribution built into Stan has log cdf and log ccdfs defined, nor will every user-defined distribution. The discrete probability function documentation describes the available discrete and continuous cumulative distribution functions; most univariate distributions have log cdf and log ccdf functions.\n\n\n\nFor continuous distributions, truncation points must be expressions of type int or real. For discrete distributions, truncation points must be expressions of type int.\n\n\n\nFor a truncated distribution statement, if the value sampled is not within the bounds specified by the truncation expression, the result is zero probability and the entire statement adds \\(-\\infty\\) to the total log probability, which in turn results in the sample being rejected.\n\n\n\nVectorization of distribution functions with truncation is available if the underlying distribution, lcdf, and lccdf functions meet the required signatures.\nThe equivalent code for a vectorized truncation depends on which of the variables are non-scalars (arrays, vectors, etc.):\n\nIf the variate y is the only non-scalar, the result is the same as described in the above sections, but the lcdf/lccdf calculation is multiplied by size(y).\nIf the other arguments to the distribution are non-scalars, then the vectorized version of the lcdf/lccdf is used. These functions return the sum of their terms, so no multiplication by the size is needed.\nThe exception to the above is when a non-variate is a vector and both a lower and upper bound are specified in the truncation. In this case, a for loop is generated over the elements of the non-scalar arguments. 
This is required since the log_diff_exp of two sums is not the same as the sum of the pairwise log_diff_exp operations.\n\nNote that while a lower-and-upper truncated distribution may generate a for-loop internally as part of translating the truncation statement, this is still preferable to manually constructing a loop, since the distribution function itself can still be evaluated in a vectorized manner.\n\n\n\n\n\nSuppose N is a variable of type int, y is a one-dimensional array of type array[] real, and mu and sigma are variables of type real. Furthermore, suppose that n has not been defined as a variable. Then the following is a well-formed for-loop statement.\nfor (n in 1:N) {\n y[n] ~ normal(mu, sigma);\n}\nThe loop variable is n, the loop bounds are the values in the range 1:N, and the body is the statement following the loop bounds.\n\n\nThe type of the loop variable is int. Unlike in C++ and similarly to R, this variable must not be declared explicitly.\nThe bounds in a for loop must be integers. Unlike in R, the loop is always interpreted as an upward counting loop. The range L:H will cause the loop to execute the loop with the loop variable taking on all integer values greater than or equal to L and less than or equal to H. For example, the loop for (n in 2:5) will cause the body of the for loop to be executed with n equal to 2, 3, 4, and 5, in order. The variable and bound for (n in 5:2) will not execute anything because there are no integers greater than or equal to 5 and less than or equal to 2.\nThe scope of the loop variable is limited to the body of the loop.\n\n\n\nUnlike in BUGS, Stan allows variables to be reassigned. For example, the variable theta in the following program is reassigned in each iteration of the loop.\nfor (n in 1:N) {\n theta = inv_logit(alpha + x[n] * beta);\n y[n] ~ bernoulli(theta);\n}\nSuch reassignment is not permitted in BUGS. 
In BUGS, for loops are declarative, defining plates in directed graphical model notation, which can be thought of as repeated substructures in the graphical model. Therefore, it is illegal in BUGS or JAGS to have a for loop that repeatedly reassigns a value to a variable.4\nIn Stan, assignments are executed in the order they are encountered. As a consequence, the following Stan program has a very different interpretation than the previous one.\nfor (n in 1:N) {\n y[n] ~ bernoulli(theta);\n theta = inv_logit(alpha + x[n] * beta);\n}\nIn this program, theta is assigned after it is used in the probability statement. This presupposes it was defined before the first loop iteration (otherwise behavior is undefined), and then each loop uses the assignment from the previous iteration.\nStan loops may be used to accumulate values. Thus it is possible to sum the values of an array directly using code such as the following.\ntotal = 0.0;\nfor (n in 1:N) {\n total = total + x[n];\n}\nAfter the for loop is executed, the variable total will hold the sum of the elements in the array x. This example was purely pedagogical; it is easier and more efficient to write\ntotal = sum(x);\nA variable inside (or outside) a loop may even be reassigned multiple times, as in the following legal code.\nfor (n in 1:100) {\n y += y * epsilon;\n epsilon = 0.5 * epsilon;\n y += y * epsilon;\n}\n\n\n\n\nA second form of for loops allows iteration over elements of containers. If ys is an expression denoting a container (vector, row vector, matrix, or array) with elements of type T, then the following is a well-formed foreach statement.\nfor (y in ys) {\n // ... 
do something with y ...\n}\nThe order in which elements of ys are visited is defined for container types as follows.\n\nvector, row_vector: elements visited in order, y is of type real\nmatrix: elements visited in column-major order, y is of type real\narray[] T: elements visited in order, y is of type T.\n\nConsequently, if ys is a two dimensional array array[,] real, y will be a one-dimensional array of real values (type array[] real). If ys is a matrix, then y will be a real value (type real). To loop over all values of a two-dimensional array using foreach statements would require a doubly-nested loop,\narray[2, 3] real yss;\nfor (ys in yss) {\n for (y in ys) {\n // ... do something with y ...\n }\n}\nwhereas a matrix can be looped over in one foreach statement\nmatrix[2, 3] yss;\nfor (y in yss) {\n // ... do something with y...\n}\nIn both cases, the loop variable y is of type real. The elements of the matrix are visited in column-major order (e.g., y[1, 1], y[2, 1], y[1, 2], ..., y[2, 3]), whereas the elements of the two-dimensional array are visited in row-major order (e.g., y[1, 1], y[1, 2], y[1, 3], y[2, 1], ..., y[2, 3]).\n\n\n\nStan supports full conditional statements using the same if-then-else syntax as C++. The general format is\nif (condition1)\n statement1\nelse if (condition2)\n statement2\n// ...\nelse if (conditionN-1)\n statementN-1\nelse\n statementN\nThere must be a single leading if clause, which may be followed by any number of else if clauses, all of which may be optionally followed by an else clause. Each condition must be an integer value, with non-zero values interpreted as true and the zero value as false.\nThe entire sequence of if-then-else clauses forms a single conditional statement for evaluation. The conditions are evaluated in order until one of the conditions evaluates to a non-zero value, at which point its corresponding statement is executed and the conditional statement finishes execution. 
If none of the conditions evaluate to a non-zero value and there is a final else clause, its statement is executed.\n\n\n\nStan supports standard while loops using the same syntax as C++. The general format is as follows.\nwhile (condition)\n body\nThe condition must be an integer expression and the body can be any statement (or sequence of statements in curly braces).\nEvaluation of a while loop starts by evaluating the condition. If the condition evaluates to a false (zero) value, the execution of the loop terminates and control moves to the position after the loop. If the loop’s condition evaluates to a true (non-zero) value, the body statement is executed, then the whole loop is executed again. Thus the loop is continually executed as long as the condition evaluates to a true value.\nThe rest of the body of a while loop may be skipped using a continue. The loop will be exited with a break statement. See the section on continue and break statements for more details.\n\n\n\nJust as parentheses may be used to group expressions, curly brackets may be used to group a sequence of zero or more statements into a statement block. At the beginning of each block, local variables may be declared that are scoped over the rest of the statements in the block.\n\n\nBlocks are often used to group a sequence of statements together to be used in the body of a for loop. 
Because the body of a for loop can be any statement, for loops with bodies consisting of a single statement can be written as follows.\nfor (n in 1:N) {\n y[n] ~ normal(mu, sigma);\n}\nTo put multiple statements inside the body of a for loop, a block is used, as in the following example.\nfor (n in 1:N) {\n lambda[n] ~ gamma(alpha, beta);\n y[n] ~ poisson(lambda[n]);\n}\nThe open curly bracket ({) is the first character of the block and the close curly bracket (}) is the last character.\nBecause whitespace is ignored in Stan, the following program will not compile.\nfor (n in 1:N)\n y[n] ~ normal(mu, sigma);\n z[n] ~ normal(mu, sigma); // ERROR!\nThe problem is that the body of the for loop is taken to be the statement directly following it, which is y[n] ~ normal(mu, sigma). This leaves the probability statement for z[n] hanging, as is clear from the following equivalent program.\nfor (n in 1:N) {\n y[n] ~ normal(mu, sigma);\n}\nz[n] ~ normal(mu, sigma); // ERROR!\nNeither of these programs will compile. If the loop variable n was defined before the for loop, the for-loop declaration will raise an error. If the loop variable n was not defined before the for loop, then the use of the expression z[n] will raise an error.\n\n\n\nA for loop has a statement as a body. It is often convenient in writing programs to be able to define a local variable that will be used temporarily and then forgotten. For instance, the for loop example of repeated assignment should use a local variable for maximum clarity and efficiency, as in the following example.\nfor (n in 1:N) {\n real theta;\n theta = inv_logit(alpha + x[n] * beta);\n y[n] ~ bernoulli(theta);\n}\nThe local variable theta is declared here inside the for loop. The scope of a local variable is just the block in which it is defined. Thus theta is available for use inside the for loop, but not outside of it. As in other situations, Stan does not allow variable hiding. 
So it is illegal to declare a local variable theta if the variable theta is already defined in the scope of the for loop. For instance, the following is not legal.\nfor (m in 1:M) {\n real theta;\n for (n in 1:N) {\n real theta; // ERROR!\n theta = inv_logit(alpha + x[m, n] * beta);\n y[m, n] ~ bernoulli(theta);\n// ...\nThe compiler will flag the second declaration of theta with a message that it is already defined.\n\n\n\nLocal variables may not have constraints on their declaration. The only types that may be used are listed in the types table under “local”.\n\n\n\nA block is itself a statement, so anywhere a sequence of statements is allowed, one or more of the statements may be a block. For instance, in a for loop, it is legal to have the following\nfor (m in 1:M) {\n {\n int n = 2 * m;\n sum += n;\n }\n for (n in 1:N) {\n sum += x[m, n];\n }\n}\nThe variable declaration int n; is the first element of an embedded block and so has scope within that block. The for loop defines its own local block implicitly over the statement following it in which the loop variable is defined. As far as Stan is concerned, these two uses of n are unrelated.\n\n\n\n\nThe one-token statements continue and break may be used within loops to alter control flow; continue causes the next iteration of the loop to run immediately, whereas break terminates the loop and causes execution to resume after the loop. Both control structures must appear in loops. Both break and continue scope to the most deeply nested loop, but pass through non-loop statements.\nAlthough these control statements may seem undesirable because of their goto-like behavior, their judicious use can greatly improve readability by reducing the level of nesting or eliminating bookkeeping inside loops.\n\n\nWhen a break statement is executed, the most deeply nested loop currently being executed is ended and execution picks up with the next statement after the loop. 
For example, consider the following program:\nwhile (1) {\n if (n < 0) {\n break;\n }\n foo(n);\n n = n - 1;\n}\nThe while (1) loop is a “forever” loop, because 1 is the true value, so the test always succeeds. Within the loop, if the value of n is less than 0, the loop terminates, otherwise it executes foo(n) and then decrements n. The statement above does exactly the same thing as\nwhile (n >= 0) {\n foo(n);\n n = n - 1;\n}\nThis case is simply illustrative of the behavior; it is not a case where a break simplifies the loop.\n\n\n\nThe continue statement ends the current operation of the loop and returns to the condition at the top of the loop. Such loops are typically used to exclude some values from calculations. For example, we could use the following loop to sum the positive values in the array x,\nreal sum;\nsum = 0;\nfor (n in 1:size(x)) {\n if (x[n] <= 0) {\n continue;\n }\n sum += x[n];\n}\nWhen the continue statement is executed, control jumps back to the conditional part of the loop. With while and for loops, this causes control to return to the conditional of the loop. With for loops, this advances the loop variable, so the above program will not go into an infinite loop when faced with an x[n] less than zero. Thus the above program could be rewritten with deeper nesting by reversing the conditional,\nreal sum;\nsum = 0;\nfor (n in 1:size(x)) {\n if (x[n] > 0) {\n sum += x[n];\n }\n}\nWhile the latter form may seem more readable in this simple case, the former has the main line of execution nested one level less deep. Instead, the conditional at the top finds cases to exclude and doesn’t require the same level of nesting for code that’s not excluded. When there are several such exclusion conditions, the break or continue versions tend to be much easier to read.\n\n\n\nIf there is a loop nested within a loop, a break or continue statement only breaks out of the inner loop. 
So\nwhile (cond1) {\n // ...\n while (cond2) {\n // ...\n if (cond3) {\n break;\n }\n // ...\n }\n // execution continues here after break\n // ...\n}\nIf the break is triggered by cond3 being true, execution will continue after the nested loop.\nAs with break statements, continue statements go back to the top of the most deeply nested loop in which the continue appears.\nAlthough break and continue must appear within loops, they may appear in nested statements within loops, such as within the conditionals shown above or within nested statements. The break and continue statements jump past any control structure other than while-loops and for-loops.\n\n\n\n\nStan provides print statements that can print literal strings and the values of expressions. Print statements accept any number of arguments. Consider the following for-each statement with a print statement in its body.\nfor (n in 1:N) { print(\"loop iteration: \", n); ... }\nThe print statement will execute every time the body of the loop does. Each time the loop body is executed, it will print the string “loop iteration:” (with the trailing space), followed by the value of the expression n, followed by a new line.\n\n\nThe text printed by a print statement varies based on its content. A literal (i.e., quoted) string in a print statement always prints exactly that string (without the quotes). Expressions in print statements result in the value of the expression being printed. But how the value of the expression is formatted will depend on its type.\nPrinting a simple real or int typed variable always prints the variable’s value.5\nFor array, vector, and matrix variables, the print format uses brackets. For example, a 3-vector will print as\n[1, 2, 3]\nand a \\(2 \\times 3\\)-matrix as\n[[1, 2, 3], [4, 5, 6]]\nComplex numbers print as pairs. 
For example, the pair of statements\ncomplex z = to_complex(1.2, -3.5);\nprint(z)\nwill print as (1.2,-3.5), with no space after the comma or within the parentheses.\nPrinting a more readable version of arrays or matrices can be done with loops. An example is the print statement in the following transformed data block.\ntransformed data {\n matrix[2, 2] u;\n u[1, 1] = 1.0; u[1, 2] = 4.0;\n u[2, 1] = 9.0; u[2, 2] = 16.0;\n for (n in 1:2) {\n print(\"u[\", n, \"] = \", u[n]);\n }\n}\nThis print statement executes twice, printing the following two lines of output.\nu[1] = [1, 4]\nu[2] = [9, 16]\n\n\n\nThe input type to a print function cannot be void. In particular, it can’t be the result of a user-defined void function. All other types are allowed as arguments to the print function.\n\n\n\nPrinting for a print statement happens every time it is executed. The transformed data block is executed once per chain, the transformed parameter and model blocks once per leapfrog step, and the generated quantities block once per iteration.\n\n\n\nString literals begin and end with a double quote character (\"). The characters between the double quote characters may be any byte sequence, with the exception of the double quote character.\nThe Stan interfaces preserve the byte sequences which they receive. The encoding of these byte sequences as characters and their rendering as glyphs will be handled by whatever display mechanism is being used to monitor Stan’s output (e.g., a terminal, a Jupyter notebook, RStudio, etc.). Stan does not enforce a character encoding for strings, and no attempt is made to validate the bytes as legal ASCII, UTF-8, etc.\n\n\n\nBecause Stan is an imperative language, print statements can be very useful for debugging. They can be used to display the values of variables or expressions at various points in the execution of a program. 
They are particularly useful for spotting problematic not-a-number or infinite values, both of which will be printed.\nIt is particularly useful to print the value of the target log density accumulator (through the target() function), as in the following example.\nvector[2] y;\ny[1] = 1;\nprint(\"log density before =\", target());\ny ~ normal(0,1); // bug! y[2] not defined\nprint(\"log density after =\", target());\nThe example has a bug in that y[2] is not defined before the vector y is used in the distribution statement. By printing the value of the log probability accumulator before and after each distribution statement, it’s possible to isolate where the log probability becomes ill-defined (i.e., becomes not-a-number).\nNote that print statements may not always be displayed immediately, but rather at the end of an operation (e.g., leapfrog step). As such, some issues such as infinite loops are difficult to debug effectively with this technique.\n\n\n\n\nThe Stan reject statement provides a mechanism to report errors or problematic values encountered during program execution and either halt processing or reject iterations.\nLike the print statement, the reject statement accepts any number of quoted string literals or Stan expressions as arguments.\nReject statements are typically embedded in a conditional statement in order to detect variables in illegal states. For example, the following code handles the case where a variable x’s value is negative.\nif (x < 0) {\n reject(\"x must not be negative; found x=\", x);\n}\n\n\nReject statements have the same behavior as exceptions thrown by built-in Stan functions. For example, the normal_lpdf function raises an exception if the input scale is not positive and finite. 
The effect of a reject statement depends on the program block in which the rejection occurs.\nIn all cases of rejection, the interface accessing the Stan program should print the arguments to the reject statement.\n\n\nRejections in user-defined functions are just passed to the calling function or program block. Reject statements can be used in functions to validate the function arguments, allowing user-defined functions to fully emulate built-in function behavior. It is better to find out earlier rather than later when there is a problem.\n\n\n\nRejections are fatal in the transformed data block. This is because if initialization fails there is no way to recover values, so the algorithm will not begin execution.\nReject statements placed in the transformed data block can be used to validate both the data and transformed data (if any). This allows more complicated constraints to be enforced than can be specified with Stan’s constrained variable declarations.\nFatal errors in other blocks may also be signaled by use of the fatal_error statement.\n\n\n\nRejections in the transformed parameters and model blocks are not in and of themselves instantly fatal. The result has the same effect as assigning a \\(-\\infty\\) log probability, which causes rejection of the current proposal in MCMC samplers and adjustment of search parameters in optimization.\nIf the log probability function results in a rejection every time it is called, the containing application (MCMC sampler or optimization) should diagnose this problem and terminate with an appropriate error message. To aid in diagnosing problems, the message for each reject statement will be printed as a result of executing it.\n\n\n\n\nRejection should be used for error handling, not defining arbitrary constraints. 
Consider the following errorful Stan program.\nparameters {\n real a;\n real<lower=a> b;\n real<lower=a, upper=b> theta;\n // ...\n}\nmodel {\n // **wrong** needs explicit truncation\n theta ~ normal(0, 1);\n // ...\n}\nThis program is wrong because its truncation bounds on theta depend on parameters, and thus need to be accounted for using an explicit truncation on the distribution. This is the right way to do it.\n theta ~ normal(0, 1) T[a, b];\nThe conceptual issue is that the prior does not integrate to one over the admissible parameter space; it integrates to one over all real numbers and integrates to something less than one over \\([a ,b]\\); in these simple univariate cases, we can overcome that with the T[ , ] notation, which essentially divides by whatever the prior integrates to over \\([a, b]\\).\nThis problem is exactly the same problem as you would get using reject statements to enforce complicated inequalities on multivariate functions. In this case, it is wrong to try to deal with truncation through constraints.\n if (theta < a || theta > b) {\n reject(\"theta not in (a, b)\");\n }\n // still **wrong**, needs T[a,b]\n theta ~ normal(0, 1);\nIn this case, the prior integrates to something less than one over the region of the parameter space where the complicated inequalities are satisfied. But we don’t generally know what value the prior integrates to, so we can’t increment the log probability function to compensate.\nEven if this adjustment to a proper probability model may seem minor in particular models where the amount of truncated posterior density is negligible or constant, we can’t sample from that truncated posterior efficiently. 
Programs need to use one-to-one mappings that guarantee the constraints are satisfied and only use reject statements to raise errors or help with debugging.\n\n\n\n\nThe Stan fatal_error statement provides a mechanism to report errors or problematic values encountered during program execution and uniformly halt processing.\nLike the print or reject statements, the fatal error statement accepts any number of quoted string literals or Stan expressions as arguments.\nThe fatal error may be used to signal an unrecoverable error in blocks where reject leads to the algorithm attempting to try again, such as the model block.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#statement-block-contexts", + "href": "reference-manual/statements.html#statement-block-contexts", + "title": "Statements", + "section": "", + "text": "The data and parameters blocks do not allow statements of any kind because these blocks are solely used to declare the data variables for input and the parameter variables for sampling. All other blocks allow statements. In these blocks, both variable declarations and statements are allowed. All top-level variables in a block are considered block variables. See the blocks chapter for more information about the block structure of Stan programs.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#assignment-statement.section", + "href": "reference-manual/statements.html#assignment-statement.section", + "title": "Statements", + "section": "", + "text": "An assignment statement consists of a variable (possibly multivariate with indexing information) and an expression. Executing an assignment statement evaluates the expression on the right-hand side and assigns it to the (indexed) variable on the left-hand side. 
An example of a simple assignment is as follows.\nn = 0;\nExecuting this statement assigns the value of the expression 0, which is the integer zero, to the variable n. For an assignment to be well formed, the type of the expression on the right-hand side should be compatible with the type of the (indexed) variable on the left-hand side. For the above example, because 0 is an expression of type int, the variable n must be declared as being of type int or of type real. If the variable is of type real, the integer zero is promoted to a floating-point zero and assigned to the variable. After the assignment statement executes, the variable n will have the value zero (either as an integer or a floating-point value, depending on its type).\nSyntactically, every assignment statement must be followed by a semicolon. Otherwise, whitespace between the tokens does not matter (the tokens here being the left-hand-side (indexed) variable, the assignment operator, the right-hand-side expression and the semicolon).\nBecause the right-hand side is evaluated first, it is possible to increment a variable in Stan just as in C++ and other programming languages by writing\nn = n + 1;\nSuch self assignments are not allowed in BUGS, because they induce a cycle into the directed graphical model.\nThe left-hand side of an assignment may contain indices for array, matrix, or vector data structures. For instance, if Sigma is of type matrix, then\nSigma[1, 1] = 1.0;\nsets the value in the first column of the first row of Sigma to one.\nAssignments to subcomponents of larger multi-variate data structures are supported by Stan. 
For example, if a is an array of type array[,] real and b is an array of type array[] real, then the following two statements are both well-formed.\na[3] = b;\nb = a[4];\nSimilarly, if x is a variable declared to have type row_vector and Y is a variable declared as type matrix, then the following sequence of statements to swap the first two rows of Y is well formed.\nx = Y[1];\nY[1] = Y[2];\nY[2] = x;\n\n\nStan allows assignment of lower types to higher types, but not vice-versa. That is, we can assign an expression of type int to an lvalue of type real, and we can assign an expression of type real to an lvalue of type complex. Furthermore, promotion is transitive, so that we can assign an expression of type int to an lvalue of type complex.\nPromotion extends to containers, so that arrays of int can be promoted to arrays of real during assignment, and arrays of real can be assigned to an lvalue of type array of complex. Similarly, an expression of type vector may be assigned to an lvalue of type complex_vector, and similarly for row vectors and matrices.\n\n\n\nThe expressions that are legal left-hand sides of assignment statements are known as “lvalues.” In Stan, there are three kinds of legal lvalues,\n\na variable, or\na variable with one or more indices, or\na comma separated list of lvalues surrounded by ( and )\n\nTo be used as an lvalue, an indexed variable must have at least as many dimensions as the number of indices provided. An array of real or integer types has as many dimensions as it is declared for. A matrix has two dimensions and a vector or row vector one dimension; this also holds for the constrained types, covariance and correlation matrices and their Cholesky factors and ordered, positive ordered, and simplex vectors. An array of matrices has two more dimensions than the array and an array of vectors or row vectors has one more dimension than the array. 
Note that the number of indices can be less than the number of dimensions of the variable, meaning that the right hand side must itself be multidimensional to match the remaining dimensions.\n\n\n\nMultiple indexes, as described in the multi-indexing section, are also permitted on the left-hand side of assignments. Indexing on the left side works exactly as it does for expressions, with multiple indexes preserving index positions and single indexes reducing them. The type on the left side must still match the type on the right side.\n\n\nAll assignment is carried out as if the right-hand side is copied before the assignment. This resolves any potential aliasing issues arising from the right-hand side changing in the middle of an assignment statement’s execution.\n\n\n\n\nStan’s arithmetic operators may be used in compound arithmetic and assignment operations. For example, consider the following example of compound addition and assignment.\nreal x = 5;\nx += 7; // value of x is now 12\nThe compound arithmetic and assignment statement above is equivalent to the following long form.\nx = x + 7;\nIn general, the compound form\nx op= y\nwill be equivalent to\nx = x op y;\nThe compound statement will be legal whenever the long form is legal. This requires that the operation x op y must itself be well formed and that the result of the operation be assignable to x. For the expression x to be assignable, it must be an indexed variable where the variable is defined in the current block. 
For example, the following compound addition and assignment statement will increment a single element of a vector by two.\nvector[N] x;\nx[3] += 2;\nAs a further example, consider\nmatrix[M, M] x;\nvector[M] y;\nreal z;\nx *= x; // OK, (x * x) is a matrix\nx *= z; // OK, (x * z) is a matrix\nx *= y; // BAD, (x * y) is a vector\nThe supported compound arithmetic and assignment operations are listed in the compound arithmetic/assignment table; they are also listed in the index prefaced by operator, e.g., operator+=.\nCompound Arithmetic/Assignment Table. Stan allows compound arithmetic and assignment statements of the forms listed in the table. The compound form is legal whenever the corresponding long form would be legal and it has the same effect.\n\n\n\noperation\ncompound\nunfolded\n\n\n\n\naddition\nx += y\nx = x + y\n\n\nsubtraction\nx -= y\nx = x - y\n\n\nmultiplication\nx *= y\nx = x * y\n\n\ndivision\nx /= y\nx = x / y\n\n\nelementwise multiplication\nx .*= y\nx = x .* y\n\n\nelementwise division\nx ./= y\nx = x ./ y", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#increment-log-prob.section", + "href": "reference-manual/statements.html#increment-log-prob.section", + "title": "Statements", + "section": "", + "text": "The basis of Stan’s execution is the evaluation of a log probability function (specifically, a log probability density function) for a given set of (real-valued) parameters; this function returns the log density of the posterior up to an additive constant. Data and transformed data are fixed before the log density is evaluated. The total log probability is initialized to zero. Next, any log Jacobian adjustments accrued by the variable constraints are added to the log density (the Jacobian adjustment may be skipped for maximum likelihood estimation via optimization). 
Distribution statements and log probability increment statements may add to the log density in the model block. A log probability increment statement directly increments the log density with the value of an expression as follows.1\ntarget += -0.5 * y * y;\nThe keyword target here is actually not a variable, and may not be accessed as such (though see below on how to access the value of target through a special function).\nIn this example, the unnormalized log probability of a unit normal variable \\(y\\) is added to the total log probability. In the general case, the argument can be any expression.2\nAn entire Stan model can be implemented this way. For instance, the following model has a single variable according to a unit normal probability.\nparameters {\n real y;\n}\nmodel {\n target += -0.5 * y * y;\n}\nThis model defines a log probability function\n\\[\n\\log p(y) = - \\, \\frac{y^2}{2} - \\log Z\n\\]\nwhere \\(Z\\) is a normalizing constant that does not depend on \\(y\\). The constant \\(Z\\) is conventionally written this way because on the linear scale, \\[\np(y) = \\frac{1}{Z} \\exp\\left(-\\frac{y^2}{2}\\right).\n\\] which is typically written without reference to \\(Z\\) as \\[\np(y) \\propto \\exp\\left(-\\frac{y^2}{2}\\right).\n\\]\nStan only requires models to be defined up to a constant that does not depend on the parameters. This is convenient because often the normalizing constant \\(Z\\) is either time-consuming to compute or intractable to evaluate.\n\n\nThe built in distribution functions in Stan are all available in normalized and unnormalized form. 
The normalized forms include all of the terms in the log density, and the unnormalized forms drop terms which are not directly or indirectly a function of the model parameters.\nFor instance, the normal_lpdf function returns the log density of a normal distribution:\n\\[\n\\textsf{normal\\_lpdf}(x | \\mu, \\sigma) =\n-\\log \\left( \\sigma \\sqrt{2 \\pi} \\right)\n-\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2\n\\]\nThe normal_lupdf function returns the log density of an unnormalized distribution. With the unnormalized version of the function, Stan does not define what the normalization constant will be, though usually as many terms as possible are dropped to make the calculation fast. Dropping a constant sigma term, normal_lupdf would be equivalent to:\n\\[\n\\textsf{normal\\_lupdf}(x | \\mu, \\sigma) =\n-\\frac{1}{2} \\left( \\frac{x - \\mu}{\\sigma} \\right)^2\n\\]\nAll functions ending in _lpdf have a corresponding _lupdf version which evaluates and returns the unnormalized density. The same is true for _lpmf and _lupmf.\n\n\n\nAlthough the increment log density statement looks syntactically like compound addition and assignment (see the compound arithmetic/assignment section), it is treated as a primitive statement because target is not itself a variable. So, even though\ntarget += lp;\nis a legal statement, the corresponding long form is not legal.\ntarget = target + lp; // BAD, target is not a variable\n\n\n\nThe target += ... statement accepts an argument in place of ... for any expression type, including integers, reals, vectors, row vectors, matrices, and arrays of any dimensionality, including arrays of vectors and matrices. 
For container arguments, their sum will be added to the total log density.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#increment-log-density-with-a-change-of-variables-adjustment", + "href": "reference-manual/statements.html#increment-log-density-with-a-change-of-variables-adjustment", + "title": "Statements", + "section": "", + "text": "A variant of the target += statement described above is the jacobian += statement. This can be used in the transformed parameters block or in functions ending with _jacobian to mimic the log Jacobian adjustments accrued by built-in variable transforms.\nSimilarly to those implemented for the built-in transforms, these Jacobian adjustment may be skipped for maximum likelihood estimation via optimization.\nFor example, here is a program which recreates the existing <upper=x> transform on real numbers:\nfunctions {\n real my_upper_bound_jacobian(real x, real ub) {\n jacobian += x;\n return ub - exp(x);\n }\n}\ndata {\n real ub;\n}\nparameters {\n real b_raw;\n}\ntransformed parameters {\n real b = my_upper_bound_jacobian(b_raw, ub);\n}\nmodel {\n // use b as if it was declared `real<upper=ub> b;` in parameters\n // e.g.\n // b ~ lognormal(0, 1);\n}\n\n\nTo access the accumulated log density up to the current execution point, the function target() may be used.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#sampling-statements.section", + "href": "reference-manual/statements.html#sampling-statements.section", + "title": "Statements", + "section": "", + "text": "The term “sampling statement” has been replaced with distribution statement.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#distribution-statements.section", + "href": "reference-manual/statements.html#distribution-statements.section", + 
"title": "Statements", + "section": "", + "text": "Stan supports writing probability statements also using distribution statements, for example\ny ~ normal(mu, sigma);\nmu ~ normal(0, 10);\nsigma ~ normal(0, 1);\nThe symbol \\(\\sim\\) is called tilde. Due to historical reasons, the distribution statements used to be called “sampling statements” in Stan, but that term is not recommended anymore as it is a less accurate description.\nIn general, we can read \\(\\sim\\) as “is distributed as,” and overall this notation is used as a shorthand for defining distributions, so that the above example can be written also as \\[\n\\begin{aligned}\n p(y| \\mu, \\sigma) & = \\mathrm{normal}(y | \\mu, \\sigma)\\\\\n p(\\mu) & = \\mathrm{normal}(\\mu | 0, 10)\\\\\n p(\\sigma) & = \\mathrm{normal}^+(\\sigma | 0, 1).\n\\end{aligned}\n\\] A collection of distribution statements define a joint distribution as the product of component distributions \\[\np(y,\\mu,\\sigma) = p(y| \\mu, \\sigma )p(\\mu) p(\\sigma).\n\\]\nThis works even if the model is not constructed generatively. For example, suppose you include the following code in a Stan model:\n a ~ normal(0, 1);\n a ~ normal(0, 1);\nThis is translated to \\[\n p(a) = \\mathrm{normal}(a | 0, 1)\\mathrm{normal}(a | 0, 1),\n\\] which in this case is \\(\\mathrm{normal}(a|0,1/\\sqrt{2})\\). One might expect that the above two lines of code would represent a redundant expression of a \\(\\mathrm{normal}(a|0,1)\\) prior, but, no, each line of code corresponds to an additional term in the target, or log posterior density. You can think of each line as representing an additional piece of information.\nWhen the joint distribution is considered as a function of parameters (e.g. \\(\\mu\\), \\(\\sigma\\)) given fixed data, it is proportional to the posterior distribution. 
In general, the posterior distribution is not a normalized probability density function—that is, it will be positive but will not in general integrate to 1—but the proportionality is sufficient for the Stan algorithms.\nStan always constructs the target function—in Bayesian terms, the log posterior density function of the parameter vector—by adding terms in the model block. Equivalently, each \\(\\sim\\) statement corresponds to a multiplicative factor in the unnormalized posterior density.\nDistribution statements (~) accept only built-in or user-defined distributions on the right side. The left side of a distribution statement may be data, parameter, or a complex expression, but the evaluated type needs to match one of the allowed types of the distribution on the right (see more below).\nIn Stan, a distribution statement is merely a notational convenience following the typical notation used to present models in the literature. The above model defined with distribution statements could be expressed as a direct increment on the total log probability density as\ntarget += normal_lpdf(y | mu, sigma);\ntarget += normal_lpdf(mu | 0, 10);\ntarget += normal_lpdf(sigma | 0, 1);\nStan models can mix distribution statements and log probability increment statements. Although statistical models are usually defined with distributions in the literature, there are several scenarios in which we may want to code the log likelihood or parts of it directly, for example, due to computational efficiency (e.g. censored data model) or coding language limitations (e.g. mixture models in Stan). This is possible with log probability increment statements. See also the discussion below about Jacobians.\nIn general, a distribution statement of the form\ny ~ dist(theta1, ..., thetaN);\ninvolving subexpressions y and theta1 through thetaN (including the case where N is zero) will be well formed if and only if the corresponding log probability increment statement is well-formed. 
For densities allowing real y values, the log probability density function is used,\ntarget += dist_lpdf(y | theta1, ..., thetaN);\nFor those restricted to integer y values, the log probability mass function is used,\ntarget += dist_lpmf(y | theta1, ..., thetaN);\nThis will be well formed if and only if dist_lpdf(y | theta1, ..., thetaN) or dist_lpmf(y | theta1, ..., thetaN) is a well-formed expression of type real. User defined distributions can be defined in the functions block by using function names ending with _lpdf.\n\n\nAlthough both lead to the same inference algorithm behavior in Stan, there is one critical difference between using the distribution statement, as in\ny ~ normal(mu, sigma);\nand explicitly incrementing the log probability function, as in\ntarget += normal_lpdf(y | mu, sigma);\nThe distribution statement drops all the terms in the log probability function that are constant, whereas the explicit call to normal_lpdf adds all of the terms in the definition of the log normal probability function, including all of the constant normalizing terms. Therefore, the explicit increment form can be used to recreate the exact log probability values for the model. Otherwise, the distribution statement form will be faster if any of the input expressions, y, mu, or sigma, involve only constants, data variables, and transformed data variables. See the section Built in distributions above discussing _lupdf and _lupmf functions that also drop all the constant terms.\n\n\n\nThe left-hand side of a distribution statement may be an arbitrary expression (of compatible type). For instance, it is legal syntactically to write\nparameters {\n real<lower=0> beta;\n}\n// ...\nmodel {\n log(beta) ~ normal(mu, sigma);\n}\nUnfortunately, this is not enough to properly model beta as having a lognormal distribution.
Whenever a nonlinear transform is applied to a parameter, such as the logarithm function being applied to beta here, and then used on the left-hand side of a distribution statement or on the left of a vertical bar in a log pdf function, an adjustment must be made to account for the differential change in scale and ensure beta gets the correct distribution. The correction required is to add the log Jacobian of the transform to the target log density; see the change of variables section for full definitions. For the case above, the following adjustment will account for the log transform.3\ntarget += - log(abs(beta));\n\n\n\nStan supports truncating distributions with lower bounds, upper bounds, or both.\n\n\nA probability density function \\(p(x)\\) for a continuous distribution may be truncated to an interval \\([a, b]\\) to define a new density \\(p_{[a, b]}(x)\\) with support \\([a, b]\\) by setting\n\\[\np_{[a, b]}(x)\n= \\frac{p(x)}\n {\\int_a^b p(u) \\, du}.\n\\]\nA probability mass function \\(p(x)\\) for a discrete distribution may be truncated to the closed interval \\([a, b]\\) by\n\\[\np_{[a, b]}(x) = \\frac{p(x)}\n {\\sum_{u = a}^b p(u)}.\n\\]\n\n\n\nA probability density function \\(p(x)\\) can be truncated to \\([a, \\infty]\\) by defining\n\\[\np_{[a, \\infty]}(x)\n= \\frac{p(x)}\n {\\int_a^{\\infty} p(u) \\, du}.\n\\]\nA probability mass function \\(p(x)\\) is truncated to \\([a, \\infty]\\) by defining\n\\[\np_{[a, \\infty]}(x) = \\frac{p(x)}\n {\\sum_{a \\leq u} p(u)}.\n\\]\n\n\n\nA probability density function \\(p(x)\\) can be truncated to \\([-\\infty, b]\\) by defining\n\\[\np_{[-\\infty, b]}(x)\n= \\frac{p(x)}\n {\\int_{-\\infty}^b p(u) \\, du}.\n\\]\nA probability mass function \\(p(x)\\) is truncated to \\([-\\infty, b]\\) by defining\n\\[\np_{[-\\infty,b]}(x) = \\frac{p(x)}\n {\\sum_{u \\leq b} p(u)}.\n\\]\n\n\n\nGiven a probability function \\(p_X(x)\\) for a random variable \\(X\\), its cumulative distribution function (cdf) \\(F_X(x)\\) is defined
to be the probability that \\(X \\leq x\\),\n\\[\nF_X(x) = \\Pr[X \\leq x].\n\\]\nThe upper-case variable \\(X\\) is the random variable whereas the lower-case variable \\(x\\) is just an ordinary bound variable. For continuous random variables, the definition of the cdf works out to\n\\[\nF_X(x) \\ = \\ \\int_{-\\infty}^{x} p_X(u) \\, du,\n\\]\nFor discrete variables, the cdf is defined to include the upper bound given by the argument,\n\\[\nF_X(x) = \\sum_{u \\leq x} p_X(u).\n\\]\n\n\n\nThe complementary cumulative distribution function (ccdf) in both the continuous and discrete cases is given by\n\\[\nF^C_X(x)\n\\ = \\ \\Pr[X > x]\n\\ = \\ 1 - F_X(x).\n\\]\nUnlike the cdf, the ccdf is exclusive of the bound, hence the event \\(X > x\\) rather than the cdf’s event \\(X \\leq x\\).\nFor continuous distributions, the ccdf works out to\n\\[\nF^C_X(x)\n\\ = \\ 1 - \\int_{-\\infty}^x p_X(u) \\, du\n\\ = \\ \\int_x^{\\infty} p_X(u) \\, du.\n\\]\nThe lower boundary can be included in the integration bounds because it is a single point on a line and hence has no probability mass. For the discrete case, the lower bound must be excluded in the summation explicitly by summing over \\(u > x\\),\n\\[\nF^C_X(x)\n\\ = \\ 1 - \\sum_{u \\leq x} p_X(u)\n\\ = \\ \\sum_{u > x} p_X(u).\n\\]\nCumulative distribution functions provide the necessary integral calculations to define truncated distributions. For truncation with lower and upper bounds, the denominator is defined by \\[\n\\int_a^b p(u) \\, du = F_X(b) - F_X(a).\n\\] This allows truncated distributions to be defined as \\[\np_{[a,b]}(x) = \\frac{p_X(x)}\n {F_X(b) - F_X(a)}.\n\\]\nFor discrete distributions, a slightly more complicated form is required to explicitly insert the lower truncation point, which is otherwise excluded from \\(F_X(b) - F_X(a)\\),\n\\[\np_{[a,b]}(x) = \\frac{p_X(x)}\n {F_X(b) - F_X(a) + p_X(a)}.\n\\]\n\n\n\nStan allows probability functions to be truncated. 
For example, a truncated unit normal distribution restricted to \\([-0.5, 2.1]\\) can be coded with the following distribution statement.\ny ~ normal(0, 1) T[-0.5, 2.1];\nTruncated distributions are translated as an additional term in the accumulated log density function plus error checking to make sure the variate in the distribution statement is within the bounds of the truncation.\nIn general, the truncation bounds and parameters may be parameters or local variables.\nBecause the example above involves a continuous distribution, it behaves the same way as the following more verbose form.\ny ~ normal(0, 1);\nif (y < -0.5 || y > 2.1) {\n target += negative_infinity();\n} else {\n target += -log_diff_exp(normal_lcdf(2.1 | 0, 1),\n normal_lcdf(-0.5 | 0, 1));\n}\nBecause a Stan program defines a log density function, all calculations are on the log scale. The function normal_lcdf is the log of the cumulative normal distribution function and the function log_diff_exp(a, b) is a more arithmetically stable form of log(exp(a) - exp(b)).\nFor a discrete distribution, another term is necessary in the denominator to account for the excluded boundary.
The truncated discrete distribution\ny ~ poisson(3.7) T[2, 10];\nbehaves in the same way as the following code.\ny ~ poisson(3.7);\nif (y < 2 || y > 10) {\n target += negative_infinity();\n} else {\n target += -log_sum_exp(poisson_lpmf(2 | 3.7),\n log_diff_exp(poisson_lcdf(10 | 3.7),\n poisson_lcdf(2 | 3.7)));\n}\nRecall that log_sum_exp(a, b) is just the arithmetically stable form of log(exp(a) + exp(b)).\n\n\n\nFor truncating with only a lower bound, the upper limit is left blank.\ny ~ normal(0, 1) T[-0.5, ];\nThis truncated distribution statement has the same behavior as the following code.\ny ~ normal(0, 1);\nif (y < -0.5) {\n target += negative_infinity();\n} else {\n target += -normal_lccdf(-0.5 | 0, 1);\n}\nThe normal_lccdf function is the normal complementary cumulative distribution function.\nAs with lower and upper truncation, the discrete case requires a more complicated denominator to add back in the probability mass for the lower bound. Thus\ny ~ poisson(3.7) T[2, ];\nbehaves the same way as\ny ~ poisson(3.7);\nif (y < 2) {\n target += negative_infinity();\n} else {\n target += -log_sum_exp(poisson_lpmf(2 | 3.7),\n poisson_lccdf(2 | 3.7));\n}\n\n\n\nTo truncate with only an upper bound, the lower bound is left blank. The upper truncated distribution statement\ny ~ normal(0, 1) T[ , 2.1];\nproduces the same result as the following code.\ntarget += normal_lpdf(y | 0, 1);\nif (y > 2.1) {\n target += negative_infinity();\n} else {\n target += -normal_lcdf(2.1 | 0, 1);\n}\nWith only an upper bound, the discrete case does not need a boundary adjustment. The upper-truncated distribution statement\ny ~ poisson(3.7) T[ , 10];\nbehaves the same way as the following code.\ny ~ poisson(3.7);\nif (y > 10) {\n target += negative_infinity();\n} else {\n target += -poisson_lcdf(10 | 3.7);\n}\n\n\n\nIn all cases, the truncation is only well formed if the appropriate log density or mass function and necessary log cumulative distribution functions are defined. 
Not every distribution built into Stan has log cdfs and log ccdfs defined, nor will every user-defined distribution. The discrete probability function documentation describes the available discrete and continuous cumulative distribution functions; most univariate distributions have log cdf and log ccdf functions.\n\n\n\nFor continuous distributions, truncation points must be expressions of type int or real. For discrete distributions, truncation points must be expressions of type int.\n\n\n\nFor a truncated distribution statement, if the value sampled is not within the bounds specified by the truncation expression, the result is zero probability and the entire statement adds \\(-\\infty\\) to the total log probability, which in turn results in the sample being rejected.\n\n\n\nVectorization of distribution functions with truncation is available if the underlying distribution, lcdf, and lccdf functions meet the required signatures.\nThe equivalent code for a vectorized truncation depends on which of the variables are non-scalars (arrays, vectors, etc.):\n\nIf the variate y is the only non-scalar, the result is the same as described in the above sections, but the lcdf/lccdf calculation is multiplied by size(y).\nIf the other arguments to the distribution are non-scalars, then the vectorized version of the lcdf/lccdf is used. These functions return the sum of their terms, so no multiplication by the size is needed.\nThe exception to the above is when a non-variate is a vector and both a lower and upper bound are specified in the truncation. In this case, a for loop is generated over the elements of the non-scalar arguments.
This is required since the log_diff_exp of two sums is not the same as the sum of the pairwise log_diff_exp operations.\n\nNote that while a lower-and-upper truncated distribution may generate a for-loop internally as part of translating the truncation statement, this is still preferable to manually constructing a loop, since the distribution function itself can still be evaluated in a vectorized manner.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#for-loops", + "href": "reference-manual/statements.html#for-loops", + "title": "Statements", + "section": "", + "text": "Suppose N is a variable of type int, y is a one-dimensional array of type array[] real, and mu and sigma are variables of type real. Furthermore, suppose that n has not been defined as a variable. Then the following is a well-formed for-loop statement.\nfor (n in 1:N) {\n y[n] ~ normal(mu, sigma);\n}\nThe loop variable is n, the loop bounds are the values in the range 1:N, and the body is the statement following the loop bounds.\n\n\nThe type of the loop variable is int. Unlike in C++ and similarly to R, this variable must not be declared explicitly.\nThe bounds in a for loop must be integers. Unlike in R, the loop is always interpreted as an upward counting loop. The range L:H will cause the loop to execute the loop with the loop variable taking on all integer values greater than or equal to L and less than or equal to H. For example, the loop for (n in 2:5) will cause the body of the for loop to be executed with n equal to 2, 3, 4, and 5, in order. The variable and bound for (n in 5:2) will not execute anything because there are no integers greater than or equal to 5 and less than or equal to 2.\nThe scope of the loop variable is limited to the body of the loop.\n\n\n\nUnlike in BUGS, Stan allows variables to be reassigned. 
For example, the variable theta in the following program is reassigned in each iteration of the loop.\nfor (n in 1:N) {\n theta = inv_logit(alpha + x[n] * beta);\n y[n] ~ bernoulli(theta);\n}\nSuch reassignment is not permitted in BUGS. In BUGS, for loops are declarative, defining plates in directed graphical model notation, which can be thought of as repeated substructures in the graphical model. Therefore, it is illegal in BUGS or JAGS to have a for loop that repeatedly reassigns a value to a variable.4\nIn Stan, assignments are executed in the order they are encountered. As a consequence, the following Stan program has a very different interpretation than the previous one.\nfor (n in 1:N) {\n y[n] ~ bernoulli(theta);\n theta = inv_logit(alpha + x[n] * beta);\n}\nIn this program, theta is assigned after it is used in the probability statement. This presupposes it was defined before the first loop iteration (otherwise behavior is undefined), and then each loop uses the assignment from the previous iteration.\nStan loops may be used to accumulate values. Thus it is possible to sum the values of an array directly using code such as the following.\ntotal = 0.0;\nfor (n in 1:N) {\n total = total + x[n];\n}\nAfter the for loop is executed, the variable total will hold the sum of the elements in the array x. This example was purely pedagogical; it is easier and more efficient to write\ntotal = sum(x);\nA variable inside (or outside) a loop may even be reassigned multiple times, as in the following legal code.\nfor (n in 1:100) {\n y += y * epsilon;\n epsilon = 0.5 * epsilon;\n y += y * epsilon;\n}", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#foreach-loops", + "href": "reference-manual/statements.html#foreach-loops", + "title": "Statements", + "section": "", + "text": "A second form of for loops allows iteration over elements of containers. 
If ys is an expression denoting a container (vector, row vector, matrix, or array) with elements of type T, then the following is a well-formed foreach statement.\nfor (y in ys) {\n // ... do something with y ...\n}\nThe order in which elements of ys are visited is defined for container types as follows.\n\nvector, row_vector: elements visited in order, y is of type real\nmatrix: elements visited in column-major order, y is of type real\narray[] T: elements visited in order, y is of type T.\n\nConsequently, if ys is a two dimensional array array[,] real, y will be a one-dimensional array of real values (type array[] real). If ys is a matrix, then y will be a real value (type real). To loop over all values of a two-dimensional array using foreach statements would require a doubly-nested loop,\narray[2, 3] real yss;\nfor (ys in yss) {\n for (y in ys) {\n // ... do something with y ...\n }\n}\nwhereas a matrix can be looped over in one foreach statement\nmatrix[2, 3] yss;\nfor (y in yss) {\n // ... do something with y...\n}\nIn both cases, the loop variable y is of type real. The elements of the matrix are visited in column-major order (e.g., y[1, 1], y[2, 1], y[1, 2], ..., y[2, 3]), whereas the elements of the two-dimensional array are visited in row-major order (e.g., y[1, 1], y[1, 2], y[1, 3], y[2, 1], ..., y[2, 3]).", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#conditional-statements", + "href": "reference-manual/statements.html#conditional-statements", + "title": "Statements", + "section": "", + "text": "Stan supports full conditional statements using the same if-then-else syntax as C++.
The general format is\nif (condition1)\n statement1\nelse if (condition2)\n statement2\n// ...\nelse if (conditionN-1)\n statementN-1\nelse\n statementN\nThere must be a single leading if clause, which may be followed by any number of else if clauses, all of which may be optionally followed by an else clause. Each condition must be an integer value, with non-zero values interpreted as true and the zero value as false.\nThe entire sequence of if-then-else clauses forms a single conditional statement for evaluation. The conditions are evaluated in order until one of the conditions evaluates to a non-zero value, at which point its corresponding statement is executed and the conditional statement finishes execution. If none of the conditions evaluate to a non-zero value and there is a final else clause, its statement is executed.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#while-statements", + "href": "reference-manual/statements.html#while-statements", + "title": "Statements", + "section": "", + "text": "Stan supports standard while loops using the same syntax as C++. The general format is as follows.\nwhile (condition)\n body\nThe condition must be an integer expression and the body can be any statement (or sequence of statements in curly braces).\nEvaluation of a while loop starts by evaluating the condition. If the condition evaluates to a false (zero) value, the execution of the loop terminates and control moves to the position after the loop. If the loop’s condition evaluates to a true (non-zero) value, the body statement is executed, then the whole loop is executed again. Thus the loop is continually executed as long as the condition evaluates to a true value.\nThe rest of the body of a while loop may be skipped using a continue. The loop will be exited with a break statement. 
See the section on continue and break statements for more details.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#statement-blocks-and-local-variable-declarations", + "href": "reference-manual/statements.html#statement-blocks-and-local-variable-declarations", + "title": "Statements", + "section": "", + "text": "Just as parentheses may be used to group expressions, curly brackets may be used to group a sequence of zero or more statements into a statement block. At the beginning of each block, local variables may be declared that are scoped over the rest of the statements in the block.\n\n\nBlocks are often used to group a sequence of statements together to be used in the body of a for loop. Because the body of a for loop can be any statement, for loops with bodies consisting of a single statement can be written as follows.\nfor (n in 1:N) {\n y[n] ~ normal(mu, sigma);\n}\nTo put multiple statements inside the body of a for loop, a block is used, as in the following example.\nfor (n in 1:N) {\n lambda[n] ~ gamma(alpha, beta);\n y[n] ~ poisson(lambda[n]);\n}\nThe open curly bracket ({) is the first character of the block and the close curly bracket (}) is the last character.\nBecause whitespace is ignored in Stan, the following program will not compile.\nfor (n in 1:N)\n y[n] ~ normal(mu, sigma);\n z[n] ~ normal(mu, sigma); // ERROR!\nThe problem is that the body of the for loop is taken to be the statement directly following it, which is y[n] ~ normal(mu, sigma). This leaves the probability statement for z[n] hanging, as is clear from the following equivalent program.\nfor (n in 1:N) {\n y[n] ~ normal(mu, sigma);\n}\nz[n] ~ normal(mu, sigma); // ERROR!\nNeither of these programs will compile. If the loop variable n was defined before the for loop, the for-loop declaration will raise an error. 
If the loop variable n was not defined before the for loop, then the use of the expression z[n] will raise an error.\n\n\n\nA for loop has a statement as a body. It is often convenient in writing programs to be able to define a local variable that will be used temporarily and then forgotten. For instance, the for loop example of repeated assignment should use a local variable for maximum clarity and efficiency, as in the following example.\nfor (n in 1:N) {\n real theta;\n theta = inv_logit(alpha + x[n] * beta);\n y[n] ~ bernoulli(theta);\n}\nThe local variable theta is declared here inside the for loop. The scope of a local variable is just the block in which it is defined. Thus theta is available for use inside the for loop, but not outside of it. As in other situations, Stan does not allow variable hiding. So it is illegal to declare a local variable theta if the variable theta is already defined in the scope of the for loop. For instance, the following is not legal.\nfor (m in 1:M) {\n real theta;\n for (n in 1:N) {\n real theta; // ERROR!\n theta = inv_logit(alpha + x[m, n] * beta);\n y[m, n] ~ bernoulli(theta);\n// ...\nThe compiler will flag the second declaration of theta with a message that it is already defined.\n\n\n\nLocal variables may not have constraints on their declaration. The only types that may be used are listed in the types table under “local”.\n\n\n\nA block is itself a statement, so anywhere a sequence of statements is allowed, one or more of the statements may be a block. For instance, in a for loop, it is legal to have the following\nfor (m in 1:M) {\n {\n int n = 2 * m;\n sum += n;\n }\n for (n in 1:N) {\n sum += x[m, n];\n }\n}\nThe variable declaration int n; is the first element of an embedded block and so has scope within that block. The for loop defines its own local block implicitly over the statement following it in which the loop variable is defined. 
As far as Stan is concerned, these two uses of n are unrelated.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#break-continue-statements", + "href": "reference-manual/statements.html#break-continue-statements", + "title": "Statements", + "section": "", + "text": "The one-token statements continue and break may be used within loops to alter control flow; continue causes the next iteration of the loop to run immediately, whereas break terminates the loop and causes execution to resume after the loop. Both control structures must appear in loops. Both break and continue scope to the most deeply nested loop, but pass through non-loop statements.\nAlthough these control statements may seem undesirable because of their goto-like behavior, their judicious use can greatly improve readability by reducing the level of nesting or eliminating bookkeeping inside loops.\n\n\nWhen a break statement is executed, the most deeply nested loop currently being executed is ended and execution picks up with the next statement after the loop. For example, consider the following program:\nwhile (1) {\n if (n < 0) {\n break;\n }\n foo(n);\n n = n - 1;\n}\nThe while (1) loop is a “forever” loop, because 1 is the true value, so the test always succeeds. Within the loop, if the value of n is less than 0, the loop terminates, otherwise it executes foo(n) and then decrements n. The statement above does exactly the same thing as\nwhile (n >= 0) {\n foo(n);\n n = n - 1;\n}\nThis case is simply illustrative of the behavior; it is not a case where a break simplifies the loop.\n\n\n\nThe continue statement ends the current operation of the loop and returns to the condition at the top of the loop. Such loops are typically used to exclude some values from calculations.
For example, we could use the following loop to sum the positive values in the array x,\nreal sum;\nsum = 0;\nfor (n in 1:size(x)) {\n if (x[n] <= 0) {\n continue;\n }\n sum += x[n];\n}\nWhen the continue statement is executed, control jumps back to the conditional part of the loop. With while and for loops, this causes control to return to the conditional of the loop. With for loops, this advances the loop variable, so the above program will not go into an infinite loop when faced with an x[n] less than zero. Thus the above program could be rewritten with deeper nesting by reversing the conditional,\nreal sum;\nsum = 0;\nfor (n in 1:size(x)) {\n if (x[n] > 0) {\n sum += x[n];\n }\n}\nWhile the latter form may seem more readable in this simple case, the former has the main line of execution nested one level less deep. Instead, the conditional at the top finds cases to exclude and doesn’t require the same level of nesting for code that’s not excluded. When there are several such exclusion conditions, the break or continue versions tend to be much easier to read.\n\n\n\nIf there is a loop nested within a loop, a break or continue statement only breaks out of the inner loop. So\nwhile (cond1) {\n // ...\n while (cond2) {\n // ...\n if (cond3) {\n break;\n }\n // ...\n }\n // execution continues here after break\n // ...\n}\nIf the break is triggered by cond3 being true, execution will continue after the nested loop.\nAs with break statements, continue statements go back to the top of the most deeply nested loop in which the continue appears.\nAlthough break and continue must appear within loops, they may appear in nested statements within loops, such as within the conditionals shown above or within nested statements.
The break and continue statements jump past any control structure other than while-loops and for-loops.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#print-statements.section", + "href": "reference-manual/statements.html#print-statements.section", + "title": "Statements", + "section": "", + "text": "Stan provides print statements that can print literal strings and the values of expressions. Print statements accept any number of arguments. Consider the following for-each statement with a print statement in its body.\nfor (n in 1:N) { print(\"loop iteration: \", n); ... }\nThe print statement will execute every time the body of the loop does. Each time the loop body is executed, it will print the string “loop iteration:” (with the trailing space), followed by the value of the expression n, followed by a new line.\n\n\nThe text printed by a print statement varies based on its content. A literal (i.e., quoted) string in a print statement always prints exactly that string (without the quotes). Expressions in print statements result in the value of the expression being printed. But how the value of the expression is formatted will depend on its type.\nPrinting a simple real or int typed variable always prints the variable’s value.5\nFor array, vector, and matrix variables, the print format uses brackets. For example, a 3-vector will print as\n[1, 2, 3]\nand a \\(2 \\times 3\\)-matrix as\n[[1, 2, 3], [4, 5, 6]]\nComplex numbers print as pairs. For example, the pair of statements\ncomplex z = to_complex(1.2, -3.5);\nprint(z)\nwill print as (1.2,-3.5), with no space after the comma or within the parentheses.\nPrinting a more readable version of arrays or matrices can be done with loops. 
An example is the print statement in the following transformed data block.\ntransformed data {\n matrix[2, 2] u;\n u[1, 1] = 1.0; u[1, 2] = 4.0;\n u[2, 1] = 9.0; u[2, 2] = 16.0;\n for (n in 1:2) {\n print(\"u[\", n, \"] = \", u[n]);\n }\n}\nThis print statement executes twice, printing the following two lines of output.\nu[1] = [1, 4]\nu[2] = [9, 16]\n\n\n\nThe input type to a print function cannot be void. In particular, it can’t be the result of a user-defined void function. All other types are allowed as arguments to the print function.\n\n\n\nPrinting for a print statement happens every time it is executed. The transformed data block is executed once per chain, the transformed parameters and model blocks once per leapfrog step, and the generated quantities block once per iteration.\n\n\n\nString literals begin and end with a double quote character (\"). The characters between the double quote characters may be any byte sequence, with the exception of the double quote character.\nThe Stan interfaces preserve the byte sequences which they receive. The encoding of these byte sequences as characters and their rendering as glyphs will be handled by whatever display mechanism is being used to monitor Stan’s output (e.g., a terminal, a Jupyter notebook, RStudio, etc.). Stan does not enforce a character encoding for strings, and no attempt is made to validate the bytes as legal ASCII, UTF-8, etc.\n\n\n\nBecause Stan is an imperative language, print statements can be very useful for debugging. They can be used to display the values of variables or expressions at various points in the execution of a program. They are particularly useful for spotting problematic not-a-number or infinite values, both of which will be printed.\nIt is particularly useful to print the value of the target log density accumulator (through the target() function), as in the following example.\nvector[2] y;\ny[1] = 1;\nprint(\"log density before =\", target());\ny ~ normal(0,1); // bug!
y[2] not defined\nprint(\"log density after =\", target());\nThe example has a bug in that y[2] is not defined before the vector y is used in the distribution statement. By printing the value of the log probability accumulator before and after each distribution statement, it’s possible to isolate where the log probability becomes ill-defined (i.e., becomes not-a-number).\nNote that print statements may not always be displayed immediately, but rather at the end of an operation (e.g., leapfrog step). As such, some issues such as infinite loops are difficult to debug effectively with this technique.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#reject-statements.section", + "href": "reference-manual/statements.html#reject-statements.section", + "title": "Statements", + "section": "", + "text": "The Stan reject statement provides a mechanism to report errors or problematic values encountered during program execution and either halt processing or reject iterations.\nLike the print statement, the reject statement accepts any number of quoted string literals or Stan expressions as arguments.\nReject statements are typically embedded in a conditional statement in order to detect variables in illegal states. For example, the following code handles the case where a variable x’s value is negative.\nif (x < 0) {\n reject(\"x must not be negative; found x=\", x);\n}\n\n\nReject statements have the same behavior as exceptions thrown by built-in Stan functions. For example, the normal_lpdf function raises an exception if the input scale is not positive and finite. The effect of a reject statement depends on the program block in which the rejection occurs.\nIn all cases of rejection, the interface accessing the Stan program should print the arguments to the reject statement.\n\n\nRejections in user-defined functions are just passed to the calling function or program block. 
Reject statements can be used in functions to validate the function arguments, allowing user-defined functions to fully emulate built-in function behavior. It is better to find out earlier rather than later when there is a problem.\n\n\n\nRejections are fatal in the transformed data block. This is because if initialization fails there is no way to recover values, so the algorithm will not begin execution.\nReject statements placed in the transformed data block can be used to validate both the data and transformed data (if any). This allows more complicated constraints to be enforced than can be specified with Stan’s constrained variable declarations.\nFatal errors in other blocks may also be signaled by use of the fatal_error statement.\n\n\n\nRejections in the transformed parameters and model blocks are not in and of themselves instantly fatal. The result has the same effect as assigning a \\(-\\infty\\) log probability, which causes rejection of the current proposal in MCMC samplers and adjustment of search parameters in optimization.\nIf the log probability function results in a rejection every time it is called, the containing application (MCMC sampler or optimization) should diagnose this problem and terminate with an appropriate error message. To aid in diagnosing problems, the message for each reject statement will be printed as a result of executing it.\n\n\n\n\nRejection should be used for error handling, not defining arbitrary constraints. Consider the following errorful Stan program.\nparameters {\n real a;\n real<lower=a> b;\n real<lower=a, upper=b> theta;\n // ...\n}\nmodel {\n // **wrong** needs explicit truncation\n theta ~ normal(0, 1);\n // ...\n}\nThis program is wrong because its truncation bounds on theta depend on parameters, and thus need to be accounted for using an explicit truncation on the distribution. 
This is the right way to do it.\n theta ~ normal(0, 1) T[a, b];\nThe conceptual issue is that the prior does not integrate to one over the admissible parameter space; it integrates to one over all real numbers and integrates to something less than one over \\([a ,b]\\); in these simple univariate cases, we can overcome that with the T[ , ] notation, which essentially divides by whatever the prior integrates to over \\([a, b]\\).\nThis problem is exactly the same problem as you would get using reject statements to enforce complicated inequalities on multivariate functions. In this case, it is wrong to try to deal with truncation through constraints.\n if (theta < a || theta > b) {\n reject(\"theta not in (a, b)\");\n }\n // still **wrong**, needs T[a,b]\n theta ~ normal(0, 1);\nIn this case, the prior integrates to something less than one over the region of the parameter space where the complicated inequalities are satisfied. But we don’t generally know what value the prior integrates to, so we can’t increment the log probability function to compensate.\nEven if this adjustment to a proper probability model may seem minor in particular models where the amount of truncated posterior density is negligible or constant, we can’t sample from that truncated posterior efficiently. 
Programs need to use one-to-one mappings that guarantee the constraints are satisfied and only use reject statements to raise errors or help with debugging.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#fatal-error-statements", + "href": "reference-manual/statements.html#fatal-error-statements", + "title": "Statements", + "section": "", + "text": "The Stan fatal_error statement provides a mechanism to report errors or problematic values encountered during program execution and uniformly halt processing.\nLike the print or reject statements, the fatal error statement accepts any number of quoted string literals or Stan expressions as arguments.\nThe fatal error may be used to signal an unrecoverable error in blocks where reject leads to the algorithm attempting to try again, such as the model block.", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/statements.html#footnotes", + "href": "reference-manual/statements.html#footnotes", + "title": "Statements", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe current notation replaces two previous versions. Originally, a variable lp__ was directly exposed and manipulated; this is no longer allowed. The original statement syntax for target += u was increment_log_prob(u), but this form was removed in Stan 2.33↩︎\nWriting this model with the expression -0.5 * y * y is more efficient than with the equivalent expression y * y / -2 because multiplication is more efficient than division; in both cases, the negation is rolled into the numeric literal (-0.5 and -2). 
Writing square(y) instead of y * y would be even more efficient because the derivatives can be precomputed, reducing the memory and number of operations required for automatic differentiation.↩︎\nBecause \\(\\log | \\frac{d}{dy} \\log y | = \\log | 1/y | = - \\log |y|\\).↩︎\nA programming idiom in BUGS code simulates a local variable by replacing theta in the above example with theta[n], effectively creating N different variables, theta[1], …, theta[N]. Of course, this is not a hack if the value of theta[n] is required for all n.↩︎\nThe adjoint component is always zero during execution for the algorithmic differentiation variables used to implement parameters, transformed parameters, and local variables in the model.↩︎", + "crumbs": [ + "Reference Manual", + "Language", + "Statements" + ] + }, + { + "objectID": "reference-manual/removals.html", + "href": "reference-manual/removals.html", + "title": "Removed Features", + "section": "", + "text": "This chapter lists functionalities that were once present in the language but have since been removed, along with how to replace them.\n\n\nRemoved: The variable lp__ is no longer available for direct access or manipulation.\nReplacement: General manipulation of the value of the lp__ variable is not allowed, but\nlp__ <- lp__ + e;\ncan be replaced with\ntarget += e;\nThe value of lp__ is available through the no-argument function target().\n\n\n\nRemoved: The operator <- for assignment, e.g.,\na <- b;\nis no longer available.\nReplacement: The new syntax uses the operator = for assignment, e.g.,\na = b;\nRemoved In: Stan 2.33\n\n\n\nRemoved: The increment_log_prob(u) statement for incrementing the log density accumulator by u is no longer available.\nReplacement: Replace the above statement with\ntarget += u;\nRemoved In: Stan 2.33\n\n\n\nRemoved: The built-in no-argument function get_lp() is no longer available.\nReplacement: Use the no-argument function target() instead.\nRemoved In: Stan 2.33\n\n\n\nRemoved: Formerly, 
the probability function for the distribution foo would be applied to an outcome variable y and sequence of zero or more parameters ... to produce the expression foo_log(y, ...). This suffix is no longer a special value.\nReplacement: If y can be a real value (including vectors or matrices), replace\nfoo_log(y, ...)\nwith the log probability density function notation\nfoo_lpdf(y | ...).\nIf y must be an integer (including arrays), instead replace\nfoo_log(y, ...)\nwith the log probability mass function\nfoo_lpmf(y | ...).\nRemoved In: Stan 2.33\n\n\n\nRemoved: The log cumulative distribution and complementary cumulative distribution functions for a distribution foo were formerly written as foo_cdf_log and foo_ccdf_log.\nReplacement:\nReplace foo_cdf_log(y, ...) with foo_lcdf(y | ...).\nReplace foo_ccdf_log(y, ...) with foo_lccdf(y | ...).\n\n\n\nRemoved: A user-defined function ending in _log can no longer be used in statements.qmd#distribution-statements.section.\nReplacement: Replace the _log suffix with _lpdf for density functions or _lpmf for mass functions in the user-defined function.\nRemoved In: Stan 2.33\nNote: Following Stan 2.33, users can still define a function ending in _log; it simply no longer has a special meaning or is supported in the ~ syntax.\n\n\n\nRemoved: The function if_else is no longer available.\nReplacement: Use the conditional operator which allows more flexibility in the types of b and c and is much more efficient in that it only evaluates whichever of b or c is returned.\nx = if_else(a, b, c);\nwith\nx = a ? b : c;\nRemoved In: Stan 2.33\n\n\n\nRemoved: The use of # for line-based comments is no longer permitted. 
# may only be used for #include statements.\nReplacement: Use a pair of forward slashes, //, for line comments.\nRemoved In: Stan 2.33\n\n\n\nBefore Stan 2.26, arrays were declared by writing syntax after the variable.\nRemoved: The use of array declarations like\nint n[5];\nreal a[3, 4];\nreal<lower=0> z[5, 4, 2];\nvector[7] mu[3];\nmatrix[7, 2] mu[15, 12];\ncholesky_factor_cov[5, 6] mu[2, 3, 4];\nReplacement: The use of the array keyword, which replaces the above examples with\narray[5] int n;\narray[3, 4] real a;\narray[5, 4, 2] real<lower=0> z;\narray[3] vector[7] mu;\narray[15, 12] matrix[7, 2] mu;\narray[2, 3, 4] cholesky_factor_cov[5, 6] mu;\nRemoved In: Stan 2.33\n\n\n\nStan interprets nested indexing in assignments as flat indexing so that a statement like\na[:][1] = b;\nis the same as\na[:,1] = b;\nHowever, this is inconsistent with multiple indexing rules.\nTo avoid confusion, nested multiple indexing in assignment became an error in Stan 2.33. Nesting single indexing is still allowed as it cannot lead to ambiguity.\nRemoved In: Stan 2.33\n\n\n\nRemoved: Using a real value in a conditional is no longer permitted.\nreal x = 1.0;\nif (x) {\nThe value was interpreted as true if it is nonzero.\nReplacement: For the exact equivalent, use a comparison operator to make the intent clear.\nreal x = 1.0;\nif (x != 0) {\nHowever, one should keep in mind that floating point calculations are subject to rounding errors and precise equality is fragile. 
It is worth considering whether the more robust alternative abs(x) < machine_precision() is appropriate for the use case.\nRemoved In: Stan 2.34", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#lp__-variable", + "href": "reference-manual/removals.html#lp__-variable", + "title": "Removed Features", + "section": "", + "text": "Removed: The variable lp__ is no longer available for direct access or manipulation.\nReplacement: General manipulation of the value of the lp__ variable is not allowed, but\nlp__ <- lp__ + e;\ncan be replaced with\ntarget += e;\nThe value of lp__ is available through the no-argument function target().", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#assignment-with--", + "href": "reference-manual/removals.html#assignment-with--", + "title": "Removed Features", + "section": "", + "text": "Removed: The operator <- for assignment, e.g.,\na <- b;\nis no longer available.\nReplacement: The new syntax uses the operator = for assignment, e.g.,\na = b;\nRemoved In: Stan 2.33", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#increment_log_prob-statement", + "href": "reference-manual/removals.html#increment_log_prob-statement", + "title": "Removed Features", + "section": "", + "text": "Removed: The increment_log_prob(u) statement for incrementing the log density accumulator by u is no longer available.\nReplacement: Replace the above statement with\ntarget += u;\nRemoved In: Stan 2.33", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#get_lp-function", + "href": "reference-manual/removals.html#get_lp-function", + "title": "Removed Features", + "section": "", + "text": "Removed: The built-in no-argument function get_lp() is no 
longer available.\nReplacement: Use the no-argument function target() instead.\nRemoved In: Stan 2.33", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#log-density-and-mass-functions", + "href": "reference-manual/removals.html#log-density-and-mass-functions", + "title": "Removed Features", + "section": "", + "text": "Removed: Formerly, the probability function for the distribution foo would be applied to an outcome variable y and sequence of zero or more parameters ... to produce the expression foo_log(y, ...). This suffix is no longer a special value.\nReplacement: If y can be a real value (including vectors or matrices), replace\nfoo_log(y, ...)\nwith the log probability density function notation\nfoo_lpdf(y | ...).\nIf y must be an integer (including arrays), instead replace\nfoo_log(y, ...)\nwith the log probability mass function\nfoo_lpmf(y | ...).\nRemoved In: Stan 2.33", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#cdf_log-and-ccdf_log-cumulative-distribution-functions", + "href": "reference-manual/removals.html#cdf_log-and-ccdf_log-cumulative-distribution-functions", + "title": "Removed Features", + "section": "", + "text": "Removed: The log cumulative distribution and complementary cumulative distribution functions for a distribution foo were formerly written as foo_cdf_log and foo_ccdf_log.\nReplacement:\nReplace foo_cdf_log(y, ...) with foo_lcdf(y | ...).\nReplace foo_ccdf_log(y, ...) 
with foo_lccdf(y | ...).", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#user-defined-function-with-_log-suffix", + "href": "reference-manual/removals.html#user-defined-function-with-_log-suffix", + "title": "Removed Features", + "section": "", + "text": "Removed: A user-defined function ending in _log can no longer be used in statements.qmd#distribution-statements.section.\nReplacement: Replace the _log suffix with _lpdf for density functions or _lpmf for mass functions in the user-defined function.\nRemoved In: Stan 2.33\nNote: Following Stan 2.33, users can still define a function ending in _log; it simply no longer has a special meaning or is supported in the ~ syntax.", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#if_else-function", + "href": "reference-manual/removals.html#if_else-function", + "title": "Removed Features", + "section": "", + "text": "Removed: The function if_else is no longer available.\nReplacement: Use the conditional operator which allows more flexibility in the types of b and c and is much more efficient in that it only evaluates whichever of b or c is returned.\nx = if_else(a, b, c);\nwith\nx = a ? b : c;\nRemoved In: Stan 2.33", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#character-as-comment-prefix", + "href": "reference-manual/removals.html#character-as-comment-prefix", + "title": "Removed Features", + "section": "", + "text": "Removed: The use of # for line-based comments is no longer permitted. 
# may only be used for #include statements.\nReplacement: Use a pair of forward slashes, //, for line comments.\nRemoved In: Stan 2.33", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#postfix-brackets-array-syntax", + "href": "reference-manual/removals.html#postfix-brackets-array-syntax", + "title": "Removed Features", + "section": "", + "text": "Before Stan 2.26, arrays were declared by writing syntax after the variable.\nRemoved: The use of array declarations like\nint n[5];\nreal a[3, 4];\nreal<lower=0> z[5, 4, 2];\nvector[7] mu[3];\nmatrix[7, 2] mu[15, 12];\ncholesky_factor_cov[5, 6] mu[2, 3, 4];\nReplacement: The use of the array keyword, which replaces the above examples with\narray[5] int n;\narray[3, 4] real a;\narray[5, 4, 2] real<lower=0> z;\narray[3] vector[7] mu;\narray[15, 12] matrix[7, 2] mu;\narray[2, 3, 4] cholesky_factor_cov[5, 6] mu;\nRemoved In: Stan 2.33", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#nested-multiple-indexing-in-assignments", + "href": "reference-manual/removals.html#nested-multiple-indexing-in-assignments", + "title": "Removed Features", + "section": "", + "text": "Stan interprets nested indexing in assignments as flat indexing so that a statement like\na[:][1] = b;\nis the same as\na[:,1] = b;\nHowever, this is inconsistent with multiple indexing rules.\nTo avoid confusion, nested multiple indexing in assignment became an error in Stan 2.33. 
Nesting single indexing is still allowed as it cannot lead to ambiguity.\nRemoved In: Stan 2.33", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/removals.html#real-values-in-conditionals", + "href": "reference-manual/removals.html#real-values-in-conditionals", + "title": "Removed Features", + "section": "", + "text": "Removed: Using a real value in a conditional is no longer permitted.\nreal x = 1.0;\nif (x) {\nThe value was interpreted as true if it is nonzero.\nReplacement: For the exact equivalent, use a comparison operator to make the intent clear.\nreal x = 1.0;\nif (x != 0) {\nHowever, one should keep in mind that floating point calculations are subject to rounding errors and precise equality is fragile. It is worth considering whether the more robust alternative abs(x) < machine_precision() is appropriate for the use case.\nRemoved In: Stan 2.34", + "crumbs": [ + "Reference Manual", + "Language", + "Removed Features" + ] + }, + { + "objectID": "reference-manual/pathfinder.html", + "href": "reference-manual/pathfinder.html", + "title": "Pathfinder", + "section": "", + "text": "Stan supports the Pathfinder algorithm (Zhang et al. 2022). Pathfinder is a variational method for approximately sampling from differentiable log densities. Starting from a random initialization, Pathfinder locates normal approximations to the target density along a quasi-Newton optimization path, with local covariance estimated using the negative inverse Hessian estimates produced by the LBFGS optimizer. Pathfinder returns draws from the Gaussian approximation with the lowest estimated Kullback-Leibler (KL) divergence to the true posterior.\nStan provides two versions of the Pathfinder algorithm: single-path Pathfinder and multi-path Pathfinder. Single-path Pathfinder generates a set of approximate draws from one run of the basic Pathfinder algorithm. 
Multi-path Pathfinder uses importance resampling over the draws from multiple runs of Pathfinder. This better matches non-normal target densities and also mitigates the problem of L-BFGS getting stuck at local optima or in saddle points on plateaus. Compared to ADVI and short dynamic HMC runs, Pathfinder requires one to two orders of magnitude fewer log density and gradient evaluations, with greater reductions for more challenging posteriors. While the evaluations by Zhang et al. (2022) found that single-path and multi-path Pathfinder outperform ADVI for most of the models in the PosteriorDB (Magnusson et al. 2024) evaluation set, we recognize the need for further experiments on a wider range of models.\n\n\nPathfinder diagnoses the accuracy of the approximation by computing the density ratio of the true posterior and the approximation and using Pareto-\\(\\hat{k}\\) diagnostic (Vehtari et al. 2024) to assess whether these ratios can be used to improve the approximation via resampling. The normalization for the posterior can be estimated reliably (Vehtari et al. 2024, sec. 3), which is the first requirement for reliable resampling. If estimated Pareto-\\(\\hat{k}\\) for the ratios is smaller than 0.7, there is still need to further diagnose reliability of importance sampling estimate for all quantities of interest (Vehtari et al. 2024, sec. 2.2). If estimated Pareto-\\(\\hat{k}\\) is larger than 0.7, then the estimate for the normalization is unreliable and any Monte Carlo estimate may have a big error. The resampled draws can still contain some useful information about the location and shape of the posterior which can be used in early parts of Bayesian workflow (Gelman et al. 2020).\n\n\n\nIf estimated Pareto-\\(\\hat{k}\\) for the ratios is smaller than 0.7, the resampled posterior draws are almost as good for initializing MCMC as would independent draws from the posterior be. 
If estimated Pareto-\\(\\hat{k}\\) for the ratios is larger than 0.7, the Pathfinder draws are not reliable for posterior inference directly, but they are still very likely better for initializing MCMC than random draws from an arbitrary pre-defined distribution (e.g. uniform from -2 to 2 used by Stan by default). If Pareto-\\(\\hat{k}\\) is larger than 0.7, it is likely that one of the ratios is much bigger than others and the default resampling with replacement would produce copies of one unique draw. For initializing several Markov chains, it is better to use resampling without replacement to guarantee unique initialization for each chain. At the moment Stan allows turning off the resampling completely, and then the resampling without replacement can be done outside of Stan.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Pathfinder" + ] + }, + { + "objectID": "reference-manual/pathfinder.html#diagnosing-pathfinder", + "href": "reference-manual/pathfinder.html#diagnosing-pathfinder", + "title": "Pathfinder", + "section": "", + "text": "Pathfinder diagnoses the accuracy of the approximation by computing the density ratio of the true posterior and the approximation and using Pareto-\\(\\hat{k}\\) diagnostic (Vehtari et al. 2024) to assess whether these ratios can be used to improve the approximation via resampling. The normalization for the posterior can be estimated reliably (Vehtari et al. 2024, sec. 3), which is the first requirement for reliable resampling. If estimated Pareto-\\(\\hat{k}\\) for the ratios is smaller than 0.7, there is still need to further diagnose reliability of importance sampling estimate for all quantities of interest (Vehtari et al. 2024, sec. 2.2). If estimated Pareto-\\(\\hat{k}\\) is larger than 0.7, then the estimate for the normalization is unreliable and any Monte Carlo estimate may have a big error. 
The resampled draws can still contain some useful information about the location and shape of the posterior which can be used in early parts of Bayesian workflow (Gelman et al. 2020).", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Pathfinder" + ] + }, + { + "objectID": "reference-manual/pathfinder.html#using-pathfinder-for-initializing-mcmc", + "href": "reference-manual/pathfinder.html#using-pathfinder-for-initializing-mcmc", + "title": "Pathfinder", + "section": "", + "text": "If estimated Pareto-\\(\\hat{k}\\) for the ratios is smaller than 0.7, the resampled posterior draws are almost as good for initializing MCMC as would independent draws from the posterior be. If estimated Pareto-\\(\\hat{k}\\) for the ratios is larger than 0.7, the Pathfinder draws are not reliable for posterior inference directly, but they are still very likely better for initializing MCMC than random draws from an arbitrary pre-defined distribution (e.g. uniform from -2 to 2 used by Stan by default). If Pareto-\\(\\hat{k}\\) is larger than 0.7, it is likely that one of the ratios is much bigger than others and the default resampling with replacement would produce copies of one unique draw. For initializing several Markov chains, it is better to use resampling without replacement to guarantee unique initialization for each chain. 
At the moment Stan allows turning off the resampling completely, and then the resampling without replacement can be done outside of Stan.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Pathfinder" + ] + }, + { + "objectID": "reference-manual/mcmc.html", + "href": "reference-manual/mcmc.html", + "title": "MCMC Sampling", + "section": "", + "text": "This chapter presents the two Markov chain Monte Carlo (MCMC) algorithms used in Stan, the Hamiltonian Monte Carlo (HMC) algorithm and its adaptive variant the no-U-turn sampler (NUTS), along with details of their implementation and configuration.\n\n\nHamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) method that uses the derivatives of the density function being sampled to generate efficient transitions spanning the posterior (see, e.g., Betancourt and Girolami (2013), Neal (2011) for more details). It uses an approximate Hamiltonian dynamics simulation based on numerical integration which is then corrected by performing a Metropolis acceptance step.\nThis section translates the presentation of HMC by Betancourt and Girolami (2013) into the notation of Gelman et al. (2013).\n\n\nThe goal of sampling is to draw from a density \\(p(\\theta)\\) for parameters \\(\\theta\\). This is typically a Bayesian posterior \\(p(\\theta|y)\\) given data \\(y\\), and in particular, a Bayesian posterior coded as a Stan program.\n\n\n\nHMC introduces auxiliary momentum variables \\(\\rho\\) and draws from a joint density\n\\[\np(\\rho, \\theta) = p(\\rho | \\theta) p(\\theta).\n\\]\nIn most applications of HMC, including Stan, the auxiliary density is a multivariate normal that does not depend on the parameters \\(\\theta\\),\n\\[\n\\rho \\sim \\mathsf{MultiNormal}(0, M).\n\\]\n\\(M\\) is the Euclidean metric. 
It can be seen as a transform of parameter space that makes sampling more efficient; see Betancourt (2017) for details.\nBy default Stan sets \\(M^{-1}\\) equal to a diagonal estimate of the covariance computed during warmup.\n\n\n\nThe joint density \\(p(\\rho, \\theta)\\) defines a Hamiltonian\n\\[\n\\begin{array}{rcl}\nH(\\rho, \\theta) & = & - \\log p(\\rho, \\theta)\n\\\\[3pt]\n& = & - \\log p(\\rho | \\theta) - \\log p(\\theta).\n\\\\[3pt]\n& = & T(\\rho | \\theta) + V(\\theta),\n\\end{array}\n\\]\nwhere the term\n\\[\nT(\\rho | \\theta) = - \\log p(\\rho | \\theta)\n\\]\nis called the “kinetic energy” and the term\n\\[\nV(\\theta) = - \\log p(\\theta)\n\\]\nis called the “potential energy.” The potential energy is specified by the Stan program through its definition of a log density.\n\n\n\nStarting from the current value of the parameters \\(\\theta\\), a transition to a new state is generated in two stages before being subjected to a Metropolis accept step.\nFirst, a value for the momentum is drawn independently of the current parameter values,\n\\[\n\\rho \\sim \\mathsf{MultiNormal}(0, M).\n\\]\nThus momentum does not persist across iterations.\nNext, the joint system \\((\\theta,\\rho)\\) made up of the current parameter values \\(\\theta\\) and new momentum \\(\\rho\\) is evolved via Hamilton’s equations,\n\\[\n\\begin{array}{rcccl}\n\\displaystyle\n\\frac{d\\theta}{dt}\n& = &\n\\displaystyle\n+ \\frac{\\partial H}{\\partial \\rho}\n& = &\n\\displaystyle\n+ \\frac{\\partial T}{\\partial \\rho}\n\\\\[12pt]\n\\displaystyle\n\\frac{d\\rho}{dt}\n& = &\n\\displaystyle\n- \\frac{\\partial H}{\\partial \\theta }\n& = &\n\\displaystyle\n- \\frac{\\partial T}{\\partial \\theta}\n- \\frac{\\partial V}{\\partial \\theta}.\n\\end{array}\n\\]\nWith the momentum density being independent of the target density, i.e., \\(p(\\rho | \\theta) = p(\\rho)\\), the first term in the momentum time derivative, \\({\\partial T} / {\\partial \\theta}\\) is zero, yielding the pair 
time derivatives\n\\[\n\\begin{array}{rcl}\n\\frac{d \\theta}{d t} & = & +\\frac{\\partial T}{\\partial \\rho}\n\\\\[2pt]\n\\frac{d \\rho}{d t} & = & -\\frac{\\partial V}{\\partial \\theta}.\n\\end{array}\n\\]\n\n\n\nThe last section leaves a two-state differential equation to solve. Stan, like most other HMC implementations, uses the leapfrog integrator, which is a numerical integration algorithm that’s specifically adapted to provide stable results for Hamiltonian systems of equations.\nLike most numerical integrators, the leapfrog algorithm takes discrete steps of some small time interval \\(\\epsilon\\). The leapfrog algorithm begins by drawing a fresh momentum term independently of the parameter values \\(\\theta\\) or previous momentum value.\n\\[\n\\rho \\sim \\mathsf{MultiNormal}(0, M).\n\\] It then alternates half-step updates of the momentum and full-step updates of the position.\n\\[\n\\begin{array}{rcl}\n\\rho & \\leftarrow\n & \\rho \\, - \\, \\frac{\\epsilon}{2} \\frac{\\partial V}{\\partial \\theta}\n\\\\[6pt]\n\\theta & \\leftarrow\n & \\theta \\, + \\, \\epsilon \\, M^{-1} \\, \\rho\n\\\\[6pt]\n\\rho & \\leftarrow\n & \\rho \\, - \\, \\frac{\\epsilon}{2} \\frac{\\partial V}{\\partial \\theta}.\n\\end{array}\n\\]\nBy applying \\(L\\) leapfrog steps, a total of \\(L \\, \\epsilon\\) time is simulated. 
The resulting state at the end of the simulation (\\(L\\) repetitions of the above three steps) will be denoted \\((\\rho^{*}, \\theta^{*})\\).\nThe leapfrog integrator’s error is on the order of \\(\\epsilon^3\\) per step and \\(\\epsilon^2\\) globally, where \\(\\epsilon\\) is the time interval (also known as the step size); Leimkuhler and Reich (2004) provide a detailed analysis of numerical integration for Hamiltonian systems, including a derivation of the error bound for the leapfrog integrator.\n\n\n\nIf the leapfrog integrator were perfect numerically, there would be no need to do any more randomization per transition than generating a random momentum vector. Instead, what is done in practice to account for numerical errors during integration is to apply a Metropolis acceptance step, where the probability of keeping the proposal \\((\\rho^{*}, \\theta^{*})\\) generated by transitioning from \\((\\rho, \\theta)\\) is\n\\[\n\\min \\!\n\\left(\n1,\n\\ \\exp \\! \\left( H(\\rho, \\theta) - H(\\rho^{*}, \\theta^{*}) \\right)\n\\right).\n\\]\nIf the proposal is not accepted, the previous parameter value is returned for the next draw and used to initialize the next iteration.\n\n\n\nThe Hamiltonian Monte Carlo algorithm starts at a specified initial set of parameters \\(\\theta\\); in Stan, this value is either user-specified or generated randomly. Then, for a given number of iterations, a new momentum vector is sampled and the current value of the parameter \\(\\theta\\) is updated using the leapfrog integrator with discretization time \\(\\epsilon\\) and number of steps \\(L\\) according to the Hamiltonian dynamics. 
Then a Metropolis acceptance step is applied, and a decision is made whether to update to the new state \\((\\theta^{*}, \\rho^{*})\\) or keep the existing state.\n\n\n\n\nThe Hamiltonian Monte Carlo algorithm has three parameters which must be set,\n\ndiscretization time \\(\\epsilon\\),\nmetric \\(M\\), and\nnumber of steps taken \\(L\\).\n\nIn practice, sampling efficiency, both in terms of iteration speed and iterations per effective sample, is highly sensitive to these three tuning parameters Neal (2011), Hoffman and Gelman (2014).\nIf \\(\\epsilon\\) is too large, the leapfrog integrator will be inaccurate and too many proposals will be rejected. If \\(\\epsilon\\) is too small, too many small steps will be taken by the leapfrog integrator leading to long simulation times per interval. Thus the goal is to balance the acceptance rate between these extremes.\nIf \\(L\\) is too small, the trajectory traced out in each iteration will be too short and sampling will devolve to a random walk. If \\(L\\) is too large, the algorithm will do too much work on each iteration.\nIf the inverse metric \\(M^{-1}\\) is a poor estimate of the posterior covariance, the step size \\(\\epsilon\\) must be kept small to maintain arithmetic precision. This would lead to a large \\(L\\) to compensate.\n\n\nThe actual integration time is \\(L \\, \\epsilon\\), a function of number of steps. Some interfaces to Stan set an approximate integration time \\(t\\) and the discretization interval (step size) \\(\\epsilon\\). 
In these cases, the number of steps will be rounded down as\n\\[\nL = \\left\\lfloor \\frac{t}{\\epsilon} \\right\\rfloor.\n\\]\nand the actual integration time will still be \\(L \\, \\epsilon\\).\n\n\n\nStan is able to automatically optimize \\(\\epsilon\\) to match an acceptance-rate target, able to estimate \\(M\\) based on warmup sample iterations, and able to dynamically adapt \\(L\\) on the fly during sampling (and during warmup) using the no-U-turn sampling (NUTS) algorithm Hoffman and Gelman (2014).\nWarmup Epochs Figure. Adaptation during warmup occurs in three stages: an initial fast adaptation interval (I), a series of expanding slow adaptation intervals (II), and a final fast adaptation interval (III). For HMC, both the fast and slow intervals are used for adapting the step size, while the slow intervals are used for learning the (co)variance necessitated by the metric. Iteration numbering starts at 1 on the left side of the figure and increases to the right.\n\nWhen adaptation is engaged (it may be turned off by fixing a step size and metric), the warmup period is split into three stages, as illustrated in the warmup adaptation figure, with two fast intervals surrounding a series of growing slow intervals. Here fast and slow refer to parameters that adapt using local and global information, respectively; the Hamiltonian Monte Carlo samplers, for example, define the step size as a fast parameter and the (co)variance as a slow parameter. The size of the initial and final fast intervals and the initial size of the slow interval are all customizable, although user-specified values may be modified slightly in order to ensure alignment with the warmup period.\nThe motivation behind this partitioning of the warmup period is to allow for more robust adaptation. 
The stages are as follows.\n\nIn the initial fast interval the chain is allowed to converge towards the typical set,1 with only parameters that can learn from local information adapted.\nAfter this initial stage parameters that require global information, for example (co)variances, are estimated in a series of expanding, memoryless windows; often fast parameters will be adapted here as well.\nLastly, the fast parameters are allowed to adapt to the final update of the slow parameters.\n\nThese intervals may be controlled through the following configuration parameters, all of which must be positive integers:\nAdaptation Parameters Table. The parameters controlling adaptation and their default values.\n\n\n\n\n\n\n\n\nparameter\ndescription\ndefault\n\n\n\n\ninitial buffer\nwidth of initial fast adaptation interval\n75\n\n\nterm buffer\nwidth of final fast adaptation interval\n50\n\n\nwindow\ninitial width of slow adaptation interval\n25\n\n\n\n\n\n\nStan’s HMC algorithms utilize dual averaging Nesterov (2009) to optimize the step size.2\nThis warmup optimization procedure is extremely flexible and for completeness, Stan exposes each tuning option for dual averaging, using the notation of Hoffman and Gelman (2014). In practice, the efficacy of the optimization is sensitive to the value of these parameters, but we do not recommend changing the defaults without experience with the dual-averaging algorithm. 
For more information, see the discussion of dual averaging in Hoffman and Gelman (2014).\nThe full set of dual-averaging parameters are:\nStep Size Adaptation Parameters Table The parameters controlling step size adaptation, with constraints and default values.\n\n\n\nparameter\ndescription\nconstraint\ndefault\n\n\n\n\ndelta\ntarget Metropolis acceptance rate\n[0, 1]\n0.8\n\n\ngamma\nadaptation regularization scale\n(0, infty)\n0.05\n\n\nkappa\nadaptation relaxation exponent\n(0, infty)\n0.75\n\n\nt_0\nadaptation iteration offset\n(0, infty)\n10\n\n\n\nBy setting the target acceptance parameter \\(\\delta\\) to a value closer to 1 (its value must be strictly less than 1 and its default value is 0.8), adaptation will be forced to use smaller step sizes. This can improve sampling efficiency (effective sample size per iteration) at the cost of increased iteration times. Raising the value of \\(\\delta\\) will also allow some models that would otherwise get stuck to overcome their blockages.\n\n\n\nAll implementations of HMC use numerical integrators requiring a step size (equivalently, discretization time interval). Stan allows the step size to be adapted or set explicitly. Stan also allows the step size to be “jittered” randomly during sampling to avoid any poor interactions with a fixed step size and regions of high curvature. The jitter is a proportion that may be added or subtracted, so the maximum amount of jitter is 1, which will cause step sizes to be selected in the range of 0 to twice the adapted step size. The default value is 0, producing no jitter.\nSmall step sizes can get HMC samplers unstuck that would otherwise get stuck with higher step sizes. The downside is that jittering below the adapted value will increase the number of leapfrog steps required and thus slow down iterations, whereas jittering above the adapted value can cause premature rejection due to simulation error in the Hamiltonian dynamics calculation. 
See Neal (2011) for further discussion of step-size jittering.\n\n\n\nAll HMC implementations in Stan utilize quadratic kinetic energy functions which are specified up to the choice of a symmetric, positive-definite matrix known as a mass matrix or, more formally, a metric Betancourt (2017).\nIf the metric is constant then the resulting implementation is known as Euclidean HMC. Stan allows a choice among three Euclidean HMC implementations,\n\na unit metric (diagonal matrix of ones),\na diagonal metric (diagonal matrix with positive diagonal entries), and\na dense metric (a dense, symmetric positive definite matrix)\n\nto be configured by the user.\nIf the metric is specified to be diagonal, then regularized variances are estimated based on the iterations in each slow-stage block (labeled II in the warmup adaptation stages figure). Each of these estimates is based only on the iterations in that block. This allows early estimates to be used to help guide warmup and then be forgotten later so that they do not influence the final covariance estimate.\nIf the metric is specified to be dense, then regularized covariance estimates will be carried out, regularizing the estimate to a diagonal matrix, which is itself regularized toward a unit matrix.\nVariances or covariances are estimated using Welford accumulators to avoid a loss of precision over many floating point operations.\n\n\nThe metric can compensate for linear (i.e. global) correlations in the posterior which can dramatically improve the performance of HMC in some problems. This requires knowing the global correlations.\nIn complex models, the global correlations are usually difficult, if not impossible, to derive analytically; for example, nonlinear model components convolve the scales of the data, so standardizing the data does not always help. Therefore, Stan estimates these correlations online with an adaptive warmup. In models with strong nonlinear (i.e. 
local) correlations this learning can be slow, even with regularization. This is ultimately why warmup in Stan often needs to be so long, and why a sufficiently long warmup can yield such substantial performance improvements.\n\n\n\nThe metric compensates for only linear (equivalently global or position-independent) correlations in the posterior. The hierarchical parameterizations, on the other hand, affect some of the nasty nonlinear (equivalently local or position-dependent) correlations common in hierarchical models.3\nOne of the biggest difficulties with dense metrics is the estimation of the metric itself which introduces a bit of a chicken-and-egg scenario; in order to estimate an appropriate metric for sampling, convergence is required, and in order to converge, an appropriate metric is required.\n\n\n\nStatistical models for which sampling is problematic are not typically dominated by linear correlations for which a dense metric can adjust. Rather, they are governed by more complex nonlinear correlations that are best tackled with better parameterizations or more advanced algorithms, such as Riemannian HMC.\n\n\n\nMCMC convergence time is roughly equivalent to the autocorrelation time. Because HMC (and NUTS) chains tend to be lowly autocorrelated they also tend to converge quite rapidly.\nThis only applies when there is uniformity of curvature across the posterior, an assumption which is violated in many complex models. Quite often, the tails have large curvature while the bulk of the posterior mass is relatively well-behaved; in other words, warmup is slow not because the actual convergence time is slow but rather because the cost of an HMC iteration is more expensive out in the tails.\nPoor behavior in the tails is the kind of pathology that can be uncovered by running only a few warmup iterations. 
Looking at the acceptance probabilities and step sizes of the first few iterations provides an idea of how bad the problem is and whether it must be addressed with modeling efforts such as tighter priors or reparameterizations.\n\n\n\n\nThe no-U-turn sampler (NUTS) automatically selects an appropriate number of leapfrog steps in each iteration in order to allow the proposals to traverse the posterior without doing unnecessary work. The motivation is to maximize the expected squared jump distance (see, e.g., Roberts, Gelman, and Gilks (1997)) at each step and avoid the random-walk behavior that arises in random-walk Metropolis or Gibbs samplers when there is correlation in the posterior. For a precise definition of the NUTS algorithm and a proof of detailed balance, see Hoffman and Gelman (2014).\nNUTS generates a proposal by starting at an initial position determined by the parameters drawn in the last iteration. It then generates an independent standard normal random momentum vector. It then evolves the initial system both forwards and backwards in time to form a balanced binary tree. At each iteration of the NUTS algorithm the tree depth is increased by one, doubling the number of leapfrog steps and effectively doubling the computation time. The algorithm terminates in one of two ways, either\n\nthe NUTS criterion (i.e., a U-turn in Euclidean space on a subtree) is satisfied for a new subtree or the completed tree, or\nthe depth of the completed tree hits the maximum depth allowed.\n\nRather than using a standard Metropolis step, the final parameter value is selected via multinomial sampling with a bias toward the second half of the steps in the trajectory Betancourt (2016b).4\nConfiguring the no-U-turn sampler involves putting a cap on the depth of the trees that it evaluates during each iteration. This is controlled through a maximum depth parameter. 
The number of leapfrog steps taken is then bounded by 2 to the power of the maximum depth minus 1.\nBoth the tree depth and the actual number of leapfrog steps computed are reported along with the parameters in the output as treedepth__ and n_leapfrog__, respectively. Because the final subtree may only be partially constructed, these two will always satisfy\n\\[\n2^{\\mathrm{treedepth} - 1} - 1\n\\ < \\\nN_{\\mathrm{leapfrog}}\n\\ \\le \\\n2^{\\mathrm{treedepth} } - 1.\n\\]\nTree depth is an important diagnostic tool for NUTS. For example, a tree depth of zero occurs when the first leapfrog step is immediately rejected and the initial state returned, indicating extreme curvature and poorly-chosen step size (at least relative to the current position). On the other hand, a tree depth equal to the maximum depth indicates that NUTS is taking many leapfrog steps and being terminated prematurely to avoid excessively long execution time. Taking very many steps may be a sign of poor adaptation, may be due to targeting a very high acceptance rate, or may simply indicate a difficult posterior from which to sample. In the latter case, reparameterization may help with efficiency. 
But in the rare cases where the model is correctly specified and a large number of steps is necessary, the maximum depth should be increased to ensure that the NUTS tree can grow as large as necessary.\n\n\n\n\nIn some situations, such as pure forward data simulation in a directed graphical model (e.g., where you can work down generatively from known hyperpriors to simulate parameters and data), there is no need to declare any parameters in Stan, the model block will be empty (and thus can be omitted), and all output quantities will be produced in the generated quantities block.\nFor example, to generate a sequence of \\(N\\) draws from a binomial with trials \\(K\\) and chance of success \\(\\theta\\), the following program suffices.\ndata {\n real<lower=0, upper=1> theta;\n int<lower=0> K;\n int<lower=0> N;\n}\ngenerated quantities {\n array[N] int<lower=0, upper=K> y;\n for (n in 1:N) {\n y[n] = binomial_rng(K, theta);\n }\n}\nFor this model, the sampler must be configured to use the fixed-parameters setting because there are no parameters. Without parameter sampling there is no need for adaptation and the number of warmup iterations should be set to zero.\nMost models that are written to be sampled without parameters will not declare any parameters, instead putting anything parameter-like in the data block. Nevertheless, it is possible to include parameters for fixed-parameters sampling and initialize them in any of the usual ways (randomly, fixed to zero on the unconstrained scale, or with user-specified values). For example, theta in the example above could be declared as a parameter and initialized as a parameter.\n\n\n\nStan’s interfaces provide a number of configuration options that are shared among the MCMC algorithms (this chapter), the optimization algorithms chapter, and the diagnostics chapter.\n\n\nThe random-number generator’s behavior is fully determined by the unsigned seed (positive integer) it is started with. 
If a seed is not specified, or a seed of 0 or less is specified, the system time is used to generate a seed. The seed is recorded and included with Stan’s output regardless of whether it was specified or generated randomly from the system time.\nStan also allows a chain identifier to be specified, which is useful when running multiple Markov chains for sampling. The chain identifier is used to advance the random number generator a very large number of random variates so that two chains with different identifiers draw from non-overlapping subsequences of the random-number sequence determined by the seed. When running multiple chains from a single command, Stan’s interfaces will manage the chain identifiers.\n\n\nTogether, the seed and chain identifier determine the behavior of the underlying random number generator. For complete reproducibility, every aspect of the environment needs to be locked down from the OS and version to the C++ compiler and version to the version of Stan and all dependent libraries.\n\n\n\n\nThe initial parameter values for Stan’s algorithms (MCMC, optimization, or diagnostic) may be either specified by the user or generated randomly. If user-specified values are provided, all parameters must be given initial values or Stan will abort with an error message.\n\n\nIf the user specifies initial values, they must satisfy the constraints declared in the model (i.e., they are on the constrained scale).\n\n\n\nIt is also possible to provide an initialization of 0, which causes all variables to be initialized with zero values on the unconstrained scale. 
The transforms are arranged in such a way that zero initialization provides reasonable variable initializations for most parameters, such as 0 for unconstrained parameters, 1 for parameters constrained to be positive, 0.5 for variables constrained to lie between 0 and 1, a symmetric (uniform) vector for simplexes, unit matrices for both correlation and covariance matrices, and so on.\n\n\n\nRandom initialization by default initializes the parameter values with values drawn at random from a \\(\\mathsf{Uniform}(-2, 2)\\) distribution. Alternatively, a value other than 2 may be specified for the absolute bounds. These values are on the unconstrained scale, so must be inverse transformed back to satisfy the constraints declared for parameters.\nBecause zero is chosen to be a reasonable default initial value for most parameters, the interval around zero provides a fairly diffuse starting point. For instance, unconstrained variables are initialized randomly in \\((-2, 2)\\), variables constrained to be positive are initialized roughly in \\((0.14, 7.4)\\), variables constrained to fall between 0 and 1 are initialized with values roughly in \\((0.12, 0.88)\\).\n\n\n\n\n\nThe Hamiltonian Monte Carlo algorithms (HMC and NUTS) simulate the trajectory of a fictitious particle representing parameter values when subject to a potential energy field, the value of which at a point is the negative log posterior density (up to a constant that does not depend on location). Random momentum is imparted independently in each direction, by drawing from a standard normal distribution. The Hamiltonian is defined to be the sum of the potential energy and kinetic energy of the system. The key feature of the Hamiltonian is that it is conserved along the trajectory the particle moves.\nIn Stan, we use the leapfrog algorithm to simulate the path of a particle along the trajectory defined by the initial random momentum and the potential energy field. 
This is done by alternating updates of the position based on the momentum and the momentum based on the position. The momentum updates involve the potential energy and are applied along the gradient. This is essentially a stepwise (discretized) first-order approximation of the trajectory. Leimkuhler and Reich (2004) provide details and error analysis for the leapfrog algorithm.\nA divergence arises when the simulated Hamiltonian trajectory departs from the true trajectory as measured by departure of the Hamiltonian value from its initial value. When this divergence is too high,5 the simulation has gone off the rails and cannot be trusted. The positions along the simulated trajectory after the Hamiltonian diverges will never be selected as the next draw of the MCMC algorithm, potentially reducing Hamiltonian Monte Carlo to a simple random walk and biasing estimates by not being able to thoroughly explore the posterior distribution. Betancourt (2016a) provides details of the theory, computation, and practical implications of divergent transitions in Hamiltonian Monte Carlo.\nThe Stan interfaces report divergences as warnings and provide ways to access which iterations encountered divergences. ShinyStan provides visualizations that highlight the starting point of divergent transitions to diagnose where the divergences arise in parameter space. A common location is in the neck of the funnel in a centered parameterization, an example of which is provided in the user’s guide.\nIf the posterior is highly curved, very small step sizes are required for this gradient-based simulation of the Hamiltonian to be accurate. When the step size is too large (relative to the curvature), the simulation diverges from the true Hamiltonian. 
This definition is imprecise in the same way that stiffness for a differential equation is imprecise; both are defined by the way they cause traditional stepwise algorithms to diverge from where they should be.\nThe primary cause of divergent transitions in Euclidean HMC (other than bugs in the code) is highly varying posterior curvature, for which small step sizes are too inefficient in some regions and diverge in other regions. If the step size is too small, the sampler becomes inefficient and halts before making a U-turn (hits the maximum tree depth in NUTS); if the step size is too large, the Hamiltonian simulation diverges.\n\n\nIn some cases, simply lowering the initial step size and increasing the target acceptance rate will keep the step size small enough that sampling can proceed. In other cases, a reparameterization is required so that the posterior curvature is more manageable; see the funnel example in the user’s guide for an example.\nBefore reparameterization, it may be helpful to plot the posterior draws, highlighting the divergent transitions to see where they arise. This is marked as a divergent transition in the interfaces; for example, ShinyStan and RStan have special plotting facilities to highlight where divergent transitions arise.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "MCMC Sampling" + ] + }, + { + "objectID": "reference-manual/mcmc.html#hamiltonian-monte-carlo", + "href": "reference-manual/mcmc.html#hamiltonian-monte-carlo", + "title": "MCMC Sampling", + "section": "", + "text": "Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) method that uses the derivatives of the density function being sampled to generate efficient transitions spanning the posterior (see, e.g., Betancourt and Girolami (2013), Neal (2011) for more details). 
It uses an approximate Hamiltonian dynamics simulation based on numerical integration which is then corrected by performing a Metropolis acceptance step.\nThis section translates the presentation of HMC by Betancourt and Girolami (2013) into the notation of Gelman et al. (2013).\n\n\nThe goal of sampling is to draw from a density \\(p(\\theta)\\) for parameters \\(\\theta\\). This is typically a Bayesian posterior \\(p(\\theta|y)\\) given data \\(y\\), and in particular, a Bayesian posterior coded as a Stan program.\n\n\n\nHMC introduces auxiliary momentum variables \\(\\rho\\) and draws from a joint density\n\\[\np(\\rho, \\theta) = p(\\rho | \\theta) p(\\theta).\n\\]\nIn most applications of HMC, including Stan, the auxiliary density is a multivariate normal that does not depend on the parameters \\(\\theta\\),\n\\[\n\\rho \\sim \\mathsf{MultiNormal}(0, M).\n\\]\n\\(M\\) is the Euclidean metric. It can be seen as a transform of parameter space that makes sampling more efficient; see Betancourt (2017) for details.\nBy default Stan sets \\(M^{-1}\\) equal to a diagonal estimate of the covariance computed during warmup.\n\n\n\nThe joint density \\(p(\\rho, \\theta)\\) defines a Hamiltonian\n\\[\n\\begin{array}{rcl}\nH(\\rho, \\theta) & = & - \\log p(\\rho, \\theta)\n\\\\[3pt]\n& = & - \\log p(\\rho | \\theta) - \\log p(\\theta).\n\\\\[3pt]\n& = & T(\\rho | \\theta) + V(\\theta),\n\\end{array}\n\\]\nwhere the term\n\\[\nT(\\rho | \\theta) = - \\log p(\\rho | \\theta)\n\\]\nis called the “kinetic energy” and the term\n\\[\nV(\\theta) = - \\log p(\\theta)\n\\]\nis called the “potential energy.” The potential energy is specified by the Stan program through its definition of a log density.\n\n\n\nStarting from the current value of the parameters \\(\\theta\\), a transition to a new state is generated in two stages before being subjected to a Metropolis accept step.\nFirst, a value for the momentum is drawn independently of the current parameter values,\n\\[\n\\rho \\sim 
\\mathsf{MultiNormal}(0, M).\n\\]\nThus momentum does not persist across iterations.\nNext, the joint system \\((\\theta,\\rho)\\) made up of the current parameter values \\(\\theta\\) and new momentum \\(\\rho\\) is evolved via Hamilton’s equations,\n\\[\n\\begin{array}{rcccl}\n\\displaystyle\n\\frac{d\\theta}{dt}\n& = &\n\\displaystyle\n+ \\frac{\\partial H}{\\partial \\rho}\n& = &\n\\displaystyle\n+ \\frac{\\partial T}{\\partial \\rho}\n\\\\[12pt]\n\\displaystyle\n\\frac{d\\rho}{dt}\n& = &\n\\displaystyle\n- \\frac{\\partial H}{\\partial \\theta }\n& = &\n\\displaystyle\n- \\frac{\\partial T}{\\partial \\theta}\n- \\frac{\\partial V}{\\partial \\theta}.\n\\end{array}\n\\]\nWith the momentum density being independent of the target density, i.e., \\(p(\\rho | \\theta) = p(\\rho)\\), the first term in the momentum time derivative, \\({\\partial T} / {\\partial \\theta}\\) is zero, yielding the pair of time derivatives\n\\[\n\\begin{array}{rcl}\n\\frac{d \\theta}{d t} & = & +\\frac{\\partial T}{\\partial \\rho}\n\\\\[2pt]\n\\frac{d \\rho}{d t} & = & -\\frac{\\partial V}{\\partial \\theta}.\n\\end{array}\n\\]\n\n\n\nThe last section leaves a two-state differential equation to solve. Stan, like most other HMC implementations, uses the leapfrog integrator, which is a numerical integration algorithm that’s specifically adapted to provide stable results for Hamiltonian systems of equations.\nLike most numerical integrators, the leapfrog algorithm takes discrete steps of some small time interval \\(\\epsilon\\). 
The leapfrog algorithm begins by drawing a fresh momentum term independently of the parameter values \\(\\theta\\) or previous momentum value.\n\\[\n\\rho \\sim \\mathsf{MultiNormal}(0, M).\n\\] It then alternates half-step updates of the momentum and full-step updates of the position.\n\\[\n\\begin{array}{rcl}\n\\rho & \\leftarrow\n & \\rho \\, - \\, \\frac{\\epsilon}{2} \\frac{\\partial V}{\\partial \\theta}\n\\\\[6pt]\n\\theta & \\leftarrow\n & \\theta \\, + \\, \\epsilon \\, M^{-1} \\, \\rho\n\\\\[6pt]\n\\rho & \\leftarrow\n & \\rho \\, - \\, \\frac{\\epsilon}{2} \\frac{\\partial V}{\\partial \\theta}.\n\\end{array}\n\\]\nBy applying \\(L\\) leapfrog steps, a total of \\(L \\, \\epsilon\\) time is simulated. The resulting state at the end of the simulation (\\(L\\) repetitions of the above three steps) will be denoted \\((\\rho^{*}, \\theta^{*})\\).\nThe leapfrog integrator’s error is on the order of \\(\\epsilon^3\\) per step and \\(\\epsilon^2\\) globally, where \\(\\epsilon\\) is the time interval (also known as the step size); Leimkuhler and Reich (2004) provide a detailed analysis of numerical integration for Hamiltonian systems, including a derivation of the error bound for the leapfrog integrator.\n\n\n\nIf the leapfrog integrator were perfect numerically, there would be no need to do any more randomization per transition than generating a random momentum vector. Instead, what is done in practice to account for numerical errors during integration is to apply a Metropolis acceptance step, where the probability of keeping the proposal \\((\\rho^{*}, \\theta^{*})\\) generated by transitioning from \\((\\rho, \\theta)\\) is\n\\[\n\\min \\!\n\\left(\n1,\n\\ \\exp \\! 
\\left( H(\\rho, \\theta) - H(\\rho^{*}, \\theta^{*}) \\right)\n\\right).\n\\]\nIf the proposal is not accepted, the previous parameter value is returned for the next draw and used to initialize the next iteration.\n\n\n\nThe Hamiltonian Monte Carlo algorithm starts at a specified initial set of parameters \\(\\theta\\); in Stan, this value is either user-specified or generated randomly. Then, for a given number of iterations, a new momentum vector is sampled and the current value of the parameter \\(\\theta\\) is updated using the leapfrog integrator with discretization time \\(\\epsilon\\) and number of steps \\(L\\) according to the Hamiltonian dynamics. Then a Metropolis acceptance step is applied, and a decision is made whether to update to the new state \\((\\theta^{*}, \\rho^{*})\\) or keep the existing state.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "MCMC Sampling" + ] + }, + { + "objectID": "reference-manual/mcmc.html#hmc-algorithm-parameters", + "href": "reference-manual/mcmc.html#hmc-algorithm-parameters", + "title": "MCMC Sampling", + "section": "", + "text": "The Hamiltonian Monte Carlo algorithm has three parameters which must be set,\n\ndiscretization time \\(\\epsilon\\),\nmetric \\(M\\), and\nnumber of steps taken \\(L\\).\n\nIn practice, sampling efficiency, both in terms of iteration speed and iterations per effective sample, is highly sensitive to these three tuning parameters Neal (2011), Hoffman and Gelman (2014).\nIf \\(\\epsilon\\) is too large, the leapfrog integrator will be inaccurate and too many proposals will be rejected. If \\(\\epsilon\\) is too small, too many small steps will be taken by the leapfrog integrator leading to long simulation times per interval. Thus the goal is to balance the acceptance rate between these extremes.\nIf \\(L\\) is too small, the trajectory traced out in each iteration will be too short and sampling will devolve to a random walk. 
If \\(L\\) is too large, the algorithm will do too much work on each iteration.\nIf the inverse metric \\(M^{-1}\\) is a poor estimate of the posterior covariance, the step size \\(\\epsilon\\) must be kept small to maintain arithmetic precision. This would lead to a large \\(L\\) to compensate.\n\n\nThe actual integration time is \\(L \\, \\epsilon\\), a function of number of steps. Some interfaces to Stan set an approximate integration time \\(t\\) and the discretization interval (step size) \\(\\epsilon\\). In these cases, the number of steps will be rounded down as\n\\[\nL = \\left\\lfloor \\frac{t}{\\epsilon} \\right\\rfloor.\n\\]\nand the actual integration time will still be \\(L \\, \\epsilon\\).\n\n\n\nStan is able to automatically optimize \\(\\epsilon\\) to match an acceptance-rate target, able to estimate \\(M\\) based on warmup sample iterations, and able to dynamically adapt \\(L\\) on the fly during sampling (and during warmup) using the no-U-turn sampling (NUTS) algorithm Hoffman and Gelman (2014).\nWarmup Epochs Figure. Adaptation during warmup occurs in three stages: an initial fast adaptation interval (I), a series of expanding slow adaptation intervals (II), and a final fast adaptation interval (III). For HMC, both the fast and slow intervals are used for adapting the step size, while the slow intervals are used for learning the (co)variance necessitated by the metric. Iteration numbering starts at 1 on the left side of the figure and increases to the right.\n\nWhen adaptation is engaged (it may be turned off by fixing a step size and metric), the warmup period is split into three stages, as illustrated in the warmup adaptation figure, with two fast intervals surrounding a series of growing slow intervals. Here fast and slow refer to parameters that adapt using local and global information, respectively; the Hamiltonian Monte Carlo samplers, for example, define the step size as a fast parameter and the (co)variance as a slow parameter. 
The size of the initial and final fast intervals and the initial size of the slow interval are all customizable, although user-specified values may be modified slightly in order to ensure alignment with the warmup period.\nThe motivation behind this partitioning of the warmup period is to allow for more robust adaptation. The stages are as follows.\n\nIn the initial fast interval the chain is allowed to converge towards the typical set,1 with only parameters that can learn from local information adapted.\nAfter this initial stage parameters that require global information, for example (co)variances, are estimated in a series of expanding, memoryless windows; often fast parameters will be adapted here as well.\nLastly, the fast parameters are allowed to adapt to the final update of the slow parameters.\n\nThese intervals may be controlled through the following configuration parameters, all of which must be positive integers:\nAdaptation Parameters Table. The parameters controlling adaptation and their default values.\n\n\n\n\n\n\n\n\nparameter\ndescription\ndefault\n\n\n\n\ninitial buffer\nwidth of initial fast adaptation interval\n75\n\n\nterm buffer\nwidth of final fast adaptation interval\n50\n\n\nwindow\ninitial width of slow adaptation interval\n25\n\n\n\n\n\n\nStan’s HMC algorithms utilize dual averaging Nesterov (2009) to optimize the step size.2\nThis warmup optimization procedure is extremely flexible and for completeness, Stan exposes each tuning option for dual averaging, using the notation of Hoffman and Gelman (2014). In practice, the efficacy of the optimization is sensitive to the value of these parameters, but we do not recommend changing the defaults without experience with the dual-averaging algorithm. 
For more information, see the discussion of dual averaging in Hoffman and Gelman (2014).\nThe full set of dual-averaging parameters are:\nStep Size Adaptation Parameters Table The parameters controlling step size adaptation, with constraints and default values.\n\n\n\nparameter\ndescription\nconstraint\ndefault\n\n\n\n\ndelta\ntarget Metropolis acceptance rate\n[0, 1]\n0.8\n\n\ngamma\nadaptation regularization scale\n(0, infty)\n0.05\n\n\nkappa\nadaptation relaxation exponent\n(0, infty)\n0.75\n\n\nt_0\nadaptation iteration offset\n(0, infty)\n10\n\n\n\nBy setting the target acceptance parameter \\(\\delta\\) to a value closer to 1 (its value must be strictly less than 1 and its default value is 0.8), adaptation will be forced to use smaller step sizes. This can improve sampling efficiency (effective sample size per iteration) at the cost of increased iteration times. Raising the value of \\(\\delta\\) will also allow some models that would otherwise get stuck to overcome their blockages.\n\n\n\nAll implementations of HMC use numerical integrators requiring a step size (equivalently, discretization time interval). Stan allows the step size to be adapted or set explicitly. Stan also allows the step size to be “jittered” randomly during sampling to avoid any poor interactions with a fixed step size and regions of high curvature. The jitter is a proportion that may be added or subtracted, so the maximum amount of jitter is 1, which will cause step sizes to be selected in the range of 0 to twice the adapted step size. The default value is 0, producing no jitter.\nSmall step sizes can get HMC samplers unstuck that would otherwise get stuck with higher step sizes. The downside is that jittering below the adapted value will increase the number of leapfrog steps required and thus slow down iterations, whereas jittering above the adapted value can cause premature rejection due to simulation error in the Hamiltonian dynamics calculation. 
See Neal (2011) for further discussion of step-size jittering.\n\n\n\nAll HMC implementations in Stan utilize quadratic kinetic energy functions which are specified up to the choice of a symmetric, positive-definite matrix known as a mass matrix or, more formally, a metric Betancourt (2017).\nIf the metric is constant then the resulting implementation is known as Euclidean HMC. Stan allows a choice among three Euclidean HMC implementations,\n\na unit metric (diagonal matrix of ones),\na diagonal metric (diagonal matrix with positive diagonal entries), and\na dense metric (a dense, symmetric positive definite matrix)\n\nto be configured by the user.\nIf the metric is specified to be diagonal, then regularized variances are estimated based on the iterations in each slow-stage block (labeled II in the warmup adaptation stages figure). Each of these estimates is based only on the iterations in that block. This allows early estimates to be used to help guide warmup and then be forgotten later so that they do not influence the final covariance estimate.\nIf the metric is specified to be dense, then regularized covariance estimates will be carried out, regularizing the estimate to a diagonal matrix, which is itself regularized toward a unit matrix.\nVariances or covariances are estimated using Welford accumulators to avoid a loss of precision over many floating point operations.\n\n\nThe metric can compensate for linear (i.e. global) correlations in the posterior which can dramatically improve the performance of HMC in some problems. This requires knowing the global correlations.\nIn complex models, the global correlations are usually difficult, if not impossible, to derive analytically; for example, nonlinear model components convolve the scales of the data, so standardizing the data does not always help. Therefore, Stan estimates these correlations online with an adaptive warmup. In models with strong nonlinear (i.e. 
local) correlations this learning can be slow, even with regularization. This is ultimately why warmup in Stan often needs to be so long, and why a sufficiently long warmup can yield such substantial performance improvements.\n\n\n\nThe metric compensates for only linear (equivalently global or position-independent) correlations in the posterior. The hierarchical parameterizations, on the other hand, affect some of the nasty nonlinear (equivalently local or position-dependent) correlations common in hierarchical models.3\nOne of the biggest difficulties with dense metrics is the estimation of the metric itself which introduces a bit of a chicken-and-egg scenario; in order to estimate an appropriate metric for sampling, convergence is required, and in order to converge, an appropriate metric is required.\n\n\n\nStatistical models for which sampling is problematic are not typically dominated by linear correlations for which a dense metric can adjust. Rather, they are governed by more complex nonlinear correlations that are best tackled with better parameterizations or more advanced algorithms, such as Riemannian HMC.\n\n\n\nMCMC convergence time is roughly equivalent to the autocorrelation time. Because HMC (and NUTS) chains tend to be lowly autocorrelated they also tend to converge quite rapidly.\nThis only applies when there is uniformity of curvature across the posterior, an assumption which is violated in many complex models. Quite often, the tails have large curvature while the bulk of the posterior mass is relatively well-behaved; in other words, warmup is slow not because the actual convergence time is slow but rather because the cost of an HMC iteration is more expensive out in the tails.\nPoor behavior in the tails is the kind of pathology that can be uncovered by running only a few warmup iterations. 
Looking at the acceptance probabilities and step sizes of the first few iterations provides an idea of how bad the problem is and whether it must be addressed with modeling efforts such as tighter priors or reparameterizations.\n\n\n\n\nThe no-U-turn sampler (NUTS) automatically selects an appropriate number of leapfrog steps in each iteration in order to allow the proposals to traverse the posterior without doing unnecessary work. The motivation is to maximize the expected squared jump distance (see, e.g., Roberts, Gelman, and Gilks (1997)) at each step and avoid the random-walk behavior that arises in random-walk Metropolis or Gibbs samplers when there is correlation in the posterior. For a precise definition of the NUTS algorithm and a proof of detailed balance, see Hoffman and Gelman (2014).\nNUTS generates a proposal by starting at an initial position determined by the parameters drawn in the last iteration. It then generates an independent standard normal random momentum vector. It then evolves the initial system both forwards and backwards in time to form a balanced binary tree. At each iteration of the NUTS algorithm the tree depth is increased by one, doubling the number of leapfrog steps and effectively doubling the computation time. The algorithm terminates in one of two ways, either\n\nthe NUTS criterion (i.e., a U-turn in Euclidean space on a subtree) is satisfied for a new subtree or the completed tree, or\nthe depth of the completed tree hits the maximum depth allowed.\n\nRather than using a standard Metropolis step, the final parameter value is selected via multinomial sampling with a bias toward the second half of the steps in the trajectory Betancourt (2016b).4\nConfiguring the no-U-turn sampler involves putting a cap on the depth of the trees that it evaluates during each iteration. This is controlled through a maximum depth parameter. 
The number of leapfrog steps taken is then bounded by 2 to the power of the maximum depth minus 1.\nBoth the tree depth and the actual number of leapfrog steps computed are reported along with the parameters in the output as treedepth__ and n_leapfrog__, respectively. Because the final subtree may only be partially constructed, these two will always satisfy\n\\[\n2^{\\mathrm{treedepth} - 1} - 1\n\\ < \\\nN_{\\mathrm{leapfrog}}\n\\ \\le \\\n2^{\\mathrm{treedepth} } - 1.\n\\]\nTree depth is an important diagnostic tool for NUTS. For example, a tree depth of zero occurs when the first leapfrog step is immediately rejected and the initial state returned, indicating extreme curvature and poorly-chosen step size (at least relative to the current position). On the other hand, a tree depth equal to the maximum depth indicates that NUTS is taking many leapfrog steps and being terminated prematurely to avoid excessively long execution time. Taking very many steps may be a sign of poor adaptation, may be due to targeting a very high acceptance rate, or may simply indicate a difficult posterior from which to sample. In the latter case, reparameterization may help with efficiency. 
But in the rare cases where the model is correctly specified and a large number of steps is necessary, the maximum depth should be increased to ensure that the NUTS tree can grow as large as necessary.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "MCMC Sampling" + ] + }, + { + "objectID": "reference-manual/mcmc.html#sampling-without-parameters", + "href": "reference-manual/mcmc.html#sampling-without-parameters", + "title": "MCMC Sampling", + "section": "", + "text": "In some situations, such as pure forward data simulation in a directed graphical model (e.g., where you can work down generatively from known hyperpriors to simulate parameters and data), there is no need to declare any parameters in Stan, the model block will be empty (and thus can be omitted), and all output quantities will be produced in the generated quantities block.\nFor example, to generate a sequence of \\(N\\) draws from a binomial with trials \\(K\\) and chance of success \\(\\theta\\), the following program suffices.\ndata {\n real<lower=0, upper=1> theta;\n int<lower=0> K;\n int<lower=0> N;\n}\ngenerated quantities {\n array[N] int<lower=0, upper=K> y;\n for (n in 1:N) {\n y[n] = binomial_rng(K, theta);\n }\n}\nFor this model, the sampler must be configured to use the fixed-parameters setting because there are no parameters. Without parameter sampling there is no need for adaptation and the number of warmup iterations should be set to zero.\nMost models that are written to be sampled without parameters will not declare any parameters, instead putting anything parameter-like in the data block. Nevertheless, it is possible to include parameters for fixed-parameters sampling and initialize them in any of the usual ways (randomly, fixed to zero on the unconstrained scale, or with user-specified values). 
For example, theta in the example above could be declared as a parameter and initialized as a parameter.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "MCMC Sampling" + ] + }, + { + "objectID": "reference-manual/mcmc.html#general-config.section", + "href": "reference-manual/mcmc.html#general-config.section", + "title": "MCMC Sampling", + "section": "", + "text": "Stan’s interfaces provide a number of configuration options that are shared among the MCMC algorithms (this chapter), the optimization algorithms chapter, and the diagnostics chapter.\n\n\nThe random-number generator’s behavior is fully determined by the unsigned seed (positive integer) it is started with. If a seed is not specified, or a seed of 0 or less is specified, the system time is used to generate a seed. The seed is recorded and included with Stan’s output regardless of whether it was specified or generated randomly from the system time.\nStan also allows a chain identifier to be specified, which is useful when running multiple Markov chains for sampling. The chain identifier is used to advance the random number generator a very large number of random variates so that two chains with different identifiers draw from non-overlapping subsequences of the random-number sequence determined by the seed. When running multiple chains from a single command, Stan’s interfaces will manage the chain identifiers.\n\n\nTogether, the seed and chain identifier determine the behavior of the underlying random number generator. For complete reproducibility, every aspect of the environment needs to be locked down from the OS and version to the C++ compiler and version to the version of Stan and all dependent libraries.\n\n\n\n\nThe initial parameter values for Stan’s algorithms (MCMC, optimization, or diagnostic) may be either specified by the user or generated randomly. 
If user-specified values are provided, all parameters must be given initial values or Stan will abort with an error message.\n\n\nIf the user specifies initial values, they must satisfy the constraints declared in the model (i.e., they are on the constrained scale).\n\n\n\nIt is also possible to provide an initialization of 0, which causes all variables to be initialized with zero values on the unconstrained scale. The transforms are arranged in such a way that zero initialization provides reasonable variable initializations for most parameters, such as 0 for unconstrained parameters, 1 for parameters constrained to be positive, 0.5 for variables constrained to lie between 0 and 1, a symmetric (uniform) vector for simplexes, unit matrices for both correlation and covariance matrices, and so on.\n\n\n\nRandom initialization by default initializes the parameter values with values drawn at random from a \\(\\mathsf{Uniform}(-2, 2)\\) distribution. Alternatively, a value other than 2 may be specified for the absolute bounds. These values are on the unconstrained scale, so must be inverse transformed back to satisfy the constraints declared for parameters.\nBecause zero is chosen to be a reasonable default initial value for most parameters, the interval around zero provides a fairly diffuse starting point. 
For instance, unconstrained variables are initialized randomly in \\((-2, 2)\\), variables constrained to be positive are initialized roughly in \\((0.14, 7.4)\\), variables constrained to fall between 0 and 1 are initialized with values roughly in \\((0.12, 0.88)\\).", + "crumbs": [ + "Reference Manual", + "Algorithms", + "MCMC Sampling" + ] + }, + { + "objectID": "reference-manual/mcmc.html#divergent-transitions", + "href": "reference-manual/mcmc.html#divergent-transitions", + "title": "MCMC Sampling", + "section": "", + "text": "The Hamiltonian Monte Carlo algorithms (HMC and NUTS) simulate the trajectory of a fictitious particle representing parameter values when subject to a potential energy field, the value of which at a point is the negative log posterior density (up to a constant that does not depend on location). Random momentum is imparted independently in each direction, by drawing from a standard normal distribution. The Hamiltonian is defined to be the sum of the potential energy and kinetic energy of the system. The key feature of the Hamiltonian is that it is conserved along the trajectory the particle moves.\nIn Stan, we use the leapfrog algorithm to simulate the path of a particle along the trajectory defined by the initial random momentum and the potential energy field. This is done by alternating updates of the position based on the momentum and the momentum based on the position. The momentum updates involve the potential energy and are applied along the gradient. This is essentially a stepwise (discretized) first-order approximation of the trajectory. Leimkuhler and Reich (2004) provide details and error analysis for the leapfrog algorithm.\nA divergence arises when the simulated Hamiltonian trajectory departs from the true trajectory as measured by departure of the Hamiltonian value from its initial value. When this divergence is too high,5 the simulation has gone off the rails and cannot be trusted. 
The positions along the simulated trajectory after the Hamiltonian diverges will never be selected as the next draw of the MCMC algorithm, potentially reducing Hamiltonian Monte Carlo to a simple random walk and biasing estimates by not being able to thoroughly explore the posterior distribution. Betancourt (2016a) provides details of the theory, computation, and practical implications of divergent transitions in Hamiltonian Monte Carlo.\nThe Stan interfaces report divergences as warnings and provide ways to access which iterations encountered divergences. ShinyStan provides visualizations that highlight the starting point of divergent transitions to diagnose where the divergences arise in parameter space. A common location is in the neck of the funnel in a centered parameterization, an example of which is provided in the user’s guide.\nIf the posterior is highly curved, very small step sizes are required for this gradient-based simulation of the Hamiltonian to be accurate. When the step size is too large (relative to the curvature), the simulation diverges from the true Hamiltonian. This definition is imprecise in the same way that stiffness for a differential equation is imprecise; both are defined by the way they cause traditional stepwise algorithms to diverge from where they should be.\nThe primary cause of divergent transitions in Euclidean HMC (other than bugs in the code) is highly varying posterior curvature, for which small step sizes are too inefficient in some regions and diverge in other regions. If the step size is too small, the sampler becomes inefficient and halts before making a U-turn (hits the maximum tree depth in NUTS); if the step size is too large, the Hamiltonian simulation diverges.\n\n\nIn some cases, simply lowering the initial step size and increasing the target acceptance rate will keep the step size small enough that sampling can proceed. 
In other cases, a reparameterization is required so that the posterior curvature is more manageable; see the funnel example in the user’s guide for an example.\nBefore reparameterization, it may be helpful to plot the posterior draws, highlighting the divergent transitions to see where they arise. This is marked as a divergent transition in the interfaces; for example, ShinyStan and RStan have special plotting facilities to highlight where divergent transitions arise.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "MCMC Sampling" + ] + }, + { + "objectID": "reference-manual/mcmc.html#footnotes", + "href": "reference-manual/mcmc.html#footnotes", + "title": "MCMC Sampling", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe typical set is a concept borrowed from information theory and refers to the neighborhood (or neighborhoods in multimodal models) of substantial posterior probability mass through which the Markov chain will travel in equilibrium.↩︎\nThis optimization of step size during adaptation of the sampler should not be confused with running Stan’s optimization method.↩︎\nIn Riemannian HMC the metric compensates for nonlinear correlations.↩︎\nStan previously used slice sampling along the trajectory, following the original NUTS paper of Hoffman and Gelman (2014).↩︎\nThe current default threshold is a factor of \\(10^3\\), whereas when the leapfrog integrator is working properly, the divergences will be around \\(10^{-7}\\) and do not compound due to the symplectic nature of the leapfrog integrator.↩︎", + "crumbs": [ + "Reference Manual", + "Algorithms", + "MCMC Sampling" + ] + }, + { + "objectID": "reference-manual/laplace_embedded.html", + "href": "reference-manual/laplace_embedded.html", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "The embedded Laplace approximation replaces explicit sampling of potentially high-dimensional Gaussian latent variables with a local Gaussian approximation, marginalizing them out so that 
inference proceeds over the remaining hyperparameters alone. This approach is often referred to as the integrated Laplace approximation, although the exact details of the method can vary. The details of Stan’s implementation can be found in references (Margossian et al. 2020; Margossian 2023).\nA standard approach to fit a latent Gaussian model would be to perform inference jointly over the latent Gaussian variables and the hyperparameters. Instead, the embedded Laplace approximation can be used to do approximate marginalization of the latent Gaussian variables; we can then use any inference algorithm over the remaining hyperparameters. By marginalizing out the latent variables, the sampler explores a lower-dimensional, better-behaved marginal posterior. Individual iterations are more expensive (each requires an inner optimization), but the sampler typically needs far fewer iterations to achieve the same effective sample size. How this trade-off resolves depends on the specific problem at hand.\nFor complete function signatures and the built-in likelihood wrappers (Poisson, Negative Binomial, Bernoulli), see the Embedded Laplace functions reference. For worked examples with full data blocks, see the Gaussian Processes chapter.\n\n\nThe embedded Laplace approximation is used for latent Gaussian models. A latent Gaussian model is defined by three components:\n\n\\(\\phi\\): hyperparameters (e.g., GP kernel length-scale and magnitude, or variance components in a hierarchical model),\n\\(\\theta\\): latent Gaussian variables (the high-dimensional quantity to be marginalized out),\n\\(y\\): observed data.\n\nThese components are related through a hierarchical structure. The hyperparameters \\(\\phi\\) are given a prior \\(p(\\phi)\\). The latent variables \\(\\theta\\) have a multivariate normal prior centered at 0 with covariance matrix \\(K(\\phi)\\). A non-zero mean offset can be incorporated into the likelihood function. 
The observations \\(y\\) have a data model \\(p(y \\mid \\theta, \\phi)\\). The prior on \\(\\theta\\) is centered at zero; an offset can be incorporated into the data model if a non-zero mean is needed.\n\\[\\begin{eqnarray*}\n \\phi & \\sim & p(\\phi) \\\\\n \\theta & \\sim & \\text{Multi-Normal}(0, K(\\phi)) \\\\\n y & \\sim & p(y \\mid \\theta, \\phi).\n\\end{eqnarray*}\\]\nThe generative model above defines a joint distribution over all three quantities, \\(p(\\phi, \\theta, y) = p(\\phi) \\, p(\\theta \\mid \\phi) \\, p(y \\mid \\theta, \\phi)\\). After observing data \\(y\\), Bayes’ theorem gives the joint posterior \\(p(\\phi, \\theta \\mid y) \\propto p(\\phi) \\, p(\\theta \\mid \\phi) \\, p(y \\mid \\theta, \\phi)\\), where \\(p(y \\mid \\theta, \\phi)\\) as function of \\(\\theta\\) and \\(\\phi\\) is the joint likelihood function.\nSampling directly from the joint posterior \\(p(\\phi, \\theta \\mid y)\\) of this model is often difficult. Challenging geometries (e.g., funnels) frustrate inference algorithms, including Hamiltonian Monte Carlo and variational inference. However, the marginal posterior \\(p(\\phi \\mid y)\\) is often well-behaved and low-dimensional, making it much easier to sample. With an embedded Laplace approximation, we can obtain an approximation of the marginal posterior \\(p(\\phi \\mid y)\\). This is done via an intermediate approximation of the conditional posterior \\(p(\\theta \\mid \\phi, y)\\) by a normal distribution and this normal approximation is well-justified when the likelihood \\(p(y \\mid \\theta, \\phi)\\) as function of \\(\\theta\\) is log concave (given that we already have a normal prior \\(p(\\theta \\mid \\phi)\\)). 
Once we obtain (approximate) samples \\(\\phi^{(i)} \\sim p(\\phi \\mid y)\\), we can in turn generate posterior draws for \\(\\theta\\) using the normal approximation to \\(p(\\theta \\mid y, \\phi)\\), evaluated at the posterior draws \\(\\phi^{(i)}\\).\n\n\n\nThe two-step inference strategy for using embedded Laplace in a latent Gaussian model requires approximations to both the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) and the marginal likelihood \\(p(y \\mid \\phi)\\). The Laplace approximation is the normal distribution that matches the mode and curvature of the conditional posterior \\(p(\\theta \\mid y, \\phi)\\). The mode, defined as the value of \\(\\theta\\) that maximizes the conditional posterior, is estimated by a Newton solver, \\[\n \\theta^* = \\underset{\\theta}{\\text{argmax}} \\ p(\\theta \\mid y, \\phi).\n\\]\nSince the approximation is normal, the curvature is matched by setting the covariance to the inverse of the negative Hessian of the log conditional posterior, evaluated at the mode,\n\\[\n \\Sigma^* = \\left( - \\left . 
\\frac{\\partial^2}{\\partial \\theta^2}\n \\log p (\\theta \\mid \\phi, y) \\right |_{\\theta =\\theta^*} \\right)^{-1}.\n\\]\nThe resulting Laplace approximation is a multivariate normal centered at the mode with covariance given by the inverse curvature,\n\\[\n\\hat p_\\mathcal{L} (\\theta \\mid y, \\phi) = \\text{Multi-Normal}(\\theta^*, \\Sigma^*)\n\\approx p(\\theta \\mid y, \\phi).\n\\]\nThis approximation also yields an approximation to the marginal likelihood, obtained by evaluating the prior, likelihood, and approximate posterior at the mode \\(\\theta^*\\),\n\\[\n \\hat p_\\mathcal{L}(y \\mid \\phi) := \\frac{p(\\theta^* \\mid \\phi) \\\n p(y \\mid \\theta^*, \\phi) }{ \\hat p_\\mathcal{L} (\\theta^* \\mid \\phi, y) }\n \\approx p(y \\mid \\phi).\n\\]\nHence, a strategy to approximate the posterior of the latent Gaussian model is to first estimate the marginal posterior \\(\\hat p_\\mathcal{L}(\\phi \\mid y) \\propto p(\\phi) \\hat p_\\mathcal{L} (y \\mid \\phi)\\) using any algorithm supported by Stan. Approximate posterior draws for the latent Gaussian variables are then obtained by first sampling \\(\\phi \\sim \\hat p_\\mathcal{L}(\\phi \\mid y)\\) and then \\(\\theta \\sim \\hat p_\\mathcal{L}(\\theta \\mid \\phi, y)\\).\n\n\n\nThe embedded Laplace approximation presents several trade-offs with standard inference over the joint posterior \\(p(\\theta, \\phi \\mid y)\\). The main advantage of the embedded Laplace approximation is that it side-steps the intricate geometry of hierarchical models. The marginal posterior \\(p(\\phi \\mid y)\\) can then be handled by Hamiltonian Monte Carlo sampling without extensive tuning or reparameterization, and the mixing time is faster, meaning we can run shorter chains to achieve a desired precision. One additional benefit is that approximate methods, e.g. 
variational inference, which work poorly on the joint \\(p(\\theta, \\phi \\mid y)\\) can work well on the marginal posterior \\(p(\\phi \\mid y)\\).\nOn the other hand, the embedded Laplace approximation presents certain disadvantages. First, we need to perform a Laplace approximation each time the log marginal likelihood is evaluated, meaning each iteration can be expensive. Secondly, the approximation can introduce non-negligible error, especially with non-log-concave likelihoods (note the prior is always multivariate normal). How these trade-offs are resolved depends on the application; see Margossian et al. (2020) for some examples.\n\n\nThe quality of the Laplace approximation depends on how close the true conditional posterior \\(p(\\theta \\mid y, \\phi)\\) is to Gaussian.\nWorks well. Log-concave likelihoods, for example from a Poisson model with log link or negative binomial with log link. These produce unimodal conditional posteriors when combined with a Gaussian prior. The approximation error is typically negligible with these likelihoods, especially with moderate-to-large counts (Kuss and Rasmussen 2005; Vanhatalo, Pietiläinen, and Vehtari 2010; Cseke and Heskes 2011; Vehtari et al. 2016). If the likelihood is normal, there is no error in the approximation but in this case the marginalization can be worked analytically and the resulting implementation is much faster than using the embedded Laplace approximation.\nWorks adequately. Bernoulli model with logit link has technically log-concave likelihood, but the likelihood can be very skewed making the Gaussian approximation less accurate than for count data. The embedded Laplace is still useful when \\(\\theta\\) is high-dimensional and joint sampling is infeasible; see Vehtari et al. (2016) and Margossian et al. (2020) for discussion.\nNot appropriate. 
For likelihoods that are not log-concave in \\(\\theta\\), the conditional posterior may be multimodal and the Newton solver finds only a single mode or can fail completely. When \\(\\theta\\) is low-dimensional (a few dozen or fewer), the overhead of the inner optimization may not pay for itself and standard joint HMC sampling is often adequate.\n\n\n\n\nWhen the embedded Laplace approximation does not converge or produces unexpected results, the solver configuration may need adjustment. This section describes the internals of the Newton solver and the options available for tuning it.\n\n\nA critical component of the embedded Laplace approximation is the Newton solver used to estimate the mode \\(\\theta^*\\) of \\(p(\\theta \\mid \\phi, y)\\). The objective function being maximized is the log joint density of the prior and likelihood with respect to \\(\\theta\\).\n\\[\n\\Psi(\\theta) = \\log p(\\theta \\mid \\phi) + \\log p(y \\mid \\theta, \\phi),\n\\]\nConvergence is declared when the change in the objective between successive iterations falls below a tolerance \\(\\Delta\\).\n\\[\n| \\Psi (\\theta^{(i + 1)}) - \\Psi (\\theta^{(i)}) | \\le \\Delta.\n\\]\nThe solver also stops after reaching a pre-specified maximum number of steps. In that case, Stan throws a warning, but still returns the last iteration’s parameters. If you see this warning you should check the diagnostics to understand why the solver failed to converge.\nTo help with cases where the Newton step does not lead to an increase in the objective function, the Newton iteration is augmented with a Wolfe line search to ensure that at each iteration the objective function \\(\\Psi\\) increases. 
Specifically, suppose the objective decreases after a Newton step, indicating the step overshot a region of improvement,\n\\[\n\\Psi (\\theta^{(i + 1)}) < \\Psi (\\theta^{(i)}).\n\\]\nThis can indicate that the Newton step \\(\\alpha\\) at iteration \\(i\\) is too large and that we skipped a region where the objective function increases. In that case, we can fall back to a Wolfe line search to find a step size which satisfies the Wolfe conditions. The Wolfe line search attempts to find a search direction \\(p_i\\) and step size \\(\\alpha_k\\) such that an accepted step sufficiently improves our objective while ensuring the slope at the accepted step is flatter than at our previous position. Together these help push the algorithm towards the mode. Written for the negated objective \\(f = -\\Psi\\), which is to be minimized, and with constants \\(0 < c_1 < c_2 < 1\\), the Wolfe conditions are\n\\[\n\\begin{aligned}\nf(x_i + \\alpha_k p_i) &\\le f(x_i) + c_1 \\alpha_k \\nabla f(x_i)^T p_i, \\\\\n-p^T_i \\nabla f(x_i + \\alpha_k p_i) &\\le -c_2 p^T_i \\nabla f(x_i).\n\\end{aligned}\n\\]\n\\[\n \\theta^{(i + 1)} \\leftarrow \\frac{\\theta^{(i + 1)} + \\theta^{(i)}}{2}.\n\\]\nWe repeat this halving of steps until \\(\\Psi (\\theta^{(i + 1)}) \\ge \\Psi (\\theta^{(i)})\\), or until a maximum number of linesearch steps is reached. For certain problems, adding a linesearch can make the optimization more stable.\n\n\n\nThe embedded Laplace approximation uses a custom Newton solver, specialized to find the mode of \\(p(\\theta \\mid \\phi, y)\\). A key step for efficient optimization is to ensure all matrix inversions are numerically stable. 
This can be done using the Woodbury-Sherman-Morrison formula and requires one of three matrix decompositions:\n\nCholesky decomposition of the Hessian of the negative log likelihood \\(W = - \\partial^2_\\theta \\log p(y \\mid \\theta, \\phi)\\).\nCholesky decomposition of the prior covariance matrix \\(K(\\phi)\\).\nLU-decomposition of \\(I + KW\\), where \\(I\\) is the identity matrix.\n\nThe first solver (1) should be used if the Hessian of the negative log likelihood, \\(W\\), is positive-definite. Otherwise the user should rely on (2). 
In rarer cases where it is not numerically safe to invert the covariance matrix \\(K\\), users can use the third solver as a last-resort option.\n\n\n\nA key step to speed up computation is to take advantage of the sparsity of \\(H\\), the Hessian of the log likelihood with respect to the latent variables, \\[\n H = \\frac{\\partial^2}{\\partial \\theta^2} \\log p(y \\mid \\theta, \\phi).\n\\] For example, if the observations \\((y_1, \\cdots, y_N)\\) are conditionally independent and each depends on only one component of \\(\\theta\\), the log likelihood decomposes into a sum of per-observation terms, \\[\n \\log p(y \\mid \\theta, \\phi) = \\sum_{i = 1}^N \\log p(y_i \\mid \\theta_i, \\phi),\n\\] and the Hessian is diagonal. This leads to faster calculations of the Hessian and subsequently sparse matrix operations. This case is common in Gaussian process models, and certain hierarchical models.\nStan’s suite of functions for the embedded Laplace approximation exploits block-diagonal structure in the Hessian, where the user specifies the block size B. The user can specify the size \\(B\\) of these blocks. The user is responsible for working out what \\(B\\) is. If the Hessian is dense, then we simply set \\(B = N\\). The diagonal case above corresponds to B = 1. Arbitrary sparsity patterns beyond block-diagonal structure are not currently supported.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "reference-manual/laplace_embedded.html#latent-gaussian-models", + "href": "reference-manual/laplace_embedded.html#latent-gaussian-models", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "The embedded Laplace approximation is used for latent Gaussian models. 
A latent Gaussian model is defined by three components:\n\n\\(\\phi\\): hyperparameters (e.g., GP kernel length-scale and magnitude, or variance components in a hierarchical model),\n\\(\\theta\\): latent Gaussian variables (the high-dimensional quantity to be marginalized out),\n\\(y\\): observed data.\n\nThese components are related through a hierarchical structure. The hyperparameters \\(\\phi\\) are given a prior \\(p(\\phi)\\). The latent variables \\(\\theta\\) have a multivariate normal prior centered at 0 with covariance matrix \\(K(\\phi)\\). A non-zero mean offset can be incorporated into the likelihood function. The observations \\(y\\) have a data model \\(p(y \\mid \\theta, \\phi)\\). The prior on \\(\\theta\\) is centered at zero; an offset can be incorporated into the data model if a non-zero mean is needed.\n\\[\\begin{eqnarray*}\n \\phi & \\sim & p(\\phi) \\\\\n \\theta & \\sim & \\text{Multi-Normal}(0, K(\\phi)) \\\\\n y & \\sim & p(y \\mid \\theta, \\phi).\n\\end{eqnarray*}\\]\nThe generative model above defines a joint distribution over all three quantities, \\(p(\\phi, \\theta, y) = p(\\phi) \\, p(\\theta \\mid \\phi) \\, p(y \\mid \\theta, \\phi)\\). After observing data \\(y\\), Bayes’ theorem gives the joint posterior \\(p(\\phi, \\theta \\mid y) \\propto p(\\phi) \\, p(\\theta \\mid \\phi) \\, p(y \\mid \\theta, \\phi)\\), where \\(p(y \\mid \\theta, \\phi)\\) as a function of \\(\\theta\\) and \\(\\phi\\) is the joint likelihood function.\nSampling directly from the joint posterior \\(p(\\phi, \\theta \\mid y)\\) of this model is often difficult. Challenging geometries (e.g., funnels) frustrate inference algorithms, including Hamiltonian Monte Carlo and variational inference. However, the marginal posterior \\(p(\\phi \\mid y)\\) is often well-behaved and low-dimensional, making it much easier to sample. With an embedded Laplace approximation, we can obtain an approximation of the marginal posterior \\(p(\\phi \\mid y)\\). 
This is done via an intermediate approximation of the conditional posterior \\(p(\\theta \\mid \\phi, y)\\) by a normal distribution. This normal approximation is well-justified when the likelihood \\(p(y \\mid \\theta, \\phi)\\) as a function of \\(\\theta\\) is log-concave (given that we already have a normal prior \\(p(\\theta \\mid \\phi)\\)). Once we obtain (approximate) samples \\(\\phi^{(i)} \\sim p(\\phi \\mid y)\\), we can in turn generate posterior draws for \\(\\theta\\) using the normal approximation to \\(p(\\theta \\mid y, \\phi)\\), evaluated at the posterior draws \\(\\phi^{(i)}\\).", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "reference-manual/laplace_embedded.html#approximation-of-the-conditional-posterior-and-marginal-likelihood", + "href": "reference-manual/laplace_embedded.html#approximation-of-the-conditional-posterior-and-marginal-likelihood", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "The two-step inference strategy for using embedded Laplace in a latent Gaussian model requires approximations to both the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) and the marginal likelihood \\(p(y \\mid \\phi)\\). The Laplace approximation is the normal distribution that matches the mode and curvature of the conditional posterior \\(p(\\theta \\mid y, \\phi)\\). The mode, defined as the value of \\(\\theta\\) that maximizes the conditional posterior, is estimated by a Newton solver, \\[\n \\theta^* = \\underset{\\theta}{\\text{argmax}} \\ p(\\theta \\mid y, \\phi).\n\\]\nSince the approximation is normal, the curvature is matched by setting the covariance to the inverse of the negative Hessian of the log conditional posterior, evaluated at the mode,\n\\[\n \\Sigma^* = \\left( - \\left . 
\\frac{\\partial^2}{\\partial \\theta^2}\n \\log p (\\theta \\mid \\phi, y) \\right |_{\\theta =\\theta^*} \\right)^{-1}.\n\\]\nThe resulting Laplace approximation is a multivariate normal centered at the mode with covariance given by the inverse curvature,\n\\[\n\\hat p_\\mathcal{L} (\\theta \\mid y, \\phi) = \\text{Multi-Normal}(\\theta^*, \\Sigma^*)\n\\approx p(\\theta \\mid y, \\phi).\n\\]\nThis approximation also yields an approximation to the marginal likelihood, obtained by evaluating the prior, likelihood, and approximate posterior at the mode \\(\\theta^*\\),\n\\[\n \\hat p_\\mathcal{L}(y \\mid \\phi) := \\frac{p(\\theta^* \\mid \\phi) \\\n p(y \\mid \\theta^*, \\phi) }{ \\hat p_\\mathcal{L} (\\theta^* \\mid \\phi, y) }\n \\approx p(y \\mid \\phi).\n\\]\nHence, a strategy to approximate the posterior of the latent Gaussian model is to first estimate the marginal posterior \\(\\hat p_\\mathcal{L}(\\phi \\mid y) \\propto p(\\phi) \\hat p_\\mathcal{L} (y \\mid \\phi)\\) using any algorithm supported by Stan. Approximate posterior draws for the latent Gaussian variables are then obtained by first sampling \\(\\phi \\sim \\hat p_\\mathcal{L}(\\phi \\mid y)\\) and then \\(\\theta \\sim \\hat p_\\mathcal{L}(\\theta \\mid \\phi, y)\\).", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "reference-manual/laplace_embedded.html#trade-offs-of-the-approximation", + "href": "reference-manual/laplace_embedded.html#trade-offs-of-the-approximation", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "The embedded Laplace approximation presents several trade-offs with standard inference over the joint posterior \\(p(\\theta, \\phi \\mid y)\\). The main advantage of the embedded Laplace approximation is that it side-steps the intricate geometry of hierarchical models. 
The marginal posterior \\(p(\\phi \\mid y)\\) can then be handled by Hamiltonian Monte Carlo sampling without extensive tuning or reparameterization, and the mixing time is faster, meaning we can run shorter chains to achieve a desired precision. One additional benefit is that approximate methods, e.g. variational inference, which work poorly on the joint \\(p(\\theta, \\phi \\mid y)\\) can work well on the marginal posterior \\(p(\\phi \\mid y)\\).\nOn the other hand, the embedded Laplace approximation presents certain disadvantages. First, we need to perform a Laplace approximation each time the log marginal likelihood is evaluated, meaning each iteration can be expensive. Secondly, the approximation can introduce non-negligible error, especially with non-log-concave likelihoods (note the prior is always multivariate normal). How these trade-offs are resolved depends on the application; see Margossian et al. (2020) for some examples.\n\n\nThe quality of the Laplace approximation depends on how close the true conditional posterior \\(p(\\theta \\mid y, \\phi)\\) is to Gaussian.\nWorks well. Log-concave likelihoods, for example from a Poisson model with log link or negative binomial with log link. These produce unimodal conditional posteriors when combined with a Gaussian prior. The approximation error is typically negligible with these likelihoods, especially with moderate-to-large counts (Kuss and Rasmussen 2005; Vanhatalo, Pietiläinen, and Vehtari 2010; Cseke and Heskes 2011; Vehtari et al. 2016). If the likelihood is normal, there is no error in the approximation but in this case the marginalization can be worked analytically and the resulting implementation is much faster than using the embedded Laplace approximation.\nWorks adequately. Bernoulli model with logit link has technically log-concave likelihood, but the likelihood can be very skewed making the Gaussian approximation less accurate than for count data. 
The embedded Laplace is still useful when \\(\\theta\\) is high-dimensional and joint sampling is infeasible; see Vehtari et al. (2016) and Margossian et al. (2020) for discussion.\nNot appropriate. For likelihoods that are not log-concave in \\(\\theta\\), the conditional posterior may be multimodal and the Newton solver finds only a single mode or can fail completely. When \\(\\theta\\) is low-dimensional (a few dozen or fewer), the overhead of the inner optimization may not pay for itself and standard joint HMC sampling is often adequate.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "reference-manual/laplace_embedded.html#details-of-the-approximation", + "href": "reference-manual/laplace_embedded.html#details-of-the-approximation", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "When the embedded Laplace approximation does not converge or produces unexpected results, the solver configuration may need adjustment. This section describes the internals of the Newton solver and the options available for tuning it.\n\n\nA critical component of the embedded Laplace approximation is the Newton solver used to estimate the mode \\(\\theta^*\\) of \\(p(\\theta \\mid \\phi, y)\\). The objective function being maximized is the log joint density of the prior and likelihood with respect to \\(\\theta\\).\n\\[\n\\Psi(\\theta) = \\log p(\\theta \\mid \\phi) + \\log p(y \\mid \\theta, \\phi),\n\\]\nConvergence is declared when the change in the objective between successive iterations falls below a tolerance \\(\\Delta\\).\n\\[\n| \\Psi (\\theta^{(i + 1)}) - \\Psi (\\theta^{(i)}) | \\le \\Delta.\n\\]\nThe solver also stops after reaching a pre-specified maximum number of steps. In that case, Stan throws a warning, but still returns the last iteration’s parameters. 
If you see this warning you should check the diagnostics to understand why the solver failed to converge.\nTo help with cases where the Newton step does not lead to an increase in the objective function, the Newton iteration is augmented with a Wolfe line search to ensure that at each iteration the objective function \\(\\Psi\\) increases. Specifically, suppose the objective decreases after a Newton step, indicating the step overshot a region of improvement,\n\\[\n\\Psi (\\theta^{(i + 1)}) < \\Psi (\\theta^{(i)}).\n\\]\nThis can indicate that the Newton step \\(\\alpha\\) at iteration \\(i\\) is too large and that we skipped a region where the objective function increases. In that case, we can fall back to a Wolfe line search to find a step size which satisfies the Wolfe conditions. The Wolfe line search attempts to find a search direction \\(p_i\\) and step size \\(\\alpha_k\\) such that an accepted step both increases our objective and ensures that the slope at the accepted step is flatter than at our previous position. Together these help push the algorithm towards a minimum.\n\\[\nf(x_i + \\alpha_k p_i) \\le f(x_i) + c_1 \\alpha_k \\nabla f(x_i)^T p_i\n-p^T_i \\nabla f(x_i + \\alpha_k p_i) \\le -c_2 p^T_i \\nabla f(x_i)\n\\]\n\\[\n \\theta^{(i + 1)} \\leftarrow \\frac{\\theta^{(i + 1)} + \\theta^{(i)}}{2}.\n\\]\nWe repeat this halving of steps until \\(\\Psi (\\theta^{(i + 1)}) \\ge \\Psi (\\theta^{(i)})\\), or until a maximum number of linesearch steps is reached. For certain problems, adding a linesearch can make the optimization more stable.\n\n\n\nThe embedded Laplace approximation uses a custom Newton solver, specialized to find the mode of \\(p(\\theta \\mid \\phi, y)\\). A key step for efficient optimization is to ensure all matrix inversions are numerically stable. 
This can be done using the Woodbury-Sherman-Morrison formula and requires one of three matrix decompositions:\n\nCholesky decomposition of the Hessian of the negative log likelihood \\(W = - \\partial^2_\\theta \\log p(y \\mid \\theta, \\phi)\\).\nCholesky decomposition of the prior covariance matrix \\(K(\\phi)\\).\nLU-decomposition of \\(I + KW\\), where \\(I\\) is the identity matrix.\n\nThe first solver (1) should be used if the negative log likelihood is positive-definite. Otherwise the user should rely on (2). In rarer cases where it is not numerically safe to invert the covariance matrix \\(K\\), users can use the third solver as a last-resort option.\n\n\n\nA key step to speed up computation is to take advantage of the sparsity of \\(H\\), the Hessian of the log likelihood with respect to the latent variables, \\[\n H = \\frac{\\partial^2}{\\partial \\theta^2} \\log p(y \\mid \\theta, \\phi).\n\\] For example, if the observations \\((y_1, \\cdots, y_N)\\) are conditionally independent and each depends on only one component of \\(\\theta\\), the log likelihood decomposes into a sum of per-observation terms, \\[\n \\log p(y \\mid \\theta, \\phi) = \\sum_{i = 1}^N \\log p(y_i \\mid \\theta_i, \\phi),\n\\] and the Hessian is diagonal. This leads to faster calculations of the Hessian and subsequently sparse matrix operations. This case is common in Gaussian process models, and certain hierarchical models.\nStan’s suite of functions for the embedded Laplace approximation exploits block-diagonal structure in the Hessian, where the user specifies the block size B. The user can specify the size \\(B\\) of these blocks. The user is responsible for working out what \\(B\\) is. If the Hessian is dense, then we simply set \\(B = N\\). The diagonal case above corresponds to B = 1. 
Arbitrary sparsity patterns beyond block-diagonal structure are not currently supported.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "reference-manual/index.html", + "href": "reference-manual/index.html", + "title": "Stan Reference Manual", + "section": "", + "text": "This is the official reference manual for Stan’s programming language for coding probability models, inference algorithms for fitting models and making predictions, and posterior analysis tools for evaluating the results. This manual applies to all Stan interfaces.\nThe first part of the reference manual provides a full specification of the Stan programming language. The language is responsible for defining a log density function conditioned on data. Typically, this is a Bayesian posterior, but it may also be a penalized likelihood function. The second part of the manual specifies the inference algorithms and posterior inference tools. The third part provides auxiliary information about the use of Stan.\nDownload the pdf version of this manual.\n\nCopyright and trademark\n\nCopyright 2011–2025, Stan Development Team and their assignees.\nThe Stan name and logo are registered trademarks of NumFOCUS.\n\n\n\nLicensing\n\nText content: CC-BY ND 4.0 license\nComputer code: BSD 3-clause license\nLogo: Stan logo usage guidelines\n\n\n\n\n\n Back to top", + "crumbs": [ + "Reference Manual", + "Stan Reference Manual" + ] + }, + { + "objectID": "reference-manual/expressions.html", + "href": "reference-manual/expressions.html", + "title": "Expressions", + "section": "", + "text": "An expression is the syntactic unit in a Stan program that denotes a value. Every expression in a well-formed Stan program has a type that is determined statically (at compile time), based only on the type of its variables and the types of the functions used in it. 
If an expression’s type cannot be determined statically, the Stan compiler will report the location of the problem.\nThis chapter covers the syntax, typing, and usage of the various forms of expressions in Stan.\n\n\nThe simplest form of expression is a literal that denotes a primitive numerical value.\n\n\nInteger literals represent integers of type int. Integer literals are written in base 10 without any separators. Integer literals may contain a single negative sign. (The expression --1 is interpreted as the negation of the literal -1.)\nThe following list contains well-formed integer literals.\n0, 1, -1, 256, -127098, 24567898765\nInteger literals must have values that fall within the bounds for integer values (see the section on numerical data types).\nInteger literals may not contain decimal points (.). Thus the expressions 1. and 1.0 are of type real and may not be used where a value of type int is required.\n\n\n\nA number written with a period or with scientific notation is assigned to the continuous numeric type real. Real literals are written in base 10 with a period (.) as a separator and optionally an exponent with optional sign. Examples of well-formed real literals include the following.\n0.0, 1.0, 3.14, -217.9387, 2.7e3, -2E-5, 1.23e+3.\nThe notation e or E followed by a positive or negative integer denotes a power of 10 to multiply. For instance, 2.7e3 and 2.7e+3 denote \\(2.7 \\times 10^3\\), whereas -2E-5 denotes \\(-2 \\times\n10^{-5}\\).\n\n\n\nA number followed by the character i denotes an imaginary number and is assigned to the numeric type complex. The number preceding i may be either a real or integer literal and determines the magnitude of the imaginary number. Examples of well-formed imaginary literals include the following.\n1i, 2i, -325.786i, 1e10i, 2.87e-10i.\nNote that the character i by itself is not a well-formed imaginary literal. 
The unit imaginary number must be written as 1i.\n\n\n\nStan does not include complex literals directly, but a real or integer literal can be added to an imaginary literal to derive an expression that behaves like a complex literal. Examples include the following.\n1 + 2i, -3.2e9 + 1e10i\nThese will be assigned the type complex, which is the result of adding a real or integer and a complex number. They will also function like literals in the sense that the C++ compiler is able to reduce them to a single complex constant at compile time.\n\n\n\n\nA variable by itself is a well-formed expression of the same type as the variable. Variables in Stan consist of ASCII strings containing only the basic lower-case and upper-case Roman letters, digits, and the underscore (_) character. Variables must start with a letter (a--z and A--Z) and may not end with two underscores (__).\nExamples of legal variable identifiers are as follows.\na, a3, a_3, Sigma, my_cpp_style_variable, myCamelCaseVariable\nUnlike in R and BUGS, variable identifiers in Stan may not contain a period character.\n\n\nStan reserves many strings for internal use and these may not be used as the name of a variable. An attempt to name a variable after an internal string results in the stanc translator halting with an error message indicating which reserved name was used and its location in the model code.\n\n\nThe name of the model cannot be used as a variable within the model. This is usually not a problem because the default in bin/stanc is to append _model to the name of the file containing the model specification. For example, if the model is in file foo.stan, it would not be legal to have a variable named foo_model when using the default model name through bin/stanc. With user-specified model names, variables cannot match the model.\n\n\n\nThe following list contains reserved words for Stan’s programming language. 
Not all of these features are implemented in Stan yet, but the tokens are reserved for future use.\nfor, in, while, repeat, until, if, then, else,\ntrue, false, target, struct, typedef, export,\nauto, extern, var, static, lower, upper, offset,\nmultiplier\nVariables should not be named after types, either, and thus may not be any of the following.\nint, real, complex, vector, simplex, unit_vector,\nsum_to_zero_vector, sum_to_zero_matrix, ordered, positive_ordered,\nrow_vector, matrix, cholesky_factor_corr,\ncolumn_stochastic_matrix,row_stochastic_matrix,\ncholesky_factor_cov, corr_matrix, cov_matrix, array\nThe following keywords or built-in functions are also reserved and cannot be used as variable names:\nprint, reject, profile, fatal_error, target, jacobian\nThe following block identifiers are reserved and cannot be used as variable names:\nfunctions, model, data, parameters, quantities,\ntransformed, generated\n\n\n\nVariable names will also conflict with the names of distributions suffixed with _lpdf, _lpmf, _lcdf, and _lccdf, _cdf, and _ccdf, such as normal_lcdf_log. No user-defined variable can take a name ending in _lupdf or _lupmf even if a corresponding _lpdf or _lpmf is not defined.\nUsing any of these variable names causes the stanc translator to halt and report the name and location of the variable causing the conflict.\n\n\n\nStan primarily generates code in C++, which features its own reserved words. It is legal to name a variable any of the following names, however doing so will lead to it being renamed _stan_NAME (e.g. 
_stan_public) behind the scenes (in the generated C++ code).\n\nalignas, alignof, and, and_eq, asm, bitand, bitor, bool,\ncase, catch, char, char16_t, char32_t, class, compl, const,\nconstexpr, const_cast, decltype, default, delete, do,\ndouble, dynamic_cast, enum, explicit, float, friend, goto,\ninline, long, mutable, namespace, new, noexcept, not, not_eq,\nnullptr, operator, or, or_eq, private, protected, public,\nregister, reinterpret_cast, short, signed, sizeof,\nstatic_assert, static_cast, switch, template, this, thread_local,\nthrow, try, typeid, typename, union, unsigned, using, virtual,\nvolatile, wchar_t, xor, xor_eq, fvar, STAN_MAJOR, STAN_MINOR,\nSTAN_PATCH, STAN_MATH_MAJOR, STAN_MATH_MINOR, STAN_MATH_PATCH\n\n\n\n\nThe legal characters for variable identifiers are given in the following table.\n\n\nThe alphanumeric characters and underscore in base ASCII are the only legal characters in Stan identifiers.\n\n\n\ncharacters\nASCII code points\n\n\n\n\na -- z\n97 – 122\n\n\nA -- Z\n65 – 90\n\n\n0 -- 9\n48 – 57\n\n\n_\n95\n\n\n\nAlthough not the most expressive character set, ASCII is the most portable and least prone to corruption through improper character encodings or decodings. Sticking to this range of ASCII makes Stan compatible with Latin-1 or UTF-8 encodings of these characters, which are byte-for-byte identical to ASCII.\n\n\n\nWithin comments, Stan can work with any ASCII-compatible character encoding, such as ASCII itself, UTF-8, or Latin1. It is up to user shells and editors to display them properly.\n\n\n\n\n\nExpressions for the Stan container objects, namely arrays, vectors, row vectors, matrices, and tuples, can all be constructed using expressions.\n\n\nSquare brackets may be wrapped around a sequence of comma separated primitive expressions to produce a row vector expression. For example, the expression [ 1, 10, 100 ] denotes a row vector of three elements with real values 1.0, 10.0, and 100.0. 
Applying the transpose operator to a row vector expression produces a vector expression. This syntax provides a way to declare and define small vectors in a single line, as follows.\nrow_vector[2] rv2 = [ 1, 2 ];\nvector[3] v3 = [ 3, 4, 5 ]';\nThe vector expression values may be compound expressions or variable names, so it is legal to write [ 2 * 3, 1 + 4] or [ x, y ], providing that x and y are primitive variables.\n\n\n\nA matrix expression consists of square brackets wrapped around a sequence of comma separated row vector expressions. This syntax provides a way to declare and define a matrix in a single line, as follows.\nmatrix[3, 2] m1 = [ [ 1, 2 ], [ 3, 4 ], [5, 6 ] ];\nAny expression denoting a row vector can be used in a matrix expression. For example, the following code is valid:\nvector[2] vX = [ 1, 10 ]';\nrow_vector[2] vY = [ 100, 1000 ];\nmatrix[3, 2] m2 = [ vX', vY, [ 1, 2 ] ];\n\n\n\nComplex vector expressions work the same way as real vector expressions. For example, the following are all legal Stan expressions and assignments.\ncomplex_vector[3] = [1 + 2i, 3 - 1.7i, 0]';\ncomplex_row_vector[2] = [12, -2i];\ncomplex_matrix[2, 3] = [[1 + 2i, 3 - 1.7i, 0],\n [3.9 - 1.234i, 176i, 1 + 1i]];\n\n\nThe empty expression [ ] is ambiguous and therefore is not allowed and similarly expressions such as [ [ ] ] or [ [ ], [ ] ] are not allowed.\n\n\n\n\nIf needed, it is possible to create an empty vector with\nrep_vector(e, 0)\nwhere the first expression e needs to be a scalar of type real.\nIf needed, it is possible to create an empty matrix with\nrep_matrix(e, 0, 0)\nwhere the first expression e needs to be a scalar of type real.\n\n\n\nCurly braces may be wrapped around a sequence of expressions to produce an array expression. For example, the expression { 1, 10, 100 } denotes an integer array of three elements with values 1, 10, and 100. 
This syntax is particularly convenient to define small arrays in a single line, as follows.\narray[3] int a = { 1, 10, 100 };\nThe values may be compound expressions, so it is legal to write { 2 * 3, 1 + 4 }. It is also possible to write two dimensional arrays directly, as in the following example.\narray[2, 3] int b = { { 1, 2, 3 }, { 4, 5, 6 } };\nThis way, b[1] is { 1, 2, 3 } and b[2] is { 4, 5, 6 }.\nWhitespace is always interchangeable in Stan, so the above can be laid out as follows to more clearly indicate the row and column structure of the resulting two dimensional array.\narray[2, 3] int b = { { 1, 2, 3 },\n { 4, 5, 6 } };\n\n\n\nThe empty array expression ({ }) is not allowed. See more about restrictions on array expressions in subsection Restrictions on values.\nIf needed, it is possible to create an empty array with\nrep_array(e, 0)\nwhere the first expression e determines the type of the array. For example, rep_array(0.0, 0) returns an empty real array of type real[], whereas rep_array({123}, 0) returns an empty two dimensional integer array of type int[ , ]. Only the type of the first argument is used, so the integer arrays {123} and {0} produce equivalent values.\n\n\n\nAny type of expression may be used within braces to form an array expression. In the simplest case, all of the elements will be of the same type and the result will be an array of elements of that type. For example, the elements of the array can be vectors, in which case the result is an array of vectors.\nvector[3] b;\nvector[3] c;\n// ...\narray[2] vector[3] d = { b, c };\nThe elements may also be a mixture of int and real typed expressions, in which case the result is an array of real values.\narray[2] real b = { 1, 1.9 };\n\n\n\nStan uses parentheses around a comma-separated sequence of expressions to construct a tuple. 
For example, we can construct a 2-tuple as follows.\ntuple(int, vector[3]) xy = (42, [1, 2.9, -1.3]');\nThe expression 42 is of type int and the expression [1, 2.9, -1.3] is of type row_vector so that [1, 2.9, -1.3]' is of type vector and of size 3. The whole tuple expression (42, [1, 2.9, -1.3]') thus has a sized type of tuple(int, vector[3]) and an unsized type (e.g., for a function argument) of tuple(int, vector).\nA tuple of one element can be created using the same style as languages like Python, with a trailing comma, e.g., (3.14,). For longer tuples, Stan does not support trailing commas.\n\n\n\nThere are some restrictions on how array expressions may be used that arise from their types being calculated bottom up and the basic data type and assignment rules of Stan.\n\n\nAlthough it is tempting to try to define a ragged array expression, all Stan data types are rectangular (or boxes or other higher-dimensional generalizations). Thus the following nested array expression will cause an error when it tries to create a non-rectangular array.\n{ { 1, 2, 3 }, { 4, 5 } } // compile time error: size mismatch\nThis may appear to be OK, because it is creating a two-dimensional integer array (array[,] int) out of two one-dimensional array integer arrays (array[] int). But it is not allowed because the two one-dimensional arrays are not the same size. If the elements are array expressions, this can be diagnosed at compile time. If one or both expressions is a variable, then that won’t be caught until runtime.\n{ { 1, 2, 3 }, m } // runtime error if m not size 3\n\n\n\nBecause there is no way to infer the type of the result, the empty array expression ({ }) is not allowed. This does not sacrifice expressive power, because a declaration is sufficient to initialize a zero-element array.\narray[0] int a; // a is fully defined as zero element array\n\n\n\nThere is no way to declare or construct a zero-tuple in Stan. Tuples must be at least one element long. 
The expression () does not pick out a zero-tuple—it is ill formed.\nOne-tuples need a trailing comma, like (1,). The expression (1), without the comma, is of type int rather than a tuple.\n\n\n\n\n\nAny expression wrapped in parentheses is also an expression. Like in C++, but unlike in R, only the round parentheses, ( and ), are allowed. The square brackets [ and ] are reserved for array indexing and the curly braces { and } for grouping statements.\nWith parentheses it is possible to explicitly group subexpressions with operators. Without parentheses, the expression 1 + 2 * 3 has a subexpression 2 * 3 and evaluates to 7. With parentheses, this grouping may be made explicit with the expression 1 + (2 * 3). More importantly, the expression (1 + 2) * 3 has 1 + 2 as a subexpression and evaluates to 9.\n\n\n\nFor integer and real-valued expressions, Stan supports the basic binary arithmetic operations of addition (+), subtraction (-), multiplication (*) and division (/) in the usual ways.\nFor integer expressions, Stan supports the modulus (%) binary arithmetic operation. Stan also supports the unary operation of negation for integer and real-valued expressions. For example, assuming n and m are integer variables and x and y real variables, the following expressions are legal.\n3.0 + 0.14\n-15\n2 * 3 + 1\n(x - y) / 2.0\n(n * (n + 1)) / 2\nx / n\nm % n\nThe negation, addition, subtraction, and multiplication operations are extended to matrices, vectors, and row vectors. The transpose operation, written using an apostrophe (') is also supported for vectors, row vectors, and matrices. Return types for matrix operations are the smallest types that can be statically guaranteed to contain the result. The full set of allowable input types and corresponding return types is detailed in the list of functions.\nFor example, if y and mu are variables of type vector and Sigma is a variable of type matrix, then (y - mu)' * Sigma * (y - mu) is a well-formed expression of type real. 
The type of the complete expression is inferred working outward from the subexpressions. The subexpression(s) y - mu are of type vector because the variables y and mu are of type vector. The transpose of this expression, the subexpression (y - mu)' is of type row_vector. Multiplication is left associative and transpose has higher precedence than multiplication, so the above expression is equivalent to the following fully specified form (((y - mu)') * Sigma) * (y - mu).\nThe type of subexpression (y - mu)' * Sigma is inferred to be row_vector, being the result of multiplying a row vector by a matrix. The whole expression’s type is thus the type of a row vector multiplied by a (column) vector, which produces a real value.\nStan provides elementwise matrix multiplication (e.g., a .* b) and division (e.g., a ./ b) operations. These provide a shorthand to replace loops, but are not intrinsically more efficient than a version programmed with elementwise calculations and assignments in a loop. For example, given declarations,\nvector[N] a;\nvector[N] b;\nvector[N] c;\nthe assignment,\nc = a .* b;\nproduces the same result with roughly the same efficiency as the loop\nfor (n in 1:N) {\n c[n] = a[n] * b[n];\n}\nStan supports exponentiation (^) of integer and real-valued expressions. The return type of exponentiation is always a real value. For example, assuming n and m are integer variables and x and y real variables, the following expressions are legal.\n3 ^ 2\n3.0 ^ -2\n3.0 ^ 0.14\nx ^ n\nn ^ x\nn ^ m\nx ^ y\nExponentiation is right associative, so the expression 2 ^ 3 ^ 4 is equivalent to the fully specified form 2 ^ (3 ^ 4).\n\n\nThe precedence and associativity of operators, as well as built-in syntax such as array indexing and function application is given in tabular form in the following table.\n\n\nStan’s unary, binary, and ternary operators, with their precedences, associativities, place in an expression, and a description. 
The last two lines list the precedence of function application and array, matrix, and vector indexing. The operators are listed in order of precedence, from least tightly binding to most tightly binding. The full set of legal arguments and corresponding result types are provided in the function documentation for the operators (i.e., operator*(int, int):int indicates the application of the multiplication operator to two integers, which returns an integer). Parentheses may be used to group expressions explicitly rather than relying on precedence and associativity.\n\n\n\n\n\n\n\n\n\n\nOp.\nPrec.\nAssoc.\nPlacement\nDescription\n\n\n\n\n? ~ :\n10\nright\nternary infix\nconditional\n\n\n||\n9\nleft\nbinary infix\nlogical or\n\n\n&&\n8\nleft\nbinary infix\nlogical and\n\n\n==\n7\nleft\nbinary infix\nequality\n\n\n!=\n7\nleft\nbinary infix\ninequality\n\n\n<\n6\nleft\nbinary infix\nless than\n\n\n<=\n6\nleft\nbinary infix\nless than or equal\n\n\n>\n6\nleft\nbinary infix\ngreater than\n\n\n>=\n6\nleft\nbinary infix\ngreater than or equal\n\n\n+\n5\nleft\nbinary infix\naddition\n\n\n-\n5\nleft\nbinary infix\nsubtraction\n\n\n*\n4\nleft\nbinary infix\nmultiplication\n\n\n.*\n4\nleft\nbinary infix\nelementwise multiplication\n\n\n/\n4\nleft\nbinary infix\n(right) division\n\n\n./\n4\nleft\nbinary infix\nelementwise division\n\n\n%\n4\nleft\nbinary infix\nmodulus\n\n\n\\\n3\nleft\nbinary infix\nleft division\n\n\n%/%\n3\nleft\nbinary infix\ninteger division\n\n\n!\n2\nn/a\nunary prefix\nlogical negation\n\n\n-\n2\nn/a\nunary prefix\nnegation\n\n\n+\n2\nn/a\nunary prefix\npromotion (no-op in Stan)\n\n\n^\n1\nright\nbinary infix\nexponentiation\n\n\n.^\n1\nright\nbinary infix\nelementwise exponentiation\n\n\n'\n0\nn/a\nunary postfix\ntransposition\n\n\n()\n0\nn/a\nprefix, wrap\nfunction application\n\n\n[]\n0\nleft\nprefix, wrap\narray, matrix indexing\n\n\n\nOther expression-forming operations, such as function application and subscripting bind more tightly than any of the 
arithmetic operations.\nThe precedence and associativity determine how expressions are interpreted. Because addition is left associative, the expression a + b + c is interpreted as (a + b) + c. Similarly, a / b * c is interpreted as (a / b) * c.\nBecause multiplication has higher precedence than addition, the expression a * b + c is interpreted as (a * b) + c and the expression a + b * c is interpreted as a + (b * c). Similarly, 2 * x + 3 * - y is interpreted as (2 * x) + (3 * (-y)).\nTransposition and exponentiation bind more tightly than any other arithmetic or logical operation. For vectors, row vectors, and matrices, -u' is interpreted as -(u'), u * v' as u* (v'), and u' * v as (u') * v. For integer and reals, -n ^ 3 is interpreted as -(n ^ 3).\n\n\n\n\n\n\n\nThe ternary conditional operator is unique in that it takes three arguments and uses a mixed syntax. If a is an expression of type int and b and c are expressions that can be converted to one another (e.g., compared with ==), then\na ? b : c\nis an expression of the promoted type of b and c. The result of this expression is b if condition a is true, c otherwise. The only promotion allowed in Stan is integer -> real -> complex; e.g. if one argument is of type int and the other of type real, the conditional expression as a whole is of type real. In other cases, the arguments have to be of the same underlying Stan type (i.e., constraints don’t count, only the shape) and the conditional expression is of that type.\n\n\nThe conditional operator is the most loosely binding operator, so its arguments rarely require parentheses for disambiguation. For example,\na > 0 || b < 0 ? c + d : e - f\nis equivalent to the explicitly grouped version\n(a > 0 || b < 0) ? (c + d) : (e - f)\nThe latter is easier to read even if the parentheses are not strictly necessary.\n\n\n\nThe conditional operator is right associative, so that\na ? b : c ? d : e\nparses as if explicitly grouped as\na ? b : (c ? 
d : e)\nAgain, the explicitly grouped version is easier to read.\n\n\n\n\nStan’s conditional operator works very much like its C++ analogue. The first argument must be an expression denoting an integer. Typically this is a variable or a relation operator, as in the variable a in the example above. Then there are two resulting arguments, the first being the result returned if the condition evaluates to true (i.e., non-zero) and the second if the condition evaluates to false (i.e., zero). In the example above, the value b is returned if the condition evaluates to a non-zero value and c is returned if the condition evaluates to zero.\n\n\nThe key property of the conditional operator that makes it so useful in high-performance computing is that it only evaluates the returned subexpression, not the alternative expression. In other words, it is not like a typical function that evaluates its argument expressions eagerly in order to pass their values to the function. As usual, the saving is mostly in the derivatives that do not get computed rather than the unnecessary function evaluation itself.\n\n\n\nIf one return expression is a data value (an expression involving only constants and variables defined in the data or transformed data block), and the other is not, then the ternary operator will promote the data value to a parameter value. This can cause needless work calculating derivatives in some cases and be less efficient than a full if-then conditional statement. For example,\ndata {\n array[10] real x;\n // ...\n}\nparameters {\n array[10] real z;\n // ...\n}\nmodel {\n y ~ normal(cond ? x : z, sigma);\n // ...\n}\nwould be more efficiently (if not more transparently) coded as\nif (cond) {\n y ~ normal(x, sigma);\n} else {\n y ~ normal(z, sigma);\n}\nThe conditional statement, like the conditional operator, only evaluates one of the result statements. 
In this case, the variable x will not be promoted to a parameter and thus not cause any needless work to be carried out when propagating the chain rule during derivative calculations.\n\n\n\n\n\nStan arrays, matrices, vectors, and row vectors are all accessed using the same array-like notation. For instance, if x is a variable of type array [] real (a one-dimensional array of reals) then x[1] is the value of the first element of the array.\nSubscripting has higher precedence than any of the arithmetic operations. For example, alpha * x[1] is equivalent to alpha * (x[1]).\nMultiple subscripts may be provided within a single pair of square brackets. If x is of type array[,] real, a two-dimensional array, then x[2, 501] is of type real.\n\n\nThe subscripting operator also returns subarrays of arrays. For example, if x is of type array[,,] real, then x[2] is of type array[,] real, and x[2, 3] is of type array[] real. As a result, the expressions x[2, 3] and x[2][3] have the same meaning.\n\n\n\nIf Sigma is a variable of type matrix, then Sigma[1] denotes the first row of Sigma and has the type row_vector.\n\n\n\nStan supports mixed indexing of arrays and their vector, row vector or matrix values. For example, if m is of type matrix[ , ], a two-dimensional array of matrices, then m[1] refers to the first row of the array, which is a one-dimensional array of matrices. More than one index may be used, so that m[1, 2] is of type matrix and denotes the matrix in the first row and second column of the array. Continuing to add indices, m[1, 2, 3] is of type row_vector and denotes the third row of the matrix denoted by m[1, 2]. Finally, m[1, 2, 3, 4] is of type real and denotes the value in the third row and fourth column of the matrix that is found at the first row and second column of the array m.\n\n\n\n\nIn addition to single integer indexes, as described in the language indexing section, Stan supports multiple indexing. 
Multiple indexes can be integer arrays of indexes, lower bounds, upper bounds, lower and upper bounds, or simply shorthand for all of the indexes. If the upper bound is smaller than the lower bound, the range is empty (unlike, e.g., in R). The upper bound and lower bound can be expressions that evaluate to an integer. A complete list of index types is given in the following table.\n\n\nTypes of indexes and examples with one-dimensional containers of size N and an integer array ii of type array [] int and size K.\n\n\n\nindex type\nexample\nvalue\n\n\n\n\ninteger\na[11]\nvalue of a at index 11\n\n\ninteger array\na[ii]\na[ii[1]], …, a[ii[K]]\n\n\nlower bound\na[3:]\na[3], …, a[N]\n\n\nupper bound\na[:5]\na[1], …, a[5]\n\n\nrange\na[2:7]\na[2], …, a[7]\n\n\nrange\na[7:2]\n[]\n\n\nrange\na[5-3:5+2]\na[2], …, a[7]\n\n\nall\na[:]\na[1], …, a[N]\n\n\nall\na[]\na[1], …, a[N]\n\n\n\nThe range indexing with : allows only increasing sequences. Indexing with a decreasing sequence can be made by creating an integer array in the following way:\n array[6] int ii = reverse(linspaced_int_array(6, 2, 7));\nThen a[ii] evaluates to a[7], …, a[2].\n\n\n\nThe fundamental semantic rule for dealing with multiple indexes is the following. If idxs is a multiple index, then it produces an indexable position in the result. To evaluate that index position in the result, the index is first passed to the multiple index, and the resulting index used.\na[idxs, ...][i, ...] = a[idxs[i], ...][...]\nOn the other hand, if idx is a single index, it reduces the dimensionality of the output, so that\na[idx, ...] = a[idx][...]\nThe only issue is what happens with matrices and vectors. Vectors work just like arrays. Matrices with multiple row indexes and multiple column indexes produce matrices. Matrices with multiple row indexes and a single column index become (column) vectors. Matrices with a single row index and multiple column indexes become row vectors. 
The types are summarized in the following table.\n\n\nSpecial rules for reducing matrices based on whether the argument is a single or multiple index. Examples are for a matrix a, with integer single indexes i and j and integer array multiple indexes is and js. The same typing rules apply for all multiple indexes.\n\n\n\nexample\nrow index\ncolumn index\nresult type\n\n\n\n\na[i]\nsingle\nn/a\nrow vector\n\n\na[is]\nmultiple\nn/a\nmatrix\n\n\na[i, j]\nsingle\nsingle\nreal\n\n\na[i, js]\nsingle\nmultiple\nrow vector\n\n\na[is, j]\nmultiple\nsingle\nvector\n\n\na[is, js]\nmultiple\nmultiple\nmatrix\n\n\n\nEvaluation of matrices with multiple indexes is defined to respect the following distributivity conditions.\nm[idxs1, idxs2][i, j] = m[idxs1[i], idxs2[j]]\nm[idxs, idx][j] = m[idxs[j], idx]\nm[idx, idxs][j] = m[idx, idxs[j]]\nEvaluation of arrays of matrices and arrays of vectors or row vectors is defined recursively, beginning with the array dimensions.\n\n\n\n\n\nStan provides a range of built in mathematical and statistical functions, which are documented in the built-in function documentation.\nExpressions in Stan may consist of the name of function followed by a sequence of zero or more argument expressions. For instance, log(2.0) is the expression of type real denoting the result of applying the natural logarithm to the value of the real literal 2.0.\nSyntactically, function application has higher precedence than any of the other operators, so that y + log(x) is interpreted as y + (log(x)).\n\n\nEach function has a type signature which determines the allowable type of its arguments and its return type. For instance, the function signature for the logarithm function can be expressed as\nreal log(real);\nand the signature for the lmultiply function is\nreal lmultiply(real, real);\nA function is uniquely determined by its name and its sequence of argument types. 
For instance, the following two functions are different functions.\nreal mean(array [] real);\nreal mean(vector);\nThe first applies to a one-dimensional array of real values and the second to a vector.\nThe identity conditions for functions explicitly forbid having two functions with the same name and argument types but different return types. This restriction also makes it possible to infer the type of a function expression compositionally by only examining the type of its subexpressions.\n\n\n\nConstants in Stan are nothing more than nullary (no-argument) functions. For instance, the mathematical constants \\(\\pi\\) and \\(e\\) are represented as nullary functions named pi() and e(). See the Stan Functions Reference built-in constants section for a list of built-in constants.\n\n\n\nBecause of integer to real type promotion, rules must be established for which function is called given a sequence of argument types. The scheme employed by Stan is the same as that used by C++, which resolves a function call to the function requiring the minimum number of type promotions.\nFor example, consider a situation in which the following two function signatures have been registered for foo.\nreal foo(real, real);\nint foo(int, int);\nThe use of foo in the expression foo(1.0, 1.0) resolves to foo(real, real), and thus the expression foo(1.0, 1.0) itself is assigned a type of real.\nBecause integers may be promoted to real values, the expression foo(1, 1) could potentially match either foo(real, real) or foo(int, int). The former requires two type promotions and the latter requires none, so foo(1, 1) is resolved to function foo(int, int) and is thus assigned the type int.\nThe expression foo(1, 1.0) has argument types (int, real) and thus does not explicitly match either function signature. 
By promoting the integer expression 1 to type real, it is able to match foo(real, real), and hence the type of the function expression foo(1, 1.0) is real.\nIn some cases (though not for any built-in Stan functions), a situation may arise in which the function referred to by an expression remains ambiguous. For example, consider a situation in which there are exactly two functions named bar with the following signatures.\nreal bar(real, int);\nreal bar(int, real);\nWith these signatures, the expression bar(1.0, 1) and bar(1, 1.0) resolve to the first and second of the above functions, respectively. The expression bar(1.0, 1.0) is illegal because real values may not be demoted to integers. The expression bar(1, 1) is illegal for a different reason. If the first argument is promoted to a real value, it matches the first signature, whereas if the second argument is promoted to a real value, it matches the second signature. The problem is that these both require one promotion, so the function name bar is ambiguous. If there is not a unique function requiring fewer promotions than all others, as with bar(1, 1) given the two declarations above, the Stan compiler will flag the expression as illegal.\n\n\n\nFor most of the distributions supported by Stan, there is a corresponding random-number generating function. These random number generators are named by the distribution with the suffix _rng. For example, a univariate normal random number can be generated by normal_rng(0, 1); only the parameters of the distribution, here a location (0) and scale (1) are specified because the variate is generated.\n\n\nThe use of random-number generating functions is restricted to the transformed data and generated quantities blocks; attempts to use them elsewhere will result in a parsing error with a diagnostic message. 
They may also be used in the bodies of user-defined functions whose names end in _rng.\nThis allows the random number generating functions to be used for simulation in general, and for Bayesian posterior predictive checking in particular.\n\n\n\nPosterior predictive checks typically use the parameters of the model to generate simulated data (at the individual and optionally at the group level for hierarchical models), which can then be compared informally using plots and formally by means of test statistics, to the actual data in order to assess the suitability of the model; see Chapter 6 of (Gelman et al. 2013) for more information on posterior predictive checks.\n\n\n\n\n\nStan is strongly statically typed, meaning that the implementation type of an expression can be resolved at compile time.\n\n\nThe primitive implementation types for Stan are\nint, real, complex, vector, row_vector, matrix, complex_vector,\ncomplex_row_vector, complex_matrix\nEvery basic declared type corresponds to a primitive type; the following table shows the mapping from types to their primitive types.\n\n\nThe table shows the variable declaration types of Stan and their corresponding primitive implementation type. 
Stan functions, operators, and probability functions have argument and result types declared in terms of primitive types plus array dimensionality.\n\n\n\ntype\nprimitive type\n\n\n\n\nint\nint\n\n\nreal\nreal\n\n\nvector\nvector\n\n\nsimplex\nvector\n\n\nunit_vector\nvector\n\n\nsum_to_zero_vector\nvector\n\n\nordered\nvector\n\n\npositive_ordered\nvector\n\n\nrow_vector\nrow_vector\n\n\nmatrix\nmatrix\n\n\ncov_matrix\nmatrix\n\n\ncorr_matrix\nmatrix\n\n\ncholesky_factor_cov\nmatrix\n\n\ncholesky_factor_corr\nmatrix\n\n\ncolumn_stochastic_matrix\nmatrix\n\n\nrow_stochastic_matrix\nmatrix\n\n\nsum_to_zero_matrix\nmatrix\n\n\ncomplex_vector\ncomplex_vector\n\n\ncomplex_row_vector\ncomplex_row_vector\n\n\ncomplex_matrix\ncomplex_matrix\n\n\n\nA full implementation type consists of a primitive implementation type and an integer array dimensionality greater than or equal to zero. These will be written to emphasize their array-like nature. For example, array [] real has an array dimensionality of 1, int an array dimensionality of 0, and array [,,] int an array dimensionality of 3. The implementation type matrix[ , , ] has a total of five dimensions and takes up to five indices, three from the array and two from the matrix.\nRecall that the array dimensions come before the matrix or vector dimensions in an expression such as the following declaration of a three-dimensional array of matrices.\narray[I, J, K] matrix[M, N] a;\nThe matrix a is indexed as a[i, j, k, m, n] with the array indices first, followed by the matrix indices, with a[i, j, k] being a matrix and a[i, j, k, m] being a row vector.\n\n\n\n\nStan’s type inference rules define the implementation type of an expression based on a background set of variable declarations. 
The rules work bottom up from primitive literal and variable expressions to complex expressions.\n\n\n\nThere are two basic promotion rules,\n\nint types may be promoted to real, and\nreal types may be promoted to complex.\n\nPlus, promotion is transitive, so that\n\nif type U can be promoted to type V and type V can be promoted to type T, then U can be promoted to T.\n\nThe first rule means that expressions of type int may be used anywhere an expression of type real is specified, namely in assignment or function argument passing. An integer is promoted to real by casting it in the underlying C++ code.\nThe remaining rules have to do with covariant typing rules, which say that a container of type U may be promoted to a container of the same shape of type T if U can be promoted to T. For vector and matrix types, this induces three rules,\n\nvector may be promoted to complex_vector,\nrow_vector may be promoted to complex_row_vector, and\nmatrix may be promoted to complex_matrix.\n\nFor array types, there’s a single rule\n\narray[...] U may be promoted to array[...] T if U can be promoted to T.\n\nFor example, this means array[,] int may be used where array [,] real or array [,] complex is required; as another example, array[] real may be used anywhere array[] complex is required.\nTuples have the natural extension of the above rules, applied to all sub-types at once\n\nA tuple(U1, ..., UN) may be promoted to a tuple(T1, ..., TN) if every Un can be promoted to Tn for n in 1:N\n\n\n\nAn integer literal expression such as 42 is of type int. Real literals such as 42.0 are of type real. Imaginary literals such as -17i are of type complex. The expression 7 - 2i acts like a complex literal, but technically it combines an integer literal 7 and an imaginary literal 2i through subtraction.\n\n\n\nThe type of a variable declared locally or in a previous block is determined by its declaration. 
The type of a loop variable is int.\nThere is always a unique declaration for each variable in each scope because Stan prohibits the redeclaration of already-declared variables.1\n\n\n\nIf e is an expression of total dimensionality greater than or equal to \\(N\\), then the type of expression e[i1, i2, ..., iN] is the same as that of e[i1][i2]...[iN], so it suffices to define the type of a singly-indexed function. Suppose e is an expression and i is an expression of primitive type int. Then\n\nif e is an expression of type array[i1, i2, ..., iN] T and k, i1, …, iN are expressions of type int, then e[k] is an expression of type array[i2, ..., iN] T,\nif e is an expression of type array[i] T with i and k expressions of type int, then e[k] is of type T,\nif e has implementation type vector or row_vector, dimensionality 0, then e[i] has implementation type real,\nif e has implementation type matrix, then e[i] has type row_vector,\nif e has implementation type complex_vector or complex_row_vector and i is an expression of type int, then e[i] is an expression of type complex, and\nif e has implementation type complex_matrix, and i is an expression of type int, then e[i] is an expression of type complex_row_vector.\n\n\n\n\nIf f is the name of a function and e1,...,eN are expressions for \\(N \\geq 0\\), then f(e1,...,eN) is an expression whose type is determined by the return type in the function signature for f given e1 through eN. Recall that a function signature is a declaration of the argument types and the result type.\nIn looking up functions, binary operators like real * real are defined as operator*(real, real) in the documentation and index.\nIn matching a function definition, all of the promotion rules are in play (integers may be promoted to reals, reals to complex, and containers may be promoted if their types are promoted). 
For example, arguments of type int may be promoted to type real or complex if necessary (see the subsection on type promotion in the function application section), a real argument will be promoted to complex if necessary, a vector will be promoted to complex_vector if necessary, and so on.\nIn general, matrix operations return the lowest inferable type. For example, row_vector * vector returns a value of type real, which is declared in the function documentation and index as real operator*(row_vector, vector).\n\n\n\n\n\nThere are several expression constructions in Stan that act as higher-order functions.2\nThe higher-order functions and the signature of their argument functions are listed in the following pair of tables.\n\n\nHigher-order functions in Stan with their argument function types. The first group of arguments can be a function of parameters or data. The second group of arguments, consisting of a real and integer array in all cases, must be expressions involving only data and literals.\n\n\n\n\n\n\n\n\n\nfunction\nparameter or data args\ndata args\nreturn type\n\n\n\n\nalgebra_solver\nvector, vector\narray [] real, array [] real\nvector\n\n\nalgebra_solver_newton\nvector, vector\narray [] real, array [] real\nvector\n\n\nintegrate_1d,\nreal, real, array [] real\narray [] real, array [] real\nreal\n\n\nintegrate_ode_X,\nreal, array [] real, array [] real\narray [] real, array [] real\narray [] real\n\n\nmap_rect\nvector, vector\narray [] real, array [] real\nvector\n\n\n\nFor example, the integrate_ode_rk45 function can be used to integrate differential equations in Stan:\nfunctions {\n array [] real foo(real t,\n array [] real y,\n array [] real theta,\n array [] real x_r,\n array [] real x_i) {\n // ...\n }\n}\n// ...\nint<lower=1> T;\narray[2] real y0;\nreal t0;\narray[T] real ts;\narray[1] real theta;\narray[0] real x_r;\narray[0] int x_i;\n// ...\narray[T, 2] real y_hat = integrate_ode_rk45(foo, y0, t0,\n ts, theta, x_r, x_i);\nThe function argument 
is foo, the name of the user-defined function; as shown in the higher-order functions table, integrate_ode_rk45 takes a real array, a real, three more real arrays, and an integer array as arguments and returns a 2D real array.\n\n\n\nVariadic Higher-order functions in Stan with their argument function types. The first group of arguments are restricted in type. The sequence of trailing arguments can be of any length with any types.\n\n\n\nfunction\nrestricted args\nreturn type\n\n\n\n\nsolve_X\nvector\nvector\n\n\node_X,\nvector, real, array [] real\narray [] vector\n\n\nreduce_sum\narray[] T, T1, T2\nreal\n\n\n\nT, T1, and T2 can be any Stan type.\nFor example, the ode_rk45 function can be used to integrate differential equations in Stan:\nfunctions {\n vector foo(real t, vector y, real theta, vector beta,\n array [] real x_i, int index) {\n // ...\n }\n}\n// ...\nint<lower=1> T;\nvector[2] y0;\nreal t0;\narray[T] real ts;\nreal theta;\nvector[7] beta;\narray[10] int x_i;\nint index;\n// ...\narray[T] vector[2] y_hat = ode_rk45(foo, y0, t0, ts, theta,\n beta, x_i, index);\nThe function argument is foo, the name of the user-defined function. As shown in the variadic higher-order functions table, ode_rk45 takes a real, a vector, a real, a real array, and a sequence of arguments whose types match those at the end of foo and returns an array of vectors.\n\n\n\nThe function argument to higher-order functions is always passed as the first argument. This function argument must be provided as the name of a user-defined or built-in function. No quotes are necessary.\n\n\n\nSome of the arguments to higher-order functions are restricted to data. 
This means they must be expressions containing only data variables, transformed data variables, or literals; they may contain arbitrary functions applied to data variables or literals, but must not contain parameters, transformed parameters, or local variables from any block other than transformed data.\nFor user-defined functions the qualifier data may be prepended to the type to restrict the argument to data-only variables.\n\n\n\n\nDerivatives of the log probability function defined by a model are used in several ways by Stan. The Hamiltonian Monte Carlo samplers, including NUTS, use gradients to guide updates. The BFGS optimizers also use gradients to guide search for posterior modes.\n\n\nUnlike evaluations in pure mathematics, evaluation of derivatives in Stan is done by applying the chain rule on an expression-by-expression basis, evaluating using floating-point arithmetic. As a result, models such as the following are problematic for inference involving derivatives.\nparameters {\n real x;\n}\nmodel {\n x ~ normal(sqrt(x - x), 1);\n}\nAlgebraically, the distribution statement in the model could be reduced to\n x ~ normal(0, 1);\nand it would seem the model should produce unit normal draws for x. But rather than canceling, the expression sqrt(x - x) causes a problem for derivatives. 
The cause is the mechanistic evaluation of the chain rule,\n\\[\n\\begin{array}{rcl}\n\\frac{d}{dx} \\sqrt{x - x}\n& = &\n\\frac{1}{2 \\sqrt{x - x}} \\times \\frac{d}{dx} (x - x)\n\\\\[4pt]\n& = &\n\\frac{1}{0} \\times (1 - 1)\n\\\\[4pt]\n& = &\n\\infty \\times 0\n\\\\[4pt]\n& = & \\mathrm{NaN}.\n\\end{array}\n\\]\nRather than the \\(x - x\\) canceling out, it introduces a 0 into the numerator and denominator of the chain-rule evaluation.\nThe only way to avoid this kind of problem is to be careful to do the necessary algebraic reductions as part of the model and not introduce expressions like sqrt(x - x) for which the chain rule produces not-a-number values.\n\n\n\nThe best way to diagnose whether something is going wrong with the derivatives is to use the test-gradient option to the sampler or optimizer inputs; this option is available in both Stan and RStan (though it may be slow, because it relies on finite differences to make a comparison to the built-in automatic differentiation).\nFor example, compiling the above model to an executable sqrt-x-minus-x in CmdStan, the test can be run as\n> ./sqrt-x-minus-x diagnose test=gradient\nwhich produces\n...\nTEST GRADIENT MODE\n\n Log probability=-0.393734\n\n param idx value model finite diff error\n 0 -0.887393 nan 0 nan\nEven though finite differences calculates the right gradient of 0, automatic differentiation follows the chain rule and produces a not-a-number output.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#numeric-literals", + "href": "reference-manual/expressions.html#numeric-literals", + "title": "Expressions", + "section": "", + "text": "The simplest form of expression is a literal that denotes a primitive numerical value.\n\n\nInteger literals represent integers of type int. Integer literals are written in base 10 without any separators. Integer literals may contain a single negative sign. 
(The expression --1 is interpreted as the negation of the literal -1.)\nThe following list contains well-formed integer literals.\n0, 1, -1, 256, -127098, 24567898765\nInteger literals must have values that fall within the bounds for integer values (see the section on numerical data types).\nInteger literals may not contain decimal points (.). Thus the expressions 1. and 1.0 are of type real and may not be used where a value of type int is required.\n\n\n\nA number written with a period or with scientific notation is assigned to the continuous numeric type real. Real literals are written in base 10 with a period (.) as a separator and optionally an exponent with optional sign. Examples of well-formed real literals include the following.\n0.0, 1.0, 3.14, -217.9387, 2.7e3, -2E-5, 1.23e+3.\nThe notation e or E followed by a positive or negative integer denotes a power of 10 to multiply. For instance, 2.7e3 and 2.7e+3 denote \\(2.7 \\times 10^3\\), whereas -2E-5 denotes \\(-2 \\times\n10^{-5}\\).\n\n\n\nA number followed by the character i denotes an imaginary number and is assigned to the numeric type complex. The number preceding i may be either a real or integer literal and determines the magnitude of the imaginary number. Examples of well-formed imaginary literals include the following.\n1i, 2i, -325.786i, 1e10i, 2.87e-10i.\nNote that the character i by itself is not a well-formed imaginary literal. The unit imaginary number must be written as 1i.\n\n\n\nStan does not include complex literals directly, but a real or integer literal can be added to an imaginary literal to derive an expression that behaves like a complex literal. Examples include the following.\n1 + 2i, -3.2e9 + 1e10i\nThese will be assigned the type complex, which is the result of adding a real or integer and a complex number. 
They will also function like literals in the sense that the C++ compiler is able to reduce them to a single complex constant at compile time.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#variables.section", + "href": "reference-manual/expressions.html#variables.section", + "title": "Expressions", + "section": "", + "text": "A variable by itself is a well-formed expression of the same type as the variable. Variables in Stan consist of ASCII strings containing only the basic lower-case and upper-case Roman letters, digits, and the underscore (_) character. Variables must start with a letter (a--z and A--Z) and may not end with two underscores (__).\nExamples of legal variable identifiers are as follows.\na, a3, a_3, Sigma, my_cpp_style_variable, myCamelCaseVariable\nUnlike in R and BUGS, variable identifiers in Stan may not contain a period character.\n\n\nStan reserves many strings for internal use and these may not be used as the name of a variable. An attempt to name a variable after an internal string results in the stanc translator halting with an error message indicating which reserved name was used and its location in the model code.\n\n\nThe name of the model cannot be used as a variable within the model. This is usually not a problem because the default in bin/stanc is to append _model to the name of the file containing the model specification. For example, if the model is in file foo.stan, it would not be legal to have a variable named foo_model when using the default model name through bin/stanc. With user-specified model names, variables cannot match the model.\n\n\n\nThe following list contains reserved words for Stan’s programming language. 
Not all of these features are implemented in Stan yet, but the tokens are reserved for future use.\nfor, in, while, repeat, until, if, then, else,\ntrue, false, target, struct, typedef, export,\nauto, extern, var, static, lower, upper, offset,\nmultiplier\nVariables should not be named after types, either, and thus may not be any of the following.\nint, real, complex, vector, simplex, unit_vector,\nsum_to_zero_vector, sum_to_zero_matrix, ordered, positive_ordered,\nrow_vector, matrix, cholesky_factor_corr,\ncolumn_stochastic_matrix,row_stochastic_matrix,\ncholesky_factor_cov, corr_matrix, cov_matrix, array\nThe following keywords or built-in functions are also reserved and cannot be used as variable names:\nprint, reject, profile, fatal_error, target, jacobian\nThe following block identifiers are reserved and cannot be used as variable names:\nfunctions, model, data, parameters, quantities,\ntransformed, generated\n\n\n\nVariable names will also conflict with the names of distributions suffixed with _lpdf, _lpmf, _lcdf, and _lccdf, _cdf, and _ccdf, such as normal_lcdf_log. No user-defined variable can take a name ending in _lupdf or _lupmf even if a corresponding _lpdf or _lpmf is not defined.\nUsing any of these variable names causes the stanc translator to halt and report the name and location of the variable causing the conflict.\n\n\n\nStan primarily generates code in C++, which features its own reserved words. It is legal to name a variable any of the following names, however doing so will lead to it being renamed _stan_NAME (e.g. 
_stan_public) behind the scenes (in the generated C++ code).\n\nalignas, alignof, and, and_eq, asm, bitand, bitor, bool,\ncase, catch, char, char16_t, char32_t, class, compl, const,\nconstexpr, const_cast, decltype, default, delete, do,\ndouble, dynamic_cast, enum, explicit, float, friend, goto,\ninline, long, mutable, namespace, new, noexcept, not, not_eq,\nnullptr, operator, or, or_eq, private, protected, public,\nregister, reinterpret_cast, short, signed, sizeof,\nstatic_assert, static_cast, switch, template, this, thread_local,\nthrow, try, typeid, typename, union, unsigned, using, virtual,\nvolatile, wchar_t, xor, xor_eq, fvar, STAN_MAJOR, STAN_MINOR,\nSTAN_PATCH, STAN_MATH_MAJOR, STAN_MATH_MINOR, STAN_MATH_PATCH\n\n\n\n\nThe legal characters for variable identifiers are given in the following table.\n\n\nThe alphanumeric characters and underscore in base ASCII are the only legal characters in Stan identifiers.\n\n\n\ncharacters\nASCII code points\n\n\n\n\na -- z\n97 – 122\n\n\nA -- Z\n65 – 90\n\n\n0 -- 9\n48 – 57\n\n\n_\n95\n\n\n\nAlthough not the most expressive character set, ASCII is the most portable and least prone to corruption through improper character encodings or decodings. Sticking to this range of ASCII makes Stan compatible with Latin-1 or UTF-8 encodings of these characters, which are byte-for-byte identical to ASCII.\n\n\n\nWithin comments, Stan can work with any ASCII-compatible character encoding, such as ASCII itself, UTF-8, or Latin1. 
It is up to user shells and editors to display them properly.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#vector-matrix-array-expressions.section", + "href": "reference-manual/expressions.html#vector-matrix-array-expressions.section", + "title": "Expressions", + "section": "", + "text": "Expressions for the Stan container objects, namely arrays, vectors, row vectors, matrices, and tuples, can all be constructed using expressions.\n\n\nSquare brackets may be wrapped around a sequence of comma separated primitive expressions to produce a row vector expression. For example, the expression [ 1, 10, 100 ] denotes a row vector of three elements with real values 1.0, 10.0, and 100.0. Applying the transpose operator to a row vector expression produces a vector expression. This syntax provides a way to declare and define small vectors in a single line, as follows.\nrow_vector[2] rv2 = [ 1, 2 ];\nvector[3] v3 = [ 3, 4, 5 ]';\nThe vector expression values may be compound expressions or variable names, so it is legal to write [ 2 * 3, 1 + 4] or [ x, y ], providing that x and y are primitive variables.\n\n\n\nA matrix expression consists of square brackets wrapped around a sequence of comma separated row vector expressions. This syntax provides a way to declare and define a matrix in a single line, as follows.\nmatrix[3, 2] m1 = [ [ 1, 2 ], [ 3, 4 ], [5, 6 ] ];\nAny expression denoting a row vector can be used in a matrix expression. For example, the following code is valid:\nvector[2] vX = [ 1, 10 ]';\nrow_vector[2] vY = [ 100, 1000 ];\nmatrix[3, 2] m2 = [ vX', vY, [ 1, 2 ] ];\n\n\n\nComplex vector expressions work the same way as real vector expressions. 
For example, the following are all legal Stan expressions and assignments.\ncomplex_vector[3] = [1 + 2i, 3 - 1.7i, 0]';\ncomplex_row_vector[2] = [12, -2i];\ncomplex_matrix[2, 3] = [[1 + 2i, 3 - 1.7i, 0],\n [3.9 - 1.234i, 176i, 1 + 1i]];\n\n\nThe empty expression [ ] is ambiguous and therefore is not allowed and similarly expressions such as [ [ ] ] or [ [ ], [ ] ] are not allowed.\n\n\n\n\nIf needed, it is possible to create an empty vector with\nrep_vector(e, 0)\nwhere the first expression e needs to be a scalar of type real.\nIf needed, it is possible to create an empty matrix with\nrep_matrix(e, 0, 0)\nwhere the first expression e needs to be a scalar of type real.\n\n\n\nCurly braces may be wrapped around a sequence of expressions to produce an array expression. For example, the expression { 1, 10, 100 } denotes an integer array of three elements with values 1, 10, and 100. This syntax is particularly convenient to define small arrays in a single line, as follows.\narray[3] int a = { 1, 10, 100 };\nThe values may be compound expressions, so it is legal to write { 2 * 3, 1 + 4 }. It is also possible to write two dimensional arrays directly, as in the following example.\narray[2, 3] int b = { { 1, 2, 3 }, { 4, 5, 6 } };\nThis way, b[1] is { 1, 2, 3 } and b[2] is { 4, 5, 6 }.\nWhitespace is always interchangeable in Stan, so the above can be laid out as follows to more clearly indicate the row and column structure of the resulting two dimensional array.\narray[2, 3] int b = { { 1, 2, 3 },\n { 4, 5, 6 } };\n\n\n\nThe empty array expression ({ }) is not allowed. See more about restrictions on array expressions in subsection Restrictions on values.\nIf needed, it is possible to create an empty array with\nrep_array(e, 0)\nwhere the first expression e determines the type of the array. For example, rep_array(0.0, 0) returns an empty real array of type real[], whereas rep_array({123}, 0) returns an empty two dimensional integer array of type int[ , ]. 
Only the type of the first argument is used, so the integer arrays {123} and {0} produce equivalent values.\n\n\n\nAny type of expression may be used within braces to form an array expression. In the simplest case, all of the elements will be of the same type and the result will be an array of elements of that type. For example, the elements of the array can be vectors, in which case the result is an array of vectors.\nvector[3] b;\nvector[3] c;\n// ...\narray[2] vector[3] d = { b, c };\nThe elements may also be a mixture of int and real typed expressions, in which case the result is an array of real values.\narray[2] real b = { 1, 1.9 };\n\n\n\nStan uses parentheses around a comma-separated sequence of expressions to construct a tuple. For example, we can construct a 2-tuple as follows.\ntuple(int, vector[3]) xy = (42, [1, 2.9, -1.3]');\nThe expression 42 is of type int and the expression [1, 2.9, -1.3] is of type row_vector so that [1, 2.9, -1.3]' is of type vector and of size 3. The whole tuple expression (42, [1, 2.9, -1.3]') thus has a sized type of tuple(int, vector[3]) and an unsized type (e.g., for a function argument) of tuple(int, vector).\nA tuple of one element can be created using the same style as languages like Python, with a trailing comma, e.g., (3.14,). For longer tuples, Stan does not support trailing commas.\n\n\n\nThere are some restrictions on how array expressions may be used that arise from their types being calculated bottom up and the basic data type and assignment rules of Stan.\n\n\nAlthough it is tempting to try to define a ragged array expression, all Stan data types are rectangular (or boxes or other higher-dimensional generalizations). 
Thus the following nested array expression will cause an error when it tries to create a non-rectangular array.\n{ { 1, 2, 3 }, { 4, 5 } } // compile time error: size mismatch\nThis may appear to be OK, because it is creating a two-dimensional integer array (array[,] int) out of two one-dimensional integer arrays (array[] int). But it is not allowed because the two one-dimensional arrays are not the same size. If the elements are array expressions, this can be diagnosed at compile time. If one or both expressions is a variable, then that won’t be caught until runtime.\n{ { 1, 2, 3 }, m } // runtime error if m not size 3\n\n\n\nBecause there is no way to infer the type of the result, the empty array expression ({ }) is not allowed. This does not sacrifice expressive power, because a declaration is sufficient to initialize a zero-element array.\narray[0] int a; // a is fully defined as zero element array\n\n\n\nThere is no way to declare or construct a zero-tuple in Stan. Tuples must be at least one element long. The expression () does not pick out a zero-tuple—it is ill formed.\nOne-tuples need a trailing comma, like (1,). The expression (1), without the comma, is of type int rather than a tuple.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#parentheses-for-grouping", + "href": "reference-manual/expressions.html#parentheses-for-grouping", + "title": "Expressions", + "section": "", + "text": "Any expression wrapped in parentheses is also an expression. Like in C++, but unlike in R, only the round parentheses, ( and ), are allowed. The square brackets [ and ] are reserved for array indexing and the curly braces { and } for grouping statements.\nWith parentheses it is possible to explicitly group subexpressions with operators. Without parentheses, the expression 1 + 2 * 3 has a subexpression 2 * 3 and evaluates to 7. 
With parentheses, this grouping may be made explicit with the expression 1 + (2 * 3). More importantly, the expression (1 + 2) * 3 has 1 + 2 as a subexpression and evaluates to 9.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#arithmetic-expressions.section", + "href": "reference-manual/expressions.html#arithmetic-expressions.section", + "title": "Expressions", + "section": "", + "text": "For integer and real-valued expressions, Stan supports the basic binary arithmetic operations of addition (+), subtraction (-), multiplication (*) and division (/) in the usual ways.\nFor integer expressions, Stan supports the modulus (%) binary arithmetic operation. Stan also supports the unary operation of negation for integer and real-valued expressions. For example, assuming n and m are integer variables and x and y real variables, the following expressions are legal.\n3.0 + 0.14\n-15\n2 * 3 + 1\n(x - y) / 2.0\n(n * (n + 1)) / 2\nx / n\nm % n\nThe negation, addition, subtraction, and multiplication operations are extended to matrices, vectors, and row vectors. The transpose operation, written using an apostrophe (') is also supported for vectors, row vectors, and matrices. Return types for matrix operations are the smallest types that can be statically guaranteed to contain the result. The full set of allowable input types and corresponding return types is detailed in the list of functions.\nFor example, if y and mu are variables of type vector and Sigma is a variable of type matrix, then (y - mu)' * Sigma * (y - mu) is a well-formed expression of type real. The type of the complete expression is inferred working outward from the subexpressions. The subexpression(s) y - mu are of type vector because the variables y and mu are of type vector. The transpose of this expression, the subexpression (y - mu)' is of type row_vector. 
Multiplication is left associative and transpose has higher precedence than multiplication, so the above expression is equivalent to the following fully specified form (((y - mu)') * Sigma) * (y - mu).\nThe type of subexpression (y - mu)' * Sigma is inferred to be row_vector, being the result of multiplying a row vector by a matrix. The whole expression’s type is thus the type of a row vector multiplied by a (column) vector, which produces a real value.\nStan provides elementwise matrix multiplication (e.g., a .* b) and division (e.g., a ./ b) operations. These provide a shorthand to replace loops, but are not intrinsically more efficient than a version programmed with elementwise calculations and assignments in a loop. For example, given declarations,\nvector[N] a;\nvector[N] b;\nvector[N] c;\nthe assignment,\nc = a .* b;\nproduces the same result with roughly the same efficiency as the loop\nfor (n in 1:N) {\n c[n] = a[n] * b[n];\n}\nStan supports exponentiation (^) of integer and real-valued expressions. The return type of exponentiation is always a real value. For example, assuming n and m are integer variables and x and y real variables, the following expressions are legal.\n3 ^ 2\n3.0 ^ -2\n3.0 ^ 0.14\nx ^ n\nn ^ x\nn ^ m\nx ^ y\nExponentiation is right associative, so the expression 2 ^ 3 ^ 4 is equivalent to the fully specified form 2 ^ (3 ^ 4).\n\n\nThe precedence and associativity of operators, as well as built-in syntax such as array indexing and function application is given in tabular form in the following table.\n\n\nStan’s unary, binary, and ternary operators, with their precedences, associativities, place in an expression, and a description. The last two lines list the precedence of function application and array, matrix, and vector indexing. The operators are listed in order of precedence, from least tightly binding to most tightly binding. 
The full set of legal arguments and corresponding result types are provided in the function documentation for the operators (i.e., operator*(int, int):int indicates the application of the multiplication operator to two integers, which returns an integer). Parentheses may be used to group expressions explicitly rather than relying on precedence and associativity.\n\n\n\n\n\n\n\n\n\n\nOp.\nPrec.\nAssoc.\nPlacement\nDescription\n\n\n\n\n? ~ :\n10\nright\nternary infix\nconditional\n\n\n||\n9\nleft\nbinary infix\nlogical or\n\n\n&&\n8\nleft\nbinary infix\nlogical and\n\n\n==\n7\nleft\nbinary infix\nequality\n\n\n!=\n7\nleft\nbinary infix\ninequality\n\n\n<\n6\nleft\nbinary infix\nless than\n\n\n<=\n6\nleft\nbinary infix\nless than or equal\n\n\n>\n6\nleft\nbinary infix\ngreater than\n\n\n>=\n6\nleft\nbinary infix\ngreater than or equal\n\n\n+\n5\nleft\nbinary infix\naddition\n\n\n-\n5\nleft\nbinary infix\nsubtraction\n\n\n*\n4\nleft\nbinary infix\nmultiplication\n\n\n.*\n4\nleft\nbinary infix\nelementwise multiplication\n\n\n/\n4\nleft\nbinary infix\n(right) division\n\n\n./\n4\nleft\nbinary infix\nelementwise division\n\n\n%\n4\nleft\nbinary infix\nmodulus\n\n\n\\\n3\nleft\nbinary infix\nleft division\n\n\n%/%\n3\nleft\nbinary infix\ninteger division\n\n\n!\n2\nn/a\nunary prefix\nlogical negation\n\n\n-\n2\nn/a\nunary prefix\nnegation\n\n\n+\n2\nn/a\nunary prefix\npromotion (no-op in Stan)\n\n\n^\n1\nright\nbinary infix\nexponentiation\n\n\n.^\n1\nright\nbinary infix\nelementwise exponentiation\n\n\n'\n0\nn/a\nunary postfix\ntransposition\n\n\n()\n0\nn/a\nprefix, wrap\nfunction application\n\n\n[]\n0\nleft\nprefix, wrap\narray, matrix indexing\n\n\n\nOther expression-forming operations, such as function application and subscripting bind more tightly than any of the arithmetic operations.\nThe precedence and associativity determine how expressions are interpreted. Because addition is left associative, the expression a + b + c is interpreted as (a + b) + c. 
Similarly, a / b * c is interpreted as (a / b) * c.\nBecause multiplication has higher precedence than addition, the expression a * b + c is interpreted as (a * b) + c and the expression a + b * c is interpreted as a + (b * c). Similarly, 2 * x + 3 * - y is interpreted as (2 * x) + (3 * (-y)).\nTransposition and exponentiation bind more tightly than any other arithmetic or logical operation. For vectors, row vectors, and matrices, -u' is interpreted as -(u'), u * v' as u * (v'), and u' * v as (u') * v. For integers and reals, -n ^ 3 is interpreted as -(n ^ 3).", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#conditional-operator.section", + "href": "reference-manual/expressions.html#conditional-operator.section", + "title": "Expressions", + "section": "", + "text": "The ternary conditional operator is unique in that it takes three arguments and uses a mixed syntax. If a is an expression of type int and b and c are expressions that can be converted to one another (e.g., compared with ==), then\na ? b : c\nis an expression of the promoted type of b and c. The result of this expression is b if condition a is true, c otherwise. The only promotion allowed in Stan is integer -> real -> complex; e.g. if one argument is of type int and the other of type real, the conditional expression as a whole is of type real. In other cases, the arguments have to be of the same underlying Stan type (i.e., constraints don’t count, only the shape) and the conditional expression is of that type.\n\n\nThe conditional operator is the most loosely binding operator, so its arguments rarely require parentheses for disambiguation. For example,\na > 0 || b < 0 ? c + d : e - f\nis equivalent to the explicitly grouped version\n(a > 0 || b < 0) ? (c + d) : (e - f)\nThe latter is easier to read even if the parentheses are not strictly necessary.\n\n\n\nThe conditional operator is right associative, so that\na ? b : c ? 
d : e\nparses as if explicitly grouped as\na ? b : (c ? d : e)\nAgain, the explicitly grouped version is easier to read.\n\n\n\n\nStan’s conditional operator works very much like its C++ analogue. The first argument must be an expression denoting an integer. Typically this is a variable or a relation operator, as in the variable a in the example above. Then there are two resulting arguments, the first being the result returned if the condition evaluates to true (i.e., non-zero) and the second if the condition evaluates to false (i.e., zero). In the example above, the value b is returned if the condition evaluates to a non-zero value and c is returned if the condition evaluates to zero.\n\n\nThe key property of the conditional operator that makes it so useful in high-performance computing is that it only evaluates the returned subexpression, not the alternative expression. In other words, it is not like a typical function that evaluates its argument expressions eagerly in order to pass their values to the function. As usual, the saving is mostly in the derivatives that do not get computed rather than the unnecessary function evaluation itself.\n\n\n\nIf one return expression is a data value (an expression involving only constants and variables defined in the data or transformed data block), and the other is not, then the ternary operator will promote the data value to a parameter value. This can cause needless work calculating derivatives in some cases and be less efficient than a full if-then conditional statement. For example,\ndata {\n array[10] real x;\n // ...\n}\nparameters {\n array[10] real z;\n // ...\n}\nmodel {\n y ~ normal(cond ? x : z, sigma);\n // ...\n}\nwould be more efficiently (if not more transparently) coded as\nif (cond) {\n y ~ normal(x, sigma);\n} else {\n y ~ normal(z, sigma);\n}\nThe conditional statement, like the conditional operator, only evaluates one of the result statements. 
In this case, the variable x will not be promoted to a parameter and thus not cause any needless work to be carried out when propagating the chain rule during derivative calculations.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#language-indexing.section", + "href": "reference-manual/expressions.html#language-indexing.section", + "title": "Expressions", + "section": "", + "text": "Stan arrays, matrices, vectors, and row vectors are all accessed using the same array-like notation. For instance, if x is a variable of type array [] real (a one-dimensional array of reals) then x[1] is the value of the first element of the array.\nSubscripting has higher precedence than any of the arithmetic operations. For example, alpha * x[1] is equivalent to alpha * (x[1]).\nMultiple subscripts may be provided within a single pair of square brackets. If x is of type array[,] real, a two-dimensional array, then x[2, 501] is of type real.\n\n\nThe subscripting operator also returns subarrays of arrays. For example, if x is of type array[,,] real, then x[2] is of type array[,] real, and x[2, 3] is of type array[] real. As a result, the expressions x[2, 3] and x[2][3] have the same meaning.\n\n\n\nIf Sigma is a variable of type matrix, then Sigma[1] denotes the first row of Sigma and has the type row_vector.\n\n\n\nStan supports mixed indexing of arrays and their vector, row vector or matrix values. For example, if m is of type matrix[ , ], a two-dimensional array of matrices, then m[1] refers to the first row of the array, which is a one-dimensional array of matrices. More than one index may be used, so that m[1, 2] is of type matrix and denotes the matrix in the first row and second column of the array. Continuing to add indices, m[1, 2, 3] is of type row_vector and denotes the third row of the matrix denoted by m[1, 2]. 
Finally, m[1, 2, 3, 4] is of type real and denotes the value in the third row and fourth column of the matrix that is found at the first row and second column of the array m.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#language-multi-indexing.section", + "href": "reference-manual/expressions.html#language-multi-indexing.section", + "title": "Expressions", + "section": "", + "text": "In addition to single integer indexes, as described in the language indexing section, Stan supports multiple indexing. Multiple indexes can be integer arrays of indexes, lower bounds, upper bounds, lower and upper bounds, or simply shorthand for all of the indexes. If the upper bound is smaller than the lower bound, the range is empty (unlike, e.g., in R). The upper bound and lower bound can be expressions that evaluate to integer. A complete list of index types is given in the following table.\n\n\nTypes of indexes and examples with one-dimensional containers of size N and an integer array ii of type array[] int and size K.\n\n\n\nindex type\nexample\nvalue\n\n\n\n\ninteger\na[11]\nvalue of a at index 11\n\n\ninteger array\na[ii]\na[ii[1]], …, a[ii[K]]\n\n\nlower bound\na[3:]\na[3], …, a[N]\n\n\nupper bound\na[:5]\na[1], …, a[5]\n\n\nrange\na[2:7]\na[2], …, a[7]\n\n\nrange\na[7:2]\n[]\n\n\nrange\na[5-3:5+2]\na[2], …, a[7]\n\n\nall\na[:]\na[1], …, a[N]\n\n\nall\na[]\na[1], …, a[N]\n\n\n\nThe range indexing with : allows only increasing sequences. Indexing with a decreasing sequence can be made by creating an integer array in the following way:\n array[6] int ii = reverse(linspaced_int_array(6, 2, 7));\nThen a[ii] evaluates to a[7], …, a[2].\n\n\n\nThe fundamental semantic rule for dealing with multiple indexes is the following. If idxs is a multiple index, then it produces an indexable position in the result. 
To evaluate that index position in the result, the index is first passed to the multiple index, and the resulting index used.\na[idxs, ...][i, ...] = a[idxs[i], ...][...]\nOn the other hand, if idx is a single index, it reduces the dimensionality of the output, so that\na[idx, ...] = a[idx][...]\nThe only issue is what happens with matrices and vectors. Vectors work just like arrays. Matrices with multiple row indexes and multiple column indexes produce matrices. Matrices with multiple row indexes and a single column index become (column) vectors. Matrices with a single row index and multiple column indexes become row vectors. The types are summarized in the following table.\n\n\nSpecial rules for reducing matrices based on whether the argument is a single or multiple index. Examples are for a matrix a, with integer single indexes i and j and integer array multiple indexes is and js. The same typing rules apply for all multiple indexes.\n\n\n\nexample\nrow index\ncolumn index\nresult type\n\n\n\n\na[i]\nsingle\nn/a\nrow vector\n\n\na[is]\nmultiple\nn/a\nmatrix\n\n\na[i, j]\nsingle\nsingle\nreal\n\n\na[i, js]\nsingle\nmultiple\nrow vector\n\n\na[is, j]\nmultiple\nsingle\nvector\n\n\na[is, js]\nmultiple\nmultiple\nmatrix\n\n\n\nEvaluation of matrices with multiple indexes is defined to respect the following distributivity conditions.\nm[idxs1, idxs2][i, j] = m[idxs1[i], idxs2[j]]\nm[idxs, idx][j] = m[idxs[j], idx]\nm[idx, idxs][j] = m[idx, idxs[j]]\nEvaluation of arrays of matrices and arrays of vectors or row vectors is defined recursively, beginning with the array dimensions.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#function-application.section", + "href": "reference-manual/expressions.html#function-application.section", + "title": "Expressions", + "section": "", + "text": "Stan provides a range of built in mathematical and statistical functions, which are documented in the 
built-in function documentation.\nExpressions in Stan may consist of the name of a function followed by a sequence of zero or more argument expressions. For instance, log(2.0) is the expression of type real denoting the result of applying the natural logarithm to the value of the real literal 2.0.\nSyntactically, function application has higher precedence than any of the other operators, so that y + log(x) is interpreted as y + (log(x)).\n\n\nEach function has a type signature which determines the allowable type of its arguments and its return type. For instance, the function signature for the logarithm function can be expressed as\nreal log(real);\nand the signature for the lmultiply function is\nreal lmultiply(real, real);\nA function is uniquely determined by its name and its sequence of argument types. For instance, the following two functions are different functions.\nreal mean(array [] real);\nreal mean(vector);\nThe first applies to a one-dimensional array of real values and the second to a vector.\nThe identity conditions for functions explicitly forbid having two functions with the same name and argument types but different return types. This restriction also makes it possible to infer the type of a function expression compositionally by only examining the type of its subexpressions.\n\n\n\nConstants in Stan are nothing more than nullary (no-argument) functions. For instance, the mathematical constants \\(\\pi\\) and \\(e\\) are represented as nullary functions named pi() and e(). See the Stan Functions Reference built-in constants section for a list of built-in constants.\n\n\n\nBecause of integer to real type promotion, rules must be established for which function is called given a sequence of argument types. 
The scheme employed by Stan is the same as that used by C++, which resolves a function call to the function requiring the minimum number of type promotions.\nFor example, consider a situation in which the following two function signatures have been registered for foo.\nreal foo(real, real);\nint foo(int, int);\nThe use of foo in the expression foo(1.0, 1.0) resolves to foo(real, real), and thus the expression foo(1.0, 1.0) itself is assigned a type of real.\nBecause integers may be promoted to real values, the expression foo(1, 1) could potentially match either foo(real, real) or foo(int, int). The former requires two type promotions and the latter requires none, so foo(1, 1) is resolved to function foo(int, int) and is thus assigned the type int.\nThe expression foo(1, 1.0) has argument types (int, real) and thus does not explicitly match either function signature. By promoting the integer expression 1 to type real, it is able to match foo(real, real), and hence the type of the function expression foo(1, 1.0) is real.\nIn some cases (though not for any built-in Stan functions), a situation may arise in which the function referred to by an expression remains ambiguous. For example, consider a situation in which there are exactly two functions named bar with the following signatures.\nreal bar(real, int);\nreal bar(int, real);\nWith these signatures, the expression bar(1.0, 1) and bar(1, 1.0) resolve to the first and second of the above functions, respectively. The expression bar(1.0, 1.0) is illegal because real values may not be demoted to integers. The expression bar(1, 1) is illegal for a different reason. If the first argument is promoted to a real value, it matches the first signature, whereas if the second argument is promoted to a real value, it matches the second signature. The problem is that these both require one promotion, so the function name bar is ambiguous. 
If there is not a unique function requiring fewer promotions than all others, as with bar(1, 1) given the two declarations above, the Stan compiler will flag the expression as illegal.\n\n\n\nFor most of the distributions supported by Stan, there is a corresponding random-number generating function. These random number generators are named by the distribution with the suffix _rng. For example, a univariate normal random number can be generated by normal_rng(0, 1); only the parameters of the distribution, here a location (0) and scale (1) are specified because the variate is generated.\n\n\nThe use of random-number generating functions is restricted to the transformed data and generated quantities blocks; attempts to use them elsewhere will result in a parsing error with a diagnostic message. They may also be used in the bodies of user-defined functions whose names end in _rng.\nThis allows the random number generating functions to be used for simulation in general, and for Bayesian posterior predictive checking in particular.\n\n\n\nPosterior predictive checks typically use the parameters of the model to generate simulated data (at the individual and optionally at the group level for hierarchical models), which can then be compared informally using plots and formally by means of test statistics, to the actual data in order to assess the suitability of the model; see Chapter 6 of (Gelman et al. 
2013) for more information on posterior predictive checks.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#type-inference", + "href": "reference-manual/expressions.html#type-inference", + "title": "Expressions", + "section": "", + "text": "Stan is strongly statically typed, meaning that the implementation type of an expression can be resolved at compile time.\n\n\nThe primitive implementation types for Stan are\nint, real, complex, vector, row_vector, matrix, complex_vector,\ncomplex_row_vector, complex_matrix\nEvery basic declared type corresponds to a primitive type; the following table shows the mapping from types to their primitive types.\n\n\nThe table shows the variable declaration types of Stan and their corresponding primitive implementation type. Stan functions, operators, and probability functions have argument and result types declared in terms of primitive types plus array dimensionality.\n\n\n\ntype\nprimitive type\n\n\n\n\nint\nint\n\n\nreal\nreal\n\n\nvector\nvector\n\n\nsimplex\nvector\n\n\nunit_vector\nvector\n\n\nsum_to_zero_vector\nvector\n\n\nordered\nvector\n\n\npositive_ordered\nvector\n\n\nrow_vector\nrow_vector\n\n\nmatrix\nmatrix\n\n\ncov_matrix\nmatrix\n\n\ncorr_matrix\nmatrix\n\n\ncholesky_factor_cov\nmatrix\n\n\ncholesky_factor_corr\nmatrix\n\n\ncolumn_stochastic_matrix\nmatrix\n\n\nrow_stochastic_matrix\nmatrix\n\n\nsum_to_zero_matrix\nmatrix\n\n\ncomplex_vector\ncomplex_vector\n\n\ncomplex_row_vector\ncomplex_row_vector\n\n\ncomplex_matrix\ncomplex_matrix\n\n\n\nA full implementation type consists of a primitive implementation type and an integer array dimensionality greater than or equal to zero. These will be written to emphasize their array-like nature. For example, array [] real has an array dimensionality of 1, int an array dimensionality of 0, and array [,,] int an array dimensionality of 3. 
The implementation type matrix[ , , ] has a total of five dimensions and takes up to five indices, three from the array and two from the matrix.\nRecall that the array dimensions come before the matrix or vector dimensions in an expression such as the following declaration of a three-dimensional array of matrices.\narray[I, J, K] matrix[M, N] a;\nThe matrix a is indexed as a[i, j, k, m, n] with the array indices first, followed by the matrix indices, with a[i, j, k] being a matrix and a[i, j, k, m] being a row vector.\n\n\n\n\nStan’s type inference rules define the implementation type of an expression based on a background set of variable declarations. The rules work bottom up from primitive literal and variable expressions to complex expressions.\n\n\n\nThere are two basic promotion rules,\n\nint types may be promoted to real, and\nreal types may be promoted to complex.\n\nPlus, promotion is transitive, so that\n\nif type U can be promoted to type V and type V can be promoted to type T, then U can be promoted to T.\n\nThe first rule means that expressions of type int may be used anywhere an expression of type real is specified, namely in assignment or function argument passing. An integer is promoted to real by casting it in the underlying C++ code.\nThe remaining rules have to do with covariant typing rules, which say that a container of type U may be promoted to a container of the same shape of type T if U can be promoted to T. For vector and matrix types, this induces three rules,\n\nvector may be promoted to complex_vector,\nrow_vector may be promoted to complex_row_vector\nmatrix may be promoted to complex_matrix.\n\nFor array types, there’s a single rule\n\narray[...] U may be promoted to array[...] 
T if U can be promoted to T.\n\nFor example, this means array[,] int may be used where array [,] real or array [,] complex is required; as another example, array[] real may be used anywhere array[] complex is required.\nTuples have the natural extension of the above rules, applied to all sub-types at once\n\nA tuple(U1, ..., UN) may be promoted to a tuple(T1, ..., TN) if every Un can be promoted to Tn for n in 1:N\n\n\n\nAn integer literal expression such as 42 is of type int. Real literals such as 42.0 are of type real. Imaginary literals such as -17i are of type complex. The expression 7 - 2i acts like a complex literal, but technically it combines a real literal 7 and an imaginary literal 2i through subtraction.\n\n\n\nThe type of a variable declared locally or in a previous block is determined by its declaration. The type of a loop variable is int.\nThere is always a unique declaration for each variable in each scope because Stan prohibits the redeclaration of an already-declared variable.1\n\n\n\nIf e is an expression of total dimensionality greater than or equal to \\(N\\), then the type of expression e[i1, i2, ..., iN] is the same as that of e[i1][i2]...[iN], so it suffices to define the type of a singly-indexed function. Suppose e is an expression and i is an expression of primitive type int. 
Then\n\nif e is an expression of type array[i1, i2, ..., iN] T and k, i1, …, iN are expressions of type int, then e[k] is an expression of type array[i2, ..., iN] T,\nif e is an expression of type array[i] T with i and k expressions of type int, then e[k] is of type T,\nif e has implementation type vector or row_vector, dimensionality 0, then e[i] has implementation type real,\nif e has implementation type matrix, then e[i] has type row_vector,\nif e has implementation type complex_vector or complex_row_vector and i is an expression of type int, then e[i] is an expression of type complex, and\nif e has implementation type complex_matrix, and i is an expression of type int, then e[i] is an expression of type complex_row_vector.\n\n\n\n\nIf f is the name of a function and e1,...,eN are expressions for \\(N \\geq 0\\), then f(e1,...,eN) is an expression whose type is determined by the return type in the function signature for f given e1 through eN. Recall that a function signature is a declaration of the argument types and the result type.\nIn looking up functions, binary operators like real * real are defined as operator*(real, real) in the documentation and index.\nIn matching a function definition, all of the promotion rules are in play (integers may be promoted to reals, reals to complex, and containers may be promoted if their types are promoted). For example, arguments of type int may be promoted to type real or complex if necessary (see the subsection on type promotion in the function application section), a real argument will be promoted to complex if necessary, a vector will be promoted to complex_vector if necessary, and so on.\nIn general, matrix operations return the lowest inferable type. 
For example, row_vector * vector returns a value of type real, which is declared in the function documentation and index as real operator*(row_vector, vector).", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#higher-order-functions", + "href": "reference-manual/expressions.html#higher-order-functions", + "title": "Expressions", + "section": "", + "text": "There are several expression constructions in Stan that act as higher-order functions.2\nThe higher-order functions and the signature of their argument functions are listed in the following pair of tables.\n\n\nHigher-order functions in Stan with their argument function types. The first group of arguments can be a function of parameters or data. The second group of arguments, consisting of a real and integer array in all cases, must be expressions involving only data and literals.\n\n\n\n\n\n\n\n\n\nfunction\nparameter or data args\ndata args\nreturn type\n\n\n\n\nalgebra_solver\nvector, vector\narray [] real, array [] real\nvector\n\n\nalgebra_solver_newton\nvector, vector\narray [] real, array [] real\nvector\n\n\nintegrate_1d,\nreal, real, array [] real\narray [] real, array [] real\nreal\n\n\nintegrate_ode_X,\nreal, array [] real, array [] real\narray [] real, array [] real\narray [] real\n\n\nmap_rect\nvector, vector\narray [] real, array [] real\nvector\n\n\n\nFor example, the integrate_ode_rk45 function can be used to integrate differential equations in Stan:\nfunctions {\n array [] real foo(real t,\n array [] real y,\n array [] real theta,\n array [] real x_r,\n array [] real x_i) {\n // ...\n }\n}\n// ...\nint<lower=1> T;\narray[2] real y0;\nreal t0;\narray[T] real ts;\narray[1] real theta;\narray[0] real x_r;\narray[0] int x_i;\n// ...\narray[T, 2] real y_hat = integrate_ode_rk45(foo, y0, t0,\n ts, theta, x_r, x_i);\nThe function argument is foo, the name of the user-defined function; as shown in the higher-order functions 
table, integrate_ode_rk45 takes a real array, a real, three more real arrays, and an integer array as arguments and returns a 2D real array.\n\n\n\nVariadic Higher-order functions in Stan with their argument function types. The first group of arguments are restricted in type. The sequence of trailing arguments can be of any length with any types.\n\n\n\nfunction\nrestricted args\nreturn type\n\n\n\n\nsolve_X\nvector\nvector\n\n\node_X,\nvector, real, array [] real\nvector[]\n\n\nreduce_sum\narray[] T, T1, T2\nreal\n\n\n\nT, T1, and T2 can be any Stan type.\nFor example, the ode_rk45 function can be used to integrate differential equations in Stan:\nfunctions {\n vector foo(real t, vector y, real theta, vector beta,\n array [] real x_i, int index) {\n // ...\n }\n}\n// ...\nint<lower=1> T;\nvector[2] y0;\nreal t0;\narray[T] real ts;\nreal theta;\nvector[7] beta;\narray[10] int x_i;\nint index;\n// ...\narray[T] vector[2] y_hat = ode_rk45(foo, y0, t0, ts, theta,\n beta, x_i, index);\nThe function argument is foo, the name of the user-defined function. As shown in the variadic higher-order functions table, ode_rk45 takes a real, a vector, a real, a real array, and a sequence of arguments whose types match those at the end of foo and returns an array of vectors.\n\n\n\nThe function argument to higher-order functions is always passed as the first argument. This function argument must be provided as the name of a user-defined or built-in function. No quotes are necessary.\n\n\n\nSome of the arguments to higher-order functions are restricted to data. 
This means they must be expressions containing only data variables, transformed data variables, or literals; they may contain arbitrary functions applied to data variables or literals, but must not contain parameters, transformed parameters, or local variables from any block other than transformed data.\nFor user-defined functions the qualifier data may be prepended to the type to restrict the argument to data-only variables.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#chain-rule-and-derivatives", + "href": "reference-manual/expressions.html#chain-rule-and-derivatives", + "title": "Expressions", + "section": "", + "text": "Derivatives of the log probability function defined by a model are used in several ways by Stan. The Hamiltonian Monte Carlo samplers, including NUTS, use gradients to guide updates. The BFGS optimizers also use gradients to guide search for posterior modes.\n\n\nUnlike evaluations in pure mathematics, evaluation of derivatives in Stan is done by applying the chain rule on an expression-by-expression basis, evaluating using floating-point arithmetic. As a result, models such as the following are problematic for inference involving derivatives.\nparameters {\n real x;\n}\nmodel {\n x ~ normal(sqrt(x - x), 1);\n}\nAlgebraically, the distribution statement in the model could be reduced to\n x ~ normal(0, 1);\nand it would seem the model should produce unit normal draws for x. But rather than canceling, the expression sqrt(x - x) causes a problem for derivatives. 
The cause is the mechanistic evaluation of the chain rule,\n\\[\n\\begin{array}{rcl}\n\\frac{d}{dx} \\sqrt{x - x}\n& = &\n\\frac{1}{2 \\sqrt{x - x}} \\times \\frac{d}{dx} (x - x)\n\\\\[4pt]\n& = &\n\\frac{1}{0} \\times (1 - 1)\n\\\\[4pt]\n& = &\n\\infty \\times 0\n\\\\[4pt]\n& = & \\mathrm{NaN}.\n\\end{array}\n\\]\nRather than the \\(x - x\\) canceling out, it introduces a 0 into the numerator and denominator of the chain-rule evaluation.\nThe only way to avoid this kind of problem is to be careful to do the necessary algebraic reductions as part of the model and not introduce expressions like sqrt(x - x) for which the chain rule produces not-a-number values.\n\n\n\nThe best way to diagnose whether something is going wrong with the derivatives is to use the test-gradient option to the sampler or optimizer inputs; this option is available in both Stan and RStan (though it may be slow, because it relies on finite differences to make a comparison to the built-in automatic differentiation).\nFor example, compiling the above model to an executable sqrt-x-minus-x in CmdStan, the test can be run as\n> ./sqrt-x-minus-x diagnose test=gradient\nwhich produces\n...\nTEST GRADIENT MODE\n\n Log probability=-0.393734\n\n param idx value model finite diff error\n 0 -0.887393 nan 0 nan\nEven though finite differences calculates the right gradient of 0, automatic differentiation follows the chain rule and produces a not-a-number output.", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/expressions.html#footnotes", + "href": "reference-manual/expressions.html#footnotes", + "title": "Expressions", + "section": "Footnotes", + "text": "Footnotes\n\n\nLanguages such as C++ and R allow the declaration of a variable of a given name in a narrower scope to hide (take precedence over for evaluation) a variable defined in a containing scope.↩︎\nInternally, they are implemented as their own expression types because Stan doesn’t have 
object-level functional types (yet).↩︎", + "crumbs": [ + "Reference Manual", + "Language", + "Expressions" + ] + }, + { + "objectID": "reference-manual/encoding.html", + "href": "reference-manual/encoding.html", + "title": "Character Encoding", + "section": "", + "text": "The content of a Stan program must be coded in ASCII. All identifiers must consist of only ASCII alpha-numeric characters and the underscore character. All arithmetic operators and punctuation must be coded in ASCII.\n\n\nThe UTF-8 encoding of Unicode and the Latin-1 (ISO-8859-1) encoding share the first 128 code points with ASCII and thus cannot be distinguished from ASCII. That means you can set editors, etc., to use UTF-8 or Latin-1 (or the other Latin-n variants) without worrying that the content of a Stan program will be destroyed.\n\n\n\n\nAny bytes on a line after a line-comment sequence (// or #) are ignored up until the ASCII newline character (\\n). They may thus be written in any character encoding which is convenient.\nAny content after a block comment open sequence in ASCII (/*) up to the closing block comment (*/) is ignored, and thus may also be written in whatever character set is convenient.\n\n\n\nThe raw byte sequence within a string literal is escaped according to the C++ standard. In particular, this means that UTF-8 encoded strings are supported, however they are not tested for invalid byte sequences. A print, reject, or fatal_error statement should properly display Unicode characters if your terminal supports the encoding used in the input. In other words, Stan simply preserves any string of bytes between two double quotes (\") when passing to C++. 
On compliant terminals, this allows the use of glyphs and other characters from encodings such as UTF-8 that fall outside the ASCII-compatible range.\nASCII is the recommended encoding for maximum portability, because it encodes the ASCII characters (Unicode code points 0–127) using the same sequence of bytes as the UTF-8 encoding of Unicode and common ISO-8859 extensions of Latin.", + "crumbs": [ + "Reference Manual", + "Language", + "Character Encoding" + ] + }, + { + "objectID": "reference-manual/encoding.html#content-characters", + "href": "reference-manual/encoding.html#content-characters", + "title": "Character Encoding", + "section": "", + "text": "The content of a Stan program must be coded in ASCII. All identifiers must consist of only ASCII alpha-numeric characters and the underscore character. All arithmetic operators and punctuation must be coded in ASCII.\n\n\nThe UTF-8 encoding of Unicode and the Latin-1 (ISO-8859-1) encoding share the first 128 code points with ASCII and thus cannot be distinguished from ASCII. That means you can set editors, etc., to use UTF-8 or Latin-1 (or the other Latin-n variants) without worrying that the content of a Stan program will be destroyed.", + "crumbs": [ + "Reference Manual", + "Language", + "Character Encoding" + ] + }, + { + "objectID": "reference-manual/encoding.html#comment-characters", + "href": "reference-manual/encoding.html#comment-characters", + "title": "Character Encoding", + "section": "", + "text": "Any bytes on a line after a line-comment sequence (// or #) are ignored up until the ASCII newline character (\\n). 
They may thus be written in any character encoding which is convenient.\nAny content after a block comment open sequence in ASCII (/*) up to the closing block comment (*/) is ignored, and thus may also be written in whatever character set is convenient.", + "crumbs": [ + "Reference Manual", + "Language", + "Character Encoding" + ] + }, + { + "objectID": "reference-manual/encoding.html#string-literals", + "href": "reference-manual/encoding.html#string-literals", + "title": "Character Encoding", + "section": "", + "text": "The raw byte sequence within a string literal is escaped according to the C++ standard. In particular, this means that UTF-8 encoded strings are supported, however they are not tested for invalid byte sequences. A print, reject, or fatal_error statement should properly display Unicode characters if your terminal supports the encoding used in the input. In other words, Stan simply preserves any string of bytes between two double quotes (\") when passing to C++. On compliant terminals, this allows the use of glyphs and other characters from encodings such as UTF-8 that fall outside the ASCII-compatible range.\nASCII is the recommended encoding for maximum portability, because it encodes the ASCII characters (Unicode code points 0–127) using the same sequence of bytes as the UTF-8 encoding of Unicode and common ISO-8859 extensions of Latin.", + "crumbs": [ + "Reference Manual", + "Language", + "Character Encoding" + ] + }, + { + "objectID": "reference-manual/deprecations.html", + "href": "reference-manual/deprecations.html", + "title": "Deprecated Features", + "section": "", + "text": "This appendix lists currently deprecated functionality along with how to replace it.\nStarting with Stan 2.29, minor (syntax-level) deprecations can be removed 3 versions after release; e.g., syntax deprecated in Stan 2.20 will be removed in Stan 2.23 and placed in Removed Features. 
The Stan compiler can automatically update many of these on behalf of the user for at least one version after they are removed.\nAny feature which changes semantic meaning (such as the upgraded ODE solver interface) will not be removed until a major version change (e.g., Stan 3.0).\n\n\nDeprecated: The distribution lkj_cov is deprecated.\nReplacement: Replace lkj_cov_lpdf(...) with an lkj_corr distribution on the correlation matrix and independent lognormal distributions on the scales. That is, replace\ncov_matrix[K] Sigma;\n// ...\nSigma ~ lkj_cov(mu, tau, eta);\nwith\ncorr_matrix[K] Omega;\nvector<lower=0>[K] sigma;\n// ...\nOmega ~ lkj_corr(eta);\nsigma ~ lognormal(mu, tau);\n// ...\ncov_matrix[K] Sigma;\nSigma = quad_form_diag(Omega, sigma);\nThe variable Sigma may be defined as a local variable in the model block or as a transformed parameter. An even more efficient transform would use Cholesky factors rather than full correlation matrix types.\nScheduled Removal: Stan 3.0 or later.\n\n\n\nDeprecated: Using functions that end in _lp in the transformed parameters block.\nReplacement: Use _jacobian functions and the jacobian += statement instead. These allow for change-of-variable adjustments which can be conditionally enabled by Stan’s algorithms.\n\n\n\nSeveral built-in Stan functions have been deprecated. Consult the functions reference for more information.", + "crumbs": [ + "Reference Manual", + "Language", + "Deprecated Features" + ] + }, + { + "objectID": "reference-manual/deprecations.html#lkj_cov-distribution", + "href": "reference-manual/deprecations.html#lkj_cov-distribution", + "title": "Deprecated Features", + "section": "", + "text": "Deprecated: The distribution lkj_cov is deprecated.\nReplacement: Replace lkj_cov_lpdf(...) with an lkj_corr distribution on the correlation matrix and independent lognormal distributions on the scales. 
That is, replace\ncov_matrix[K] Sigma;\n// ...\nSigma ~ lkj_cov(mu, tau, eta);\nwith\ncorr_matrix[K] Omega;\nvector<lower=0>[K] sigma;\n// ...\nOmega ~ lkj_corr(eta);\nsigma ~ lognormal(mu, tau);\n// ...\ncov_matrix[K] Sigma;\nSigma = quad_form_diag(Omega, sigma);\nThe variable Sigma may be defined as a local variable in the model block or as a transformed parameter. An even more efficient transform would use Cholesky factors rather than full correlation matrix types.\nScheduled Removal: Stan 3.0 or later.", + "crumbs": [ + "Reference Manual", + "Language", + "Deprecated Features" + ] + }, + { + "objectID": "reference-manual/deprecations.html#use-of-_lp-functions-in-transformed-parameters", + "href": "reference-manual/deprecations.html#use-of-_lp-functions-in-transformed-parameters", + "title": "Deprecated Features", + "section": "", + "text": "Deprecated: Using functions that end in _lp in the transformed parameters block.\nReplacement: Use _jacobian functions and the jacobian += statement instead. These allow for change-of-variable adjustments which can be conditionally enabled by Stan’s algorithms.", + "crumbs": [ + "Reference Manual", + "Language", + "Deprecated Features" + ] + }, + { + "objectID": "reference-manual/deprecations.html#deprecated-functions", + "href": "reference-manual/deprecations.html#deprecated-functions", + "title": "Deprecated Features", + "section": "", + "text": "Several built-in Stan functions have been deprecated. 
Consult the functions reference for more information.", + "crumbs": [ + "Reference Manual", + "Language", + "Deprecated Features" + ] + }, + { + "objectID": "reference-manual/blocks.html", + "href": "reference-manual/blocks.html", + "title": "Program Blocks", + "section": "", + "text": "A Stan program is organized into a sequence of named blocks, the bodies of which consist of variable declarations, followed in the case of some blocks with statements.\n\n\nThe full set of named program blocks is exemplified in the following skeletal Stan program.\nfunctions {\n // ... function declarations and definitions ...\n}\ndata {\n // ... declarations ...\n}\ntransformed data {\n // ... declarations ... statements ...\n}\nparameters {\n // ... declarations ...\n}\ntransformed parameters {\n // ... declarations ... statements ...\n}\nmodel {\n // ... declarations ... statements ...\n}\ngenerated quantities {\n // ... declarations ... statements ...\n}\nThe function-definition block contains user-defined functions. The data block declares the required data for the model. The transformed data block allows the definition of constants and transforms of the data. The parameters block declares the model’s parameters — the unconstrained version of the parameters is what’s sampled or optimized. The transformed parameters block allows variables to be defined in terms of data and parameters that may be used later and will be saved. The model block is where the log probability function is defined. The generated quantities block allows derived quantities based on parameters, data, and optionally (pseudo) random number generation.\n\n\nAll of the blocks are optional. A consequence of this is that the empty string is a valid Stan program, although it will trigger a warning message from the Stan compiler. The Stan program blocks that occur must occur in the order presented in the skeletal program above. 
Within each block, both declarations and statements are optional, subject to the restriction that the declarations come before the statements.\n\n\n\nThe variables declared in each block have scope over all subsequent statements. Thus a variable declared in the transformed data block may be used in the model block. But a variable declared in the generated quantities block may not be used in any earlier block, including the model block. The exception to this rule is that variables declared in the model block are always local to the model block and may not be accessed in the generated quantities block; to make a variable accessible in the model and generated quantities block, it must be declared as a transformed parameter.\nVariables declared as function parameters have scope only within that function definition’s body, and may not be assigned to (they are constant).\n\n\n\nFunctions defined in the function block may be used in any appropriate block. Most functions can be used in any block and applied to a mixture of parameters and data (including constants or program literals).\nRandom-number-generating functions are restricted to transformed data and generated quantities blocks, and within user-defined functions ending in _rng; such functions are suffixed with _rng. Log-probability modifying functions are restricted to blocks where the log probability accumulator is in scope (transformed parameters and model); such functions are suffixed with _lp.\nDensity functions defined in the program may be used in distribution statements.\n\n\n\nThe variables declared in the data and parameters block are treated differently than other variables in that they are automatically defined by the context in which they are used. This is why there are no statements allowed in the data or parameters block.\nThe variables in the data block are read from an external input source such as a file or a designated R data structure. 
The variables in the parameters block are read from the sampler’s current parameter values (either standard HMC or NUTS). The initial values may be provided through an external input source, which is also typically a file or a designated R data structure. In each case, the parameters are instantiated to the values for which the model defines a log probability function.\n\n\n\nThe transformed data and transformed parameters block behave similarly to each other. Both allow new variables to be declared and then defined through a sequence of statements. Because variables scope over every statement that follows them, transformed data variables may be defined in terms of the data variables.\nBefore generating any draws, data variables are read in, then the transformed data variables are declared and the associated statements executed to define them. This means the statements in the transformed data block are only ever evaluated once.1\nTransformed parameters work the same way, being defined in terms of the parameters, transformed data, and data variables. The difference is the frequency of evaluation. Parameters are read in and (inverse) transformed to constrained representations on their natural scales once per log probability and gradient evaluation. This means the inverse transforms and their log absolute Jacobian determinants are evaluated once per leapfrog step. Transformed parameters are then declared and their defining statements executed once per leapfrog step.\n\n\n\nThe generated quantity variables are defined once per sample after all the leapfrog steps have been completed. These may be random quantities, so the block must be rerun even if the Metropolis adjustment of HMC or NUTS rejects the update proposal.\n\n\n\nA table summarizing the point at which variables are read, written, and defined is given in the block actions table.\nBlock Actions Table. 
The read, write, transform, and evaluate actions and periodicities listed in the last column correspond to the Stan program blocks in the first column. The middle column indicates whether the block allows statements. The last row indicates that parameter initialization requires a read and transform operation applied once per chain.\n\n\n\nblock\nstatement\naction / period\n\n\n\n\ndata\nno\nread / chain\n\n\ntransformed data\nyes\nevaluate / chain\n\n\nparameters\nno\ninv. transform, Jacobian / leapfrog\n\n\n \n \ninv. transform, write / sample\n\n\ntransformed parameters\nyes\nevaluate / leapfrog\n\n\n \n \nwrite / sample\n\n\nmodel\nyes\nevaluate / leapfrog step\n\n\ngenerated quantities\nyes\neval / sample\n\n\n \n \nwrite / sample\n\n\n(initialization)\nn/a\nread, transform / chain\n\n\n\nVariable Declaration Table. This table indicates where variables that are not basic data or parameters should be declared, based on whether it is defined in terms of parameters, whether it is used in the log probability function defined in the model block, and whether it is printed. The two lines marked with asterisks (\\(*\\)) should not be used as there is no need to print a variable every iteration that does not depend on the value of any parameters.\n\n\n\nparam depend\nin target\nsave\ndeclare in\n\n\n\n\n+\n+\n+\ntransformed parameters\n\n\n+\n+\n-\nmodel (local)\n\n\n+\n-\n+\ngenerated quantities\n\n\n+\n-\n-\ngenerated quantities (local)\n\n\n-\n+\n+\ntransformed data   and generated quantities\n\n\n-\n+\n-\ntransformed data\n\n\n-\n-\n+\ngenerated quantities\n\n\n-\n-\n-\ntransformed data (local)\n\n\n\nAnother way to look at the variables is in terms of their function. To decide which variable to use, consult the charts in the variable declaration table. 
The last line has no corresponding location, as there is no need to print a variable every iteration that does not depend on parameters.2\nThe rest of this chapter provides full details on when and how the variables and statements in each block are executed.\n\n\n\n\nStatistical Variable Taxonomy Table. Variables of the kind indicated in the left column must be declared in one of the blocks declared in the right column.\n\n\n\n\n\n\n\nvariable kind\ndeclaration block\n\n\n\n\nconstants\ndata, transformed data\n\n\nunmodeled data\ndata, transformed data\n\n\nmodeled data\ndata, transformed data\n\n\nmissing data\nparameters, transformed parameters\n\n\nmodeled parameters\nparameters, transformed parameters\n\n\nunmodeled parameters\ndata, transformed data\n\n\nderived quantities\ntransformed data, transformed parameters, generated quantities\n\n\nloop indices\nloop statement\n\n\n\nPage 366 of (Gelman and Hill 2007) provides a taxonomy of the kinds of variables used in Bayesian models. The table of kinds of variables contains Gelman and Hill’s taxonomy along with a missing-data kind along with the corresponding locations of declarations and definitions in Stan.\nConstants can be built into a model as literals, data variables, or as transformed data variables. If specified as variables, their definition must be included in data files. If they are specified as transformed data variables, they cannot be used to specify the sizes of elements in the data block.\nThe following program illustrates various variable kinds, listing the kind of each variable next to its declaration.\ndata {\n int<lower=0> N; // unmodeled data\n array[N] real y; // modeled data\n real mu_mu; // config. unmodeled param\n real<lower=0> sigma_mu; // config. unmodeled param\n}\ntransformed data {\n real<lower=0> alpha; // const. unmodeled param\n real<lower=0> beta; // const. 
unmodeled param\n alpha = 0.1;\n beta = 0.1;\n}\nparameters {\n real mu_y; // modeled param\n real<lower=0> tau_y; // modeled param\n}\ntransformed parameters {\n real<lower=0> sigma_y; // derived quantity (param)\n sigma_y = pow(tau_y, -0.5);\n}\nmodel {\n tau_y ~ gamma(alpha, beta);\n mu_y ~ normal(mu_mu, sigma_mu);\n for (n in 1:N) {\n y[n] ~ normal(mu_y, sigma_y);\n }\n}\ngenerated quantities {\n real variance_y; // derived quantity (transform)\n variance_y = sigma_y * sigma_y;\n}\nIn this example, y is an array of modeled data. Although it is specified in the data block, and thus must have a known value before the program may be run, it is modeled as if it were generated randomly as described by the model.\nThe variable N is a typical example of unmodeled data. It is used to indicate a size that is not part of the model itself.\nThe other variables declared in the data and transformed data block are examples of unmodeled parameters, also known as hyperparameters. Unmodeled parameters are parameters to probability densities that are not themselves modeled probabilistically. In Stan, unmodeled parameters that appear in the data block may be specified on a per-model execution basis as part of the data read. In the above model, mu_mu and sigma_mu are configurable unmodeled parameters.\nUnmodeled parameters that are hard coded in the model must be declared in the transformed data block. For example, the unmodeled parameters alpha and beta are both hard coded to the value 0.1. To allow such variables to be configurable based on data supplied to the program at run time, they must be declared in the data block, like the variables mu_mu and sigma_mu.\nThis program declares two modeled parameters, mu_y and tau_y. These are the location and precision used in the normal model of the values in y. 
The heart of the model will be sampling the values of these parameters from their posterior distribution.\nThe modeled parameter tau_y is transformed from a precision to a scale parameter and assigned to the variable sigma_y in the transformed parameters block. Thus the variable sigma_y is considered a derived quantity — its value is entirely determined by the values of other variables.\nThe generated quantities block defines a value variance_y, which is defined as a transform of the scale or deviation parameter sigma_y. It is defined in the generated quantities block because it is not used in the model. Making it a generated quantity allows it to be monitored for convergence (being a non-linear transform, it will have different autocorrelation and hence convergence properties than the deviation itself).\nIn later versions of Stan which have random number generators for the distributions, the generated quantities block will be usable to generate replicated data for model checking.\nFinally, the variable n is used as a loop index in the model block.\n\n\n\nThe rest of this chapter will lay out the details of each block in order, starting with the data block in this section.\n\n\nThe data block is for the declaration of variables that are read in as data. With the current model executable, each Markov chain of draws will be executed in a different process, and each such process will read the data exactly once.3\nData variables are not transformed in any way. The format for data files or data in memory depends on the interface; see the user’s guides and interface documentation for PyStan, RStan, and CmdStan for details.\n\n\n\nThe data block does not allow statements.\n\n\n\nEach variable’s value is validated against its declaration as it is read. For example, if a variable sigma is declared as real<lower=0>, then trying to assign it a negative value will raise an error. As a result, data type errors will be caught as early as possible. 
Similarly, attempts to provide data of the wrong size for a compound data structure will also raise an error.\n\n\n\n\nThe transformed data block is for declaring and defining variables that do not need to be changed when running the program.\n\n\nFor the transformed data block, variables are all declared in the variable declarations and defined in the statements. There is no reading from external sources and no transformations performed.\nVariables declared in the data block may be used to declare transformed variables.\n\n\n\nThe statements in a transformed data block are used to define (provide values for) variables declared in the transformed data block. Assignments are only allowed to variables declared in the transformed data block.\nThese statements are executed once, in order, right after the data is read into the data variables. This means they are executed once per chain.\nVariables declared in the data block may be used in statements in the transformed data block.\n\n\nThe statements in the transformed data block are designed to be executed once and have a deterministic result. Therefore, log probability is not accumulated and distribution statements may not be used.\n\n\n\n\nAny constraints on variables declared in the transformed data block are checked after the statements are executed. If any defined variable violates its constraints, Stan will halt with a diagnostic error message.\n\n\n\n\nThe variables declared in the parameters program block correspond directly to the variables being sampled by Stan’s samplers (HMC and NUTS). From a user’s perspective, the parameters in the program block are the parameters being sampled by Stan.\nVariables declared as parameters cannot be directly assigned values. So there is no block of statements in the parameters program block. 
Variable quantities derived from parameters may be declared in the transformed parameters or generated quantities blocks, or may be defined as local variables in any statement blocks following their declaration.\nThere is a substantial amount of computation involved for parameter variables in a Stan program at each leapfrog step within the HMC or NUTS samplers, and a bit more computation along with writes involved for saving the parameter values corresponding to a sample.\n\n\nStan’s two samplers, standard Hamiltonian Monte Carlo (HMC) and the adaptive No-U-Turn sampler (NUTS), are most easily (and often most effectively) implemented over a multivariate probability density that has support on all of \\(\\mathbb{R}^n\\). To do this, the parameters defined in the parameters block must be transformed so they are unconstrained.\nIn practice, the samplers keep an unconstrained parameter vector in memory representing the current state of the sampler. The model defined by the compiled Stan program defines an (unnormalized) log probability function over the unconstrained parameters. In order to do this, the log probability function must apply the inverse transform to the unconstrained parameters to calculate the constrained parameters defined in Stan’s parameters program block. The log Jacobian of the inverse transform is then added to the accumulated log probability function. This then allows the Stan model to be defined in terms of the constrained parameters.\nIn some cases, the number of parameters is reduced in the unconstrained space. For instance, a \\(K\\)-simplex only requires \\(K-1\\) unconstrained parameters, and a \\(K\\)-correlation matrix only requires \\(\\binom{K}{2}\\) unconstrained parameters. 
This means that the probability function defined by the compiled Stan program may have fewer parameters than it would appear from looking at the declarations in the parameters program block.\nThe probability function on the unconstrained parameters is defined in such a way that the order of the parameters in the vector corresponds to the order of the variables defined in the parameters program block. The details of the specific transformations are provided in the variable transforms chapter.\n\n\n\nHamiltonian Monte Carlo requires the gradient of the (unnormalized) log probability function with respect to the unconstrained parameters to be evaluated during every leapfrog step. There may be one leapfrog step per sample or hundreds, with more being required for models with complex posterior distribution geometries.\nGradients are calculated behind the scenes using Stan’s algorithmic differentiation library. The time to compute the gradient does not depend directly on the number of parameters, only on the number of subexpressions in the calculation of the log probability. This includes the expressions added from the transforms’ Jacobians.\nThe amount of work done by the sampler does depend on the number of unconstrained parameters, but this is usually dwarfed by the gradient calculations.\n\n\n\nIn the basic Stan compiled program, there is a file to which the values of variables are written for each draw. The constrained versions of the variables are written in the order they are defined in the parameters block. In order to do this, the transformed parameter, model, and generated quantities statements must also be executed.\n\n\n\n\nThe transformed parameters program block consists of optional variable declarations followed by statements. After the statements are executed, the constraints on the transformed parameters are validated. 
Any variable declared as a transformed parameter is part of the output produced for draws.\nAny variable that is defined wholly in terms of data or transformed data should be declared and defined in the transformed data block. Defining such quantities in the transformed parameters block is legal, but much less efficient than defining them as transformed data.\n\n\nLike the constraints on data, the constraints on transformed parameters are meant to catch programming errors as well as convey programmer intent. They are not automatically transformed in such a way as to be satisfied. What will happen if a transformed parameter does not match its constraint is that the current parameter values will be rejected. This can cause Stan’s algorithms to hang or to devolve to random walks. It is not intended to be a way to enforce ad hoc constraints in Stan programs. See the section on reject statements for further discussion of the behavior of reject statements.\n\n\n\n\nThe model program block consists of optional variable declarations followed by statements. The variables in the model block are local variables and are not written as part of the output.\nLocal variables may not be defined with constraints because there is no well-defined way to have them be both flexible and easy to validate.\nThe statements in the model block typically define the model. This is the block in which probability (distribution notation) statements are allowed. These are typically used when programming in the BUGS idiom to define the probability model.\n\n\n\nThe generated quantities program block is rather different than the other blocks. Nothing in the generated quantities block affects the sampled parameter values. 
The block is executed only after a sample has been generated.\nAmong the applications of posterior inference that can be coded in the generated quantities block are\n\nforward sampling to generate simulated data for model testing,\ngenerating predictions for new data,\ncalculating posterior event probabilities, including multiple comparisons, sign tests, etc.,\ncalculating posterior expectations,\ntransforming parameters for reporting,\napplying full Bayesian decision theory,\ncalculating log likelihoods, deviances, etc. for model comparison.\n\nParameter estimates, predictions, statistics, and event probabilities calculated directly using plug-in estimates. Stan automatically provides full Bayesian inference by producing draws from the posterior distribution of any calculated event probabilities, predictions, or statistics.\nWithin the generated quantities block, the values of all other variables declared in earlier program blocks (other than local variables) are available for use in the generated quantities block.\nIt is more efficient to define a variable in the generated quantities block instead of the transformed parameters block. Therefore, if a quantity does not play a role in the model, it should be defined in the generated quantities block.\nAfter the generated quantities statements are executed, the constraints on the declared generated quantity variables are validated.\nAll variables declared as generated quantities are printed as part of the output. Variables declared in nested blocks are local variables, not generated quantities, and thus won’t be printed. 
For example:\ngenerated quantities {\n int a; // added to the output\n\n {\n int b; // not added to the output\n }\n}", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#overview-of-stans-program-blocks", + "href": "reference-manual/blocks.html#overview-of-stans-program-blocks", + "title": "Program Blocks", + "section": "", + "text": "The full set of named program blocks is exemplified in the following skeletal Stan program.\nfunctions {\n // ... function declarations and definitions ...\n}\ndata {\n // ... declarations ...\n}\ntransformed data {\n // ... declarations ... statements ...\n}\nparameters {\n // ... declarations ...\n}\ntransformed parameters {\n // ... declarations ... statements ...\n}\nmodel {\n // ... declarations ... statements ...\n}\ngenerated quantities {\n // ... declarations ... statements ...\n}\nThe function-definition block contains user-defined functions. The data block declares the required data for the model. The transformed data block allows the definition of constants and transforms of the data. The parameters block declares the model’s parameters — the unconstrained version of the parameters is what’s sampled or optimized. The transformed parameters block allows variables to be defined in terms of data and parameters that may be used later and will be saved. The model block is where the log probability function is defined. The generated quantities block allows derived quantities based on parameters, data, and optionally (pseudo) random number generation.\n\n\nAll of the blocks are optional. A consequence of this is that the empty string is a valid Stan program, although it will trigger a warning message from the Stan compiler. The Stan program blocks that occur must occur in the order presented in the skeletal program above. 
Within each block, both declarations and statements are optional, subject to the restriction that the declarations come before the statements.\n\n\n\nThe variables declared in each block have scope over all subsequent statements. Thus a variable declared in the transformed data block may be used in the model block. But a variable declared in the generated quantities block may not be used in any earlier block, including the model block. The exception to this rule is that variables declared in the model block are always local to the model block and may not be accessed in the generated quantities block; to make a variable accessible in the model and generated quantities block, it must be declared as a transformed parameter.\nVariables declared as function parameters have scope only within that function definition’s body, and may not be assigned to (they are constant).\n\n\n\nFunctions defined in the function block may be used in any appropriate block. Most functions can be used in any block and applied to a mixture of parameters and data (including constants or program literals).\nRandom-number-generating functions are restricted to transformed data and generated quantities blocks, and within user-defined functions ending in _rng; such functions are suffixed with _rng. Log-probability modifying functions are restricted to blocks where the log probability accumulator is in scope (transformed parameters and model); such functions are suffixed with _lp.\nDensity functions defined in the program may be used in distribution statements.\n\n\n\nThe variables declared in the data and parameters block are treated differently than other variables in that they are automatically defined by the context in which they are used. This is why there are no statements allowed in the data or parameters block.\nThe variables in the data block are read from an external input source such as a file or a designated R data structure. 
The variables in the parameters block are read from the sampler’s current parameter values (either standard HMC or NUTS). The initial values may be provided through an external input source, which is also typically a file or a designated R data structure. In each case, the parameters are instantiated to the values for which the model defines a log probability function.\n\n\n\nThe transformed data and transformed parameters block behave similarly to each other. Both allow new variables to be declared and then defined through a sequence of statements. Because variables scope over every statement that follows them, transformed data variables may be defined in terms of the data variables.\nBefore generating any draws, data variables are read in, then the transformed data variables are declared and the associated statements executed to define them. This means the statements in the transformed data block are only ever evaluated once.1\nTransformed parameters work the same way, being defined in terms of the parameters, transformed data, and data variables. The difference is the frequency of evaluation. Parameters are read in and (inverse) transformed to constrained representations on their natural scales once per log probability and gradient evaluation. This means the inverse transforms and their log absolute Jacobian determinants are evaluated once per leapfrog step. Transformed parameters are then declared and their defining statements executed once per leapfrog step.\n\n\n\nThe generated quantity variables are defined once per sample after all the leapfrog steps have been completed. These may be random quantities, so the block must be rerun even if the Metropolis adjustment of HMC or NUTS rejects the update proposal.\n\n\n\nA table summarizing the point at which variables are read, written, and defined is given in the block actions table.\nBlock Actions Table. 
The read, write, transform, and evaluate actions and periodicities listed in the last column correspond to the Stan program blocks in the first column. The middle column indicates whether the block allows statements. The last row indicates that parameter initialization requires a read and transform operation applied once per chain.\n\n\n\nblock\nstatement\naction / period\n\n\n\n\ndata\nno\nread / chain\n\n\ntransformed data\nyes\nevaluate / chain\n\n\nparameters\nno\ninv. transform, Jacobian / leapfrog\n\n\n \n \ninv. transform, write / sample\n\n\ntransformed parameters\nyes\nevaluate / leapfrog\n\n\n \n \nwrite / sample\n\n\nmodel\nyes\nevaluate / leapfrog step\n\n\ngenerated quantities\nyes\neval / sample\n\n\n \n \nwrite / sample\n\n\n(initialization)\nn/a\nread, transform / chain\n\n\n\nVariable Declaration Table. This table indicates where variables that are not basic data or parameters should be declared, based on whether it is defined in terms of parameters, whether it is used in the log probability function defined in the model block, and whether it is printed. The two lines marked with asterisks (\\(*\\)) should not be used as there is no need to print a variable every iteration that does not depend on the value of any parameters.\n\n\n\nparam depend\nin target\nsave\ndeclare in\n\n\n\n\n+\n+\n+\ntransformed parameters\n\n\n+\n+\n-\nmodel (local)\n\n\n+\n-\n+\ngenerated quantities\n\n\n+\n-\n-\ngenerated quantities (local)\n\n\n-\n+\n+\ntransformed data   and generated quantities\n\n\n-\n+\n-\ntransformed data\n\n\n-\n-\n+\ngenerated quantities\n\n\n-\n-\n-\ntransformed data (local)\n\n\n\nAnother way to look at the variables is in terms of their function. To decide which variable to use, consult the charts in the variable declaration table. 
The last line has no corresponding location, as there is no need to print a variable every iteration that does not depend on parameters.2\nThe rest of this chapter provides full details on when and how the variables and statements in each block are executed.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#statistical-variable-taxonomy", + "href": "reference-manual/blocks.html#statistical-variable-taxonomy", + "title": "Program Blocks", + "section": "", + "text": "Statistical Variable Taxonomy Table. Variables of the kind indicated in the left column must be declared in one of the blocks declared in the right column.\n\n\n\n\n\n\n\nvariable kind\ndeclaration block\n\n\n\n\nconstants\ndata, transformed data\n\n\nunmodeled data\ndata, transformed data\n\n\nmodeled data\ndata, transformed data\n\n\nmissing data\nparameters, transformed parameters\n\n\nmodeled parameters\nparameters, transformed parameters\n\n\nunmodeled parameters\ndata, transformed data\n\n\nderived quantities\ntransformed data, transformed parameters, generated quantities\n\n\nloop indices\nloop statement\n\n\n\nPage 366 of (Gelman and Hill 2007) provides a taxonomy of the kinds of variables used in Bayesian models. The table of kinds of variables contains Gelman and Hill’s taxonomy along with a missing-data kind along with the corresponding locations of declarations and definitions in Stan.\nConstants can be built into a model as literals, data variables, or as transformed data variables. If specified as variables, their definition must be included in data files. If they are specified as transformed data variables, they cannot be used to specify the sizes of elements in the data block.\nThe following program illustrates various variables kinds, listing the kind of each variable next to its declaration.\ndata {\n int<lower=0> N; // unmodeled data\n array[N] real y; // modeled data\n real mu_mu; // config. 
unmodeled param\n real<lower=0> sigma_mu; // config. unmodeled param\n}\ntransformed data {\n real<lower=0> alpha; // const. unmodeled param\n real<lower=0> beta; // const. unmodeled param\n alpha = 0.1;\n beta = 0.1;\n}\nparameters {\n real mu_y; // modeled param\n real<lower=0> tau_y; // modeled param\n}\ntransformed parameters {\n real<lower=0> sigma_y; // derived quantity (param)\n sigma_y = pow(tau_y, -0.5);\n}\nmodel {\n tau_y ~ gamma(alpha, beta);\n mu_y ~ normal(mu_mu, sigma_mu);\n for (n in 1:N) {\n y[n] ~ normal(mu_y, sigma_y);\n }\n}\ngenerated quantities {\n real variance_y; // derived quantity (transform)\n variance_y = sigma_y * sigma_y;\n}\nIn this example, y is an array of modeled data. Although it is specified in the data block, and thus must have a known value before the program may be run, it is modeled as if it were generated randomly as described by the model.\nThe variable N is a typical example of unmodeled data. It is used to indicate a size that is not part of the model itself.\nThe other variables declared in the data and transformed data block are examples of unmodeled parameters, also known as hyperparameters. Unmodeled parameters are parameters to probability densities that are not themselves modeled probabilistically. In Stan, unmodeled parameters that appear in the data block may be specified on a per-model execution basis as part of the data read. In the above model, mu_mu and sigma_mu are configurable unmodeled parameters.\nUnmodeled parameters that are hard coded in the model must be declared in the transformed data block. For example, the unmodeled parameters alpha and beta are both hard coded to the value 0.1. To allow such variables to be configurable based on data supplied to the program at run time, they must be declared in the data block, like the variables mu_mu and sigma_mu.\nThis program declares two modeled parameters, mu_y and tau_y. These are the location and precision used in the normal model of the values in y. 
The heart of the model will be sampling the values of these parameters from their posterior distribution.\nThe modeled parameter tau_y is transformed from a precision to a scale parameter and assigned to the variable sigma_y in the transformed parameters block. Thus the variable sigma_y is considered a derived quantity — its value is entirely determined by the values of other variables.\nThe generated quantities block defines a value variance_y, which is defined as a transform of the scale or deviation parameter sigma_y. It is defined in the generated quantities block because it is not used in the model. Making it a generated quantity allows it to be monitored for convergence (being a non-linear transform, it will have different autocorrelation and hence convergence properties than the deviation itself).\nIn later versions of Stan which have random number generators for the distributions, the generated quantities block will be usable to generate replicated data for model checking.\nFinally, the variable n is used as a loop index in the model block.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#program-block-data", + "href": "reference-manual/blocks.html#program-block-data", + "title": "Program Blocks", + "section": "", + "text": "The rest of this chapter will lay out the details of each block in order, starting with the data block in this section.\n\n\nThe data block is for the declaration of variables that are read in as data. With the current model executable, each Markov chain of draws will be executed in a different process, and each such process will read the data exactly once.3\nData variables are not transformed in any way. 
The format for data files or data in memory depends on the interface; see the user’s guides and interface documentation for PyStan, RStan, and CmdStan for details.\n\n\n\nThe data block does not allow statements.\n\n\n\nEach variable’s value is validated against its declaration as it is read. For example, if a variable sigma is declared as real<lower=0>, then trying to assign it a negative value will raise an error. As a result, data type errors will be caught as early as possible. Similarly, attempts to provide data of the wrong size for a compound data structure will also raise an error.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#program-block-transformed-data", + "href": "reference-manual/blocks.html#program-block-transformed-data", + "title": "Program Blocks", + "section": "", + "text": "The transformed data block is for declaring and defining variables that do not need to be changed when running the program.\n\n\nFor the transformed data block, variables are all declared in the variable declarations and defined in the statements. There is no reading from external sources and no transformations performed.\nVariables declared in the data block may be used to declare transformed variables.\n\n\n\nThe statements in a transformed data block are used to define (provide values for) variables declared in the transformed data block. Assignments are only allowed to variables declared in the transformed data block.\nThese statements are executed once, in order, right after the data is read into the data variables. This means they are executed once per chain.\nVariables declared in the data block may be used in statements in the transformed data block.\n\n\nThe statements in the transformed data block are designed to be executed once and have a deterministic result. 
Therefore, log probability is not accumulated and distribution statements may not be used.\n\n\n\n\nAny constraints on variables declared in the transformed data block are checked after the statements are executed. If any defined variable violates its constraints, Stan will halt with a diagnostic error message.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#program-block-parameters", + "href": "reference-manual/blocks.html#program-block-parameters", + "title": "Program Blocks", + "section": "", + "text": "The variables declared in the parameters program block correspond directly to the variables being sampled by Stan’s samplers (HMC and NUTS). From a user’s perspective, the parameters in the program block are the parameters being sampled by Stan.\nVariables declared as parameters cannot be directly assigned values. So there is no block of statements in the parameters program block. Variable quantities derived from parameters may be declared in the transformed parameters or generated quantities blocks, or may be defined as local variables in any statement blocks following their declaration.\nThere is a substantial amount of computation involved for parameter variables in a Stan program at each leapfrog step within the HMC or NUTS samplers, and a bit more computation along with writes involved for saving the parameter values corresponding to a sample.\n\n\nStan’s two samplers, standard Hamiltonian Monte Carlo (HMC) and the adaptive No-U-Turn sampler (NUTS), are most easily (and often most effectively) implemented over a multivariate probability density that has support on all of \\(\\mathbb{R}^n\\). To do this, the parameters defined in the parameters block must be transformed so they are unconstrained.\nIn practice, the samplers keep an unconstrained parameter vector in memory representing the current state of the sampler. 
The model defined by the compiled Stan program defines an (unnormalized) log probability function over the unconstrained parameters. In order to do this, the log probability function must apply the inverse transform to the unconstrained parameters to calculate the constrained parameters defined in Stan’s parameters program block. The log Jacobian of the inverse transform is then added to the accumulated log probability function. This then allows the Stan model to be defined in terms of the constrained parameters.\nIn some cases, the number of parameters is reduced in the unconstrained space. For instance, a \\(K\\)-simplex only requires \\(K-1\\) unconstrained parameters, and a \\(K\\)-correlation matrix only requires \\(\\binom{K}{2}\\) unconstrained parameters. This means that the probability function defined by the compiled Stan program may have fewer parameters than it would appear from looking at the declarations in the parameters program block.\nThe probability function on the unconstrained parameters is defined in such a way that the order of the parameters in the vector corresponds to the order of the variables defined in the parameters program block. The details of the specific transformations are provided in the variable transforms chapter.\n\n\n\nHamiltonian Monte Carlo requires the gradient of the (unnormalized) log probability function with respect to the unconstrained parameters to be evaluated during every leapfrog step. There may be one leapfrog step per sample or hundreds, with more being required for models with complex posterior distribution geometries.\nGradients are calculated behind the scenes using Stan’s algorithmic differentiation library. The time to compute the gradient does not depend directly on the number of parameters, only on the number of subexpressions in the calculation of the log probability. 
This includes the expressions added from the transforms’ Jacobians.\nThe amount of work done by the sampler does depend on the number of unconstrained parameters, but this is usually dwarfed by the gradient calculations.\n\n\n\nIn the basic Stan compiled program, there is a file to which the values of variables are written for each draw. The constrained versions of the variables are written in the order they are defined in the parameters block. In order to do this, the transformed parameter, model, and generated quantities statements must also be executed.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#program-block-transformed-parameters", + "href": "reference-manual/blocks.html#program-block-transformed-parameters", + "title": "Program Blocks", + "section": "", + "text": "The transformed parameters program block consists of optional variable declarations followed by statements. After the statements are executed, the constraints on the transformed parameters are validated. Any variable declared as a transformed parameter is part of the output produced for draws.\nAny variable that is defined wholly in terms of data or transformed data should be declared and defined in the transformed data block. Defining such quantities in the transformed parameters block is legal, but much less efficient than defining them as transformed data.\n\n\nLike the constraints on data, the constraints on transformed parameters are meant to catch programming errors as well as convey programmer intent. They are not automatically transformed in such a way as to be satisfied. What will happen if a transformed parameter does not match its constraint is that the current parameter values will be rejected. This can cause Stan’s algorithms to hang or to devolve to random walks. It is not intended to be a way to enforce ad hoc constraints in Stan programs. 
See the section on reject statements for further discussion of the behavior of reject statements.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#program-block-model", + "href": "reference-manual/blocks.html#program-block-model", + "title": "Program Blocks", + "section": "", + "text": "The model program block consists of optional variable declarations followed by statements. The variables in the model block are local variables and are not written as part of the output.\nLocal variables may not be defined with constraints because there is no well-defined way to have them be both flexible and easy to validate.\nThe statements in the model block typically define the model. This is the block in which probability (distribution notation) statements are allowed. These are typically used when programming in the BUGS idiom to define the probability model.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#program-block-generated-quantities", + "href": "reference-manual/blocks.html#program-block-generated-quantities", + "title": "Program Blocks", + "section": "", + "text": "The generated quantities program block is rather different than the other blocks. Nothing in the generated quantities block affects the sampled parameter values. The block is executed only after a sample has been generated.\nAmong the applications of posterior inference that can be coded in the generated quantities block are\n\nforward sampling to generate simulated data for model testing,\ngenerating predictions for new data,\ncalculating posterior event probabilities, including multiple comparisons, sign tests, etc.,\ncalculating posterior expectations,\ntransforming parameters for reporting,\napplying full Bayesian decision theory,\ncalculating log likelihoods, deviances, etc. 
for model comparison.\n\nParameter estimates, predictions, statistics, and event probabilities calculated directly using plug-in estimates. Stan automatically provides full Bayesian inference by producing draws from the posterior distribution of any calculated event probabilities, predictions, or statistics.\nWithin the generated quantities block, the values of all other variables declared in earlier program blocks (other than local variables) are available for use in the generated quantities block.\nIt is more efficient to define a variable in the generated quantities block instead of the transformed parameters block. Therefore, if a quantity does not play a role in the model, it should be defined in the generated quantities block.\nAfter the generated quantities statements are executed, the constraints on the declared generated quantity variables are validated.\nAll variables declared as generated quantities are printed as part of the output. Variables declared in nested blocks are local variables, not generated quantities, and thus won’t be printed. For example:\ngenerated quantities {\n int a; // added to the output\n\n {\n int b; // not added to the output\n }\n}", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "reference-manual/blocks.html#footnotes", + "href": "reference-manual/blocks.html#footnotes", + "title": "Program Blocks", + "section": "Footnotes", + "text": "Footnotes\n\n\nIf the C++ code is configured for concurrent threads, the data and transformed data blocks can be executed once and reused for multiple chains.↩︎\nIt is possible to print a variable every iteration that does not depend on parameters—just define it (or redefine it if it is transformed data) in the generated quantities block.↩︎\nWith multiple threads, or even running chains sequentially in a single thread, data could be read only once per set of chains. 
Stan was designed to be thread safe and future versions will provide a multithreading option for Markov chains.↩︎", + "crumbs": [ + "Reference Manual", + "Language", + "Program Blocks" + ] + }, + { + "objectID": "functions-reference/void_functions.html", + "href": "functions-reference/void_functions.html", + "title": "Void Functions", + "section": "", + "text": "Stan supports a few special statements for printing or for signaling an issue with the program.\nAlthough print, reject, and fatal_error appear to have the syntax of functions, they are actually special kinds of statements with slightly different form and behavior than other functions. First, they are the constructs that allow a variable number of arguments. Second, they are the only constructs to accept string literals (e.g., \"hello world\") as arguments. Third, they have no effect on the log density function and operate solely through side effects.\nThe special keyword void is used for their return type because they behave like variadic functions with void return type, even though they are special kinds of statements built in to the language.\n\n\nPrinting has no effect on the model’s log probability function. Its sole purpose is the side effect (i.e., an effect not represented in a return value) of arguments being printed to whatever the standard output stream is connected to (e.g., the terminal in command-line Stan or the R console in RStan).\n \n\nvoid print(T1 x1,..., TN xN) Print the values denoted by the arguments x1 through xN on the output message stream. There are no spaces between items in the print, but a line feed (LF; Unicode U+000A; C++ literal '\\n') is inserted at the end of the printed line. The types T1 through TN can be any of Stan’s built-in numerical types or double quoted strings of characters (bytes).\nAvailable since 2.1\n\n\n\nThe reject statement has the same syntax as the print statement, accepting an arbitrary number of arguments of any type (including string literals). 
The effect of executing a reject statement is to throw an exception internally that terminates the current iteration with a rejection (the behavior of which will depend on the algorithmic context in which it occurs).\n \n\nvoid reject(T1 x1,..., TN xN) Reject the current iteration and print the values denoted by the arguments x1 through xN on the output message stream. There are no spaces between items in the print, but a line feed (LF; Unicode U+000A; C++ literal '\\n') is inserted at the end of the printed line. The types T1 through TN can be any of Stan’s built-in numerical types or double quoted strings of characters (bytes).\nAvailable since 2.18\n\n\n\nThe fatal error statement has the same syntax as the print and reject statements, accepting an arbitrary number of arguments of any type (including string literals). The effect of executing a fatal_error statement is to throw an exception internally that terminates the algorithm completely. It can be viewed as an unrecoverable version of reject, and as such should be used only when exiting the algorithm is the only option.\n \n\nvoid fatal_error(T1 x1,..., TN xN) Print the values denoted by the arguments x1 through xN on the output message stream and then exit the currently running algorithm. There are no spaces between items in the print, but a line feed (LF; Unicode U+000A; C++ literal '\\n') is inserted at the end of the printed line. The types T1 through TN can be any of Stan’s built-in numerical types or double quoted strings of characters (bytes).\nAvailable since 2.35", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Void Functions" + ] + }, + { + "objectID": "functions-reference/void_functions.html#print-statement", + "href": "functions-reference/void_functions.html#print-statement", + "title": "Void Functions", + "section": "", + "text": "Printing has no effect on the model’s log probability function. 
Its sole purpose is the side effect (i.e., an effect not represented in a return value) of arguments being printed to whatever the standard output stream is connected to (e.g., the terminal in command-line Stan or the R console in RStan).\n \n\nvoid print(T1 x1,..., TN xN) Print the values denoted by the arguments x1 through xN on the output message stream. There are no spaces between items in the print, but a line feed (LF; Unicode U+000A; C++ literal '\\n') is inserted at the end of the printed line. The types T1 through TN can be any of Stan’s built-in numerical types or double quoted strings of characters (bytes).\nAvailable since 2.1", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Void Functions" + ] + }, + { + "objectID": "functions-reference/void_functions.html#reject-statement", + "href": "functions-reference/void_functions.html#reject-statement", + "title": "Void Functions", + "section": "", + "text": "The reject statement has the same syntax as the print statement, accepting an arbitrary number of arguments of any type (including string literals). The effect of executing a reject statement is to throw an exception internally that terminates the current iteration with a rejection (the behavior of which will depend on the algorithmic context in which it occurs).\n \n\nvoid reject(T1 x1,..., TN xN) Reject the current iteration and print the values denoted by the arguments x1 through xN on the output message stream. There are no spaces between items in the print, but a line feed (LF; Unicode U+000A; C++ literal '\\n') is inserted at the end of the printed line. 
The types T1 through TN can be any of Stan’s built-in numerical types or double quoted strings of characters (bytes).\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Void Functions" + ] + }, + { + "objectID": "functions-reference/void_functions.html#fatal-error-statement", + "href": "functions-reference/void_functions.html#fatal-error-statement", + "title": "Void Functions", + "section": "", + "text": "The fatal error statement has the same syntax as the print and reject statements, accepting an arbitrary number of arguments of any type (including string literals). The effect of executing a fatal_error statement is to throw an exception internally that terminates the algorithm completely. It can be viewed as an unrecoverable version of reject, and as such should be used only when exiting the algorithm is the only option.\n \n\nvoid fatal_error(T1 x1,..., TN xN) Print the values denoted by the arguments x1 through xN on the output message stream and then exit the currently running algorithm. There are no spaces between items in the print, but a line feed (LF; Unicode U+000A; C++ literal '\\n') is inserted at the end of the printed line. 
The types T1 through TN can be any of Stan’s built-in numerical types or double quoted strings of characters (bytes).\nAvailable since 2.35", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Void Functions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html", + "href": "functions-reference/unbounded_continuous_distributions.html", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "The unbounded univariate continuous probability distributions have support on all real numbers.\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in\n\\mathbb{R}\\), \\[\\begin{equation*}\n\\text{Normal}(y|\\mu,\\sigma) =\n\\frac{1}{\\sqrt{2 \\pi} \\ \\sigma} \\exp\\left( - \\, \\frac{1}{2} \\left(\\frac{y - \\mu}{\\sigma} \\right)^2 \\right) \\!.\n\\end{equation*}\\]\n\n\n\ny ~ normal(mu, sigma)\nIncrement target log probability density with normal_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal normal_lpdf(reals y | reals mu, reals sigma) The log of the normal density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal normal_lupdf(reals y | reals mu, reals sigma) The log of the normal density of y given location mu and scale sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nreal normal_cdf(reals y | reals mu, reals sigma) The cumulative normal distribution of y given location mu and scale sigma; normal_cdf will underflow to 0 for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) below -37.5 and overflow to 1 for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) above 8.25; the function Phi_approx is more robust in the tails, but must be scaled and translated for anything other than a standard normal.\nAvailable since 2.0\n \n\nreal normal_lcdf(reals y | reals mu, reals sigma) The log of the cumulative normal distribution of y given location mu and scale sigma; normal_lcdf will underflow to \\(-\\infty\\) for 
\\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) below -37.5 and overflow to 0 for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) above 8.25; log(Phi_approx(...)) is more robust in the tails, but must be scaled and translated for anything other than a standard normal.\nAvailable since 2.12\n \n\nreal normal_lccdf(reals y | reals mu, reals sigma) The log of the complementary cumulative normal distribution of y given location mu and scale sigma; normal_lccdf will overflow to 0 for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) below -37.5 and underflow to \\(-\\infty\\) for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) above 8.25; log1m(Phi_approx(...)) is more robust in the tails, but must be scaled and translated for anything other than a standard normal.\nAvailable since 2.15\n \n\nR normal_rng(reals mu, reals sigma) Generate a normal variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\nThe standard normal distribution is so-called because its parameters are the units for their respective operations—the location (mean) is zero and the scale (standard deviation) one. The standard normal is parameter-free, and the unit parameters allow considerable simplification of the expression for the density. 
\\[\\begin{equation*}\n\\text{StdNormal}(y) \\ = \\ \\text{Normal}(y \\mid 0, 1) \\ = \\ \\frac{1}{\\sqrt{2 \\pi}} \\, \\exp \\left( \\frac{-y^2}{2} \\right)\\!.\n\\end{equation*}\\] Up to a proportion on the log scale, where Stan computes, \\[\\begin{equation*}\n\\log \\text{Normal}(y \\mid 0, 1) \\ = \\ \\frac{-y^2}{2} + \\text{const}.\n\\end{equation*}\\] With no logarithm, no subtraction, and no division by a parameter, the standard normal log density is much more efficient to compute than the normal log density with constant location \\(0\\) and scale \\(1\\).\n\n\n\ny ~ std_normal()\nIncrement target log probability density with std_normal_lupdf(y).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal std_normal_lpdf(reals y) The standard normal (location zero, scale one) log probability density of y.\nAvailable since 2.18\n \n\nreal std_normal_lupdf(reals y) The standard normal (location zero, scale one) log probability density of y dropping constant additive terms.\nAvailable since 2.25\n \n\nreal std_normal_cdf(reals y) The cumulative standard normal distribution of y; std_normal_cdf will underflow to 0 for \\(y\\) below -37.5 and overflow to 1 for \\(y\\) above 8.25; the function Phi_approx is more robust in the tails.\nAvailable since 2.21\n \n\nreal std_normal_lcdf(reals y) The log of the cumulative standard normal distribution of y; std_normal_lcdf will underflow to \\(-\\infty\\) for \\(y\\) below -37.5 and overflow to 0 for \\(y\\) above 8.25; log(Phi_approx(...)) is more robust in the tails.\nAvailable since 2.21\n \n\nreal std_normal_lccdf(reals y) The log of the complementary cumulative standard normal distribution of y; std_normal_lccdf will overflow to 0 for \\(y\\) below -37.5 and underflow to \\(-\\infty\\) for \\(y\\) above 8.25; log1m(Phi_approx(...)) is more robust in the tails.\nAvailable since 2.21\n \n\nR std_normal_qf(T x) Returns the value of the inverse standard normal cdf \\(\\Phi^{-1}\\) at the specified quantile x. 
The std_normal_qf is equivalent to the inv_Phi function.\nAvailable since 2.31\n \n\nR std_normal_log_qf(T x) Return the value of the inverse standard normal cdf \\(\\Phi^{-1}\\) evaluated at the log of the specified quantile x. This function is equivalent to std_normal_qf(exp(x)) but is more numerically stable.\nAvailable since 2.31\n \n\nreal std_normal_rng() Generate a normal variate with location zero and scale one; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.21\n\n\n\n\nStan also supplies a single function for a generalized linear model with normal distribution and identity link function, i.e. a function for a linear regression. This provides a more efficient implementation of linear regression than a manually written regression in terms of a normal distribution and matrix multiplication.\n\n\nIf \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta\\in\n\\mathbb{R}^m, \\sigma\\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^n\\), \\[\\begin{equation*} \\text{NormalIdGLM}(y|x, \\alpha, \\beta, \\sigma) = \\prod_{1\\leq i\n\\leq n}\\text{Normal}(y_i|\\alpha_i + x_i\\cdot \\beta, \\sigma). 
\\end{equation*}\\]\n\n\n\ny ~ normal_id_glm(x, alpha, beta, sigma)\nIncrement target log probability density with normal_id_glm_lupdf(y | x, alpha, beta, sigma).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal normal_id_glm_lpdf(real y | matrix x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.29\n \n\nreal normal_id_glm_lupdf(real y | matrix x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.29\n \n\nreal normal_id_glm_lpdf(real y | matrix x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.29\n \n\nreal normal_id_glm_lupdf(real y | matrix x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.29\n \n\nreal normal_id_glm_lpdf(real y | matrix x, real alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.23\n \n\nreal normal_id_glm_lupdf(real y | matrix x, real alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nreal normal_id_glm_lpdf(real y | matrix x, vector alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.23\n \n\nreal normal_id_glm_lupdf(real y | matrix x, vector alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nreal normal_id_glm_lpdf(vector y | row_vector x, real alpha, vector 
beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.29\n \n\nreal normal_id_glm_lupdf(vector y | row_vector x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.29\n \n\nreal normal_id_glm_lpdf(vector y | row_vector x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.29\n \n\nreal normal_id_glm_lupdf(vector y | row_vector x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.29\n \n\nreal normal_id_glm_lpdf(vector y | matrix x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.23\n \n\nreal normal_id_glm_lupdf(vector y | matrix x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.23\n \n\nreal normal_id_glm_lpdf(vector y | matrix x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.23\n \n\nreal normal_id_glm_lupdf(vector y | matrix x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.23\n \n\nreal normal_id_glm_lpdf(vector y | matrix x, real alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.30\n \n\nreal normal_id_glm_lupdf(vector y | matrix x, real alpha, vector beta, vector sigma) The log normal probability 
density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.30\n \n\nreal normal_id_glm_lpdf(vector y | matrix x, vector alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.30\n \n\nreal normal_id_glm_lupdf(vector y | matrix x, vector alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.30\n\n\n\n\nExponentially modified Gaussian describes the distribution of \\(Z = X + Y\\) when \\(X\\) and \\(Y\\) are independent and \\(X\\) is normally distributed (with mean \\(\\mu\\) and standard deviation \\(\\sigma\\)) and \\(Y\\) is exponentially distributed (with rate \\(\\lambda\\)).\n\n\nIf \\(\\mu \\in \\mathbb{R}\\), \\(\\sigma \\in \\mathbb{R}^+\\), and \\(\\lambda \\in\n\\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{ExpModNormal}(y|\\mu,\\sigma,\\lambda) = \\frac{\\lambda}{2} \\ \\exp\n\\left(\\frac{\\lambda}{2} \\left(2\\mu + \\lambda \\sigma^2 -\n2y\\right)\\right) \\text{erfc}\\left(\\frac{\\mu + \\lambda\\sigma^2 -\ny}{\\sqrt{2}\\sigma}\\right) . 
\\end{equation*}\\]\n\n\n\ny ~ exp_mod_normal(mu, sigma, lambda)\nIncrement target log probability density with exp_mod_normal_lupdf(y | mu, sigma, lambda).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal exp_mod_normal_lpdf(reals y | reals mu, reals sigma, reals lambda) The log of the exponentially modified normal density of y given location mu, scale sigma, and rate lambda\nAvailable since 2.18\n \n\nreal exp_mod_normal_lupdf(reals y | reals mu, reals sigma, reals lambda) The log of the exponentially modified normal density of y given location mu, scale sigma, and rate lambda dropping constant additive terms\nAvailable since 2.25\n \n\nreal exp_mod_normal_cdf(reals y | reals mu, reals sigma, reals lambda) The exponentially modified normal cumulative distribution function of y given location mu, scale sigma, and rate lambda\nAvailable since 2.0\n \n\nreal exp_mod_normal_lcdf(reals y | reals mu, reals sigma, reals lambda) The log of the exponentially modified normal cumulative distribution function of y given location mu, scale sigma, and rate lambda\nAvailable since 2.18\n \n\nreal exp_mod_normal_lccdf(reals y | reals mu, reals sigma, reals lambda) The log of the exponentially modified normal complementary cumulative distribution function of y given location mu, scale sigma, and rate lambda\nAvailable since 2.18\n \n\nR exp_mod_normal_rng(reals mu, reals sigma, reals lambda) Generate an exponentially modified normal variate with location mu, scale sigma, and rate lambda; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\xi \\in \\mathbb{R}\\), \\(\\omega \\in \\mathbb{R}^+\\), and \\(\\alpha \\in\n\\mathbb{R}\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*} \\text{SkewNormal}(y \\mid\n\\xi, \\omega, \\alpha) = \\frac{1}{\\omega\\sqrt{2\\pi}} \\ \\exp\\left( - \\,\n\\frac{1}{2} \\left( \\frac{y - \\xi}{\\omega} \\right)^2\n\\right) \\ \\left(1 + \\text{erf}\\left( \\alpha\\left(\\frac{y -\n\\xi}{\\omega\\sqrt{2}}\\right)\\right)\\right) . \\end{equation*}\\]\n\n\n\ny ~ skew_normal(xi, omega, alpha)\nIncrement target log probability density with skew_normal_lupdf(y | xi, omega, alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal skew_normal_lpdf(reals y | reals xi, reals omega, reals alpha) The log of the skew normal density of y given location xi, scale omega, and shape alpha\nAvailable since 2.16\n \n\nreal skew_normal_lupdf(reals y | reals xi, reals omega, reals alpha) The log of the skew normal density of y given location xi, scale omega, and shape alpha dropping constant additive terms\nAvailable since 2.25\n \n\nreal skew_normal_cdf(reals y | reals xi, reals omega, reals alpha) The skew normal distribution function of y given location xi, scale omega, and shape alpha\nAvailable since 2.16\n \n\nreal skew_normal_lcdf(reals y | reals xi, reals omega, reals alpha) The log of the skew normal cumulative distribution function of y given location xi, scale omega, and shape alpha\nAvailable since 2.18\n \n\nreal skew_normal_lccdf(reals y | reals xi, reals omega, reals alpha) The log of the skew normal complementary cumulative distribution function of y given location xi, scale omega, and shape alpha\nAvailable since 2.18\n \n\nR skew_normal_rng(reals xi, reals omega, real alpha) Generate a skew normal variate with location xi, scale omega, and shape alpha; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\nu \\in \\mathbb{R}^+\\), \\(\\mu \\in \\mathbb{R}\\), and \\(\\sigma \\in\n\\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{StudentT}(y|\\nu,\\mu,\\sigma) = \\frac{\\Gamma\\left((\\nu +\n1)/2\\right)} {\\Gamma(\\nu/2)} \\ \\frac{1}{\\sqrt{\\nu \\pi} \\ \\sigma}\n\\ \\left( 1 + \\frac{1}{\\nu} \\left(\\frac{y - \\mu}{\\sigma}\\right)^2\n\\right)^{-(\\nu + 1)/2} \\! . \\end{equation*}\\]\n\n\n\ny ~ student_t(nu, mu, sigma)\nIncrement target log probability density with student_t_lupdf(y | nu, mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal student_t_lpdf(reals y | reals nu, reals mu, reals sigma) The log of the Student-\\(t\\) density of y given degrees of freedom nu, location mu, and scale sigma\nAvailable since 2.12\n \n\nreal student_t_lupdf(reals y | reals nu, reals mu, reals sigma) The log of the Student-\\(t\\) density of y given degrees of freedom nu, location mu, and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal student_t_cdf(reals y | reals nu, reals mu, reals sigma) The Student-\\(t\\) cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma\nAvailable since 2.0\n \n\nreal student_t_lcdf(reals y | reals nu, reals mu, reals sigma) The log of the Student-\\(t\\) cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma\nAvailable since 2.12\n \n\nreal student_t_lccdf(reals y | reals nu, reals mu, reals sigma) The log of the Student-\\(t\\) complementary cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma\nAvailable since 2.12\n \n\nR student_t_rng(reals nu, reals mu, reals sigma) Generate a Student-\\(t\\) variate with degrees of freedom nu, location mu, and scale sigma; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{Cauchy}(y|\\mu,\\sigma) = \\frac{1}{\\pi \\sigma} \\ \\frac{1}{1 + \\left((y - \\mu)/\\sigma\\right)^2} .\n\\end{equation*}\\]\n\n\n\ny ~ cauchy(mu, sigma)\nIncrement target log probability density with cauchy_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal cauchy_lpdf(reals y | reals mu, reals sigma) The log of the Cauchy density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal cauchy_lupdf(reals y | reals mu, reals sigma) The log of the Cauchy density of y given location mu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal cauchy_cdf(reals y | reals mu, reals sigma) The Cauchy cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.0\n \n\nreal cauchy_lcdf(reals y | reals mu, reals sigma) The log of the Cauchy cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal cauchy_lccdf(reals y | reals mu, reals sigma) The log of the Cauchy complementary cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nR cauchy_rng(reals mu, reals sigma) Generate a Cauchy variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in\n\\mathbb{R}\\), \\[\\begin{equation*} \\text{DoubleExponential}(y|\\mu,\\sigma) =\n\\frac{1}{2\\sigma} \\exp \\left( - \\, \\frac{|y - \\mu|}{\\sigma} \\right)\n. 
\\end{equation*}\\] Note that the double exponential distribution is parameterized in terms of the scale, in contrast to the exponential distribution (see section exponential distribution), which is parameterized in terms of inverse scale.\nThe double-exponential distribution can be defined as a compound exponential-normal distribution (Ding and Blitzstein 2018). Using the inverse scale parameterization for the exponential distribution, and the standard deviation parameterization for the normal distribution, one can write \\[\\begin{equation*} \\alpha \\sim\n\\mathsf{Exponential}\\left( \\frac{1}{2 \\sigma^2} \\right) \\end{equation*}\\] and \\[\\begin{equation*} \\beta \\mid\n\\alpha \\sim \\mathsf{Normal}(\\mu, \\sqrt{\\alpha}), \\end{equation*}\\] then \\[\\begin{equation*} \\beta \\sim\n\\mathsf{DoubleExponential}(\\mu, \\sigma ). \\end{equation*}\\] This may be used to code a non-centered parameterization by taking \\[\\begin{equation*} \\beta^{\\text{raw}} \\sim\n\\mathsf{Normal}(0, 1) \\end{equation*}\\] and defining \\[\\begin{equation*} \\beta = \\mu + \\sqrt{\\alpha} \\,\n\\beta^{\\text{raw}}. 
\\end{equation*}\\]\n\n\n\ny ~ double_exponential(mu, sigma)\nIncrement target log probability density with double_exponential_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal double_exponential_lpdf(reals y | reals mu, reals sigma) The log of the double exponential density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal double_exponential_lupdf(reals y | reals mu, reals sigma) The log of the double exponential density of y given location mu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal double_exponential_cdf(reals y | reals mu, reals sigma) The double exponential cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.0\n \n\nreal double_exponential_lcdf(reals y | reals mu, reals sigma) The log of the double exponential cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal double_exponential_lccdf(reals y | reals mu, reals sigma) The log of the double exponential complementary cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nR double_exponential_rng(reals mu, reals sigma) Generate a double exponential variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{Logistic}(y|\\mu,\\sigma) = \\frac{1}{\\sigma} \\ \\exp\\!\\left( - \\, \\frac{y - \\mu}{\\sigma} \\right)\n\\ \\left(1 + \\exp \\!\\left( - \\, \\frac{y - \\mu}{\\sigma} \\right) \\right)^{\\!-2} \\! 
.\n\\end{equation*}\\]\n\n\n\ny ~ logistic(mu, sigma)\nIncrement target log probability density with logistic_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal logistic_lpdf(reals y | reals mu, reals sigma) The log of the logistic density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal logistic_lupdf(reals y | reals mu, reals sigma) The log of the logistic density of y given location mu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal logistic_cdf(reals y | reals mu, reals sigma) The logistic cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.0\n \n\nreal logistic_lcdf(reals y | reals mu, reals sigma) The log of the logistic cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal logistic_lccdf(reals y | reals mu, reals sigma) The log of the logistic complementary cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nR logistic_rng(reals mu, reals sigma) Generate a logistic variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\) and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{Gumbel}(y|\\mu,\\beta) = \\frac{1}{\\beta} \\ \\exp\\left(-\\frac{y-\\mu}{\\beta}-\\exp\\left(-\\frac{y-\\mu}{\\beta}\\right)\\right) .\n\\end{equation*}\\]\n\n\n\ny ~ gumbel(mu, beta)\nIncrement target log probability density with gumbel_lupdf(y | mu, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal gumbel_lpdf(reals y | reals mu, reals beta) The log of the gumbel density of y given location mu and scale beta\nAvailable since 2.12\n \n\nreal gumbel_lupdf(reals y | reals mu, reals beta) The log of the gumbel density of y given location mu and scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal gumbel_cdf(reals y | reals mu, reals beta) The gumbel cumulative distribution function of y given location mu and scale beta\nAvailable since 2.0\n \n\nreal gumbel_lcdf(reals y | reals mu, reals beta) The log of the gumbel cumulative distribution function of y given location mu and scale beta\nAvailable since 2.12\n \n\nreal gumbel_lccdf(reals y | reals mu, reals beta) The log of the gumbel complementary cumulative distribution function of y given location mu and scale beta\nAvailable since 2.12\n \n\nR gumbel_rng(reals mu, reals beta) Generate a gumbel variate with location mu and scale beta; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\), \\(\\sigma \\in \\mathbb{R}^+\\) and \\(\\tau \\in [0, 1]\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{aligned}\n& \\text{SkewDoubleExponential} (y|\\mu,\\sigma, \\tau) = \\\\\n& \\qquad \\qquad \\frac{2 \\tau (1 - \\tau) }{\\sigma} \\exp \\left[ - \\frac{2}{\\sigma} \\left[ \\left(1 - \\tau \\right) I(y < \\mu) (\\mu - y) + \\tau I(y > \\mu)(y-\\mu) \\right] \\right]\n\\end{aligned}\\]\n\n\n\ny ~ skew_double_exponential(mu, sigma, tau)\nIncrement target log probability density with skew_double_exponential_lupdf(y | mu, sigma, tau).\nAvailable since 2.28\n \n\n\n\n\n \n\nreal skew_double_exponential_lpdf(reals y | reals mu, reals sigma, reals tau) The log of the skew double exponential density of y given location mu, scale sigma and skewness tau\nAvailable since 2.28\n \n\nreal skew_double_exponential_lupdf(reals y | reals mu, reals sigma, reals tau) The log of the skew double exponential density of y given location mu, scale sigma and skewness tau dropping constant additive terms\nAvailable since 2.28\n \n\nreal skew_double_exponential_cdf(reals y | reals mu, reals sigma, reals tau) The skew double exponential cumulative distribution function of y given location mu, scale sigma and skewness tau\nAvailable since 2.28\n \n\nreal skew_double_exponential_lcdf(reals y | reals mu, reals sigma, reals tau) The log of the skew double exponential cumulative distribution function of y given location mu, scale sigma and skewness tau\nAvailable since 2.28\n \n\nreal skew_double_exponential_lccdf(reals y | reals mu, reals sigma, reals tau) The log of the skew double exponential complementary cumulative distribution function of y given location mu, scale sigma and skewness tau\nAvailable since 2.28\n \n\nR skew_double_exponential_rng(reals mu, reals sigma, reals tau) Generate a skew double exponential variate with location mu, 
scale sigma and skewness tau; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#normal-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#normal-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in\n\\mathbb{R}\\), \\[\\begin{equation*}\n\\text{Normal}(y|\\mu,\\sigma) =\n\\frac{1}{\\sqrt{2 \\pi} \\ \\sigma} \\exp\\left( - \\, \\frac{1}{2} \\left(\\frac{y - \\mu}{\\sigma} \\right)^2 \\right) \\!.\n\\end{equation*}\\]\n\n\n\ny ~ normal(mu, sigma)\nIncrement target log probability density with normal_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal normal_lpdf(reals y | reals mu, reals sigma) The log of the normal density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal normal_lupdf(reals y | reals mu, reals sigma) The log of the normal density of y given location mu and scale sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nreal normal_cdf(reals y | reals mu, reals sigma) The cumulative normal distribution of y given location mu and scale sigma; normal_cdf will underflow to 0 for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) below -37.5 and overflow to 1 for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) above 8.25; the function Phi_approx is more robust in the tails, but must be scaled and translated for anything other than a standard normal.\nAvailable since 2.0\n \n\nreal normal_lcdf(reals y | reals mu, reals sigma) The log of the cumulative normal distribution of y given location mu and scale sigma; normal_lcdf will underflow to \\(-\\infty\\) for 
\\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) below -37.5 and overflow to 0 for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) above 8.25; log(Phi_approx(...)) is more robust in the tails, but must be scaled and translated for anything other than a standard normal.\nAvailable since 2.12\n \n\nreal normal_lccdf(reals y | reals mu, reals sigma) The log of the complementary cumulative normal distribution of y given location mu and scale sigma; normal_lccdf will overflow to 0 for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) below -37.5 and underflow to \\(-\\infty\\) for \\(\\frac{{y}-{\\mu}}{{\\sigma}}\\) above 8.25; log1m(Phi_approx(...)) is more robust in the tails, but must be scaled and translated for anything other than a standard normal.\nAvailable since 2.15\n \n\nR normal_rng(reals mu, reals sigma) Generate a normal variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\nThe standard normal distribution is so-called because its parameters are the units for their respective operations—the location (mean) is zero and the scale (standard deviation) one. The standard normal is parameter-free, and the unit parameters allow considerable simplification of the expression for the density. 
\\[\\begin{equation*}\n\\text{StdNormal}(y) \\ = \\ \\text{Normal}(y \\mid 0, 1) \\ = \\ \\frac{1}{\\sqrt{2 \\pi}} \\, \\exp \\left( \\frac{-y^2}{2} \\right)\\!.\n\\end{equation*}\\] Up to a proportion on the log scale, where Stan computes, \\[\\begin{equation*}\n\\log \\text{Normal}(y \\mid 0, 1) \\ = \\ \\frac{-y^2}{2} + \\text{const}.\n\\end{equation*}\\] With no logarithm, no subtraction, and no division by a parameter, the standard normal log density is much more efficient to compute than the normal log density with constant location \\(0\\) and scale \\(1\\).\n\n\n\ny ~ std_normal()\nIncrement target log probability density with std_normal_lupdf(y).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal std_normal_lpdf(reals y) The standard normal (location zero, scale one) log probability density of y.\nAvailable since 2.18\n \n\nreal std_normal_lupdf(reals y) The standard normal (location zero, scale one) log probability density of y dropping constant additive terms.\nAvailable since 2.25\n \n\nreal std_normal_cdf(reals y) The cumulative standard normal distribution of y; std_normal_cdf will underflow to 0 for \\(y\\) below -37.5 and overflow to 1 for \\(y\\) above 8.25; the function Phi_approx is more robust in the tails.\nAvailable since 2.21\n \n\nreal std_normal_lcdf(reals y) The log of the cumulative standard normal distribution of y; std_normal_lcdf will underflow to \\(-\\infty\\) for \\(y\\) below -37.5 and overflow to 0 for \\(y\\) above 8.25; log(Phi_approx(...)) is more robust in the tails.\nAvailable since 2.21\n \n\nreal std_normal_lccdf(reals y) The log of the complementary cumulative standard normal distribution of y; std_normal_lccdf will overflow to 0 for \\(y\\) below -37.5 and underflow to \\(-\\infty\\) for \\(y\\) above 8.25; log1m(Phi_approx(...)) is more robust in the tails.\nAvailable since 2.21\n \n\nR std_normal_qf(T x) Returns the value of the inverse standard normal cdf \\(\\Phi^{-1}\\) at the specified quantile x. 
The std_normal_qf is equivalent to the inv_Phi function.\nAvailable since 2.31\n \n\nR std_normal_log_qf(T x) Return the value of the inverse standard normal cdf \\(\\Phi^{-1}\\) evaluated at the log of the specified quantile x. This function is equivalent to std_normal_qf(exp(x)) but is more numerically stable.\nAvailable since 2.31\n \n\nreal std_normal_rng() Generate a normal variate with location zero and scale one; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.21", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#normal-id-glm", + "href": "functions-reference/unbounded_continuous_distributions.html#normal-id-glm", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "Stan also supplies a single function for a generalized linear model with normal distribution and identity link function, i.e. a function for a linear regression. This provides a more efficient implementation of linear regression than a manually written regression in terms of a normal distribution and matrix multiplication.\n\n\nIf \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta\\in\n\\mathbb{R}^m, \\sigma\\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^n\\), \\[\\begin{equation*} \\text{NormalIdGLM}(y|x, \\alpha, \\beta, \\sigma) = \\prod_{1\\leq i\n\\leq n}\\text{Normal}(y_i|\\alpha_i + x_i\\cdot \\beta, \\sigma). 
\\end{equation*}\\]\n\n\n\ny ~ normal_id_glm(x, alpha, beta, sigma)\nIncrement target log probability density with normal_id_glm_lupdf(y | x, alpha, beta, sigma).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal normal_id_glm_lpdf(real y | matrix x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.29\n \n\nreal normal_id_glm_lupdf(real y | matrix x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.29\n \n\nreal normal_id_glm_lpdf(real y | matrix x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.29\n \n\nreal normal_id_glm_lupdf(real y | matrix x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.29\n \n\nreal normal_id_glm_lpdf(real y | matrix x, real alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.23\n \n\nreal normal_id_glm_lupdf(real y | matrix x, real alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nreal normal_id_glm_lpdf(real y | matrix x, vector alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.23\n \n\nreal normal_id_glm_lupdf(real y | matrix x, vector alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nreal normal_id_glm_lpdf(vector y | row_vector x, real alpha, vector 
beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.29\n \n\nreal normal_id_glm_lupdf(vector y | row_vector x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.29\n \n\nreal normal_id_glm_lpdf(vector y | row_vector x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.29\n \n\nreal normal_id_glm_lupdf(vector y | row_vector x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.29\n \n\nreal normal_id_glm_lpdf(vector y | matrix x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.23\n \n\nreal normal_id_glm_lupdf(vector y | matrix x, real alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.23\n \n\nreal normal_id_glm_lpdf(vector y | matrix x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.23\n \n\nreal normal_id_glm_lupdf(vector y | matrix x, vector alpha, vector beta, real sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.23\n \n\nreal normal_id_glm_lpdf(vector y | matrix x, real alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.30\n \n\nreal normal_id_glm_lupdf(vector y | matrix x, real alpha, vector beta, vector sigma) The log normal probability 
density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.30\n \n\nreal normal_id_glm_lpdf(vector y | matrix x, vector alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma.\nAvailable since 2.30\n \n\nreal normal_id_glm_lupdf(vector y | matrix x, vector alpha, vector beta, vector sigma) The log normal probability density of y given location alpha + x * beta and scale sigma dropping constant additive terms.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#exponentially-modified-normal-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#exponentially-modified-normal-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "Exponentially modified Gaussian describes the distribution of \\(Z = X + Y\\) when \\(X\\) and \\(Y\\) are independent and \\(X\\) is normally distributed (with mean \\(\\mu\\) and standard deviation \\(\\sigma\\)) and \\(Y\\) is exponentially distributed (with rate \\(\\lambda\\)).\n\n\nIf \\(\\mu \\in \\mathbb{R}\\), \\(\\sigma \\in \\mathbb{R}^+\\), and \\(\\lambda \\in\n\\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{ExpModNormal}(y|\\mu,\\sigma,\\lambda) = \\frac{\\lambda}{2} \\ \\exp\n\\left(\\frac{\\lambda}{2} \\left(2\\mu + \\lambda \\sigma^2 -\n2y\\right)\\right) \\text{erfc}\\left(\\frac{\\mu + \\lambda\\sigma^2 -\ny}{\\sqrt{2}\\sigma}\\right) . 
\\end{equation*}\\]\n\n\n\ny ~ exp_mod_normal(mu, sigma, lambda)\nIncrement target log probability density with exp_mod_normal_lupdf(y | mu, sigma, lambda).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal exp_mod_normal_lpdf(reals y | reals mu, reals sigma, reals lambda) The log of the exponentially modified normal density of y given location mu, scale sigma, and rate lambda\nAvailable since 2.18\n \n\nreal exp_mod_normal_lupdf(reals y | reals mu, reals sigma, reals lambda) The log of the exponentially modified normal density of y given location mu, scale sigma, and rate lambda dropping constant additive terms\nAvailable since 2.25\n \n\nreal exp_mod_normal_cdf(reals y | reals mu, reals sigma, reals lambda) The exponentially modified normal cumulative distribution function of y given location mu, scale sigma, and rate lambda\nAvailable since 2.0\n \n\nreal exp_mod_normal_lcdf(reals y | reals mu, reals sigma, reals lambda) The log of the exponentially modified normal cumulative distribution function of y given location mu, scale sigma, and rate lambda\nAvailable since 2.18\n \n\nreal exp_mod_normal_lccdf(reals y | reals mu, reals sigma, reals lambda) The log of the exponentially modified normal complementary cumulative distribution function of y given location mu, scale sigma, and rate lambda\nAvailable since 2.18\n \n\nR exp_mod_normal_rng(reals mu, reals sigma, reals lambda) Generate a exponentially modified normal variate with location mu, scale sigma, and rate lambda; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#skew-normal-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#skew-normal-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "If \\(\\xi \\in \\mathbb{R}\\), \\(\\omega \\in \\mathbb{R}^+\\), and \\(\\alpha \\in\n\\mathbb{R}\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*} \\text{SkewNormal}(y \\mid\n\\xi, \\omega, \\alpha) = \\frac{1}{\\omega\\sqrt{2\\pi}} \\ \\exp\\left( - \\,\n\\frac{1}{2} \\left( \\frac{y - \\xi}{\\omega} \\right)^2\n\\right) \\ \\left(1 + \\text{erf}\\left( \\alpha\\left(\\frac{y -\n\\xi}{\\omega\\sqrt{2}}\\right)\\right)\\right) . \\end{equation*}\\]\n\n\n\ny ~ skew_normal(xi, omega, alpha)\nIncrement target log probability density with skew_normal_lupdf(y | xi, omega, alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal skew_normal_lpdf(reals y | reals xi, reals omega, reals alpha) The log of the skew normal density of y given location xi, scale omega, and shape alpha\nAvailable since 2.16\n \n\nreal skew_normal_lupdf(reals y | reals xi, reals omega, reals alpha) The log of the skew normal density of y given location xi, scale omega, and shape alpha dropping constant additive terms\nAvailable since 2.25\n \n\nreal skew_normal_cdf(reals y | reals xi, reals omega, reals alpha) The skew normal distribution function of y given location xi, scale omega, and shape alpha\nAvailable since 2.16\n \n\nreal skew_normal_lcdf(reals y | reals xi, reals omega, reals alpha) The log of the skew normal cumulative distribution function of y given location xi, scale omega, and shape alpha\nAvailable since 2.18\n \n\nreal skew_normal_lccdf(reals y | reals xi, reals omega, 
reals alpha) The log of the skew normal complementary cumulative distribution function of y given location xi, scale omega, and shape alpha\nAvailable since 2.18\n \n\nR skew_normal_rng(reals xi, reals omega, real alpha) Generate a skew normal variate with location xi, scale omega, and shape alpha; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#student-t-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#student-t-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "If \\(\\nu \\in \\mathbb{R}^+\\), \\(\\mu \\in \\mathbb{R}\\), and \\(\\sigma \\in\n\\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{StudentT}(y|\\nu,\\mu,\\sigma) = \\frac{\\Gamma\\left((\\nu +\n1)/2\\right)} {\\Gamma(\\nu/2)} \\ \\frac{1}{\\sqrt{\\nu \\pi} \\ \\sigma}\n\\ \\left( 1 + \\frac{1}{\\nu} \\left(\\frac{y - \\mu}{\\sigma}\\right)^2\n\\right)^{-(\\nu + 1)/2} \\! . 
\\end{equation*}\\]\n\n\n\ny ~ student_t(nu, mu, sigma)\nIncrement target log probability density with student_t_lupdf(y | nu, mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal student_t_lpdf(reals y | reals nu, reals mu, reals sigma) The log of the Student-\\(t\\) density of y given degrees of freedom nu, location mu, and scale sigma\nAvailable since 2.12\n \n\nreal student_t_lupdf(reals y | reals nu, reals mu, reals sigma) The log of the Student-\\(t\\) density of y given degrees of freedom nu, location mu, and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal student_t_cdf(reals y | reals nu, reals mu, reals sigma) The Student-\\(t\\) cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma\nAvailable since 2.0\n \n\nreal student_t_lcdf(reals y | reals nu, reals mu, reals sigma) The log of the Student-\\(t\\) cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma\nAvailable since 2.12\n \n\nreal student_t_lccdf(reals y | reals nu, reals mu, reals sigma) The log of the Student-\\(t\\) complementary cumulative distribution function of y given degrees of freedom nu, location mu, and scale sigma\nAvailable since 2.12\n \n\nR student_t_rng(reals nu, reals mu, reals sigma) Generate a Student-\\(t\\) variate with degrees of freedom nu, location mu, and scale sigma; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#cauchy-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#cauchy-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{Cauchy}(y|\\mu,\\sigma) = \\frac{1}{\\pi \\sigma} \\ \\frac{1}{1 + \\left((y - \\mu)/\\sigma\\right)^2} .\n\\end{equation*}\\]\n\n\n\ny ~ cauchy(mu, sigma)\nIncrement target log probability density with cauchy_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal cauchy_lpdf(reals y | reals mu, reals sigma) The log of the Cauchy density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal cauchy_lupdf(reals y | reals mu, reals sigma) The log of the Cauchy density of y given location mu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal cauchy_cdf(reals y | reals mu, reals sigma) The Cauchy cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.0\n \n\nreal cauchy_lcdf(reals y | reals mu, reals sigma) The log of the Cauchy cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal cauchy_lccdf(reals y | reals mu, reals sigma) The log of the Cauchy complementary cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nR cauchy_rng(reals mu, reals sigma) Generate a Cauchy variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#double-exponential-laplace-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#double-exponential-laplace-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in\n\\mathbb{R}\\), \\[\\begin{equation*} \\text{DoubleExponential}(y|\\mu,\\sigma) =\n\\frac{1}{2\\sigma} \\exp \\left( - \\, \\frac{|y - \\mu|}{\\sigma} \\right)\n. \\end{equation*}\\] Note that the double exponential distribution is parameterized in terms of the scale, in contrast to the exponential distribution (see section exponential distribution), which is parameterized in terms of inverse scale.\nThe double-exponential distribution can be defined as a compound exponential-normal distribution (Ding and Blitzstein 2018). Using the inverse scale parameterization for the exponential distribution, and the standard deviation parameterization for the normal distribution, one can write \\[\\begin{equation*} \\alpha \\sim\n\\mathsf{Exponential}\\left( \\frac{1}{2 \\sigma^2} \\right) \\end{equation*}\\] and \\[\\begin{equation*} \\beta \\mid\n\\alpha \\sim \\mathsf{Normal}(\\mu, \\sqrt{\\alpha}), \\end{equation*}\\] then \\[\\begin{equation*} \\beta \\sim\n\\mathsf{DoubleExponential}(\\mu, \\sigma ). \\end{equation*}\\] This may be used to code a non-centered parameterization by taking \\[\\begin{equation*} \\beta^{\\text{raw}} \\sim\n\\mathsf{Normal}(0, 1) \\end{equation*}\\] and defining \\[\\begin{equation*} \\beta = \\mu + \\sqrt{\\alpha} \\,\n\\beta^{\\text{raw}}. 
\\end{equation*}\\]\n\n\n\ny ~ double_exponential(mu, sigma)\nIncrement target log probability density with double_exponential_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal double_exponential_lpdf(reals y | reals mu, reals sigma) The log of the double exponential density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal double_exponential_lupdf(reals y | reals mu, reals sigma) The log of the double exponential density of y given location mu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal double_exponential_cdf(reals y | reals mu, reals sigma) The double exponential cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.0\n \n\nreal double_exponential_lcdf(reals y | reals mu, reals sigma) The log of the double exponential cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal double_exponential_lccdf(reals y | reals mu, reals sigma) The log of the double exponential complementary cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nR double_exponential_rng(reals mu, reals sigma) Generate a double exponential variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#logistic-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#logistic-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{Logistic}(y|\\mu,\\sigma) = \\frac{1}{\\sigma} \\ \\exp\\!\\left( - \\, \\frac{y - \\mu}{\\sigma} \\right)\n\\ \\left(1 + \\exp \\!\\left( - \\, \\frac{y - \\mu}{\\sigma} \\right) \\right)^{\\!-2} \\! .\n\\end{equation*}\\]\n\n\n\ny ~ logistic(mu, sigma)\nIncrement target log probability density with logistic_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal logistic_lpdf(reals y | reals mu, reals sigma) The log of the logistic density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal logistic_lupdf(reals y | reals mu, reals sigma) The log of the logistic density of y given location mu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal logistic_cdf(reals y | reals mu, reals sigma) The logistic cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.0\n \n\nreal logistic_lcdf(reals y | reals mu, reals sigma) The log of the logistic cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal logistic_lccdf(reals y | reals mu, reals sigma) The log of the logistic complementary cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nR logistic_rng(reals mu, reals sigma) Generate a logistic variate with location mu and scale sigma; may only be used in 
transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#gumbel-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#gumbel-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\) and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{equation*}\n\\text{Gumbel}(y|\\mu,\\beta) = \\frac{1}{\\beta} \\ \\exp\\left(-\\frac{y-\\mu}{\\beta}-\\exp\\left(-\\frac{y-\\mu}{\\beta}\\right)\\right) .\n\\end{equation*}\\]\n\n\n\ny ~ gumbel(mu, beta)\nIncrement target log probability density with gumbel_lupdf(y | mu, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal gumbel_lpdf(reals y | reals mu, reals beta) The log of the gumbel density of y given location mu and scale beta\nAvailable since 2.12\n \n\nreal gumbel_lupdf(reals y | reals mu, reals beta) The log of the gumbel density of y given location mu and scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal gumbel_cdf(reals y | reals mu, reals beta) The gumbel cumulative distribution function of y given location mu and scale beta\nAvailable since 2.0\n \n\nreal gumbel_lcdf(reals y | reals mu, reals beta) The log of the gumbel cumulative distribution function of y given location mu and scale beta\nAvailable since 2.12\n \n\nreal gumbel_lccdf(reals y | reals mu, reals beta) The log of the gumbel complementary cumulative distribution function of y given location mu and scale beta\nAvailable since 2.12\n \n\nR gumbel_rng(reals mu, reals beta) Generate a gumbel variate with location mu and scale beta; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_continuous_distributions.html#skew-double-exponential-distribution", + "href": "functions-reference/unbounded_continuous_distributions.html#skew-double-exponential-distribution", + "title": "Unbounded Continuous Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\), \\(\\sigma \\in \\mathbb{R}^+\\) and \\(\\tau \\in [0, 1]\\), then for \\(y \\in \\mathbb{R}\\), \\[\\begin{aligned}\n& \\text{SkewDoubleExponential} (y|\\mu,\\sigma, \\tau) = \\\\\n& \\qquad \\qquad \\frac{2 \\tau (1 - \\tau) }{\\sigma} \\exp \\left[ - \\frac{2}{\\sigma} \\left[ \\left(1 - \\tau \\right) I(y < \\mu) (\\mu - y) + \\tau I(y > \\mu)(y-\\mu) \\right] \\right]\n\\end{aligned}\\]\n\n\n\ny ~ skew_double_exponential(mu, sigma, tau)\nIncrement target log probability density with skew_double_exponential(y | mu, sigma, tau)\nAvailable since 2.28\n \n\n\n\n\n \n\nreal skew_double_exponential_lpdf(reals y | reals mu, reals sigma, reals tau) The log of the skew double exponential density of y given location mu, scale sigma and skewness tau\nAvailable since 2.28\n \n\nreal skew_double_exponential_lupdf(reals y | reals mu, reals sigma, reals tau) The log of the skew double exponential density of y given location mu, scale sigma and skewness tau dropping constant additive terms\nAvailable since 2.28\n \n\nreal skew_double_exponential_cdf(reals y | reals mu, reals sigma, reals tau) The skew double exponential cumulative distribution function of y given location mu, scale sigma and skewness tau\nAvailable since 2.28\n \n\nreal skew_double_exponential_lcdf(reals y | reals mu, reals sigma, reals tau) The log of the skew double exponential cumulative distribution function of y given location mu, scale sigma and 
skewness tau\nAvailable since 2.28\n \n\nreal skew_double_exponential_lccdf(reals y | reals mu, reals sigma, reals tau) The log of the skew double exponential complementary cumulative distribution function of y given location mu, scale sigma and skewness tau\nAvailable since 2.28\n \n\nR skew_double_exponential_rng(reals mu, reals sigma, reals tau) Generate a skew double exponential variate with location mu, scale sigma and skewness tau; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Unbounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/sparse_matrix_operations.html", + "href": "functions-reference/sparse_matrix_operations.html", + "title": "Sparse Matrix Operations", + "section": "", + "text": "For sparse matrices, for which many elements are zero, it is more efficient to use specialized representations to save memory and speed up matrix arithmetic (including derivative calculations). Given Stan’s implementation, there is substantial space (memory) savings by using sparse matrices. Because of the ease of optimizing dense matrix operations, speed improvements only arise at 90% or even greater sparsity; below that level, dense matrices are faster but use more memory.\nBecause of this speedup and space savings, it may even be useful to read in a dense matrix and convert it to a sparse matrix before multiplying it by a vector. This chapter covers a very specific form of sparsity consisting of a sparse matrix multiplied by a dense vector.\n\n\nSparse matrices are represented in Stan using compressed row storage (CSR). 
For example, the matrix \\[\\begin{equation*}\nA = \\begin{bmatrix} 19 & 27 & 0 & 0 \\\\ 0 & 0 & 0 & 0 \\\\ 0 & 0 & 0 & 52 \\\\ 81 & 0 & 95 & 33 \\end{bmatrix}\n\\end{equation*}\\] is translated into a vector of the non-zero real values, read by row from the matrix \\(A\\), \\[\\begin{equation*}\nw(A) = \\begin{bmatrix} 19 & 27 & 52 & 81 & 95 & 33 \\end{bmatrix}^{\\top} \\! \\! \\! ,\n\\end{equation*}\\] an array of integer column indices for the values, \\[\\begin{equation*}\nv(A) = \\begin{bmatrix} 1 & 2 & 4 & 1 & 3 & 4 \\end{bmatrix} \\! ,\n\\end{equation*}\\] and an array of integer indices indicating where in \\(w(A)\\) a given row’s values start, \\[\\begin{equation*} u(A) = \\begin{bmatrix} 1 & 3 & 3 & 4 & 7 \\end{bmatrix} \\! ,\n\\end{equation*}\\] with a padded value at the end to guarantee that \\[\\begin{equation*} u(A)[n+1] - u(A)[n] \\end{equation*}\\] is the number of non-zero elements in row \\(n\\) of the matrix (here \\(2\\), \\(0\\), \\(1\\), and \\(3\\)). Note that because the second row has no non-zero elements both the second and third elements of \\(u(A)\\) correspond to the third element of \\(w(A)\\), which is \\(52\\). The values \\((w(A), \\, v(A), \\, u(A))\\) are sufficient to reconstruct \\(A\\).\nThe values are structured so that there is a real value and integer column index for each non-zero entry in the array, plus one integer for each row of the matrix, plus one for padding. There is also underlying storage for internal container pointers and sizes. The total memory usage is roughly \\(12 K + M\\) bytes plus a small constant overhead, which is often considerably fewer bytes than the \\(M \\times\nN\\) required to store a dense matrix. 
Even more importantly, zero values do not introduce derivatives under multiplication or addition, so many storage and evaluation steps are saved when sparse matrices are multiplied.\n\n\n\nConversion functions between dense and sparse matrices are provided.\n\n\nConverting a dense matrix \\(m\\) to a sparse representation produces a vector \\(w\\) and two integer arrays, \\(u\\) and \\(v\\).\n \n\nvector csr_extract_w(matrix a) Return non-zero values in matrix a; see section compressed row storage.\nAvailable since 2.8\n \n\narray[] int csr_extract_v(matrix a) Return column indices for values in csr_extract_w(a); see compressed row storage.\nAvailable since 2.8\n \n\narray[] int csr_extract_u(matrix a) Return array of row starting indices for entries in csr_extract_w(a) followed by the size of csr_extract_w(a) plus one; see section compressed row storage.\nAvailable since 2.8\n \n\ntuple(vector, array[] int, array[] int) csr_extract(matrix a) Return all three components of the CSR representation of the matrix a; see section compressed row storage. This function is equivalent to (csr_extract_w(a), csr_extract_v(a), csr_extract_u(a)).\nAvailable since 2.33\n\n\n\nTo convert a sparse matrix representation to a dense matrix, there is a single function.\n \n\nmatrix csr_to_dense_matrix(int m, int n, vector w, array[] int v, array[] int u) Return dense \\(\\text{m} \\times \\text{n}\\) matrix with non-zero matrix entries w, column indices v, and row starting indices u; the vector w and array v must be the same size (corresponding to the total number of nonzero entries in the matrix), array v must have index values bounded by m, array u must have length equal to m + 1 and contain index values bounded by the number of nonzeros (except for the last entry, which must be equal to the number of nonzeros plus one). 
See section compressed row storage for more details.\nAvailable since 2.10\n\n\n\n\n\n\nThe only supported operation is the multiplication of a sparse matrix \\(A\\) and a dense vector \\(b\\) to produce a dense vector \\(A\\,b\\). Multiplying a dense row vector \\(b\\) and a sparse matrix \\(A\\) can be coded using transposition as \\[\\begin{equation*}\nb \\, A = (A^{\\top} \\, b^{\\top})^{\\top},\n\\end{equation*}\\] but care must be taken to represent \\(A^{\\top}\\) rather than \\(A\\) as a sparse matrix.\n \n\nvector csr_matrix_times_vector(int m, int n, vector w, array[] int v, array[] int u, vector b) Multiply the \\(\\text{m} \\times \\text{n}\\) matrix represented by values w, column indices v, and row start indices u by the vector b; see compressed row storage.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Sparse Matrix Operations" + ] + }, + { + "objectID": "functions-reference/sparse_matrix_operations.html#CSR", + "href": "functions-reference/sparse_matrix_operations.html#CSR", + "title": "Sparse Matrix Operations", + "section": "", + "text": "Sparse matrices are represented in Stan using compressed row storage (CSR). For example, the matrix \\[\\begin{equation*}\nA = \\begin{bmatrix} 19 & 27 & 0 & 0 \\\\ 0 & 0 & 0 & 0 \\\\ 0 & 0 & 0 & 52 \\\\ 81 & 0 & 95 & 33 \\end{bmatrix}\n\\end{equation*}\\] is translated into a vector of the non-zero real values, read by row from the matrix \\(A\\), \\[\\begin{equation*}\nw(A) = \\begin{bmatrix} 19 & 27 & 52 & 81 & 95 & 33 \\end{bmatrix}^{\\top} \\! \\! \\! ,\n\\end{equation*}\\] an array of integer column indices for the values, \\[\\begin{equation*}\nv(A) = \\begin{bmatrix} 1 & 2 & 4 & 1 & 3 & 4 \\end{bmatrix} \\! ,\n\\end{equation*}\\] and an array of integer indices indicating where in \\(w(A)\\) a given row’s values start, \\[\\begin{equation*} u(A) = \\begin{bmatrix} 1 & 3 & 3 & 4 & 7 \\end{bmatrix} \\! 
,\n\\end{equation*}\\] with a padded value at the end to guarantee that \\[\\begin{equation*} u(A)[n+1] - u(A)[n] \\end{equation*}\\] is the number of non-zero elements in row \\(n\\) of the matrix (here \\(2\\), \\(0\\), \\(1\\), and \\(3\\)). Note that because the second row has no non-zero elements both the second and third elements of \\(u(A)\\) correspond to the third element of \\(w(A)\\), which is \\(52\\). The values \\((w(A), \\, v(A), \\, u(A))\\) are sufficient to reconstruct \\(A\\).\nThe values are structured so that there is a real value and integer column index for each non-zero entry in the array, plus one integer for each row of the matrix, plus one for padding. There is also underlying storage for internal container pointers and sizes. The total memory usage is roughly \\(12 K + M\\) bytes plus a small constant overhead, which is often considerably fewer bytes than the \\(M \\times\nN\\) required to store a dense matrix. Even more importantly, zero values do not introduce derivatives under multiplication or addition, so many storage and evaluation steps are saved when sparse matrices are multiplied.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Sparse Matrix Operations" + ] + }, + { + "objectID": "functions-reference/sparse_matrix_operations.html#conversion-functions", + "href": "functions-reference/sparse_matrix_operations.html#conversion-functions", + "title": "Sparse Matrix Operations", + "section": "", + "text": "Conversion functions between dense and sparse matrices are provided.\n\n\nConverting a dense matrix \\(m\\) to a sparse representation produces a vector \\(w\\) and two integer arrays, \\(u\\) and \\(v\\).\n \n\nvector csr_extract_w(matrix a) Return non-zero values in matrix a; see section compressed row storage.\nAvailable since 2.8\n \n\narray[] int csr_extract_v(matrix a) Return column indices for values in csr_extract_w(a); see compressed row storage.\nAvailable since 2.8\n \n\narray[] int 
csr_extract_u(matrix a) Return array of row starting indices for entries in csr_extract_w(a) followed by the size of csr_extract_w(a) plus one; see section compressed row storage.\nAvailable since 2.8\n \n\ntuple(vector, array[] int, array[] int) csr_extract(matrix a) Return all three components of the CSR representation of the matrix a; see section compressed row storage. This function is equivalent to (csr_extract_w(a), csr_extract_v(a), csr_extract_u(a)).\nAvailable since 2.33\n\n\n\nTo convert a sparse matrix representation to a dense matrix, there is a single function.\n \n\nmatrix csr_to_dense_matrix(int m, int n, vector w, array[] int v, array[] int u) Return dense \\(\\text{m} \\times \\text{n}\\) matrix with non-zero matrix entries w, column indices v, and row starting indices u; the vector w and array v must be the same size (corresponding to the total number of nonzero entries in the matrix), array v must have index values bounded by m, array u must have length equal to m + 1 and contain index values bounded by the number of nonzeros (except for the last entry, which must be equal to the number of nonzeros plus one). See section compressed row storage for more details.\nAvailable since 2.10", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Sparse Matrix Operations" + ] + }, + { + "objectID": "functions-reference/sparse_matrix_operations.html#sparse-matrix-arithmetic", + "href": "functions-reference/sparse_matrix_operations.html#sparse-matrix-arithmetic", + "title": "Sparse Matrix Operations", + "section": "", + "text": "The only supported operation is the multiplication of a sparse matrix \\(A\\) and a dense vector \\(b\\) to produce a dense vector \\(A\\,b\\). 
Multiplying a dense row vector \\(b\\) and a sparse matrix \\(A\\) can be coded using transposition as \\[\\begin{equation*}\nb \\, A = (A^{\\top} \\, b^{\\top})^{\\top},\n\\end{equation*}\\] but care must be taken to represent \\(A^{\\top}\\) rather than \\(A\\) as a sparse matrix.\n \n\nvector csr_matrix_times_vector(int m, int n, vector w, array[] int v, array[] int u, vector b) Multiply the \\(\\text{m} \\times \\text{n}\\) matrix represented by values w, column indices v, and row start indices u by the vector b; see compressed row storage.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Sparse Matrix Operations" + ] + }, + { + "objectID": "functions-reference/removed_functions.html", + "href": "functions-reference/removed_functions.html", + "title": "Removed Functions", + "section": "", + "text": "Functions which once existed in the Stan language and have since been replaced or removed are listed here.\n\n\nRemoved: Currently two non-conforming functions ending in suffix _log.\nReplacement: Replace multiply_log(...) with lmultiply(...). Replace binomial_coefficient_log(...) with lchoose(...).\nRemoved In: Stan 2.33\n\n\n\nRemoved: The built-in no-argument function get_lp() is deprecated.\nReplacement: Use the no-argument function target() instead.\nRemoved In: Stan 2.33\n\n\n\nRemoved: The unary function fabs is deprecated.\nReplacement: Use the unary function abs instead. 
Note that the return type for abs is different for integer overloads, but this replacement is safe due to Stan’s type promotion rules.\nRemoved In: Stan 2.33\n\n\n\nThese covariance functions have been replaced by those described in Gaussian Process Covariance Functions.\nWith magnitude \\(\\alpha\\) and length scale \\(\\rho\\), the exponentiated quadratic kernel is:\n\\[\nk(x_i, x_j) = \\alpha^2 \\exp \\left(-\\dfrac{1}{2\\rho^2} \\sum_{d=1}^D (x_{i,d} - x_{j,d})^2 \\right)\n\\]\n \n\nmatrix cov_exp_quad(row_vectors x, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x.\nAvailable since 2.16, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(vectors x, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x.\nAvailable since 2.16, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(array[] real x, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x.\nAvailable since 2.16, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(row_vectors x1, row_vectors x2, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x1 and x2.\nAvailable since 2.18, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(vectors x1, vectors x2, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x1 and x2.\nAvailable since 2.18, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(array[] real x1, array[] real x2, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x1 and x2.\nAvailable since 2.18, deprecated since 2.20, removed in 2.33\n\n\n\nRemoved: A nonzero real number (even NaN) was interpreted as true and a zero was interpreted as false.\nReplacement: Explicit x != 0 comparison is preferred instead.\nRemoved In: Stan 2.34", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Removed Functions" 
+ ] + }, + { + "objectID": "functions-reference/removed_functions.html#multiply_log-and-binomial_coefficient_log-functions", + "href": "functions-reference/removed_functions.html#multiply_log-and-binomial_coefficient_log-functions", + "title": "Removed Functions", + "section": "", + "text": "Removed: Currently two non-conforming functions ending in suffix _log.\nReplacement: Replace multiply_log(...) with lmultiply(...). Replace binomial_coefficient_log(...) with lchoose(...).\nRemoved In: Stan 2.33", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Removed Functions" + ] + }, + { + "objectID": "functions-reference/removed_functions.html#get_lp-function", + "href": "functions-reference/removed_functions.html#get_lp-function", + "title": "Removed Functions", + "section": "", + "text": "Removed: The built-in no-argument function get_lp() is deprecated.\nReplacement: Use the no-argument function target() instead.\nRemoved In: Stan 2.33", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Removed Functions" + ] + }, + { + "objectID": "functions-reference/removed_functions.html#fabs-function", + "href": "functions-reference/removed_functions.html#fabs-function", + "title": "Removed Functions", + "section": "", + "text": "Removed: The unary function fabs is deprecated.\nReplacement: Use the unary function abs instead. 
Note that the return type for abs is different for integer overloads, but this replacement is safe due to Stan’s type promotion rules.\nRemoved In: Stan 2.33", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Removed Functions" + ] + }, + { + "objectID": "functions-reference/removed_functions.html#cov_exp_quad", + "href": "functions-reference/removed_functions.html#cov_exp_quad", + "title": "Removed Functions", + "section": "", + "text": "These covariance functions have been replaced by those described in Gaussian Process Covariance Functions.\nWith magnitude \\(\\alpha\\) and length scale \\(\\rho\\), the exponentiated quadratic kernel is:\n\\[\nk(x_i, x_j) = \\alpha^2 \\exp \\left(-\\dfrac{1}{2\\rho^2} \\sum_{d=1}^D (x_{i,d} - x_{j,d})^2 \\right)\n\\]\n \n\nmatrix cov_exp_quad(row_vectors x, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x.\nAvailable since 2.16, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(vectors x, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x.\nAvailable since 2.16, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(array[] real x, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x.\nAvailable since 2.16, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(row_vectors x1, row_vectors x2, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x1 and x2.\nAvailable since 2.18, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(vectors x1, vectors x2, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x1 and x2.\nAvailable since 2.18, deprecated since 2.20, removed in 2.33\n \n\nmatrix cov_exp_quad(array[] real x1, array[] real x2, real alpha, real rho) The covariance matrix with an exponentiated quadratic kernel of x1 and x2.\nAvailable since 2.18, deprecated since 2.20, 
removed in 2.33", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Removed Functions" + ] + }, + { + "objectID": "functions-reference/removed_functions.html#real-arguments-to-logical-operators-operator-operator-and-operator", + "href": "functions-reference/removed_functions.html#real-arguments-to-logical-operators-operator-operator-and-operator", + "title": "Removed Functions", + "section": "", + "text": "Removed: A nonzero real number (even NaN) was interpreted as true and a zero was interpreted as false.\nReplacement: Explicit x != 0 comparison is preferred instead.\nRemoved In: Stan 2.34", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Removed Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html", + "href": "functions-reference/real-valued_basic_functions.html", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "This chapter describes built-in functions that take zero or more real or integer arguments and return real values.\n\n\nAlthough listed in this chapter, many of Stan’s built-in functions are vectorized so that they may be applied to any argument type. The vectorized form of these functions is not any faster than writing an explicit loop that iterates over the elements applying the function—it’s just easier to read and write and less error prone.\n\n\nMany of Stan’s unary functions can be applied to any argument type. For example, the exponential function, exp, can be applied to real arguments or arrays of real arguments. Other than for integer arguments, the result type is the same as the argument type, including dimensionality and size. Integer arguments are first promoted to real values, but the result will still have the same dimensionality and size as the argument.\n\n\nWhen applied to a simple real value, the result is a real value. When applied to arrays, vectorized functions like exp() are defined elementwise. 
For example,\n // declare some variables for arguments\n real x0;\n array[5] real x1;\n array[4, 7] real x2;\n // ...\n // declare some variables for results\n real y0;\n array[5] real y1;\n array[4, 7] real y2;\n // ...\n // calculate and assign results\n y0 = exp(x0);\n y1 = exp(x1);\n y2 = exp(x2);\nWhen exp is applied to an array, it applies elementwise. For example, the statement above,\n y2 = exp(x2);\nproduces the same result for y2 as the explicit loop\nfor (i in 1:4) {\n for (j in 1:7) {\n y2[i, j] = exp(x2[i, j]);\n }\n}\n\n\n\nVectorized functions also apply elementwise to vectors and matrices. For example,\n vector[5] xv;\n row_vector[7] xrv;\n matrix[10, 20] xm;\n\n vector[5] yv;\n row_vector[7] yrv;\n matrix[10, 20] ym;\n\n yv = exp(xv);\n yrv = exp(xrv);\n ym = exp(xm);\nArrays of vectors and matrices work the same way. For example,\n array[12] matrix[17, 93] u;\n\n array[12] matrix[17, 93] z;\n\n z = exp(u);\nAfter this has been executed, z[i, j, k] will be equal to exp(u[i, j, k]).\n\n\n\nInteger arguments are promoted to real values in vectorized unary functions. Thus if n is of type int, exp(n) is of type real. Arrays work the same way, so that if n2 is a one dimensional array of integers, then exp(n2) will be a one-dimensional array of reals with the same number of elements as n2. For example,\n array[23] int n1;\n array[23] real z1;\n z1 = exp(n1);\nIt would be illegal to try to assign exp(n1) to an array of integers; the return type is a real array.\n\n\n\n\nLike the unary functions, many of Stan’s binary functions have been vectorized, and can be applied elementwise to combinations of both scalars or container types.\n\n\nWhen applied to two scalar values, the result is a scalar value. When applied to two arrays, or combination of a scalar value and an array, vectorized functions like pow() are defined elementwise. 
For example,\n // declare some variables for arguments\n real x00;\n real x01;\n array[5] real x10;\n array[5]real x11;\n array[4, 7] real x20;\n array[4, 7] real x21;\n // ...\n // declare some variables for results\n real y0;\n array[5] real y1;\n array[4, 7] real y2;\n // ...\n // calculate and assign results\n y0 = pow(x00, x01);\n y1 = pow(x10, x11);\n y2 = pow(x20, x21);\nWhen pow is applied to two arrays, it applies elementwise. For example, the statement above,\n y2 = pow(x20, x21);\nproduces the same result for y2 as the explicit loop\nfor (i in 1:4) {\n for (j in 1:7) {\n y2[i, j] = pow(x20[i, j], x21[i, j]);\n }\n}\nAlternatively, if a combination of an array and a scalar are provided, the scalar value is broadcast to be applied to each value of the array. For example, the following statement:\ny2 = pow(x20, x00);\nproduces the same result for y2 as the explicit loop:\nfor (i in 1:4) {\n for (j in 1:7) {\n y2[i, j] = pow(x20[i, j], x00);\n }\n}\n\n\n\nVectorized binary functions also apply elementwise to vectors and matrices, and to combinations of these with scalar values. For example,\n real x00;\n vector[5] xv00;\n vector[5] xv01;\n row_vector[7] xrv;\n matrix[10, 20] xm;\n\n vector[5] yv;\n row_vector[7] yrv;\n matrix[10, 20] ym;\n\n yv = pow(xv00, xv01);\n yrv = pow(xrv, x00);\n ym = pow(x00, xm);\nArrays of vectors and matrices work the same way. For example,\n array[12] matrix[17, 93] u;\n\n array[12] matrix[17, 93] z;\n\n z = pow(u, x00);\nAfter this has been executed, z[i, j, k] will be equal to pow(u[i, j, k], x00).\n\n\n\nVectorised binary functions require that both inputs, unless one is a real, be containers of the same type and size. 
For example, the following statements are legal:\n vector[5] xv;\n row_vector[7] xrv;\n matrix[10, 20] xm;\n\n vector[5] yv = pow(xv, xv);\n row_vector[7] yrv = pow(xrv, xrv);\n matrix[10, 20] ym = pow(xm, xm);\nBut the following statements are not:\n vector[5] xv;\n vector[7] xv2;\n row_vector[5] xrv;\n\n // Cannot mix different types\n vector[5] yv = pow(xv, xrv);\n\n // Cannot mix different sizes of the same type\n vector[5] yv = pow(xv, xv2);\nWhile the vectorized binary functions generally require the same input types, the only exception to this is for binary functions that require one input to be an integer and the other to be a real (e.g., bessel_first_kind). For these functions, one argument can be a container of any type while the other can be an integer array, as long as the dimensions of both are the same. For example, the following statements are legal:\n vector[5] xv;\n matrix[5, 5] xm;\n array[5] int xi;\n array[5, 5] int xii;\n\n vector[5] yv = bessel_first_kind(xi, xv);\n matrix[5, 5] ym = bessel_first_kind(xii, xm);\nWhereas these are not:\n vector[5] xv;\n matrix[5, 5] xm;\n array[7] int xi;\n\n // Dimensions of containers do not match\n vector[5] yv = bessel_first_kind(xi, xv);\n\n // Function requires first argument be an integer type\n matrix[5, 5] ym = bessel_first_kind(xm, xm);\n\n\n\n\n\nConstants are represented as functions with no arguments and must be called as such. 
For instance, the mathematical constant \\(\\pi\\) must be written in a Stan program as pi().\n \n\nreal pi() \\(\\pi\\), the ratio of a circle’s circumference to its diameter\nAvailable since 2.0\n \n\nreal e() \\(e\\), the base of the natural logarithm\nAvailable since 2.0\n \n\nreal sqrt2() The square root of 2\nAvailable since 2.0\n \n\nreal log2() The natural logarithm of 2\nAvailable since 2.0\n \n\nreal log10() The natural logarithm of 10\nAvailable since 2.0\n\n\n\n \n\nreal not_a_number() Not-a-number, a special non-finite real value returned to signal an error\nAvailable since 2.0\n \n\nreal positive_infinity() Positive infinity, a special non-finite real value larger than all finite numbers\nAvailable since 2.0\n \n\nreal negative_infinity() Negative infinity, a special non-finite real value smaller than all finite numbers\nAvailable since 2.0\n \n\nreal machine_precision() The smallest number \\(x\\) such that \\((x + 1) \\neq 1\\) in floating-point arithmetic on the current hardware platform\nAvailable since 2.0\n\n\n\nThe basic purpose of a Stan program is to compute a log probability function and its derivatives. The log probability function in a Stan model outputs the log density on the unconstrained scale. A log probability accumulator starts at zero and is then incremented in various ways by a Stan program. The variables are first transformed from unconstrained to constrained, and the log Jacobian determinant added to the log probability accumulator. Then the model block is executed on the constrained parameters, with each sampling statement (~) and log probability increment statement (increment_log_prob) adding to the accumulator. At the end of the model block execution, the value of the log probability accumulator is the log probability value returned by the Stan program.\nStan provides a special built-in function target() that takes no arguments and returns the current value of the log probability accumulator. 
This function is primarily useful for debugging purposes, where for instance, it may be used with a print statement to display the log probability accumulator at various stages of execution to see where it becomes ill defined.\n \n\nreal target() Return the current value of the log probability accumulator.\nAvailable since 2.10\ntarget acts like a function ending in _lp, meaning that it may only be used in the model block.\n\n\n\nLike C++, BUGS, and R, Stan uses 0 to encode false, and 1 to encode true. Stan supports the usual boolean comparison operations and boolean operators. These all have the same syntax and precedence as in C++; for the full list of operators and precedences, see the reference manual.\n\n\nAll comparison operators return boolean values, either 0 or 1. Each operator has two signatures, one for integer comparisons and one for floating-point comparisons. Comparing an integer and real value is carried out by first promoting the integer value.\n \n\nint operator<(int x, int y)\n \n\nint operator<(real x, real y) Return 1 if x is less than y and 0 otherwise. \\[\\begin{equation*} \\text{operator<}(x,y)\n= \\begin{cases} 1 & \\text{if $x < y$} \\\\ 0 & \\text{otherwise}\n\\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator<=(int x, int y)\n \n\nint operator<=(real x, real y) Return 1 if x is less than or equal to y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator<=}(x,y) = \\begin{cases} 1 & \\text{if $x \\leq y$} \\\\ 0 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator>(int x, int y)\n \n\nint operator>(real x, real y) Return 1 if x is greater than y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator>}(x,y) = \\begin{cases} 1 & \\text{if $x > y$} \\\\ 0 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator>=(int x, int y)\n \n\nint operator>=(real x, real y) Return 1 if x is greater than or equal to y and 0 otherwise. 
\\[\\begin{equation*}\n\\text{operator>=}(x,y) = \\begin{cases} 1 & \\text{if $x \\geq y$} \\\\ 0 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator==(int x, int y)\n \n\nint operator==(real x, real y) Return 1 if x is equal to y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator==}(x,y) = \\begin{cases} 1 & \\text{if $x = y$} \\\\ 0 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator!=(int x, int y)\n \n\nint operator!=(real x, real y) Return 1 if x is not equal to y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator!=}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq y$} \\\\ 0 &\n\\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n\n\n\nBoolean operators return either 0 for false or 1 for true. Inputs may be any real or integer values, with non-zero values being treated as true and zero values treated as false. These operators have the usual precedences, with negation (not) binding the most tightly, conjunction the next and disjunction the weakest; all of the operators bind more tightly than the comparisons. Thus an expression such as !a && b is interpreted as (!a) && b, and a < b || c >= d && e != f as (a < b) || (((c >= d) && (e != f))).\n \n\nint operator!(int x) Return 1 if x is zero and 0 otherwise. \\[\\begin{equation*} \\text{operator!}(x) =\n\\begin{cases} 0 & \\text{if $x \\neq 0$} \\\\ 1 & \\text{if $x = 0$}\n\\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator!(real x) Return 1 if x is zero and 0 otherwise. \\[\\begin{equation*} \\text{operator!}(x) =\n\\begin{cases} 0 & \\text{if $x \\neq 0.0$} \\\\ 1 & \\text{if $x = 0.0$}\n\\end{cases} \\end{equation*}\\] deprecated; - use operator== instead.\nAvailable since 2.0, deprecated in 2.31\n \n\nint operator&&(int x, int y)\nReturn 1 if x is unequal to 0 and y is unequal to 0. 
\\[\\begin{equation*}\n\\mathrm{operator\\&\\&}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq 0$}\n\\text{ and } y \\neq 0\\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator&&(real x, real y) Return 1 if x is unequal to 0.0 and y is unequal to 0.0. \\[\\begin{equation*}\n\\mathrm{operator\\&\\&}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq 0.0$}\n\\text{ and } y \\neq 0.0\\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\] deprecated\nAvailable since 2.0, deprecated in 2.31\n \n\nint operator||(int x, int y) Return 1 if x is unequal to 0 or y is unequal to 0. \\[\\begin{equation*}\n\\text{operator||}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq 0$}\n\\textrm{ or } y \\neq 0\\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator||(real x, real y) Return 1 if x is unequal to 0.0 or y is unequal to 0.0. \\[\\begin{equation*}\n\\text{operator||}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq 0.0$}\n\\textrm{ or } y \\neq 0.0\\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\] deprecated\nAvailable since 2.0, deprecated in 2.31\n\n\nLike in C++, the boolean operators && and || are implemented to short circuit directly to a return value after evaluating the first argument if it is sufficient to resolve the result. In evaluating a || b, if a evaluates to a value other than zero, the expression returns the value 1 without evaluating the expression b. Similarly, evaluating a && b first evaluates a, and if the result is zero, returns 0 without evaluating b.\n\n\n\n\nThe logical functions introduce conditional behavior functionally and are primarily provided for compatibility with BUGS and JAGS.\n \n\nreal step(real x) Return 0 if x is negative and 1 otherwise. 
\\[\\begin{equation*} \\text{step}(x) =\n\\begin{cases} 0 & \\text{if } x < 0 \\\\ 1 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\] Warning: int_step(0) and int_step(NaN) return 0 whereas step(0) and step(NaN) return 1.\nThe step function is often used in BUGS to perform conditional operations. For instance, step(a-b) evaluates to 1 if a is greater than or equal to b and evaluates to 0 otherwise. step is a step-like function; see the warning in section step functions applied to expressions dependent on parameters.\nAvailable since 2.0\n \n\nint is_inf(real x) Return 1 if x is infinite (positive or negative) and 0 otherwise.\nAvailable since 2.5\n \n\nint is_nan(real x) Return 1 if x is NaN and 0 otherwise.\nAvailable since 2.5\nCare must be taken because both of these indicator functions are step-like and thus can cause discontinuities in gradients when applied to parameters; see section step-like functions for details.\n\n\n\n\nThe arithmetic operators are presented using C++ notation. For instance operator+(x,y) refers to the binary addition operator and operator-(x) to the unary negation operator. In Stan programs, these are written using the usual infix and prefix notations as x + y and -x, respectively.\n\n\n \n\nreal operator+(real x, real y) Return the sum of x and y. \\[\\begin{equation*} (x + y) = \\text{operator+}(x,y) = x+y \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal operator-(real x, real y) Return the difference between x and y. \\[\\begin{equation*} (x - y) =\n\\text{operator-}(x,y) = x - y \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal operator*(real x, real y) Return the product of x and y. \\[\\begin{equation*} (x * y) = \\text{operator*}(x,y) = xy\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nreal operator/(real x, real y) Return the quotient of x and y. \\[\\begin{equation*} (x / y) = \\text{operator/}(x,y) =\n\\frac{x}{y} \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal operator^(real x, real y) Return x raised to the power of y. 
\\[\\begin{equation*} (x^\\mathrm{\\wedge}y) =\n\\text{operator}^\\mathrm{\\wedge}(x,y) = x^y \\end{equation*}\\]\nAvailable since 2.5\n\n\n\n \n\nreal operator-(real x) Return the negation of the subtrahend x. \\[\\begin{equation*} \\text{operator-}(x) = (-x)\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of reals, -x is the same shape array where each individual number is negated.\nAvailable since 2.31\n \n\nreal operator+(real x) Return the value of x. \\[\\begin{equation*} \\text{operator+}(x) = x \\end{equation*}\\]\nAvailable since 2.0\n\n\n\n\nWarning: These functions can seriously hinder sampling and optimization efficiency for gradient-based methods (e.g., NUTS, HMC, BFGS) if applied to parameters (including transformed parameters and local variables in the transformed parameters or model block). The problem is that they break gradients due to discontinuities coupled with zero gradients elsewhere. They do not hinder sampling when used in the data, transformed data, or generated quantities blocks.\n\n\n \n\nT abs(T x) The absolute value of x.\nThis function works elementwise over containers such as vectors. Given a type T which is real vector, row_vector, matrix, or an array of those types, abs returns the same type where each element has had its absolute value taken.\nAvailable since 2.0, vectorized in 2.30\n \n\nreal fdim(real x, real y) Return the positive difference between x and y, which is x - y if x is greater than y and 0 otherwise; see warning above. \\[\\begin{equation*} \\text{fdim}(x,y) = \\begin{cases} x-y &\n\\text{if } x \\geq y \\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR fdim(T1 x, T2 y) Vectorized implementation of the fdim function\nAvailable since 2.25\n\n\n\n \n\nreal fmin(real x, real y) Return the minimum of x and y; see warning above. 
\\[\\begin{equation*} \\text{fmin}(x,y) = \\begin{cases} x &\n\\text{if } x \\leq y \\\\ y & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR fmin(T1 x, T2 y) Vectorized implementation of the fmin function\nAvailable since 2.25\n \n\nreal fmax(real x, real y) Return the maximum of x and y; see warning above. \\[\\begin{equation*} \\text{fmax}(x,y) = \\begin{cases} x &\n\\text{if } x \\geq y \\\\ y & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR fmax(T1 x, T2 y) Vectorized implementation of the fmax function\nAvailable since 2.25\n\n\n\n \n\nreal fmod(real x, real y) Return the real value remainder after dividing x by y; see warning above. \\[\\begin{equation*} \\text{fmod}(x,y) = x - \\left\\lfloor \\frac{x}{y} \\right\\rfloor \\, y \\end{equation*}\\] The operator \\(\\lfloor u \\rfloor\\) is the floor operation; see below.\nAvailable since 2.0\n \n\nR fmod(T1 x, T2 y) Vectorized implementation of the fmod function\nAvailable since 2.25\n\n\n\nWarning: Rounding functions convert real values to integers. Because the output is an integer, any gradient information resulting from functions applied to the integer is not passed to the real value it was derived from. With MCMC sampling using HMC or NUTS, the MCMC acceptance procedure will correct for any error due to poor gradient calculations, but the result is likely to be reduced acceptance probabilities and less efficient sampling.\nThe rounding functions cannot be used as indices to arrays because they return real values. 
For operations over data or in the generated quantities block, the to_int() function can be used.\n \n\nR floor(T x) The floor of x, which is the largest integer less than or equal to x, converted to a real value; see warning at start of section step-like functions\nAvailable since 2.0, vectorized in 2.13\n \n\nR ceil(T x) The ceiling of x, which is the smallest integer greater than or equal to x, converted to a real value; see warning at start of section step-like functions\nAvailable since 2.0, vectorized in 2.13\n \n\nR round(T x) The nearest integer to x, converted to a real value; see warning at start of section step-like functions\nAvailable since 2.0, vectorized in 2.13\n \n\nR trunc(T x) The integer nearest to but no larger in magnitude than x, converted to a real value; see warning at start of section step-like functions\nAvailable since 2.0, vectorized in 2.13\n\n\n\n\n \n\nR sqrt(T x) The square root of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR cbrt(T x) The cube root of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR square(T x) The square of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR exp(T x) The natural exponential of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR exp2(T x) The base-2 exponential of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log(T x) The natural logarithm of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log2(T x) The base-2 logarithm of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log10(T x) The base-10 logarithm of x\nAvailable since 2.0, vectorized in 2.13\n \n\nreal pow(real x, real y) Return x raised to the power of y. 
\\[\\begin{equation*} \\text{pow}(x,y) = x^y \\end{equation*}\\]\nAvailable since 2.0\n \n\nR pow(T1 x, T2 y) Vectorized implementation of the pow function\nAvailable since 2.25\n \n\nR inv(T x) The inverse of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_sqrt(T x) The inverse of the square root of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_square(T x) The inverse of the square of x\nAvailable since 2.0, vectorized in 2.13\n\n\n\n \n\nreal hypot(real x, real y) Return the length of the hypotenuse of a right triangle with sides of length x and y. \\[\\begin{equation*} \\text{hypot}(x,y) = \\begin{cases} \\sqrt{x^2+y^2} &\n\\text{if } x,y\\geq 0 \\\\ \\textrm{NaN} & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR hypot(T1 x, T2 y) Vectorized implementation of the hypot function\nAvailable since 2.25\n \n\nR cos(T x) The cosine of the angle x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR sin(T x) The sine of the angle x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR tan(T x) The tangent of the angle x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR acos(T x) The principal arc (inverse) cosine (in radians) of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR asin(T x) The principal arc (inverse) sine (in radians) of x\nAvailable since 2.0\n \n\nR atan(T x) The principal arc (inverse) tangent (in radians) of x, with values from \\(-\\pi/2\\) to \\(\\pi/2\\)\nAvailable since 2.0, vectorized in 2.13\n \n\nR atan2(T y, T x) Return the principal arc (inverse) tangent (in radians) of y divided by x, \\[\\begin{equation*} \\text{atan2}(y, x) = \\arctan\\left(\\frac{y}{x}\\right) \\end{equation*}\\]\nAvailable since 2.0, vectorized in 2.34\n\n\n\n \n\nR cosh(T x) The hyperbolic cosine of x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR sinh(T x) The hyperbolic sine of x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR tanh(T x) The hyperbolic tangent 
of x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR acosh(T x) The inverse hyperbolic cosine (in radians) of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR asinh(T x) The inverse hyperbolic sine (in radians) of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR atanh(T x) The inverse hyperbolic tangent (in radians) of x\nAvailable since 2.0, vectorized in 2.13\n\n\n\nThe following functions are commonly used as link functions in generalized linear models. The function \\(\\Phi\\) is also commonly used as a link function (see section probability-related functions).\n \n\nR logit(T x) The log odds, or logit, function applied to x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_logit(T x) The logistic sigmoid function applied to x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_cloglog(T x) The inverse of the complementary log-log function applied to x\nAvailable since 2.0, vectorized in 2.13\n\n\n\n\n\nThe error function erf is related to the standard normal cumulative distribution function \\(\\Phi\\) by scaling. See section normal distribution for the general normal cumulative distribution function (and its complement).\n \n\nR erf(T x) The error function, also known as the Gauss error function, of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR erfc(T x) The complementary error function of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_erfc(T x) The inverse of the complementary error function of x\nAvailable since 2.29, vectorized in 2.29\n \n\nR Phi(T x) The standard normal cumulative distribution function of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_Phi(T x) Return the value of the inverse standard normal cdf \\(\\Phi^{-1}\\) at the specified quantile x. The details of the algorithm can be found in (Wichura 1988). 
Quantile arguments below 1e-16 are untested; quantiles above 0.999999999 result in increasingly large errors.\nAvailable since 2.0, vectorized in 2.13\n \n\nR Phi_approx(T x) The fast approximation of the unit normal cumulative distribution function (may replace Phi for probit regression with maximum absolute error of 0.00014, see (Bowling et al. 2009) for details)\nAvailable since 2.0, vectorized in 2.13\n\n\n\n \n\nreal binary_log_loss(int y, real y_hat) Return the log loss function for predicting \\(\\hat{y} \\in [0,1]\\) for boolean outcome \\(y \\in \\{0,1\\}\\). \\[\\begin{equation*}\n\\mathrm{binary\\_log\\_loss}(y,\\hat{y}) = \\begin{cases} -\\log \\hat{y} &\n\\text{if } y = 1\\\\ -\\log (1 - \\hat{y}) & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR binary_log_loss(T1 x, T2 y) Vectorized implementation of the binary_log_loss function\nAvailable since 2.25\n \n\nreal owens_t(real h, real a) Return the Owen’s T function for the probability of the event \\(X > h\\) and \\(0<Y<aX\\) where X and Y are independent standard normal random variables. \\[\\begin{equation*} \\mathrm{owens\\_t}(h,a) = \\frac{1}{2\\pi} \\int_0^a\n\\frac{\\exp(-\\frac{1}{2}h^2(1+x^2))}{1+x^2}dx \\end{equation*}\\]\nAvailable since 2.25\n \n\nR owens_t(T1 x, T2 y) Vectorized implementation of the owens_t function\nAvailable since 2.25\n\n\n\n\n \n\nreal beta(real alpha, real beta) Return the beta function applied to alpha and beta. The beta function, \\(\\text{B}(\\alpha,\\beta)\\), computes the normalizing constant for the beta distribution, and is defined for \\(\\alpha > 0\\) and \\(\\beta > 0\\). See section appendix for definition of \\(\\text{B}(\\alpha, \\beta)\\).\nAvailable since 2.25\n \n\nR beta(T1 x, T2 y) Vectorized implementation of the beta function\nAvailable since 2.25\n \n\nreal inc_beta(real alpha, real beta, real x) Return the regularized incomplete beta function up to x applied to alpha and beta. 
See section appendix for a definition.\nAvailable since 2.10\n \n\nreal inv_inc_beta(real alpha, real beta, real p) Return the inverse of the regularized incomplete beta function. The return value x is the value that solves p = inc_beta(alpha, beta, x). See section appendix for a definition of inc_beta.\nAvailable since 2.30\n \n\nreal lbeta(real alpha, real beta) Return the natural logarithm of the beta function applied to alpha and beta. The beta function, \\(\\text{B}(\\alpha,\\beta)\\), computes the normalizing constant for the beta distribution, and is defined for \\(\\alpha > 0\\) and \\(\\beta > 0\\). \\[\\begin{equation*}\n\\text{lbeta}(\\alpha,\\beta) = \\log \\Gamma(\\alpha) + \\log \\Gamma(\\beta) - \\log \\Gamma(\\alpha+\\beta)\n\\end{equation*}\\] See section appendix for definition of \\(\\text{B}(\\alpha, \\beta)\\).\nAvailable since 2.0\n \n\nR lbeta(T1 x, T2 y) Vectorized implementation of the lbeta function\nAvailable since 2.25\n \n\nR tgamma(T x) The gamma function applied to x. The gamma function is the generalization of the factorial function to continuous variables, defined so that \\(\\Gamma(n+1) = n!\\). See section appendix for a full definition of \\(\\Gamma(x)\\). The function is defined for positive numbers and non-integral negative numbers.\nAvailable since 2.0, vectorized in 2.13\n \n\nR lgamma(T x) The natural logarithm of the gamma function applied to x\nAvailable since 2.0, vectorized in 2.15\n \n\nR digamma(T x) The digamma function applied to x. The digamma function is the derivative of the natural logarithm of the Gamma function. The function is defined for positive numbers and non-integral negative numbers\nAvailable since 2.0, vectorized in 2.13\n \n\nR trigamma(T x) The trigamma function applied to x. 
The trigamma function is the second derivative of the natural logarithm of the Gamma function\nAvailable since 2.0, vectorized in 2.13\n \n\nreal lmgamma(int n, real x) Return the natural logarithm of the multivariate gamma function \\(\\Gamma_n\\) with n dimensions applied to x. \\[\\begin{equation*}\n\\text{lmgamma}(n,x) =\n\\begin{cases} \\frac{n(n-1)}{4} \\log \\pi + \\sum_{j=1}^n \\log \\Gamma\\left(x + \\frac{1 - j}{2}\\right)\n& \\text{if } x\\not\\in \\{\\dots,-3,-2,-1,0\\}\\\\ \\textrm{error} & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR lmgamma(T1 x, T2 y) Vectorized implementation of the lmgamma function\nAvailable since 2.25\n \n\nreal gamma_p(real a, real z) Return the normalized lower incomplete gamma function of a and z defined for positive a and nonnegative z. \\[\\begin{equation*}\n\\mathrm{gamma\\_p}(a,z) =\n\\begin{cases} \\frac{1}{\\Gamma(a)}\\int_0^zt^{a-1}e^{-t}dt &\n\\text{if } a > 0, z \\geq 0 \\\\ \\textrm{error} & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR gamma_p(T1 x, T2 y) Vectorized implementation of the gamma_p function\nAvailable since 2.25\n \n\nreal gamma_q(real a, real z) Return the normalized upper incomplete gamma function of a and z defined for positive a and nonnegative z. \\[\\begin{equation*}\n\\mathrm{gamma\\_q}(a,z) =\n\\begin{cases} \\frac{1}{\\Gamma(a)}\\int_z^\\infty t^{a-1}e^{-t}dt &\n\\text{if } a > 0, z \\geq 0 \\\\[6pt] \\textrm{error} & \\text{otherwise}\n\\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR gamma_q(T1 x, T2 y) Vectorized implementation of the gamma_q function\nAvailable since 2.25\n \n\nint choose(int x, int y) Return the binomial coefficient of x and y. For non-negative integer inputs, the binomial coefficient function is written as \\(\\binom{x}{y}\\) and pronounced “x choose y.” It is the antilog of the lchoose function but returns an integer rather than a real number with no non-zero decimal places. 
For \\(0 \\leq y \\leq x\\), the binomial coefficient function can be defined via the factorial function \\[\\begin{equation*}\n\\text{choose}(x,y) = \\frac{x!}{\\left(y!\\right)\\left(x - y\\right)!}.\n\\end{equation*}\\]\nAvailable since 2.14\n \n\nR choose(T1 x, T2 y) Vectorized implementation of the choose function\nAvailable since 2.25\n \n\nreal bessel_first_kind(int v, real x) Return the Bessel function of the first kind with order v applied to x. \\[\\begin{equation*}\n\\mathrm{bessel\\_first\\_kind}(v,x) = J_v(x),\n\\end{equation*}\\] where \\[\\begin{equation*}\nJ_v(x)=\\left(\\frac{1}{2}x\\right)^v \\sum_{k=0}^\\infty\n\\frac{\\left(-\\frac{1}{4}x^2\\right)^k}{k!\\, \\Gamma(v+k+1)}\n\\end{equation*}\\]\nAvailable since 2.5\n \n\nR bessel_first_kind(T1 x, T2 y) Vectorized implementation of the bessel_first_kind function\nAvailable since 2.25\n \n\nreal bessel_second_kind(int v, real x) Return the Bessel function of the second kind with order v applied to x defined for positive x and v. For \\(x,v > 0\\), \\[\\begin{equation*}\n\\mathrm{bessel\\_second\\_kind}(v,x) =\n\\begin{cases} Y_v(x) & \\text{if } x > 0 \\\\ \\textrm{error} & \\text{otherwise} \\end{cases}\n\\end{equation*}\\] where \\[\\begin{equation*}\nY_v(x)=\\frac{J_v(x)\\cos(v\\pi)-J_{-v}(x)}{\\sin(v\\pi)}\n\\end{equation*}\\]\nAvailable since 2.5\n \n\nR bessel_second_kind(T1 x, T2 y) Vectorized implementation of the bessel_second_kind function\nAvailable since 2.25\n \n\nreal modified_bessel_first_kind(int v, real z) Return the modified Bessel function of the first kind with order v applied to z defined for all z and integer v. 
\\[\\begin{equation*}\n\\mathrm{modified\\_bessel\\_first\\_kind}(v,z) = I_v(z)\n\\end{equation*}\\] where \\[\\begin{equation*}\n{I_v}(z) = \\left(\\frac{1}{2}z\\right)^v\\sum_{k=0}^\\infty \\frac{\\left(\\frac{1}{4}z^2\\right)^k}{k!\\Gamma(v+k+1)}\n\\end{equation*}\\]\nAvailable since 2.1\n \n\nR modified_bessel_first_kind(T1 x, T2 y) Vectorized implementation of the modified_bessel_first_kind function\nAvailable since 2.25\n \n\nreal log_modified_bessel_first_kind(real v, real z) Return the log of the modified Bessel function of the first kind. v does not have to be an integer.\nAvailable since 2.26\n \n\nR log_modified_bessel_first_kind(T1 x, T2 y) Vectorized implementation of the log_modified_bessel_first_kind function\nAvailable since 2.26\n \n\nreal modified_bessel_second_kind(int v, real z) Return the modified Bessel function of the second kind with order v applied to z defined for positive z and integer v. \\[\\begin{equation*}\n\\mathrm{modified\\_bessel\\_second\\_kind}(v,z) =\n\\begin{cases} K_v(z) & \\text{if } z > 0 \\\\ \\textrm{error} & \\text{if } z \\leq 0 \\end{cases}\n\\end{equation*}\\] where \\[\\begin{equation*} {K_v}(z) = \\frac{\\pi}{2}\\cdot\\frac{I_{-v}(z) - I_{v}(z)}{\\sin(v\\pi)}\n\\end{equation*}\\]\nAvailable since 2.1\n \n\nR modified_bessel_second_kind(T1 x, T2 y) Vectorized implementation of the modified_bessel_second_kind function\nAvailable since 2.25\n \n\nreal falling_factorial(real x, real n) Return the falling factorial of x with power n defined for positive x and real n. 
\\[\\begin{equation*}\n\\mathrm{falling\\_factorial}(x,n) =\n\\begin{cases} (x)_n & \\text{if } x > 0 \\\\ \\textrm{error} & \\text{if } x \\leq 0 \\end{cases}\n\\end{equation*}\\] where \\[\\begin{equation*}\n(x)_n=\\frac{\\Gamma(x+1)}{\\Gamma(x-n+1)}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR falling_factorial(T1 x, T2 y) Vectorized implementation of the falling_factorial function\nAvailable since 2.25\n \n\nreal lchoose(real x, real y) Return the natural logarithm of the generalized binomial coefficient of x and y. For non-negative integer inputs, the binomial coefficient function is written as \\(\\binom{x}{y}\\) and pronounced “x choose y.” This function generalizes to real numbers using the gamma function. For \\(0 \\leq y \\leq x\\), \\[\\begin{equation*} \\mathrm{lchoose}(x,y) =\n\\log\\Gamma(x+1) - \\log\\Gamma(y+1) - \\log\\Gamma(x-y+1). \\end{equation*}\\]\nAvailable since 2.10\n \n\nR lchoose(T1 x, T2 y) Vectorized implementation of the lchoose function\nAvailable since 2.29\n \n\nreal log_falling_factorial(real x, real n) Return the log of the falling factorial of x with power n defined for positive x and real n. \\[\\begin{equation*} \\mathrm{log\\_falling\\_factorial}(x,n) =\n\\begin{cases} \\log (x)_n & \\text{if } x > 0 \\\\ \\textrm{error} &\n\\text{if } x \\leq 0 \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal rising_factorial(real x, int n) Return the rising factorial of x with power n defined for positive x and integer n. 
\\[\\begin{equation*}\n\\mathrm{rising\\_factorial}(x,n) = \\begin{cases} x^{(n)} & \\text{if } x > 0 \\\\ \\textrm{error} & \\text{if } x \\leq 0 \\end{cases}\n\\end{equation*}\\] where \\[\\begin{equation*} x^{(n)}=\\frac{\\Gamma(x+n)}{\\Gamma(x)} \\end{equation*}\\]\nAvailable since 2.20\n \n\nR rising_factorial(T1 x, T2 y) Vectorized implementation of the rising_factorial function\nAvailable since 2.25\n \n\nreal log_rising_factorial(real x, real n) Return the log of the rising factorial of x with power n defined for positive x and real n. \\[\\begin{equation*} \\mathrm{log\\_rising\\_factorial}(x,n) =\n\\begin{cases} \\log x^{(n)} & \\text{if } x > 0 \\\\ \\textrm{error} &\n\\text{if } x \\leq 0 \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR log_rising_factorial(T1 x, T2 y) Vectorized implementation of the log_rising_factorial function\nAvailable since 2.25\n\n\n\nThe functions in this section are equivalent in theory to combinations of other functions. In practice, they are implemented to be more efficient and more numerically stable than defining them directly using more basic Stan functions.\n \n\nR expm1(T x) The natural exponential of x minus 1\nAvailable since 2.0, vectorized in 2.13\n \n\nreal fma(real x, real y, real z) Return z plus the result of x multiplied by y. \\[\\begin{equation*} \\text{fma}(x,y,z) =\n(x \\times y) + z \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal ldexp(real x, int y) Return the product of x and two raised to the y power. \\[\\begin{equation*}\n\\text{ldexp}(x,y) = x 2^y \\end{equation*}\\]\nAvailable since 2.25\n \n\nR ldexp(T1 x, T2 y) Vectorized implementation of the ldexp function\nAvailable since 2.25\n \n\nreal lmultiply(real x, real y) Return the product of x and the natural logarithm of y. 
\\[\\begin{equation*}\n\\text{lmultiply}(x,y) = \\begin{cases} 0 & \\text{if } x = y = 0 \\\\ x\n\\log y & \\text{if } x, y \\neq 0 \\\\ \\text{NaN} & \\text{otherwise}\n\\end{cases} \\end{equation*}\\]\nAvailable since 2.10\n \n\nR lmultiply(T1 x, T2 y) Vectorized implementation of the lmultiply function\nAvailable since 2.25\n \n\nR log1p(T x) The natural logarithm of 1 plus x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log1m(T x) The natural logarithm of 1 minus x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log1p_exp(T x) The natural logarithm of one plus the natural exponentiation of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log1m_exp(T x) The logarithm of one minus the natural exponentiation of x\nAvailable since 2.0, vectorized in 2.13\n \n\nreal log_diff_exp(real x, real y) Return the natural logarithm of the difference of the natural exponentiation of x and the natural exponentiation of y. \\[\\begin{equation*}\n\\mathrm{log\\_diff\\_exp}(x,y) = \\begin{cases} \\log(\\exp(x)-\\exp(y)) &\n\\text{if } +\\infty > x \\ge y \\\\[6pt]\n\\textrm{NaN} & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nWhen x is equal to y, log_diff_exp(x, y) returns \\(-\\infty\\), consistent with log(0) returning \\(-\\infty\\). This includes the case in which x and y are both equal to \\(-\\infty\\), which corresponds to log(0 - 0) because exp(negative_infinity()) returns 0.\nAvailable since 2.0\n \n\nR log_diff_exp(T1 x, T2 y) Vectorized implementation of the log_diff_exp function\nAvailable since 2.25\n \n\nreal log_mix(real theta, real lp1, real lp2) Return the log mixture of the log densities lp1 and lp2 with mixing proportion theta, defined by \\[\\begin{eqnarray*}\n\\mathrm{log\\_mix}(\\theta, \\lambda_1, \\lambda_2) & = & \\log \\!\\left(\n\\theta \\exp(\\lambda_1) + \\left( 1 - \\theta \\right) \\exp(\\lambda_2)\n\\right) \\\\[3pt] & = & \\mathrm{log\\_sum\\_exp}\\!\\left(\\log(\\theta) +\n\\lambda_1, \\ \\log(1 - \\theta) + \\lambda_2\\right). 
\\end{eqnarray*}\\]\nAvailable since 2.6\n \n\nR log_mix(T1 thetas, T2 lps)\nCalculates the log mixture density given thetas, mixing proportions which should be between 0 and 1 and sum to 1, and lps, log densities. The lps variable must be either a 1-d container of the same length as thetas, or an array of such.\n\\[\\begin{eqnarray*}\n\\mathrm{log\\_mix}(\\theta, \\lambda)\n& = & \\log \\!\\left( \\sum_{n=1}^N \\theta_n * \\exp(\\lambda_n) \\right) \\\\[3pt]\n& = & \\mathrm{log\\_sum\\_exp}\\!\\left(\\log(\\theta) + \\lambda\\right).\n\\end{eqnarray*}\\]\nThis is a generalization of the above signature of three arguments to more than two densities. For example, log_mix(lambda, lp1, lp2) == log_mix({lambda, 1 - lambda}, {lp1, lp2}).\nAvailable since 2.26\n \n\nR log_sum_exp(T1 x, T2 y) Return the natural logarithm of the sum of the natural exponentiation of x and the natural exponentiation of y. \\[\\begin{equation*}\n\\mathrm{log\\_sum\\_exp}(x,y) = \\log(\\exp(x)+\\exp(y)) \\end{equation*}\\]\nAvailable since 2.0, vectorized in 2.33\n \n\nR log_inv_logit(T x) The natural logarithm of the inverse logit function of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log_inv_logit_diff(T1 x, T2 y) The natural logarithm of the difference of the inverse logit function of x and the inverse logit function of y\nAvailable since 2.25\n \n\nR log1m_inv_logit(T x) The natural logarithm of 1 minus the inverse logit function of x\nAvailable since 2.0, vectorized in 2.13\n\n\n\n \n\nR lambert_w0(T x) Implementation of the \\(W_0\\) branch of the Lambert W function, i.e., solution to the function \\(W_0(x) \\exp^{ W_0(x)} = x\\)\nAvailable since 2.25\n \n\nR lambert_wm1(T x) Implementation of the \\(W_{-1}\\) branch of the Lambert W function, i.e., solution to the function \\(W_{-1}(x) \\exp^{W_{-1}(x)} = x\\)\nAvailable since 2.25\n\n\n\nHypergeometric functions refer to a power series of the form \\[\\begin{equation*}\n_pF_q(a_1,...,a_p;b_1,...,b_q;z) = \\sum_{n=0}^\\infty 
\\frac{(a_1)_n\\cdot\\cdot\\cdot(a_p)_n}{(b_1)_n\\cdot\\cdot\\cdot(b_q)_n} \\frac{z^n}{n!}\n\\end{equation*}\\] where \\((a)_n\\) is the Pochhammer symbol defined as \\((a)_n = \\frac{\\Gamma(a+n)}{\\Gamma(a)}\\).\nThe gradients of the hypergeometric function are given by: \\[\\begin{equation*}\n\\frac{\\partial }{\\partial a_1} =\n \\sum_{k=0}^{\\infty}{\n \\frac\n {\\psi\\left(k+a_1\\right)\\left(\\prod_{j=1}^p\\left(a_j\\right)_k\\right)z^k}\n {k!\\prod_{j=1}^q\\left(b_j\\right)_k}}\n - \\psi\\left(a_1\\right){}_pF_q(a_1,...,a_p;b_1,...,b_q;z)\n\\end{equation*}\\] \\[\\begin{equation*}\n\\frac{\\partial }{\\partial b_1} =\n \\psi\\left(b_1\\right){}_pF_q(a_1,...,a_p;b_1,...,b_q;z) -\n \\sum_{k=0}^{\\infty}{\n \\frac\n {\\psi\\left(k+b_1\\right)\\left(\\prod_{j=1}^p\\left(a_j\\right)_k\\right)z^k}\n {k!\\prod_{j=1}^q\\left(b_j\\right)_k}}\n\\end{equation*}\\] \\[\\begin{equation*}\n \\frac{\\partial }{\\partial z} =\n \\frac{\\prod_{j=1}^{p}a_j}{\\prod_{j=1}^{q} b_j}{}_pF_q(a_1+1,...,a_p+1;b_1+1,...,b_q+1;z)\n\\end{equation*}\\]\nStan provides both the generalized hypergeometric function as well as several special cases for particular values of p and q.\n \n\nreal hypergeometric_1F0(real a, real z) Special case of the hypergeometric function with \\(p=1\\) and \\(q=0\\).\nAvailable since 2.37\n \n\nreal hypergeometric_2F1(real a1, real a2, real b1, real z) Special case of the hypergeometric function with \\(p=2\\) and \\(q=1\\). 
If the function does not meet convergence criteria for given inputs, the function will attempt to apply Euler’s transformation to improve convergence: \\[\\begin{equation*}\n{}_2F_1(a_1,a_2, b_1, z)={}_2F_1(b_1 - a_1,a_2, b_1, \\frac{z}{z-1})\\cdot(1-z)^{-a_2}\n\\end{equation*}\\]\nAvailable since 2.37\n \n\nreal hypergeometric_3F2(T1 a, T2 b, real z) Special case of the hypergeometric function with \\(p=3\\) and \\(q=2\\), where a and b are vectors of length 3 and 2, respectively.\nAvailable since 2.37\n \n\nreal hypergeometric_pFq(T1 a, T2 b, real z) Generalized hypergeometric function, where a and b are vectors of length p and q, respectively.\nAvailable since 2.37", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#fun-vectorization", + "href": "functions-reference/real-valued_basic_functions.html#fun-vectorization", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "Although listed in this chapter, many of Stan’s built-in functions are vectorized so that they may be applied to any argument type. The vectorized form of these functions is not any faster than writing an explicit loop that iterates over the elements applying the function—it’s just easier to read and write and less error prone.\n\n\nMany of Stan’s unary functions can be applied to any argument type. For example, the exponential function, exp, can be applied to real arguments or arrays of real arguments. Other than for integer arguments, the result type is the same as the argument type, including dimensionality and size. Integer arguments are first promoted to real values, but the result will still have the same dimensionality and size as the argument.\n\n\nWhen applied to a simple real value, the result is a real value. When applied to arrays, vectorized functions like exp() are defined elementwise. 
For example,\n // declare some variables for arguments\n real x0;\n array[5] real x1;\n array[4, 7] real x2;\n // ...\n // declare some variables for results\n real y0;\n array[5] real y1;\n array[4, 7] real y2;\n // ...\n // calculate and assign results\n y0 = exp(x0);\n y1 = exp(x1);\n y2 = exp(x2);\nWhen exp is applied to an array, it applies elementwise. For example, the statement above,\n y2 = exp(x2);\nproduces the same result for y2 as the explicit loop\nfor (i in 1:4) {\n for (j in 1:7) {\n y2[i, j] = exp(x2[i, j]);\n }\n}\n\n\n\nVectorized functions also apply elementwise to vectors and matrices. For example,\n vector[5] xv;\n row_vector[7] xrv;\n matrix[10, 20] xm;\n\n vector[5] yv;\n row_vector[7] yrv;\n matrix[10, 20] ym;\n\n yv = exp(xv);\n yrv = exp(xrv);\n ym = exp(xm);\nArrays of vectors and matrices work the same way. For example,\n array[12] matrix[17, 93] u;\n\n array[12] matrix[17, 93] z;\n\n z = exp(u);\nAfter this has been executed, z[i, j, k] will be equal to exp(u[i, j, k]).\n\n\n\nInteger arguments are promoted to real values in vectorized unary functions. Thus if n is of type int, exp(n) is of type real. Arrays work the same way, so that if n2 is a one dimensional array of integers, then exp(n2) will be a one-dimensional array of reals with the same number of elements as n2. For example,\n array[23] int n1;\n array[23] real z1;\n z1 = exp(n1);\nIt would be illegal to try to assign exp(n1) to an array of integers; the return type is a real array.\n\n\n\n\nLike the unary functions, many of Stan’s binary functions have been vectorized, and can be applied elementwise to combinations of both scalars or container types.\n\n\nWhen applied to two scalar values, the result is a scalar value. When applied to two arrays, or combination of a scalar value and an array, vectorized functions like pow() are defined elementwise. 
For example,\n // declare some variables for arguments\n real x00;\n real x01;\n array[5] real x10;\n array[5]real x11;\n array[4, 7] real x20;\n array[4, 7] real x21;\n // ...\n // declare some variables for results\n real y0;\n array[5] real y1;\n array[4, 7] real y2;\n // ...\n // calculate and assign results\n y0 = pow(x00, x01);\n y1 = pow(x10, x11);\n y2 = pow(x20, x21);\nWhen pow is applied to two arrays, it applies elementwise. For example, the statement above,\n y2 = pow(x20, x21);\nproduces the same result for y2 as the explicit loop\nfor (i in 1:4) {\n for (j in 1:7) {\n y2[i, j] = pow(x20[i, j], x21[i, j]);\n }\n}\nAlternatively, if a combination of an array and a scalar are provided, the scalar value is broadcast to be applied to each value of the array. For example, the following statement:\ny2 = pow(x20, x00);\nproduces the same result for y2 as the explicit loop:\nfor (i in 1:4) {\n for (j in 1:7) {\n y2[i, j] = pow(x20[i, j], x00);\n }\n}\n\n\n\nVectorized binary functions also apply elementwise to vectors and matrices, and to combinations of these with scalar values. For example,\n real x00;\n vector[5] xv00;\n vector[5] xv01;\n row_vector[7] xrv;\n matrix[10, 20] xm;\n\n vector[5] yv;\n row_vector[7] yrv;\n matrix[10, 20] ym;\n\n yv = pow(xv00, xv01);\n yrv = pow(xrv, x00);\n ym = pow(x00, xm);\nArrays of vectors and matrices work the same way. For example,\n array[12] matrix[17, 93] u;\n\n array[12] matrix[17, 93] z;\n\n z = pow(u, x00);\nAfter this has been executed, z[i, j, k] will be equal to pow(u[i, j, k], x00).\n\n\n\nVectorised binary functions require that both inputs, unless one is a real, be containers of the same type and size. 
For example, the following statements are legal:\n vector[5] xv;\n row_vector[7] xrv;\n matrix[10, 20] xm;\n\n vector[5] yv = pow(xv, xv);\n row_vector[7] yrv = pow(xrv, xrv);\n matrix[10, 20] ym = pow(xm, xm);\nBut the following statements are not:\n vector[5] xv;\n vector[7] xv2;\n row_vector[5] xrv;\n\n // Cannot mix different types\n vector[5] yv = pow(xv, xrv);\n\n // Cannot mix different sizes of the same type\n vector[5] yv = pow(xv, xv2);\nWhile the vectorized binary functions generally require the same input types, the only exception to this is for binary functions that require one input to be an integer and the other to be a real (e.g., bessel_first_kind). For these functions, one argument can be a container of any type while the other can be an integer array, as long as the dimensions of both are the same. For example, the following statements are legal:\n vector[5] xv;\n matrix[5, 5] xm;\n array[5] int xi;\n array[5, 5] int xii;\n\n vector[5] yv = bessel_first_kind(xi, xv);\n matrix[5, 5] ym = bessel_first_kind(xii, xm);\nWhereas these are not:\n vector[5] xv;\n matrix[5, 5] xm;\n array[7] int xi;\n\n // Dimensions of containers do not match\n vector[5] yv = bessel_first_kind(xi, xv);\n\n // Function requires first argument be an integer type\n matrix[5, 5] ym = bessel_first_kind(xm, xm);", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#built-in-constants", + "href": "functions-reference/real-valued_basic_functions.html#built-in-constants", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "Constants are represented as functions with no arguments and must be called as such. 
For instance, the mathematical constant \\(\\pi\\) must be written in a Stan program as pi().\n \n\nreal pi() \\(\\pi\\), the ratio of a circle’s circumference to its diameter\nAvailable since 2.0\n \n\nreal e() \\(e\\), the base of the natural logarithm\nAvailable since 2.0\n \n\nreal sqrt2() The square root of 2\nAvailable since 2.0\n \n\nreal log2() The natural logarithm of 2\nAvailable since 2.0\n \n\nreal log10() The natural logarithm of 10\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#special-values", + "href": "functions-reference/real-valued_basic_functions.html#special-values", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "real not_a_number() Not-a-number, a special non-finite real value returned to signal an error\nAvailable since 2.0\n \n\nreal positive_infinity() Positive infinity, a special non-finite real value larger than all finite numbers\nAvailable since 2.0\n \n\nreal negative_infinity() Negative infinity, a special non-finite real value smaller than all finite numbers\nAvailable since 2.0\n \n\nreal machine_precision() The smallest number \\(x\\) such that \\((x + 1) \\neq 1\\) in floating-point arithmetic on the current hardware platform\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#get-log-prob", + "href": "functions-reference/real-valued_basic_functions.html#get-log-prob", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "The basic purpose of a Stan program is to compute a log probability function and its derivatives. The log probability function in a Stan model outputs the log density on the unconstrained scale. 
A log probability accumulator starts at zero and is then incremented in various ways by a Stan program. The variables are first transformed from unconstrained to constrained, and the log Jacobian determinant is added to the log probability accumulator. Then the model block is executed on the constrained parameters, with each sampling statement (~) and log probability increment statement (increment_log_prob) adding to the accumulator. At the end of the model block execution, the value of the log probability accumulator is the log probability value returned by the Stan program.\nStan provides a special built-in function target() that takes no arguments and returns the current value of the log probability accumulator. This function is primarily useful for debugging purposes, where for instance, it may be used with a print statement to display the log probability accumulator at various stages of execution to see where it becomes ill defined.\n \n\nreal target() Return the current value of the log probability accumulator.\nAvailable since 2.10\ntarget acts like a function ending in _lp, meaning that it may only be used in the model block.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#logical-functions", + "href": "functions-reference/real-valued_basic_functions.html#logical-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "Like C++, BUGS, and R, Stan uses 0 to encode false, and 1 to encode true. Stan supports the usual boolean comparison operations and boolean operators. These all have the same syntax and precedence as in C++; for the full list of operators and precedences, see the reference manual.\n\n\nAll comparison operators return boolean values, either 0 or 1. Each operator has two signatures, one for integer comparisons and one for floating-point comparisons.
Comparing an integer and real value is carried out by first promoting the integer value.\n \n\nint operator<(int x, int y)\n \n\nint operator<(real x, real y) Return 1 if x is less than y and 0 otherwise. \\[\\begin{equation*} \\text{operator<}(x,y)\n= \\begin{cases} 1 & \\text{if $x < y$} \\\\ 0 & \\text{otherwise}\n\\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator<=(int x, int y)\n \n\nint operator<=(real x, real y) Return 1 if x is less than or equal y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator<=}(x,y) = \\begin{cases} 1 & \\text{if $x \\leq y$} \\\\ 0 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator>(int x, int y)\n \n\nint operator>(real x, real y) Return 1 if x is greater than y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator>}(x,y) = \\begin{cases} 1 & \\text{if $x > y$} \\\\ 0 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator>=(int x, int y)\n \n\nint operator>=(real x, real y) Return 1 if x is greater than or equal to y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator>=}(x,y) = \\begin{cases} 1 & \\text{if $x \\geq y$} \\\\ 0 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator==(int x, int y)\n \n\nint operator==(real x, real y) Return 1 if x is equal to y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator==}(x,y) = \\begin{cases} 1 & \\text{if $x = y$} \\\\ 0 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator!=(int x, int y)\n \n\nint operator!=(real x, real y) Return 1 if x is not equal to y and 0 otherwise. \\[\\begin{equation*}\n\\text{operator!=}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq y$} \\\\ 0 &\n\\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n\n\n\nBoolean operators return either 0 for false or 1 for true. 
Inputs may be any real or integer values, with non-zero values being treated as true and zero values treated as false. These operators have the usual precedences, with negation (not) binding the most tightly, conjunction the next and disjunction the weakest; negation binds more tightly than the comparisons, which in turn bind more tightly than conjunction and disjunction. Thus an expression such as !a && b is interpreted as (!a) && b, and a < b || c >= d && e != f as (a < b) || (((c >= d) && (e != f))).\n \n\nint operator!(int x) Return 1 if x is zero and 0 otherwise. \\[\\begin{equation*} \\text{operator!}(x) =\n\\begin{cases} 0 & \\text{if $x \\neq 0$} \\\\ 1 & \\text{if $x = 0$}\n\\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator!(real x) Return 1 if x is zero and 0 otherwise. \\[\\begin{equation*} \\text{operator!}(x) =\n\\begin{cases} 0 & \\text{if $x \\neq 0.0$} \\\\ 1 & \\text{if $x = 0.0$}\n\\end{cases} \\end{equation*}\\] deprecated; use operator== instead.\nAvailable since 2.0, deprecated in 2.31\n \n\nint operator&&(int x, int y)\nReturn 1 if x is unequal to 0 and y is unequal to 0. \\[\\begin{equation*}\n\\mathrm{operator\\&\\&}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq 0$}\n\\text{ and } y \\neq 0\\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator&&(real x, real y) Return 1 if x is unequal to 0.0 and y is unequal to 0.0. \\[\\begin{equation*}\n\\mathrm{operator\\&\\&}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq 0.0$}\n\\text{ and } y \\neq 0.0\\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\] deprecated\nAvailable since 2.0, deprecated in 2.31\n \n\nint operator||(int x, int y) Return 1 if x is unequal to 0 or y is unequal to 0. \\[\\begin{equation*}\n\\text{operator||}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq 0$}\n\\textrm{ or } y \\neq 0\\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator||(real x, real y) Return 1 if x is unequal to 0.0 or y is unequal to 0.0.
\\[\\begin{equation*}\n\\text{operator||}(x,y) = \\begin{cases} 1 & \\text{if $x \\neq 0.0$}\n\\textrm{ or } y \\neq 0.0\\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\] deprecated\nAvailable since 2.0, deprecated in 2.31\n\n\nLike in C++, the boolean operators && and || are implemented to short circuit directly to a return value after evaluating the first argument if it is sufficient to resolve the result. In evaluating a || b, if a evaluates to a value other than zero, the expression returns the value 1 without evaluating the expression b. Similarly, evaluating a && b first evaluates a, and if the result is zero, returns 0 without evaluating b.\n\n\n\n\nThe logical functions introduce conditional behavior functionally and are primarily provided for compatibility with BUGS and JAGS.\n \n\nreal step(real x) Return 0 if x is negative and 1 otherwise. \\[\\begin{equation*} \\text{step}(x) =\n\\begin{cases} 0 & \\text{if } x < 0 \\\\ 1 & \\text{otherwise} \\end{cases}\n\\end{equation*}\\] Warning: int_step(0) and int_step(NaN) return 0 whereas step(0) and step(NaN) return 1.\nThe step function is often used in BUGS to perform conditional operations. For instance, step(a-b) evaluates to 1 if a is greater than or equal to b and evaluates to 0 otherwise.
step is a step-like function; see the warning in section step functions applied to expressions dependent on parameters.\nAvailable since 2.0\n \n\nint is_inf(real x) Return 1 if x is infinite (positive or negative) and 0 otherwise.\nAvailable since 2.5\n \n\nint is_nan(real x) Return 1 if x is NaN and 0 otherwise.\nAvailable since 2.5\nCare must be taken because both of these indicator functions are step-like and thus can cause discontinuities in gradients when applied to parameters; see section step-like functions for details.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#real-valued-arithmetic-operators", + "href": "functions-reference/real-valued_basic_functions.html#real-valued-arithmetic-operators", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "The arithmetic operators are presented using C++ notation. For instance operator+(x,y) refers to the binary addition operator and operator-(x) to the unary negation operator. In Stan programs, these are written using the usual infix and prefix notations as x + y and -x, respectively.\n\n\n \n\nreal operator+(real x, real y) Return the sum of x and y. \\[\\begin{equation*} (x + y) = \\text{operator+}(x,y) = x+y \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal operator-(real x, real y) Return the difference between x and y. \\[\\begin{equation*} (x - y) =\n\\text{operator-}(x,y) = x - y \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal operator*(real x, real y) Return the product of x and y. \\[\\begin{equation*} (x * y) = \\text{operator*}(x,y) = xy\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nreal operator/(real x, real y) Return the quotient of x and y. \\[\\begin{equation*} (x / y) = \\text{operator/}(x,y) =\n\\frac{x}{y} \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal operator^(real x, real y) Return x raised to the power of y.
\\[\\begin{equation*} (x^\\mathrm{\\wedge}y) =\n\\text{operator}^\\mathrm{\\wedge}(x,y) = x^y \\end{equation*}\\]\nAvailable since 2.5\n\n\n\n \n\nreal operator-(real x) Return the negation of the subtrahend x. \\[\\begin{equation*} \\text{operator-}(x) = (-x)\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of reals, -x is the same shape array where each individual number is negated.\nAvailable since 2.31\n \n\nreal operator+(real x) Return the value of x. \\[\\begin{equation*} \\text{operator+}(x) = x \\end{equation*}\\]\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#step-functions", + "href": "functions-reference/real-valued_basic_functions.html#step-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "Warning: These functions can seriously hinder sampling and optimization efficiency for gradient-based methods (e.g., NUTS, HMC, BFGS) if applied to parameters (including transformed parameters and local variables in the transformed parameters or model block). The problem is that they break gradients due to discontinuities coupled with zero gradients elsewhere. They do not hinder sampling when used in the data, transformed data, or generated quantities blocks.\n\n\n \n\nT abs(T x) The absolute value of x.\nThis function works elementwise over containers such as vectors. Given a type T which is real vector, row_vector, matrix, or an array of those types, abs returns the same type where each element has had its absolute value taken.\nAvailable since 2.0, vectorized in 2.30\n \n\nreal fdim(real x, real y) Return the positive difference between x and y, which is x - y if x is greater than y and 0 otherwise; see warning above. 
\\[\\begin{equation*} \\text{fdim}(x,y) = \\begin{cases} x-y &\n\\text{if } x \\geq y \\\\ 0 & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR fdim(T1 x, T2 y) Vectorized implementation of the fdim function\nAvailable since 2.25\n\n\n\n \n\nreal fmin(real x, real y) Return the minimum of x and y; see warning above. \\[\\begin{equation*} \\text{fmin}(x,y) = \\begin{cases} x &\n\\text{if } x \\leq y \\\\ y & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR fmin(T1 x, T2 y) Vectorized implementation of the fmin function\nAvailable since 2.25\n \n\nreal fmax(real x, real y) Return the maximum of x and y; see warning above. \\[\\begin{equation*} \\text{fmax}(x,y) = \\begin{cases} x &\n\\text{if } x \\geq y \\\\ y & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR fmax(T1 x, T2 y) Vectorized implementation of the fmax function\nAvailable since 2.25\n\n\n\n \n\nreal fmod(real x, real y) Return the real value remainder after dividing x by y; see warning above. \\[\\begin{equation*} \\text{fmod}(x,y) = x - \\left\\lfloor \\frac{x}{y} \\right\\rfloor \\, y \\end{equation*}\\] The operator \\(\\lfloor u \\rfloor\\) is the floor operation; see below.\nAvailable since 2.0\n \n\nR fmod(T1 x, T2 y) Vectorized implementation of the fmod function\nAvailable since 2.25\n\n\n\nWarning: Rounding functions convert real values to integers. Because the output is an integer, any gradient information resulting from functions applied to the integer is not passed to the real value it was derived from. With MCMC sampling using HMC or NUTS, the MCMC acceptance procedure will correct for any error due to poor gradient calculations, but the result is likely to be reduced acceptance probabilities and less efficient sampling.\nThe rounding functions cannot be used as indices to arrays because they return real values. 
For operations over data or in the generated quantities block, the to_int() function can be used.\n \n\nR floor(T x) The floor of x, which is the largest integer less than or equal to x, converted to a real value; see warning at start of section step-like functions\nAvailable since 2.0, vectorized in 2.13\n \n\nR ceil(T x) The ceiling of x, which is the smallest integer greater than or equal to x, converted to a real value; see warning at start of section step-like functions\nAvailable since 2.0, vectorized in 2.13\n \n\nR round(T x) The nearest integer to x, converted to a real value; see warning at start of section step-like functions\nAvailable since 2.0, vectorized in 2.13\n \n\nR trunc(T x) The integer nearest to but no larger in magnitude than x, converted to a double value; see warning at start of section step-like functions\nAvailable since 2.0, vectorized in 2.13", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#power-and-logarithm-functions", + "href": "functions-reference/real-valued_basic_functions.html#power-and-logarithm-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "R sqrt(T x) The square root of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR cbrt(T x) The cube root of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR square(T x) The square of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR exp(T x) The natural exponential of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR exp2(T x) The base-2 exponential of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log(T x) The natural logarithm of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log2(T x) The base-2 logarithm of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log10(T x) The base-10 logarithm of x\nAvailable since 2.0, vectorized in 2.13\n \n\nreal pow(real x, real y) Return x raised to the power of y. 
\\[\\begin{equation*} \\text{pow}(x,y) = x^y \\end{equation*}\\]\nAvailable since 2.0\n \n\nR pow(T1 x, T2 y) Vectorized implementation of the pow function\nAvailable since 2.25\n \n\nR inv(T x) The inverse of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_sqrt(T x) The inverse of the square root of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_square(T x) The inverse of the square of x\nAvailable since 2.0, vectorized in 2.13", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#trigonometric-functions", + "href": "functions-reference/real-valued_basic_functions.html#trigonometric-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "real hypot(real x, real y) Return the length of the hypotenuse of a right triangle with sides of length x and y. \\[\\begin{equation*} \\text{hypot}(x,y) = \\begin{cases} \\sqrt{x^2+y^2} &\n\\text{if } x,y\\geq 0 \\\\ \\textrm{NaN} & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR hypot(T1 x, T2 y) Vectorized implementation of the hypot function\nAvailable since 2.25\n \n\nR cos(T x) The cosine of the angle x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR sin(T x) The sine of the angle x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR tan(T x) The tangent of the angle x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR acos(T x) The principal arc (inverse) cosine (in radians) of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR asin(T x) The principal arc (inverse) sine (in radians) of x\nAvailable since 2.0\n \n\nR atan(T x) The principal arc (inverse) tangent (in radians) of x, with values from \\(-\\pi/2\\) to \\(\\pi/2\\)\nAvailable since 2.0, vectorized in 2.13\n \n\nR atan2(T y, T x) Return the principal arc (inverse) tangent (in radians) of y divided by x, \\[\\begin{equation*} 
\\text{atan2}(y, x) = \\arctan\\left(\\frac{y}{x}\\right) \\end{equation*}\\]\nAvailable since 2.0, vectorized in 2.34", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#hyperbolic-trigonometric-functions", + "href": "functions-reference/real-valued_basic_functions.html#hyperbolic-trigonometric-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "R cosh(T x) The hyperbolic cosine of x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR sinh(T x) The hyperbolic sine of x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR tanh(T x) The hyperbolic tangent of x (in radians)\nAvailable since 2.0, vectorized in 2.13\n \n\nR acosh(T x) The inverse hyperbolic cosine (in radians) of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR asinh(T x) The inverse hyperbolic sine (in radians) of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR atanh(T x) The inverse hyperbolic tangent (in radians) of x\nAvailable since 2.0, vectorized in 2.13", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#link-functions", + "href": "functions-reference/real-valued_basic_functions.html#link-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "The following functions are commonly used as link functions in generalized linear models.
The function \\(\\Phi\\) is also commonly used as a link function (see section probability-related functions).\n \n\nR logit(T x) The log odds, or logit, function applied to x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_logit(T x) The logistic sigmoid function applied to x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_cloglog(T x) The inverse of the complementary log-log function applied to x\nAvailable since 2.0, vectorized in 2.13", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#Phi-function", + "href": "functions-reference/real-valued_basic_functions.html#Phi-function", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "The error function erf is related to the standard normal cumulative distribution function \\(\\Phi\\) by scaling. See section normal distribution for the general normal cumulative distribution function (and its complement).\n \n\nR erf(T x) The error function, also known as the Gauss error function, of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR erfc(T x) The complementary error function of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_erfc(T x) The inverse of the complementary error function of x\nAvailable since 2.29, vectorized in 2.29\n \n\nR Phi(T x) The standard normal cumulative distribution function of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR inv_Phi(T x) Return the value of the inverse standard normal cdf \\(\\Phi^{-1}\\) at the specified quantile x. The details of the algorithm can be found in (Wichura 1988). Quantile arguments below 1e-16 are untested; quantiles above 0.999999999 result in increasingly large errors.\nAvailable since 2.0, vectorized in 2.13\n \n\nR Phi_approx(T x) The fast approximation of the unit (may replace Phi for probit regression with maximum absolute error of 0.00014, see (Bowling et al. 
2009) for details)\nAvailable since 2.0, vectorized in 2.13\n\n\n\n \n\nreal binary_log_loss(int y, real y_hat) Return the log loss function for predicting \\(\\hat{y} \\in [0,1]\\) for boolean outcome \\(y \\in \\{0,1\\}\\). \\[\\begin{equation*}\n\\mathrm{binary\\_log\\_loss}(y,\\hat{y}) = \\begin{cases} -\\log \\hat{y} &\n\\text{if } y = 1\\\\ -\\log (1 - \\hat{y}) & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR binary_log_loss(T1 x, T2 y) Vectorized implementation of the binary_log_loss function\nAvailable since 2.25\n \n\nreal owens_t(real h, real a) Return the Owen’s T function for the probability of the event \\(X > h\\) and \\(0<Y<aX\\) where X and Y are independent standard normal random variables. \\[\\begin{equation*} \\mathrm{owens\\_t}(h,a) = \\frac{1}{2\\pi} \\int_0^a\n\\frac{\\exp(-\\frac{1}{2}h^2(1+x^2))}{1+x^2}dx \\end{equation*}\\]\nAvailable since 2.25\n \n\nR owens_t(T1 x, T2 y) Vectorized implementation of the owens_t function\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#betafun", + "href": "functions-reference/real-valued_basic_functions.html#betafun", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "real beta(real alpha, real beta) Return the beta function applied to alpha and beta. The beta function, \\(\\text{B}(\\alpha,\\beta)\\), computes the normalizing constant for the beta distribution, and is defined for \\(\\alpha > 0\\) and \\(\\beta > 0\\). See section appendix for definition of \\(\\text{B}(\\alpha, \\beta)\\).\nAvailable since 2.25\n \n\nR beta(T1 x, T2 y) Vectorized implementation of the beta function\nAvailable since 2.25\n \n\nreal inc_beta(real alpha, real beta, real x) Return the regularized incomplete beta function up to x applied to alpha and beta.
See section appendix for a definition.\nAvailable since 2.10\n \n\nreal inv_inc_beta(real alpha, real beta, real p) Return the inverse of the regularized incomplete beta function. The return value x is the value that solves p = inc_beta(alpha, beta, x). See section appendix for a definition of inc_beta.\nAvailable since 2.30\n \n\nreal lbeta(real alpha, real beta) Return the natural logarithm of the beta function applied to alpha and beta. The beta function, \\(\\text{B}(\\alpha,\\beta)\\), computes the normalizing constant for the beta distribution, and is defined for \\(\\alpha > 0\\) and \\(\\beta > 0\\). \\[\\begin{equation*}\n\\text{lbeta}(\\alpha,\\beta) = \\log \\Gamma(\\alpha) + \\log \\Gamma(\\beta) - \\log \\Gamma(\\alpha+\\beta)\n\\end{equation*}\\] See section appendix for definition of \\(\\text{B}(\\alpha, \\beta)\\).\nAvailable since 2.0\n \n\nR lbeta(T1 x, T2 y) Vectorized implementation of the lbeta function\nAvailable since 2.25\n \n\nR tgamma(T x) The gamma function applied to x. The gamma function is the generalization of the factorial function to continuous variables, defined so that \\(\\Gamma(n+1) = n!\\). See section appendix for a full definition of \\(\\Gamma(x)\\). The function is defined for positive numbers and non-integral negative numbers.\nAvailable since 2.0, vectorized in 2.13\n \n\nR lgamma(T x) The natural logarithm of the gamma function applied to x.\nAvailable since 2.0, vectorized in 2.15\n \n\nR digamma(T x) The digamma function applied to x. The digamma function is the derivative of the natural logarithm of the Gamma function. The function is defined for positive numbers and non-integral negative numbers\nAvailable since 2.0, vectorized in 2.13\n \n\nR trigamma(T x) The trigamma function applied to x.
The trigamma function is the second derivative of the natural logarithm of the Gamma function\nAvailable since 2.0, vectorized in 2.13\n \n\nreal lmgamma(int n, real x) Return the natural logarithm of the multivariate gamma function \\(\\Gamma_n\\) with n dimensions applied to x. \\[\\begin{equation*}\n\\text{lmgamma}(n,x) =\n\\begin{cases} \\frac{n(n-1)}{4} \\log \\pi + \\sum_{j=1}^n \\log \\Gamma\\left(x + \\frac{1 - j}{2}\\right)\n& \\text{if } x\\not\\in \\{\\dots,-3,-2,-1,0\\}\\\\ \\textrm{error} & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR lmgamma(T1 x, T2 y) Vectorized implementation of the lmgamma function\nAvailable since 2.25\n \n\nreal gamma_p(real a, real z) Return the normalized lower incomplete gamma function of a and z defined for positive a and nonnegative z. \\[\\begin{equation*}\n\\mathrm{gamma\\_p}(a,z) =\n\\begin{cases} \\frac{1}{\\Gamma(a)}\\int_0^zt^{a-1}e^{-t}dt &\n\\text{if } a > 0, z \\geq 0 \\\\ \\textrm{error} & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR gamma_p(T1 x, T2 y) Vectorized implementation of the gamma_p function\nAvailable since 2.25\n \n\nreal gamma_q(real a, real z) Return the normalized upper incomplete gamma function of a and z defined for positive a and nonnegative z. \\[\\begin{equation*}\n\\mathrm{gamma\\_q}(a,z) =\n\\begin{cases} \\frac{1}{\\Gamma(a)}\\int_z^\\infty t^{a-1}e^{-t}dt &\n\\text{if } a > 0, z \\geq 0 \\\\[6pt] \\textrm{error} & \\text{otherwise}\n\\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR gamma_q(T1 x, T2 y) Vectorized implementation of the gamma_q function\nAvailable since 2.25\n \n\nint choose(int x, int y) Return the binomial coefficient of x and y. For non-negative integer inputs, the binomial coefficient function is written as \\(\\binom{x}{y}\\) and pronounced “x choose y.” It is the antilog of the lchoose function but returns an integer rather than a real number with no non-zero decimal places.
For \\(0 \\leq y \\leq x\\), the binomial coefficient function can be defined via the factorial function \\[\\begin{equation*}\n\\text{choose}(x,y) = \\frac{x!}{\\left(y!\\right)\\left(x - y\\right)!}.\n\\end{equation*}\\]\nAvailable since 2.14\n \n\nR choose(T1 x, T2 y) Vectorized implementation of the choose function\nAvailable since 2.25\n \n\nreal bessel_first_kind(int v, real x) Return the Bessel function of the first kind with order v applied to x. \\[\\begin{equation*}\n\\mathrm{bessel\\_first\\_kind}(v,x) = J_v(x),\n\\end{equation*}\\] where \\[\\begin{equation*}\nJ_v(x)=\\left(\\frac{1}{2}x\\right)^v \\sum_{k=0}^\\infty\n\\frac{\\left(-\\frac{1}{4}x^2\\right)^k}{k!\\, \\Gamma(v+k+1)}\n\\end{equation*}\\]\nAvailable since 2.5\n \n\nR bessel_first_kind(T1 x, T2 y) Vectorized implementation of the bessel_first_kind function\nAvailable since 2.25\n \n\nreal bessel_second_kind(int v, real x) Return the Bessel function of the second kind with order v applied to x defined for positive x and v. For \\(x,v > 0\\), \\[\\begin{equation*}\n\\mathrm{bessel\\_second\\_kind}(v,x) =\n\\begin{cases} Y_v(x) & \\text{if } x > 0 \\\\ \\textrm{error} & \\text{otherwise} \\end{cases}\n\\end{equation*}\\] where \\[\\begin{equation*}\nY_v(x)=\\frac{J_v(x)\\cos(v\\pi)-J_{-v}(x)}{\\sin(v\\pi)}\n\\end{equation*}\\]\nAvailable since 2.5\n \n\nR bessel_second_kind(T1 x, T2 y) Vectorized implementation of the bessel_second_kind function\nAvailable since 2.25\n \n\nreal modified_bessel_first_kind(int v, real z) Return the modified Bessel function of the first kind with order v applied to z defined for all z and integer v. 
\\[\\begin{equation*}\n\\mathrm{modified\\_bessel\\_first\\_kind}(v,z) = I_v(z)\n\\end{equation*}\\] where \\[\\begin{equation*}\n{I_v}(z) = \\left(\\frac{1}{2}z\\right)^v\\sum_{k=0}^\\infty \\frac{\\left(\\frac{1}{4}z^2\\right)^k}{k!\\Gamma(v+k+1)}\n\\end{equation*}\\]\nAvailable since 2.1\n \n\nR modified_bessel_first_kind(T1 x, T2 y) Vectorized implementation of the modified_bessel_first_kind function\nAvailable since 2.25\n \n\nreal log_modified_bessel_first_kind(real v, real z) Return the log of the modified Bessel function of the first kind. v does not have to be an integer.\nAvailable since 2.26\n \n\nR log_modified_bessel_first_kind(T1 x, T2 y) Vectorized implementation of the log_modified_bessel_first_kind function\nAvailable since 2.26\n \n\nreal modified_bessel_second_kind(int v, real z) Return the modified Bessel function of the second kind with order v applied to z defined for positive z and integer v. \\[\\begin{equation*}\n\\mathrm{modified\\_bessel\\_second\\_kind}(v,z) =\n\\begin{cases} K_v(z) & \\text{if } z > 0 \\\\ \\textrm{error} & \\text{if } z \\leq 0 \\end{cases}\n\\end{equation*}\\] where \\[\\begin{equation*} {K_v}(z) = \\frac{\\pi}{2}\\cdot\\frac{I_{-v}(z) - I_{v}(z)}{\\sin(v\\pi)}\n\\end{equation*}\\]\nAvailable since 2.1\n \n\nR modified_bessel_second_kind(T1 x, T2 y) Vectorized implementation of the modified_bessel_second_kind function\nAvailable since 2.25\n \n\nreal falling_factorial(real x, real n) Return the falling factorial of x with power n defined for positive x and real n. 
\\[\\begin{equation*}\n\\mathrm{falling\\_factorial}(x,n) =\n\\begin{cases} (x)_n & \\text{if } x > 0 \\\\ \\textrm{error} & \\text{if } x \\leq 0 \\end{cases}\n\\end{equation*}\\] where \\[\\begin{equation*}\n(x)_n=\\frac{\\Gamma(x+1)}{\\Gamma(x-n+1)}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nR falling_factorial(T1 x, T2 y) Vectorized implementation of the falling_factorial function\nAvailable since 2.25\n \n\nreal lchoose(real x, real y) Return the natural logarithm of the generalized binomial coefficient of x and y. For non-negative integer inputs, the binomial coefficient function is written as \\(\\binom{x}{y}\\) and pronounced “x choose y.” This function generalizes to real numbers using the gamma function. For \\(0 \\leq y \\leq x\\), \\[\\begin{equation*} \\mathrm{binomial\\_coefficient\\_log}(x,y) =\n\\log\\Gamma(x+1) - \\log\\Gamma(y+1) - \\log\\Gamma(x-y+1). \\end{equation*}\\]\nAvailable since 2.10\n \n\nR lchoose(T1 x, T2 y) Vectorized implementation of the lchoose function\nAvailable since 2.29\n \n\nreal log_falling_factorial(real x, real n) Return the log of the falling factorial of x with power n defined for positive x and real n. \\[\\begin{equation*} \\mathrm{log\\_falling\\_factorial}(x,n) =\n\\begin{cases} \\log (x)_n & \\text{if } x > 0 \\\\ \\textrm{error} &\n\\text{if } x \\leq 0 \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal rising_factorial(real x, int n) Return the rising factorial of x with power n defined for positive x and integer n. 
\\[\\begin{equation*}\n\\mathrm{rising\\_factorial}(x,n) = \\begin{cases} x^{(n)} & \\text{if } x > 0 \\\\ \\textrm{error} & \\text{if } x \\leq 0 \\end{cases}\n\\end{equation*}\\] where \\[\\begin{equation*} x^{(n)}=\\frac{\\Gamma(x+n)}{\\Gamma(x)} \\end{equation*}\\]\nAvailable since 2.20\n \n\nR rising_factorial(T1 x, T2 y) Vectorized implementation of the rising_factorial function\nAvailable since 2.25\n \n\nreal log_rising_factorial(real x, real n) Return the log of the rising factorial of x with power n defined for positive x and real n. \\[\\begin{equation*} \\mathrm{log\\_rising\\_factorial}(x,n) =\n\\begin{cases} \\log x^{(n)} & \\text{if } x > 0 \\\\ \\textrm{error} &\n\\text{if } x \\leq 0 \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nR log_rising_factorial(T1 x, T2 y) Vectorized implementation of the log_rising_factorial function\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#composed-functions", + "href": "functions-reference/real-valued_basic_functions.html#composed-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "The functions in this section are equivalent in theory to combinations of other functions. In practice, they are implemented to be more efficient and more numerically stable than defining them directly using more basic Stan functions.\n \n\nR expm1(T x) The natural exponential of x minus 1\nAvailable since 2.0, vectorized in 2.13\n \n\nreal fma(real x, real y, real z) Return z plus the result of x multiplied by y. \\[\\begin{equation*} \\text{fma}(x,y,z) =\n(x \\times y) + z \\end{equation*}\\]\nAvailable since 2.0\n \n\nreal ldexp(real x, int y) Return the product of x and two raised to the y power. 
\\[\\begin{equation*}\n\\text{ldexp}(x,y) = x 2^y \\end{equation*}\\]\nAvailable since 2.25\n \n\nR ldexp(T1 x, T2 y) Vectorized implementation of the ldexp function\nAvailable since 2.25\n \n\nreal lmultiply(real x, real y) Return the product of x and the natural logarithm of y. \\[\\begin{equation*}\n\\text{lmultiply}(x,y) = \\begin{cases} 0 & \\text{if } x = y = 0 \\\\ x\n\\log y & \\text{if } x, y \\neq 0 \\\\ \\text{NaN} & \\text{otherwise}\n\\end{cases} \\end{equation*}\\]\nAvailable since 2.10\n \n\nR lmultiply(T1 x, T2 y) Vectorized implementation of the lmultiply function\nAvailable since 2.25\n \n\nR log1p(T x) The natural logarithm of 1 plus x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log1m(T x) The natural logarithm of 1 minus x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log1p_exp(T x) The natural logarithm of one plus the natural exponentiation of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log1m_exp(T x) The logarithm of one minus the natural exponentiation of x\nAvailable since 2.0, vectorized in 2.13\n \n\nreal log_diff_exp(real x, real y) Return the natural logarithm of the difference of the natural exponentiation of x and the natural exponentiation of y. \\[\\begin{equation*}\n\\mathrm{log\\_diff\\_exp}(x,y) = \\begin{cases} \\log(\\exp(x)-\\exp(y)) &\n\\text{if } +\\infty > x \\ge y \\\\[6pt]\n\\textrm{NaN} & \\text{otherwise} \\end{cases}\n\\end{equation*}\\]\nWhen x is equal to y, log_diff_exp(x, y) returns \\(-\\infty\\), consistent with log(0) returning \\(-\\infty\\). 
This includes the case in which x and y are both equal to \\(-\\infty\\), which corresponds to log(0 - 0) because exp(negative_infinity()) returns 0.\nAvailable since 2.0\n \n\nR log_diff_exp(T1 x, T2 y) Vectorized implementation of the log_diff_exp function\nAvailable since 2.25\n \n\nreal log_mix(real theta, real lp1, real lp2) Return the log mixture of the log densities lp1 and lp2 with mixing proportion theta, defined by \\[\\begin{eqnarray*}\n\\mathrm{log\\_mix}(\\theta, \\lambda_1, \\lambda_2) & = & \\log \\!\\left(\n\\theta \\exp(\\lambda_1) + \\left( 1 - \\theta \\right) \\exp(\\lambda_2)\n\\right) \\\\[3pt] & = & \\mathrm{log\\_sum\\_exp}\\!\\left(\\log(\\theta) +\n\\lambda_1, \\ \\log(1 - \\theta) + \\lambda_2\\right). \\end{eqnarray*}\\]\nAvailable since 2.6\n \n\nR log_mix(T1 thetas, T2 lps)\nCalculates the log mixture density given thetas, mixing proportions which should be between 0 and 1 and sum to 1, and lps, log densities. The lps variable must be either a 1-d container of the same length as thetas, or an array of such.\n\\[\\begin{eqnarray*}\n\\mathrm{log\\_mix}(\\theta, \\lambda)\n& = & \\log \\!\\left( \\sum_{n=1}^N \\theta_n * \\exp(\\lambda_n) \\right) \\\\[3pt]\n& = & \\mathrm{log\\_sum\\_exp}\\!\\left(\\log(\\theta) + \\lambda\\right).\n\\end{eqnarray*}\\]\nThis is a generalization of the above signature of three arguments to more than two densities. For example, log_mix(lambda, lp1, lp2) == log_mix({lambda, 1 - lambda}, {lp1, lp2}).\nAvailable since 2.26\n \n\nR log_sum_exp(T1 x, T2 y) Return the natural logarithm of the sum of the natural exponentiation of x and the natural exponentiation of y. 
\\[\\begin{equation*}\n\\mathrm{log\\_sum\\_exp}(x,y) = \\log(\\exp(x)+\\exp(y)) \\end{equation*}\\]\nAvailable since 2.0, vectorized in 2.33\n \n\nR log_inv_logit(T x) The natural logarithm of the inverse logit function of x\nAvailable since 2.0, vectorized in 2.13\n \n\nR log_inv_logit_diff(T1 x, T2 y) The natural logarithm of the difference of the inverse logit function of x and the inverse logit function of y\nAvailable since 2.25\n \n\nR log1m_inv_logit(T x) The natural logarithm of 1 minus the inverse logit function of x\nAvailable since 2.0, vectorized in 2.13", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#special-functions", + "href": "functions-reference/real-valued_basic_functions.html#special-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "R lambert_w0(T x) Implementation of the \\(W_0\\) branch of the Lambert W function, i.e., the solution \\(W_0(x)\\) of the equation \\(W_0(x) \\, e^{W_0(x)} = x\\)\nAvailable since 2.25\n \n\nR lambert_wm1(T x) Implementation of the \\(W_{-1}\\) branch of the Lambert W function, i.e., the solution \\(W_{-1}(x)\\) of the equation \\(W_{-1}(x) \\, e^{W_{-1}(x)} = x\\)\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/real-valued_basic_functions.html#hypergeometric-functions", + "href": "functions-reference/real-valued_basic_functions.html#hypergeometric-functions", + "title": "Real-Valued Basic Functions", + "section": "", + "text": "Hypergeometric functions refer to a power series of the form \\[\\begin{equation*}\n_pF_q(a_1,...,a_p;b_1,...,b_q;z) = \\sum_{n=0}^\\infty \\frac{(a_1)_n\\cdot\\cdot\\cdot(a_p)_n}{(b_1)_n\\cdot\\cdot\\cdot(b_q)_n} \\frac{z^n}{n!}\n\\end{equation*}\\] where \\((a)_n\\) is the Pochhammer symbol defined as \\((a)_n = 
\\frac{\\Gamma(a+n)}{\\Gamma(a)}\\).\nThe gradients of the hypergeometric function are given by: \\[\\begin{equation*}\n\\frac{\\partial }{\\partial a_1} =\n \\sum_{k=0}^{\\infty}{\n \\frac\n {\\psi\\left(k+a_1\\right)\\left(\\prod_{j=1}^p\\left(a_j\\right)_k\\right)z^k}\n {k!\\prod_{j=1}^q\\left(b_j\\right)_k}}\n - \\psi\\left(a_1\\right){}_pF_q(a_1,...,a_p;b_1,...,b_q;z)\n\\end{equation*}\\] \\[\\begin{equation*}\n\\frac{\\partial }{\\partial b_1} =\n \\psi\\left(b_1\\right){}_pF_q(a_1,...,a_p;b_1,...,b_q;z) -\n \\sum_{k=0}^{\\infty}{\n \\frac\n {\\psi\\left(k+b_1\\right)\\left(\\prod_{j=1}^p\\left(a_j\\right)_k\\right)z^k}\n {k!\\prod_{j=1}^q\\left(b_j\\right)_k}}\n\\end{equation*}\\] \\[\\begin{equation*}\n \\frac{\\partial }{\\partial z} =\n \\frac{\\prod_{j=1}^{p}a_j}{\\prod_{j=1}^{q} b_j}{}_pF_q(a_1+1,...,a_p+1;b_1+1,...,b_q+1;z)\n\\end{equation*}\\]\nStan provides both the generalized hypergeometric function as well as several special cases for particular values of p and q.\n \n\nreal hypergeometric_1F0(real a, real z) Special case of the hypergeometric function with \\(p=1\\) and \\(q=0\\).\nAvailable since 2.37\n \n\nreal hypergeometric_2F1(real a1, real a2, real b1, real z) Special case of the hypergeometric function with \\(p=2\\) and \\(q=1\\). 
If the function does not meet convergence criteria for given inputs, the function will attempt to apply Euler’s transformation to improve convergence: \\[\\begin{equation*}\n{}_2F_1(a_1,a_2, b_1, z)={}_2F_1(b_1 - a_1,a_2, b_1, \\frac{z}{z-1})\\cdot(1-z)^{-a_2}\n\\end{equation*}\\]\nAvailable since 2.37\n \n\nreal hypergeometric_3F2(T1 a, T2 b, real z) Special case of the hypergeometric function with \\(p=3\\) and \\(q=2\\), where a and b are vectors of length 3 and 2, respectively.\nAvailable since 2.37\n \n\nreal hypergeometric_pFq(T1 a, T2 b, real z) Generalized hypergeometric function, where a and b are vectors of length p and q, respectively.\nAvailable since 2.37", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Real-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html", + "href": "functions-reference/positive_continuous_distributions.html", + "title": "Positive Continuous Distributions", + "section": "", + "text": "The positive continuous probability functions have support on the positive real numbers.\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in\n\\mathbb{R}^+\\), \\[\\begin{equation*} \\text{LogNormal}(y|\\mu,\\sigma) = \\frac{1}{\\sqrt{2\n\\pi} \\ \\sigma} \\, \\frac{1}{y} \\ \\exp \\! \\left( - \\, \\frac{1}{2}\n\\, \\left( \\frac{\\log y - \\mu}{\\sigma} \\right)^2 \\right) . 
\\end{equation*}\\]\n\n\n\ny ~ lognormal(mu, sigma)\nIncrement target log probability density with lognormal_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal lognormal_lpdf(reals y | reals mu, reals sigma) The log of the lognormal density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal lognormal_lupdf(reals y | reals mu, reals sigma) The log of the lognormal density of y given location mu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal lognormal_cdf(reals y | reals mu, reals sigma) The cumulative lognormal distribution function of y given location mu and scale sigma\nAvailable since 2.0\n \n\nreal lognormal_lcdf(reals y | reals mu, reals sigma) The log of the lognormal cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal lognormal_lccdf(reals y | reals mu, reals sigma) The log of the lognormal complementary cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nR lognormal_rng(reals mu, reals sigma) Generate a lognormal variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.22\n\n\n\n\n\n\nIf \\(\\nu \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*}\n\\text{ChiSquare}(y|\\nu) = \\frac{2^{-\\nu/2}} {\\Gamma(\\nu / 2)} \\,\ny^{\\nu/2 - 1} \\, \\exp \\! \\left( -\\, \\frac{1}{2} \\, y \\right) . 
\\end{equation*}\\]\n\n\n\ny ~ chi_square(nu)\nIncrement target log probability density with chi_square_lupdf(y | nu).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal chi_square_lpdf(reals y | reals nu) The log of the Chi-square density of y given degrees of freedom nu\nAvailable since 2.12\n \n\nreal chi_square_lupdf(reals y | reals nu) The log of the Chi-square density of y given degrees of freedom nu dropping constant additive terms\nAvailable since 2.25\n \n\nreal chi_square_cdf(reals y | reals nu) The Chi-square cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.0\n \n\nreal chi_square_lcdf(reals y | reals nu) The log of the Chi-square cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.12\n \n\nreal chi_square_lccdf(reals y | reals nu) The log of the complementary Chi-square cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.12\n \n\nR chi_square_rng(reals nu) Generate a Chi-square variate with degrees of freedom nu; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\nu \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*}\n\\text{InvChiSquare}(y \\, | \\, \\nu) = \\frac{2^{-\\nu/2}} {\\Gamma(\\nu\n/ 2)} \\, y^{-\\nu/2 - 1} \\, \\exp\\! \\left( \\! - \\, \\frac{1}{2} \\,\n\\frac{1}{y} \\right) . 
\\end{equation*}\\]\n\n\n\ny ~ inv_chi_square(nu)\nIncrement target log probability density with inv_chi_square_lupdf(y | nu).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal inv_chi_square_lpdf(reals y | reals nu) The log of the inverse Chi-square density of y given degrees of freedom nu\nAvailable since 2.12\n \n\nreal inv_chi_square_lupdf(reals y | reals nu) The log of the inverse Chi-square density of y given degrees of freedom nu dropping constant additive terms\nAvailable since 2.25\n \n\nreal inv_chi_square_cdf(reals y | reals nu) The inverse Chi-squared cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.0\n \n\nreal inv_chi_square_lcdf(reals y | reals nu) The log of the inverse Chi-squared cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.12\n \n\nreal inv_chi_square_lccdf(reals y | reals nu) The log of the inverse Chi-squared complementary cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.12\n \n\nR inv_chi_square_rng(reals nu) Generate an inverse Chi-squared variate with degrees of freedom nu; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\nu \\in \\mathbb{R}^+\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y\n\\in \\mathbb{R}^+\\), \\[\\begin{equation*} \\text{ScaledInvChiSquare}(y|\\nu,\\sigma) =\n\\frac{(\\nu / 2)^{\\nu/2}} {\\Gamma(\\nu / 2)} \\, \\sigma^{\\nu} \\,\ny^{-(\\nu/2 + 1)} \\, \\exp \\! \\left( \\! - \\, \\frac{1}{2} \\, \\nu \\,\n\\sigma^2 \\, \\frac{1}{y} \\right) . 
\\end{equation*}\\]\n\n\n\ny ~ scaled_inv_chi_square(nu, sigma)\nIncrement target log probability density with scaled_inv_chi_square_lupdf(y | nu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal scaled_inv_chi_square_lpdf(reals y | reals nu, reals sigma) The log of the scaled inverse Chi-square density of y given degrees of freedom nu and scale sigma\nAvailable since 2.12\n \n\nreal scaled_inv_chi_square_lupdf(reals y | reals nu, reals sigma) The log of the scaled inverse Chi-square density of y given degrees of freedom nu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal scaled_inv_chi_square_cdf(reals y | reals nu, reals sigma) The scaled inverse Chi-square cumulative distribution function of y given degrees of freedom nu and scale sigma\nAvailable since 2.0\n \n\nreal scaled_inv_chi_square_lcdf(reals y | reals nu, reals sigma) The log of the scaled inverse Chi-square cumulative distribution function of y given degrees of freedom nu and scale sigma\nAvailable since 2.12\n \n\nreal scaled_inv_chi_square_lccdf(reals y | reals nu, reals sigma) The log of the scaled inverse Chi-square complementary cumulative distribution function of y given degrees of freedom nu and scale sigma\nAvailable since 2.12\n \n\nR scaled_inv_chi_square_rng(reals nu, reals sigma) Generate a scaled inverse Chi-squared variate with degrees of freedom nu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf inverse scale (rate) \\(\\beta \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*}\n\\text{Exponential}(y|\\beta) = \\beta \\, \\exp ( - \\beta \\, y ) . 
\\end{equation*}\\]\n\n\n\ny ~ exponential(beta)\nIncrement target log probability density with exponential_lupdf(y | beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal exponential_lpdf(reals y | reals beta) The log of the exponential density of y given inverse scale beta\nAvailable since 2.12\n \n\nreal exponential_lupdf(reals y | reals beta) The log of the exponential density of y given inverse scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal exponential_cdf(reals y | reals beta) The exponential cumulative distribution function of y given inverse scale beta\nAvailable since 2.0\n \n\nreal exponential_lcdf(reals y | reals beta) The log of the exponential cumulative distribution function of y given inverse scale beta\nAvailable since 2.12\n \n\nreal exponential_lccdf(reals y | reals beta) The log of the exponential complementary cumulative distribution function of y given inverse scale beta\nAvailable since 2.12\n \n\nR exponential_rng(reals beta) Generate an exponential variate with inverse scale beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf the shape parameter \\(\\alpha \\in \\mathbb{R}^+\\) and the rate (or inverse scale) parameter \\(\\beta \\in \\mathbb{R}^+\\), then for \\(y\n\\in \\mathbb{R}^+\\), \\[\\begin{equation*} \\text{Gamma}(y|\\alpha,\\beta) =\n\\frac{\\beta^{\\alpha}} {\\Gamma(\\alpha)} \\, y^{\\alpha - 1}\n\\exp(-\\beta \\, y) . 
\\end{equation*}\\]\nUnder the shape and rate formulation of the Gamma distribution, \\(\\mathbb{E}[y] = \\alpha / \\beta\\) and \\(\\textrm{var}[y] = \\alpha / \\beta^2\\).\n\n\n\ny ~ gamma(alpha, beta)\nIncrement target log probability density with gamma_lupdf(y | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal gamma_lpdf(reals y | reals alpha, reals beta) The log of the gamma density of y given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nreal gamma_lupdf(reals y | reals alpha, reals beta) The log of the gamma density of y given shape alpha and inverse scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal gamma_cdf(reals y | reals alpha, reals beta) The cumulative gamma distribution function of y given shape alpha and inverse scale beta\nAvailable since 2.0\n \n\nreal gamma_lcdf(reals y | reals alpha, reals beta) The log of the cumulative gamma distribution function of y given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nreal gamma_lccdf(reals y | reals alpha, reals beta) The log of the complementary cumulative gamma distribution function of y given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nR gamma_rng(reals alpha, reals beta) Generate a gamma variate with shape alpha and inverse scale beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(y\n\\in \\mathbb{R}^+\\), \\[\\begin{equation*} \\text{InvGamma}(y|\\alpha,\\beta) =\n\\frac{\\beta^{\\alpha}} {\\Gamma(\\alpha)} \\ y^{-(\\alpha + 1)} \\,\n\\exp \\! \\left( \\! - \\beta \\, \\frac{1}{y} \\right) . 
\\end{equation*}\\]\n\n\n\ny ~ inv_gamma(alpha, beta)\nIncrement target log probability density with inv_gamma_lupdf(y | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal inv_gamma_lpdf(reals y | reals alpha, reals beta) The log of the inverse gamma density of y given shape alpha and scale beta\nAvailable since 2.12\n \n\nreal inv_gamma_lupdf(reals y | reals alpha, reals beta) The log of the inverse gamma density of y given shape alpha and scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal inv_gamma_cdf(reals y | reals alpha, reals beta) The inverse gamma cumulative distribution function of y given shape alpha and scale beta\nAvailable since 2.0\n \n\nreal inv_gamma_lcdf(reals y | reals alpha, reals beta) The log of the inverse gamma cumulative distribution function of y given shape alpha and scale beta\nAvailable since 2.12\n \n\nreal inv_gamma_lccdf(reals y | reals alpha, reals beta) The log of the inverse gamma complementary cumulative distribution function of y given shape alpha and scale beta\nAvailable since 2.12\n \n\nR inv_gamma_rng(reals alpha, reals beta) Generate an inverse gamma variate with shape alpha and scale beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in [0,\\infty)\\), \\[\\begin{equation*} \\text{Weibull}(y|\\alpha,\\sigma) =\n\\frac{\\alpha}{\\sigma} \\, \\left( \\frac{y}{\\sigma} \\right)^{\\alpha - 1}\n\\, \\exp \\! \\left( \\! - \\left( \\frac{y}{\\sigma} \\right)^{\\alpha}\n\\right) . 
\\end{equation*}\\]\nNote that if \\(Y \\sim \\text{Weibull}(\\alpha,\\sigma)\\), then \\(Y^{-1}\n\\sim \\text{Frechet}(\\alpha,\\sigma^{-1})\\).\n\n\n\ny ~ weibull(alpha, sigma)\nIncrement target log probability density with weibull_lupdf(y | alpha, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal weibull_lpdf(reals y | reals alpha, reals sigma) The log of the Weibull density of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nreal weibull_lupdf(reals y | reals alpha, reals sigma) The log of the Weibull density of y given shape alpha and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal weibull_cdf(reals y | reals alpha, reals sigma) The Weibull cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.0\n \n\nreal weibull_lcdf(reals y | reals alpha, reals sigma) The log of the Weibull cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nreal weibull_lccdf(reals y | reals alpha, reals sigma) The log of the Weibull complementary cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nR weibull_rng(reals alpha, reals sigma) Generate a weibull variate with shape alpha and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*} \\text{Frechet}(y|\\alpha,\\sigma) =\n\\frac{\\alpha}{\\sigma} \\, \\left( \\frac{y}{\\sigma} \\right)^{-\\alpha - 1}\n\\, \\exp \\! \\left( \\! - \\left( \\frac{y}{\\sigma} \\right)^{-\\alpha}\n\\right) . 
\\end{equation*}\\]\nNote that if \\(Y \\sim \\text{Frechet}(\\alpha,\\sigma)\\), then \\(Y^{-1}\n\\sim \\text{Weibull}(\\alpha,\\sigma^{-1})\\).\n\n\n\ny ~ frechet(alpha, sigma)\nIncrement target log probability density with frechet_lupdf(y | alpha, sigma).\nAvailable since 2.5\n \n\n\n\n\n \n\nreal frechet_lpdf(reals y | reals alpha, reals sigma) The log of the Frechet density of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nreal frechet_lupdf(reals y | reals alpha, reals sigma) The log of the Frechet density of y given shape alpha and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal frechet_cdf(reals y | reals alpha, reals sigma) The Frechet cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.5\n \n\nreal frechet_lcdf(reals y | reals alpha, reals sigma) The log of the Frechet cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nreal frechet_lccdf(reals y | reals alpha, reals sigma) The log of the Frechet complementary cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nR frechet_rng(reals alpha, reals sigma) Generate a Frechet variate with shape alpha and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in [0,\\infty)\\), \\[\\begin{equation*}\n\\text{Rayleigh}(y|\\sigma) = \\frac{y}{\\sigma^2} \\exp(-y^2 / 2\\sigma^2)\n\\!. 
\\end{equation*}\\]\n\n\n\ny ~ rayleigh(sigma)\nIncrement target log probability density with rayleigh_lupdf(y | sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal rayleigh_lpdf(reals y | reals sigma) The log of the Rayleigh density of y given scale sigma\nAvailable since 2.12\n \n\nreal rayleigh_lupdf(reals y | reals sigma) The log of the Rayleigh density of y given scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal rayleigh_cdf(real y | real sigma) The Rayleigh cumulative distribution of y given scale sigma\nAvailable since 2.0\n \n\nreal rayleigh_lcdf(real y | real sigma) The log of the Rayleigh cumulative distribution of y given scale sigma\nAvailable since 2.12\n \n\nreal rayleigh_lccdf(real y | real sigma) The log of the Rayleigh complementary cumulative distribution of y given scale sigma\nAvailable since 2.12\n \n\nR rayleigh_rng(reals sigma) Generate a Rayleigh variate with scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\alpha, \\beta \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*}\n\\text{Log-Logistic}(y|\\alpha,\\beta) =\n\\frac{\\ \\left(\\frac{\\beta}{\\alpha}\\right) \\left(\\frac{y}{\\alpha}\\right)^{\\beta-1}\\ }{\\left(1 + \\left(\\frac{y}{\\alpha}\\right)^\\beta\\right)^2} .\n\\end{equation*}\\]\n\n\n\ny ~ loglogistic(alpha, beta)\nIncrement target log probability density with unnormalized version of loglogistic_lpdf(y | alpha, beta)\nAvailable since 2.29\n \n\n\n\n\n \n\nreal loglogistic_lpdf(reals y | reals alpha, reals beta) The log of the log-logistic density of y given scale alpha and shape beta\nAvailable since 2.29\n \n\nreal loglogistic_cdf(reals y | reals alpha, reals beta) The log-logistic cumulative distribution function of y given scale alpha and shape beta\nAvailable since 2.29\n \n\nR loglogistic_rng(reals alpha, reals beta) Generate 
a log-logistic variate with scale alpha and shape beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.29", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#lognormal", + "href": "functions-reference/positive_continuous_distributions.html#lognormal", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in\n\\mathbb{R}^+\\), \\[\\begin{equation*} \\text{LogNormal}(y|\\mu,\\sigma) = \\frac{1}{\\sqrt{2\n\\pi} \\ \\sigma} \\, \\frac{1}{y} \\ \\exp \\! \\left( - \\, \\frac{1}{2}\n\\, \\left( \\frac{\\log y - \\mu}{\\sigma} \\right)^2 \\right) . \\end{equation*}\\]\n\n\n\ny ~ lognormal(mu, sigma)\nIncrement target log probability density with lognormal_lupdf(y | mu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal lognormal_lpdf(reals y | reals mu, reals sigma) The log of the lognormal density of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal lognormal_lupdf(reals y | reals mu, reals sigma) The log of the lognormal density of y given location mu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal lognormal_cdf(reals y | reals mu, reals sigma) The cumulative lognormal distribution function of y given location mu and scale sigma\nAvailable since 2.0\n \n\nreal lognormal_lcdf(reals y | reals mu, reals sigma) The log of the lognormal cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nreal lognormal_lccdf(reals y | reals mu, reals sigma) The log of the lognormal complementary cumulative distribution function of y given location mu and scale sigma\nAvailable since 2.12\n \n\nR lognormal_rng(reals mu, 
reals sigma) Generate a lognormal variate with location mu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.22", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#chi-square-distribution", + "href": "functions-reference/positive_continuous_distributions.html#chi-square-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\nu \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*}\n\\text{ChiSquare}(y|\\nu) = \\frac{2^{-\\nu/2}} {\\Gamma(\\nu / 2)} \\,\ny^{\\nu/2 - 1} \\, \\exp \\! \\left( -\\, \\frac{1}{2} \\, y \\right) . \\end{equation*}\\]\n\n\n\ny ~ chi_square(nu)\nIncrement target log probability density with chi_square_lupdf(y | nu).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal chi_square_lpdf(reals y | reals nu) The log of the Chi-square density of y given degrees of freedom nu\nAvailable since 2.12\n \n\nreal chi_square_lupdf(reals y | reals nu) The log of the Chi-square density of y given degrees of freedom nu dropping constant additive terms\nAvailable since 2.25\n \n\nreal chi_square_cdf(reals y | reals nu) The Chi-square cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.0\n \n\nreal chi_square_lcdf(reals y | reals nu) The log of the Chi-square cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.12\n \n\nreal chi_square_lccdf(reals y | reals nu) The log of the complementary Chi-square cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.12\n \n\nR chi_square_rng(reals nu) Generate a Chi-square variate with degrees of freedom nu; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#inverse-chi-square-distribution", + "href": "functions-reference/positive_continuous_distributions.html#inverse-chi-square-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\nu \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*}\n\\text{InvChiSquare}(y \\, | \\, \\nu) = \\frac{2^{-\\nu/2}} {\\Gamma(\\nu\n/ 2)} \\, y^{-\\nu/2 - 1} \\, \\exp\\! \\left( \\! - \\, \\frac{1}{2} \\,\n\\frac{1}{y} \\right) . \\end{equation*}\\]\n\n\n\ny ~ inv_chi_square(nu)\nIncrement target log probability density with inv_chi_square_lupdf(y | nu).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal inv_chi_square_lpdf(reals y | reals nu) The log of the inverse Chi-square density of y given degrees of freedom nu\nAvailable since 2.12\n \n\nreal inv_chi_square_lupdf(reals y | reals nu) The log of the inverse Chi-square density of y given degrees of freedom nu dropping constant additive terms\nAvailable since 2.25\n \n\nreal inv_chi_square_cdf(reals y | reals nu) The inverse Chi-squared cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.0\n \n\nreal inv_chi_square_lcdf(reals y | reals nu) The log of the inverse Chi-squared cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.12\n \n\nreal inv_chi_square_lccdf(reals y | reals nu) The log of the inverse Chi-squared complementary cumulative distribution function of y given degrees of freedom nu\nAvailable since 2.12\n \n\nR inv_chi_square_rng(reals nu) Generate an inverse Chi-squared variate with degrees of freedom nu; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#scaled-inverse-chi-square-distribution", + "href": "functions-reference/positive_continuous_distributions.html#scaled-inverse-chi-square-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\nu \\in \\mathbb{R}^+\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y\n\\in \\mathbb{R}^+\\), \\[\\begin{equation*} \\text{ScaledInvChiSquare}(y|\\nu,\\sigma) =\n\\frac{(\\nu / 2)^{\\nu/2}} {\\Gamma(\\nu / 2)} \\, \\sigma^{\\nu} \\,\ny^{-(\\nu/2 + 1)} \\, \\exp \\! \\left( \\! - \\, \\frac{1}{2} \\, \\nu \\,\n\\sigma^2 \\, \\frac{1}{y} \\right) . \\end{equation*}\\]\n\n\n\ny ~ scaled_inv_chi_square(nu, sigma)\nIncrement target log probability density with scaled_inv_chi_square_lupdf(y | nu, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal scaled_inv_chi_square_lpdf(reals y | reals nu, reals sigma) The log of the scaled inverse Chi-square density of y given degrees of freedom nu and scale sigma\nAvailable since 2.12\n \n\nreal scaled_inv_chi_square_lupdf(reals y | reals nu, reals sigma) The log of the scaled inverse Chi-square density of y given degrees of freedom nu and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal scaled_inv_chi_square_cdf(reals y | reals nu, reals sigma) The scaled inverse Chi-square cumulative distribution function of y given degrees of freedom nu and scale sigma\nAvailable since 2.0\n \n\nreal scaled_inv_chi_square_lcdf(reals y | reals nu, reals sigma) The log of the scaled inverse Chi-square cumulative distribution function of y given degrees of freedom nu and scale sigma\nAvailable since 2.12\n \n\nreal scaled_inv_chi_square_lccdf(reals y | reals nu, reals sigma) The log 
of the scaled inverse Chi-square complementary cumulative distribution function of y given degrees of freedom nu and scale sigma\nAvailable since 2.12\n \n\nR scaled_inv_chi_square_rng(reals nu, reals sigma) Generate a scaled inverse Chi-squared variate with degrees of freedom nu and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#exponential-distribution", + "href": "functions-reference/positive_continuous_distributions.html#exponential-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If inverse scale (rate) \\(\\beta \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*}\n\\text{Exponential}(y|\\beta) = \\beta \\, \\exp ( - \\beta \\, y ) . 
\\end{equation*}\\]\n\n\n\ny ~ exponential(beta)\nIncrement target log probability density with exponential_lupdf(y | beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal exponential_lpdf(reals y | reals beta) The log of the exponential density of y given inverse scale beta\nAvailable since 2.12\n \n\nreal exponential_lupdf(reals y | reals beta) The log of the exponential density of y given inverse scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal exponential_cdf(reals y | reals beta) The exponential cumulative distribution function of y given inverse scale beta\nAvailable since 2.0\n \n\nreal exponential_lcdf(reals y | reals beta) The log of the exponential cumulative distribution function of y given inverse scale beta\nAvailable since 2.12\n \n\nreal exponential_lccdf(reals y | reals beta) The log of the exponential complementary cumulative distribution function of y given inverse scale beta\nAvailable since 2.12\n \n\nR exponential_rng(reals beta) Generate an exponential variate with inverse scale beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#gamma-distribution", + "href": "functions-reference/positive_continuous_distributions.html#gamma-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If the shape parameter \\(\\alpha \\in \\mathbb{R}^+\\) and the rate (or inverse scale) parameter \\(\\beta \\in \\mathbb{R}^+\\), then for \\(y\n\\in \\mathbb{R}^+\\), \\[\\begin{equation*} \\text{Gamma}(y|\\alpha,\\beta) =\n\\frac{\\beta^{\\alpha}} {\\Gamma(\\alpha)} \\, y^{\\alpha - 1}\n\\exp(-\\beta \\, y) . 
\\end{equation*}\\]\nUnder the shape and rate formulation of the Gamma distribution, \\(\\mathbb{E}[y] = \\alpha / \\beta\\) and \\(\\textrm{var}[y] = \\alpha / \\beta^2\\).\n\n\n\ny ~ gamma(alpha, beta)\nIncrement target log probability density with gamma_lupdf(y | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal gamma_lpdf(reals y | reals alpha, reals beta) The log of the gamma density of y given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nreal gamma_lupdf(reals y | reals alpha, reals beta) The log of the gamma density of y given shape alpha and inverse scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal gamma_cdf(reals y | reals alpha, reals beta) The cumulative gamma distribution function of y given shape alpha and inverse scale beta\nAvailable since 2.0\n \n\nreal gamma_lcdf(reals y | reals alpha, reals beta) The log of the cumulative gamma distribution function of y given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nreal gamma_lccdf(reals y | reals alpha, reals beta) The log of the complementary cumulative gamma distribution function of y given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nR gamma_rng(reals alpha, reals beta) Generate a gamma variate with shape alpha and inverse scale beta; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#inverse-gamma-distribution", + "href": "functions-reference/positive_continuous_distributions.html#inverse-gamma-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(y\n\\in \\mathbb{R}^+\\), \\[\\begin{equation*} \\text{InvGamma}(y|\\alpha,\\beta) =\n\\frac{\\beta^{\\alpha}} {\\Gamma(\\alpha)} \\ y^{-(\\alpha + 1)} \\,\n\\exp \\! \\left( \\! - \\beta \\, \\frac{1}{y} \\right) . \\end{equation*}\\]\n\n\n\ny ~ inv_gamma(alpha, beta)\nIncrement target log probability density with inv_gamma_lupdf(y | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal inv_gamma_lpdf(reals y | reals alpha, reals beta) The log of the inverse gamma density of y given shape alpha and scale beta\nAvailable since 2.12\n \n\nreal inv_gamma_lupdf(reals y | reals alpha, reals beta) The log of the inverse gamma density of y given shape alpha and scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal inv_gamma_cdf(reals y | reals alpha, reals beta) The inverse gamma cumulative distribution function of y given shape alpha and scale beta\nAvailable since 2.0\n \n\nreal inv_gamma_lcdf(reals y | reals alpha, reals beta) The log of the inverse gamma cumulative distribution function of y given shape alpha and scale beta\nAvailable since 2.12\n \n\nreal inv_gamma_lccdf(reals y | reals alpha, reals beta) The log of the inverse gamma complementary cumulative distribution function of y given shape alpha and scale beta\nAvailable since 2.12\n \n\nR inv_gamma_rng(reals alpha, reals beta) Generate an inverse gamma variate with shape alpha and scale beta; may 
only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#weibull-distribution", + "href": "functions-reference/positive_continuous_distributions.html#weibull-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in [0,\\infty)\\), \\[\\begin{equation*} \\text{Weibull}(y|\\alpha,\\sigma) =\n\\frac{\\alpha}{\\sigma} \\, \\left( \\frac{y}{\\sigma} \\right)^{\\alpha - 1}\n\\, \\exp \\! \\left( \\! - \\left( \\frac{y}{\\sigma} \\right)^{\\alpha}\n\\right) . \\end{equation*}\\]\nNote that if \\(Y \\sim \\text{Weibull}(\\alpha,\\sigma)\\), then \\(Y^{-1}\n\\sim \\text{Frechet}(\\alpha,\\sigma^{-1})\\).\n\n\n\ny ~ weibull(alpha, sigma)\nIncrement target log probability density with weibull_lupdf(y | alpha, sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal weibull_lpdf(reals y | reals alpha, reals sigma) The log of the Weibull density of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nreal weibull_lupdf(reals y | reals alpha, reals sigma) The log of the Weibull density of y given shape alpha and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal weibull_cdf(reals y | reals alpha, reals sigma) The Weibull cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.0\n \n\nreal weibull_lcdf(reals y | reals alpha, reals sigma) The log of the Weibull cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nreal weibull_lccdf(reals y | reals alpha, reals sigma) The log of the Weibull complementary cumulative distribution 
function of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nR weibull_rng(reals alpha, reals sigma) Generate a weibull variate with shape alpha and scale sigma; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#frechet-distribution", + "href": "functions-reference/positive_continuous_distributions.html#frechet-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*} \\text{Frechet}(y|\\alpha,\\sigma) =\n\\frac{\\alpha}{\\sigma} \\, \\left( \\frac{y}{\\sigma} \\right)^{-\\alpha - 1}\n\\, \\exp \\! \\left( \\! - \\left( \\frac{y}{\\sigma} \\right)^{-\\alpha}\n\\right) . 
\\end{equation*}\\]\nNote that if \\(Y \\propto \\text{Frechet}(\\alpha,\\sigma)\\), then \\(Y^{-1}\n\\propto \\text{Weibull}(\\alpha,\\sigma^{-1})\\).\n\n\n\ny ~ frechet(alpha, sigma)\nIncrement target log probability density with frechet_lupdf(y | alpha, sigma).\nAvailable since 2.5\n \n\n\n\n\n \n\nreal frechet_lpdf(reals y | reals alpha, reals sigma) The log of the Frechet density of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nreal frechet_lupdf(reals y | reals alpha, reals sigma) The log of the Frechet density of y given shape alpha and scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal frechet_cdf(reals y | reals alpha, reals sigma) The Frechet cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.5\n \n\nreal frechet_lcdf(reals y | reals alpha, reals sigma) The log of the Frechet cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nreal frechet_lccdf(reals y | reals alpha, reals sigma) The log of the Frechet complementary cumulative distribution function of y given shape alpha and scale sigma\nAvailable since 2.12\n \n\nR frechet_rng(reals alpha, reals sigma) Generate a Frechet variate with shape alpha and scale sigma; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#rayleigh-distribution", + "href": "functions-reference/positive_continuous_distributions.html#rayleigh-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\sigma \\in \\mathbb{R}^+\\), then for \\(y \\in [0,\\infty)\\), \\[\\begin{equation*}\n\\text{Rayleigh}(y|\\sigma) = \\frac{y}{\\sigma^2} \\exp(-y^2 / 2\\sigma^2)\n\\!. \\end{equation*}\\]\n\n\n\ny ~ rayleigh(sigma)\nIncrement target log probability density with rayleigh_lupdf(y | sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal rayleigh_lpdf(reals y | reals sigma) The log of the Rayleigh density of y given scale sigma\nAvailable since 2.12\n \n\nreal rayleigh_lupdf(reals y | reals sigma) The log of the Rayleigh density of y given scale sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal rayleigh_cdf(real y | real sigma) The Rayleigh cumulative distribution of y given scale sigma\nAvailable since 2.0\n \n\nreal rayleigh_lcdf(real y | real sigma) The log of the Rayleigh cumulative distribution of y given scale sigma\nAvailable since 2.12\n \n\nreal rayleigh_lccdf(real y | real sigma) The log of the Rayleigh complementary cumulative distribution of y given scale sigma\nAvailable since 2.12\n \n\nR rayleigh_rng(reals sigma) Generate a Rayleigh variate with scale sigma; may only be used in generated quantities block. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/positive_continuous_distributions.html#log-logistic-distribution", + "href": "functions-reference/positive_continuous_distributions.html#log-logistic-distribution", + "title": "Positive Continuous Distributions", + "section": "", + "text": "If \\(\\alpha, \\beta \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\), \\[\\begin{equation*}\n\\text{Log-Logistic}(y|\\alpha,\\beta) =\n\\frac{\\ \\left(\\frac{\\beta}{\\alpha}\\right) \\left(\\frac{y}{\\alpha}\\right)^{\\beta-1}\\ }{\\left(1 + \\left(\\frac{y}{\\alpha}\\right)^\\beta\\right)^2} .\n\\end{equation*}\\]\n\n\n\ny ~ loglogistic(alpha, beta)\nIncrement target log probability density with unnormalized version of loglogistic_lpdf(y | alpha, beta)\nAvailable since 2.29\n \n\n\n\n\n \n\nreal loglogistic_lpdf(reals y | reals alpha, reals beta) The log of the log-logistic density of y given scale alpha and shape beta\nAvailable since 2.29\n \n\nreal loglogistic_cdf(reals y | reals alpha, reals beta) The log-logistic cumulative distribution function of y given scale alpha and shape beta\nAvailable since 2.29\n \n\nR loglogistic_rng(reals alpha, reals beta) Generate a log-logistic variate with scale alpha and shape beta; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.29", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/mixed_operations.html", + "href": "functions-reference/mixed_operations.html", + "title": "Mixed Operations", + "section": "", + "text": "Mixed Operations\nThese functions perform conversions between Stan containers matrix, vector, row vector and arrays.\n \n\nmatrix to_matrix(matrix m) Return the matrix m itself.\nAvailable since 2.3\n \n\ncomplex_matrix to_matrix(complex_matrix m) Return the matrix m itself.\nAvailable since 2.30\n \n\nmatrix to_matrix(vector v) Convert the column vector v to a size(v) by 1 matrix.\nAvailable since 2.3\n \n\ncomplex_matrix to_matrix(complex_vector v) Convert the column vector v to a size(v) by 1 matrix.\nAvailable since 2.30\n \n\nmatrix to_matrix(row_vector v) Convert the row vector v to a 1 by size(v) matrix.\nAvailable since 2.3\n \n\ncomplex_matrix to_matrix(complex_row_vector v) Convert the row vector v to a 1 by size(v) matrix.\nAvailable since 2.30\n \n\nmatrix to_matrix(matrix M, int m, int n) Convert a matrix A to a matrix with m rows and n columns filled in column-major order.\nAvailable since 2.15\n \n\ncomplex_matrix to_matrix(complex_matrix M, int m, int n) Convert a matrix A to a matrix with m rows and n columns filled in column-major order.\nAvailable since 2.30\n \n\nmatrix to_matrix(vector v, int m, int n) Convert a vector v to a matrix with m rows and n columns filled in column-major order.\nAvailable since 2.15\n \n\ncomplex_matrix to_matrix(complex_vector v, int m, int n) Convert a vector v to a matrix with m rows and n columns filled in column-major order.\nAvailable since 2.30\n \n\nmatrix to_matrix(row_vector v, int m, int n) Convert a row_vector v to a matrix with m rows and n columns filled in column-major order.\nAvailable since 2.15\n 
\n\ncomplex_matrix to_matrix(complex_row_vector v, int m, int n) Convert a row vector v to a matrix with m rows and n columns filled in column-major order.\nAvailable since 2.30\n \n\nmatrix to_matrix(matrix A, int m, int n, int col_major) Convert a matrix A to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.15\n \n\ncomplex_matrix to_matrix(complex_matrix A, int m, int n, int col_major) Convert a matrix A to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.30\n \n\nmatrix to_matrix(vector v, int m, int n, int col_major) Convert a vector v to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.15\n \n\ncomplex_matrix to_matrix(complex_vector v, int m, int n, int col_major) Convert a vector v to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.30\n \n\nmatrix to_matrix(row_vector v, int m, int n, int col_major) Convert a row vector v to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.15\n \n\ncomplex_matrix to_matrix(complex_row_vector v, int m, int n, int col_major) Convert a row vector v to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.30\n \n\nmatrix to_matrix(array[] real a, int m, int n) Convert a one-dimensional array a to a matrix with m rows and n columns filled in column-major order.\nAvailable since 2.15\n \n\nmatrix to_matrix(array[] int a, int m, int n) Convert a one-dimensional array a to a matrix with m rows and n columns filled in 
column-major order.\nAvailable since 2.15\n \n\ncomplex_matrix to_matrix(array[] complex a, int m, int n) Convert a one-dimensional array a to a matrix with m rows and n columns filled in column-major order.\nAvailable since 2.30\n \n\nmatrix to_matrix(array[] real a, int m, int n, int col_major) Convert a one-dimensional array a to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.15\n \n\nmatrix to_matrix(array[] int a, int m, int n, int col_major) Convert a one-dimensional array a to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.15\n \n\ncomplex_matrix to_matrix(array[] complex a, int m, int n, int col_major) Convert a one-dimensional array a to a matrix with m rows and n columns filled in row-major order if col_major equals 0 (otherwise, they get filled in column-major order).\nAvailable since 2.30\n \n\nmatrix to_matrix(array[] row_vector vs) Convert a one-dimensional array of row vectors to a matrix, where the size of the array is the number of rows of the resulting matrix and the length of row vectors is the number of columns.\nAvailable since 2.28\n \n\ncomplex_matrix to_matrix(array[] complex_row_vector vs) Convert a one-dimensional array of row vectors to a matrix, where the size of the array is the number of rows of the resulting matrix and the length of row vectors is the number of columns.\nAvailable since 2.30\n \n\nmatrix to_matrix(array[,] real a) Convert the two dimensional array a to a matrix with the same dimensions and indexing order.\nAvailable since 2.3\n \n\nmatrix to_matrix(array[,] int a) Convert the two dimensional array a to a matrix with the same dimensions and indexing order. 
If any of the dimensions of a are zero, the result will be a \\(0 \\times 0\\) matrix.\nAvailable since 2.3\n \n\ncomplex_matrix to_matrix(array[,] complex a ) Convert the two dimensional array a to a matrix with the same dimensions and indexing order.\nAvailable since 2.30\n \n\nvector to_vector(matrix m) Convert the matrix m to a column vector in column-major order.\nAvailable since 2.0\n \n\ncomplex_vector to_vector(complex_matrix m) Convert the matrix m to a column vector in column-major order.\nAvailable since 2.30\n \n\nvector to_vector(vector v) Return the column vector v itself.\nAvailable since 2.3\n \n\ncomplex_vector to_vector(complex_vector v) Return the column vector v itself.\nAvailable since 2.30\n \n\nvector to_vector(row_vector v) Convert the row vector v to a column vector.\nAvailable since 2.3\n \n\ncomplex_vector to_vector(complex_row_vector v) Convert the row vector v to a column vector.\nAvailable since 2.30\n \n\nvector to_vector(array[] real a) Convert the one-dimensional array a to a column vector.\nAvailable since 2.3\n \n\nvector to_vector(array[] int a) Convert the one-dimensional integer array a to a column vector.\nAvailable since 2.3\n \n\ncomplex_vector to_vector(array[] complex a) Convert the one-dimensional complex array a to a column vector.\nAvailable since 2.30\n \n\nrow_vector to_row_vector(matrix m) Convert the matrix m to a row vector in column-major order.\nAvailable since 2.3\n \n\ncomplex_row_vector to_row_vector(complex_matrix m) Convert the matrix m to a row vector in column-major order.\nAvailable since 2.30\n \n\nrow_vector to_row_vector(vector v) Convert the column vector v to a row vector.\nAvailable since 2.3\n \n\ncomplex_row_vector to_row_vector(complex_vector v) Convert the column vector v to a row vector.\nAvailable since 2.30\n \n\nrow_vector to_row_vector(row_vector v) Return the row vector v itself.\nAvailable since 2.3\n \n\ncomplex_row_vector to_row_vector(complex_row_vector v) Return the row vector v 
itself.\nAvailable since 2.30\n \n\nrow_vector to_row_vector(array[] real a) Convert the one-dimensional array a to a row vector.\nAvailable since 2.3\n \n\nrow_vector to_row_vector(array[] int a) Convert the one-dimensional array a to a row vector.\nAvailable since 2.3\n \n\ncomplex_row_vector to_row_vector(array[] complex a) Convert the one-dimensional complex array a to a row vector.\nAvailable since 2.30\n \n\narray[,] real to_array_2d(matrix m) Convert the matrix m to a two dimensional array with the same dimensions and indexing order.\nAvailable since 2.3\n \n\narray[,] complex to_array_2d(complex_matrix m) Convert the matrix m to a two dimensional array with the same dimensions and indexing order.\nAvailable since 2.30\n \n\narray[] real to_array_1d(vector v) Convert the column vector v to a one-dimensional array.\nAvailable since 2.3\n \n\narray[] complex to_array_1d(complex_vector v) Convert the column vector v to a one-dimensional array.\nAvailable since 2.30\n \n\narray[] real to_array_1d(row_vector v) Convert the row vector v to a one-dimensional array.\nAvailable since 2.3\n \n\narray[] complex to_array_1d(complex_row_vector v) Convert the row vector v to a one-dimensional array.\nAvailable since 2.30\n \n\narray[] real to_array_1d(matrix m) Convert the matrix m to a one-dimensional array in column-major order.\nAvailable since 2.3\n \n\narray[] complex to_array_1d(complex_matrix m) Convert the matrix m to a one-dimensional array in column-major order.\nAvailable since 2.30\n \n\narray[] real to_array_1d(array[...] real a) Convert the array a (of any dimension up to 10) to a one-dimensional array in row-major order.\nAvailable since 2.3\n \n\narray[] int to_array_1d(array[...] int a) Convert the array a (of any dimension up to 10) to a one-dimensional array in row-major order.\nAvailable since 2.3\n \n\narray[] complex to_array_1d(array[...] 
complex a) Convert the array a (of any dimension up to 10) to a one-dimensional array in row-major order.\nAvailable since 2.30\n\n\n\n\n Back to top", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Mixed Operations" + ] + }, + { + "objectID": "functions-reference/mathematical_functions.html", + "href": "functions-reference/mathematical_functions.html", + "title": "Mathematical Functions", + "section": "", + "text": "This appendix provides the definition of several mathematical functions used throughout the manual.\n\n\nThe beta function, \\(\\text{B}(a, b)\\), computes the normalizing constant for the beta distribution, and is defined for \\(a > 0\\) and \\(b\n> 0\\) by \\[\\begin{equation*} \\text{B}(a,b) \\ = \\ \\int_0^1 u^{a - 1} (1 - u)^{b - 1} \\,\ndu \\ = \\ \\frac{\\Gamma(a) \\, \\Gamma(b)}{\\Gamma(a+b)} \\, , \\end{equation*}\\] where \\(\\Gamma(x)\\) is the Gamma function.\n\n\n\nThe incomplete beta function, \\(\\text{B}(x; a, b)\\), is defined for \\(x\n\\in [0, 1]\\) and \\(a, b \\geq 0\\) such that \\(a + b \\neq 0\\) by \\[\\begin{equation*}\n\\text{B}(x; \\, a, b) \\ = \\ \\int_0^x u^{a - 1} \\, (1 - u)^{b - 1} \\,\ndu, \\end{equation*}\\] where \\(\\text{B}(a, b)\\) is the beta function defined in appendix. If \\(x = 1\\), the incomplete beta function reduces to the beta function, \\(\\text{B}(1; a, b) = \\text{B}(a, b)\\).\nThe regularized incomplete beta function divides the incomplete beta function by the beta function, \\[\\begin{equation*} I_x(a, b) \\ = \\ \\frac{\\text{B}(x; \\,\na, b)}{B(a, b)} \\, . \\end{equation*}\\]\n\n\n\nThe gamma function, \\(\\Gamma(x)\\), is the generalization of the factorial function to continuous variables, defined so that for positive integers \\(n\\), \\[\\begin{equation*} \\Gamma(n+1) = n! \\end{equation*}\\] Generalizing to all positive numbers and non-integer negative numbers, \\[\\begin{equation*} \\Gamma(x) =\n\\int_0^{\\infty} u^{x - 1} \\exp(-u) \\, du. 
\\end{equation*}\\]\n\n\n\nThe digamma function \\(\\Psi\\) is the derivative of the \\(\\log \\Gamma\\) function, \\[\\begin{equation*}\n\\Psi(u) \\ = \\ \\frac{d}{d u} \\log \\Gamma(u) \\ = \\ \\frac{1}{\\Gamma(u)} \\ \\frac{d}{d u} \\Gamma(u).\n\\end{equation*}\\]", + "crumbs": [ + "Functions Reference", + "Appendix", + "Mathematical Functions" + ] + }, + { + "objectID": "functions-reference/mathematical_functions.html#beta-appendix", + "href": "functions-reference/mathematical_functions.html#beta-appendix", + "title": "Mathematical Functions", + "section": "", + "text": "The beta function, \\(\\text{B}(a, b)\\), computes the normalizing constant for the beta distribution, and is defined for \\(a > 0\\) and \\(b\n> 0\\) by \\[\\begin{equation*} \\text{B}(a,b) \\ = \\ \\int_0^1 u^{a - 1} (1 - u)^{b - 1} \\,\ndu \\ = \\ \\frac{\\Gamma(a) \\, \\Gamma(b)}{\\Gamma(a+b)} \\, , \\end{equation*}\\] where \\(\\Gamma(x)\\) is the Gamma function.", + "crumbs": [ + "Functions Reference", + "Appendix", + "Mathematical Functions" + ] + }, + { + "objectID": "functions-reference/mathematical_functions.html#inc-beta-appendix", + "href": "functions-reference/mathematical_functions.html#inc-beta-appendix", + "title": "Mathematical Functions", + "section": "", + "text": "The incomplete beta function, \\(\\text{B}(x; a, b)\\), is defined for \\(x\n\\in [0, 1]\\) and \\(a, b \\geq 0\\) such that \\(a + b \\neq 0\\) by \\[\\begin{equation*}\n\\text{B}(x; \\, a, b) \\ = \\ \\int_0^x u^{a - 1} \\, (1 - u)^{b - 1} \\,\ndu, \\end{equation*}\\] where \\(\\text{B}(a, b)\\) is the beta function defined in appendix. If \\(x = 1\\), the incomplete beta function reduces to the beta function, \\(\\text{B}(1; a, b) = \\text{B}(a, b)\\).\nThe regularized incomplete beta function divides the incomplete beta function by the beta function, \\[\\begin{equation*} I_x(a, b) \\ = \\ \\frac{\\text{B}(x; \\,\na, b)}{B(a, b)} \\, . 
\\end{equation*}\\]", + "crumbs": [ + "Functions Reference", + "Appendix", + "Mathematical Functions" + ] + }, + { + "objectID": "functions-reference/mathematical_functions.html#gamma-appendix", + "href": "functions-reference/mathematical_functions.html#gamma-appendix", + "title": "Mathematical Functions", + "section": "", + "text": "The gamma function, \\(\\Gamma(x)\\), is the generalization of the factorial function to continuous variables, defined so that for positive integers \\(n\\), \\[\\begin{equation*} \\Gamma(n+1) = n! \\end{equation*}\\] Generalizing to all positive numbers and non-integer negative numbers, \\[\\begin{equation*} \\Gamma(x) =\n\\int_0^{\\infty} u^{x - 1} \\exp(-u) \\, du. \\end{equation*}\\]", + "crumbs": [ + "Functions Reference", + "Appendix", + "Mathematical Functions" + ] + }, + { + "objectID": "functions-reference/mathematical_functions.html#digamma-appendix", + "href": "functions-reference/mathematical_functions.html#digamma-appendix", + "title": "Mathematical Functions", + "section": "", + "text": "The digamma function \\(\\Psi\\) is the derivative of the \\(\\log \\Gamma\\) function, \\[\\begin{equation*}\n\\Psi(u) \\ = \\ \\frac{d}{d u} \\log \\Gamma(u) \\ = \\ \\frac{1}{\\Gamma(u)} \\ \\frac{d}{d u} \\Gamma(u).\n\\end{equation*}\\]", + "crumbs": [ + "Functions Reference", + "Appendix", + "Mathematical Functions" + ] + }, + { + "objectID": "functions-reference/index.html", + "href": "functions-reference/index.html", + "title": "Stan Functions Reference", + "section": "", + "text": "This is the reference for the functions defined in the Stan math library and available in the Stan programming language.\nFor more information on the Stan language and inference engines and how to use Stan for Bayesian inference, see\n\nthe Stan User’s Guide. The Stan user’s guide provides example models and programming techniques for coding statistical models in Stan. 
It also serves as an example-driven introduction to Bayesian modeling and inference:\nthe Stan Reference Manual. Stan’s modeling language is shared across all of its interfaces. The Stan Language Reference Manual provides a concise definition of the language syntax for all elements in the language together with an overview of the inference algorithms and posterior inference tools.\n\nDownload the pdf version of this manual.\n\nCopyright and trademark\n\nCopyright 2011–2025, Stan Development Team and their assignees.\nThe Stan name and logo are registered trademarks of NumFOCUS.\n\n\n\nLicensing\n\nText content: CC-BY ND 4.0 license\nComputer code: BSD 3-clause license\nLogo: Stan logo usage guidelines\n\n\n\n\n\n Back to top", + "crumbs": [ + "Functions Reference", + "Stan Functions Reference" + ] + }, + { + "objectID": "functions-reference/hidden_markov_models.html", + "href": "functions-reference/hidden_markov_models.html", + "title": "Hidden Markov Models", + "section": "", + "text": "An elementary first-order Hidden Markov model is a probabilistic model over \\(N\\) observations, \\(y_n\\), and \\(N\\) hidden states, \\(x_n\\), which can be fully defined by the conditional distributions \\(p(y_n \\mid x_n, \\phi)\\) and \\(p(x_n \\mid x_{n - 1}, \\phi)\\). Here we make the dependency on additional model parameters, \\(\\phi\\), explicit. When \\(x\\) is continuous, the user can explicitly encode these distributions in Stan and use Markov chain Monte Carlo to integrate \\(x\\) out.\nWhen each state \\(x\\) takes a value over a discrete and finite set, say \\(\\{1, 2, ..., K\\}\\), we can take advantage of the dependency structure to marginalize \\(x\\) and compute \\(p(y \\mid \\phi)\\). 
We start by defining the conditional observational distribution, stored in a \\(K \\times N\\) matrix \\(\\omega\\) with \\[\n\\omega_{kn} = p(y_n \\mid x_n = k, \\phi).\n\\] Next, we introduce the \\(K \\times K\\) transition matrix, \\(\\Gamma\\), with \\[\n \\Gamma_{ij} = p(x_n = j \\mid x_{n - 1} = i, \\phi).\n\\] Each row defines a probability distribution and must therefore be a simplex (i.e. its components must add to 1). Currently, Stan only supports stationary transitions where a single transition matrix is used for all transitions. Finally we define the initial state \\(K\\)-vector \\(\\rho\\), with \\[\n \\rho_k = p(x_0 = k \\mid \\phi).\n\\]\nThe Stan functions that support this type of model are special in that the user does not explicitly pass \\(y\\) and \\(\\phi\\) as arguments. Instead, the user passes \\(\\log \\omega\\), \\(\\Gamma\\), and \\(\\rho\\), which in turn depend on \\(y\\) and \\(\\phi\\).\n\n\n \n\nreal hmm_marginal(matrix log_omega, matrix Gamma, vector rho) Returns the log probability density of \\(y\\), with \\(x_n\\) integrated out at each iteration.\nAvailable since 2.24\nThe arguments represent (1) the log density of each output, (2) the transition matrix, and (3) the initial state vector.\n\nlog_omega: \\(\\log \\omega_{kn} = \\log p(y_n \\mid x_n = k, \\phi)\\), log density of each output,\nGamma: \\(\\Gamma_{ij} = p(x_n = j | x_{n - 1} = i, \\phi)\\), the transition matrix,\nrho: \\(\\rho_k = p(x_0 = k \\mid \\phi)\\), the initial state probability.\n\n \n\narray[] int hmm_latent_rng(matrix log_omega, matrix Gamma, vector rho) Returns a length \\(N\\) array of integers over \\(\\{1, ..., K\\}\\), sampled from the joint posterior distribution of the hidden states, \\(p(x \\mid \\phi, y)\\). 
May only be used in transformed data and generated quantities.\nAvailable since 2.24\n \n\nmatrix hmm_hidden_state_prob(matrix log_omega, matrix Gamma, vector rho) Returns the matrix of marginal posterior probabilities of each hidden state value. This will be a \\(K \\times N\\) matrix. The \\(n^\\mathrm{th}\\) column is a simplex of probabilities for the \\(n^\\mathrm{th}\\) variable. Moreover, let \\(A\\) be the output. Then \\(A_{ij} = p(x_j = i \\mid \\phi, y)\\). This function may only be used in transformed data and generated quantities.\nAvailable since 2.24", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Hidden Markov Models" + ] + }, + { + "objectID": "functions-reference/hidden_markov_models.html#hmm-stan-functions", + "href": "functions-reference/hidden_markov_models.html#hmm-stan-functions", + "title": "Hidden Markov Models", + "section": "", + "text": "real hmm_marginal(matrix log_omega, matrix Gamma, vector rho) Returns the log probability density of \\(y\\), with \\(x_n\\) integrated out at each iteration.\nAvailable since 2.24\nThe arguments represent (1) the log density of each output, (2) the transition matrix, and (3) the initial state vector.\n\nlog_omega: \\(\\log \\omega_{kn} = \\log p(y_n \\mid x_n = k, \\phi)\\), log density of each output,\nGamma: \\(\\Gamma_{ij} = p(x_n = j | x_{n - 1} = i, \\phi)\\), the transition matrix,\nrho: \\(\\rho_k = p(x_0 = k \\mid \\phi)\\), the initial state probability.\n\n \n\narray[] int hmm_latent_rng(matrix log_omega, matrix Gamma, vector rho) Returns a length \\(N\\) array of integers over \\(\\{1, ..., K\\}\\), sampled from the joint posterior distribution of the hidden states, \\(p(x \\mid \\phi, y)\\). May only be used in transformed data and generated quantities.\nAvailable since 2.24\n \n\nmatrix hmm_hidden_state_prob(matrix log_omega, matrix Gamma, vector rho) Returns the matrix of marginal posterior probabilities of each hidden state value. 
This will be a \\(K \\times N\\) matrix. The \\(n^\\mathrm{th}\\) column is a simplex of probabilities for the \\(n^\\mathrm{th}\\) variable. Moreover, let \\(A\\) be the output. Then \\(A_{ij} = p(x_j = i \\mid \\phi, y)\\). This function may only be used in transformed data and generated quantities.\nAvailable since 2.24", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Hidden Markov Models" + ] + }, + { + "objectID": "functions-reference/embedded_laplace.html", + "href": "functions-reference/embedded_laplace.html", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "The embedded Laplace approximation can be used to approximate certain marginal and conditional distributions that arise in latent Gaussian models. Embedded Laplace replaces explicit sampling of (high-dimensional) Gaussian latent variables with a local Gaussian approximation. In doing so, it marginalizes out the latent Gaussian variables. Inference can then be performed on the remaining, often low-dimensional, parameters. The embedded Laplace approximation in Stan is best suited for latent Gaussian models when jointly sampling over all model parameters is expensive and the conditional posterior of the Gaussian latent variables is reasonably close to Gaussian.\nFor observed data \\(y\\), latent Gaussian variables \\(\\theta\\), and hyperparameters \\(\\phi\\), a latent Gaussian model observes the following hierarchical structure: \\[\\begin{eqnarray}\n \\phi &\\sim& p(\\phi), \\\\\n \\theta &\\sim& \\text{MultiNormal}(0, K(\\phi)), \\\\\n y &\\sim& p(y \\mid \\theta, \\phi).\n\\end{eqnarray}\\] In this formulation, \\(p(y \\mid \\theta, \\phi)\\) is the data model that specifies how observations are generated conditional on \\(\\theta\\) and \\(\\phi\\). \\(K(\\phi)\\) denotes the prior covariance matrix for the latent Gaussian variables \\(\\theta\\) and is parameterized by \\(\\phi\\). 
The prior on \\(\\theta\\) is centered at 0, however an offset can always be added when specifying the data model \\(p(y \\mid \\theta, \\phi)\\).\nConditioning on observations \\(y\\) we obtain the joint posterior \\(p(\\phi, \\theta \\mid y) \\propto p(y \\mid \\theta, \\phi) p(\\theta |\n\\phi) p(\\phi)\\), where \\(p(y \\mid \\theta, \\phi)\\) as function of \\(\\theta\\) and \\(\\phi\\) is the likelihood function. To sample from the joint posterior, we can either use a standard method, such as Markov chain Monte Carlo, or we can follow a two-step procedure:\n\nsample from the marginal posterior \\(p(\\phi \\mid y)\\),\nsample from the conditional posterior \\(p(\\theta \\mid y, \\phi)\\).\n\nIn the above procedure, neither the marginal posterior nor the conditional posterior are typically available in closed form and so they must be approximated. The marginal posterior can be written as \\(p(\\phi \\mid y) \\propto p(y \\mid \\phi) p(\\phi)\\), where \\(p(y \\mid \\phi) = \\int p(y \\mid \\phi, \\theta) p(\\theta) \\text{d}\\theta\\) is called the marginal likelihood. The Laplace method approximates \\(p(y \\mid \\phi, \\theta) p(\\theta)\\) with a normal distribution centered at the mode, \\[\n \\theta^* = \\underset{\\theta}{\\text{argmax}} \\ \\log p(\\theta \\mid y, \\phi),\n\\] and \\(\\theta^*\\) is obtained using a numerical optimizer. The resulting Gaussian integral can be evaluated analytically to obtain an approximation to the log marginal likelihood \\(\\log \\hat p(y \\mid \\phi) \\approx \\log p(y \\mid \\phi)\\). Specifically: \\[\n \\hat p(y \\mid \\phi) = \\frac{p(\\theta^* \\mid \\phi) p(y \\mid \\theta^*, \\phi)}{\\hat p (\\theta^* \\mid \\phi, y)}.\n\\]\nCombining this marginal likelihood with the prior in the model block, we can then sample from the marginal posterior \\(p(\\phi \\mid y)\\) using one of Stan’s algorithms. The marginal posterior is lower dimensional and likely to have a simpler geometry leading to more efficient inference. 
On the other hand, each marginal likelihood computation is more costly, and the combined change in efficiency depends on the application.\nTo obtain posterior draws for \\(\\theta\\), we sample from the normal approximation to \\(p(\\theta \\mid y, \\phi)\\) in generated quantities. The process of iteratively sampling from \\(p(\\phi \\mid y)\\) (say, with MCMC) and then \\(p(\\theta \\mid y, \\phi)\\) produces posterior draws from the joint posterior \\(p(\\theta, \\phi \\mid y)\\).\nThe Laplace approximation is especially useful if \\(p(y \\mid \\phi, \\theta)\\) as a function of \\(\\theta\\) is log-concave, e.g., in the case of Poisson, binomial, negative-binomial, and Bernoulli. (The likelihood of a normal model is also log-concave; however, when the likelihood is normal, marginalization can be performed exactly and does not require an approximation.) Stan’s embedded Laplace approximation is restricted to the case where the prior \\(p(\\theta \\mid \\phi)\\) is multivariate normal. Furthermore, the likelihood \\(p(y \\mid \\phi, \\theta)\\) must be computed using only operations which support higher-order derivatives (see section specifying the likelihood function).\nThe Laplace approximation can also be useful in generated quantities to marginalize out latent variables even if the sampling had been done using the full joint posterior.\n\n\nIn the model block, we increment target with laplace_marginal, a function that approximates the log marginal likelihood \\(\\log p(y \\mid \\phi)\\). The signature of the function is:\n \n\nreal laplace_marginal(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nwhich returns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\).\nThe embedded Laplace functions accept two functors whose user defined arguments are passed in as tuples to laplace_marginal.\n\nlikelihood_function - user-specified log likelihood whose first argument is the vector of latent Gaussian variables \\(\\theta\\). The subsequent arguments are user defined.\n\n\nreal likelihood_function(vector theta, likelihood_arguments_1, likelihood_arguments_2, ...).\n\n\nlikelihood_arguments - A tuple of arguments whose internal members are passed to the log likelihood function. This tuple does NOT include the latent variable \\(\\theta\\).\nhessian_block_size - the block size of the Hessian of the log likelihood, \\(\\partial^2 \\log p(y \\mid \\theta, \\phi) / \\partial \\theta^2\\).\ncovariance_function - A function that returns the covariance matrix of the multivariate normal prior on \\(\\theta\\).\n\n\nmatrix covariance_function(covariance_argument_1, covariance_argument_2, ...).\n\n\ncovariance_arguments - A tuple of the arguments whose internal members will be passed to the covariance function.\n\nAvailable since 2.39\nBelow we go over each argument in more detail.\n\n\n\nThe first step to use the embedded Laplace approximation is to write down a function in the functions block which returns the log likelihood \\(\\log p(y \\mid \\theta, \\phi)\\).\nThere are a few constraints on this function:\n\nThe function return type must be real.\nThe first argument must be the latent Gaussian variable \\(\\theta\\) and must have type vector.\nThe operations in the function must support higher-order automatic differentiation (AD). Most functions in Stan support higher-order AD. 
The exceptions are functions with specialized calls for reverse-mode AD, and these are higher-order functions (algebraic solvers, differential equation solvers, and integrators), the marginalization function for hidden Markov models (HMM) function, and the embedded Laplace approximation itself.\n\nThe base signature of the function is\nreal likelihood_function(vector theta, ...)\nThe ... represents a set of optional variadic arguments. There is no type restrictions for the variadic arguments ... and each argument can be passed as data or parameter.\nThe tuple after likelihood_function contains the arguments that get passed to likelihood_function excluding \\(\\theta\\). For instance, if a user defined likelihood uses a real and a matrix, the likelihood function’s signature would first have a vector and then a real and matrix argument.\nreal likelihood_fun(vector theta, real a, matrix X)\nThe call to the laplace marginal would start with this likelihood and tuple holding the other likelihood arguments. We do not need to pass theta, since it is marginalized out and therefore does not appear explicitly as a passed parameter.\nreal val = laplace_marginal(likelihood_fun, (a, X), hessian_block_size, ...);\nIf the likelihood_function has only one argument, the tuple syntax is (a, ).\nAs always, users should use parameter arguments only when necessary in order to speed up differentiation. In general, we recommend marking data only arguments with the keyword data, for example,\nreal likelihood_function(vector theta, data vector x, ...)\nIn addition to the likelihood function, users must specify the block size of the Hessian, \\(\\partial^2 \\log p(y \\mid \\theta, \\phi) / \\partial \\theta^2\\). The Hessian is often block diagonal and this structure can be taken advantage of for fast computation. 
For example, if \\(y_i\\) only depends on \\(\\theta_i\\), then the Hessian is diagonal and hessian_block_size=1,\nreal val = laplace_marginal(likelihood_fun, (a, X), 1, ...);\nOn the other hand, if the Hessian is not block diagonal, we can always set hessian_block_size=n where \\(n\\) is the size of \\(\\theta\\).\n\n\n\nThe argument covariance_function returns the prior covariance matrix \\(K\\). The signature for this function is the same as a standard Stan function. Its return type must be a matrix of size \\(n \\times n\\) where \\(n\\) is the size of \\(\\theta\\).\nmatrix covariance_function(...)\nThe ... represents a set of optional variadic arguments. There are no type restrictions for the variadic arguments ... and each argument can be passed as data or parameter. The variable \\(\\phi\\) is implicitly defined as the collection of all non-data arguments passed to likelihood_function (excluding \\(\\theta\\)) and covariance_function.\nThe tuple after covariance_function contains the arguments that get passed to covariance_function. 
For instance, if a user defined covariance function uses a real and a matrix\nmatrix cov_fun(real b, matrix Z)\nthe call to the Laplace marginal would include the covariance function and a tuple holding the covariance function arguments.\nreal val = laplace_marginal(likelihood_fun, (a, X), hessian_block_size, cov_fun, (b, Z));\nIf the covariance_function has only one argument, the tuple syntax is (b, ).\n\n\n\nIt is also possible to specify control parameters, which can help improve the optimization that underlies the Laplace approximation, using laplace_marginal_tol with the following signature:\n \n\nreal laplace_marginal_tol(function likelihood_function, tuple(...), int hessian_block_size, function covariance_function, tuple(...), tuple(vector, real, int, int, int, int) tolerances)\nThe final argument, tolerances, is a tuple with the following elements\ntuple(vector theta_init, real tol, int max_steps, int solver,\n int max_steps_linesearch, int allow_fallback)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) and allows the user to tune the control parameters of the approximation.\n\ntheta_init: the initial guess for a Newton solver when finding the mode of \\(p(\\theta \\mid y, \\phi)\\). By default, it is a zero-vector.\ntol: the tolerance \\(\\epsilon\\) of the optimizer. Specifically, the optimizer stops when \\(||\\nabla \\log p(\\theta \\mid y, \\phi)|| \\le \\epsilon\\). By default, the value is \\(\\epsilon \\approx 1.49 \\times 10^{-8}\\), which is the square-root of machine precision.\nmax_steps: the maximum number of steps taken by the optimizer before it gives up (in which case the Metropolis proposal gets rejected). The default is 500 steps.\nsolver: choice of Newton solver. The optimizer underlying the Laplace approximation does one of three matrix decompositions to compute a Newton step. The problem determines which decomposition is numerically stable. 
By default (solver=1), the solver attempts a Cholesky decomposition of the negative Hessian of the log likelihood, \\(- \\partial^2 \\log p(y \\mid \\theta, \\phi) / \\partial \\theta^2\\). This operation is legal if the negative Hessian is positive-definite, which will always be true when the likelihood as a function of \\(\\theta\\) is log concave. If solver=2, the solver makes a Cholesky decomposition of the covariance matrix \\(K(\\phi)\\). Since a covariance matrix is always positive-definite, computing its Cholesky decomposition is always a legal operation, at least in theory. In practice, we may not be able to compute the Cholesky decomposition of the negative Hessian nor of the covariance matrix, either because it does not exist or because of numerical issues. In that case, we can use solver=3 which uses a more expensive but less specialized approach to compute a Newton step.\nmax_steps_linesearch: maximum number of steps in linesearch. The linesearch adjusts the step size to ensure that a Newton step leads to an increase in the objective function (i.e., \\(f(\\theta) = p(\\theta \\mid \\phi, y)\\)). If a standard Newton step does not improve the objective function, the step is adjusted iteratively until the objective function increases or the maximum number of steps in the linesearch is reached. By default, max_steps_linesearch=1000. Setting max_steps_linesearch=0 results in no linesearch.\nallow_fallback: If the user-specified solver fails, this flag determines whether to fall back to the next solver. For example, if the user specifies solver=1 but the Cholesky decomposition of the negative Hessian \\(- \\partial^2 \\log p(y \\mid \\theta, \\phi) / \\partial \\theta^2\\) fails, the optimizer will try solver=2 instead. By default, allow_fallback = 1 (TRUE).\n\nAvailable since 2.39\nThe embedded Laplace approximation’s options have a helper callable generate_laplace_options(int theta_size) that will generate the tuple for the user. 
This can be useful for quickly setting up the control parameters in the transformed data block to reuse within the model.\ntuple(vector[theta_size], real, int, int, int, int) laplace_ops = generate_laplace_options(theta_size);\n// Modify solver type\nlaplace_ops.4 = 2;\n// Turn off fallback\nlaplace_ops.6 = 0;\n \n\ntuple(vector, real, int, int, int, int) generate_laplace_options(int dimension)\nCreate a default Laplace options tuple for a theta_init of size dimension.\nAvailable since 2.39\n \n\ntuple(vector, real, int, int, int, int) generate_laplace_options(vector theta_init)\nCreate a default Laplace options tuple containing theta_init.\nAvailable since 2.39\n\n\n\nIn generated quantities, it is possible to sample from the Laplace approximation of \\(p(\\theta \\mid \\phi, y)\\) using laplace_latent_rng. The signature for laplace_latent_rng follows closely the signature for laplace_marginal:\n \n\nvector laplace_latent_rng(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nSamples from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\).\nAvailable since 2.39\nOnce again, it is possible to specify control parameters:\n \n\nvector laplace_latent_tol_rng(function likelihood_function, tuple(...), int hessian_block_size, function covariance_function, tuple(...), tuple(vector, real, int, int, int, int) tolerances) Samples from the approximate conditional posterior \\(p(\\theta \\mid y, \\phi)\\) and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n\n\n\nStan provides convenient wrappers for the embedded Laplace approximation when applied to latent Gaussian models with certain likelihoods arising from some common data models. With these wrappers, the likelihood is pre-specified and does not need to be specified by the user. 
The selection of supported likelihoods is currently narrow and expected to grow. The wrappers exist for the user’s convenience but are not more computationally efficient than specifying log likelihoods in the functions block.\n\n\nGiven count data, with each observed count \\(y_i\\) associated with a group \\(g(i)\\) and a corresponding latent variable \\(\\theta_{g(i)}\\), and a Poisson model, the likelihood is \\[\np(y \\mid \\theta, \\phi) = \\prod_i\\text{Poisson} (y_i \\mid \\exp(\\theta_{g(i)} + m_{g(i)})),\n\\] where \\(m_{g(i)}\\) acts as an offset for \\(\\theta_{g(i)}\\). This can also be interpreted as a prior mean on \\(\\theta_{g(i)}\\). The arguments required to compute this likelihood are:\n\ny: an array of counts.\ny_index: an array whose \\(i^\\text{th}\\) element indicates to which group the \\(i^\\text{th}\\) observation belongs.\nm: a vector of offsets or prior means for \\(\\theta\\).\n\n \n\ny ~ laplace_marginal_poisson_log(y_index, m, hessian_block_size, covariance_function, covariance_arguments)\nIncrement target log probability density with laplace_marginal_poisson_log_lupmf(y | y_index, m, hessian_block_size, covariance_function, covariance_arguments).\nAvailable since 2.39\n \n\ny ~ laplace_marginal_tol_poisson_log(y_index, m, hessian_block_size, covariance_function, covariance_arguments, tolerances)\nIncrement target log probability density with laplace_marginal_tol_poisson_log_lupmf(y | y_index, m, hessian_block_size, covariance_function, covariance_arguments, tolerances).\nAvailable since 2.39\nThe signatures for the embedded Laplace approximation function with a Poisson likelihood are\n \n real laplace_marginal_poisson_log_lpmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link.\nAvailable since 2.39\n \n real laplace_marginal_tol_poisson_log_lpmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link, and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n \n real laplace_marginal_poisson_log_lupmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link, dropping constant terms.\nAvailable since 2.39\n \n real laplace_marginal_tol_poisson_log_lupmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link, and allows the user to tune the control parameters of the approximation, dropping constant terms.\nAvailable since 2.39\n \n vector laplace_latent_poisson_log_rng(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments) Returns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link.\nAvailable since 2.39\n \n\nvector laplace_latent_tol_poisson_log_rng(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n\n\n\nThe negative Binomial distribution generalizes the Poisson distribution by introducing the dispersion parameter \\(\\eta\\). The corresponding likelihood is then \\[\np(y \\mid \\theta, \\phi) = \\prod_i\\text{NegBinomial2} (y_i \\mid \\exp(\\theta_{g(i)} + m_{g(i)}), \\eta).\n\\] Here we use the alternative parameterization implemented in Stan, meaning that \\[\n\\mathbb E(y_i) = \\exp (\\theta_{g(i)} + m_{g(i)}), \\\\\n\\text{Var}(y_i) = \\mathbb E(y_i) + \\frac{(\\mathbb E(y_i))^2}{\\eta}.\n\\] The arguments for the likelihood function are:\n\ny: the observed counts\ny_index: an array whose \\(i^\\text{th}\\) element indicates to which group the \\(i^\\text{th}\\) observation belongs to.\neta: the overdispersion parameter.\nm: a vector of offsets or prior means for \\(\\theta\\).\n\n \n\ny ~ laplace_marginal_neg_binomial_2_log(y_index, eta, m, hessian_block_size, covariance_function, covariance_arguments)\nIncrement target log probability density with laplace_marginal_neg_binomial_2_log_lupmf(y | y_index, eta, m, hessian_block_size, covariance_function, covariance_arguments).\nAvailable since 2.39\n \n\ny ~ laplace_marginal_tol_neg_binomial_2_log(y_index, eta, 
m, hessian_block_size, covariance_function, covariance_arguments, tolerances)\nIncrement target log probability density with laplace_marginal_tol_neg_binomial_2_log_lupmf(y | y_index, eta, m, hessian_block_size, covariance_function, covariance_arguments, tolerances).\nAvailable since 2.39\nThe function signatures for the embedded Laplace approximation with a negative Binomial likelihood are\n \n\nreal laplace_marginal_neg_binomial_2_log_lpmf(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative Binomial distribution with a log link.\nAvailable since 2.39\n \n\nreal laplace_marginal_tol_neg_binomial_2_log_lpmf(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative Binomial distribution with a log link, and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n \n\nreal laplace_marginal_neg_binomial_2_log_lupmf(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative Binomial distribution with a log link, dropping constant terms.\nAvailable since 2.39\n \n\nreal laplace_marginal_tol_neg_binomial_2_log_lupmf(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative Binomial distribution with a log link, and allows the user to tune the control parameters of the approximation, dropping constant terms.\nAvailable since 2.39\n \n\nvector laplace_latent_neg_binomial_2_log_rng(array[] int y, array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative binomial distribution with a log link.\nAvailable since 2.39\n \n\nvector laplace_latent_tol_neg_binomial_2_log_rng(array[] int y, array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative binomial distribution with a log link and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n\n\n\nGiven binary outcome \\(y_i \\in \\{0, 1\\}\\) and Bernoulli model, the likelihood is \\[\np(y \\mid \\theta, \\phi) = \\prod_i\\text{Bernoulli} (y_i \\mid \\text{logit}^{-1}(\\theta_{g(i)} + m_{g(i)})).\n\\] The arguments of the likelihood function are:\n\ny: the observed counts\ny_index: an array whose \\(i^\\text{th}\\) element indicates to which group the \\(i^\\text{th}\\) observation belongs to.\nm: a vector of offsets or prior means for \\(\\theta\\).\n\n \n\ny ~ laplace_marginal_bernoulli_logit(y_index, m, hessian_block_size, covariance_function, covariance_arguments)\nIncrement target log probability density with laplace_marginal_bernoulli_logit_lupmf(y | y_index, m, hessian_block_size, covariance_function, covariance_arguments).\nAvailable since 2.39\n \n\ny ~ laplace_marginal_tol_bernoulli_logit(y_index, m, hessian_block_size, covariance_function, covariance_arguments, tolerances)\nIncrement target log probability density with laplace_marginal_tol_bernoulli_logit_lupmf(y | y_index, m, hessian_block_size, covariance_function, covariance_arguments, tolerances).\nAvailable since 2.39\nThe function signatures for the embedded Laplace approximation with a Bernoulli likelihood are\n \n\nreal laplace_marginal_bernoulli_logit_lpmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a bernoulli distribution with a logit link.\nAvailable since 2.39\n \n\nreal laplace_marginal_tol_bernoulli_logit_lpmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a bernoulli distribution with a logit link and allows the user to tune the control parameters.\nAvailable since 2.39\n \n\nreal laplace_marginal_bernoulli_logit_lupmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a bernoulli distribution with a logit link, dropping constant terms.\nAvailable since 2.39\n \n\nreal laplace_marginal_tol_bernoulli_logit_lupmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a bernoulli distribution with a logit link and allows the user to tune the control parameters, dropping constant terms.\nAvailable since 2.39\n \n\nvector laplace_latent_bernoulli_logit_rng(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Bernoulli distribution with a logit link.\nAvailable since 2.39\n \n\nvector laplace_latent_tol_bernoulli_logit_rng(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Bernoulli distribution with a logit link, and lets the user tune the control parameters of the approximation.\nAvailable since 2.39", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "functions-reference/embedded_laplace.html#approximating-the-log-marginal-likelihood-log-py-mid-phi", + "href": "functions-reference/embedded_laplace.html#approximating-the-log-marginal-likelihood-log-py-mid-phi", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "In the model block, we increment target with laplace_marginal, a function that approximates the log marginal likelihood \\(\\log p(y \\mid \\phi)\\). The signature of the function is:\n \n\nreal laplace_marginal(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nwhich returns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\).\nThe embedded Laplace functions accept two functors whose user defined arguments are passed in as tuples to laplace_marginal.\n\nlikelihood_function - user-specified log likelihood whose first argument is the vector of latent Gaussian variables \\(\\theta\\). 
The subsequent arguments are user defined.\n\n\nreal likelihood_function(vector theta, likelihood_arguments_1, likelihood_arguments_2, ...).\n\n\nlikelihood_arguments - A tuple of arguments whose internal members are passed to the log likelihood function. This tuple does NOT include the latent variable \\(\\theta\\).\nhessian_block_size - the block size of the Hessian of the log likelihood, \\(\\partial^2 \\log p(y \\mid \\theta, \\phi) / \\partial \\theta^2\\).\ncovariance_function - A function that returns the covariance matrix of the multivariate normal prior on \\(\\theta\\).\n\n\nmatrix covariance_function(covariance_argument_1, covariance_argument_2, ...).\n\n\ncovariance_arguments - A tuple of the arguments whose internal members will be passed to the covariance function.\n\nAvailable since 2.39\nBelow we go over each argument in more detail.", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "functions-reference/embedded_laplace.html#laplace-likelihood_spec", + "href": "functions-reference/embedded_laplace.html#laplace-likelihood_spec", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "The first step to use the embedded Laplace approximation is to write down a function in the functions block which returns the log likelihood \\(\\log p(y \\mid \\theta, \\phi)\\).\nThere are a few constraints on this function:\n\nThe function return type must be real.\nThe first argument must be the latent Gaussian variable \\(\\theta\\) and must have type vector.\nThe operations in the function must support higher-order automatic differentiation (AD). Most functions in Stan support higher-order AD. 
The exceptions are functions with specialized calls for reverse-mode AD, and these are higher-order functions (algebraic solvers, differential equation solvers, and integrators), the marginalization function for hidden Markov models (HMM) function, and the embedded Laplace approximation itself.\n\nThe base signature of the function is\nreal likelihood_function(vector theta, ...)\nThe ... represents a set of optional variadic arguments. There is no type restrictions for the variadic arguments ... and each argument can be passed as data or parameter.\nThe tuple after likelihood_function contains the arguments that get passed to likelihood_function excluding \\(\\theta\\). For instance, if a user defined likelihood uses a real and a matrix, the likelihood function’s signature would first have a vector and then a real and matrix argument.\nreal likelihood_fun(vector theta, real a, matrix X)\nThe call to the laplace marginal would start with this likelihood and tuple holding the other likelihood arguments. We do not need to pass theta, since it is marginalized out and therefore does not appear explicitly as a passed parameter.\nreal val = laplace_marginal(likelihood_fun, (a, X), hessian_block_size, ...);\nIf the likelihood_function has only one argument, the tuple syntax is (a, ).\nAs always, users should use parameter arguments only when necessary in order to speed up differentiation. In general, we recommend marking data only arguments with the keyword data, for example,\nreal likelihood_function(vector theta, data vector x, ...)\nIn addition to the likelihood function, users must specify the block size of the Hessian, \\(\\partial^2 \\log p(y \\mid \\theta, \\phi) / \\partial \\theta^2\\). The Hessian is often block diagonal and this structure can be taken advantage of for fast computation. 
For example, if \\(y_i\\) only depends on \\(\\theta_i\\), then the Hessian is diagonal and hessian_block_size=1,\nreal val = laplace_marginal(likelihood_fun, (a, X), 1, ...);\nOn the other hand, if the Hessian is not block diagonal, we can always set hessian_block_size=n where \\(n\\) is the size of \\(\\theta\\).", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "functions-reference/embedded_laplace.html#specifying-the-covariance-function", + "href": "functions-reference/embedded_laplace.html#specifying-the-covariance-function", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "The argument covariance_function returns the prior covariance matrix \\(K\\). The signature for this function is the same as a standard Stan function. Its return type must be a matrix of size \\(n \\times n\\) where \\(n\\) is the size of \\(\\theta\\).\nmatrix covariance_function(...)\nThe ... represents a set of optional variadic arguments. There are no type restrictions for the variadic arguments ... and each argument can be passed as data or parameter. The variable \\(\\phi\\) is implicitly defined as the collection of all non-data arguments passed to likelihood_function (excluding \\(\\theta\\)) and covariance_function.\nThe tuple after covariance_function contains the arguments that get passed to covariance_function. 
For instance, if a user defined covariance function uses a real and a matrix\nmatrix cov_fun(real b, matrix Z)\nthe call to the Laplace marginal would include the covariance function and a tuple holding the covariance function arguments.\nreal val = laplace_marginal(likelihood_fun, (a, X), hessian_block_size, cov_fun, (b, Z));\nIf the covariance_function has only one argument, the tuple syntax is (b, ).", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "functions-reference/embedded_laplace.html#control-parameters", + "href": "functions-reference/embedded_laplace.html#control-parameters", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "It is also possible to specify control parameters, which can help improve the optimization that underlies the Laplace approximation, using laplace_marginal_tol with the following signature:\n \n\nreal laplace_marginal_tol(function likelihood_function, tuple(...), int hessian_block_size, function covariance_function, tuple(...), tuple(vector, real, int, int, int, int) tolerances)\nThe final argument, tolerances, is a tuple with the following elements\ntuple(vector theta_init, real tol, int max_steps, int solver,\n int max_steps_linesearch, int allow_fallback)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) and allows the user to tune the control parameters of the approximation.\n\ntheta_init: the initial guess for a Newton solver when finding the mode of \\(p(\\theta \\mid y, \\phi)\\). By default, it is a zero-vector.\ntol: the tolerance \\(\\epsilon\\) of the optimizer. Specifically, the optimizer stops when \\(||\\nabla \\log p(\\theta \\mid y, \\phi)|| \\le \\epsilon\\). 
By default, the value is \\(\\epsilon \\approx 1.49 \\times 10^{-8}\\), which is the square-root of machine precision.\nmax_num_steps: the maximum number of steps taken by the optimizer before it gives up (in which case the Metropolis proposal gets rejected). The default is 500 steps.\nsolver: choice of Newton solver. The optimizer underlying the Laplace approximation does one of three matrix decompositions to compute a Newton step. The problem determines which decomposition is numerically stable. By default (solver=1), the solver attempts a Cholesky decomposition of the negative Hessian of the log likelihood, \\(- \\partial^2 \\log p(y \\mid \\theta, \\phi) / \\partial^2 \\theta\\). This operation is legal if the negative Hessian is positive-definite, which will always be true when the likelihood as function of \\(\\theta\\) is log concave. If solver=2, the solver makes a Cholesky decomposition of the covariance matrix \\(K(\\phi)\\). Since a covariance matrix is always positive-definite, computing its Cholesky decomposition is always a legal operation, at least in theory. In practice, we may not be able to compute the Cholesky decomposition of the negative Hessian nor of the covariance matrix, either because it does not exist or because of numerical issues. In that case, we can use solver=3 which uses a more expensive but less specialized approach to compute a Newton step.\nmax_steps_linesearch: maximum number of steps in linesearch. The linesearch adjusts to step size to ensure that a Newton step leads to an increase in the objective function (i.e., \\(f(\\theta) = p(\\theta \\mid \\phi, y)\\)). If a standard Newton step does not improve the objective function, the step is adjusted iteratively until the objective function increases or the maximum number of steps in the linesearch is reached. By default, max_steps_linesearch=1000. 
Setting max_steps_linesearch=0 results in no linesearch.\nallow_fallback: If user set solver fails, this flag determines whether to fallback to the next solver. For example, if the user specifies solver=1 but the Cholesky decomposition of the negative Hessian \\(- \\partial^2 \\log p(y \\mid \\theta, \\phi) / \\partial^2 \\theta\\) fails, the optimizer will try solver=2 instead. By default, allow_fallback = 1 (TRUE).\n\nAvailable since 2.39\nThe embedded Laplace approximation’s options have a helper callable generate_laplace_options(int theta_size) that will generate the tuple for the user. This can be useful for quickly setting up the control parameters in the transformed data block to reuse within the model.\ntuple(vector[theta_size], real, int, int, int, int, int) laplace_ops = generate_laplace_options(theta_size);\n// Modify solver type\nlaplace_ops.5 = 2;\n// Turn off fallthrough\nlaplace_ops.7 = 0;\n \n\ntuple(vector, real, int, int, int, int) generate_laplace_options(int dimension)\nCreate a default laplace options tuple for a theta_init of size dimension.\nAvailable since 2.39\n \n\ntuple(vector, real, int, int, int, int) generate_laplace_options(vector theta_init)\nCreate a default Laplace options tuple containing theta_init.\nAvailable since 2.39", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "functions-reference/embedded_laplace.html#sample-from-the-approximate-conditional-hatptheta-mid-y-phi", + "href": "functions-reference/embedded_laplace.html#sample-from-the-approximate-conditional-hatptheta-mid-y-phi", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "In generated quantities, it is possible to sample from the Laplace approximation of \\(p(\\theta \\mid \\phi, y)\\) using laplace_latent_rng. 
The signature for laplace_latent_rng follows closely the signature for laplace_marginal:\n \n\nvector laplace_latent_rng(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nSamples from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\).\nAvailable since 2.39\nOnce again, it is possible to specify control parameters:\n \n\nvector laplace_latent_tol_rng(function likelihood_function, tuple(...), int hessian_block_size, function covariance_function, tuple(...), tuple(vector, real, int, int, int, int) tolerances) Samples from the approximate conditional posterior \\(p(\\theta \\mid y, \\phi)\\) and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "functions-reference/embedded_laplace.html#built-in-laplace-marginal-likelihood-functions", + "href": "functions-reference/embedded_laplace.html#built-in-laplace-marginal-likelihood-functions", + "title": "Embedded Laplace Approximation", + "section": "", + "text": "Stan provides convenient wrappers for the embedded Laplace approximation when applied to latent Gaussian models with certain likelihoods arising from some common data models. With this wrapper, the likelihood is pre-specified and does not need to be specified by the user. The selection of supported likelihoods is currently narrow and expected to grow. 
The wrappers exist for the user’s convenience but are not more computationally efficient than specifying log likelihoods in the functions block.\n\n\nGiven count data, with each observed count \\(y_i\\) associated with a group \\(g(i)\\) and a corresponding latent variable \\(\\theta_{g(i)}\\), and a Poisson model, the likelihood is \\[\np(y \\mid \\theta, \\phi) = \\prod_i\\text{Poisson} (y_i \\mid \\exp(\\theta_{g(i)} + m_{g(i)})),\n\\] where \\(m_{g(i)}\\) acts as an offset for \\(\\theta_{g(i)}\\). This can also be interpreted as a prior mean on \\(\\theta_{g(i)}\\). The arguments required to compute this likelihood are:\n\ny: an array of counts.\ny_index: an array whose \\(i^\\text{th}\\) element indicates to which group the \\(i^\\text{th}\\) observation belongs to.\nm: a vector of offsets or prior means for \\(\\theta\\).\n\n \n\ny ~ laplace_marginal_poisson_log(y_index, m, hessian_block_size, covariance_function, covariance_arguments)\nIncrement target log probability density with laplace_marginal_poisson_log_lupmf(y | y_index, m, hessian_block_size, covariance_function, covariance_arguments).\nAvailable since 2.39\n \n\ny ~ laplace_marginal_tol_poisson_log(y_index, m, hessian_block_size, covariance_function, covariance_arguments, tolerances)\nIncrement target log probability density with laplace_marginal_tol_poisson_log_lupmf(y | y_index, m, hessian_block_size, covariance_function, covariance_arguments, tolerances).\nAvailable since 2.39\nThe signatures for the embedded Laplace approximation function with a Poisson likelihood are\n \n real laplace_marginal_poisson_log_lpmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link.\nAvailable since 2.39\n \n real laplace_marginal_tol_poisson_log_lpmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link, and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n \n real laplace_marginal_poisson_log_lupmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link, dropping constant terms.\nAvailable since 2.39\n \n real laplace_marginal_tol_poisson_log_lupmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link, and allows the user to tune the control parameters of the approximation, dropping constant terms.\nAvailable since 2.39\n \n vector laplace_latent_poisson_log_rng(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments) Returns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link.\nAvailable since 2.39\n \n\nvector laplace_latent_tol_poisson_log_rng(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Poisson distribution with a log link and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n\n\n\nThe negative Binomial distribution generalizes the Poisson distribution by introducing the dispersion parameter \\(\\eta\\). The corresponding likelihood is then \\[\np(y \\mid \\theta, \\phi) = \\prod_i\\text{NegBinomial2} (y_i \\mid \\exp(\\theta_{g(i)} + m_{g(i)}), \\eta).\n\\] Here we use the alternative parameterization implemented in Stan, meaning that \\[\n\\mathbb E(y_i) = \\exp (\\theta_{g(i)} + m_{g(i)}), \\\\\n\\text{Var}(y_i) = \\mathbb E(y_i) + \\frac{(\\mathbb E(y_i))^2}{\\eta}.\n\\] The arguments for the likelihood function are:\n\ny: the observed counts\ny_index: an array whose \\(i^\\text{th}\\) element indicates to which group the \\(i^\\text{th}\\) observation belongs to.\neta: the overdispersion parameter.\nm: a vector of offsets or prior means for \\(\\theta\\).\n\n \n\ny ~ laplace_marginal_neg_binomial_2_log(y_index, eta, m, hessian_block_size, covariance_function, covariance_arguments)\nIncrement target log probability density with laplace_marginal_neg_binomial_2_log_lupmf(y | y_index, eta, m, hessian_block_size, covariance_function, covariance_arguments).\nAvailable since 2.39\n \n\ny ~ laplace_marginal_tol_neg_binomial_2_log(y_index, eta, 
m, hessian_block_size, covariance_function, covariance_arguments, tolerances)\nIncrement target log probability density with laplace_marginal_tol_neg_binomial_2_log_lupmf(y | y_index, eta, m, hessian_block_size, covariance_function, covariance_arguments, tolerances).\nAvailable since 2.39\nThe function signatures for the embedded Laplace approximation with a negative Binomial likelihood are\n \n\nreal laplace_marginal_neg_binomial_2_log_lpmf(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative Binomial distribution with a log link.\nAvailable since 2.39\n \n\nreal laplace_marginal_tol_neg_binomial_2_log_lpmf(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative Binomial distribution with a log link, and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n \n\nreal laplace_marginal_neg_binomial_2_log_lupmf(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative Binomial distribution with a log link, dropping constant terms.\nAvailable since 2.39\n \n\nreal laplace_marginal_tol_neg_binomial_2_log_lupmf(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative Binomial distribution with a log link, and allows the user to tune the control parameters of the approximation, dropping constant terms.\nAvailable since 2.39\n \n\nvector laplace_latent_neg_binomial_2_log_rng(array[] int y, array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative binomial distribution with a log link.\nAvailable since 2.39\n \n\nvector laplace_latent_tol_neg_binomial_2_log_rng(array[] int y, array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi, \\eta)\\) in the special case where the likelihood \\(p(y \\mid \\theta, \\eta)\\) is a Negative binomial distribution with a log link and allows the user to tune the control parameters of the approximation.\nAvailable since 2.39\n\n\n\nGiven binary outcome \\(y_i \\in \\{0, 1\\}\\) and Bernoulli model, the likelihood is \\[\np(y \\mid \\theta, \\phi) = \\prod_i\\text{Bernoulli} (y_i \\mid \\text{logit}^{-1}(\\theta_{g(i)} + m_{g(i)})).\n\\] The arguments of the likelihood function are:\n\ny: the observed counts\ny_index: an array whose \\(i^\\text{th}\\) element indicates to which group the \\(i^\\text{th}\\) observation belongs to.\nm: a vector of offsets or prior means for \\(\\theta\\).\n\n \n\ny ~ laplace_marginal_bernoulli_logit(y_index, m, hessian_block_size, covariance_function, covariance_arguments)\nIncrement target log probability density with laplace_marginal_bernoulli_logit_lupmf(y | y_index, m, hessian_block_size, covariance_function, covariance_arguments).\nAvailable since 2.39\n \n\ny ~ laplace_marginal_tol_bernoulli_logit(y_index, m, hessian_block_size, covariance_function, covariance_arguments, tolerances)\nIncrement target log probability density with laplace_marginal_tol_bernoulli_logit_lupmf(y | y_index, m, hessian_block_size, covariance_function, covariance_arguments, tolerances).\nAvailable since 2.39\nThe function signatures for the embedded Laplace approximation with a Bernoulli likelihood are\n \n\nreal laplace_marginal_bernoulli_logit_lpmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a bernoulli distribution with a logit link.\nAvailable since 2.39\n \n\nreal laplace_marginal_tol_bernoulli_logit_lpmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a bernoulli distribution with a logit link and allows the user to tune the control parameters.\nAvailable since 2.39\n \n\nreal laplace_marginal_bernoulli_logit_lupmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a bernoulli distribution with a logit link, dropping constant terms.\nAvailable since 2.39\n \n\nreal laplace_marginal_tol_bernoulli_logit_lupmf(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns an approximation to the log marginal likelihood \\(p(y \\mid \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a bernoulli distribution with a logit link and allows the user to tune the control parameters, dropping constant terms.\nAvailable since 2.39\n \n\nvector laplace_latent_bernoulli_logit_rng(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Bernoulli distribution with a logit link.\nAvailable since 2.39\n \n\nvector laplace_latent_tol_bernoulli_logit_rng(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances)\nReturns a draw from the Laplace approximation to the conditional posterior \\(p(\\theta \\mid y, \\phi)\\) in the special case where the likelihood \\(p(y \\mid \\theta)\\) is a Bernoulli distribution with a logit link, and lets the user tune the control parameters of the approximation.\nAvailable since 2.39", + "crumbs": [ + "Functions Reference", + "Additional Distributions", + "Embedded Laplace Approximation" + ] + }, + { + "objectID": "functions-reference/deprecated_functions.html", + "href": "functions-reference/deprecated_functions.html", + "title": "Deprecated Functions", + "section": "", + "text": "This appendix lists currently deprecated functionality along with how to replace it.\nStarting in Stan 2.29, deprecated functions with drop in replacements (such as the renaming of get_lp or multiply_log) will be removed 3 versions later e.g., functions deprecated in Stan 2.20 will be removed in Stan 2.23 and placed in Removed Functions. 
The Stan compiler can automatically update these on the behalf of the user for the entire deprecation window and at least one version following the removal.\n\n\nDeprecated: Using / with two integer arguments is interpreted as integer floor division, such that\n\\[ 1 / 2 = 0 \\]\nThis is deprecated due to its confusion with real-valued division, where\n\\[ 1.0 / 2.0 = 0.5 \\]\nReplacement: Use the integer division operator operator%/% instead.\n\n\n\nThese ODE integrator functions have been replaced by those described in Ordinary Differential Equation (ODE) Solvers.\n\n\nA system of ODEs is specified as an ordinary function in Stan within the functions block. The ODE system function must have this function signature:\narray[] real ode(real time, array[] real state, array[] real theta,\n array[] real x_r, array[] int x_i);\nThe ODE system function should return the derivative of the state with respect to time at the time provided. The length of the returned real array must match the length of the state input into the function.\nThe arguments to this function are:\n\ntime, the time to evaluate the ODE system\nstate, the state of the ODE system at the time specified\ntheta, parameter values used to evaluate the ODE system\nx_r, data values used to evaluate the ODE system\nx_i, integer data values used to evaluate the ODE system.\n\nThe ODE system function separates parameter values, theta, from data values, x_r, for efficiency in computing the gradients of the ODE.\n\n\n\n \n\narray[,] real integrate_ode_rk45(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.\nAvailable since 2.10, deprecated in 2.24\n \n\narray[,] real integrate_ode_rk45(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int 
x_i, real rel_tol, real abs_tol, int max_num_steps) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method with additional control parameters for the solver.\nAvailable since 2.10, deprecated in 2.24\n \n\narray[,] real integrate_ode(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.\nAvailable since 2.10, deprecated in 2.24\n \n\narray[,] real integrate_ode_adams(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i) Solves the ODE system for the times provided using the Adams-Moulton method.\nAvailable since 2.23, deprecated in 2.24\n \n\narray[,] real integrate_ode_adams(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i, data real rel_tol, data real abs_tol, data int max_num_steps) Solves the ODE system for the times provided using the Adams-Moulton method with additional control parameters for the solver.\nAvailable since 2.23, deprecated in 2.24\n\n\n\n \n\narray[,] real integrate_ode_bdf(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i) Solves the ODE system for the times provided using the backward differentiation formula (BDF) method.\nAvailable since 2.10, deprecated in 2.24\n \n\narray[,] real integrate_ode_bdf(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i, data real rel_tol, data real abs_tol, data int max_num_steps) Solves the ODE system for the times provided using the backward differentiation formula (BDF) method with 
additional control parameters for the solver.\nAvailable since 2.10, deprecated in 2.24\n\n\n\nThe arguments to the ODE solvers in both the stiff and non-stiff cases are as follows.\n\node: function literal referring to a function specifying the system of differential equations with signature:\n\n(real, array[] real, array[] real, data array[] real, data array[] int):array[] real\nThe arguments represent (1) time, (2) system state, (3) parameters, (4) real data, and (5) integer data, and the return value contains the derivatives with respect to time of the state,\n\ninitial_state: initial state, type array[] real,\ninitial_time: initial time, type int or real,\ntimes: solution times, type array[] real,\ntheta: parameters, type array[] real,\ndata x_r: real data, type array[] real, data only, and\ndata x_i: integer data, type array[] int, data only.\n\nFor more fine-grained control of the ODE solvers, these parameters can also be provided:\n\ndata rel_tol: relative tolerance for the ODE solver, type real, data only,\ndata abs_tol: absolute tolerance for the ODE solver, type real, data only, and\ndata max_num_steps: maximum number of steps to take in the ODE solver, type int, data only.\n\n\n\nThe return value for the ODE solvers is an array of type array[,] real, with values consisting of solutions at the specified times.\n\n\n\nThe sizes must match, and in particular, the following groups are of the same size:\n\nstate variables passed into the system function, derivatives returned by the system function, initial state passed into the solver, and rows of the return value of the solver,\nsolution times and number of rows of the return value of the solver,\nparameters, real data and integer data passed to the solver will be passed to the system function\n\n\n\n\n\n\nThese algebraic solver functions have been replaced by those described in Algebraic Equation Solvers.\n\n\nAn algebraic system is specified as an ordinary function in Stan within the functions block. 
The algebraic system function must have this signature:\n vector algebra_system(vector y, vector theta,\n data array[] real x_r, array[] int x_i)\nThe algebraic system function should return the value of the algebraic function which goes to 0, when we plug in the solution to the algebraic system.\nThe arguments of this function are:\n\ny, the unknowns we wish to solve for\ntheta, parameter values used to evaluate the algebraic system\nx_r, data values used to evaluate the algebraic system\nx_i, integer data used to evaluate the algebraic system\n\nThe algebraic system function separates parameter values, theta, from data values, x_r, for efficiency in propagating the derivatives through the algebraic system.\n\n\n\nvector algebra_solver(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i) Solves the algebraic system, given an initial guess, using the Powell hybrid algorithm.\nAvailable since 2.17, deprecated in 2.31\n \n\nvector algebra_solver(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps) Solves the algebraic system, given an initial guess, using the Powell hybrid algorithm with additional control parameters for the solver.\nAvailable since 2.17, deprecated in 2.31\nNote: In future releases, the function algebra_solver will be deprecated and replaced with algebra_solver_powell.\n \n\nvector algebra_solver_newton(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i) Solves the algebraic system, given an initial guess, using Newton’s method.\nAvailable since 2.24, deprecated in 2.31\n \n\nvector algebra_solver_newton(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps) Solves the algebraic system, given an initial guess, using Newton’s method with additional control parameters for the 
solver.\nAvailable since 2.24, deprecated in 2.31\n\n\nThe arguments to the algebraic solvers are as follows:\n\nalgebra_system: function literal referring to a function specifying the system of algebraic equations with signature (vector, vector, array[] real, array[] int):vector. The arguments represent (1) unknowns, (2) parameters, (3) real data, and (4) integer data, and the return value contains the value of the algebraic function, which goes to 0 when we plug in the solution to the algebraic system,\ny_guess: initial guess for the solution, type vector,\ntheta: parameters only, type vector,\nx_r: real data only, type array[] real, and\nx_i: integer data only, type array[] int.\n\nFor more fine-grained control of the algebraic solver, these parameters can also be provided:\n\nrel_tol: relative tolerance for the algebraic solver, type real, data only,\nfunction_tol: function tolerance for the algebraic solver, type real, data only,\nmax_num_steps: maximum number of steps to take in the algebraic solver, type int, data only.\n\n\n\n\nThe return value for the algebraic solver is an object of type vector, with values which, when plugged in as y make the algebraic function go to 0.\n\n\n\nCertain sizes have to be consistent. 
The initial guess, return value of the solver, and return value of the algebraic function must all be the same size.\nThe parameters, real data, and integer data will be passed from the solver directly to the system function.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Deprecated Functions" + ] + }, + { + "objectID": "functions-reference/deprecated_functions.html#integer-division-with-operator", + "href": "functions-reference/deprecated_functions.html#integer-division-with-operator", + "title": "Deprecated Functions", + "section": "", + "text": "Deprecated: Using / with two integer arguments is interpreted as integer floor division, such that\n\\[ 1 / 2 = 0 \\]\nThis is deprecated due to its confusion with real-valued division, where\n\\[ 1.0 / 2.0 = 0.5 \\]\nReplacement: Use the integer division operator operator%/% instead.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Deprecated Functions" + ] + }, + { + "objectID": "functions-reference/deprecated_functions.html#functions-old-ode-solver", + "href": "functions-reference/deprecated_functions.html#functions-old-ode-solver", + "title": "Deprecated Functions", + "section": "", + "text": "These ODE integrator functions have been replaced by those described in Ordinary Differential Equation (ODE) Solvers.\n\n\nA system of ODEs is specified as an ordinary function in Stan within the functions block. The ODE system function must have this function signature:\narray[] real ode(real time, array[] real state, array[] real theta,\n array[] real x_r, array[] int x_i);\nThe ODE system function should return the derivative of the state with respect to time at the time provided. 
The length of the returned real array must match the length of the state input into the function.\nThe arguments to this function are:\n\ntime, the time to evaluate the ODE system\nstate, the state of the ODE system at the time specified\ntheta, parameter values used to evaluate the ODE system\nx_r, data values used to evaluate the ODE system\nx_i, integer data values used to evaluate the ODE system.\n\nThe ODE system function separates parameter values, theta, from data values, x_r, for efficiency in computing the gradients of the ODE.\n\n\n\n \n\narray[,] real integrate_ode_rk45(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.\nAvailable since 2.10, deprecated in 2.24\n \n\narray[,] real integrate_ode_rk45(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i, real rel_tol, real abs_tol, int max_num_steps) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method with additional control parameters for the solver.\nAvailable since 2.10, deprecated in 2.24\n \n\narray[,] real integrate_ode(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.\nAvailable since 2.10, deprecated in 2.24\n \n\narray[,] real integrate_ode_adams(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i) Solves the ODE system for the times provided using the Adams-Moulton method.\nAvailable since 2.23, deprecated in 2.24\n \n\narray[,] real integrate_ode_adams(function 
ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i, data real rel_tol, data real abs_tol, data int max_num_steps) Solves the ODE system for the times provided using the Adams-Moulton method with additional control parameters for the solver.\nAvailable since 2.23, deprecated in 2.24\n\n\n\n \n\narray[,] real integrate_ode_bdf(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i) Solves the ODE system for the times provided using the backward differentiation formula (BDF) method.\nAvailable since 2.10, deprecated in 2.24\n \n\narray[,] real integrate_ode_bdf(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, data array[] real x_r, data array[] int x_i, data real rel_tol, data real abs_tol, data int max_num_steps) Solves the ODE system for the times provided using the backward differentiation formula (BDF) method with additional control parameters for the solver.\nAvailable since 2.10, deprecated in 2.24\n\n\n\nThe arguments to the ODE solvers in both the stiff and non-stiff cases are as follows.\n\node: function literal referring to a function specifying the system of differential equations with signature:\n\n(real, array[] real, array[] real, data array[] real, data array[] int):array[] real\nThe arguments represent (1) time, (2) system state, (3) parameters, (4) real data, and (5) integer data, and the return value contains the derivatives with respect to time of the state,\n\ninitial_state: initial state, type array[] real,\ninitial_time: initial time, type int or real,\ntimes: solution times, type array[] real,\ntheta: parameters, type array[] real,\ndata x_r: real data, type array[] real, data only, and\ndata x_i: integer data, type array[] int, data only.\n\nFor more fine-grained control of the ODE solvers, these parameters can also be 
provided:\n\ndata rel_tol: relative tolerance for the ODE solver, type real, data only,\ndata abs_tol: absolute tolerance for the ODE solver, type real, data only, and\ndata max_num_steps: maximum number of steps to take in the ODE solver, type int, data only.\n\n\n\nThe return value for the ODE solvers is an array of type array[,] real, with values consisting of solutions at the specified times.\n\n\n\nThe sizes must match, and in particular, the following groups are of the same size:\n\nstate variables passed into the system function, derivatives returned by the system function, initial state passed into the solver, and rows of the return value of the solver,\nsolution times and number of rows of the return value of the solver,\nparameters, real data and integer data passed to the solver will be passed to the system function", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Deprecated Functions" + ] + }, + { + "objectID": "functions-reference/deprecated_functions.html#functions-old-algebra-solver", + "href": "functions-reference/deprecated_functions.html#functions-old-algebra-solver", + "title": "Deprecated Functions", + "section": "", + "text": "These algebraic solver functions have been replaced by those described in Algebraic Equation Solvers.\n\n\nAn algebraic system is specified as an ordinary function in Stan within the function block. 
The algebraic system function must have this signature:\n vector algebra_system(vector y, vector theta,\n data array[] real x_r, array[] int x_i)\nThe algebraic system function should return the value of the algebraic function which goes to 0, when we plug in the solution to the algebraic system.\nThe arguments to this function are:\n\ny, the unknowns we wish to solve for\ntheta, parameter values used to evaluate the algebraic system\nx_r, data values used to evaluate the algebraic system\nx_i, integer data used to evaluate the algebraic system\n\nThe algebraic system function separates parameter values, theta, from data values, x_r, for efficiency in propagating the derivatives through the algebraic system.\n\n\n\nvector algebra_solver(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i) Solves the algebraic system, given an initial guess, using the Powell hybrid algorithm.\nAvailable since 2.17, deprecated in 2.31\n \n\nvector algebra_solver(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps) Solves the algebraic system, given an initial guess, using the Powell hybrid algorithm with additional control parameters for the solver.\nAvailable since 2.17, deprecated in 2.31\nNote: In future releases, the function algebra_solver will be deprecated and replaced with algebra_solver_powell.\n \n\nvector algebra_solver_newton(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i) Solves the algebraic system, given an initial guess, using Newton’s method.\nAvailable since 2.24, deprecated in 2.31\n \n\nvector algebra_solver_newton(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps) Solves the algebraic system, given an initial guess, using Newton’s method with additional control parameters for the 
solver.\nAvailable since 2.24, deprecated in 2.31\n\n\nThe arguments to the algebraic solvers are as follows:\n\nalgebra_system: function literal referring to a function specifying the system of algebraic equations with signature (vector, vector, array[] real, array[] int):vector. The arguments represent (1) unknowns, (2) parameters, (3) real data, and (4) integer data, and the return value contains the value of the algebraic function, which goes to 0 when we plug in the solution to the algebraic system,\ny_guess: initial guess for the solution, type vector,\ntheta: parameters only, type vector,\nx_r: real data only, type array[] real, and\nx_i: integer data only, type array[] int.\n\nFor more fine-grained control of the algebraic solver, these parameters can also be provided:\n\nrel_tol: relative tolerance for the algebraic solver, type real, data only,\nfunction_tol: function tolerance for the algebraic solver, type real, data only,\nmax_num_steps: maximum number of steps to take in the algebraic solver, type int, data only.\n\n\n\n\nThe return value for the algebraic solver is an object of type vector, with values which, when plugged in as y make the algebraic function go to 0.\n\n\n\nCertain sizes have to be consistent. The initial guess, return value of the solver, and return value of the algebraic function must all be the same size.\nThe parameters, real data, and integer data will be passed from the solver directly to the system function.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Deprecated Functions" + ] + }, + { + "objectID": "functions-reference/correlation_matrix_distributions.html", + "href": "functions-reference/correlation_matrix_distributions.html", + "title": "Correlation Matrix Distributions", + "section": "", + "text": "The correlation matrix distributions have support on the (Cholesky factors of) correlation matrices. 
A Cholesky factor \\(L\\) for a \\(K\n\\times K\\) correlation matrix \\(\\Sigma\\) of dimension \\(K\\) has rows of unit length so that the diagonal of \\(L L^{\\top}\\) is the unit \\(K\\)-vector. Even though models are usually conceptualized in terms of correlation matrices, it is better to operationalize them in terms of their Cholesky factors. If you are interested in the posterior distribution of the correlations, you can recover them in the generated quantities block via\n generated quantities {\n corr_matrix[K] Sigma;\n Sigma = multiply_lower_tri_self_transpose(L);\n }\n\n\n\n\nFor \\(\\eta > 0\\), if \\(\\Sigma\\) is a positive-definite, symmetric matrix with unit diagonal (i.e., a correlation matrix), then \\[\\begin{equation*}\n\\text{LkjCorr}(\\Sigma|\\eta) \\propto \\det \\left( \\Sigma \\right)^{(\\eta\n- 1)}. \\end{equation*}\\] The expectation is the identity matrix for any positive value of the shape parameter \\(\\eta\\), which can be interpreted like the shape parameter of a symmetric beta distribution:\n\nif \\(\\eta = 1\\), then the density is uniform over correlation matrices of order \\(K\\);\nif \\(\\eta > 1\\), the identity matrix is the modal correlation matrix, with a sharper peak in the density at the identity matrix for larger \\(\\eta\\); and\nfor \\(0 < \\eta < 1\\), the density has a trough at the identity matrix.\nif \\(\\eta\\) were an unknown parameter, the Jeffreys prior is proportional to \\(\\sqrt{2\\sum_{k=1}^{K-1}\\left(\n\\psi_1\\left(\\eta+\\frac{K-k-1}{2}\\right) - 2\\psi_1\\left(2\\eta+K-k-1\n\\right)\\right)}\\), where \\(\\psi_1()\\) is the trigamma function\n\nSee (Lewandowski, Kurowicka, and Joe 2009) for definitions. 
However, it is much better computationally to work directly with the Cholesky factor of \\(\\Sigma\\), so this distribution should never be explicitly used in practice.\n\n\n\ny ~ lkj_corr(eta)\nIncrement target log probability density with lkj_corr_lupdf(y | eta).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal lkj_corr_lpdf(matrix y | real eta) The log of the LKJ density for the correlation matrix y given nonnegative shape eta. lkj_corr_cholesky_lpdf is faster, more numerically stable, uses less memory, and should be preferred to this.\nAvailable since 2.12\n \n\nreal lkj_corr_lupdf(matrix y | real eta) The log of the LKJ density for the correlation matrix y given nonnegative shape eta dropping constant additive terms. lkj_corr_cholesky_lupdf is faster, more numerically stable, uses less memory, and should be preferred to this.\nAvailable since 2.25\n \n\nmatrix lkj_corr_rng(int K, real eta) Generate a LKJ random correlation matrix of order K with shape eta; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0\n\n\n\n\nStan provides an implicit parameterization of the LKJ correlation matrix density in terms of its Cholesky factor, which you should use rather than the explicit parameterization in the previous section. 
For example, if L is a Cholesky factor of a correlation matrix, then\n L ~ lkj_corr_cholesky(2.0); # implies L * L' ~ lkj_corr(2.0);\nBecause Stan requires models to have support on all valid constrained parameters, L will almost always1 be a parameter declared with the type of a Cholesky factor for a correlation matrix; for example,\n parameters { cholesky_factor_corr[K] L; # rather than corr_matrix[K] Sigma; // ...\n\n\nFor \\(\\eta > 0\\), if \\(L\\) is a \\(K \\times K\\) lower-triangular Cholesky factor of a symmetric positive-definite matrix with unit diagonal (i.e., a correlation matrix), then \\[\\begin{equation*} \\text{LkjCholesky}(L|\\eta)\n\\propto \\left|J\\right|\\det(L L^\\top)^{(\\eta - 1)} = \\prod_{k=2}^K\nL_{kk}^{K-k+2\\eta-2}. \\end{equation*}\\] See the previous section for details on interpreting the shape parameter \\(\\eta\\). Note that even if \\(\\eta=1\\), it is still essential to evaluate the density function because the density of \\(L\\) is not constant, regardless of the value of \\(\\eta\\), even though the density of \\(LL^\\top\\) is constant iff \\(\\eta=1\\).\nA lower triangular \\(L\\) is a Cholesky factor for a correlation matrix if and only if \\(L_{k,k} > 0\\) for \\(k \\in 1{:}K\\) and each row \\(L_k\\) has unit Euclidean length.\n\n\n\nL ~ lkj_corr_cholesky(eta)\nIncrement target log probability density with lkj_corr_cholesky_lupdf(L | eta).\nAvailable since 2.4\n \n\n\n\n\n \n\nreal lkj_corr_cholesky_lpdf(matrix L | real eta) The log of the LKJ density for the lower-triangular Cholesky factor L of a correlation matrix given shape eta\nAvailable since 2.12\n \n\nreal lkj_corr_cholesky_lupdf(matrix L | real eta) The log of the LKJ density for the lower-triangular Cholesky factor L of a correlation matrix given shape eta dropping constant additive terms\nAvailable since 2.25\n \n\nmatrix lkj_corr_cholesky_rng(int K, real eta) Generate a random Cholesky factor of a correlation matrix of order K that is distributed LKJ with 
shape eta; may only be used in transformed data and generated quantities blocks\nAvailable since 2.4", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Correlation Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/correlation_matrix_distributions.html#lkj-correlation", + "href": "functions-reference/correlation_matrix_distributions.html#lkj-correlation", + "title": "Correlation Matrix Distributions", + "section": "", + "text": "For \\(\\eta > 0\\), if \\(\\Sigma\\) is a positive-definite, symmetric matrix with unit diagonal (i.e., a correlation matrix), then \\[\\begin{equation*}\n\\text{LkjCorr}(\\Sigma|\\eta) \\propto \\det \\left( \\Sigma \\right)^{(\\eta\n- 1)}. \\end{equation*}\\] The expectation is the identity matrix for any positive value of the shape parameter \\(\\eta\\), which can be interpreted like the shape parameter of a symmetric beta distribution:\n\nif \\(\\eta = 1\\), then the density is uniform over correlation matrices of order \\(K\\);\nif \\(\\eta > 1\\), the identity matrix is the modal correlation matrix, with a sharper peak in the density at the identity matrix for larger \\(\\eta\\); and\nfor \\(0 < \\eta < 1\\), the density has a trough at the identity matrix.\nif \\(\\eta\\) were an unknown parameter, the Jeffreys prior is proportional to \\(\\sqrt{2\\sum_{k=1}^{K-1}\\left(\n\\psi_1\\left(\\eta+\\frac{K-k-1}{2}\\right) - 2\\psi_1\\left(2\\eta+K-k-1\n\\right)\\right)}\\), where \\(\\psi_1()\\) is the trigamma function\n\nSee (Lewandowski, Kurowicka, and Joe 2009) for definitions. 
However, it is much better computationally to work directly with the Cholesky factor of \\(\\Sigma\\), so this distribution should never be explicitly used in practice.\n\n\n\ny ~ lkj_corr(eta)\nIncrement target log probability density with lkj_corr_lupdf(y | eta).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal lkj_corr_lpdf(matrix y | real eta) The log of the LKJ density for the correlation matrix y given nonnegative shape eta. lkj_corr_cholesky_lpdf is faster, more numerically stable, uses less memory, and should be preferred to this.\nAvailable since 2.12\n \n\nreal lkj_corr_lupdf(matrix y | real eta) The log of the LKJ density for the correlation matrix y given nonnegative shape eta dropping constant additive terms. lkj_corr_cholesky_lupdf is faster, more numerically stable, uses less memory, and should be preferred to this.\nAvailable since 2.25\n \n\nmatrix lkj_corr_rng(int K, real eta) Generate a LKJ random correlation matrix of order K with shape eta; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Correlation Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/correlation_matrix_distributions.html#cholesky-lkj-correlation-distribution", + "href": "functions-reference/correlation_matrix_distributions.html#cholesky-lkj-correlation-distribution", + "title": "Correlation Matrix Distributions", + "section": "", + "text": "Stan provides an implicit parameterization of the LKJ correlation matrix density in terms of its Cholesky factor, which you should use rather than the explicit parameterization in the previous section. 
For example, if L is a Cholesky factor of a correlation matrix, then\n L ~ lkj_corr_cholesky(2.0); # implies L * L' ~ lkj_corr(2.0);\nBecause Stan requires models to have support on all valid constrained parameters, L will almost always1 be a parameter declared with the type of a Cholesky factor for a correlation matrix; for example,\n parameters { cholesky_factor_corr[K] L; # rather than corr_matrix[K] Sigma; // ...\n\n\nFor \\(\\eta > 0\\), if \\(L\\) is a \\(K \\times K\\) lower-triangular Cholesky factor of a symmetric positive-definite matrix with unit diagonal (i.e., a correlation matrix), then \\[\\begin{equation*} \\text{LkjCholesky}(L|\\eta)\n\\propto \\left|J\\right|\\det(L L^\\top)^{(\\eta - 1)} = \\prod_{k=2}^K\nL_{kk}^{K-k+2\\eta-2}. \\end{equation*}\\] See the previous section for details on interpreting the shape parameter \\(\\eta\\). Note that even if \\(\\eta=1\\), it is still essential to evaluate the density function because the density of \\(L\\) is not constant, regardless of the value of \\(\\eta\\), even though the density of \\(LL^\\top\\) is constant iff \\(\\eta=1\\).\nA lower triangular \\(L\\) is a Cholesky factor for a correlation matrix if and only if \\(L_{k,k} > 0\\) for \\(k \\in 1{:}K\\) and each row \\(L_k\\) has unit Euclidean length.\n\n\n\nL ~ lkj_corr_cholesky(eta)\nIncrement target log probability density with lkj_corr_cholesky_lupdf(L | eta).\nAvailable since 2.4\n \n\n\n\n\n \n\nreal lkj_corr_cholesky_lpdf(matrix L | real eta) The log of the LKJ density for the lower-triangular Cholesky factor L of a correlation matrix given shape eta\nAvailable since 2.12\n \n\nreal lkj_corr_cholesky_lupdf(matrix L | real eta) The log of the LKJ density for the lower-triangular Cholesky factor L of a correlation matrix given shape eta dropping constant additive terms\nAvailable since 2.25\n \n\nmatrix lkj_corr_cholesky_rng(int K, real eta) Generate a random Cholesky factor of a correlation matrix of order K that is distributed LKJ with 
shape eta; may only be used in transformed data and generated quantities blocks\nAvailable since 2.4", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Correlation Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/correlation_matrix_distributions.html#footnotes", + "href": "functions-reference/correlation_matrix_distributions.html#footnotes", + "title": "Correlation Matrix Distributions", + "section": "Footnotes", + "text": "Footnotes\n\n\nIt is possible to build up a valid L within Stan, but that would then require Jacobian adjustments to imply the intended posterior.↩︎", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Correlation Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/continuous_distributions_on_0_1.html", + "href": "functions-reference/continuous_distributions_on_0_1.html", + "title": "Continuous Distributions on [0, 1]", + "section": "", + "text": "The continuous distributions with outcomes in the interval \\([0,1]\\) are used to characterize bounded quantities, including probabilities.\n\n\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(\\theta \\in (0,1)\\), \\[\\begin{equation*} \\text{Beta}(\\theta|\\alpha,\\beta) =\n\\frac{1}{\\mathrm{B}(\\alpha,\\beta)} \\, \\theta^{\\alpha - 1} \\, (1 -\n\\theta)^{\\beta - 1} , \\end{equation*}\\] where the beta function \\(\\mathrm{B}()\\) is as defined in section combinatorial functions.\nWarning: If \\(\\theta = 0\\) or \\(\\theta = 1\\), then the probability is 0 and the log probability is \\(-\\infty\\). 
Similarly, the distribution requires strictly positive parameters, \\(\\alpha, \\beta >\n0\\).\n\n\n\ntheta ~ beta(alpha, beta)\nIncrement target log probability density with beta_lupdf(theta | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal beta_lpdf(reals theta | reals alpha, reals beta) The log of the beta density of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta\nAvailable since 2.12\n \n\nreal beta_lupdf(reals theta | reals alpha, reals beta) The log of the beta density of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal beta_cdf(reals theta | reals alpha, reals beta) The beta cumulative distribution function of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta\nAvailable since 2.0\n \n\nreal beta_lcdf(reals theta | reals alpha, reals beta) The log of the beta cumulative distribution function of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta\nAvailable since 2.12\n \n\nreal beta_lccdf(reals theta | reals alpha, reals beta) The log of the beta complementary cumulative distribution function of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta\nAvailable since 2.12\n \n\nR beta_rng(reals alpha, reals beta) Generate a beta variate with positive prior successes (plus one) alpha and prior failures (plus one) beta; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\mu \\in (0, 1)\\) and \\(\\kappa \\in \\mathbb{R}^+\\), then for \\(\\theta\n\\in (0,1)\\), \\[\\begin{equation*} \\mathrm{Beta\\_Proportion}(\\theta|\\mu,\\kappa) =\n\\frac{1}{\\mathrm{B}(\\mu \\kappa, (1 - \\mu) \\kappa)} \\,\n\\theta^{\\mu\\kappa - 1} \\, (1 - \\theta)^{(1 - \\mu)\\kappa- 1} , \\end{equation*}\\] where the beta function \\(\\mathrm{B}()\\) is as defined in section combinatorial functions.\nWarning: If \\(\\theta = 0\\) or \\(\\theta = 1\\), then the probability is 0 and the log probability is \\(-\\infty\\). Similarly, the distribution requires \\(\\mu \\in (0, 1)\\) and strictly positive parameter, \\(\\kappa > 0\\).\n\n\n\ntheta ~ beta_proportion(mu, kappa)\nIncrement target log probability density with beta_proportion_lupdf(theta | mu, kappa).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal beta_proportion_lpdf(reals theta | reals mu, reals kappa) The log of the beta_proportion density of theta in \\((0,1)\\) given mean mu and precision kappa\nAvailable since 2.19\n \n\nreal beta_proportion_lupdf(reals theta | reals mu, reals kappa) The log of the beta_proportion density of theta in \\((0,1)\\) given mean mu and precision kappa dropping constant additive terms\nAvailable since 2.25\n \n\nreal beta_proportion_lcdf(reals theta | reals mu, reals kappa) The log of the beta_proportion cumulative distribution function of theta in \\((0,1)\\) given mean mu and precision kappa\nAvailable since 2.18\n \n\nreal beta_proportion_lccdf(reals theta | reals mu, reals kappa) The log of the beta_proportion complementary cumulative distribution function of theta in \\((0,1)\\) given mean mu and precision kappa\nAvailable since 2.18\n \n\nR beta_proportion_rng(reals mu, reals kappa) Generate a beta_proportion variate with mean mu and precision kappa; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Continuous Distributions on [0, 1]" + ] + }, + { + "objectID": "functions-reference/continuous_distributions_on_0_1.html#beta-distribution", + "href": "functions-reference/continuous_distributions_on_0_1.html#beta-distribution", + "title": "Continuous Distributions on [0, 1]", + "section": "", + "text": "If \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(\\theta \\in (0,1)\\), \\[\\begin{equation*} \\text{Beta}(\\theta|\\alpha,\\beta) =\n\\frac{1}{\\mathrm{B}(\\alpha,\\beta)} \\, \\theta^{\\alpha - 1} \\, (1 -\n\\theta)^{\\beta - 1} , \\end{equation*}\\] where the beta function \\(\\mathrm{B}()\\) is as defined in section combinatorial functions.\nWarning: If \\(\\theta = 0\\) or \\(\\theta = 1\\), then the probability is 0 and the log probability is \\(-\\infty\\). Similarly, the distribution requires strictly positive parameters, \\(\\alpha, \\beta >\n0\\).\n\n\n\ntheta ~ beta(alpha, beta)\nIncrement target log probability density with beta_lupdf(theta | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal beta_lpdf(reals theta | reals alpha, reals beta) The log of the beta density of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta\nAvailable since 2.12\n \n\nreal beta_lupdf(reals theta | reals alpha, reals beta) The log of the beta density of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal beta_cdf(reals theta | reals alpha, reals beta) The beta cumulative distribution function of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta\nAvailable since 2.0\n \n\nreal beta_lcdf(reals theta | reals alpha, reals beta) The log 
of the beta cumulative distribution function of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta\nAvailable since 2.12\n \n\nreal beta_lccdf(reals theta | reals alpha, reals beta) The log of the beta complementary cumulative distribution function of theta in \\([0,1]\\) given positive prior successes (plus one) alpha and prior failures (plus one) beta\nAvailable since 2.12\n \n\nR beta_rng(reals alpha, reals beta) Generate a beta variate with positive prior successes (plus one) alpha and prior failures (plus one) beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Continuous Distributions on [0, 1]" + ] + }, + { + "objectID": "functions-reference/continuous_distributions_on_0_1.html#beta-proportion-distribution", + "href": "functions-reference/continuous_distributions_on_0_1.html#beta-proportion-distribution", + "title": "Continuous Distributions on [0, 1]", + "section": "", + "text": "If \\(\\mu \\in (0, 1)\\) and \\(\\kappa \\in \\mathbb{R}^+\\), then for \\(\\theta\n\\in (0,1)\\), \\[\\begin{equation*} \\mathrm{Beta\\_Proportion}(\\theta|\\mu,\\kappa) =\n\\frac{1}{\\mathrm{B}(\\mu \\kappa, (1 - \\mu) \\kappa)} \\,\n\\theta^{\\mu\\kappa - 1} \\, (1 - \\theta)^{(1 - \\mu)\\kappa- 1} , \\end{equation*}\\] where the beta function \\(\\mathrm{B}()\\) is as defined in section combinatorial functions.\nWarning: If \\(\\theta = 0\\) or \\(\\theta = 1\\), then the probability is 0 and the log probability is \\(-\\infty\\). 
Similarly, the distribution requires \\(\\mu \\in (0, 1)\\) and strictly positive parameter, \\(\\kappa > 0\\).\n\n\n\ntheta ~ beta_proportion(mu, kappa)\nIncrement target log probability density with beta_proportion_lupdf(theta | mu, kappa).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal beta_proportion_lpdf(reals theta | reals mu, reals kappa) The log of the beta_proportion density of theta in \\((0,1)\\) given mean mu and precision kappa\nAvailable since 2.19\n \n\nreal beta_proportion_lupdf(reals theta | reals mu, reals kappa) The log of the beta_proportion density of theta in \\((0,1)\\) given mean mu and precision kappa dropping constant additive terms\nAvailable since 2.25\n \n\nreal beta_proportion_lcdf(reals theta | reals mu, reals kappa) The log of the beta_proportion cumulative distribution function of theta in \\((0,1)\\) given mean mu and precision kappa\nAvailable since 2.18\n \n\nreal beta_proportion_lccdf(reals theta | reals mu, reals kappa) The log of the beta_proportion complementary cumulative distribution function of theta in \\((0,1)\\) given mean mu and precision kappa\nAvailable since 2.18\n \n\nR beta_proportion_rng(reals mu, reals kappa) Generate a beta_proportion variate with mean mu and precision kappa; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Continuous Distributions on [0, 1]" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html", + "href": "functions-reference/complex_matrix_operations.html", + "title": "Complex Matrix Operations", + "section": "", + "text": "This chapter provides the details of functions that operate over complex matrices, vectors, and row vectors. 
These mirror the operations over real matrix types and are defined in the usual way for complex numbers.\n\n\nIf an expression e can be assigned to a variable of type T, then it can be used as an argument to a function that is specified to take arguments of type T. For instance, sqrt(real) is specified to take a real argument, but an integer expression such as 2 + 2 of type int can be passed to sqrt, so that sqrt(2 + 2) is well defined. This works by promoting the integer expression 2 + 2 to be of real type.\nThe rules for promotion in Stan are simple:\n\nint may be promoted to real,\nreal may be promoted to complex,\nvector can be promoted to complex_vector,\nrow_vector can be promoted to complex_row_vector,\nmatrix can be promoted to complex_matrix,\nif T can be promoted to U and U can be promoted to V, then T can be promoted to V (transitive), and\nif T can be promoted to U, then T[] can be promoted to U[] (covariant).\n\n\n\n\nWhen a function is called, the definition requiring the fewest number of promotions is used. For example, when calling vector + vector, the real-valued signature is used. When calling any of complex_vector + vector, vector + complex_vector, or complex_vector + complex_vector, the complex signature is used. If more than one signature matches with the minimal number of promotions, the call is ambiguous, and an error will be raised by the compiler. Promotion ambiguity leading to ill-defined calls should never happen with Stan built-in functions.\n\n\n\nComplex function signatures will only list the fully complex type. For example, with complex vector addition, we will list a single signature, complex_vector operator+(complex_vector, complex_vector). 
Through promotion, operator+ may be called with one complex vector and one real vector as well, but the documentation elides the implied signatures operator+(complex_vector, vector) and operator+(vector, complex_vector).\n\n\n\nGeneric functions work for arrays containing complex, complex matrix, complex vector, or complex row vector types. This includes the functions append_array, dims, head, num_elements, rep_array, reverse, segment, size, and tail.\n\n\n\n\n \n\nint num_elements(complex_vector x) The total number of elements in the vector x (same as function rows)\nAvailable since 2.30\n \n\nint num_elements(complex_row_vector x) The total number of elements in the vector x (same as function cols)\nAvailable since 2.30\n \n\nint num_elements(complex_matrix x) The total number of elements in the matrix x. For example, if x is a \\(5 \\times 3\\) matrix, then num_elements(x) is 15\nAvailable since 2.30\n \n\nint rows(complex_vector x) The number of rows in the vector x\nAvailable since 2.30\n \n\nint rows(complex_row_vector x) The number of rows in the row vector x, namely 1\nAvailable since 2.30\n \n\nint rows(complex_matrix x) The number of rows in the matrix x\nAvailable since 2.30\n \n\nint cols(complex_vector x) The number of columns in the vector x, namely 1\nAvailable since 2.30\n \n\nint cols(complex_row_vector x) The number of columns in the row vector x\nAvailable since 2.30\n \n\nint cols(complex_matrix x) The number of columns in the matrix x\nAvailable since 2.30\n \n\nint size(complex_vector x) The size of x, i.e., the number of elements\nAvailable since 2.30\n \n\nint size(complex_row_vector x) The size of x, i.e., the number of elements\nAvailable since 2.30\n \n\nint size(matrix x) The size of the matrix x. For example, if x is a \\(5 \\times 3\\) matrix, then size(x) is 15.\nAvailable since 2.30\n\n\n\nStan supports all basic complex arithmetic operators using infix, prefix and postfix operations. 
This section lists the operations supported by Stan along with their argument and result types.\n\n\n \n\ncomplex_vector operator-(complex_vector x) The negation of the vector x.\nAvailable since 2.30\n \n\ncomplex_row_vector operator-(complex_row_vector x) The negation of the row vector x.\nAvailable since 2.30\n \n\ncomplex_matrix operator-(complex_matrix x) The negation of the matrix x.\nAvailable since 2.30\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of matrix types, -x is the same shape array where each individual value is negated.\nAvailable since 2.31\n\n\n\n \n\ncomplex_vector operator+(complex_vector x, complex_vector y) The sum of the vectors x and y.\nAvailable since 2.30\n \n\ncomplex_row_vector operator+(complex_row_vector x, complex_row_vector y) The sum of the row vectors x and y.\nAvailable since 2.30\n \n\ncomplex_matrix operator+(complex_matrix x, complex_matrix y) The sum of the matrices x and y\nAvailable since 2.30\n \n\ncomplex_vector operator-(complex_vector x, complex_vector y) The difference between the vectors x and y.\nAvailable since 2.30\n \n\ncomplex_row_vector operator-(complex_row_vector x, complex_row_vector y) The difference between the row vectors x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator-(complex_matrix x, complex_matrix y) The difference between the matrices x and y\nAvailable since 2.30\n \n\ncomplex_vector operator*(complex x, complex_vector y) The product of the scalar x and vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator*(complex x, complex_row_vector y) The product of the scalar x and the row vector y\nAvailable since 2.30\n \n\ncomplex_matrix operator*(complex x, complex_matrix y) The product of the scalar x and the matrix y\nAvailable since 2.30\n \n\ncomplex_vector operator*(complex_vector x, complex y) The product of the scalar y and vector x\nAvailable since 2.30\n \n\ncomplex_matrix operator*(complex_vector x, complex_row_vector y) The 
product of the vector x and row vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator*(complex_row_vector x, complex y) The product of the scalar y and row vector x\nAvailable since 2.30\n \n\ncomplex operator*(complex_row_vector x, complex_vector y) The product of the row vector x and vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator*(complex_row_vector x, complex_matrix y) The product of the row vector x and matrix y\nAvailable since 2.30\n \n\ncomplex_matrix operator*(complex_matrix x, complex y) The product of the scalar y and matrix x\nAvailable since 2.30\n \n\ncomplex_vector operator*(complex_matrix x, complex_vector y) The product of the matrix x and vector y\nAvailable since 2.30\n \n\ncomplex_matrix operator*(complex_matrix x, complex_matrix y) The product of the matrices x and y\nAvailable since 2.30\n\n\n\n \n\ncomplex_vector operator+(complex_vector x, complex y) The result of adding y to every entry in the vector x\nAvailable since 2.30\n \n\ncomplex_vector operator+(complex x, complex_vector y) The result of adding x to every entry in the vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator+(complex_row_vector x, complex y) The result of adding y to every entry in the row vector x\nAvailable since 2.30\n \n\ncomplex_row_vector operator+(complex x, complex_row_vector y) The result of adding x to every entry in the row vector y\nAvailable since 2.30\n \n\ncomplex_matrix operator+(complex_matrix x, complex y) The result of adding y to every entry in the matrix x\nAvailable since 2.30\n \n\ncomplex_matrix operator+(complex x, complex_matrix y) The result of adding x to every entry in the matrix y\nAvailable since 2.30\n \n\ncomplex_vector operator-(complex_vector x, complex y) The result of subtracting y from every entry in the vector x\nAvailable since 2.30\n \n\ncomplex_vector operator-(complex x, complex_vector y) The result of adding x to every entry in the negation of the vector y\nAvailable since 2.30\n 
\n\ncomplex_row_vector operator-(complex_row_vector x, complex y) The result of subtracting y from every entry in the row vector x\nAvailable since 2.30\n \n\ncomplex_row_vector operator-(complex x, complex_row_vector y) The result of adding x to every entry in the negation of the row vector y\nAvailable since 2.30\n \n\ncomplex_matrix operator-(complex_matrix x, complex y) The result of subtracting y from every entry in the matrix x\nAvailable since 2.30\n \n\ncomplex_matrix operator-(complex x, complex_matrix y) The result of adding x to every entry in the negation of the matrix y\nAvailable since 2.30\n \n\ncomplex_vector operator/(complex_vector x, complex y) The result of dividing each entry in the vector x by y\nAvailable since 2.30\n \n\ncomplex_row_vector operator/(complex_row_vector x, complex y) The result of dividing each entry in the row vector x by y\nAvailable since 2.30\n \n\ncomplex_matrix operator/(complex_matrix x, complex y) The result of dividing each entry in the matrix x by y\nAvailable since 2.30\n\n\n\n\nComplex matrix transposition is represented using a postfix operator.\n \n\ncomplex_matrix operator'(complex_matrix x) The transpose of the matrix x, written as x'\nAvailable since 2.30\n \n\ncomplex_row_vector operator'(complex_vector x) The transpose of the vector x, written as x'\nAvailable since 2.30\n \n\ncomplex_vector operator'(complex_row_vector x) The transpose of the row vector x, written as x'\nAvailable since 2.30\n\n\n\nAs in the real case, elementwise complex functions apply a function to each element of a vector or matrix, returning a result of the same shape as the argument.\n \n\ncomplex_vector operator.*(complex_vector x, complex_vector y) The elementwise product of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector operator.*(complex_row_vector x, complex_row_vector y) The elementwise product of x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator.*(complex_matrix x, complex_matrix y) The elementwise product 
of x and y\nAvailable since 2.30\n \n\ncomplex_vector operator./(complex_vector x, complex_vector y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_vector operator./(complex x, complex_vector y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_vector operator./(complex_vector x, complex y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector operator./(complex_row_vector x, complex_row_vector y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector operator./(complex x, complex_row_vector y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector operator./(complex_row_vector x, complex y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator./(complex_matrix x, complex_matrix y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator./(complex x, complex_matrix y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator./(complex_matrix x, complex y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\nvector operator.^(complex_vector x, complex_vector y) The elementwise power of y and x\nAvailable since 2.30\n \n\nvector operator.^(complex_vector x, complex y) The elementwise power of y and x\nAvailable since 2.30\n \n\nvector operator.^(complex x, complex_vector y) The elementwise power of y and x\nAvailable since 2.30\n \n\nrow_vector operator.^(complex_row_vector x, complex_row_vector y) The elementwise power of y and x\nAvailable since 2.30\n \n\nrow_vector operator.^(complex_row_vector x, complex y) The elementwise power of y and x\nAvailable since 2.30\n \n\nrow_vector operator.^(complex x, complex_row_vector y) The elementwise power of y and x\nAvailable since 2.30\n \n\nmatrix operator.^( complex_matrix x, complex_matrix y) The elementwise power of y and x\nAvailable since 2.30\n \n\nmatrix operator.^( complex_matrix 
x, complex y) The elementwise power of y and x\nAvailable since 2.30\n \n\nmatrix operator.^(complex x, complex_matrix y) The elementwise power of y and x\nAvailable since 2.30\n\n\n\n \n\ncomplex dot_product(complex_vector x, complex_vector y) The dot product of x and y\nAvailable since 2.30\n \n\ncomplex dot_product(complex_vector x, complex_row_vector y) The dot product of x and y\nAvailable since 2.30\n \n\ncomplex dot_product(complex_row_vector x, complex_vector y) The dot product of x and y\nAvailable since 2.30\n \n\ncomplex dot_product(complex_row_vector x, complex_row_vector y) The dot product of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_product(complex_vector x, complex_vector y) The dot product of the columns of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_product(complex_row_vector x, complex_row_vector y) The dot product of the columns of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_product(complex_matrix x, complex_matrix y) The dot product of the columns of x and y\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_product(complex_vector x, complex_vector y) The dot product of the rows of x and y\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_product(complex_row_vector x, complex_row_vector y) The dot product of the rows of x and y\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_product(complex_matrix x, complex_matrix y) The dot product of the rows of x and y\nAvailable since 2.30\n \n\ncomplex dot_self(complex_vector x) The dot product of the vector x with itself\nAvailable since 2.30\n \n\ncomplex dot_self(complex_row_vector x) The dot product of the row vector x with itself\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_self(complex_vector x) The dot product of the columns of x with themselves\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_self(complex_row_vector x) The dot product of the columns of x with themselves\nAvailable since 2.30\n 
\n\ncomplex_row_vector columns_dot_self(complex_matrix x) The dot product of the columns of x with themselves\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_self(complex_vector x) The dot product of the rows of x with themselves\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_self(complex_row_vector x) The dot product of the rows of x with themselves\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_self(complex_matrix x) The dot product of the rows of x with themselves\nAvailable since 2.30\n\n\n \n\ncomplex_matrix diag_pre_multiply(complex_vector v, complex_matrix m) Return the product of the diagonal matrix formed from the vector v and the matrix m, i.e., diag_matrix(v) * m.\nAvailable since 2.30\n \n\ncomplex_matrix diag_pre_multiply(complex_row_vector v, complex_matrix m) Return the product of the diagonal matrix formed from the row vector v and the matrix m, i.e., diag_matrix(v) * m.\nAvailable since 2.30\n \n\ncomplex_matrix diag_post_multiply(complex_matrix m, complex_vector v) Return the product of the matrix m and the diagonal matrix formed from the vector v, i.e., m * diag_matrix(v).\nAvailable since 2.30\n \n\ncomplex_matrix diag_post_multiply(complex_matrix m, complex_row_vector v) Return the product of the matrix m and the diagonal matrix formed from the row vector v, i.e., m * diag_matrix(v).\nAvailable since 2.30\n\n\n\n\n\n\n \n\ncomplex sum(complex_vector x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.30\n \n\ncomplex sum(complex_row_vector x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.30\n \n\ncomplex sum(complex_matrix x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.30\n \n\ncomplex prod(complex_vector x) The product of the values in x, or 1 if x is empty\nAvailable since 2.30\n \n\ncomplex prod(complex_row_vector x) The product of the values in x, or 1 if x is empty\nAvailable since 2.30\n \n\ncomplex prod(complex_matrix x) The product of the values in x, or 1 if x 
is empty\nAvailable since 2.30\n\n\n\n\nMuch like with complex scalars, two functions are defined to get the real and imaginary components of complex-valued objects.\n\n\nThese functions return the same shape (e.g., matrix, vector, row vector, or array) object as their input, but demoted to a real type. For example, get_real(complex_matrix M) yields a matrix containing the real component of each value in M.\nThe following table contains examples of what this notation can mean:\n\n\n\nType T\nType T_demoted\n\n\n\n\ncomplex\nreal\n\n\ncomplex_vector\nvector\n\n\ncomplex_row_vector\nrow_vector\n\n\ncomplex_matrix\nmatrix\n\n\narray[] complex\narray[] real\n\n\narray[,,] complex\narray[,,] real\n\n\n\n\n\n\n \n\nT_demoted get_real(T x) Given an object of complex type T, return the same shape object but of type real by getting the real component of each element of x.\nAvailable since 2.30\n \n\nT_demoted get_imag(T x) Given an object of complex type T, return the same shape object but of type real by getting the imaginary component of each element of x.\nAvailable since 2.30\nFor example, given the Stan declaration\n complex_vector[2] z = [3+4i, 5+6i]';\nA call get_real(z) will yield the vector [3, 5]', and a call get_imag(z) will yield the vector [4, 6]'.\n\n\n\n\nThe following broadcast functions allow vectors, row vectors and matrices to be created by copying a single element into all of their cells. 
Matrices may also be created by stacking copies of row vectors vertically or stacking copies of column vectors horizontally.\n \n\ncomplex_vector rep_vector(complex z, int m) Return the size m (column) vector consisting of copies of z.\nAvailable since 2.30\n \n\ncomplex_row_vector rep_row_vector(complex z, int n) Return the size n row vector consisting of copies of z.\nAvailable since 2.30\n \n\ncomplex_matrix rep_matrix(complex z, int m, int n) Return the m by n matrix consisting of copies of z.\nAvailable since 2.30\n \n\ncomplex_matrix rep_matrix(complex_vector v, int n) Return the m by n matrix consisting of n copies of the (column) vector v of size m.\nAvailable since 2.30\n \n\ncomplex_matrix rep_matrix(complex_row_vector rv, int m) Return the m by n matrix consisting of m copies of the row vector rv of size n.\nAvailable since 2.30\n\n\n \n\ncomplex_matrix symmetrize_from_lower_tri(complex_matrix A) Construct a symmetric matrix from the lower triangle of A.\nAvailable since 2.30\n\n\n\n\n \n\ncomplex_matrix add_diag(complex_matrix m, complex_row_vector d) Add row_vector d to the diagonal of matrix m.\nAvailable since 2.30\n \n\ncomplex_matrix add_diag(complex_matrix m, complex_vector d) Add vector d to the diagonal of matrix m.\nAvailable since 2.30\n \n\ncomplex_matrix add_diag(complex_matrix m, complex d) Add scalar d to every diagonal element of matrix m.\nAvailable since 2.30\n \n\ncomplex_vector diagonal(complex_matrix x) The diagonal of the matrix x\nAvailable since 2.30\n \n\ncomplex_matrix diag_matrix(complex_vector x) The diagonal matrix with diagonal x\nAvailable since 2.30\n\n\n\nStan provides several functions for generating slices or blocks or diagonal entries for matrices.\n\n\n \n\ncomplex_vector col(complex_matrix x, int n) The n-th column of matrix x\nAvailable since 2.30\n \n\ncomplex_row_vector row(complex_matrix x, int m) The m-th row of matrix x\nAvailable since 2.30\n\n\n\n\n\n \n\ncomplex_matrix block(complex_matrix x, int i, int j, 
int n_rows, int n_cols) Return the submatrix of x that starts at row i and column j and extends n_rows rows and n_cols columns.\nAvailable since 2.30\n \n\ncomplex_vector sub_col(complex_matrix x, int i, int j, int n_rows) Return the sub-column of x that starts at row i and column j and extends n_rows rows and 1 column.\nAvailable since 2.30\n \n\ncomplex_row_vector sub_row(complex_matrix x, int i, int j, int n_cols) Return the sub-row of x that starts at row i and column j and extends 1 row and n_cols columns.\nAvailable since 2.30\n\n\n\n \n\ncomplex_vector head(complex_vector v, int n) Return the vector consisting of the first n elements of v.\nAvailable since 2.30\n \n\ncomplex_row_vector head(complex_row_vector rv, int n) Return the row vector consisting of the first n elements of rv.\nAvailable since 2.30\n \n\ncomplex_vector tail(complex_vector v, int n) Return the vector consisting of the last n elements of v.\nAvailable since 2.30\n \n\ncomplex_row_vector tail(complex_row_vector rv, int n) Return the row vector consisting of the last n elements of rv.\nAvailable since 2.30\n \n\ncomplex_vector segment(complex_vector v, int i, int n) Return the vector consisting of the n elements of v starting at i; i.e., elements i through i + n - 1.\nAvailable since 2.30\n \n\ncomplex_row_vector segment(complex_row_vector rv, int i, int n) Return the row vector consisting of the n elements of rv starting at i; i.e., elements i through i + n - 1.\nAvailable since 2.30\n\n\n\n\n\n\n\n \n\ncomplex_matrix append_col(complex_matrix x, complex_matrix y) Combine matrices x and y by column. The matrices must have the same number of rows.\nAvailable since 2.30\n \n\ncomplex_matrix append_col(complex_matrix x, complex_vector y) Combine matrix x and vector y by column. The matrix and the vector must have the same number of rows.\nAvailable since 2.30\n \n\ncomplex_matrix append_col(complex_vector x, complex_matrix y) Combine vector x and matrix y by column. 
The vector and the matrix must have the same number of rows.\nAvailable since 2.30\n \n\ncomplex_matrix append_col(complex_vector x, complex_vector y) Combine vectors x and y by column. The vectors must have the same number of rows.\nAvailable since 2.30\n \n\ncomplex_row_vector append_col(complex_row_vector x, complex_row_vector y) Combine row vectors x and y (of any size) into another row vector by appending y to the end of x.\nAvailable since 2.30\n \n\ncomplex_row_vector append_col(complex x, complex_row_vector y) Append x to the front of y, returning another row vector.\nAvailable since 2.30\n \n\ncomplex_row_vector append_col(complex_row_vector x, complex y) Append y to the end of x, returning another row vector.\nAvailable since 2.30\n\n\n\n \n\ncomplex_matrix append_row(complex_matrix x, complex_matrix y) Combine matrices x and y by row. The matrices must have the same number of columns.\nAvailable since 2.30\n \n\ncomplex_matrix append_row(complex_matrix x, complex_row_vector y) Combine matrix x and row vector y by row. The matrix and the row vector must have the same number of columns.\nAvailable since 2.30\n \n\ncomplex_matrix append_row(complex_row_vector x, complex_matrix y) Combine row vector x and matrix y by row. The row vector and the matrix must have the same number of columns.\nAvailable since 2.30\n \n\ncomplex_matrix append_row(complex_row_vector x, complex_row_vector y) Combine row vectors x and y by row. 
The row vectors must have the same number of columns.\nAvailable since 2.30\n \n\ncomplex_vector append_row(complex_vector x, complex_vector y) Concatenate vectors x and y of any size into another vector.\nAvailable since 2.30\n \n\ncomplex_vector append_row(complex x, complex_vector y) Append x to the top of y, returning another vector.\nAvailable since 2.30\n \n\ncomplex_vector append_row(complex_vector x, complex y) Append y to the bottom of x, returning another vector.\nAvailable since 2.30\n\n\n\n\n\n\nStan’s fast Fourier transform functions take the standard definition of the discrete Fourier transform (see the definitions below for specifics) and scale the inverse transform by one over dimensionality so that the following identities hold for complex vectors u and v,\n fft(inv_fft(u)) == u inv_fft(fft(v)) == v\nand in the 2-dimensional case for complex matrices A and B,\n fft2(inv_fft2(A)) == A inv_fft2(fft2(B)) == B\nAlthough the FFT functions only accept complex inputs, real vectors and matrices will be promoted to their complex counterparts before applying the FFT functions.\n \n\ncomplex_vector fft(complex_vector v) Return the discrete Fourier transform of the specified complex vector v. If \\(v \\in \\mathbb{C}^N\\) is a complex vector with \\(N\\) elements and \\(u =\n\\textrm{fft}(v)\\), then \\[\\begin{equation*}\nu_n = \\sum_{m < N}\nv_m \\cdot\n\\exp\\left(\\frac{-n \\cdot m \\cdot 2 \\cdot \\pi \\cdot \\sqrt{-1}}{N}\\right).\n\\end{equation*}\\]\nAvailable since 2.30\n \n\ncomplex_matrix fft2(complex_matrix m) Return the 2D discrete Fourier transform of the specified complex matrix m. The 2D FFT is defined as the result of applying the FFT to each row and then to each column.\nAvailable since 2.30\n \n\ncomplex_vector inv_fft(complex_vector u) Return the inverse of the discrete Fourier transform of the specified complex vector u. The inverse FFT (this function) is scaled so that fft(inv_fft(u)) == u. 
If \\(u \\in \\mathbb{C}^N\\) is a complex vector with \\(N\\) elements and \\(v = \\textrm{fft}^{-1}(u)\\), then \\[\\begin{equation*}\nv_n = \\frac{1}{N} \\sum_{m < N}\nu_m \\cdot\n\\exp\\left(\\frac{n \\cdot m \\cdot 2 \\cdot \\pi \\cdot \\sqrt{-1}}{N}\\right).\n\\end{equation*}\\] This only differs from the FFT by the sign inside the exponential and the scaling. The \\(\\frac{1}{N}\\) scaling ensures that fft(inv_fft(u)) == u and inv_fft(fft(v)) == v for complex vectors u and v.\nAvailable since 2.30\n \n\ncomplex_matrix inv_fft2(complex_matrix m) Return the inverse of the 2D discrete Fourier transform of the specified complex matrix m. The 2D inverse FFT is defined as the result of applying the inverse FFT to each row and then to each column. The invertible scaling of the inverse FFT ensures fft2(inv_fft2(A)) == A and inv_fft2(fft2(B)) == B.\nAvailable since 2.30\n\n\n\nThe cumulative sum of a sequence \\(x_1,\\ldots,x_N\\) is the sequence \\(y_1,\\ldots,y_N\\), where \\[\\begin{equation*} y_n = \\sum_{m = 1}^{n} x_m. 
\\end{equation*}\\]\n \n\narray[] complex cumulative_sum(array[] complex x) The cumulative sum of x\nAvailable since 2.30\n \n\ncomplex_vector cumulative_sum(complex_vector v) The cumulative sum of v\nAvailable since 2.30\n \n\ncomplex_row_vector cumulative_sum(complex_row_vector rv) The cumulative sum of rv\nAvailable since 2.30\n\n\n\n\n\n\nIn general, it is much more efficient and also more arithmetically stable to use matrix division than to multiply by an inverse.\n\n\n \n\ncomplex_row_vector operator/(complex_row_vector b, complex_matrix A) The right division of b by A; equivalently b * inverse(A)\nAvailable since 2.30\n \n\ncomplex_matrix operator/(complex_matrix B, complex_matrix A) The right division of B by A; equivalently B * inverse(A)\nAvailable since 2.30\n\n\n\n\n\n\n \n\ncomplex trace(complex_matrix A) The trace of A, or 0 if A is empty; A is not required to be diagonal\nAvailable since 2.30\n\n\n\n \n\ncomplex_vector eigenvalues(complex_matrix A) The complex-valued vector of eigenvalues of the matrix A. The eigenvalues are repeated according to their algebraic multiplicity, so there are as many eigenvalues as rows in the matrix. The eigenvalues are not sorted in any particular order.\nAvailable since 2.32\n \n\ncomplex_matrix eigenvectors(complex_matrix A) The matrix with the complex-valued (column) eigenvectors of the matrix A in the same order as returned by the function eigenvalues\nAvailable since 2.32\n \n\ntuple(complex_matrix, complex_vector) eigendecompose(complex_matrix A) Return the matrix of (column) eigenvectors and vector of eigenvalues of the matrix A. 
This function is equivalent to (eigenvectors(A), eigenvalues(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\n \n\ncomplex_vector eigenvalues_sym(complex_matrix A) The vector of eigenvalues of a symmetric matrix A in ascending order\nAvailable since 2.30\n \n\ncomplex_matrix eigenvectors_sym(complex_matrix A) The matrix with the (column) eigenvectors of symmetric matrix A in the same order as returned by the function eigenvalues_sym\nAvailable since 2.30\n \n\ntuple(complex_matrix, complex_vector) eigendecompose_sym(complex_matrix A) Return the matrix of (column) eigenvectors and vector of eigenvalues of the symmetric matrix A. This function is equivalent to (eigenvectors_sym(A), eigenvalues_sym(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\nBecause multiplying an eigenvector by \\(-1\\) results in an eigenvector, eigenvectors returned by a decomposition are only identified up to a sign change. In order to compare the eigenvectors produced by Stan’s eigendecomposition to others, signs may need to be normalized in some way, such as by fixing the sign of a component, or doing comparisons allowing a multiplication by \\(-1\\).\nThe condition number of a symmetric matrix is defined to be the ratio of the largest eigenvalue to the smallest eigenvalue. Large condition numbers lead to difficulty in numerical algorithms such as computing inverses, and matrices with large condition numbers are thus known as “ill conditioned.” The ratio can even be infinite in the case of singular matrices (i.e., those with eigenvalues of 0).\n\n\n\nThe matrix A can be decomposed into a diagonal matrix of singular values, D, and matrices of its left and right singular vectors, U and V, \\[\\begin{equation*} A = U D V^T. \\end{equation*}\\] The matrices of singular vectors here are thin. 
That is for an \\(N\\) by \\(P\\) input A, \\(M = min(N, P)\\), U is size \\(N\\) by \\(M\\) and V is size \\(P\\) by \\(M\\).\n \n\nvector singular_values(complex_matrix A) The singular values of A in descending order\nAvailable since 2.30\n \n\ncomplex_matrix svd_U(complex_matrix A) The left-singular vectors of A\nAvailable since 2.30\n \n\ncomplex_matrix svd_V(complex_matrix A) The right-singular vectors of A\nAvailable since 2.30\n \n\ntuple(complex_matrix, vector, complex_matrix) svd(complex_matrix A) Returns a tuple containing the left-singular vectors of A, the singular values of A in descending order, and the right-singular vectors of A. This function is equivalent to (svd_U(A), singular_values(A), svd_V(A)) but with a lower computational cost due to the shared work between the different components.\nAvailable since 2.33\n\n\n\nThe complex Schur decomposition of a square matrix \\(A\\) produces a complex unitary matrix \\(U\\) and a complex upper-triangular Schur form matrix \\(T\\) such that \\[A = U \\cdot T \\cdot U^{-1}\\]\nSince \\(U\\) is unitary, its inverse is also its conjugate transpose, \\(U^{-1} = U^*\\), \\(U^*(i, j) = \\mathrm{conj}(U(j, i))\\)\n \n\ncomplex_matrix complex_schur_decompose_t(matrix A) Compute the upper-triangular Schur form matrix of the complex Schur decomposition of A.\nAvailable since 2.31\n \n\ncomplex_matrix complex_schur_decompose_t(complex_matrix A) Compute the upper-triangular Schur form matrix of the complex Schur decomposition of A.\nAvailable since 2.31\n \n\ncomplex_matrix complex_schur_decompose_u(matrix A) Compute the unitary matrix of the complex Schur decomposition of A.\nAvailable since 2.31\n \n\ncomplex_matrix complex_schur_decompose_u(complex_matrix A) Compute the unitary matrix of the complex Schur decomposition of A.\nAvailable since 2.31\n \n\ntuple(complex_matrix, complex_matrix) complex_schur_decompose(matrix A) Returns the unitary matrix and the upper-triangular Schur form matrix of the complex Schur 
decomposition of A. This function is equivalent to (complex_schur_decompose_u(A), complex_schur_decompose_t(A)) but with a lower computational cost due to the shared work between the two results. This overload is equivalent to complex_schur_decompose(to_complex(A,0)) but is more efficient.\nAvailable since 2.33\n \n\ntuple(complex_matrix, complex_matrix) complex_schur_decompose(complex_matrix A) Returns the unitary matrix and the upper-triangular Schur form matrix of the complex Schur decomposition of A. This function is equivalent to (complex_schur_decompose_u(A), complex_schur_decompose_t(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\n\n\n\n\n\n \n\ncomplex_vector reverse(complex_vector v) Return a new vector containing the elements of the argument in reverse order.\nAvailable since 2.30\n \n\ncomplex_row_vector reverse(complex_row_vector v) Return a new row vector containing the elements of the argument in reverse order.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-promotion", + "href": "functions-reference/complex_matrix_operations.html#complex-promotion", + "title": "Complex Matrix Operations", + "section": "", + "text": "This chapter provides the details of functions that operate over complex matrices, vectors, and row vectors. These mirror the operations over real matrix types and are defined in the usual way for complex numbers.\n\n\nIf an expression e can be assigned to a variable of type T, then it can be used as an argument to a function that is specified to take arguments of type T. For instance, sqrt(real) is specified to take a real argument, but an integer expression such as 2 + 2 of type int can be passed to sqrt, so that sqrt(2 + 2) is well defined. 
This works by promoting the integer expression 2 + 2 to be of real type.\nThe rules for promotion in Stan are simple:\n\nint may be promoted to real,\nreal may be promoted to complex,\nvector can be promoted to complex_vector,\nrow_vector can be promoted to complex_row_vector,\nmatrix can be promoted to complex_matrix,\nif T can be promoted to U and U can be promoted to V, then T can be promoted to V (transitive), and\nif T can be promoted to U, then T[] can be promoted to U[] (covariant).\n\n\n\n\nWhen a function is called, the definition requiring the fewest number of promotions is used. For example, when calling vector + vector, the real-valued signature is used. When calling any of complex_vector + vector, vector + complex_vector, or complex_vector + complex_vector, the complex signature is used. If more than one signature matches with the minimal number of promotions, the call is ambiguous, and an error will be raised by the compiler. Promotion ambiguity leading to ill-defined calls should never happen with Stan built-in functions.\n\n\n\nComplex function signatures will only list the fully complex type. For example, with complex vector addition, we will list a single signature, complex_vector operator+(complex_vector, complex_vector). Through promotion, operator+ may be called with one complex vector and one real vector as well, but the documentation elides the implied signatures operator+(complex_vector, vector) and operator+(vector, complex_vector).\n\n\n\nGeneric functions work for arrays containing complex, complex matrix, complex vector, or complex row vector types. 
This includes the functions append_array, dims, head, num_elements, rep_array, reverse, segment, size, and tail.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#integer-valued-complex-matrix-size-functions", + "href": "functions-reference/complex_matrix_operations.html#integer-valued-complex-matrix-size-functions", + "title": "Complex Matrix Operations", + "section": "", + "text": "int num_elements(complex_vector x) The total number of elements in the vector x (same as function rows)\nAvailable since 2.30\n \n\nint num_elements(complex_row_vector x) The total number of elements in the vector x (same as function cols)\nAvailable since 2.30\n \n\nint num_elements(complex_matrix x) The total number of elements in the matrix x. For example, if x is a \\(5 \\times 3\\) matrix, then num_elements(x) is 15\nAvailable since 2.30\n \n\nint rows(complex_vector x) The number of rows in the vector x\nAvailable since 2.30\n \n\nint rows(complex_row_vector x) The number of rows in the row vector x, namely 1\nAvailable since 2.30\n \n\nint rows(complex_matrix x) The number of rows in the matrix x\nAvailable since 2.30\n \n\nint cols(complex_vector x) The number of columns in the vector x, namely 1\nAvailable since 2.30\n \n\nint cols(complex_row_vector x) The number of columns in the row vector x\nAvailable since 2.30\n \n\nint cols(complex_matrix x) The number of columns in the matrix x\nAvailable since 2.30\n \n\nint size(complex_vector x) The size of x, i.e., the number of elements\nAvailable since 2.30\n \n\nint size(complex_row_vector x) The size of x, i.e., the number of elements\nAvailable since 2.30\n \n\nint size(complex_matrix x) The size of the matrix x. 
For example, if x is a \\(5 \\times 3\\) matrix, then size(x) is 15.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-matrix-arithmetic-operators", + "href": "functions-reference/complex_matrix_operations.html#complex-matrix-arithmetic-operators", + "title": "Complex Matrix Operations", + "section": "", + "text": "Stan supports all basic complex arithmetic operators using infix, prefix and postfix operations. This section lists the operations supported by Stan along with their argument and result types.\n\n\n \n\ncomplex_vector operator-(complex_vector x) The negation of the vector x.\nAvailable since 2.30\n \n\ncomplex_row_vector operator-(complex_row_vector x) The negation of the row vector x.\nAvailable since 2.30\n \n\ncomplex_matrix operator-(complex_matrix x) The negation of the matrix x.\nAvailable since 2.30\n \n\nT operator-(T x) Vectorized version of operator-. 
If T x is a (possibly nested) array of matrix types, -x is the same shape array where each individual value is negated.\nAvailable since 2.31\n\n\n\n \n\ncomplex_vector operator+(complex_vector x, complex_vector y) The sum of the vectors x and y.\nAvailable since 2.30\n \n\ncomplex_row_vector operator+(complex_row_vector x, complex_row_vector y) The sum of the row vectors x and y.\nAvailable since 2.30\n \n\ncomplex_matrix operator+(complex_matrix x, complex_matrix y) The sum of the matrices x and y\nAvailable since 2.30\n \n\ncomplex_vector operator-(complex_vector x, complex_vector y) The difference between the vectors x and y.\nAvailable since 2.30\n \n\ncomplex_row_vector operator-(complex_row_vector x, complex_row_vector y) The difference between the row vectors x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator-(complex_matrix x, complex_matrix y) The difference between the matrices x and y\nAvailable since 2.30\n \n\ncomplex_vector operator*(complex x, complex_vector y) The product of the scalar x and vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator*(complex x, complex_row_vector y) The product of the scalar x and the row vector y\nAvailable since 2.30\n \n\ncomplex_matrix operator*(complex x, complex_matrix y) The product of the scalar x and the matrix y\nAvailable since 2.30\n \n\ncomplex_vector operator*(complex_vector x, complex y) The product of the scalar y and vector x\nAvailable since 2.30\n \n\ncomplex_matrix operator*(complex_vector x, complex_row_vector y) The product of the vector x and row vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator*(complex_row_vector x, complex y) The product of the scalar y and row vector x\nAvailable since 2.30\n \n\ncomplex operator*(complex_row_vector x, complex_vector y) The product of the row vector x and vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator*(complex_row_vector x, complex_matrix y) The product of the row vector x and matrix y\nAvailable since 
2.30\n \n\ncomplex_matrix operator*(complex_matrix x, complex y) The product of the scalar y and matrix x\nAvailable since 2.30\n \n\ncomplex_vector operator*(complex_matrix x, complex_vector y) The product of the matrix x and vector y\nAvailable since 2.30\n \n\ncomplex_matrix operator*(complex_matrix x, complex_matrix y) The product of the matrices x and y\nAvailable since 2.30\n\n\n\n \n\ncomplex_vector operator+(complex_vector x, complex y) The result of adding y to every entry in the vector x\nAvailable since 2.30\n \n\ncomplex_vector operator+(complex x, complex_vector y) The result of adding x to every entry in the vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator+(complex_row_vector x, complex y) The result of adding y to every entry in the row vector x\nAvailable since 2.30\n \n\ncomplex_row_vector operator+(complex x, complex_row_vector y) The result of adding x to every entry in the row vector y\nAvailable since 2.30\n \n\ncomplex_matrix operator+(complex_matrix x, complex y) The result of adding y to every entry in the matrix x\nAvailable since 2.30\n \n\ncomplex_matrix operator+(complex x, complex_matrix y) The result of adding x to every entry in the matrix y\nAvailable since 2.30\n \n\ncomplex_vector operator-(complex_vector x, complex y) The result of subtracting y from every entry in the vector x\nAvailable since 2.30\n \n\ncomplex_vector operator-(complex x, complex_vector y) The result of adding x to every entry in the negation of the vector y\nAvailable since 2.30\n \n\ncomplex_row_vector operator-(complex_row_vector x, complex y) The result of subtracting y from every entry in the row vector x\nAvailable since 2.30\n \n\ncomplex_row_vector operator-(complex x, complex_row_vector y) The result of adding x to every entry in the negation of the row vector y\nAvailable since 2.30\n \n\ncomplex_matrix operator-(complex_matrix x, complex y) The result of subtracting y from every entry in the matrix x\nAvailable since 2.30\n 
\n\ncomplex_matrix operator-(complex x, complex_matrix y) The result of adding x to every entry in negation of the matrix y\nAvailable since 2.30\n \n\ncomplex_vector operator/(complex_vector x, complex y) The result of dividing each entry in the vector x by y\nAvailable since 2.30\n \n\ncomplex_row_vector operator/(complex_row_vector x, complex y) The result of dividing each entry in the row vector x by y\nAvailable since 2.30\n \n\ncomplex_matrix operator/(complex_matrix x, complex y) The result of dividing each entry in the matrix x by y\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-transposition-operator", + "href": "functions-reference/complex_matrix_operations.html#complex-transposition-operator", + "title": "Complex Matrix Operations", + "section": "", + "text": "Complex matrix transposition is represented using a postfix operator.\n \n\ncomplex_matrix operator'(complex_matrix x) The transpose of the matrix x, written as x'\nAvailable since 2.30\n \n\ncomplex_row_vector operator'(complex_vector x) The transpose of the vector x, written as x'\nAvailable since 2.30\n \n\ncomplex_vector operator'(complex_row_vector x) The transpose of the row vector x, written as x'\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-elementwise-functions", + "href": "functions-reference/complex_matrix_operations.html#complex-elementwise-functions", + "title": "Complex Matrix Operations", + "section": "", + "text": "As in the real case, elementwise complex functions apply a function to each element of a vector or matrix, returning a result of the same shape as the argument.\n \n\ncomplex_vector operator.*(complex_vector x, complex_vector y) The elementwise product 
of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector operator.*(complex_row_vector x, complex_row_vector y) The elementwise product of x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator.*(complex_matrix x, complex_matrix y) The elementwise product of x and y\nAvailable since 2.30\n \n\ncomplex_vector operator./(complex_vector x, complex_vector y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_vector operator./(complex x, complex_vector y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_vector operator./(complex_vector x, complex y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector operator./(complex_row_vector x, complex_row_vector y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector operator./(complex x, complex_row_vector y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector operator./(complex_row_vector x, complex y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator./(complex_matrix x, complex_matrix y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator./(complex x, complex_matrix y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\ncomplex_matrix operator./(complex_matrix x, complex y) The elementwise quotient of x and y\nAvailable since 2.30\n \n\nvector operator.^(complex_vector x, complex_vector y) The elementwise power of y and x\nAvailable since 2.30\n \n\nvector operator.^(complex_vector x, complex y) The elementwise power of y and x\nAvailable since 2.30\n \n\nvector operator.^(complex x, complex_vector y) The elementwise power of y and x\nAvailable since 2.30\n \n\nrow_vector operator.^(complex_row_vector x, complex_row_vector y) The elementwise power of y and x\nAvailable since 2.30\n \n\nrow_vector operator.^(complex_row_vector x, complex y) The elementwise power of y and x\nAvailable since 2.30\n 
\n\nrow_vector operator.^(complex x, complex_row_vector y) The elementwise power of y and x\nAvailable since 2.30\n \n\nmatrix operator.^( complex_matrix x, complex_matrix y) The elementwise power of y and x\nAvailable since 2.30\n \n\nmatrix operator.^( complex_matrix x, complex y) The elementwise power of y and x\nAvailable since 2.30\n \n\nmatrix operator.^(complex x, complex_matrix y) The elementwise power of y and x\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#dot-products-and-specialized-products-for-complex-matrices", + "href": "functions-reference/complex_matrix_operations.html#dot-products-and-specialized-products-for-complex-matrices", + "title": "Complex Matrix Operations", + "section": "", + "text": "complex dot_product(complex_vector x, complex_vector y) The dot product of x and y\nAvailable since 2.30\n \n\ncomplex dot_product(complex_vector x, complex_row_vector y) The dot product of x and y\nAvailable since 2.30\n \n\ncomplex dot_product(complex_row_vector x, complex_vector y) The dot product of x and y\nAvailable since 2.30\n \n\ncomplex dot_product(complex_row_vector x, complex_row_vector y) The dot product of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_product(complex_vector x, complex_vector y) The dot product of the columns of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_product(complex_row_vector x, complex_row_vector y) The dot product of the columns of x and y\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_product(complex_matrix x, complex_matrix y) The dot product of the columns of x and y\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_product(complex_vector x, complex_vector y) The dot product of the rows of x and y\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_product(complex_row_vector x, complex_row_vector y) The dot 
product of the rows of x and y\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_product(complex_matrix x, complex_matrix y) The dot product of the rows of x and y\nAvailable since 2.30\n \n\ncomplex dot_self(complex_vector x) The dot product of the vector x with itself\nAvailable since 2.30\n \n\ncomplex dot_self(complex_row_vector x) The dot product of the row vector x with itself\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_self(complex_vector x) The dot product of the columns of x with themselves\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_self(complex_row_vector x) The dot product of the columns of x with themselves\nAvailable since 2.30\n \n\ncomplex_row_vector columns_dot_self(complex_matrix x) The dot product of the columns of x with themselves\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_self(complex_vector x) The dot product of the rows of x with themselves\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_self(complex_row_vector x) The dot product of the rows of x with themselves\nAvailable since 2.30\n \n\ncomplex_vector rows_dot_self(complex_matrix x) The dot product of the rows of x with themselves\nAvailable since 2.30\n\n\n \n\ncomplex_matrix diag_pre_multiply(complex_vector v, complex_matrix m) Return the product of the diagonal matrix formed from the vector v and the matrix m, i.e., diag_matrix(v) * m.\nAvailable since 2.30\n \n\ncomplex_matrix diag_pre_multiply(complex_row_vector v, complex_matrix m) Return the product of the diagonal matrix formed from the vector rv and the matrix m, i.e., diag_matrix(rv) * m.\nAvailable since 2.30\n \n\ncomplex_matrix diag_post_multiply(complex_matrix m, complex_vector v) Return the product of the matrix m and the diagonal matrix formed from the vector v, i.e., m * diag_matrix(v).\nAvailable since 2.30\n \n\ncomplex_matrix diag_post_multiply(complex_matrix m, complex_row_vector v) Return the product of the matrix m and the diagonal matrix formed from the row vector 
rv, i.e., m * diag_matrix(rv).\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-reductions", + "href": "functions-reference/complex_matrix_operations.html#complex-reductions", + "title": "Complex Matrix Operations", + "section": "", + "text": "complex sum(complex_vector x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.30\n \n\ncomplex sum(complex_row_vector x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.30\n \n\ncomplex sum(complex_matrix x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.30\n \n\ncomplex prod(complex_vector x) The product of the values in x, or 1 if x is empty\nAvailable since 2.30\n \n\ncomplex prod(complex_row_vector x) The product of the values in x, or 1 if x is empty\nAvailable since 2.30\n \n\ncomplex prod(complex_matrix x) The product of the values in x, or 1 if x is empty\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#vectorized-accessor-functions", + "href": "functions-reference/complex_matrix_operations.html#vectorized-accessor-functions", + "title": "Complex Matrix Operations", + "section": "", + "text": "Much like with complex scalars, two functions are defined to get the real and imaginary components of complex-valued objects.\n\n\nThese functions return the same shape (e.g., matrix, vector, row vector, or array) object as their input, but demoted to a real type. 
For example, get_real(complex_matrix M) yields a matrix containing the real component of each value in M.\nThe following table contains examples of what this notation can mean:\n\n\n\nType T\nType T_demoted\n\n\n\n\ncomplex\nreal\n\n\ncomplex_vector\nvector\n\n\ncomplex_row_vector\nrow_vector\n\n\ncomplex_matrix\nmatrix\n\n\narray[] complex\narray[] real\n\n\narray[,,] complex\narray[,,] real\n\n\n\n\n\n\n \n\nT_demoted get_real(T x) Given an object of complex type T, return the same shape object but of type real by getting the real component of each element of x.\nAvailable since 2.30\n \n\nT_demoted get_imag(T x) Given an object of complex type T, return the same shape object but of type real by getting the imaginary component of each element of x.\nAvailable since 2.30\nFor example, given the Stan declaration\n complex_vector[2] z = [3+4i, 5+6i]';\nA call get_real(z) will yield the vector [3, 5]', and a call get_imag(z) will yield the vector [4, 6]'.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-broadcast-functions", + "href": "functions-reference/complex_matrix_operations.html#complex-broadcast-functions", + "title": "Complex Matrix Operations", + "section": "", + "text": "The following broadcast functions allow vectors, row vectors and matrices to be created by copying a single element into all of their cells. 
Matrices may also be created by stacking copies of row vectors vertically or stacking copies of column vectors horizontally.\n \n\ncomplex_vector rep_vector(complex z, int m) Return the size m (column) vector consisting of copies of z.\nAvailable since 2.30\n \n\ncomplex_row_vector rep_row_vector(complex z, int n) Return the size n row vector consisting of copies of z.\nAvailable since 2.30\n \n\ncomplex_matrix rep_matrix(complex z, int m, int n) Return the m by n matrix consisting of copies of z.\nAvailable since 2.30\n \n\ncomplex_matrix rep_matrix(complex_vector v, int n) Return the m by n matrix consisting of n copies of the (column) vector v of size m.\nAvailable since 2.30\n \n\ncomplex_matrix rep_matrix(complex_row_vector rv, int m) Return the m by n matrix consisting of m copies of the row vector rv of size n.\nAvailable since 2.30\n\n\n \n\ncomplex_matrix symmetrize_from_lower_tri(complex_matrix A) Construct a symmetric matrix from the lower triangle of A.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#diagonal-complex-matrix-functions", + "href": "functions-reference/complex_matrix_operations.html#diagonal-complex-matrix-functions", + "title": "Complex Matrix Operations", + "section": "", + "text": "complex_matrix add_diag(complex_matrix m, complex_row_vector d) Add row_vector d to the diagonal of matrix m.\nAvailable since 2.30\n \n\ncomplex_matrix add_diag(complex_matrix m, complex_vector d) Add vector d to the diagonal of matrix m.\nAvailable since 2.30\n \n\ncomplex_matrix add_diag(complex_matrix m, complex d) Add scalar d to every diagonal element of matrix m.\nAvailable since 2.30\n \n\ncomplex_vector diagonal(complex_matrix x) The diagonal of the matrix x\nAvailable since 2.30\n \n\ncomplex_matrix diag_matrix(complex_vector x) The diagonal matrix with diagonal x\nAvailable since 2.30", + "crumbs": [ + 
"Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#slicing-and-blocking-functions-for-complex-matrices", + "href": "functions-reference/complex_matrix_operations.html#slicing-and-blocking-functions-for-complex-matrices", + "title": "Complex Matrix Operations", + "section": "", + "text": "Stan provides several functions for generating slices or blocks or diagonal entries for matrices.\n\n\n \n\ncomplex_vector col(complex_matrix x, int n) The n-th column of matrix x\nAvailable since 2.30\n \n\ncomplex_row_vector row(complex_matrix x, int m) The m-th row of matrix x\nAvailable since 2.30\n\n\n\n\n\n \n\ncomplex_matrix block(complex_matrix x, int i, int j, int n_rows, int n_cols) Return the submatrix of x that starts at row i and column j and extends n_rows rows and n_cols columns.\nAvailable since 2.30\n \n\ncomplex_vector sub_col(complex_matrix x, int i, int j, int n_rows) Return the sub-column of x that starts at row i and column j and extends n_rows rows and 1 column.\nAvailable since 2.30\n \n\ncomplex_row_vector sub_row(complex_matrix x, int i, int j, int n_cols) Return the sub-row of x that starts at row i and column j and extends 1 row and n_cols columns.\nAvailable since 2.30\n\n\n\n \n\ncomplex_vector head(complex_vector v, int n) Return the vector consisting of the first n elements of v.\nAvailable since 2.30\n \n\ncomplex_row_vector head(complex_row_vector rv, int n) Return the row vector consisting of the first n elements of rv.\nAvailable since 2.30\n \n\ncomplex_vector tail(complex_vector v, int n) Return the vector consisting of the last n elements of v.\nAvailable since 2.30\n \n\ncomplex_row_vector tail(complex_row_vector rv, int n) Return the row vector consisting of the last n elements of rv.\nAvailable since 2.30\n \n\ncomplex_vector segment(complex_vector v, int i, int n) Return the vector consisting of the n elements of v starting at i; i.e., 
elements i through i + n - 1.\nAvailable since 2.30\n \n\ncomplex_row_vector segment(complex_row_vector rv, int i, int n) Return the row vector consisting of the n elements of rv starting at i; i.e., elements i through i + n - 1.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-matrix-concatenation", + "href": "functions-reference/complex_matrix_operations.html#complex-matrix-concatenation", + "title": "Complex Matrix Operations", + "section": "", + "text": "complex_matrix append_col(complex_matrix x, complex_matrix y) Combine matrices x and y by column. The matrices must have the same number of rows.\nAvailable since 2.30\n \n\ncomplex_matrix append_col(complex_matrix x, complex_vector y) Combine matrix x and vector y by column. The matrix and the vector must have the same number of rows.\nAvailable since 2.30\n \n\ncomplex_matrix append_col(complex_vector x, complex_matrix y) Combine vector x and matrix y by column. The vector and the matrix must have the same number of rows.\nAvailable since 2.30\n \n\ncomplex_matrix append_col(complex_vector x, complex_vector y) Combine vectors x and y by column. The vectors must have the same number of rows.\nAvailable since 2.30\n \n\ncomplex_row_vector append_col(complex_row_vector x, complex_row_vector y) Combine row vectors x and y (of any size) into another row vector by appending y to the end of x.\nAvailable since 2.30\n \n\ncomplex_row_vector append_col(complex x, complex_row_vector y) Append x to the front of y, returning another row vector.\nAvailable since 2.30\n \n\ncomplex_row_vector append_col(complex_row_vector x, complex y) Append y to the end of x, returning another row vector.\nAvailable since 2.30\n\n\n\n \n\ncomplex_matrix append_row(complex_matrix x, complex_matrix y) Combine matrices x and y by row. 
The matrices must have the same number of columns.\nAvailable since 2.30\n \n\ncomplex_matrix append_row(complex_matrix x, complex_row_vector y) Combine matrix x and row vector y by row. The matrix and the row vector must have the same number of columns.\nAvailable since 2.30\n \n\ncomplex_matrix append_row(complex_row_vector x, complex_matrix y) Combine row vector x and matrix y by row. The row vector and the matrix must have the same number of columns.\nAvailable since 2.30\n \n\ncomplex_matrix append_row(complex_row_vector x, complex_row_vector y) Combine row vectors x and y by row. The row vectors must have the same number of columns.\nAvailable since 2.30\n \n\ncomplex_vector append_row(complex_vector x, complex_vector y) Concatenate vectors x and y of any size into another vector.\nAvailable since 2.30\n \n\ncomplex_vector append_row(complex x, complex_vector y) Append x to the top of y, returning another vector.\nAvailable since 2.30\n \n\ncomplex_vector append_row(complex_vector x, complex y) Append y to the bottom of x, returning another vector.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-special-matrix-functions", + "href": "functions-reference/complex_matrix_operations.html#complex-special-matrix-functions", + "title": "Complex Matrix Operations", + "section": "", + "text": "Stan’s fast Fourier transform functions take the standard definition of the discrete Fourier transform (see the definitions below for specifics) and scale the inverse transform by one over dimensionality so that the following identities hold for complex vectors u and v,\n fft(inv_fft(u)) == u inv_fft(fft(v)) == v\nand in the 2-dimensional case for complex matrices A and B,\n fft2(inv_fft2(A)) == A inv_fft2(fft2(B)) == B\nAlthough the FFT functions only accept complex inputs, real vectors and matrices will be promoted to their 
complex counterparts before applying the FFT functions.\n \n\ncomplex_vector fft(complex_vector v) Return the discrete Fourier transform of the specified complex vector v. If \\(v \\in \\mathbb{C}^N\\) is a complex vector with \\(N\\) elements and \\(u =\n\\textrm{fft}(v)\\), then \\[\\begin{equation*}\nu_n = \\sum_{m < N}\nv_m \\cdot\n\\exp\\left(\\frac{-n \\cdot m \\cdot 2 \\cdot \\pi \\cdot \\sqrt{-1}}{N}\\right).\n\\end{equation*}\\]\nAvailable since 2.30\n \n\ncomplex_matrix fft2(complex_matrix m) Return the 2D discrete Fourier transform of the specified complex matrix m. The 2D FFT is defined as the result of applying the FFT to each row and then to each column.\nAvailable since 2.30\n \n\ncomplex_vector inv_fft(complex_vector u) Return the inverse of the discrete Fourier transform of the specified complex vector u. The inverse FFT (this function) is scaled so that fft(inv_fft(u)) == u. If \\(u \\in \\mathbb{C}^N\\) is a complex vector with \\(N\\) elements and \\(v = \\textrm{fft}^{-1}(u)\\), then \\[\\begin{equation*}\nv_n = \\frac{1}{N} \\sum_{m < N}\nu_m \\cdot\n\\exp\\left(\\frac{n \\cdot m \\cdot 2 \\cdot \\pi \\cdot \\sqrt{-1}}{N}\\right).\n\\end{equation*}\\] This only differs from the FFT by the sign inside the exponential and the scaling. The \\(\\frac{1}{N}\\) scaling ensures that fft(inv_fft(u)) == u and inv_fft(fft(v)) == v for complex vectors u and v.\nAvailable since 2.30\n \n\ncomplex_matrix inv_fft2(complex_matrix m) Return the inverse of the 2D discrete Fourier transform of the specified complex matrix m. The 2D inverse FFT is defined as the result of applying the inverse FFT to each row and then to each column. The invertible scaling of the inverse FFT ensures fft2(inv_fft2(A)) == A and inv_fft2(fft2(B)) == B.\nAvailable since 2.30\n\n\n\nThe cumulative sum of a sequence \\(x_1,\\ldots,x_N\\) is the sequence \\(y_1,\\ldots,y_N\\), where \\[\\begin{equation*} y_n = \\sum_{m = 1}^{n} x_m. 
\\end{equation*}\\]\n \n\narray[] complex cumulative_sum(array[] complex x) The cumulative sum of x\nAvailable since 2.30\n \n\ncomplex_vector cumulative_sum(complex_vector v) The cumulative sum of v\nAvailable since 2.30\n \n\ncomplex_row_vector cumulative_sum(complex_row_vector rv) The cumulative sum of rv\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#complex-linear-algebra-functions", + "href": "functions-reference/complex_matrix_operations.html#complex-linear-algebra-functions", + "title": "Complex Matrix Operations", + "section": "", + "text": "In general, it is much more efficient and also more arithmetically stable to use matrix division than to multiply by an inverse.\n\n\n \n\ncomplex_row_vector operator/(complex_row_vector b, complex_matrix A) The right division of b by A; equivalently b * inverse(A)\nAvailable since 2.30\n \n\ncomplex_matrix operator/(complex_matrix B, complex_matrix A) The right division of B by A; equivalently B * inverse(A)\nAvailable since 2.30\n\n\n\n\n\n\n \n\ncomplex trace(complex_matrix A) The trace of A, or 0 if A is empty; A is not required to be diagonal\nAvailable since 2.30\n\n\n\n \n\ncomplex_vector eigenvalues(complex_matrix A) The complex-valued vector of eigenvalues of the matrix A. The eigenvalues are repeated according to their algebraic multiplicity, so there are as many eigenvalues as rows in the matrix. The eigenvalues are not sorted in any particular order.\nAvailable since 2.32\n \n\ncomplex_matrix eigenvectors(complex_matrix A) The matrix with the complex-valued (column) eigenvectors of the matrix A in the same order as returned by the function eigenvalues\nAvailable since 2.32\n \n\ntuple(complex_matrix, complex_vector) eigendecompose(complex_matrix A) Return the matrix of (column) eigenvectors and vector of eigenvalues of the matrix A. 
This function is equivalent to (eigenvectors(A), eigenvalues(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\n \n\ncomplex_vector eigenvalues_sym(complex_matrix A) The vector of eigenvalues of a symmetric matrix A in ascending order\nAvailable since 2.30\n \n\ncomplex_matrix eigenvectors_sym(complex_matrix A) The matrix with the (column) eigenvectors of symmetric matrix A in the same order as returned by the function eigenvalues_sym\nAvailable since 2.30\n \n\ntuple(complex_matrix, complex_vector) eigendecompose_sym(complex_matrix A) Return the matrix of (column) eigenvectors and vector of eigenvalues of the symmetric matrix A. This function is equivalent to (eigenvectors_sym(A), eigenvalues_sym(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\nBecause multiplying an eigenvector by \\(-1\\) results in an eigenvector, eigenvectors returned by a decomposition are only identified up to a sign change. In order to compare the eigenvectors produced by Stan’s eigendecomposition to others, signs may need to be normalized in some way, such as by fixing the sign of a component, or doing comparisons allowing a multiplication by \\(-1\\).\nThe condition number of a symmetric matrix is defined to be the ratio of the largest eigenvalue to the smallest eigenvalue. Large condition numbers lead to difficulty in numerical algorithms such as computing inverses, and thus known as “ill conditioned.” The ratio can even be infinite in the case of singular matrices (i.e., those with eigenvalues of 0).\n\n\n\nThe matrix A can be decomposed into a diagonal matrix of singular values, D, and matrices of its left and right singular vectors, U and V, \\[\\begin{equation*} A = U D V^T. \\end{equation*}\\] The matrices of singular vectors here are thin. 
That is, for an \\(N\\) by \\(P\\) input A, \\(M = min(N, P)\\), U is size \\(N\\) by \\(M\\) and V is size \\(P\\) by \\(M\\).\n \n\nvector singular_values(complex_matrix A) The singular values of A in descending order\nAvailable since 2.30\n \n\ncomplex_matrix svd_U(complex_matrix A) The left-singular vectors of A\nAvailable since 2.30\n \n\ncomplex_matrix svd_V(complex_matrix A) The right-singular vectors of A\nAvailable since 2.30\n \n\ntuple(complex_matrix, vector, complex_matrix) svd(complex_matrix A) Returns a tuple containing the left-singular vectors of A, the singular values of A in descending order, and the right-singular vectors of A. This function is equivalent to (svd_U(A), singular_values(A), svd_V(A)) but with a lower computational cost due to the shared work between the different components.\nAvailable since 2.33\n\n\n\nThe complex Schur decomposition of a square matrix \\(A\\) produces a complex unitary matrix \\(U\\) and a complex upper-triangular Schur form matrix \\(T\\) such that \\[A = U \\cdot T \\cdot U^{-1}\\]\nSince \\(U\\) is unitary, its inverse is also its conjugate transpose, \\(U^{-1} = U^*\\), \\(U^*(i, j) = \\mathrm{conj}(U(j, i))\\)\n \n\ncomplex_matrix complex_schur_decompose_t(matrix A) Compute the upper-triangular Schur form matrix of the complex Schur decomposition of A.\nAvailable since 2.31\n \n\ncomplex_matrix complex_schur_decompose_t(complex_matrix A) Compute the upper-triangular Schur form matrix of the complex Schur decomposition of A.\nAvailable since 2.31\n \n\ncomplex_matrix complex_schur_decompose_u(matrix A) Compute the unitary matrix of the complex Schur decomposition of A.\nAvailable since 2.31\n \n\ncomplex_matrix complex_schur_decompose_u(complex_matrix A) Compute the unitary matrix of the complex Schur decomposition of A.\nAvailable since 2.31\n \n\ntuple(complex_matrix, complex_matrix) complex_schur_decompose(matrix A) Returns the unitary matrix and the upper-triangular Schur form matrix of the complex Schur 
decomposition of A. This function is equivalent to (complex_schur_decompose_u(A), complex_schur_decompose_t(A)) but with a lower computational cost due to the shared work between the two results. This overload is equivalent to complex_schur_decompose(to_complex(A,0)) but is more efficient.\nAvailable since 2.33\n \n\ntuple(complex_matrix, complex_matrix) complex_schur_decompose(complex_matrix A) Returns the unitary matrix and the upper-triangular Schur form matrix of the complex Schur decomposition of A. This function is equivalent to (complex_schur_decompose_u(A), complex_schur_decompose_t(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/complex_matrix_operations.html#reverse-functions-for-complex-matrices", + "href": "functions-reference/complex_matrix_operations.html#reverse-functions-for-complex-matrices", + "title": "Complex Matrix Operations", + "section": "", + "text": "complex_vector reverse(complex_vector v) Return a new vector containing the elements of the argument in reverse order.\nAvailable since 2.30\n \n\ncomplex_row_vector reverse(complex_row_vector v) Return a new row vector containing the elements of the argument in reverse order.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex Matrix Operations" + ] + }, + { + "objectID": "functions-reference/circular_distributions.html", + "href": "functions-reference/circular_distributions.html", + "title": "Circular Distributions", + "section": "", + "text": "Circular distributions are defined for finite values y in any interval of length \\(2\\pi\\).\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\) and \\(\\kappa \\in \\mathbb{R}^+\\), then for \\(y \\in\n\\mathbb{R}\\), \\[\\begin{equation*} \\text{VonMises}(y|\\mu,\\kappa) 
=\n\\frac{\\exp(\\kappa\\cos(y-\\mu))}{2\\pi I_0(\\kappa)} \\!. \\end{equation*}\\] In order for this density to properly normalize, \\(y\\) must be restricted to some interval \\((c, c + 2\\pi)\\) of length \\(2 \\pi\\), because \\[\\begin{equation*} \\int_{c}^{c +\n2\\pi} \\text{VonMises}(y|\\mu,\\kappa) dy = 1. \\end{equation*}\\] Similarly, if \\(\\mu\\) is a parameter, it will typically be restricted to the same range as \\(y\\).\nIf \\(\\kappa > 0\\), a von Mises distribution with its \\(2 \\pi\\) interval of support centered around its location \\(\\mu\\) will have a single mode at \\(\\mu\\); for example, restricting \\(y\\) to \\((-\\pi,\\pi)\\) and taking \\(\\mu = 0\\) leads to a single local optimum at the mode \\(\\mu\\). If the location \\(\\mu\\) is not in the center of the support, the density is circularly translated and there will be a second local maximum at the boundary furthest from the mode. Ideally, the parameterization and support will be set up so that the bulk of the probability mass is in a continuous interval around the mean \\(\\mu\\).\nFor \\(\\kappa = 0\\), the Von Mises distribution corresponds to the circular uniform distribution with density \\(1 / (2 \\pi)\\) (independently of the values of \\(y\\) or \\(\\mu\\)).\n\n\n\ny ~ von_mises(mu, kappa)\nIncrement target log probability density with von_mises_lupdf(y | mu, kappa).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal von_mises_lpdf(reals y | reals mu, reals kappa) The log of the von mises density of y given location mu and scale kappa.\nAvailable since 2.18\n \n\nreal von_mises_lupdf(reals y | reals mu, reals kappa) The log of the von mises density of y given location mu and scale kappa dropping constant additive terms.\nAvailable since 2.25\n \n\nreal von_mises_cdf(reals y | reals mu, reals kappa) The von mises cumulative distribution function of y given location mu and scale kappa.\nAvailable since 2.29\n \n\nreal von_mises_lcdf(reals y | reals mu, reals kappa) The log of the von 
mises cumulative distribution function of y given location mu and scale kappa.\nAvailable since 2.29\n \n\nreal von_mises_lccdf(reals y | reals mu, reals kappa) The log of the von mises complementary cumulative distribution function of y given location mu and scale kappa.\nAvailable since 2.29\n \n\nR von_mises_rng(reals mu, reals kappa) Generate a Von Mises variate with location mu and scale kappa (i.e. returns values in the interval \\([(\\mu \\mod 2\\pi)-\\pi,(\\mu \\mod\n2\\pi)+\\pi]\\)); may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\nEvaluating the Von Mises distribution for \\(\\kappa > 100\\) is numerically unstable in the current implementation. Nathanael I. Lichti suggested the following workaround on the Stan users group, based on the fact that as \\(\\kappa \\rightarrow \\infty\\), \\[\\begin{equation*}\n\\text{VonMises}(y|\\mu,\\kappa) \\rightarrow \\text{Normal}(\\mu, \\sqrt{1 /\n\\kappa}). \\end{equation*}\\] The workaround is to replace y ~ von_mises(mu,kappa) with\nif (kappa < 100) {\n y ~ von_mises(mu, kappa);\n} else {\n y ~ normal(mu, sqrt(1 / kappa));\n}", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Circular Distributions" + ] + }, + { + "objectID": "functions-reference/circular_distributions.html#von-mises-distribution", + "href": "functions-reference/circular_distributions.html#von-mises-distribution", + "title": "Circular Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\) and \\(\\kappa \\in \\mathbb{R}^+\\), then for \\(y \\in\n\\mathbb{R}\\), \\[\\begin{equation*} \\text{VonMises}(y|\\mu,\\kappa) =\n\\frac{\\exp(\\kappa\\cos(y-\\mu))}{2\\pi I_0(\\kappa)} \\!. 
\\end{equation*}\\] In order for this density to properly normalize, \\(y\\) must be restricted to some interval \\((c, c + 2\\pi)\\) of length \\(2 \\pi\\), because \\[\\begin{equation*} \\int_{c}^{c +\n2\\pi} \\text{VonMises}(y|\\mu,\\kappa) dy = 1. \\end{equation*}\\] Similarly, if \\(\\mu\\) is a parameter, it will typically be restricted to the same range as \\(y\\).\nIf \\(\\kappa > 0\\), a von Mises distribution with its \\(2 \\pi\\) interval of support centered around its location \\(\\mu\\) will have a single mode at \\(\\mu\\); for example, restricting \\(y\\) to \\((-\\pi,\\pi)\\) and taking \\(\\mu = 0\\) leads to a single local optimum at the mode \\(\\mu\\). If the location \\(\\mu\\) is not in the center of the support, the density is circularly translated and there will be a second local maximum at the boundary furthest from the mode. Ideally, the parameterization and support will be set up so that the bulk of the probability mass is in a continuous interval around the mean \\(\\mu\\).\nFor \\(\\kappa = 0\\), the Von Mises distribution corresponds to the circular uniform distribution with density \\(1 / (2 \\pi)\\) (independently of the values of \\(y\\) or \\(\\mu\\)).\n\n\n\ny ~ von_mises(mu, kappa)\nIncrement target log probability density with von_mises_lupdf(y | mu, kappa).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal von_mises_lpdf(reals y | reals mu, reals kappa) The log of the von mises density of y given location mu and scale kappa.\nAvailable since 2.18\n \n\nreal von_mises_lupdf(reals y | reals mu, reals kappa) The log of the von mises density of y given location mu and scale kappa dropping constant additive terms.\nAvailable since 2.25\n \n\nreal von_mises_cdf(reals y | reals mu, reals kappa) The von mises cumulative distribution function of y given location mu and scale kappa.\nAvailable since 2.29\n \n\nreal von_mises_lcdf(reals y | reals mu, reals kappa) The log of the von mises cumulative distribution function of y given location mu 
and scale kappa.\nAvailable since 2.29\n \n\nreal von_mises_lccdf(reals y | reals mu, reals kappa) The log of the von mises complementary cumulative distribution function of y given location mu and scale kappa.\nAvailable since 2.29\n \n\nR von_mises_rng(reals mu, reals kappa) Generate a Von Mises variate with location mu and scale kappa (i.e. returns values in the interval \\([(\\mu \\mod 2\\pi)-\\pi,(\\mu \\mod\n2\\pi)+\\pi]\\)); may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\nEvaluating the Von Mises distribution for \\(\\kappa > 100\\) is numerically unstable in the current implementation. Nathanael I. Lichti suggested the following workaround on the Stan users group, based on the fact that as \\(\\kappa \\rightarrow \\infty\\), \\[\\begin{equation*}\n\\text{VonMises}(y|\\mu,\\kappa) \\rightarrow \\text{Normal}(\\mu, \\sqrt{1 /\n\\kappa}). \\end{equation*}\\] The workaround is to replace y ~ von_mises(mu,kappa) with\nif (kappa < 100) {\n y ~ von_mises(mu, kappa);\n} else {\n y ~ normal(mu, sqrt(1 / kappa));\n}", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Circular Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_continuous_distributions.html", + "href": "functions-reference/bounded_continuous_distributions.html", + "title": "Bounded Continuous Distributions", + "section": "", + "text": "The bounded continuous probabilities have support on a finite interval of real numbers.\n\n\n\n\nIf \\(\\alpha \\in \\mathbb{R}\\) and \\(\\beta \\in (\\alpha,\\infty)\\), then for \\(y \\in [\\alpha,\\beta]\\), \\[\\begin{equation*} \\text{Uniform}(y|\\alpha,\\beta) =\n\\frac{1}{\\beta - \\alpha} . 
\\end{equation*}\\]\n\n\n\ny ~ uniform(alpha, beta)\nIncrement target log probability density with uniform_lupdf(y | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal uniform_lpdf(reals y | reals alpha, reals beta) The log of the uniform density of y given lower bound alpha and upper bound beta\nAvailable since 2.12\n \n\nreal uniform_lupdf(reals y | reals alpha, reals beta) The log of the uniform density of y given lower bound alpha and upper bound beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal uniform_cdf(reals y | reals alpha, reals beta) The uniform cumulative distribution function of y given lower bound alpha and upper bound beta\nAvailable since 2.0\n \n\nreal uniform_lcdf(reals y | reals alpha, reals beta) The log of the uniform cumulative distribution function of y given lower bound alpha and upper bound beta\nAvailable since 2.12\n \n\nreal uniform_lccdf(reals y | reals alpha, reals beta) The log of the uniform complementary cumulative distribution function of y given lower bound alpha and upper bound beta\nAvailable since 2.12\n \n\nR uniform_rng(reals alpha, reals beta) Generate a uniform variate with lower bound alpha and upper bound beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Bounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_continuous_distributions.html#uniform-distribution", + "href": "functions-reference/bounded_continuous_distributions.html#uniform-distribution", + "title": "Bounded Continuous Distributions", + "section": "", + "text": "If \\(\\alpha \\in \\mathbb{R}\\) and \\(\\beta \\in (\\alpha,\\infty)\\), then for \\(y \\in [\\alpha,\\beta]\\), \\[\\begin{equation*} \\text{Uniform}(y|\\alpha,\\beta) =\n\\frac{1}{\\beta - \\alpha} . 
\\end{equation*}\\]\n\n\n\ny ~ uniform(alpha, beta)\nIncrement target log probability density with uniform_lupdf(y | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal uniform_lpdf(reals y | reals alpha, reals beta) The log of the uniform density of y given lower bound alpha and upper bound beta\nAvailable since 2.12\n \n\nreal uniform_lupdf(reals y | reals alpha, reals beta) The log of the uniform density of y given lower bound alpha and upper bound beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal uniform_cdf(reals y | reals alpha, reals beta) The uniform cumulative distribution function of y given lower bound alpha and upper bound beta\nAvailable since 2.0\n \n\nreal uniform_lcdf(reals y | reals alpha, reals beta) The log of the uniform cumulative distribution function of y given lower bound alpha and upper bound beta\nAvailable since 2.12\n \n\nreal uniform_lccdf(reals y | reals alpha, reals beta) The log of the uniform complementary cumulative distribution function of y given lower bound alpha and upper bound beta\nAvailable since 2.12\n \n\nR uniform_rng(reals alpha, reals beta) Generate a uniform variate with lower bound alpha and upper bound beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Bounded Continuous Distributions" + ] + }, + { + "objectID": "functions-reference/array_operations.html", + "href": "functions-reference/array_operations.html", + "title": "Array Operations", + "section": "", + "text": "The following operations take arrays as input and produce single output values. 
The boundary values for size 0 arrays are the unit with respect to the combination operation (min, max, sum, or product).\n\n\n \n\nreal min(array[] real x) The minimum value in x, or \\(+\\infty\\) if x is size 0.\nAvailable since 2.0\n \n\nint min(array[] int x) The minimum value in x, or error if x is size 0.\nAvailable since 2.0\n \n\nreal max(array[] real x) The maximum value in x, or \\(-\\infty\\) if x is size 0.\nAvailable since 2.0\n \n\nint max(array[] int x) The maximum value in x, or error if x is size 0.\nAvailable since 2.0\n\n\n\n \n\nint sum(array[] int x) The sum of the elements in x, or 0 if the array is empty.\nAvailable since 2.1\n \n\nreal sum(array[] real x) The sum of the elements in x; see definition above.\nAvailable since 2.0\n \n\ncomplex sum(array[] complex x) The sum of the elements in x; see definition above.\nAvailable since 2.30\n \n\nreal prod(array[] real x) The product of the elements in x, or 1 if x is size 0.\nAvailable since 2.0\n \n\nreal prod(array[] int x) The product of the elements in x, \\[\\begin{equation*}\n\\text{product}(x) = \\begin{cases}\n\\prod_{n=1}^N x_n & \\text{if } N > 0 \\\\[4pt] 1 & \\text{if } N = 0\n\\end{cases}\n\\end{equation*}\\]\nAvailable since 2.0\n \n\nreal log_sum_exp(array[] real x) The natural logarithm of the sum of the exponentials of the elements in x, or \\(-\\infty\\) if the array is empty.\nAvailable since 2.0\n\n\n\nThe sample mean, variance, and standard deviation are calculated in the usual way. For i.i.d. draws from a distribution of finite mean, the sample mean is an unbiased estimate of the mean of the distribution. Similarly, for i.i.d. draws from a distribution of finite variance, the sample variance is an unbiased estimate of the variance.1 The sample standard deviation is defined as the square root of the sample variance, but is not unbiased.\n \n\nreal mean(array[] real x) The sample mean of the elements in x. 
For an array \\(x\\) of size \\(N >\n0\\), \\[\\begin{equation*}\n\\text{mean}(x) \\ = \\ \\bar{x} \\ = \\ \\frac{1}{N} \\sum_{n=1}^N\nx_n.\n\\end{equation*}\\] It is an error to call the mean function with an array of size \\(0\\).\nAvailable since 2.0\n \n\nreal variance(array[] real x) The sample variance of the elements in x. For \\(N > 0\\), \\[\\begin{equation*}\n\\text{variance}(x) \\ = \\ \\begin{cases} \\frac{1}{N-1} \\sum_{n=1}^N (x_n\n- \\bar{x})^2 & \\text{if } N > 1 \\\\[4pt] 0 & \\text{if } N = 1\n\\end{cases}\n\\end{equation*}\\] It is an error to call the variance function with an array of size 0.\nAvailable since 2.0\n \n\nreal sd(array[] real x) The sample standard deviation of elements in x. \\[\\begin{equation*}\n\\text{sd}(x) =\n\\begin{cases} \\sqrt{\\, \\text{variance}(x)} & \\text{if } N > 1 \\\\[4pt]\n0 & \\text{if } N = 1 \\end{cases}\n\\end{equation*}\\] It is an error to call the sd function with an array of size 0.\nAvailable since 2.0\n\n\n\n \n\nreal norm1(vector x) The L1 norm of x, defined by \\[\\begin{equation*}\n\\text{norm1}(x) \\ = \\ \\textstyle \\sum_{n=1}^N (|x_n|)\n\\end{equation*}\\] where N is the size of x.\nAvailable since 2.30\n \n\nreal norm1(row_vector x) The L1 norm of x\nAvailable since 2.30\n \n\nreal norm1(array[] real x) The L1 norm of x\nAvailable since 2.30\n \n\nreal norm2(vector x) The L2 norm of x, defined by \\[\\begin{equation*}\n\\text{norm2}(x) \\ = \\ \\sqrt{\\textstyle \\sum_{n=1}^N (x_n)^2}\n\\end{equation*}\\] where N is the size of x\nAvailable since 2.30\n \n\nreal norm2(row_vector x) The L2 norm of x\nAvailable since 2.30\n \n\nreal norm2(array[] real x) The L2 norm of x\nAvailable since 2.30\n\n\n\n \n\nreal distance(vector x, vector y) The Euclidean distance between x and y, defined by \\[\\begin{equation*}\n\\text{distance}(x,y) \\ = \\ \\sqrt{\\textstyle \\sum_{n=1}^N (x_n - y_n)^2}\n\\end{equation*}\\] where N is the size of x and y. 
It is an error to call distance with arguments of unequal size.\nAvailable since 2.2\n \n\nreal distance(vector x, row_vector y) The Euclidean distance between x and y\nAvailable since 2.2\n \n\nreal distance(row_vector x, vector y) The Euclidean distance between x and y\nAvailable since 2.2\n \n\nreal distance(row_vector x, row_vector y) The Euclidean distance between x and y\nAvailable since 2.2\n \n\nreal squared_distance(vector x, vector y) The squared Euclidean distance between x and y, defined by \\[\\begin{equation*}\n\\mathrm{squared\\_distance}(x,y) \\ = \\ \\text{distance}(x,y)^2 \\ = \\ \\textstyle \\sum_{n=1}^N (x_n - y_n)^2,\n\\end{equation*}\\] where N is the size of x and y. It is an error to call squared_distance with arguments of unequal size.\nAvailable since 2.7\n \n\nreal squared_distance(vector x, row_vector y) The squared Euclidean distance between x and y\nAvailable since 2.26\n \n\nreal squared_distance(row_vector x, vector y) The squared Euclidean distance between x and y\nAvailable since 2.26\n \n\nreal squared_distance(row_vector x, row_vector y) The squared Euclidean distance between x and y\nAvailable since 2.26\n\n\n\nProduces sample quantiles corresponding to the given probabilities. The smallest observation corresponds to a probability of 0 and the largest to a probability of 1.\nImplements algorithm 7 from Hyndman, R. J. and Fan, Y., Sample quantiles in Statistical Packages (R’s default quantile function).\n \n\nreal quantile(data array[] real x, data real p) The p-th quantile of x\nAvailable since 2.27\n \n\narray[] real quantile(data array[] real x, data array[] real p) An array containing the quantiles of x given by the array of probabilities p\nAvailable since 2.27\n\n\n\n\nThe size of an array or matrix can be obtained using the dims() function. 
The dims() function is defined to take an argument consisting of any variable with up to 8 array dimensions (and up to 2 additional matrix dimensions) and returns an array of integers with the dimensions. For example, if two variables are declared as follows,\n array[7, 8, 9] real x;\n array[7] matrix[8, 9] y;\nthen calling dims(x) or dims(y) returns an integer array of size 3 containing the elements 7, 8, and 9 in that order.\nThe size() function extracts the number of elements in an array. This is just the top-level elements, so if the array is declared as\n array[M, N] real a;\nthe size of a is M.\nThe function num_elements, on the other hand, measures all of the elements, so that the array a above has \\(M \\times N\\) elements.\nThe specialized functions rows() and cols() should be used to extract the dimensions of vectors and matrices.\n \n\narray[] int dims(T x) Return an integer array containing the dimensions of x; the type of the argument T can be any Stan type with up to 8 array dimensions.\nAvailable since 2.0\n \n\nint num_elements(array[] T x) Return the total number of elements in the array x including all elements in contained arrays, vectors, and matrices. T can be any array type. For example, if x is of type array[4, 3] real then num_elements(x) is 12, and if y is declared as array[5] matrix[3, 4] y, then num_elements(y) evaluates to 60.\nAvailable since 2.5\n \n\nint size(array[] T x) Return the number of elements in the array x; the type of the array T can be any type, but the size is just the size of the top level array, not the total number of elements contained. For example, if x is of type array[4, 3] real then size(x) is 4.\nAvailable since 2.0\n\n\n\nThe following operations create arrays by repeating elements to fill an array of a specified size. 
These operations work for all input types T, including reals, integers, vectors, row vectors, matrices, or arrays.\n \n\narray[] T rep_array(T x, int n) Return the n array with every entry assigned to x.\nAvailable since 2.0\n \n\narray [,] T rep_array(T x, int m, int n) Return the m by n array with every entry assigned to x.\nAvailable since 2.0\n \n\narray[,,] T rep_array(T x, int k, int m, int n) Return the k by m by n array with every entry assigned to x.\nAvailable since 2.0\nFor example, rep_array(1.0,5) produces a real array (type array[] real) of size 5 with all values set to 1.0. On the other hand, rep_array(1,5) produces an integer array (type array[] int) of size 5 with all values set to 1. This distinction is important because it is not possible to assign an integer array to a real array. For example, the following example contrasts legal with illegal array creation and assignment\n array[5] real y;\n array[5] int x;\n\n x = rep_array(1, 5); // ok\n y = rep_array(1.0, 5); // ok\n\n x = rep_array(1.0, 5); // illegal\n y = rep_array(1, 5); // illegal\n\n x = y; // illegal\n y = x; // illegal\nIf the value being repeated v is a vector (i.e., T is vector), then rep_array(v, 27) is a size 27 array consisting of 27 copies of the vector v.\n vector[5] v;\n array[3] vector[5] a;\n\n a = rep_array(v, 3); // fill a with copies of v\n a[2, 4] = 9.0; // v[4], a[1, 4], a[3, 4] unchanged\nIf the type T of x is itself an array type, then the result will be an array with one, two, or three added dimensions, depending on which of the rep_array functions is called. 
For instance, consider the following legal code snippet.\n array[5, 6] real a;\n array[3, 4, 5, 6] real b;\n\n b = rep_array(a, 3, 4); // make (3 x 4) copies of a\n b[1, 1, 1, 1] = 27.9; // a[1, 1] unchanged\nAfter the assignment to b, the value for b[j, k, m, n] is equal to a[m, n] where it is defined, for j in 1:3, k in 1:4, m in 1:5, and n in 1:6.\n\n\n\n \n\nT append_array(T x, T y) Return the concatenation of two arrays in the order of the arguments. T must be an N-dimensional array of any Stan type (with a maximum N of 7). All dimensions but the first must match.\nAvailable since 2.18\nFor example, the following code appends two three dimensional arrays of matrices together. Note that all dimensions except the first match. Any mismatches will cause an error to be thrown.\n array[2, 1, 7] matrix[4, 6] x1;\n array[3, 1, 7] matrix[4, 6] x2;\n array[5, 1, 7] matrix[4, 6] x3;\n\n x3 = append_array(x1, x2);\n\n\n\nSorting can be used to sort values or the indices of those values in either ascending or descending order. 
For example, if v is declared as a real array of size 3, with values \\[\\begin{equation*}\n\\text{v} = (1, -10.3,\n20.987),\n\\end{equation*}\\] then the various sort routines produce \\[\\begin{eqnarray*}\n\\mathrm{sort\\_asc(v)} & = & (-10.3,1,20.987) \\\\[4pt]\n\\mathrm{sort\\_desc(v)} & = & (20.987,1,-10.3) \\\\[4pt]\n\\mathrm{sort\\_indices\\_asc(v)} & = & (2,1,3) \\\\[4pt]\n\\mathrm{sort\\_indices\\_desc(v)} & = & (3,1,2)\n\\end{eqnarray*}\\]\n \n\narray[] real sort_asc(array[] real v) Sort the elements of v in ascending order\nAvailable since 2.0\n \n\narray[] int sort_asc(array[] int v) Sort the elements of v in ascending order\nAvailable since 2.0\n \n\narray[] real sort_desc(array[] real v) Sort the elements of v in descending order\nAvailable since 2.0\n \n\narray[] int sort_desc(array[] int v) Sort the elements of v in descending order\nAvailable since 2.0\n \n\narray[] int sort_indices_asc(array[] real v) Return an array of indices between 1 and the size of v, sorted to index v in ascending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_asc(array[] int v) Return an array of indices between 1 and the size of v, sorted to index v in ascending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_desc(array[] real v) Return an array of indices between 1 and the size of v, sorted to index v in descending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_desc(array[] int v) Return an array of indices between 1 and the size of v, sorted to index v in descending order.\nAvailable since 2.3\n \n\nint rank(array[] real v, int s) Number of components of v less than v[s]\nAvailable since 2.0\n \n\nint rank(array[] int v, int s) Number of components of v less than v[s]\nAvailable since 2.0\n\n\n\nStan provides functions to create a new array by reversing the order of elements in an existing array. 
For example, if v is declared as a real array of size 3, with values \\[\\begin{equation*}\n\\text{v} = (1,\\, -10.3,\\, 20.987),\n\\end{equation*}\\] then \\[\\begin{equation*}\n\\mathrm{reverse(v)} = (20.987,\\, -10.3,\\, 1).\n\\end{equation*}\\]\n \n\narray[] T reverse(array[] T v) Return a new array containing the elements of the argument in reverse order.\nAvailable since 2.23", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Array Operations" + ] + }, + { + "objectID": "functions-reference/array_operations.html#array-reductions", + "href": "functions-reference/array_operations.html#array-reductions", + "title": "Array Operations", + "section": "", + "text": "The following operations take arrays as input and produce single output values. The boundary values for size 0 arrays are the unit with respect to the combination operation (min, max, sum, or product).\n\n\n \n\nreal min(array[] real x) The minimum value in x, or \\(+\\infty\\) if x is size 0.\nAvailable since 2.0\n \n\nint min(array[] int x) The minimum value in x, or error if x is size 0.\nAvailable since 2.0\n \n\nreal max(array[] real x) The maximum value in x, or \\(-\\infty\\) if x is size 0.\nAvailable since 2.0\n \n\nint max(array[] int x) The maximum value in x, or error if x is size 0.\nAvailable since 2.0\n\n\n\n \n\nint sum(array[] int x) The sum of the elements in x, or 0 if the array is empty.\nAvailable since 2.1\n \n\nreal sum(array[] real x) The sum of the elements in x; see definition above.\nAvailable since 2.0\n \n\ncomplex sum(array[] complex x) The sum of the elements in x; see definition above.\nAvailable since 2.30\n \n\nreal prod(array[] real x) The product of the elements in x, or 1 if x is size 0.\nAvailable since 2.0\n \n\nreal prod(array[] int x) The product of the elements in x, \\[\\begin{equation*}\n\\text{product}(x) = \\begin{cases}\n\\prod_{n=1}^N x_n & \\text{if} N > 0 \\\\[4pt] 1 & \\text{if} N = 0\n\\end{cases}\n\\end{equation*}\\]\nAvailable since 
2.0\n \n\nreal log_sum_exp(array[] real x) The natural logarithm of the sum of the exponentials of the elements in x, or \\(-\\infty\\) if the array is empty.\nAvailable since 2.0\n\n\n\nThe sample mean, variance, and standard deviation are calculated in the usual way. For i.i.d. draws from a distribution of finite mean, the sample mean is an unbiased estimate of the mean of the distribution. Similarly, for i.i.d. draws from a distribution of finite variance, the sample variance is an unbiased estimate of the variance.1 The sample standard deviation is defined as the square root of the sample variance, but is not unbiased.\n \n\nreal mean(array[] real x) The sample mean of the elements in x. For an array \\(x\\) of size \\(N >\n0\\), \\[\\begin{equation*}\n\\text{mean}(x) \\ = \\ \\bar{x} \\ = \\ \\frac{1}{N} \\sum_{n=1}^N\nx_n.\n\\end{equation*}\\] It is an error to call the mean function with an array of size \\(0\\).\nAvailable since 2.0\n \n\nreal variance(array[] real x) The sample variance of the elements in x. For \\(N > 0\\), \\[\\begin{equation*}\n\\text{variance}(x) \\ = \\ \\begin{cases} \\frac{1}{N-1} \\sum_{n=1}^N (x_n\n- \\bar{x})^2 & \\text{if } N > 1 \\\\[4pt] 0 & \\text{if } N = 1\n\\end{cases}\n\\end{equation*}\\] It is an error to call the variance function with an array of size 0.\nAvailable since 2.0\n \n\nreal sd(array[] real x) The sample standard deviation of elements in x. 
\\[\\begin{equation*}\n\\text{sd}(x) =\n\\begin{cases} \\sqrt{\\, \\text{variance}(x)} & \\text{if } N > 1 \\\\[4pt]\n0 & \\text{if } N = 1 \\end{cases}\n\\end{equation*}\\] It is an error to call the sd function with an array of size 0.\nAvailable since 2.0\n\n\n\n \n\nreal norm1(vector x) The L1 norm of x, defined by \\[\\begin{equation*}\n\\text{norm1}(x) \\ = \\ \\textstyle \\sum_{n=1}^N (|x_n|)\n\\end{equation*}\\] where N is the size of x.\nAvailable since 2.30\n \n\nreal norm1(row_vector x) The L1 norm of x\nAvailable since 2.30\n \n\nreal norm1(array[] real x) The L1 norm of x\nAvailable since 2.30\n \n\nreal norm2(vector x) The L2 norm of x, defined by \\[\\begin{equation*}\n\\text{norm2}(x) \\ = \\ \\sqrt{\\textstyle \\sum_{n=1}^N (x_n)^2}\n\\end{equation*}\\] where N is the size of x\nAvailable since 2.30\n \n\nreal norm2(row_vector x) The L2 norm of x\nAvailable since 2.30\n \n\nreal norm2(array[] real x) The L2 norm of x\nAvailable since 2.30\n\n\n\n \n\nreal distance(vector x, vector y) The Euclidean distance between x and y, defined by \\[\\begin{equation*}\n\\text{distance}(x,y) \\ = \\ \\sqrt{\\textstyle \\sum_{n=1}^N (x_n - y_n)^2}\n\\end{equation*}\\] where N is the size of x and y. It is an error to call distance with arguments of unequal size.\nAvailable since 2.2\n \n\nreal distance(vector x, row_vector y) The Euclidean distance between x and y\nAvailable since 2.2\n \n\nreal distance(row_vector x, vector y) The Euclidean distance between x and y\nAvailable since 2.2\n \n\nreal distance(row_vector x, row_vector y) The Euclidean distance between x and y\nAvailable since 2.2\n \n\nreal squared_distance(vector x, vector y) The squared Euclidean distance between x and y, defined by \\[\\begin{equation*}\n\\mathrm{squared\\_distance}(x,y) \\ = \\ \\text{distance}(x,y)^2 \\ = \\ \\textstyle \\sum_{n=1}^N (x_n - y_n)^2,\n\\end{equation*}\\] where N is the size of x and y. 
It is an error to call squared_distance with arguments of unequal size.\nAvailable since 2.7\n \n\nreal squared_distance(vector x, row_vector y) The squared Euclidean distance between x and y\nAvailable since 2.26\n \n\nreal squared_distance(row_vector x, vector y) The squared Euclidean distance between x and y\nAvailable since 2.26\n \n\nreal squared_distance(row_vector x, row_vector y) The squared Euclidean distance between x and y\nAvailable since 2.26\n\n\n\nProduces sample quantiles corresponding to the given probabilities. The smallest observation corresponds to a probability of 0 and the largest to a probability of 1.\nImplements algorithm 7 from Hyndman, R. J. and Fan, Y., Sample quantiles in Statistical Packages (R’s default quantile function).\n \n\nreal quantile(data array[] real x, data real p) The p-th quantile of x\nAvailable since 2.27\n \n\narray[] real quantile(data array[] real x, data array[] real p) An array containing the quantiles of x given by the array of probabilities p\nAvailable since 2.27", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Array Operations" + ] + }, + { + "objectID": "functions-reference/array_operations.html#array-size-and-dimension-function", + "href": "functions-reference/array_operations.html#array-size-and-dimension-function", + "title": "Array Operations", + "section": "", + "text": "The size of an array or matrix can be obtained using the dims() function. The dims() function is defined to take an argument consisting of any variable with up to 8 array dimensions (and up to 2 additional matrix dimensions) and returns an array of integers with the dimensions. For example, if two variables are declared as follows,\n array[7, 8, 9] real x;\n array[7] matrix[8, 9] y;\nthen calling dims(x) or dims(y) returns an integer array of size 3 containing the elements 7, 8, and 9 in that order.\nThe size() function extracts the number of elements in an array. 
This is just the top-level elements, so if the array is declared as\n array[M, N] real a;\nthe size of a is M.\nThe function num_elements, on the other hand, measures all of the elements, so that the array a above has \\(M \\times N\\) elements.\nThe specialized functions rows() and cols() should be used to extract the dimensions of vectors and matrices.\n \n\narray[] int dims(T x) Return an integer array containing the dimensions of x; the type of the argument T can be any Stan type with up to 8 array dimensions.\nAvailable since 2.0\n \n\nint num_elements(array[] T x) Return the total number of elements in the array x including all elements in contained arrays, vectors, and matrices. T can be any array type. For example, if x is of type array[4, 3] real then num_elements(x) is 12, and if y is declared as array[5] matrix[3, 4] y, then num_elements(y) evaluates to 60.\nAvailable since 2.5\n \n\nint size(array[] T x) Return the number of elements in the array x; the type of the array T can be any type, but the size is just the size of the top level array, not the total number of elements contained. For example, if x is of type array[4, 3] real then size(x) is 4.\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Array Operations" + ] + }, + { + "objectID": "functions-reference/array_operations.html#array-broadcasting", + "href": "functions-reference/array_operations.html#array-broadcasting", + "title": "Array Operations", + "section": "", + "text": "The following operations create arrays by repeating elements to fill an array of a specified size. 
These operations work for all input types T, including reals, integers, vectors, row vectors, matrices, or arrays.\n \n\narray[] T rep_array(T x, int n) Return the n array with every entry assigned to x.\nAvailable since 2.0\n \n\narray [,] T rep_array(T x, int m, int n) Return the m by n array with every entry assigned to x.\nAvailable since 2.0\n \n\narray[,,] T rep_array(T x, int k, int m, int n) Return the k by m by n array with every entry assigned to x.\nAvailable since 2.0\nFor example, rep_array(1.0,5) produces a real array (type array[] real) of size 5 with all values set to 1.0. On the other hand, rep_array(1,5) produces an integer array (type array[] int) of size 5 with all values set to 1. This distinction is important because it is not possible to assign an integer array to a real array. For example, the following example contrasts legal with illegal array creation and assignment\n array[5] real y;\n array[5] int x;\n\n x = rep_array(1, 5); // ok\n y = rep_array(1.0, 5); // ok\n\n x = rep_array(1.0, 5); // illegal\n y = rep_array(1, 5); // illegal\n\n x = y; // illegal\n y = x; // illegal\nIf the value being repeated v is a vector (i.e., T is vector), then rep_array(v, 27) is a size 27 array consisting of 27 copies of the vector v.\n vector[5] v;\n array[3] vector[5] a;\n\n a = rep_array(v, 3); // fill a with copies of v\n a[2, 4] = 9.0; // v[4], a[1, 4], a[3, 4] unchanged\nIf the type T of x is itself an array type, then the result will be an array with one, two, or three added dimensions, depending on which of the rep_array functions is called. 
For instance, consider the following legal code snippet.\n array[5, 6] real a;\n array[3, 4, 5, 6] real b;\n\n b = rep_array(a, 3, 4); // make (3 x 4) copies of a\n b[1, 1, 1, 1] = 27.9; // a[1, 1] unchanged\nAfter the assignment to b, the value for b[j, k, m, n] is equal to a[m, n] where it is defined, for j in 1:3, k in 1:4, m in 1:5, and n in 1:6.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Array Operations" + ] + }, + { + "objectID": "functions-reference/array_operations.html#array-concatenation", + "href": "functions-reference/array_operations.html#array-concatenation", + "title": "Array Operations", + "section": "", + "text": "T append_array(T x, T y) Return the concatenation of two arrays in the order of the arguments. T must be an N-dimensional array of any Stan type (with a maximum N of 7). All dimensions but the first must match.\nAvailable since 2.18\nFor example, the following code appends two three dimensional arrays of matrices together. Note that all dimensions except the first match. Any mismatches will cause an error to be thrown.\n array[2, 1, 7] matrix[4, 6] x1;\n array[3, 1, 7] matrix[4, 6] x2;\n array[5, 1, 7] matrix[4, 6] x3;\n\n x3 = append_array(x1, x2);", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Array Operations" + ] + }, + { + "objectID": "functions-reference/array_operations.html#sorting-functions", + "href": "functions-reference/array_operations.html#sorting-functions", + "title": "Array Operations", + "section": "", + "text": "Sorting can be used to sort values or the indices of those values in either ascending or descending order. 
For example, if v is declared as a real array of size 3, with values \\[\\begin{equation*}\n\\text{v} = (1, -10.3,\n20.987),\n\\end{equation*}\\] then the various sort routines produce \\[\\begin{eqnarray*}\n\\mathrm{sort\\_asc(v)} & = & (-10.3,1,20.987) \\\\[4pt]\n\\mathrm{sort\\_desc(v)} & = & (20.987,1,-10.3) \\\\[4pt]\n\\mathrm{sort\\_indices\\_asc(v)} & = & (2,1,3) \\\\[4pt]\n\\mathrm{sort\\_indices\\_desc(v)} & = & (3,1,2)\n\\end{eqnarray*}\\]\n \n\narray[] real sort_asc(array[] real v) Sort the elements of v in ascending order\nAvailable since 2.0\n \n\narray[] int sort_asc(array[] int v) Sort the elements of v in ascending order\nAvailable since 2.0\n \n\narray[] real sort_desc(array[] real v) Sort the elements of v in descending order\nAvailable since 2.0\n \n\narray[] int sort_desc(array[] int v) Sort the elements of v in descending order\nAvailable since 2.0\n \n\narray[] int sort_indices_asc(array[] real v) Return an array of indices between 1 and the size of v, sorted to index v in ascending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_asc(array[] int v) Return an array of indices between 1 and the size of v, sorted to index v in ascending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_desc(array[] real v) Return an array of indices between 1 and the size of v, sorted to index v in descending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_desc(array[] int v) Return an array of indices between 1 and the size of v, sorted to index v in descending order.\nAvailable since 2.3\n \n\nint rank(array[] real v, int s) Number of components of v less than v[s]\nAvailable since 2.0\n \n\nint rank(array[] int v, int s) Number of components of v less than v[s]\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Array Operations" + ] + }, + { + "objectID": "functions-reference/array_operations.html#reversing-functions", + "href": "functions-reference/array_operations.html#reversing-functions", 
+ "title": "Array Operations", + "section": "", + "text": "Stan provides functions to create a new array by reversing the order of elements in an existing array. For example, if v is declared as a real array of size 3, with values \\[\\begin{equation*}\n\\text{v} = (1,\\, -10.3,\\, 20.987),\n\\end{equation*}\\] then \\[\\begin{equation*}\n\\mathrm{reverse(v)} = (20.987,\\, -10.3,\\, 1).\n\\end{equation*}\\]\n \n\narray[] T reverse(array[] T v) Return a new array containing the elements of the argument in reverse order.\nAvailable since 2.23", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Array Operations" + ] + }, + { + "objectID": "functions-reference/array_operations.html#footnotes", + "href": "functions-reference/array_operations.html#footnotes", + "title": "Array Operations", + "section": "Footnotes", + "text": "Footnotes\n\n\nDividing by \\(N\\) rather than \\((N-1)\\) produces a maximum likelihood estimate of variance, which is biased to underestimate variance.↩︎", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Array Operations" + ] + }, + { + "objectID": "cmdstan-guide/stansummary.html", + "href": "cmdstan-guide/stansummary.html", + "title": "stansummary: MCMC Output Analysis", + "section": "", + "text": "The CmdStan stansummary program reports statistics for one or more sampler chains over all sampler and model parameters and quantities of interest. 
The statistics reported include both summary statistics of the estimates and diagnostic statistics on the sampler chains, reported in the following order:\n\nMean - sample mean\nMCSE - Monte Carlo Standard Error, a measure of the amount of noise in the sample\nStdDev - sample standard deviation - the standard deviation around the sample mean.\nMAD - Median Absolute Deviation - the median absolute deviation around the sample median.\nQuantiles - default 5%, 50%, 95%\nESS_bulk\nESS_tail\nESS_bulk/s - Bulk ESS per second\nR_hat - \\(\\hat{R}\\) statistic, a MCMC convergence diagnostic\n\nWhen reviewing the stansummary output, it is important to check the final three output columns first - these are the diagnostic statistics on MCMC convergence and effective sample size. A \\(\\hat{R}\\) statistic of greater than \\(1\\) indicates potential convergence problems and that the sample is not representative of the target posterior, thus the estimates of the mean and all other summary statistics are likely to be invalid. A value of \\(1.01\\) can be used as a generic threshold to decide whether more iterations or further convergence analysis is needed, but other thresholds can be used depending on the specific use case.\nEstimation by sampling produces an approximate value for the model parameters; the MCSE statistic indicates the amount of uncertainty in the estimate. Therefore the MCSE column is placed next to the sample mean column, in order to make it easy to compare this sample with others.\nFor more information, see the Posterior Analysis chapter of the Stan Reference Manual which describes both the theory and practice of MCMC estimation techniques.\nThe statistics - Mean, StdDev, MAD, and Quantiles - are computed directly from all draws across all chains. The diagnostic statistics - ESS_bulk, ESS_tail, and R_hat are computed from the rank-normalized, folded, and split chains according to the definitions by Vehtari et al. (2021). 
the MCSE statistic is computed using split chain R_hat and autocorrelations. The summary statistics and the algorithms used to compute them are described in sections Notation for draws and Effective sample size.\n\n\nThe CmdStan makefile task build compiles the stansummary utility into the bin directory. It can be compiled directly using the makefile as follows:\n> cd <cmdstan-home>\n> make bin/stansummary\n\n\n\nThe stansummary utility processes one or more output files from a set of chains from one run of the HMC sampler. To run stansummary on the output file or files generated by a run of the sampler, on Mac or Linux:\n<cmdstan-home>/bin/stansummary <file_1.csv> ... <file_N.csv>\nOn Windows, use backslashes to call the stansummary.exe.\n<cmdstan-home>\\bin\\stansummary.exe <file_1.csv> ... <file_N.csv>\nFor example, after running 4 chains to fit the example model eight_schools.stan to the supplied example data file, we run stansummary on the resulting Stan CSV output files to get the following report:\n> bin/stansummary eight_*.csv\nInference for Stan model: eight_schools_model\n4 chains: each with iter=1000; warmup=1000; thin=1; 1000 iterations saved.\n\nWarmup took (0.065, 0.078, 0.080, 0.086) seconds, 0.31 seconds total\nSampling took (0.047, 0.044, 0.045, 0.053) seconds, 0.19 seconds total\n\n Mean MCSE StdDev MAD 5% 50% 95% ESS_bulk ESS_tail ESS_bulk/s R_hat\n\nlp__ -19 0.31 4.9 5.0 -27 -19 -11 264 275 1396 1.0\naccept_stat__ 0.77 0.024 0.31 0.096 6.5e-03 0.93 1.00 243 273 1287 1.0\nstepsize__ 0.25 nan 0.016 0.016 2.2e-01 0.25 0.26 nan nan nan nan\ntreedepth__ 3.4 0.048 0.76 0.00 2.0e+00 4.0 4.0 285 295 1507 1.0\nn_leapfrog__ 13 0.80 7.1 0.00 3.0e+00 15 31 220 274 1165 1.0\ndivergent__ 0.015 nan 0.12 0.00 0.0e+00 0.00 0.00 nan nan nan nan\nenergy__ 24 0.32 5.4 5.5 1.5e+01 24 33 289 488 1527 1.0\n\nmu 7.8 0.20 5.5 4.9 -1.3 7.7 17 688 915 3641 1.0\ntheta[1] 12 0.28 8.7 7.4 -0.36 11 28 908 763 4802 1.0\ntheta[2] 7.7 0.19 6.8 6.1 -3.4 7.8 19 1194 2011 6320 
1.0\ntheta[3] 5.6 0.23 8.5 7.0 -9.1 6.2 18 1260 1723 6669 1.0\ntheta[4] 7.5 0.20 7.0 6.5 -4.1 7.6 19 1171 1744 6197 1.0\ntheta[5] 4.6 0.21 6.7 6.3 -7.0 4.9 15 1045 1513 5530 1.0\ntheta[6] 5.7 0.23 7.2 6.4 -6.8 6.0 17 1012 1626 5354 1.0\ntheta[7] 11 0.24 7.1 6.6 0.025 11 24 885 473 4682 1.0\ntheta[8] 8.4 0.23 8.5 7.3 -4.8 8.1 23 1280 1848 6773 1.0\ntau 7.8 0.26 5.9 4.5 1.8 6.3 18 248 178 1310 1.0\n\nSamples were drawn using hmc with nuts.\nFor each parameter, ESS_bulk and ESS_tail measure the effective sample size for the entire sample (bulk)\nand for the .05 and .95 tails (tail), and R_hat measures the potential scale reduction on split chains.\nAt convergence R_hat will be very close to 1.00.\nThe console output information consists of\n\nModel, chains, and timing summaries\nSampler parameter statistics\nModel parameter statistics\nSampling algorithm - either nuts (shown here) or static HMC.\n\nThere is one row per parameter and the row order in the summary report corresponds to the column order in the Stan CSV output file. NaN values for some columns are expected if the value doesn’t change, e.g. if there are no divergent transitions.\n\n\nThe initial Stan CSV columns provide information on the sampler state for each draw:\n\nlp__ - the total log probability density (up to an additive constant) at each sample\naccept_stat__ - the average Metropolis acceptance probability over each simulated Hamiltonian trajectory\nstepsize__ - integrator step size\ntreedepth__ - depth of tree used by NUTS (NUTS sampler)\nn_leapfrog__ - number of leapfrog calculations (NUTS sampler)\ndivergent__ - has value 1 if trajectory diverged, otherwise 0. 
(NUTS sampler)\nenergy__ - value of the Hamiltonian\nint_time__ - total integration time (static HMC sampler)\n\nBecause we ran the NUTS sampler, the above summary reports sampler parameters treedepth__, n_leapfrog__, and divergent__; the static HMC sampler would report int_time__ instead.\n\n\n\nThe remaining Stan CSV columns report the values of all parameters, transformed parameters, and generated quantities in the order in which these variables are declared in the Stan program. For container variables, i.e., vector, row_vector, matrix, and array variables, the statistics for each element are reported separately, in row-major order. The eight_schools.stan program parameters block contains the following parameter variable declarations:\n real mu;\n array[J] real theta;\n real<lower=0> tau;\nIn the example data, J is \\(8\\); therefore the stansummary listing reports on theta[1] through theta[8].\n\n\n\n\nThe stansummary command syntax provides a set of flags to customize the output which must precede the list of filenames. When invoked with no arguments or with the -h or --help option, the program prints the usage message to the console and exits.\nReport statistics for one or more Stan CSV files from a HMC sampler run.\nExample: stansummary model_chain_1.csv model_chain_2.csv\nOptions:\n -a, --autocorr [n] Display the chain autocorrelation for the n-th\n input file, in addition to statistics.\n -c, --csv_filename [file] Write statistics to a CSV file.\n -h, --help Produce help message, then exit.\n -p, --percentiles [values] Percentiles to report as ordered set of\n comma-separated numbers from (0.1,99.9), inclusive.\n Default is 5,50,95.\n -s, --sig_figs [n] Significant figures reported. 
Default is 2.\n Must be an integer from (1, 18), inclusive.\n -i, --include_param [name] Include the named parameter in the summary output.\n By default, all parameters in the file are summarized,\n passing this argument one or more times will filter\n the output down to just the requested arguments.\nBoth short and long option names are allowed. Short names are specified as -<o> <value>; long option names can be specified either as --<option>=<value> or --<option> <value>.\nThe --percentiles argument can also be passed an empty string \"\", which results in no percentiles being displayed in the output of the command.\nThe amount of precision in the sampler output limits the amount of real precision in the summary report. CmdStan’s command line interface also has output argument sig_figs. The default sampler output precision is 8. The --sig_figs argument to the stansummary program should not exceed the sig_figs argument to the sampler.", + "crumbs": [ + "Tools and Utilities", + "`stansummary`: MCMC Output Analysis" + ] + }, + { + "objectID": "cmdstan-guide/stansummary.html#building-the-stansummary-command", + "href": "cmdstan-guide/stansummary.html#building-the-stansummary-command", + "title": "stansummary: MCMC Output Analysis", + "section": "", + "text": "The CmdStan makefile task build compiles the stansummary utility into the bin directory. It can be compiled directly using the makefile as follows:\n> cd <cmdstan-home>\n> make bin/stansummary", + "crumbs": [ + "Tools and Utilities", + "`stansummary`: MCMC Output Analysis" + ] + }, + { + "objectID": "cmdstan-guide/stansummary.html#running-the-stansummary-program", + "href": "cmdstan-guide/stansummary.html#running-the-stansummary-program", + "title": "stansummary: MCMC Output Analysis", + "section": "", + "text": "The stansummary utility processes one or more output files from a set of chains from one run of the HMC sampler. 
To run stansummary on the output file or files generated by a run of the sampler, on Mac or Linux:\n<cmdstan-home>/bin/stansummary <file_1.csv> ... <file_N.csv>\nOn Windows, use backslashes to call the stansummary.exe.\n<cmdstan-home>\\bin\\stansummary.exe <file_1.csv> ... <file_N.csv>\nFor example, after running 4 chains to fit the example model eight_schools.stan to the supplied example data file, we run stansummary on the resulting Stan CSV output files to get the following report:\n> bin/stansummary eight_*.csv\nInference for Stan model: eight_schools_model\n4 chains: each with iter=1000; warmup=1000; thin=1; 1000 iterations saved.\n\nWarmup took (0.065, 0.078, 0.080, 0.086) seconds, 0.31 seconds total\nSampling took (0.047, 0.044, 0.045, 0.053) seconds, 0.19 seconds total\n\n Mean MCSE StdDev MAD 5% 50% 95% ESS_bulk ESS_tail ESS_bulk/s R_hat\n\nlp__ -19 0.31 4.9 5.0 -27 -19 -11 264 275 1396 1.0\naccept_stat__ 0.77 0.024 0.31 0.096 6.5e-03 0.93 1.00 243 273 1287 1.0\nstepsize__ 0.25 nan 0.016 0.016 2.2e-01 0.25 0.26 nan nan nan nan\ntreedepth__ 3.4 0.048 0.76 0.00 2.0e+00 4.0 4.0 285 295 1507 1.0\nn_leapfrog__ 13 0.80 7.1 0.00 3.0e+00 15 31 220 274 1165 1.0\ndivergent__ 0.015 nan 0.12 0.00 0.0e+00 0.00 0.00 nan nan nan nan\nenergy__ 24 0.32 5.4 5.5 1.5e+01 24 33 289 488 1527 1.0\n\nmu 7.8 0.20 5.5 4.9 -1.3 7.7 17 688 915 3641 1.0\ntheta[1] 12 0.28 8.7 7.4 -0.36 11 28 908 763 4802 1.0\ntheta[2] 7.7 0.19 6.8 6.1 -3.4 7.8 19 1194 2011 6320 1.0\ntheta[3] 5.6 0.23 8.5 7.0 -9.1 6.2 18 1260 1723 6669 1.0\ntheta[4] 7.5 0.20 7.0 6.5 -4.1 7.6 19 1171 1744 6197 1.0\ntheta[5] 4.6 0.21 6.7 6.3 -7.0 4.9 15 1045 1513 5530 1.0\ntheta[6] 5.7 0.23 7.2 6.4 -6.8 6.0 17 1012 1626 5354 1.0\ntheta[7] 11 0.24 7.1 6.6 0.025 11 24 885 473 4682 1.0\ntheta[8] 8.4 0.23 8.5 7.3 -4.8 8.1 23 1280 1848 6773 1.0\ntau 7.8 0.26 5.9 4.5 1.8 6.3 18 248 178 1310 1.0\n\nSamples were drawn using hmc with nuts.\nFor each parameter, ESS_bulk and ESS_tail measure the effective sample size for the entire 
sample (bulk)\nand for the .05 and .95 tails (tail), and R_hat measures the potential scale reduction on split chains.\nAt convergence R_hat will be very close to 1.00.\nThe console output information consists of\n\nModel, chains, and timing summaries\nSampler parameter statistics\nModel parameter statistics\nSampling algorithm - either nuts (shown here) or static HMC.\n\nThere is one row per parameter and the row order in the summary report corresponds to the column order in the Stan CSV output file. NaN values for some columns are expected if the value doesn’t change, e.g. if there are no divergent transitions.\n\n\nThe initial Stan CSV columns provide information on the sampler state for each draw:\n\nlp__ - the total log probability density (up to an additive constant) at each sample\naccept_stat__ - the average Metropolis acceptance probability over each simulated Hamiltonian trajectory\nstepsize__ - integrator step size\ntreedepth__ - depth of tree used by NUTS (NUTS sampler)\nn_leapfrog__ - number of leapfrog calculations (NUTS sampler)\ndivergent__ - has value 1 if trajectory diverged, otherwise 0. (NUTS sampler)\nenergy__ - value of the Hamiltonian\nint_time__ - total integration time (static HMC sampler)\n\nBecause we ran the NUTS sampler, the above summary reports sampler parameters treedepth__, n_leapfrog__, and divergent__; the static HMC sampler would report int_time__ instead.\n\n\n\nThe remaining Stan CSV columns report the values of all parameters, transformed parameters, and generated quantities in the order in which these variables are declared in the Stan program. For container variables, i.e., vector, row_vector, matrix, and array variables, the statistics for each element are reported separately, in row-major order. 
The eight_schools.stan program parameters block contains the following parameter variable declarations:\n real mu;\n array[J] real theta;\n real<lower=0> tau;\nIn the example data, J is \\(8\\); therefore the stansummary listing reports on theta[1] through theta[8].", + "crumbs": [ + "Tools and Utilities", + "`stansummary`: MCMC Output Analysis" + ] + }, + { + "objectID": "cmdstan-guide/stansummary.html#command-line-options", + "href": "cmdstan-guide/stansummary.html#command-line-options", + "title": "stansummary: MCMC Output Analysis", + "section": "", + "text": "The stansummary command syntax provides a set of flags to customize the output which must precede the list of filenames. When invoked with no arguments or with the -h or --help option, the program prints the usage message to the console and exits.\nReport statistics for one or more Stan CSV files from a HMC sampler run.\nExample: stansummary model_chain_1.csv model_chain_2.csv\nOptions:\n -a, --autocorr [n] Display the chain autocorrelation for the n-th\n input file, in addition to statistics.\n -c, --csv_filename [file] Write statistics to a CSV file.\n -h, --help Produce help message, then exit.\n -p, --percentiles [values] Percentiles to report as ordered set of\n comma-separated numbers from (0.1,99.9), inclusive.\n Default is 5,50,95.\n -s, --sig_figs [n] Significant figures reported. Default is 2.\n Must be an integer from (1, 18), inclusive.\n -i, --include_param [name] Include the named parameter in the summary output.\n By default, all parameters in the file are summarized,\n passing this argument one or more times will filter\n the output down to just the requested arguments.\nBoth short and long option names are allowed. 
Short names are specified as -<o> <value>; long option names can be specified either as --<option>=<value> or --<option> <value>.\nThe --percentiles argument can also be passed an empty string \"\", which results in no percentiles being displayed in the output of the command.\nThe amount of precision in the sampler output limits the amount of real precision in the summary report. CmdStan’s command line interface also has output argument sig_figs. The default sampler output precision is 8. The --sig_figs argument to the stansummary program should not exceed the sig_figs argument to the sampler.", + "crumbs": [ + "Tools and Utilities", + "`stansummary`: MCMC Output Analysis" + ] + }, + { + "objectID": "cmdstan-guide/stan_csv_apdx.html", + "href": "cmdstan-guide/stan_csv_apdx.html", + "title": "Stan CSV File Format", + "section": "", + "text": "The output from all CmdStan methods is in CSV format. A Stan CSV file is a data table where the columns are the method and model parameters and quantities of interest. Each row contains one record’s worth of data in plain-text format using the comma character (‘,’) as the field delimiter (hence the name).\nFor the Stan CSV files, data is strictly numerical, however, possible values include both positive and negative infinity and “Not-a-Number” which are represented as the strings NaN, inf, +inf, -inf. All other values are written in decimal notation by default with at most 8 digits of precision. The number of significant digits written can be controlled with argument sig_figs=<int>. See more in Output control arguments section.\nStan CSV files have a header row containing the column names. They also make extensive use of CSV comments, i.e., lines which begin with the # character. 
In addition to initial and final comment rows, some methods also put comment rows in the middle of the data table, which makes it difficult to use many of the commonly used CSV parser packages.\n\n\nThe data table is laid out with zero or more method-specific columns followed by the Stan program variables declared in the parameter block, then the variables in the transformed parameters block, finally variables declared in the generated quantities, in declaration order.\nStan provides three types of container objects: arrays, vectors, and matrices. In order to output all elements of a container object, it is necessary to choose an indexing notation and a serialization order. The Stan CSV file indexing notation is\n\nThe column name consists of the variable name followed by the element indices.\nIndices are delimited by periods (‘.’).\nIndexing is 1-based, i.e., given a dimension of size \\(N\\), the first element index is \\(1\\) and the last element index is \\(N\\).\nTuples are laid out element-by-element, with each tuple slot being delimited by a colon (‘:’).\n\nContainer variables are serialized in column major order, a.k.a. “Fortran” order. In column major-order for a 2-D container, all elements of column 1 are listed in ascending order, followed by all elements of column 2, thus the column index changes the slowest and the row index changes the fastest. For higher dimensions, this generalizes to the last index changing the slowest and first index changing the fastest.\nTo see how this works, consider a 3-dimensional variable with dimension sizes 2, 3, and 4, e.g., an array of matrices, a 2-D array of vectors or row_vectors, or a 3-D array of scalars. Given a Stan program with model parameter variable:\n array[2, 3, 4] real foo;\nThe Stan CSV file will require 24 columns to output the elements of foo. 
The first 6 columns will be labeled:\nfoo.1.1.1,foo.2.1.1,foo.1.2.1,foo.2.2.1,foo.1.3.1,foo.2.3.1\nThe final 6 columns will be labeled:\nfoo.1.1.4,foo.2.1.4,foo.1.2.4,foo.2.2.4,foo.1.3.4,foo.2.3.4\nTo see how a tuple would be laid out, consider the following variable:\ntuple(real, array[3] real) bar;\nThis will correspond to 4 columns in the CSV file, which are labeled\nbar:1,bar:2.1,bar:2.2,bar:2.3\n\n\n\nThe sample method produces both a Stan CSV output file and a diagnostic file which contains the sampler parameters together with the gradients on the unconstrained scale and log probabilities for all parameters in the model.\nTo see how this works, we show snippets of the output file resulting from the following command:\n./bernoulli sample save_warmup=1 num_warmup=200 num_samples=100 \\\n data file=bernoulli.data.json \\\n output file=bernoulli_samples.csv\n\n\nThe sampler output file contains the following:\n\nInitial comment rows listing full CmdStan argument configuration.\nHeader row\nData rows containing warmup draws, if run with option save_warmup=1\nComment rows for adaptation listing step size and metric used for sampling\nSampling draws\nComment rows giving timing information\n\nInitial comments rows: argument configuration\nAll configuration arguments are listed, one per line, indented according to CmdStan’s hierarchy of arguments and sub-arguments. 
Arguments not overtly specified on the command line are annotated as (Default).\nIn the above example the num_samples, num_warmup, and save_warmup arguments were specified, whereas subargument thin is left at its default value, as seen in the initial comment rows:\n# stan_version_major = 2\n# stan_version_minor = 24\n# stan_version_patch = 0\n# model = bernoulli_model\n# method = sample (Default)\n# sample\n# num_samples = 100\n# num_warmup = 200\n# save_warmup = 1\n# thin = 1 (Default)\n# adapt\n# engaged = 1 (Default)\n# gamma = 0.050000000000000003 (Default)\n# delta = 0.80000000000000004 (Default)\n# kappa = 0.75 (Default)\n# t0 = 10 (Default)\n# init_buffer = 75 (Default)\n# term_buffer = 50 (Default)\n# window = 25 (Default)\n# algorithm = hmc (Default)\n# hmc\n# engine = nuts (Default)\n# nuts\n# max_depth = 10 (Default)\n# metric = diag_e (Default)\n# metric_file = (Default)\n# stepsize = 1 (Default)\n# stepsize_jitter = 0 (Default)\n# id = 0 (Default)\n# data\n# file = bernoulli.data.json\n# init = 2 (Default)\n# random\n# seed = 2991989946 (Default)\n# output\n# file = bernoulli_samples.csv\n# diagnostic_file = bernoulli_diagnostics.csv\n# refresh = 100 (Default)\nNote that when running multi-threaded programs which use reduce_sum for high-level parallelization, the number of threads used will also be included in this initial comment header.\nColumn headers\nThe CSV header row lists all sampler parameters, model parameters, transformed parameters, and quantities of interest. The sampler parameters are described in detail in the output file section of the chapter on MCMC Sampling. The example model bernoulli.stan only contains one parameter theta, therefore the CSV file data table consists of 7 sampler parameter columns and one column for the model parameter:\nlp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta\nAs a second example, we show the output of the eight_schools.stan model run on the example dataset. 
This model has 3 parameters: mu, theta a vector whose length is dependent on the input data, here N = 8, and tau. The initial columns are for the 7 sampler parameters, as before. The column headers for the model parameters are:\nmu,theta.1,theta.2,theta.3,theta.4,theta.5,theta.6,theta.7,theta.8,tau\nData rows containing warmup draws\nWhen run with option save_warmup=1, the thinned warmup draws are written to the CSV output file directly after the CSV header line. Since the default option is save_warmup=0, this section is usually not present in the output file.\nHere we specified num_warmup=200 and left thin at the default value \\(1\\), therefore the next 200 lines are data rows containing the sampler and model parameter values for each warmup draw.\n-6.74827,1,1,1,1,0,6.75348,0.247195\n-6.74827,4.1311e-103,14.3855,1,1,0,6.95087,0.247195\n-6.74827,1.74545e-21,2.43117,1,1,0,7.67546,0.247195\n-6.77655,0.99873,0.239791,2,7,0,6.81982,0.280619\n-6.7552,0.999392,0.323158,1,3,0,6.79175,0.26517\nComment rows for adaptation\nDuring warmup, the sampler adjusts the stepsize and the metric. 
At the end of warmup, the sampler outputs this information as comments.\n# Adaptation terminated\n# Step size = 0.813694\n# Diagonal elements of inverse mass matrix:\n# 0.592879\nAs the example bernoulli model only contains a single parameter, and as the default metric is diag_e, the inverse mass matrix is a \\(1 \\times 1\\) matrix, and the length of the diagonal vector is also \\(1\\).\nIn contrast, if we run the eight schools example model with metric dense_e, the adaptation comments section lists both the stepsize and the full \\(10 \\times 10\\) inverse mass matrix:\n# Adaptation terminated\n# Step size = 0.211252\n# Elements of inverse mass matrix:\n# 25.6389, 17.3379, 13.9455, 15.9036, 15.1953, 8.73729, 16.9486, 14.4231, 17.4969, 0.518757\n# 17.3379, 79.8719, 12.2989, -1.28006, 9.92895, -3.51622, 10.073, 22.0196, 19.8151, 4.71028\n# 13.9455, 12.2989, 36.1572, 12.8734, 11.9446, 9.09582, 9.74519, 10.9539, 12.1204, 0.211353\n# 15.9036, -1.28006, 12.8734, 59.9998, 10.245, 8.03461, 16.9754, 3.13443, 9.68292, -1.36097\n# 15.1953, 9.92895, 11.9446, 10.245, 43.548, 15.3403, 13.0537, 7.69818, 10.1093, 0.155245\n# 8.73729, -3.51622, 9.09582, 8.03461, 15.3403, 39.981, 12.7695, 1.16248, 6.13749, -2.08507\n# 16.9486, 10.073, 9.74519, 16.9754, 13.0537, 12.7695, 45.8884, 11.6074, 8.96413, -1.15946\n# 14.4231, 22.0196, 10.9539, 3.13443, 7.69818, 1.16248, 11.6074, 49.4083, 18.9169, 3.15661\n# 17.4969, 19.8151, 12.1204, 9.68292, 10.1093, 6.13749, 8.96413, 18.9169, 68.0228, 1.74104\n# 0.518757, 4.71028, 0.211353, -1.36097, 0.155245, -2.08507, -1.15946, 3.15661, 1.74104, 1.50433\nNote that when the sampler is run with arguments algorithm=fixed_param, this section will be missing.\nData rows containing sampling draws\nThe output file contains the values for the thinned set of draws during sampling. 
Here we specified num_samples=100 and left thin at the default value \\(1\\), therefore the next 100 lines are data rows containing the sampler and model parameter values for each sampling iteration.\n-8.76921,0.796814,0.813694,1,1,0,9.75854,0.535093\n-6.79143,0.979604,0.813694,1,3,0,9.13092,0.214431\n-6.79451,0.955359,0.813694,2,3,0,7.19149,0.289341\nTiming information\nUpon successful completion, the sampler writes timing information to the output CSV file as a series of final comment lines:\n#\n# Elapsed Time: 0.005 seconds (Warm-up)\n# 0.002 seconds (Sampling)\n# 0.007 seconds (Total)\n#\n\n\n\nThe diagnostic file contains the following:\n\nInitial comment rows listing full CmdStan argument configuration.\nHeader row\nData rows containing warmup draws, if run with option save_warmup=1\nSampling draws\nComment rows giving timing information\n\nThe columns in this file contain, in order:\n\nall sampler parameters\nall model parameter estimates (on the unconstrained scale)\nthe latent Hamiltonian for each parameter\nthe gradient for each parameter\n\nThe labels for the latent Hamiltonian columns are the parameter column label with prefix p_ and the labels for the gradient columns are the parameter column label with prefix g_.\nThese are the column labels from the file bernoulli_diagnostic.csv:\nlp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta,p_theta,g_theta\n\n\n\nThe profiling information is stored in a plain CSV format with no meta information in the comments.\nEach row represents timing information collected in a profile statement for a given thread. 
It is possible that some profile statements have only one entry (if they were only executed by one thread) and others have multiple entries (if they were executed by multiple threads).\nThe columns are as follows:\n\nname, The name of the profile statement that is being timed\nthread_id, The thread that executed the profile statement\ntotal_time, The combined time spent executing statements inside the profile which includes calculation with and without automatic differentiation\nforward_time, The time spent in the profile statement during the forward pass of a reverse mode automatic differentiation calculation or during a calculation without automatic differentiation\nreverse_time, The time spent in the profile statement during the reverse (backward) pass of reverse mode automatic differentiation\nchain_stack, The number of objects allocated on the chaining automatic differentiation stack. There is a function call for each of these objects in the reverse pass\nno_chain_stack, The number of objects allocated on the non-chaining automatic differentiation stack\nautodiff_calls, The total number of times the profile statement was executed with automatic differentiation\nno_autodiff_calls - The total number of times the profile statement was executed without automatic differentiation\n\n\n\n\n\n\nConfig as comments\nHeader row\nPenalized maximum likelihood estimate\n\n\n\n\n\nConfig as comments\nHeader row\nAdaptation as comments\nVariational estimate\nSample draws from estimate of the posterior\n\n\n\n\n\nHeader row\nQuantities of interest\n\n\n\n\n\nHeader row\nGradients", + "crumbs": [ + "Appendices", + "Stan CSV File Format" + ] + }, + { + "objectID": "cmdstan-guide/stan_csv_apdx.html#csv-column-names-and-order", + "href": "cmdstan-guide/stan_csv_apdx.html#csv-column-names-and-order", + "title": "Stan CSV File Format", + "section": "", + "text": "The data table is laid out with zero or more method-specific columns followed by the Stan program variables declared in 
the parameter block, then the variables in the transformed parameters block, finally variables declared in the generated quantities, in declaration order.\nStan provides three types of container objects: arrays, vectors, and matrices. In order to output all elements of a container object, it is necessary to choose an indexing notation and a serialization order. The Stan CSV file indexing notation is\n\nThe column name consists of the variable name followed by the element indices.\nIndices are delimited by periods (‘.’).\nIndexing is 1-based, i.e., given a dimension of size \\(N\\), the first element index is \\(1\\) and the last element index is \\(N\\).\nTuples are laid out element-by-element, with each tuple slot being delimited by a colon (‘:’).\n\nContainer variables are serialized in column major order, a.k.a. “Fortran” order. In column major-order for a 2-D container, all elements of column 1 are listed in ascending order, followed by all elements of column 2, thus the column index changes the slowest and the row index changes the fastest. For higher dimensions, this generalizes to the last index changing the slowest and first index changing the fastest.\nTo see how this works, consider a 3-dimensional variable with dimension sizes 2, 3, and 4, e.g., an array of matrices, a 2-D array of vectors or row_vectors, or a 3-D array of scalars. Given a Stan program with model parameter variable:\n array[2, 3, 4] real foo;\nThe Stan CSV file will require 24 columns to output the elements of foo. 
The first 6 columns will be labeled:\nfoo.1.1.1,foo.2.1.1,foo.1.2.1,foo.2.2.1,foo.1.3.1,foo.2.3.1\nThe final 6 columns will be labeled:\nfoo.1.1.4,foo.2.1.4,foo.1.2.4,foo.2.2.4,foo.1.3.4,foo.2.3.4\nTo see how a tuple would be laid out, consider the following variable:\ntuple(real, array[3] real) bar;\nThis will correspond to 4 columns in the CSV file, which are labeled\nbar:1,bar:2.1,bar:2.2,bar:2.3", + "crumbs": [ + "Appendices", + "Stan CSV File Format" + ] + }, + { + "objectID": "cmdstan-guide/stan_csv_apdx.html#mcmc-sampler-csv-output", + "href": "cmdstan-guide/stan_csv_apdx.html#mcmc-sampler-csv-output", + "title": "Stan CSV File Format", + "section": "", + "text": "The sample method produces both a Stan CSV output file and a diagnostic file which contains the sampler parameters together with the gradients on the unconstrained scale and log probabilities for all parameters in the model.\nTo see how this works, we show snippets of the output file resulting from the following command:\n./bernoulli sample save_warmup=1 num_warmup=200 num_samples=100 \\\n data file=bernoulli.data.json \\\n output file=bernoulli_samples.csv\n\n\nThe sampler output file contains the following:\n\nInitial comment rows listing full CmdStan argument configuration.\nHeader row\nData rows containing warmup draws, if run with option save_warmup=1\nComment rows for adaptation listing step size and metric used for sampling\nSampling draws\nComment rows giving timing information\n\nInitial comments rows: argument configuration\nAll configuration arguments are listed, one per line, indented according to CmdStan’s hierarchy of arguments and sub-arguments. 
Arguments not overtly specified on the command line are annotated as (Default).\nIn the above example the num_samples, num_warmup, and save_warmup arguments were specified, whereas subargument thin is left at its default value, as seen in the initial comment rows:\n# stan_version_major = 2\n# stan_version_minor = 24\n# stan_version_patch = 0\n# model = bernoulli_model\n# method = sample (Default)\n# sample\n# num_samples = 100\n# num_warmup = 200\n# save_warmup = 1\n# thin = 1 (Default)\n# adapt\n# engaged = 1 (Default)\n# gamma = 0.050000000000000003 (Default)\n# delta = 0.80000000000000004 (Default)\n# kappa = 0.75 (Default)\n# t0 = 10 (Default)\n# init_buffer = 75 (Default)\n# term_buffer = 50 (Default)\n# window = 25 (Default)\n# algorithm = hmc (Default)\n# hmc\n# engine = nuts (Default)\n# nuts\n# max_depth = 10 (Default)\n# metric = diag_e (Default)\n# metric_file = (Default)\n# stepsize = 1 (Default)\n# stepsize_jitter = 0 (Default)\n# id = 0 (Default)\n# data\n# file = bernoulli.data.json\n# init = 2 (Default)\n# random\n# seed = 2991989946 (Default)\n# output\n# file = bernoulli_samples.csv\n# diagnostic_file = bernoulli_diagnostics.csv\n# refresh = 100 (Default)\nNote that when running multi-threaded programs which use reduce_sum for high-level parallelization, the number of threads used will also be included in this initial comment header.\nColumn headers\nThe CSV header row lists all sampler parameters, model parameters, transformed parameters, and quantities of interest. The sampler parameters are described in detail in the output file section of the chapter on MCMC Sampling. The example model bernoulli.stan only contains one parameter theta, therefore the CSV file data table consists of 7 sampler parameter columns and one column for the model parameter:\nlp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta\nAs a second example, we show the output of the eight_schools.stan model run on the example dataset. 
This model has 3 parameters: mu, theta a vector whose length is dependent on the input data, here N = 8, and tau. The initial columns are for the 7 sampler parameters, as before. The column headers for the model parameters are:\nmu,theta.1,theta.2,theta.3,theta.4,theta.5,theta.6,theta.7,theta.8,tau\nData rows containing warmup draws\nWhen run with option save_warmup=1, the thinned warmup draws are written to the CSV output file directly after the CSV header line. Since the default option is save_warmup=0, this section is usually not present in the output file.\nHere we specified num_warmup=200 and left thin at the default value \\(1\\), therefore the next 200 lines are data rows containing the sampler and model parameter values for each warmup draw.\n-6.74827,1,1,1,1,0,6.75348,0.247195\n-6.74827,4.1311e-103,14.3855,1,1,0,6.95087,0.247195\n-6.74827,1.74545e-21,2.43117,1,1,0,7.67546,0.247195\n-6.77655,0.99873,0.239791,2,7,0,6.81982,0.280619\n-6.7552,0.999392,0.323158,1,3,0,6.79175,0.26517\nComment rows for adaptation\nDuring warmup, the sampler adjusts the stepsize and the metric. 
At the end of warmup, the sampler outputs this information as comments.\n# Adaptation terminated\n# Step size = 0.813694\n# Diagonal elements of inverse mass matrix:\n# 0.592879\nAs the example bernoulli model only contains a single parameter, and as the default metric is diag_e, the inverse mass matrix is a \\(1 \\times 1\\) matrix, and the length of the diagonal vector is also \\(1\\).\nIn contrast, if we run the eight schools example model with metric dense_e, the adaptation comments section lists both the stepsize and the full \\(10 \\times 10\\) inverse mass matrix:\n# Adaptation terminated\n# Step size = 0.211252\n# Elements of inverse mass matrix:\n# 25.6389, 17.3379, 13.9455, 15.9036, 15.1953, 8.73729, 16.9486, 14.4231, 17.4969, 0.518757\n# 17.3379, 79.8719, 12.2989, -1.28006, 9.92895, -3.51622, 10.073, 22.0196, 19.8151, 4.71028\n# 13.9455, 12.2989, 36.1572, 12.8734, 11.9446, 9.09582, 9.74519, 10.9539, 12.1204, 0.211353\n# 15.9036, -1.28006, 12.8734, 59.9998, 10.245, 8.03461, 16.9754, 3.13443, 9.68292, -1.36097\n# 15.1953, 9.92895, 11.9446, 10.245, 43.548, 15.3403, 13.0537, 7.69818, 10.1093, 0.155245\n# 8.73729, -3.51622, 9.09582, 8.03461, 15.3403, 39.981, 12.7695, 1.16248, 6.13749, -2.08507\n# 16.9486, 10.073, 9.74519, 16.9754, 13.0537, 12.7695, 45.8884, 11.6074, 8.96413, -1.15946\n# 14.4231, 22.0196, 10.9539, 3.13443, 7.69818, 1.16248, 11.6074, 49.4083, 18.9169, 3.15661\n# 17.4969, 19.8151, 12.1204, 9.68292, 10.1093, 6.13749, 8.96413, 18.9169, 68.0228, 1.74104\n# 0.518757, 4.71028, 0.211353, -1.36097, 0.155245, -2.08507, -1.15946, 3.15661, 1.74104, 1.50433\nNote that when the sampler is run with arguments algorithm=fixed_param, this section will be missing.\nData rows containing sampling draws\nThe output file contains the values for the thinned set of draws during sampling. 
Here we specified num_samples=100 and left thin at the default value \\(1\\), therefore the next 100 lines are data rows containing the sampler and model parameter values for each sampling iteration.\n-8.76921,0.796814,0.813694,1,1,0,9.75854,0.535093\n-6.79143,0.979604,0.813694,1,3,0,9.13092,0.214431\n-6.79451,0.955359,0.813694,2,3,0,7.19149,0.289341\nTiming information\nUpon successful completion, the sampler writes timing information to the output CSV file as a series of final comment lines:\n#\n# Elapsed Time: 0.005 seconds (Warm-up)\n# 0.002 seconds (Sampling)\n# 0.007 seconds (Total)\n#\n\n\n\nThe diagnostic file contains the following:\n\nInitial comment rows listing full CmdStan argument configuration.\nHeader row\nData rows containing warmup draws, if run with option save_warmup=1\nSampling draws\nComment rows giving timing information\n\nThe columns in this file contain, in order:\n\nall sampler parameters\nall model parameter estimates (on the unconstrained scale)\nthe latent Hamiltonian for each parameter\nthe gradient for each parameter\n\nThe labels for the latent Hamiltonian columns are the parameter column label with prefix p_ and the labels for the gradient columns are the parameter column label with prefix g_.\nThese are the column labels from the file bernoulli_diagnostic.csv:\nlp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta,p_theta,g_theta\n\n\n\nThe profiling information is stored in a plain CSV format with no meta information in the comments.\nEach row represents timing information collected in a profile statement for a given thread. 
It is possible that some profile statements have only one entry (if they were only executed by one thread) and others have multiple entries (if they were executed by multiple threads).\nThe columns are as follows:\n\nname, The name of the profile statement that is being timed\nthread_id, The thread that executed the profile statement\ntotal_time, The combined time spent executing statements inside the profile which includes calculation with and without automatic differentiation\nforward_time, The time spent in the profile statement during the forward pass of a reverse mode automatic differentiation calculation or during a calculation without automatic differentiation\nreverse_time, The time spent in the profile statement during the reverse (backward) pass of reverse mode automatic differentiation\nchain_stack, The number of objects allocated on the chaining automatic differentiation stack. There is a function call for each of these objects in the reverse pass\nno_chain_stack, The number of objects allocated on the non-chaining automatic differentiation stack\nautodiff_calls, The total number of times the profile statement was executed with automatic differentiation\nno_autodiff_calls - The total number of times the profile statement was executed without automatic differentiation", + "crumbs": [ + "Appendices", + "Stan CSV File Format" + ] + }, + { + "objectID": "cmdstan-guide/stan_csv_apdx.html#optimization-output", + "href": "cmdstan-guide/stan_csv_apdx.html#optimization-output", + "title": "Stan CSV File Format", + "section": "", + "text": "Config as comments\nHeader row\nPenalized maximum likelihood estimate", + "crumbs": [ + "Appendices", + "Stan CSV File Format" + ] + }, + { + "objectID": "cmdstan-guide/stan_csv_apdx.html#variational-inference-output", + "href": "cmdstan-guide/stan_csv_apdx.html#variational-inference-output", + "title": "Stan CSV File Format", + "section": "", + "text": "Config as comments\nHeader row\nAdaptation as comments\nVariational 
estimate\nSample draws from estimate of the posterior", + "crumbs": [ + "Appendices", + "Stan CSV File Format" + ] + }, + { + "objectID": "cmdstan-guide/stan_csv_apdx.html#generate-quantities-outputs", + "href": "cmdstan-guide/stan_csv_apdx.html#generate-quantities-outputs", + "title": "Stan CSV File Format", + "section": "", + "text": "Header row\nQuantities of interest", + "crumbs": [ + "Appendices", + "Stan CSV File Format" + ] + }, + { + "objectID": "cmdstan-guide/stan_csv_apdx.html#diagnose-method-outputs", + "href": "cmdstan-guide/stan_csv_apdx.html#diagnose-method-outputs", + "title": "Stan CSV File Format", + "section": "", + "text": "Header row\nGradients", + "crumbs": [ + "Appendices", + "Stan CSV File Format" + ] + }, + { + "objectID": "cmdstan-guide/print.html", + "href": "cmdstan-guide/print.html", + "title": "print (deprecated): MCMC Output Analysis", + "section": "", + "text": "print (deprecated): MCMC Output Analysis\nThe print utility is deprecated, but is still available until CmdStan v3.0. It has been replaced by the stansummary utility.\n\n\n\n\n Back to top", + "crumbs": [ + "Tools and Utilities", + "`print` (deprecated): MCMC Output Analysis" + ] + }, + { + "objectID": "cmdstan-guide/parallelization.html", + "href": "cmdstan-guide/parallelization.html", + "title": "Parallelization", + "section": "", + "text": "Stan provides three ways of parallelizing execution of a Stan model:\n\nmulti-threading with Intel Threading Building Blocks (TBB),\nmulti-processing with Message Passing Interface (MPI) and\nmanycore processing with OpenCL.\n\n\n\nIn order to exploit multi-threading in a Stan model, the models must be rewritten to use the reduce_sum and map_rect functions. 
For instructions on how to rewrite Stan models to use these functions see Stan’s User guide chapter on parallelization, the reduce_sum case study or the Multithreading and Map-Reduce tutorial.\n\n\nOnce a model is rewritten to use the above-mentioned functions, the model must be compiled with the STAN_THREADS makefile flag. The flag can be supplied in the make call but we recommend writing the flag to the make/local file. If the STAN_THREADS flag is defined/non-empty, threads will be enabled.\nAn example of the contents of make/local to enable threading with TBB:\nSTAN_THREADS=true\nThe model is then compiled as normal:\nmake path/to/model\n\n\n\nBefore running a multi-threaded model, we need to specify the maximum number of threads the program can run (total threads for all chains). This is done by setting the num_threads argument. Valid values for num_threads are positive integers and -1. If num_threads is set to -1, all available cores will be used.\nGenerally, this number should not exceed the number of available cores for best performance.\nExample:\n./model sample data file=data.json num_threads=4 ...\nWhen the model is compiled with STAN_THREADS we can sample with multiple chains with a single executable (see section running multiple chains for cases when this is available). When running multiple chains num_threads is the maximum number of threads that can be used by all the chains combined. The exact number of threads that will be used for each chain at a given point in time is determined by the TBB scheduler. The following example start 2 chains with 8 total threads available:\n./model sample num_chains=2 data file=data.json num_threads=8 ...\n\n\n\n\nIn order to use multi-processing with MPI in a Stan model, the models must be rewritten to use the map_rect function. By using MPI, the model can be parallelized across multiple cores or a cluster. 
MPI with Stan is supported on MacOS and Linux.\n\n\nCompiling and running Stan models with MPI requires that the system has an MPI implementation installed. For Unix systems the most commonly used implementations are MPICH and OpenMPI.\n\n\n\nOnce a model is rewritten to use map_rect, additional makefile flags must be written to the make/local. These are:\n\nSTAN_MPI: Enables the use of MPI with Stan if defined.\nCXX: The name of the MPI C++ compiler wrapper. Typically mpicxx.\nTBB_CXX_TYPE: The C++ compiler the MPI wrapper wraps. Typically gcc on Linux and clang on macOS.\n\nAn example of make/local on Linux:\nSTAN_MPI=true\nCXX=mpicxx\nTBB_CXX_TYPE=gcc\nThe model is then compiled as normal:\nmake path/to/model\n\n\n\nThe Stan model compiled with STAN_MPI is run using an MPI launcher. The MPI standard suggests using mpiexec, but a vendor wrapper for the launcher like mpirun can also be used. The launcher is supplied the path to the built executable and the number of processes to start: -n X for mpiexec or -np X for mpirun where X is replaced by the integer representing the number of processes.\nExample for running a model with six processes:\nmpiexec -n 6 path/to/model sample data file=data.json ...\n\n\n\n\n\n\nOpenCL is supported on most modern CPUs and GPUs. In order to run OpenCL-enabled Stan models, an OpenCL runtime for the target device must be installed. This subsection lists installation instructions for OpenCL runtimes of the commonly-found devices.\nIn order to check if any OpenCL-enabled device and its runtime is already present use the clinfo tool. On Linux, clinfo can typically be installed with the default package manager (for example sudo apt-get install clinfo on Ubuntu). For Windows, pre-built clinfo binary can be found here.\nAlso use clinfo to verify successful installation of OpenCL runtimes.\n\n\n\nLinux:\nInstall the NVIDIA GPU driver and the NVIDIA CUDA Toolkit. 
On Ubuntu the commands to install both are:\nsudo apt update\nsudo apt install nvidia-driver-460 nvidia-cuda-toolkit\nReplace the driver version (460 in the above case) with the latest number at the time of installation.\nWindows:\nInstall the NVIDIA GPU Driver and CUDA Toolkit.\n\n\n\n\n\nLinux:\nInstall Radeon Software for Linux available here.\nWindows:\nWe recommend installing the open source OCL-SDK.\n\n\n\n\nInstall the open source PoCL.\n\n\n\nFollow Intel’s install instructions given here (requires registration).\n\n\n\n\nIn order to enable the OpenCL backend the model must be compiled with the STAN_OPENCL makefile flag defined/non-empty. The flag can be supplied in the make call but we recommend writing the flag to the make/local file.\nAn example of the contents of make/local to enable parallelization with OpenCL:\nSTAN_OPENCL=true\nIf you are using OpenCL with an integrated GPU you also need to define the INTEGRATED_OPENCL flag, as the sharing of memory between CPU and GPU is slightly different with integrated graphics:\nINTEGRATED_OPENCL=true\nThe model is then compiled as normal:\nmake path/to/model\n\n\n\nThe Stan model compiled with STAN_OPENCL can also be supplied the OpenCL platform and device IDs of the target device. These IDs determine the device on which to run the OpenCL-supported functions. You can list the devices on your system using the clinfo program. If the system has one GPU and no OpenCL CPU runtime, the platform and device IDs of the GPU are typically 0. 
In that case you can also omit the OpenCL IDs as the default 0 IDs are used in that case.\nWe supply these IDs when starting the executable as shown below:\npath/to/model sample data file=data.json opencl platform=0 device=1", + "crumbs": [ + "Getting Started", + "Parallelization" + ] + }, + { + "objectID": "cmdstan-guide/parallelization.html#multi-threading-with-tbb", + "href": "cmdstan-guide/parallelization.html#multi-threading-with-tbb", + "title": "Parallelization", + "section": "", + "text": "In order to exploit multi-threading in a Stan model, the models must be rewritten to use the reduce_sum and map_rect functions. For instructions on how to rewrite Stan models to use these functions see Stan’s User guide chapter on parallelization, the reduce_sum case study or the Multithreading and Map-Reduce tutorial.\n\n\nOnce a model is rewritten to use the above-mentioned functions, the model must be compiled with the STAN_THREADS makefile flag. The flag can be supplied in the make call but we recommend writing the flag to the make/local file. If the STAN_THREADS flag is defined/non-empty, threads will be enabled.\nAn example of the contents of make/local to enable threading with TBB:\nSTAN_THREADS=true\nThe model is then compiled as normal:\nmake path/to/model\n\n\n\nBefore running a multi-threaded model, we need to specify the maximum number of threads the program can run (total threads for all chains). This is done by setting the num_threads argument. Valid values for num_threads are positive integers and -1. If num_threads is set to -1, all available cores will be used.\nGenerally, this number should not exceed the number of available cores for best performance.\nExample:\n./model sample data file=data.json num_threads=4 ...\nWhen the model is compiled with STAN_THREADS we can sample with multiple chains with a single executable (see section running multiple chains for cases when this is available). 
When running multiple chains num_threads is the maximum number of threads that can be used by all the chains combined. The exact number of threads that will be used for each chain at a given point in time is determined by the TBB scheduler. The following example starts 2 chains with 8 total threads available:\n./model sample num_chains=2 data file=data.json num_threads=8 ...", + "crumbs": [ + "Getting Started", + "Parallelization" + ] + }, + { + "objectID": "cmdstan-guide/parallelization.html#multi-processing-with-mpi", + "href": "cmdstan-guide/parallelization.html#multi-processing-with-mpi", + "title": "Parallelization", + "section": "", + "text": "In order to use multi-processing with MPI in a Stan model, the models must be rewritten to use the map_rect function. By using MPI, the model can be parallelized across multiple cores or a cluster. MPI with Stan is supported on macOS and Linux.\n\n\nCompiling and running Stan models with MPI requires that the system has an MPI implementation installed. For Unix systems the most commonly used implementations are MPICH and OpenMPI.\n\n\n\nOnce a model is rewritten to use map_rect, additional makefile flags must be written to the make/local. These are:\n\nSTAN_MPI: Enables the use of MPI with Stan if defined.\nCXX: The name of the MPI C++ compiler wrapper. Typically mpicxx.\nTBB_CXX_TYPE: The C++ compiler the MPI wrapper wraps. Typically gcc on Linux and clang on macOS.\n\nAn example of make/local on Linux:\nSTAN_MPI=true\nCXX=mpicxx\nTBB_CXX_TYPE=gcc\nThe model is then compiled as normal:\nmake path/to/model\n\n\n\nThe Stan model compiled with STAN_MPI is run using an MPI launcher. The MPI standard suggests using mpiexec, but a vendor wrapper for the launcher like mpirun can also be used. 
The launcher is supplied the path to the built executable and the number of processes to start: -n X for mpiexec or -np X for mpirun where X is replaced by the integer representing the number of processes.\nExample for running a model with six processes:\nmpiexec -n 6 path/to/model sample data file=data.json ...", + "crumbs": [ + "Getting Started", + "Parallelization" + ] + }, + { + "objectID": "cmdstan-guide/parallelization.html#opencl", + "href": "cmdstan-guide/parallelization.html#opencl", + "title": "Parallelization", + "section": "", + "text": "OpenCL is supported on most modern CPUs and GPUs. In order to run OpenCL-enabled Stan models, an OpenCL runtime for the target device must be installed. This subsection lists installation instructions for OpenCL runtimes of the commonly-found devices.\nIn order to check if any OpenCL-enabled device and its runtime is already present use the clinfo tool. On Linux, clinfo can typically be installed with the default package manager (for example sudo apt-get install clinfo on Ubuntu). For Windows, a pre-built clinfo binary can be found here.\nAlso use clinfo to verify successful installation of OpenCL runtimes.\n\n\n\nLinux:\nInstall the NVIDIA GPU driver and the NVIDIA CUDA Toolkit. On Ubuntu the commands to install both are:\nsudo apt update\nsudo apt install nvidia-driver-460 nvidia-cuda-toolkit\nReplace the driver version (460 in the above case) with the latest number at the time of installation.\nWindows:\nInstall the NVIDIA GPU Driver and CUDA Toolkit.\n\n\n\n\n\nLinux:\nInstall Radeon Software for Linux available here.\nWindows:\nWe recommend installing the open source OCL-SDK.\n\n\n\n\nInstall the open source PoCL.\n\n\n\nFollow Intel’s install instructions given here (requires registration).\n\n\n\n\nIn order to enable the OpenCL backend the model must be compiled with the STAN_OPENCL makefile flag defined/non-empty. 
The flag can be supplied in the make call but we recommend writing the flag to the make/local file.\nAn example of the contents of make/local to enable parallelization with OpenCL:\nSTAN_OPENCL=true\nIf you are using OpenCL with an integrated GPU you also need to define the INTEGRATED_OPENCL flag, as the sharing of memory between CPU and GPU is slightly different with integrated graphics:\nINTEGRATED_OPENCL=true\nThe model is then compiled as normal:\nmake path/to/model\n\n\n\nThe Stan model compiled with STAN_OPENCL can also be supplied the OpenCL platform and device IDs of the target device. These IDs determine the device on which to run the OpenCL-supported functions on. You can list the devices on your system using the clinfo program. If the system has one GPU and no OpenCL CPU runtime, the platform and device IDs of the GPU are typically 0. In that case you can also omit the OpenCL IDs as the default 0 IDs are used in that case.\nWe supply these IDs when starting the executable as shown below:\npath/to/model sample data file=data.json opencl platform=0 device=1", + "crumbs": [ + "Getting Started", + "Parallelization" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html", + "href": "cmdstan-guide/mcmc_config.html", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "The sample method provides Bayesian inference over the model conditioned on data using Hamiltonian Monte Carlo (HMC) sampling. By default, the inference engine used is the No-U-Turn sampler (NUTS), an adaptive form of Hamiltonian Monte Carlo sampling. For details on HMC and NUTS, see the Stan Reference Manual chapter on MCMC Sampling.\n\n\nTo generate a sample from the posterior distribution of the model conditioned on the data, we run the executable program with the argument sample or method=sample together with the input data. 
The executable can be run from any directory.\nThe full set of configuration options available for the sample method is available by using the sample help-all subcommand. The arguments with their requested values or defaults are also reported at the beginning of the sampler console output and in the output CSV file’s comments.\nHere, we run it in the directory which contains the Stan program and input data, <cmdstan-home>/examples/bernoulli:\n> cd examples/bernoulli\n> ls\n bernoulli bernoulli.data.json bernoulli.data.R bernoulli.stan\nTo execute sampling of the model under Linux or Mac, use:\n> ./bernoulli sample data file=bernoulli.data.json\nIn Windows, the ./ prefix is not needed:\n> bernoulli.exe sample data file=bernoulli.data.json\nThe output is the same across all supported platforms. First, the configuration of the program is echoed to the standard output:\nmethod = sample (Default)\n sample\n num_samples = 1000 (Default)\n num_warmup = 1000 (Default)\n save_warmup = false (Default)\n thin = 1 (Default)\n adapt\n engaged = true (Default)\n gamma = 0.050000000000000003 (Default)\n delta = 0.80000000000000004 (Default)\n kappa = 0.75 (Default)\n t0 = 10 (Default)\n init_buffer = 75 (Default)\n term_buffer = 50 (Default)\n window = 25 (Default)\n save_metric = false (Default)\n algorithm = hmc (Default)\n hmc\n engine = nuts (Default)\n nuts\n max_depth = 10 (Default)\n metric = diag_e (Default)\n metric_file = (Default)\n stepsize = 1 (Default)\n stepsize_jitter = 0 (Default)\n num_chains = 1 (Default)\nid = 0 (Default)\ndata\n file = bernoulli.data.json\ninit = 2 (Default)\nrandom\n seed = 3252652196 (Default)\noutput\n file = output.csv (Default)\n diagnostic_file = (Default)\n refresh = 100 (Default)\nAfter the configuration has been displayed, a short timing message is given.\nGradient evaluation took 1.2e-05 seconds\n1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.\nAdjust your expectations accordingly!\nNext, the sampler 
reports the iteration number, reporting the percentage complete.\nIteration: 1 / 2000 [ 0%] (Warmup)\n...\nIteration: 2000 / 2000 [100%] (Sampling)\nFinally, the sampler reports timing information:\n Elapsed Time: 0.007 seconds (Warm-up)\n 0.017 seconds (Sampling)\n 0.024 seconds (Total)\n\n\n\nEach execution of the model results in draws from a single Markov chain being written to a file in comma-separated value (CSV) format. The default name of the output file is output.csv.\nThe first part of the output file records the version of the underlying Stan library and the configuration as comments (i.e., lines beginning with the pound sign (#)).\nWhen the example model bernoulli.stan is run via the command line with all default arguments, the following configuration is displayed:\n# stan_version_major = 2\n# stan_version_minor = 23\n# stan_version_patch = 0\n# model = bernoulli_model\n# method = sample (Default)\n# sample\n# num_samples = 1000 (Default)\n# num_warmup = 1000 (Default)\n# save_warmup = false (Default)\n# thin = 1 (Default)\n# adapt\n# engaged = 1 (Default)\n# gamma = 0.050000 (Default)\n# delta = 0.800000 (Default)\n# kappa = 0.750000 (Default)\n# t0 = 10.000000 (Default)\n# init_buffer = 75 (Default)\n# term_buffer = 50 (Default)\n# window = 25 (Default)\n# save_metric = false (Default)\n# algorithm = hmc (Default)\n# hmc\n# engine = nuts (Default)\n# nuts\n# max_depth = 10 (Default)\n# metric = diag_e (Default)\n# metric_file = (Default)\n# stepsize = 1.000000 (Default)\n# stepsize_jitter = 0.000000 (Default)\n# num_chains = 1 (Default)\n# output\n# file = output.csv (Default)\n# diagnostic_file = (Default)\n# refresh = 100 (Default)\nThis is followed by a CSV header indicating the names of the values sampled.\nlp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta\nThe first output columns report the HMC sampler information:\n\nlp__ - the total log probability density (up to an additive constant) at each 
sample\naccept_stat__ - the average Metropolis acceptance probability over each simulated Hamiltonian trajectory\nstepsize__ - integrator step size\ntreedepth__ - depth of tree used by NUTS (NUTS sampler)\nn_leapfrog__ - number of leapfrog calculations (NUTS sampler)\ndivergent__ - has value 1 if trajectory diverged, otherwise 0. (NUTS sampler)\nenergy__ - value of the Hamiltonian\nint_time__ - total integration time (static HMC sampler)\n\nBecause the above header is from the NUTS sampler, it has columns treedepth__, n_leapfrog__, and divergent__ and doesn’t have column int_time__. The remaining columns correspond to model parameters. For the Bernoulli model, it is just the final column, theta.\nThe header line is written to the output file before warmup begins. If option save_warmup is set to true, the warmup draws are output directly after the header. The total number of warmup draws saved is num_warmup divided by thin, rounded up (i.e., ceiling).\nFollowing the warmup draws (if any), are comments which record the results of adaptation: the stepsize, and inverse mass metric used during sampling:\n# Adaptation terminated\n# Step size = 0.884484\n# Diagonal elements of inverse mass matrix:\n# 0.535006\nThe default sampler is NUTS with an adapted step size and a diagonal inverse mass matrix. 
For this example, the step size is 0.884484, and the inverse mass contains the single entry 0.535006 corresponding to the parameter theta.\nDraws from the posterior distribution are printed out next, each line containing a single draw with the columns corresponding to the header.\n-6.84097,0.974135,0.884484,1,3,0,6.89299,0.198853\n-6.91767,0.985167,0.884484,1,1,0,6.92236,0.182295\n-7.04879,0.976609,0.884484,1,1,0,7.05641,0.162299\n-6.88712,1,0.884484,1,1,0,7.02101,0.188229\n-7.22917,0.899446,0.884484,1,3,0,7.73663,0.383596\n...\nThe output ends with timing details:\n# Elapsed Time: 0.007 seconds (Warm-up)\n# 0.017 seconds (Sampling)\n# 0.024 seconds (Total)\n\n\n\nAt every sampler iteration, the sampler returns a set of estimates for all parameters and quantities of interest in the model. During warmup, the NUTS algorithm adjusts the HMC algorithm parameters metric and stepsize in order to efficiently sample from typical set, the neighborhood substantial posterior probability mass through which the Markov chain will travel in equilibrium. After warmup, the fixed metric and stepsize are used to produce a set of draws.\nThe following keyword-value arguments control the total number of iterations:\n\nnum_samples\nnum_warmup\nsave_warmup\nthin\n\nThe values for arguments num_samples and num_warmup must be a non-negative integer. The default value for both is \\(1000\\).\nFor well-specified models and data, the sampler may converge faster and this many warmup iterations may be overkill. Conversely, complex models which have difficult posterior geometries may require more warmup iterations in order to arrive at good values for the step size and metric.\nThe number of sampling iterations to runs depends on the effective sample size (EFF) reported for each parameter and the desired precision of your estimates. An EFF of at least 100 is required to make a viable estimate. 
The precision of your estimate is \\(\\sqrt{N}\\); therefore every additional decimal place of accuracy increases this by a factor of 10.\nArgument save_warmup takes values false or true. The default value is false, i.e., warmup draws are not saved to the output file. When the value is true, the warmup draws are written to the CSV output file directly after the CSV header line.\nArgument thin controls the number of draws from the posterior written to the output file. Some users familiar with older approaches to MCMC sampling might be used to thinning to eliminate an expected autocorrelation in the draws. HMC is not nearly as susceptible to this autocorrelation problem and thus thinning is generally not required nor advised, as HMC can produce anticorrelated draws, which increase the effective sample size beyond the number of draws from the posterior. Thinning should only be used in circumstances where storage of the draws is limited and/or RAM for later processing the draws is limited.\nThe value of argument thin must be a positive integer. When thin is set to value \\(N\\), every \\(N^{th}\\) iteration is written to the output file. Should the value of thin exceed the specified number of iterations, the first iteration is saved to the output. This is because the iteration counter starts from zero and whenever the counter modulo the value of thin equals zero, the iteration is saved to the output file. Since zero modulo any positive integer is zero, the first iteration is always saved. When num_samples=M and thin=N, the number of iterations written to the output CSV file will be ceiling(M/N). If save_warmup=true, thinning is applied to the warmup iterations as well.\n\n\n\nThe adapt keyword is used to specify non-default options for the sampler adaptation schedule and settings.\nAdaptation can be turned off by setting sub-argument engaged to value false. If engaged=false, no adaptation will be done, and all other adaptation sub-arguments will be ignored. 
Since the default argument is engaged=1, this keyword-value pair can be omitted from the command.\nThere are two sets of adaptation sub-arguments: step size optimization parameters and the warmup schedule. These are described in detail in the Reference Manual section Automatic Parameter Tuning.\nThe boolean sub-argument save_metric was added in Stan version 2.34. When save_metric=true, the adapted stepsize and metric are output as JSON at the end of adaptation. The saved metric file name is the output file basename with the suffix _metric.json, e.g., if using the default output filename output.csv, the saved metric file will be output_metric.json. This metric file can be reused in subsequent sampler runs as the initial metric, via sampler argument metric_file.\n\n\nThe Stan User’s Guide section on model conditioning and curvature provides a discussion of adaptation and stepsize issues. The Stan Reference Manual section on HMC algorithm parameters explains the NUTS-HMC adaptation schedule and the tuning parameters for setting the step size.\nThe following keyword-value arguments control the settings used to optimize the step size:\n\ndelta - The target Metropolis acceptance rate. The default value is \\(0.8\\). Its value must be strictly between \\(0\\) and \\(1\\). Increasing the default value forces the algorithm to use smaller step sizes. This can improve sampling efficiency (effective sample size per iteration) at the cost of increased iteration times. Raising the value of delta will also allow some models that would otherwise get stuck to overcome their blockages. Models with difficult posterior geometries may require increasing the delta argument closer to \\(1\\); we recommend first trying to raise it to \\(0.9\\) or at most \\(0.95\\). 
Values above \\(0.95\\) are a strong indication of bad geometry; the better solution is to change the model geometry through reparameterization which could yield both more efficient and faster sampling.\ngamma - Adaptation regularization scale. Must be a positive real number, default value is \\(0.05\\). This is a parameter of the Nesterov dual-averaging algorithm. We recommend always using the default value.\nkappa - Adaptation relaxation exponent. Must be a positive real number, default value is \\(0.75\\). This is a parameter of the Nesterov dual-averaging algorithm. We recommend always using the default value.\nt0 - Adaptation iteration offset. Must be a positive real number, default value is \\(10\\). This is a parameter of the Nesterov dual-averaging algorithm. We recommend always using the default value.\n\n\n\n\nWhen adaptation is engaged, the warmup schedule is specified by sub-arguments, all of which take positive integers as values:\n\ninit_buffer - The number of iterations spent tuning the step size at the outset of adaptation.\nwindow - The initial number of iterations devoted to tuning the metric; this number is doubled successively.\nterm_buffer - The number of iterations used to re-tune the step size once the metric has been tuned.\n\nThe specified values may be modified slightly in order to ensure alignment between the warmup schedule and total number of warmup iterations.\nThe following figure is taken from the Stan Reference Manual, where label “I” corresponds to init_buffer, the initial “II” corresponds to window, and the final “III” corresponds to term_buffer:\nWarmup Epochs Figure. Adaptation during warmup occurs in three stages: an initial fast adaptation interval (I), a series of expanding slow adaptation intervals (II), and a final fast adaptation interval (III). For HMC, both the fast and slow intervals are used for adapting the step size, while the slow intervals are used for learning the (co)variance necessitated by the metric. 
Iteration numbering starts at 1 on the left side of the figure and increases to the right.\n\n\n\n\n\nThe algorithm keyword-value pair specifies the algorithm used to generate the sample. There are two possible values: hmc, which generates from an HMC-driven Markov chain; and fixed_param which generates a new sample without changing the state of the Markov chain. The default argument is algorithm=hmc.\n\n\nIf a model doesn’t specify any parameters, then argument algorithm=fixed_param is mandatory.\nThe fixed parameter sampler generates a new sample without changing the current state of the Markov chain. This can be used to write models which generate pseudo-data via calls to RNG functions in the transformed data and generated quantities blocks.\n\n\n\nAll HMC algorithms have three parameters:\n\nstep size\nmetric\nintegration time - the number of steps taken along the Hamiltonian trajectory\n\nSee the Stan Reference Manual section on HMC algorithm parameters for further details.\n\n\nThe HMC algorithm simulates the evolution of a Hamiltonian system. The step size parameter controls the resolution of the sampler. Low step sizes can get HMC samplers unstuck that would otherwise get stuck with higher step sizes.\nThe following keyword-value arguments control the step size:\n\nstepsize - How far to move each time the Hamiltonian system evolves forward. Must be a positive real number, default value is \\(1\\).\nstepsize_jitter - Allows step size to be “jittered” randomly during sampling to avoid any poor interactions with a fixed step size and regions of high curvature. Must be a real value between \\(0\\) and \\(1\\). The default value is \\(0\\). Setting stepsize_jitter to \\(1\\) causes step sizes to be selected in the range of \\(0\\) to twice the adapted step size. 
Jittering below the adapted value will increase the number of steps required and will slow down sampling, while jittering above the adapted value can cause premature rejection due to simulation error in the Hamiltonian dynamics calculation. We strongly recommend always using the default value.\n\n\n\n\nAll HMC implementations in Stan utilize quadratic kinetic energy functions which are specified up to the choice of a symmetric, positive-definite matrix known as a mass matrix or, more formally, a metric Betancourt (2017).\nThe metric argument specifies the choice of Euclidean HMC implementations:\n\nmetric=unit specifies unit metric (diagonal matrix of ones).\nmetric=diag_e specifies a diagonal metric (diagonal matrix with positive diagonal entries). This is the default value.\nmetric=dense_e specifies a dense metric (a dense, symmetric positive definite matrix).\n\nBy default, the metric is estimated during warmup. However, when metric=diag_e or metric=dense_e, an initial guess for the metric can be specified with the metric_file argument whose value is the filepath to a JSON or Rdump file which contains a single variable inv_metric. For a diag_e metric the inv_metric value must be a vector of positive values, one for each parameter in the system. For a dense_e metric, inv_metric value must be a positive-definite square matrix with number of rows and columns equal to the number of parameters in the model.\nThe metric_file option can be used with and without adaptation enabled. If adaptation is enabled, the provided metric will be used as the initial guess in the adaptation process. If the initial guess is good, then adaptation should not change it much. 
If the metric is no good, then the adaptation will override the initial guess.\nIf adaptation is disabled, both the metric_file and stepsize arguments should be specified.\n\n\n\nThe total integration time is determined by the argument engine which take possible values:\n\nnuts - the No-U-Turn Sampler which dynamically determines the optimal integration time.\nstatic - an HMC sampler which uses a user-specified integration time.\n\nThe default argument is engine=nuts.\nThe NUTS sampler generates a proposal by starting at an initial position determined by the parameters drawn in the last iteration. It then evolves the initial system both forwards and backwards in time to form a balanced binary tree. The algorithm is iterative; at each iteration the tree depth is increased by one, doubling the number of leapfrog steps thus effectively doubling the computation time. The algorithm terminates in one of two ways: either the NUTS criterion (i.e., a U-turn in Euclidean space on a subtree) is satisfied for a new subtree or the completed tree; or the depth of the completed tree hits the maximum depth allowed.\nWhen engine=nuts, the subargument max_depth can be used to control the depth of the tree. The default argument is max_depth=10. In the case where a model has a difficult posterior from which to sample, max_depth should be increased to ensure that that the NUTS tree can grow as large as necessary.\nWhen the argument engine=static is specified, the user must specify the integration time via keyword int_time which takes as a value a positive number. The default value is \\(2\\pi\\).\n\n\n\n\n\nThe output keyword sub-argument diagnostic_file=<filepath> specifies the location of the auxiliary output file which contains sampler information for each draw, and the gradients on the unconstrained scale and log probabilities for all parameters in the model. 
By default, no auxiliary output file is produced.\n\n\n\nA Markov chain generates draws from the target distribution only after it has converged to equilibrium. In theory, convergence is only guaranteed asymptotically as the number of draws grows without bound. In practice, diagnostics must be applied to monitor convergence for the finite number of draws actually available. One way to monitor whether a chain has approximately converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. For robust diagnostics, we recommend running 4 chains.\nThe preferred way of using multiple chains is to run them all from the same executable using the num_chains argument. There is also the option to use the Unix or DOS shell to run multiple executables.\n\n\nThe num_chains argument can be used for all of Stan’s samplers with the exception of the static HMC engine. This will run multiple chains of MCMC from the same executable, which can save on memory usage due to only needing one copy of the model and data. Depending on whether the model was compiled with STAN_THREADS=true, these will either run in parallel or one after the other.\nWhen num_chains is greater than its default value of 1, arguments related to filenames (e.g. output file=, init=) can accept a comma separated list of values, one per chain.\nFor example, the following command specifies the names of the three chains’ output files.\n./bernoulli sample num_chains=3 data file=bernoulli.data.json output file=output_1.csv,output_2.csv,output_3.csv\nThis will write the output in output_1.csv, output_2.csv, output_3.csv.\nIf the model was not compiled with STAN_THREADS=true, the above command will run 3 chains sequentially.\nIf the model was compiled with STAN_THREADS=true, the chains can run in parallel, with the num_threads argument defining the maximum number of threads used to run the chains. 
If the model uses no within-chain parallelization (map_rect or reduce_sum calls), the below command will run 4 chains in parallel, provided there are cores available:\n./bernoulli sample num_chains=4 data file=bernoulli.data.json num_threads=4\nIf the model uses within-chain parallelization (map_rect or reduce_sum calls), the threads are automatically scheduled to run the parallel parts of a single chain or run the sequential parts of other chains. The below call starts 4 chains that can use 16 threads. At a given moment a single chain may use all 16 threads, 1 thread, anything in between, or can wait for a thread to be available. The scheduling is left to the Threading Building Blocks scheduler.\n./bernoulli_par sample num_chains=4 data file=bernoulli.data.json num_threads=16\n\n\nIf a comma separated list is not used, the num_chains argument changes the normal meanings of filename arguments when it is greater than 1 (the default). They are now interpreted as a “template” which is used for each chain.\nFor example, when num_chains=2, the argument output file=foo.csv no longer produces a file foo.csv, but instead produces two files, foo_1.csv and foo_2.csv. If you also supply id=5, the files produced will be foo_5.csv and foo_6.csv – id=5 gives the id of the first chain, and the remaining chains are sequential from there.\nThis also applies to input files, like those used for initialization. For example, with num_chains=3 and init=bar.json, CmdStan will first look for bar_1.json. If it exists, it will use bar_1.json for the first chain, bar_2.json for the second, and so on. If bar_1.json does not exist, it falls back to looking for bar.json, and if it exists, uses the same initial values for each chain. 
The numbers in these filenames are also based on the id argument, which defaults to 1.\nFor example, this shorthand is equivalent to the example given above:\n./bernoulli sample num_chains=3 data file=bernoulli.data.json output file=output.csv\nA suffix with the chain id is appended to the provided output filename (output.csv in the above command), so this will also produce files output_1.csv, output_2.csv, output_3.csv.\n\n\n\n\n\nThe stansummary utility processes one or more output files from a run or set of runs of Stan’s HMC sampler given a model and data. For all columns in the Stan CSV output file stansummary reports a set of statistics including mean, standard deviation, percentiles, effective sample size, and \\(\\hat{R}\\) values.\nTo run stansummary on the output files generated by the for loop above, by the above run of the bernoulli model on Mac or Linux:\n<cmdstan-home>/bin/stansummary output_*.csv\nOn Windows, use backslashes to call the stansummary.exe.\n<cmdstan-home>\\bin\\stansummary.exe output_*.csv\nThe stansummary output consists of one row of statistics per column in the Stan CSV output file. Therefore, the first rows in the stansummary report statistics over the sampler state. 
The final row of output summarizes the estimates of the model variable theta:\nInference for Stan model: bernoulli_model\n4 chains: each with iter=1000; warmup=1000; thin=1; 1000 iterations saved.\n\nWarmup took (0.0060, 0.0040, 0.0050, 0.0050) seconds, 0.020 seconds total\nSampling took (0.0080, 0.010, 0.010, 0.010) seconds, 0.038 seconds total\n\n Mean MCSE StdDev MAD 5% 50% 95% ESS_bulk ESS_tail ESS_bulk/s R_hat\n\nlp__ -7.3 1.9e-02 0.72 0.34 -8.7 -7.0 -6.8 1731 1610 45546 1.0\naccept_stat__ 0.93 2.7e-03 0.12 0.041 0.68 0.97 1.0 5078 3437 1.3e+05 1.0\nstepsize__ 0.90 nan 0.10 0.046 0.82 0.86 1.1 nan nan nan nan\ntreedepth__ 1.4 9.3e-03 0.51 0.00 1.0 1.0 2.0 3167 3441 8.3e+04 1.0\nn_leapfrog__ 2.7 1.7e-01 1.6 0.00 1.0 3.0 7.0 494 2000 1.3e+04 1.0\ndivergent__ 0.00 nan 0.00 0.00 0.00 0.00 0.00 nan nan nan nan\nenergy__ 7.8 2.6e-02 1.0 0.70 6.8 7.4 9.8 1598 2069 4.2e+04 1.0\n\ntheta 0.26 2.9e-03 0.12 0.12 0.084 0.24 0.47 1658 1490 43629 1.0\n\nSamples were drawn using hmc with nuts.\nFor each parameter, ESS_bulk and ESS_tail measure the effective sample size for the entire sample (bulk)\nand for the .05 and .95 tails (tail), and R_hat measures the potential scale reduction on split chains.\nAt convergence R_hat will be very close to 1.00.\nIn this example, we conditioned the model on data consisting of the outcomes of 10 bernoulli trials, where only 2 trials reported success. 
The 5%, 50%, and 95% percentile values for theta reflect the uncertainty in our estimate, due to the small amount of data, given the prior of beta(1, 1)\n\n\n\nNote: Many of these examples can be simplified by using the num_chains argument.\nWhen the num_chains argument is not available or is undesirable for whatever reason, built-in tools in the system shell can be used.\nTo run multiple chains given a model and data, either sequentially or in parallel, we can also use the Unix or DOS shell for loop to set up index variables needed to identify each chain and its outputs.\nOn MacOS or Linux, the for-loop syntax for both the bash and zsh interpreters is:\nfor NAME [in LIST]; do COMMANDS; done\nThe list can be a simple sequence of numbers, or you can use the shell expansion syntax {1..N} which expands to the sequence from \\(1\\) to \\(N\\), e.g. {1..4} expands to 1 2 3 4. Note that the expression {1..N} cannot contain spaces.\nTo run 4 chains for the example bernoulli model on MacOS or Linux:\n> for i in {1..4}\n do\n ./bernoulli sample data file=bernoulli.data.json \\\n output file=output_${i}.csv\n done\nThe backslash (\\) indicates a line continuation in Unix. The expression ${i} substitutes in the value of loop index variable i. To run chains in parallel, put an ampersand (&) at the end of the nested sampler command:\n> for i in {1..4}\n do\n ./bernoulli sample data file=bernoulli.data.json \\\n output file=output_${i}.csv &\n done\nThis pushes each process into the background which allows the loop to continue without waiting for the current chain to finish.\nOn Windows, the DOS for-loop syntax is one of:\nfor %i in (SET) do COMMAND COMMAND-ARGUMENTS\nfor /l %i in (START, STEP, END) do COMMAND COMMAND-ARGUMENTS\nTo run 4 chains in parallel on Windows:\n>for /l %i in (1, 1, 4) do start /b bernoulli.exe sample ^\n data file=bernoulli.data.json my_data ^\n output file=output_%i.csv\nThe caret (^) indicates a line continuation in DOS. 
The expression %i is the loop index.\nIn the following extended examples, we focus on just the nested sampler command for Unix.\n\n\nFor reproducibility, we specify the same RNG seed across all chains and use the chain id argument to specify the RNG offset.\nThe RNG seed is specified by random seed=<int> and the offset is specified by id=<loop index>, so the call to the sampler is:\n./my_model sample data file=my_model.data.json \\\n output file=output_${i}.csv \\\n random seed=12345 id=${i}\n\n\n\nThe warmup and sampling iteration keyword-value arguments must follow the sample keyword. The call to the sampler which overrides the default warmup and sampling iterations is:\n./my_model sample num_warmup=500 num_samples=500 \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\n\n\n\nTo save warmup draws as part of the Stan CSV output file, use the keyword-value argument save_warmup=true. This must be grouped with the other sample keyword sub-arguments.\n./my_model sample num_warmup=500 num_samples=500 save_warmup=true \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\n\n\n\nBy default, all parameters are initialized on an unconstrained scale to random draws from a uniform distribution over the range \\([{-2}, 2]\\). To initialize some or all parameters to good starting points on the constrained scale from a data file in JSON or Rdump format, use the keyword-value argument init=<filepath>:\n./my_model sample init=my_param_inits.json data file=my_model.data.json \\\n output file=output_${i}.csv\nTo verify that the specified values will be used by the sampler, you can run the sampler with option algorithm=fixed_param, so that the initial values are used to generate the sample. Since this generates a set of identical draws, setting num_warmup=0 and num_samples=1 saves unnecessary iterations. 
As the output values are also on the constrained scale, the set of reported values will match the set of specified initial values.\nFor example, if we run the example Bernoulli model with specified initial value for parameter “theta”:\n{ \"theta\" : 0.5 }\nvia command:\n./bernoulli sample algorithm=fixed_param num_warmup=0 num_samples=1 \\\n init=bernoulli.init.json data file=bernoulli.data.json\nThe resulting output CSV file contains a single draw:\nlp__,accept_stat__,theta\n0,0,0.5\n#\n# Elapsed Time: 0 seconds (Warm-up)\n# 0 seconds (Sampling)\n# 0 seconds (Total)\n#\n\n\n\nAn initial estimate for the metric can be specified with the metric_file argument whose value is the filepath to a JSON or Rdump file which contains a variable inv_metric. The metric_file option can be used with and without adaptation enabled.\nBy default, the metric is estimated during warmup adaptation. If the initial guess is good, then adaptation should not change it much. If the metric is no good, then the adaptation will override the initial guess. For example, the JSON file bernoulli.diag_e.json, contents\n{ \"inv_metric\" : [0.296291] }\ncan be used as the initial metric as follows:\n../my_model sample algorithm=hmc metric_file=bernoulli.diag_e.json \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\nIf adaptation is disabled, both the metric_file and stepsize arguments should be specified.\n../my_model sample adapt engaged=false \\\n algorithm=hmc stepsize=0.9 \\\n metric_file=bernoulli.diag_e.json \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\nThe resulting output CSV file will contain the following set of comment lines:\n# Adaptation terminated\n# Step size = 0.9\n# Diagonal elements of inverse mass matrix:\n# 0.296291\nAs of Stan version 2.34, the adapted metric can be saved in JSON format, via sub-argument save_metric, described above. This allows for no or minimal adaptation starting from this file. 
It is still necessary to specify the stepsize argument as well as the metric_file arguments; the former is the value of the stepsize element in the saved metric file, and the latter is the metric file path.\n\n\n\nThe keyword-value arguments for these settings are grouped together under the adapt keyword which itself is a sub-argument of the sample keyword.\nModels with difficult posterior geometries may require increasing the delta argument closer to \\(1\\).\n./my_model sample adapt delta=0.95 \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\nTo skip adaptation altogether, use the keyword-value argument engaged=false. Disabling adaptation disables both metric and stepsize adaptation, so a stepsize should be provided along with a metric to enable efficient sampling.\n../my_model sample adapt engaged=false \\\n algorithm=hmc stepsize=0.9 \\\n metric_file=bernoulli.diag_e.json \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\nEven with adaptation disabled, it is still advisable to run warmup iterations in order to allow the initial parameter values to be adjusted to estimates which fall within the typical set.\nTo skip warmup altogether requires specifying both num_warmup=0 and adapt engaged=false.\n../my_model sample num_warmup=0 adapt engaged=false \\\n algorithm=hmc stepsize=0.9 \\\n metric_file=bernoulli.diag_e.json \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\n\n\n\nModels with difficult posterior geometries may require increasing the max_depth argument from its default value \\(10\\). 
This requires specifying a series of keyword-argument pairs:\n./my_model sample adapt delta=0.95 \\\n algorithm=hmc engine=nuts max_depth=15 \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\n\n\n\nThe output keyword sub-argument diagnostic_file=<filepath> writes the sampler parameters and gradients of all model parameters for each draw to a CSV file:\n./my_model sample data file=my_model.data.json \\\n output file=output_${i}.csv \\\n diagnostic_file=diagnostics_${i}.csv\n\n\n\nThe output keyword sub-argument refresh=<int> specifies the number of iterations between progress messages written to the terminal window. The default value is \\(100\\) iterations. The progress updates look like:\nIteration: 1 / 2000 [ 0%] (Warmup)\nIteration: 100 / 2000 [ 5%] (Warmup)\nIteration: 200 / 2000 [ 10%] (Warmup)\nIteration: 300 / 2000 [ 15%] (Warmup)\nFor simple models which fit quickly, such updates can be annoying; to suppress them altogether, set refresh=0. This only turns off the Iteration: messages; the configuration and timing information are still written to the terminal.\n./my_model sample data file=my_model.data.json \\\n output file=output_${i}.csv \\\n refresh=0\nFor complicated models which take a long time to fit, setting the refresh rate to a low number, e.g. 
\\(10\\) or even \\(1\\), provides a way to more closely monitor the sampler.\n\n\n\nThe CmdStan argument parser requires keeping sampler config sub-arguments together; interleaving sampler config with the inputs, outputs, inits, RNG seed and chain id config results in an error message such as the following:\n./bernoulli sample data file=bernoulli.data.json adapt delta=0.95\nadapt is either mistyped or misplaced.\nPerhaps you meant one of the following valid configurations?\n method=sample sample adapt\n method=variational variational adapt\nFailed to parse arguments, terminating Stan\nThe following example provides a template for a call to the sampler which specifies input data, initial parameters, initial step-size and metric, adaptation, output, and RNG initialization.\n./my_model sample num_warmup=2000 \\\n init=my_param_inits.json \\\n adapt delta=0.95 init_buffer=100 \\\n window=50 term_buffer=100 \\\n algorithm=hmc engine=nuts max_depth=15 \\\n metric=dense_e metric_file=my_metric.json \\\n stepsize=0.6555 \\\n data file=my_model.data.json \\\n output file=output_${i}.csv refresh=10 \\\n random seed=12345 id=${i}\nThe keywords sample, data, output, and random are the top-level argument groups. Within the sample config arguments, the keyword adapt groups the adaptation algorithm parameters and the keyword-value algorithm=hmc groups the NUTS-HMC parameters.\nThe top-level groups can be freely ordered with respect to one another. 
The following is also a valid command:\n./my_model random seed=12345 id=${i} \\\n data file=my_model.data.json \\\n output file=output_${i}.csv refresh=10 \\\n sample num_warmup=2000 \\\n init=my_param_inits.json \\\n algorithm=hmc engine=nuts max_depth=15 \\\n metric=dense_e metric_file=my_metric.json \\\n stepsize=0.6555 \\\n adapt delta=0.95 init_buffer=100 \\\n window=50 term_buffer=100", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#running-the-sampler", + "href": "cmdstan-guide/mcmc_config.html#running-the-sampler", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "To generate a sample from the posterior distribution of the model conditioned on the data, we run the executable program with the argument sample or method=sample together with the input data. The executable can be run from any directory.\nThe full set of configuration options available for the sample method is available by using the sample help-all subcommand. The arguments with their requested values or defaults are also reported at the beginning of the sampler console output and in the output CSV file’s comments.\nHere, we run it in the directory which contains the Stan program and input data, <cmdstan-home>/examples/bernoulli:\n> cd examples/bernoulli\n> ls\n bernoulli bernoulli.data.json bernoulli.data.R bernoulli.stan\nTo execute sampling of the model under Linux or Mac, use:\n> ./bernoulli sample data file=bernoulli.data.json\nIn Windows, the ./ prefix is not needed:\n> bernoulli.exe sample data file=bernoulli.data.json\nThe output is the same across all supported platforms. 
First, the configuration of the program is echoed to the standard output:\nmethod = sample (Default)\n sample\n num_samples = 1000 (Default)\n num_warmup = 1000 (Default)\n save_warmup = false (Default)\n thin = 1 (Default)\n adapt\n engaged = true (Default)\n gamma = 0.050000000000000003 (Default)\n delta = 0.80000000000000004 (Default)\n kappa = 0.75 (Default)\n t0 = 10 (Default)\n init_buffer = 75 (Default)\n term_buffer = 50 (Default)\n window = 25 (Default)\n save_metric = false (Default)\n algorithm = hmc (Default)\n hmc\n engine = nuts (Default)\n nuts\n max_depth = 10 (Default)\n metric = diag_e (Default)\n metric_file = (Default)\n stepsize = 1 (Default)\n stepsize_jitter = 0 (Default)\n num_chains = 1 (Default)\nid = 0 (Default)\ndata\n file = bernoulli.data.json\ninit = 2 (Default)\nrandom\n seed = 3252652196 (Default)\noutput\n file = output.csv (Default)\n diagnostic_file = (Default)\n refresh = 100 (Default)\nAfter the configuration has been displayed, a short timing message is given.\nGradient evaluation took 1.2e-05 seconds\n1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.\nAdjust your expectations accordingly!\nNext, the sampler reports the iteration number, reporting the percentage complete.\nIteration: 1 / 2000 [ 0%] (Warmup)\n...\nIteration: 2000 / 2000 [100%] (Sampling)\nFinally, the sampler reports timing information:\n Elapsed Time: 0.007 seconds (Warm-up)\n 0.017 seconds (Sampling)\n 0.024 seconds (Total)", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#mcmc_output_csv", + "href": "cmdstan-guide/mcmc_config.html#mcmc_output_csv", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "Each execution of the model results in draws from a single Markov chain being written to a file in comma-separated value (CSV) format. 
The default name of the output file is output.csv.\nThe first part of the output file records the version of the underlying Stan library and the configuration as comments (i.e., lines beginning with the pound sign (#)).\nWhen the example model bernoulli.stan is run via the command line with all default arguments, the following configuration is displayed:\n# stan_version_major = 2\n# stan_version_minor = 23\n# stan_version_patch = 0\n# model = bernoulli_model\n# method = sample (Default)\n# sample\n# num_samples = 1000 (Default)\n# num_warmup = 1000 (Default)\n# save_warmup = false (Default)\n# thin = 1 (Default)\n# adapt\n# engaged = 1 (Default)\n# gamma = 0.050000 (Default)\n# delta = 0.800000 (Default)\n# kappa = 0.750000 (Default)\n# t0 = 10.000000 (Default)\n# init_buffer = 75 (Default)\n# term_buffer = 50 (Default)\n# window = 25 (Default)\n# save_metric = false (Default)\n# algorithm = hmc (Default)\n# hmc\n# engine = nuts (Default)\n# nuts\n# max_depth = 10 (Default)\n# metric = diag_e (Default)\n# metric_file = (Default)\n# stepsize = 1.000000 (Default)\n# stepsize_jitter = 0.000000 (Default)\n# num_chains = 1 (Default)\n# output\n# file = output.csv (Default)\n# diagnostic_file = (Default)\n# refresh = 100 (Default)\nThis is followed by a CSV header indicating the names of the values sampled.\nlp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta\nThe first output columns report the HMC sampler information:\n\nlp__ - the total log probability density (up to an additive constant) at each sample\naccept_stat__ - the average Metropolis acceptance probability over each simulated Hamiltonian trajectory\nstepsize__ - integrator step size\ntreedepth__ - depth of tree used by NUTS (NUTS sampler)\nn_leapfrog__ - number of leapfrog calculations (NUTS sampler)\ndivergent__ - has value 1 if trajectory diverged, otherwise 0. 
(NUTS sampler)\nenergy__ - value of the Hamiltonian\nint_time__ - total integration time (static HMC sampler)\n\nBecause the above header is from the NUTS sampler, it has columns treedepth__, n_leapfrog__, and divergent__ and doesn’t have column int_time__. The remaining columns correspond to model parameters. For the Bernoulli model, it is just the final column, theta.\nThe header line is written to the output file before warmup begins. If option save_warmup is set to true, the warmup draws are output directly after the header. The total number of warmup draws saved is num_warmup divided by thin, rounded up (i.e., ceiling).\nFollowing the warmup draws (if any), are comments which record the results of adaptation: the stepsize, and inverse mass metric used during sampling:\n# Adaptation terminated\n# Step size = 0.884484\n# Diagonal elements of inverse mass matrix:\n# 0.535006\nThe default sampler is NUTS with an adapted step size and a diagonal inverse mass matrix. For this example, the step size is 0.884484, and the inverse mass contains the single entry 0.535006 corresponding to the parameter theta.\nDraws from the posterior distribution are printed out next, each line containing a single draw with the columns corresponding to the header.\n-6.84097,0.974135,0.884484,1,3,0,6.89299,0.198853\n-6.91767,0.985167,0.884484,1,1,0,6.92236,0.182295\n-7.04879,0.976609,0.884484,1,1,0,7.05641,0.162299\n-6.88712,1,0.884484,1,1,0,7.02101,0.188229\n-7.22917,0.899446,0.884484,1,3,0,7.73663,0.383596\n...\nThe output ends with timing details:\n# Elapsed Time: 0.007 seconds (Warm-up)\n# 0.017 seconds (Sampling)\n# 0.024 seconds (Total)", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#iterations", + "href": "cmdstan-guide/mcmc_config.html#iterations", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "At every sampler iteration, the sampler returns a set 
of estimates for all parameters and quantities of interest in the model. During warmup, the NUTS algorithm adjusts the HMC algorithm parameters metric and stepsize in order to efficiently sample from the typical set, the neighborhood of substantial posterior probability mass through which the Markov chain will travel in equilibrium. After warmup, the fixed metric and stepsize are used to produce a set of draws.\nThe following keyword-value arguments control the total number of iterations:\n\nnum_samples\nnum_warmup\nsave_warmup\nthin\n\nThe values for arguments num_samples and num_warmup must be a non-negative integer. The default value for both is \\(1000\\).\nFor well-specified models and data, the sampler may converge faster and this many warmup iterations may be overkill. Conversely, complex models which have difficult posterior geometries may require more warmup iterations in order to arrive at good values for the step size and metric.\nThe number of sampling iterations to run depends on the effective sample size (EFF) reported for each parameter and the desired precision of your estimates. An EFF of at least 100 is required to make a viable estimate. The precision of your estimate is \\(\\sqrt{N}\\); therefore every additional decimal place of accuracy increases this by a factor of 10.\nArgument save_warmup takes values false or true. The default value is false, i.e., warmup draws are not saved to the output file. When the value is true, the warmup draws are written to the CSV output file directly after the CSV header line.\nArgument thin controls the number of draws from the posterior written to the output file. Some users familiar with older approaches to MCMC sampling might be used to thinning to eliminate an expected autocorrelation in the draws. 
HMC is not nearly as susceptible to this autocorrelation problem and thus thinning is generally not required nor advised, as HMC can produce anticorrelated draws, which increase the effective sample size beyond the number of draws from the posterior. Thinning should only be used in circumstances where storage of the draws is limited and/or RAM for later processing the draws is limited.\nThe value of argument thin must be a positive integer. When thin is set to value \\(N\\), every \\(N^{th}\\) iteration is written to the output file. Should the value of thin exceed the specified number of iterations, the first iteration is saved to the output. This is because the iteration counter starts from zero and whenever the counter modulo the value of thin equals zero, the iteration is saved to the output file. Since zero modulo any positive integer is zero, the first iteration is always saved. When num_samples=M and thin=N, the number of iterations written to the output CSV file will be ceiling(M/N). If save_warmup=true, thinning is applied to the warmup iterations as well.", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#adaptation", + "href": "cmdstan-guide/mcmc_config.html#adaptation", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "The adapt keyword is used to specify non-default options for the sampler adaptation schedule and settings.\nAdaptation can be turned off by setting sub-argument engaged to value false. If engaged=false, no adaptation will be done, and all other adaptation sub-arguments will be ignored. Since the default argument is engaged=1, this keyword-value pair can be omitted from the command.\nThere are two sets of adaptation sub-arguments: step size optimization parameters and the warmup schedule. 
These are described in detail in the Reference Manual section Automatic Parameter Tuning.\nThe boolean sub-argument save_metric was added in Stan version 2.34. When save_metric=true, the adapted stepsize and metric are output as JSON at the end of adaptation. The saved metric file name is the output file basename with the suffix _metric.json, e.g., if using the default output filename output.csv, the saved metric file will be output_metric.json. This metric file can be reused in subsequent sampler runs as the initial metric, via sampler argument metric_file.\n\n\nThe Stan User’s Guide section on model conditioning and curvature provides a discussion of adaptation and stepsize issues. The Stan Reference Manual section on HMC algorithm parameters explains the NUTS-HMC adaptation schedule and the tuning parameters for setting the step size.\nThe following keyword-value arguments control the settings used to optimize the step size:\n\ndelta - The target Metropolis acceptance rate. The default value is \\(0.8\\). Its value must be strictly between \\(0\\) and \\(1\\). Increasing the default value forces the algorithm to use smaller step sizes. This can improve sampling efficiency (effective sample size per iteration) at the cost of increased iteration times. Raising the value of delta will also allow some models that would otherwise get stuck to overcome their blockages. Models with difficult posterior geometries may require increasing the delta argument closer to \\(1\\); we recommend first trying to raise it to \\(0.9\\) or at most \\(0.95\\). Values above \\(0.95\\) are a strong indication of bad geometry; the better solution is to change the model geometry through reparameterization which could yield both more efficient and faster sampling.\ngamma - Adaptation regularization scale. Must be a positive real number, default value is \\(0.05\\). This is a parameter of the Nesterov dual-averaging algorithm. 
We recommend always using the default value.\nkappa - Adaptation relaxation exponent. Must be a positive real number, default value is \\(0.75\\). This is a parameter of the Nesterov dual-averaging algorithm. We recommend always using the default value.\nt0 - Adaptation iteration offset. Must be a positive real number, default value is \\(10\\). This is a parameter of the Nesterov dual-averaging algorithm. We recommend always using the default value.\n\n\n\n\nWhen adaptation is engaged, the warmup schedule is specified by sub-arguments, all of which take positive integers as values:\n\ninit_buffer - The number of iterations spent tuning the step size at the outset of adaptation.\nwindow - The initial number of iterations devoted to tune the metric, will be doubled successively.\nterm_buffer - The number of iterations used to re-tune the step size once the metric has been tuned.\n\nThe specified values may be modified slightly in order to ensure alignment between the warmup schedule and total number of warmup iterations.\nThe following figure is taken from the Stan Reference Manual, where label “I” corresponds to init_buffer, the initial “II” corresponds to window, and the final “III” corresponds to term_buffer:\nWarmup Epochs Figure. Adaptation during warmup occurs in three stages: an initial fast adaptation interval (I), a series of expanding slow adaptation intervals (II), and a final fast adaptation interval (III). For HMC, both the fast and slow intervals are used for adapting the step size, while the slow intervals are used for learning the (co)variance necessitated by the metric. 
Iteration numbering starts at 1 on the left side of the figure and increases to the right.", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#algorithm", + "href": "cmdstan-guide/mcmc_config.html#algorithm", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "The algorithm keyword-value pair specifies the algorithm used to generate the sample. There are two possible values: hmc, which generates from an HMC-driven Markov chain; and fixed_param which generates a new sample without changing the state of the Markov chain. The default argument is algorithm=hmc.\n\n\nIf a model doesn’t specify any parameters, then argument algorithm=fixed_param is mandatory.\nThe fixed parameter sampler generates a new sample without changing the current state of the Markov chain. This can be used to write models which generate pseudo-data via calls to RNG functions in the transformed data and generated quantities blocks.\n\n\n\nAll HMC algorithms have three parameters:\n\nstep size\nmetric\nintegration time - the number of steps taken along the Hamiltonian trajectory\n\nSee the Stan Reference Manual section on HMC algorithm parameters for further details.\n\n\nThe HMC algorithm simulates the evolution of a Hamiltonian system. The step size parameter controls the resolution of the sampler. Low step sizes can get HMC samplers unstuck that would otherwise get stuck with higher step sizes.\nThe following keyword-value arguments control the step size:\n\nstepsize - How far to move each time the Hamiltonian system evolves forward. Must be a positive real number, default value is \\(1\\).\nstepsize_jitter - Allows step size to be “jittered” randomly during sampling to avoid any poor interactions with a fixed step size and regions of high curvature. Must be a real value between \\(0\\) and \\(1\\). The default value is \\(0\\). 
Setting stepsize_jitter to \\(1\\) causes step sizes to be selected in the range of \\(0\\) to twice the adapted step size. Jittering below the adapted value will increase the number of steps required and will slow down sampling, while jittering above the adapted value can cause premature rejection due to simulation error in the Hamiltonian dynamics calculation. We strongly recommend always using the default value.\n\n\n\n\nAll HMC implementations in Stan utilize quadratic kinetic energy functions which are specified up to the choice of a symmetric, positive-definite matrix known as a mass matrix or, more formally, a metric Betancourt (2017).\nThe metric argument specifies the choice of Euclidean HMC implementations:\n\nmetric=unit specifies unit metric (diagonal matrix of ones).\nmetric=diag_e specifies a diagonal metric (diagonal matrix with positive diagonal entries). This is the default value.\nmetric=dense_e specifies a dense metric (a dense, symmetric positive definite matrix).\n\nBy default, the metric is estimated during warmup. However, when metric=diag_e or metric=dense_e, an initial guess for the metric can be specified with the metric_file argument whose value is the filepath to a JSON or Rdump file which contains a single variable inv_metric. For a diag_e metric the inv_metric value must be a vector of positive values, one for each parameter in the system. For a dense_e metric, inv_metric value must be a positive-definite square matrix with number of rows and columns equal to the number of parameters in the model.\nThe metric_file option can be used with and without adaptation enabled. If adaptation is enabled, the provided metric will be used as the initial guess in the adaptation process. If the initial guess is good, then adaptation should not change it much. 
If the metric is no good, then the adaptation will override the initial guess.\nIf adaptation is disabled, both the metric_file and stepsize arguments should be specified.\n\n\n\nThe total integration time is determined by the argument engine, which takes one of the following values:\n\nnuts - the No-U-Turn Sampler which dynamically determines the optimal integration time.\nstatic - an HMC sampler which uses a user-specified integration time.\n\nThe default argument is engine=nuts.\nThe NUTS sampler generates a proposal by starting at an initial position determined by the parameters drawn in the last iteration. It then evolves the initial system both forwards and backwards in time to form a balanced binary tree. The algorithm is iterative; at each iteration the tree depth is increased by one, doubling the number of leapfrog steps thus effectively doubling the computation time. The algorithm terminates in one of two ways: either the NUTS criterion (i.e., a U-turn in Euclidean space on a subtree) is satisfied for a new subtree or the completed tree; or the depth of the completed tree hits the maximum depth allowed.\nWhen engine=nuts, the subargument max_depth can be used to control the depth of the tree. The default argument is max_depth=10. In the case where a model has a difficult posterior from which to sample, max_depth should be increased to ensure that the NUTS tree can grow as large as necessary.\nWhen the argument engine=static is specified, the user must specify the integration time via keyword int_time which takes as a value a positive number.
The default value is \\(2\\pi\\).", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#sampler-diag-file", + "href": "cmdstan-guide/mcmc_config.html#sampler-diag-file", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "The output keyword sub-argument diagnostic_file=<filepath> specifies the location of the auxiliary output file which contains sampler information for each draw, and the gradients on the unconstrained scale and log probabilities for all parameters in the model. By default, no auxiliary output file is produced.", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#multi-chain-sampling", + "href": "cmdstan-guide/mcmc_config.html#multi-chain-sampling", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "A Markov chain generates draws from the target distribution only after it has converged to equilibrium. In theory, convergence is only guaranteed asymptotically as the number of draws grows without bound. In practice, diagnostics must be applied to monitor convergence for the finite number of draws actually available. One way to monitor whether a chain has approximately converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. For robust diagnostics, we recommend running 4 chains.\nThe preferred way of using multiple chains is to run them all from the same executable using the num_chains argument. There is also the option to use the Unix or DOS shell to run multiple executables.\n\n\nThe num_chains argument can be used for all of Stan’s samplers with the exception of the static HMC engine. This will run multiple chains of MCMC from the same executable, which can save on memory usage due to only needing one copy of the model and data. 
Depending on whether the model was compiled with STAN_THREADS=true, these will either run in parallel or one after the other.\nWhen num_chains is greater than its default value of 1, arguments related to filenames (e.g. output file=, init=) can accept a comma separated list of values, one per chain.\nFor example, the following command specifies the names of the three chains’ output files.\n./bernoulli sample num_chains=3 data file=bernoulli.data.json output file=output_1.csv,output_2.csv,output_3.csv\nThis will write the output in output_1.csv, output_2.csv, output_3.csv.\nIf the model was not compiled with STAN_THREADS=true, the above command will run 3 chains sequentially.\nIf the model was compiled with STAN_THREADS=true, the chains can run in parallel, with the num_threads argument defining the maximum number of threads used to run the chains. If the model uses no within-chain parallelization (map_rect or reduce_sum calls), the below command will run 4 chains in parallel, provided there are cores available:\n./bernoulli sample num_chains=4 data file=bernoulli.data.json num_threads=4\nIf the model uses within-chain parallelization (map_rect or reduce_sum calls), the threads are automatically scheduled to run the parallel parts of a single chain or run the sequential parts of other chains. The below call starts 4 chains that can use 16 threads. At a given moment a single chain may use all 16 threads, 1 thread, anything in between, or can wait for a thread to be available. The scheduling is left to the Threading Building Blocks scheduler.\n./bernoulli_par sample num_chains=4 data file=bernoulli.data.json num_threads=16\n\n\nIf a comma separated list is not used, the num_chains argument changes the normal meanings of filename arguments when it is greater than 1 (the default).
They are now interpreted as a “template” which is used for each chain.\nFor example, when num_chains=2, the argument output file=foo.csv no longer produces a file foo.csv, but instead produces two files, foo_1.csv and foo_2.csv. If you also supply id=5, the files produced will be foo_5.csv and foo_6.csv – id=5 gives the id of the first chain, and the remaining chains are sequential from there.\nThis also applies to input files, like those used for initialization. For example, with num_chains=3 and init=bar.json, CmdStan will first look for bar_1.json. If it exists, it will use bar_1.json for the first chain, bar_2.json for the second, and so on. If bar_1.json does not exist, it falls back to looking for bar.json, and if it exists, uses the same initial values for each chain. The numbers in these filenames are also based on the id argument, which defaults to 1.\nFor example, this shorthand is equivalent to the example given above:\n./bernoulli sample num_chains=3 data file=bernoulli.data.json output file=output.csv\nA suffix with the chain id is appended to the provided output filename (output.csv in the above command), so this will also produce files output_1.csv, output_2.csv, output_3.csv.", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#summarizing-sampler-outputs-with-stansummary", + "href": "cmdstan-guide/mcmc_config.html#summarizing-sampler-outputs-with-stansummary", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "The stansummary utility processes one or more output files from a run or set of runs of Stan’s HMC sampler given a model and data.
For all columns in the Stan CSV output file stansummary reports a set of statistics including mean, standard deviation, percentiles, effective sample size, and \\(\\hat{R}\\) values.\nTo run stansummary on the output files generated by the for loop above, by the above run of the bernoulli model on Mac or Linux:\n<cmdstan-home>/bin/stansummary output_*.csv\nOn Windows, use backslashes to call the stansummary.exe.\n<cmdstan-home>\\bin\\stansummary.exe output_*.csv\nThe stansummary output consists of one row of statistics per column in the Stan CSV output file. Therefore, the first rows in the stansummary report statistics over the sampler state. The final row of output summarizes the estimates of the model variable theta:\nInference for Stan model: bernoulli_model\n4 chains: each with iter=1000; warmup=1000; thin=1; 1000 iterations saved.\n\nWarmup took (0.0060, 0.0040, 0.0050, 0.0050) seconds, 0.020 seconds total\nSampling took (0.0080, 0.010, 0.010, 0.010) seconds, 0.038 seconds total\n\n Mean MCSE StdDev MAD 5% 50% 95% ESS_bulk ESS_tail ESS_bulk/s R_hat\n\nlp__ -7.3 1.9e-02 0.72 0.34 -8.7 -7.0 -6.8 1731 1610 45546 1.0\naccept_stat__ 0.93 2.7e-03 0.12 0.041 0.68 0.97 1.0 5078 3437 1.3e+05 1.0\nstepsize__ 0.90 nan 0.10 0.046 0.82 0.86 1.1 nan nan nan nan\ntreedepth__ 1.4 9.3e-03 0.51 0.00 1.0 1.0 2.0 3167 3441 8.3e+04 1.0\nn_leapfrog__ 2.7 1.7e-01 1.6 0.00 1.0 3.0 7.0 494 2000 1.3e+04 1.0\ndivergent__ 0.00 nan 0.00 0.00 0.00 0.00 0.00 nan nan nan nan\nenergy__ 7.8 2.6e-02 1.0 0.70 6.8 7.4 9.8 1598 2069 4.2e+04 1.0\n\ntheta 0.26 2.9e-03 0.12 0.12 0.084 0.24 0.47 1658 1490 43629 1.0\n\nSamples were drawn using hmc with nuts.\nFor each parameter, ESS_bulk and ESS_tail measure the effective sample size for the entire sample (bulk)\nand for the .05 and .95 tails (tail), and R_hat measures the potential scale reduction on split chains.\nAt convergence R_hat will be very close to 1.00.\nIn this example, we conditioned the model on data consisting of the outcomes of 10 
bernoulli trials, where only 2 trials reported success. The 5%, 50%, and 95% percentile values for theta reflect the uncertainty in our estimate, due to the small amount of data, given the prior of beta(1, 1)", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/mcmc_config.html#old-multichain", + "href": "cmdstan-guide/mcmc_config.html#old-multichain", + "title": "MCMC Sampling using Hamiltonian Monte Carlo", + "section": "", + "text": "Note: Many of these examples can be simplified by using the num_chains argument.\nWhen the num_chains argument is not available or is undesirable for whatever reason, built-in tools in the system shell can be used.\nTo run multiple chains given a model and data, either sequentially or in parallel, we can also use the Unix or DOS shell for loop to set up index variables needed to identify each chain and its outputs.\nOn MacOS or Linux, the for-loop syntax for both the bash and zsh interpreters is:\nfor NAME [in LIST]; do COMMANDS; done\nThe list can be a simple sequence of numbers, or you can use the shell expansion syntax {1..N} which expands to the sequence from \\(1\\) to \\(N\\), e.g. {1..4} expands to 1 2 3 4. Note that the expression {1..N} cannot contain spaces.\nTo run 4 chains for the example bernoulli model on MacOS or Linux:\n> for i in {1..4}\n do\n ./bernoulli sample data file=bernoulli.data.json \\\n output file=output_${i}.csv\n done\nThe backslash (\\) indicates a line continuation in Unix. The expression ${i} substitutes in the value of loop index variable i. 
To run chains in parallel, put an ampersand (&) at the end of the nested sampler command:\n> for i in {1..4}\n do\n ./bernoulli sample data file=bernoulli.data.json \\\n output file=output_${i}.csv &\n done\nThis pushes each process into the background which allows the loop to continue without waiting for the current chain to finish.\nOn Windows, the DOS for-loop syntax is one of:\nfor %i in (SET) do COMMAND COMMAND-ARGUMENTS\nfor /l %i in (START, STEP, END) do COMMAND COMMAND-ARGUMENTS\nTo run 4 chains in parallel on Windows:\n>for /l %i in (1, 1, 4) do start /b bernoulli.exe sample ^\n data file=bernoulli.data.json my_data ^\n output file=output_%i.csv\nThe caret (^) indicates a line continuation in DOS. The expression %i is the loop index.\nIn the following extended examples, we focus on just the nested sampler command for Unix.\n\n\nFor reproducibility, we specify the same RNG seed across all chains and use the chain id argument to specify the RNG offset.\nThe RNG seed is specified by random seed=<int> and the offset is specified by id=<loop index>, so the call to the sampler is:\n./my_model sample data file=my_model.data.json \\\n output file=output_${i}.csv \\\n random seed=12345 id=${i}\n\n\n\nThe warmup and sampling iteration keyword-value arguments must follow the sample keyword. The call to the sampler which overrides the default warmup and sampling iterations is:\n./my_model sample num_warmup=500 num_sampling=500 \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\n\n\n\nTo save warmup draws as part of the Stan CSV output file, use the keyword-value argument save_warmup=true. This must be grouped with the other sample keyword sub-arguments.\n./my_model sample num_warmup=500 num_sampling=500 save_warmup=true \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\n\n\n\nBy default, all parameters are initialized on an unconstrained scale to random draws from a uniform distribution over the range \\([{-2}, 2]\\). 
To initialize some or all parameters to good starting points on the constrained scale from a data file in JSON or Rdump format, use the keyword-value argument init=<filepath>:\n./my_model sample init=my_param_inits.json data file=my_model.data.json \\\n output file=output_${i}.csv\nTo verify that the specified values will be used by the sampler, you can run the sampler with option algorithm=fixed_param, so that the initial values are used to generate the sample. Since this generates a set of identical draws, setting num_warmup=0 and num_samples=1 saves unnecessary iterations. As the output values are also on the constrained scale, the set of reported values will match the set of specified initial values.\nFor example, if we run the example Bernoulli model with specified initial value for parameter “theta”:\n{ \"theta\" : 0.5 }\nvia command:\n./bernoulli sample algorithm=fixed_param num_warmup=0 num_samples=1 \\\n init=bernoulli.init.json data file=bernoulli.data.json\nThe resulting output CSV file contains a single draw:\nlp__,accept_stat__,theta\n0,0,0.5\n#\n# Elapsed Time: 0 seconds (Warm-up)\n# 0 seconds (Sampling)\n# 0 seconds (Total)\n#\n\n\n\nAn initial estimate for the metric can be specified with the metric_file argument whose value is the filepath to a JSON or Rdump file which contains a variable inv_metric. The metric_file option can be used with and without adaptation enabled.\nBy default, the metric is estimated during warmup adaptation. If the initial guess is good, then adaptation should not change it much. If the metric is no good, then the adaptation will override the initial guess.
For example, the JSON file bernoulli.diag_e.json, contents\n{ \"inv_metric\" : [0.296291] }\ncan be used as the initial metric as follows:\n../my_model sample algorithm=hmc metric_file=bernoulli.diag_e.json \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\nIf adaptation is disabled, both the metric_file and stepsize arguments should be specified.\n../my_model sample adapt engaged=false \\\n algorithm=hmc stepsize=0.9 \\\n metric_file=bernoulli.diag_e.json \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\nThe resulting output CSV file will contain the following set of comment lines:\n# Adaptation terminated\n# Step size = 0.9\n# Diagonal elements of inverse mass matrix:\n# 0.296291\nAs of Stan version 2.34, the adapted metric can be saved in JSON format, via sub-argument save_metric, described above. This allows for no or minimal adaptation starting from this file. It is still necessary to specify the stepsize argument as well as the metric_file argument; the former is the value of the stepsize element in the saved metric file, and the latter is the metric file path.\n\n\n\nThe keyword-value arguments for these settings are grouped together under the adapt keyword which itself is a sub-argument of the sample keyword.\nModels with difficult posterior geometries may require increasing the delta argument closer to \\(1\\).\n./my_model sample adapt delta=0.95 \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\nTo skip adaptation altogether, use the keyword-value argument engaged=false.
Disabling adaptation disables both metric and stepsize adaptation, so a stepsize should be provided along with a metric to enable efficient sampling.\n../my_model sample adapt engaged=false \\\n algorithm=hmc stepsize=0.9 \\\n metric_file=bernoulli.diag_e.json \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\nEven with adaptation disabled, it is still advisable to run warmup iterations in order to allow the initial parameter values to be adjusted to estimates which fall within the typical set.\nTo skip warmup altogether requires specifying both num_warmup=0 and adapt engaged=false.\n../my_model sample num_warmup=0 adapt engaged=false \\\n algorithm=hmc stepsize=0.9 \\\n metric_file=bernoulli.diag_e.json \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\n\n\n\nModels with difficult posterior geometries may require increasing the max_depth argument from its default value \\(10\\). This requires specifying a series of keyword-argument pairs:\n./my_model sample adapt delta=0.95 \\\n algorithm=hmc engine=nuts max_depth=15 \\\n data file=my_model.data.json \\\n output file=output_${i}.csv\n\n\n\nThe output keyword sub-argument diagnostic_file=<filepath> writes the sampler parameters and gradients of all model parameters for each draw to a CSV file:\n./my_model sample data file=my_model.data.json \\\n output file=output_${i}.csv \\\n diagnostic_file=diagnostics_${i}.csv\n\n\n\nThe output keyword sub-argument refresh=<int> specifies the number of iterations between progress messages written to the terminal window. The default value is \\(100\\) iterations. The progress updates look like:\nIteration: 1 / 2000 [ 0%] (Warmup)\nIteration: 100 / 2000 [ 5%] (Warmup)\nIteration: 200 / 2000 [ 10%] (Warmup)\nIteration: 300 / 2000 [ 15%] (Warmup)\nFor simple models which fit quickly, such updates can be annoying; to suppress them altogether, set refresh=0.
This only turns off the Iteration: messages; the configuration and timing information are still written to the terminal.\n./my_model sample data file=my_model.data.json \\\n output file=output_${i}.csv \\\n refresh=0\nFor complicated models which take a long time to fit, setting the refresh rate to a low number, e.g. \\(10\\) or even \\(1\\), provides a way to more closely monitor the sampler.\n\n\n\nThe CmdStan argument parser requires keeping sampler config sub-arguments together; interleaving sampler config with the inputs, outputs, inits, RNG seed and chain id config results in an error message such as the following:\n./bernoulli sample data file=bernoulli.data.json adapt delta=0.95\nadapt is either mistyped or misplaced.\nPerhaps you meant one of the following valid configurations?\n method=sample sample adapt\n method=variational variational adapt\nFailed to parse arguments, terminating Stan\nThe following example provides a template for a call to the sampler which specifies input data, initial parameters, initial step-size and metric, adaptation, output, and RNG initialization.\n./my_model sample num_warmup=2000 \\\n init=my_param_inits.json \\\n adapt delta=0.95 init_buffer=100 \\\n window=50 term_buffer=100 \\\n algorithm=hmc engine=nuts max_depth=15 \\\n metric=dense_e metric_file=my_metric.json \\\n stepsize=0.6555 \\\n data file=my_model.data.json \\\n output file=output_${i}.csv refresh=10 \\\n random seed=12345 id=${i}\nThe keywords sample, data, output, and random are the top-level argument groups. Within the sample config arguments, the keyword adapt groups the adaptation algorithm parameters and the keyword-value algorithm=hmc groups the NUTS-HMC parameters.\nThe top-level groups can be freely ordered with respect to one another. 
The following is also a valid command:\n./my_model random seed=12345 id=${i} \\\n data file=my_model.data.json \\\n output file=output_${i}.csv refresh=10 \\\n sample num_warmup=2000 \\\n init=my_param_inits.json \\\n algorithm=hmc engine=nuts max_depth=15 \\\n metric=dense_e metric_file=my_metric.json \\\n stepsize=0.6555 \\\n adapt delta=0.95 init_buffer=100 \\\n window=50 term_buffer=100", + "crumbs": [ + "Running CmdStan", + "MCMC Sampling using Hamiltonian Monte Carlo" + ] + }, + { + "objectID": "cmdstan-guide/laplace_sample_config.html", + "href": "cmdstan-guide/laplace_sample_config.html", + "title": "Laplace sampling", + "section": "", + "text": "The laplace method produces a sample from a normal approximation centered at the mode of a distribution in the unconstrained space. If the mode is a maximum a posteriori (MAP) estimate, the sample provides an estimate of the mean and standard deviation of the posterior distribution. If the mode is a maximum likelihood estimate (MLE), the sample provides an estimate of the standard error of the likelihood. In general, the posterior mode in the unconstrained space doesn’t correspond to the mean (nor mode) in the constrained space, and thus the sample is needed to infer the mean as well as the standard deviation. (See this case study for a visual illustration.)\nThis is computationally inexpensive compared to exact Bayesian inference with MCMC. The goodness of this estimate depends on both the estimate of the mode and how much the true posterior in the unconstrained space resembles a Gaussian.\n\n\nThis method takes several arguments:\n\nmode - Input file of parameters values on the constrained scale. 
When Stan’s optimize method is used to estimate the modal values, the value of boolean argument jacobian should be false if optimize was run with default settings, i.e., the input is the MLE estimate; if optimize was run with argument jacobian=true, then the laplace method default setting, jacobian=true, should be used.\njacobian - Whether or not the Jacobian adjustment should be included in the gradient. The default value is true (include adjustment). (Note: in optimization, the default value is false, for historical reasons.)\ndraws - How many total draws to return. The default is \\(1000\\).\ncalculate_lp - Whether to calculate the log probability of the model at each draw. If this is false, the log_p__ column of the output will be entirely nan. The default value is true.\n\n\n\n\nThe output file consists of the following pieces of information:\n\nThe full set of configuration options available for the laplace method is reported at the beginning of the output file as CSV comments.\nOutput columns log_p__ and log_q__, the unnormalized log density and the unnormalized density of the Laplace approximation, respectively. These can be used for diagnostics and importance sampling.\nOutput columns for all model parameters on the constrained scale.\n\n\n\n\nIf requested with output diagnostic_file=, a JSON file will be created which contains the log density, the gradient, and the Hessian of the log density evaluated at the mode.\n\n\n\nTo get an approximate estimate of the mode and standard deviation of the example Bernoulli model given the example dataset:\n\nfind the MAP estimate by running optimization with argument jacobian=true\nrun the Laplace estimator using the MAP estimate as the mode argument.\n\nBecause the default output file name from all methods is output.csv, a more informative name is used for the output of optimization. We run the commands from the CmdStan home directory. This results in a sample with mean 2.7 and standard deviation 0.12. 
In comparison, running the NUTS-HMC sampler results in mean 2.6 and standard deviation 0.12.\n./examples/bernoulli/bernoulli optimize jacobian=1 \\\n data file=examples/bernoulli/bernoulli.data.json \\\n output file=bernoulli_optimize_lbfgs.csv random seed=1234\n\n\n./examples/bernoulli/bernoulli laplace mode=bernoulli_optimize_lbfgs.csv \\\n data file=examples/bernoulli/bernoulli.data.json random seed=1234\nThe header and first few data rows of the output sample are shown below.\n# method = laplace\n# laplace\n# mode = bernoulli_lbfgs.csv\n# jacobian = true (Default)\n# draws = 1000 (Default)\n# calculate_lp = true (default)\n# id = 1 (Default)\n# data\n# file = examples/bernoulli/bernoulli.data.json\n# init = 2 (Default)\n# random\n# seed = 875960551 (Default)\n# output\n# file = output.csv (Default)\n# diagnostic_file = (Default)\n# refresh = 100 (Default)\n# sig_figs = 8 (Default)\n# profile_file = profile.csv (Default)\n# num_threads = 1 (Default)\nlog_p__,log_q__,theta\n-9.4562,-2.33997,0.0498545\n-6.9144,-0.0117349,0.182898\n-7.18171,-0.746034,0.376428\n...", + "crumbs": [ + "Running CmdStan", + "Laplace sampling" + ] + }, + { + "objectID": "cmdstan-guide/laplace_sample_config.html#configuration", + "href": "cmdstan-guide/laplace_sample_config.html#configuration", + "title": "Laplace sampling", + "section": "", + "text": "This method takes several arguments:\n\nmode - Input file of parameters values on the constrained scale. When Stan’s optimize method is used to estimate the modal values, the value of boolean argument jacobian should be false if optimize was run with default settings, i.e., the input is the MLE estimate; if optimize was run with argument jacobian=true, then the laplace method default setting, jacobian=true, should be used.\njacobian - Whether or not the Jacobian adjustment should be included in the gradient. The default value is true (include adjustment). 
(Note: in optimization, the default value is false, for historical reasons.)\ndraws - How many total draws to return. The default is \\(1000\\).\ncalculate_lp - Whether to calculate the log probability of the model at each draw. If this is false, the log_p__ column of the output will be entirely nan. The default value is true.", + "crumbs": [ + "Running CmdStan", + "Laplace sampling" + ] + }, + { + "objectID": "cmdstan-guide/laplace_sample_config.html#csv-output", + "href": "cmdstan-guide/laplace_sample_config.html#csv-output", + "title": "Laplace sampling", + "section": "", + "text": "The output file consists of the following pieces of information:\n\nThe full set of configuration options available for the laplace method is reported at the beginning of the output file as CSV comments.\nOutput columns log_p__ and log_q__, the unnormalized log density and the unnormalized density of the Laplace approximation, respectively. These can be used for diagnostics and importance sampling.\nOutput columns for all model parameters on the constrained scale.", + "crumbs": [ + "Running CmdStan", + "Laplace sampling" + ] + }, + { + "objectID": "cmdstan-guide/laplace_sample_config.html#diagnostic-file-outputs", + "href": "cmdstan-guide/laplace_sample_config.html#diagnostic-file-outputs", + "title": "Laplace sampling", + "section": "", + "text": "If requested with output diagnostic_file=, a JSON file will be created which contains the log density, the gradient, and the Hessian of the log density evaluated at the mode.", + "crumbs": [ + "Running CmdStan", + "Laplace sampling" + ] + }, + { + "objectID": "cmdstan-guide/laplace_sample_config.html#example", + "href": "cmdstan-guide/laplace_sample_config.html#example", + "title": "Laplace sampling", + "section": "", + "text": "To get an approximate estimate of the mode and standard deviation of the example Bernoulli model given the example dataset:\n\nfind the MAP estimate by running optimization with argument jacobian=true\nrun the 
Laplace estimator using the MAP estimate as the mode argument.\n\nBecause the default output file name from all methods is output.csv, a more informative name is used for the output of optimization. We run the commands from the CmdStan home directory. This results in a sample with mean 0.27 and standard deviation 0.12. In comparison, running the NUTS-HMC sampler results in mean 0.26 and standard deviation 0.12.\n./examples/bernoulli/bernoulli optimize jacobian=1 \\\n data file=examples/bernoulli/bernoulli.data.json \\\n output file=bernoulli_optimize_lbfgs.csv random seed=1234\n\n\n./examples/bernoulli/bernoulli laplace mode=bernoulli_optimize_lbfgs.csv \\\n data file=examples/bernoulli/bernoulli.data.json random seed=1234\nThe header and first few data rows of the output sample are shown below.\n# method = laplace\n# laplace\n# mode = bernoulli_lbfgs.csv\n# jacobian = true (Default)\n# draws = 1000 (Default)\n# calculate_lp = true (default)\n# id = 1 (Default)\n# data\n# file = examples/bernoulli/bernoulli.data.json\n# init = 2 (Default)\n# random\n# seed = 875960551 (Default)\n# output\n# file = output.csv (Default)\n# diagnostic_file = (Default)\n# refresh = 100 (Default)\n# sig_figs = 8 (Default)\n# profile_file = profile.csv (Default)\n# num_threads = 1 (Default)\nlog_p__,log_q__,theta\n-9.4562,-2.33997,0.0498545\n-6.9144,-0.0117349,0.182898\n-7.18171,-0.746034,0.376428\n...", + "crumbs": [ + "Running CmdStan", + "Laplace sampling" + ] + }, + { + "objectID": "cmdstan-guide/installation.html", + "href": "cmdstan-guide/installation.html", + "title": "CmdStan Installation", + "section": "", + "text": "There are a few ways that you can install CmdStan. Depending on your operating system and your level of expertise, you can either:\n\nUse the conda package management system to install a pre-built version of CmdStan along with the required dependencies. Recommended for Windows users.\nInstall the source code from GitHub CmdStan repository.
This requires a modern C++ compiler and toolchain. See the C++ Toolchain section for further details.\n\n\n\nWith conda, you can install CmdStan from the conda-forge channel. This will install a pre-built version of CmdStan along with the required dependencies (i.e. a C++ compiler, a version of Make, and required libraries). The conda installation is designed so one can use the R or Python bindings to CmdStan seamlessly. Additionally, it provides the command cmdstan_model to activate the CmdStan makefile from anywhere.\nNote: This requires that conda has been installed already on your machine. We recommend using the miniforge distribution.\nWe recommend installing CmdStan in a new conda environment:\n conda create -n stan -c conda-forge cmdstan\nThis command creates a new conda environment named stan and downloads and installs the cmdstan package as well as CmdStan and the required C++ toolchain.\nTo install into an existing conda environment, use the conda install command instead of create:\n conda install -c conda-forge cmdstan\nWhichever installation method you use, afterwards you must activate the new environment or deactivate/activate the existing one. For example, if you installed cmdstan into a new environment stan, run the command\n conda activate stan\nBy default, the latest release of CmdStan is installed. If you require a specific release of CmdStan, CmdStan versions 2.26.1 and newer can be installed by specifying cmdstan==VERSION in the install command. For example to install an earlier version of CmdStan into your current conda environment, run the following command, then re-activate the environment\nconda install -c conda-forge cmdstan=2.27.0\n\n\nA Conda environment is a directory that contains a specific collection of Conda packages. To see the locations of your conda environments, use the command\n conda info -e\nThe shell environment variable CONDA_PREFIX points to the active conda environment (if any). 
Both CmdStan and the C++ toolchain are installed into the bin subdirectory of the conda environment directory, i.e., $CONDA_PREFIX/bin/cmdstan (Linux, MacOS), %CONDA_PREFIX%\\bin\\cmdstan (Windows).\nPlease report conda-specific install problems directly to the conda-forge issue tracker, here.\n\n\n\n\nInstallation from GitHub consists of the following steps:\n\nVerify that you have a modern C++ toolchain. See the C++ Toolchain section for details.\nDownload the CmdStan source code from GitHub\nBuild the CmdStan libraries and executables\nCheck the installation by compiling and running the CmdStan example model bernoulli.stan.\n\n\n\nThe GitHub source code is divided into sub-modules, each in its own repository. The CmdStan repo contains just the cmdstan module; the Stan inference engine algorithms and Stan math library functions are specified as submodules and stored in the GitHub repositories stan and math, respectively.\nA CmdStan release is compressed tarfile which contains CmdStan and the Stan and math library submodules. The most recent CmdStan release is always available as https://github.com/stan-dev/cmdstan/releases/latest. A CmdStan release is versioned by major, minor, patch numbers, e.g., “2.29.2”. Please ensure you download a tarfile which is named “cmdstan-<version-number” rather than using the “Source Code” links at the bottom of the release. These are automatically generated by GitHub and do not contain the required submodules. The release tarfile unpacks into a directory named “cmdstan-”, e.g. “cmdstan-2.29.2”.\nBy cloning the CmdStan repository with argument --recursive, Git automatically initializes and updates each submodule in the repository, including nested submodules if any of the submodules in the repository have submodules themselves. 
The following command will download the source code from the current development branch of CmdStan into a directory named cmdstan:\n> git clone https://github.com/stan-dev/cmdstan.git --recursive\nThroughout this manual, we refer to this top-level CmdStan source directory as <cmdstan-home>. This directory contains the following subdirectories:\n\ndirectory cmdstan/stan contains the sub-module stan (https://github.com/stan-dev/stan)\ndirectory cmdstan/stan/lib/stan_math contains the sub-module math (https://github.com/stan-dev/math)\n\n\n\n\nBuilding CmdStan involves preparing a set of executable programs and compiling the command line interface and supporting libraries. The CmdStan tools are:\n\nstanc: the Stan compiler (translates Stan language to C++).\nstansummary: a basic posterior analysis tool. The stansummary utility processes one or more output files from a run or set of runs of Stan’s HMC sampler. For all parameters and quantities of interest in the Stan program, stansummary reports a set of statistics including mean, standard deviation, percentiles, effective sample size, and \\(\\hat{R}\\) values.\ndiagnose: a basic sampler diagnostic tool which checks for indications that the HMC sampler was unable to sample from the full posterior.\n\nCmdStan releases include pre-built binaries of the Stan language compiler (https://github.com/stan-dev/stanc3): bin/linux-stanc, bin/mac-stanc and bin/windows-stanc. The CmdStan makefile build task copies the appropriate binary to bin/stanc. 
For CmdStan installations which have been cloned or downloaded from the CmdStan GitHub repository, the makefile task will download the appropriate OS-specific binary from the stanc3 repository’s nightly release.\nSteps to build CmdStan:\n\nOpen a command-line terminal window and change directories to the CmdStan home directory.\nRun the makefile target build which instantiates the CmdStan utilities and compiles all necessary C++ libraries.\n\n> cd <cmdstan-home>\n> make build\nIf your computer has multiple cores and sufficient RAM, the build process can be parallelized by providing the -j option. For example, to build on 4 cores, type:\n> make -j4 build\nWhen make build is successful, the directory <cmdstan-home>/bin/ will contain the executables stanc, stansummary, and diagnose (on Windows, corresponding .exe files) and the final lines of console output will show the version of CmdStan that has just been built, e.g.:\n--- CmdStan v2.29.2 built ---\nWarning: The Make program may take 10+ minutes and consume 2+ GB of memory to build CmdStan.\nWindows only: CmdStan requires that the Intel TBB library, which is built by the above command, can be found by the Windows system. This requires that the directory <cmdstan-home>/stan/lib/stan_math/lib/tbb is part of the PATH environment variable. See these instructions for details on changing the PATH. To permanently make this setting for the current user, you may execute:\n> make install-tbb\nAfter changing the PATH environment variable, you must open a new shell in order for the new environment variable settings to take effect. 
(This is not necessary on Mac and Linux systems because they can use the absolute path to the Intel TBB library when linking into Stan programs.)\n\n\n\n\nTo check that the CmdStan installation is complete and in working order, run the following series of commands from the folder in which CmdStan was installed.\nOn Linux and macOS:\n# compile the example\n> make examples/bernoulli/bernoulli\n\n# fit to provided data (results of 10 trials, 2 out of 10 successes)\n> ./examples/bernoulli/bernoulli sample\\\n data file=examples/bernoulli/bernoulli.data.json\n\n# default output written to file `output.csv`,\n# default num_samples is 1000, output file should have approx. 1050 lines\n> wc -l output.csv\n\n# run the `bin/stansummary` utility to summarize parameter estimates\n> bin/stansummary output.csv\nOn Windows:\n# compile the example\n> make examples/bernoulli/bernoulli.exe\n\n# fit to provided data (results of 10 trials, 2 out of 10 successes)\n> ./examples/bernoulli/bernoulli.exe sample data file=examples/bernoulli/bernoulli.data.json\n\n# run the `bin/stansummary.exe` utility to summarize parameter estimates\n> bin/stansummary.exe output.csv\nThe sample data in file bernoulli.data.json specifies 2 out of 10 successes, therefore the range mean(theta)\\(\\pm\\)sd(theta) should include 0.2.\n\n\n\nUpdates to CmdStan, changes in compiler options, or updates to the C++ toolchain may result in errors when trying to compile a Stan program. Often, these problems can be resolved by removing the existing CmdStan binaries and recompiling. To do this, you must run the makefile commands from the <cmdstan-home> directory:\n> cd <cmdstan-home>\n> make clean-all\n> make build\n\n\nThis section contains solutions to problems reported on https://discourse.mc-stan.org\nCompiler error message about PCH file\nTo speed up compilation, the Stan makefile pre-compiles parts of the core Stan library. 
If these pre-compiled files are out of sync with the compiled model, the compiler will complain, e.g.:\nerror: PCH file uses an older PCH format that is no longer supported\nIn this case, clean and rebuild CmdStan, as shown in the previous section.\nWindows: ‘g++’, ‘make’, or ‘cut’ is not recognized\nThe CmdStan makefile uses a few shell utilities which might not be present in Windows, resulting in the error message:\n'cut' is not recognized as an internal or external command,\noperable program or batch file.\nTo fix this, ensure you have followed the steps for adding the toolchain to your PATH and installing the additional utilities covered in the configuration instructions.\nSpaces in paths to CmdStan or model\nmake can fail when dealing with files in folders with a space somewhere in their file path. Particularly on Windows, this can be an issue when CmdStan, or the models you are trying to build, are placed in the One Drive folder.\nUnfortunately, the errors created by this situation are not always informative. Some errors you may see are:\nmake: *** INTERNAL: readdir: Invalid argument\nmake: *** [make/program:50: x.hpp] Error 2\nIf the (fully-expanded) folder path to CmdStan or the model you are trying to build contains a space, we recommend trying a different location if you encounter any issues during building.\n\n\n\n\nCompiling a Stan program requires a modern C++ compiler and the GNU Make build utility (a.k.a. “gmake”). These vary by operating system.\n\n\nThe required C++ compiler is g++ 4.9.3. On most systems the GNU Make utility is pre-installed and is the default make utility. There is usually a pre-installed C++ compiler as well, however, it may not be new enough. To check, run commands:\ng++ --version\nmake --version\nIf these are at least at g++ version 4.9.3 or later and make version 3.81 or later, no additional installations are necessary. 
It may still be desirable to update the C++ compiler g++, because later versions are faster.\nTo install the latest version of these tools (or upgrade an older version), use the following commands or their equivalent for your distribution:\nsudo apt install g++\nsudo apt install make\nIf you can’t run sudo, you will need to ask your sysadmin or cluster administrator to install these tools for you.\n\n\n\nTo check if you already have an appropriate toolchain installed, open the Terminal application and enter:\nclang++ --version\nmake --version\nIf either of these commands prints the message command not found, you will need to install Xcode’s command line tools.\nOpen the Terminal application and enter:\nxcode-select --install\nSelect “Install” in the window that opens.\nAfter the installation completes, you can double check that installation was successful by reopening the Terminal and running:\nclang++ --version\nmake --version\nYou can read more about Xcode on its site: https://developer.apple.com/xcode/\nWe don’t recommend trying to use the GNU C++ compiler, available via Homebrew, based on the number of reports of installation difficulties from Mac users on GitHub as well as the Stan forums.\n\n\n\nThe Windows toolchain consists of programs g++, the C++ compiler, and make, the GNU Make utility. To check if these are present, open a command shell [^1] and type:\ng++ --version\nmake --version\nCmdStan is known to be compatible with the RTools45 toolchain. The toolchain will require updating your PATH variable; see these instructions for details on changing the PATH if you are unfamiliar. The following instructions will assume that the default installation directory was used, so be sure to update the paths accordingly if you have chosen a different directory.\n\n\nAll required utilities (e.g., make, g++) for compiling and running CmdStan models on Windows are provided by the RTools45 toolchain from the R Project. 
Installation steps are provided below, and for more technical details on the toolchain refer to the R Project documentation.\nThe R Project provides RTools45 for both Intel/AMD 64-bit (x86_64) and ARM 64-bit (aarch64) systems. If you are unsure which to use, then you can check by going to the Windows Settings, selecting the ‘System’ menu and then the ‘About’ option. If the ‘System Type’ field lists ‘ARM-based processor’, then you should follow the ARM64 instructions below.\nNote that the toolchain is only available for 64-bit systems, and uses the new Universal C Runtime (UCRT). UCRT is only natively supported on Windows 10 and newer, older systems will require a Microsoft update.\n\n\nDownload the installer and complete the prompts for installation:\n\nRTools45\n\nNext, you need to add the toolchain directory to your PATH variable:\nC:\\rtools45\\usr\\bin\nC:\\rtools45\\x86_64-w64-mingw32.static.posix\\bin\n\n\n\nDownload the installer and complete the prompts for installation:\n\nRTools45 - ARM64\n\nNext, you need to add the toolchain directory to your PATH variable:\nC:\\rtools45-aarch64\\usr\\bin\nC:\\rtools45-aarch64\\aarch64-w64-mingw32.static.posix\\bin\n\n\n\n\n\n\nCmdStan relies on the GNU Make utility to build both the Stan model executables and the CmdStan tools.\nGNU Make builds executable programs and libraries from source code by reading files called Makefiles which specify how to derive the target program. A Makefile consists of a set of recursive rules where each rule specifies a target, its dependencies, and the specific operations required to build the target. Specifying dependencies for a target provides a way to control the build process so that targets which depend on other files will be updated as needed only when there are changes to those other files. Thus Make provides an efficient way to manage complex software.\nThe CmdStan Makefile is in the <cmdstan-home> directory and is named makefile. 
This is one of the default GNU Makefile names, which allows you to omit the -f makefile argument to the Make command. Because the CmdStan Makefile includes several other Makefiles, Make only works properly when invoked from the <cmdstan-home> directory; attempts to use this Makefile from another directory by specifying the full path to the file makefile won’t work. For example, trying to call Make from another directory by specifying the full path to the makefile results in the following set of error messages:\nmake -f ~/github/stan-dev/cmdstan/makefile\n/Users/mitzi/github/stan-dev/cmdstan/makefile:58: make/stanc: No such file or directory\n/Users/mitzi/github/stan-dev/cmdstan/makefile:59: make/program: No such file or directory\n/Users/mitzi/github/stan-dev/cmdstan/makefile:60: make/tests: No such file or directory\n/Users/mitzi/github/stan-dev/cmdstan/makefile:61: make/command: No such file or directory\nmake: *** No rule to make target `make/command'. Stop.\nThe conda-forge cmdstan package provides a solution to this problem via the cmdstan_model command which lets you run the CmdStan makefile from anywhere to compile a Stan model.\nMakefile syntax allows general pattern rules based on file suffixes. Stan programs must be stored in files with suffix .stan; the CmdStan makefile rules specify how to transform the Stan source code into a binary executable. For example, to compile the Stan program my_program.stan in directory ../my_dir/, the make target is ../my_dir/my_program or ../my_dir/my_program.exe (on Windows).\nTo call Make, you invoke the utility name, make, followed by, in order:\n\nzero or more Make program options, then\nzero or more Make variables, described below, then\nzero or more target names; the set of names is determined by the Makefile rules.\n\nmake <flags> <variables> <targets>\nMakefile Variables\nMake targets can be preceded by any number of Makefile variable name=value pairs. 
For example, to compile ../my_dir/my_program.stan for an OpenCL (GPU) machine, set the makefile variable STAN_OPENCL to TRUE:\n> make STAN_OPENCL=TRUE ../my_dir/my_program\nMakefile variables can also be set by creating a file named local in the CmdStan make subdirectory which contains a list of <VARIABLE>=<VALUE> pairs, one per line. For example, to get the same effect as the above command every time, you would put the line STAN_OPENCL=TRUE into the file <cmdstan_home>/make/local.\nThe complete set of Makefile variables can be found in file <cmdstan-home>/cmdstan/stan/lib/stan_math/make/compiler_flags.\nMake Targets\nWhen invoked without any arguments at all, Make prints a help message:\n> make\n--------------------------------------------------------------------------------\nCmdStan v2.33.1 help\n\n Build CmdStan utilities:\n > make build\n\n This target will:\n 1. Install the Stan compiler bin/stanc from stanc3 binaries.\n 2. Build the print utility bin/print (deprecated; will be removed in v3.0)\n 3. Build the stansummary utility bin/stansummary\n 4. Build the diagnose utility bin/diagnose\n 5. Build all libraries and object files compile and link an executable Stan program\n\n Note: to build using multiple cores, use the -j option to make, e.g.,\n for 4 cores:\n > make build -j4\n\n\n Build a Stan program:\n\n Given a Stan program at foo/bar.stan, build an executable by typing:\n > make foo/bar\n\n This target will:\n 1. Install the Stan compiler (bin/stanc), as needed.\n 2. Use the Stan compiler to generate C++ code, foo/bar.hpp.\n 3. Compile the C++ code using cc . to generate foo/bar\n\n Additional make options:\n STANCFLAGS: defaults to \"\". These are extra options passed to bin/stanc\n when generating C++ code. 
If you want to allow undefined functions in the\n Stan program, either add this to make/local or the command line:\n STANCFLAGS = --allow_undefined\n USER_HEADER: when STANCFLAGS has --allow_undefined, this is the name of the\n header file that is included. This defaults to \"user_header.hpp\" in the\n directory of the Stan program.\n STANC3_VERSION: When set, uses that tagged version specified; otherwise, downloads\n the nightly version.\n STAN_CPP_OPTIMS: Turns on additonal compiler flags for performance.\n STAN_NO_RANGE_CHECKS: Removes the range checks from the model for performance.\n\n\n Example - bernoulli model: examples/bernoulli/bernoulli.stan\n\n 1. Build the model:\n > make examples/bernoulli/bernoulli\n 2. Run the sampling algorithm given the model and data:\n > examples/bernoulli/bernoulli sample data file=examples/bernoulli/bernoulli.data.R\n 3. Look at the posterior sample:\n > bin/stansummary output.csv\n\n\n Clean CmdStan:\n\n Remove the built CmdStan tools:\n > make clean-all\n\n--------------------------------------------------------------------------------", + "crumbs": [ + "Getting Started", + "CmdStan Installation" + ] + }, + { + "objectID": "cmdstan-guide/installation.html#conda-install", + "href": "cmdstan-guide/installation.html#conda-install", + "title": "CmdStan Installation", + "section": "", + "text": "With conda, you can install CmdStan from the conda-forge channel. This will install a pre-built version of CmdStan along with the required dependencies (i.e. a C++ compiler, a version of Make, and required libraries). The conda installation is designed so one can use the R or Python bindings to CmdStan seamlessly. Additionally, it provides the command cmdstan_model to activate the CmdStan makefile from anywhere.\nNote: This requires that conda has been installed already on your machine. 
We recommend using the miniforge distribution.\nWe recommend installing CmdStan in a new conda environment:\n conda create -n stan -c conda-forge cmdstan\nThis command creates a new conda environment named stan and downloads and installs the cmdstan package as well as CmdStan and the required C++ toolchain.\nTo install into an existing conda environment, use the conda install command instead of create:\n conda install -c conda-forge cmdstan\nWhichever installation method you use, afterwards you must activate the new environment or deactivate/activate the existing one. For example, if you installed cmdstan into a new environment stan, run the command\n conda activate stan\nBy default, the latest release of CmdStan is installed. If you require a specific release of CmdStan, CmdStan versions 2.26.1 and newer can be installed by specifying cmdstan==VERSION in the install command. For example to install an earlier version of CmdStan into your current conda environment, run the following command, then re-activate the environment\nconda install -c conda-forge cmdstan=2.27.0\n\n\nA Conda environment is a directory that contains a specific collection of Conda packages. To see the locations of your conda environments, use the command\n conda info -e\nThe shell environment variable CONDA_PREFIX points to the active conda environment (if any). 
Both CmdStan and the C++ toolchain are installed into the bin subdirectory of the conda environment directory, i.e., $CONDA_PREFIX/bin/cmdstan (Linux, MacOS), %CONDA_PREFIX%\\bin\\cmdstan (Windows).\nPlease report conda-specific install problems directly to the conda-forge issue tracker, here.", + "crumbs": [ + "Getting Started", + "CmdStan Installation" + ] + }, + { + "objectID": "cmdstan-guide/installation.html#installation-from-github", + "href": "cmdstan-guide/installation.html#installation-from-github", + "title": "CmdStan Installation", + "section": "", + "text": "Installation from GitHub consists of the following steps:\n\nVerify that you have a modern C++ toolchain. See the C++ Toolchain section for details.\nDownload the CmdStan source code from GitHub\nBuild the CmdStan libraries and executables\nCheck the installation by compiling and running the CmdStan example model bernoulli.stan.\n\n\n\nThe GitHub source code is divided into sub-modules, each in its own repository. The CmdStan repo contains just the cmdstan module; the Stan inference engine algorithms and Stan math library functions are specified as submodules and stored in the GitHub repositories stan and math, respectively.\nA CmdStan release is compressed tarfile which contains CmdStan and the Stan and math library submodules. The most recent CmdStan release is always available as https://github.com/stan-dev/cmdstan/releases/latest. A CmdStan release is versioned by major, minor, patch numbers, e.g., “2.29.2”. Please ensure you download a tarfile which is named “cmdstan-<version-number” rather than using the “Source Code” links at the bottom of the release. These are automatically generated by GitHub and do not contain the required submodules. The release tarfile unpacks into a directory named “cmdstan-”, e.g. 
“cmdstan-2.29.2”.\nBy cloning the CmdStan repository with argument --recursive, Git automatically initializes and updates each submodule in the repository, including nested submodules if any of the submodules in the repository have submodules themselves. The following command will download the source code from the current development branch of CmdStan into a directory named cmdstan:\n> git clone https://github.com/stan-dev/cmdstan.git --recursive\nThroughout this manual, we refer to this top-level CmdStan source directory as <cmdstan-home>. This directory contains the following subdirectories:\n\ndirectory cmdstan/stan contains the sub-module stan (https://github.com/stan-dev/stan)\ndirectory cmdstan/stan/lib/stan_math contains the sub-module math (https://github.com/stan-dev/math)\n\n\n\n\nBuilding CmdStan involves preparing a set of executable programs and compiling the command line interface and supporting libraries. The CmdStan tools are:\n\nstanc: the Stan compiler (translates Stan language to C++).\nstansummary: a basic posterior analysis tool. The stansummary utility processes one or more output files from a run or set of runs of Stan’s HMC sampler. For all parameters and quantities of interest in the Stan program, stansummary reports a set of statistics including mean, standard deviation, percentiles, effective sample size, and \\(\\hat{R}\\) values.\ndiagnose: a basic sampler diagnostic tool which checks for indications that the HMC sampler was unable to sample from the full posterior.\n\nCmdStan releases include pre-built binaries of the Stan language compiler (https://github.com/stan-dev/stanc3): bin/linux-stanc, bin/mac-stanc and bin/windows-stanc. The CmdStan makefile build task copies the appropriate binary to bin/stanc. 
For CmdStan installations which have been cloned or downloaded from the CmdStan GitHub repository, the makefile task will download the appropriate OS-specific binary from the stanc3 repository’s nightly release.\nSteps to build CmdStan:\n\nOpen a command-line terminal window and change directories to the CmdStan home directory.\nRun the makefile target build which instantiates the CmdStan utilities and compiles all necessary C++ libraries.\n\n> cd <cmdstan-home>\n> make build\nIf your computer has multiple cores and sufficient RAM, the build process can be parallelized by providing the -j option. For example, to build on 4 cores, type:\n> make -j4 build\nWhen make build is successful, the directory <cmdstan-home>/bin/ will contain the executables stanc, stansummary, and diagnose (on Windows, corresponding .exe files) and the final lines of console output will show the version of CmdStan that has just been built, e.g.:\n--- CmdStan v2.29.2 built ---\nWarning: The Make program may take 10+ minutes and consume 2+ GB of memory to build CmdStan.\nWindows only: CmdStan requires that the Intel TBB library, which is built by the above command, can be found by the Windows system. This requires that the directory <cmdstan-home>/stan/lib/stan_math/lib/tbb is part of the PATH environment variable. See these instructions for details on changing the PATH. To permanently make this setting for the current user, you may execute:\n> make install-tbb\nAfter changing the PATH environment variable, you must open a new shell in order for the new environment variable settings to take effect. 
(This is not necessary on Mac and Linux systems because they can use the absolute path to the Intel TBB library when linking into Stan programs.)", + "crumbs": [ + "Getting Started", + "CmdStan Installation" + ] + }, + { + "objectID": "cmdstan-guide/installation.html#checking-the-stan-compiler", + "href": "cmdstan-guide/installation.html#checking-the-stan-compiler", + "title": "CmdStan Installation", + "section": "", + "text": "To check that the CmdStan installation is complete and in working order, run the following series of commands from the folder which CmdStan was installed.\nOn Linux and macOS:\n# compile the example\n> make examples/bernoulli/bernoulli\n\n# fit to provided data (results of 10 trials, 2 out of 10 successes)\n> ./examples/bernoulli/bernoulli sample\\\n data file=examples/bernoulli/bernoulli.data.json\n\n# default output written to file `output.csv`,\n# default num_samples is 1000, output file should have approx. 1050 lines\n> wc -l output.csv\n\n# run the `bin/stansummary utility to summarize parameter estimates\n> bin/stansummary output.csv\nOn Windows:\n# compile the example\n> make examples/bernoulli/bernoulli.exe\n\n# fit to provided data (results of 10 trials, 2 out of 10 successes)\n> ./examples/bernoulli/bernoulli.exe sample data file=examples/bernoulli/bernoulli.data.json\n\n# run the `bin/stansummary.exe utility to summarize parameter estimates\n> bin/stansummary.exe output.csv\nThe sample data in file bernoulli.json.data specifies 2 out of 10 successes, therefore the range mean(theta)\\(\\pm\\)sd(theta) should include 0.2.", + "crumbs": [ + "Getting Started", + "CmdStan Installation" + ] + }, + { + "objectID": "cmdstan-guide/installation.html#troubleshooting-the-installation", + "href": "cmdstan-guide/installation.html#troubleshooting-the-installation", + "title": "CmdStan Installation", + "section": "", + "text": "Updates to CmdStan, changes in compiler options, or updates to the C++ toolchain may result in errors when trying to 
compile a Stan program. Often, these problems can be resolved by removing the existing CmdStan binaries and recompiling. To do this, you must run the makefile commands from the <cmdstan-home> directory:\n> cd <cmdstan-home>\n> make clean-all\n> make build\n\n\nThis section contains solutions to problems reported on https://discourse.mc-stan.org\nCompiler error message about PCH file\nTo speed up compilation, the Stan makefile pre-compiles parts of the core Stan library. If these pre-compiled files are out of sync with the compiled model, the compiler will complain, e.g.:\nerror: PCH file uses an older PCH format that is no longer supported\nIn this case, clean and rebuild CmdStan, as shown in the previous section.\nWindows: ‘g++’, ‘make’, or ‘cut’ is not recognized\nThe CmdStan makefile uses a few shell utilities which might not be present in Windows, resulting in the error message:\n'cut' is not recognized as an internal or external command,\noperable program or batch file.\nTo fix this, ensure you have followed the steps for adding the toolchain to your PATH and installing the additional utilities covered in the configuration instructions.\nSpaces in paths to CmdStan or model\nmake can fail when dealing with files in folders with a space somewhere in their file path. Particularly on Windows, this can be an issue when CmdStan, or the models you are trying to build, are placed in the One Drive folder.\nUnfortunately, the errors created by this situation are not always informative. 
Some errors you may see are:\nmake: *** INTERNAL: readdir: Invalid argument\nmake: *** [make/program:50: x.hpp] Error 2\nIf the (fully-expanded) folder path to CmdStan or the model you are trying to build contains a space, we recommend trying a different location if you encounter any issues during building.", + "crumbs": [ + "Getting Started", + "CmdStan Installation" + ] + }, + { + "objectID": "cmdstan-guide/installation.html#cpp-toolchain", + "href": "cmdstan-guide/installation.html#cpp-toolchain", + "title": "CmdStan Installation", + "section": "", + "text": "Compiling a Stan program requires a modern C++ compiler and the GNU Make build utility (a.k.a. “gmake”). These vary by operating system.\n\n\nThe required C++ compiler is g++ 4.9 3. On most systems the GNU Make utility is pre-installed and is the default make utility. There is usually a pre-installed C++ compiler as well, however, it may not be new enough. To check, run commands:\ng++ --version\nmake --version\nIf these are at least at g++ version 4.9.3 or later and make version 3.81 or later, no additional installations are necessary. 
It may still be desirable to update the C++ compiler g++, because later versions are faster.\nTo install the latest version of these tools (or upgrade an older version), use the following commands or their equivalent for your distribution:\nsudo apt install g++\nsudo apt install make\nIf you can’t run sudo, you will need to ask your sysadmin or cluster administrator to install these tools for you.\n\n\n\nTo check if you already have an appropriate toolchain installed, open the Terminal application and enter:\nclang++ --version\nmake --version\nIf either of these commands prints the message command not found, you will need to install Xcode’s command line tools.\nOpen the Terminal application and enter:\nxcode-select --install\nSelect “Install” in the window that opens.\nAfter the installation completes, you can double check that installation was successful by reopening the Terminal and running:\nclang++ --version\nmake --version\nYou can read more about Xcode on its site: https://developer.apple.com/xcode/\nWe don’t recommend trying to use the GNU C++ compiler, available via Homebrew, based on the number of reports of installation difficulties from Mac users on GitHub as well as the Stan forums.\n\n\n\nThe Windows toolchain consists of programs g++, the C++ compiler, and make, the GNU Make utility. To check if these are present, open a command shell [^1] and type:\ng++ --version\nmake --version\nCmdStan is known to be compatible with the RTools45 toolchain. The toolchain will require updating your PATH variable; see these instructions for details on changing the PATH if you are unfamiliar. The following instructions will assume that the default installation directory was used, so be sure to update the paths accordingly if you have chosen a different directory.\n\n\nAll required utilities (e.g., make, g++) for compiling and running CmdStan models on Windows are provided by the RTools45 toolchain from the R Project. 
Installation steps are provided below, and for more technical details on the toolchain refer to the R Project documentation.\nThe R Project provides RTools45 for both Intel/AMD 64-bit (x86_64) and ARM 64-bit (aarch64) systems. If you are unsure which to use, then you can check by going to the Windows Settings, selecting the ‘System’ menu and then the ‘About’ option. If the ‘System Type’ field lists ‘ARM-based processor’, then you should follow the ARM64 instructions below.\nNote that the toolchain is only available for 64-bit systems, and uses the new Universal C Runtime (UCRT). UCRT is only natively supported on Windows 10 and newer, older systems will require a Microsoft update.\n\n\nDownload the installer and complete the prompts for installation:\n\nRTools45\n\nNext, you need to add the toolchain directory to your PATH variable:\nC:\\rtools45\\usr\\bin\nC:\\rtools45\\x86_64-w64-mingw32.static.posix\\bin\n\n\n\nDownload the installer and complete the prompts for installation:\n\nRTools45 - ARM64\n\nNext, you need to add the toolchain directory to your PATH variable:\nC:\\rtools45-aarch64\\usr\\bin\nC:\\rtools45-aarch64\\aarch64-w64-mingw32.static.posix\\bin", + "crumbs": [ + "Getting Started", + "CmdStan Installation" + ] + }, + { + "objectID": "cmdstan-guide/installation.html#gnu-make", + "href": "cmdstan-guide/installation.html#gnu-make", + "title": "CmdStan Installation", + "section": "", + "text": "CmdStan relies on the GNU Make utility to build both the Stan model executables and the CmdStan tools.\nGNU Make builds executable programs and libraries from source code by reading files called Makefiles which specify how to derive the target program. A Makefile consists of a set of recursive rules where each rule specifies a target, its dependencies, and the specific operations required to build the target. 
Specifying dependencies for a target provides a way to control the build process so that targets which depend on other files will be updated as needed only when there are changes to those other files. Thus Make provides an efficient way to manage complex software.\nThe CmdStan Makefile is in the <cmdstan-home> directory and is named makefile. This is one of the default GNU Makefile names, which allows you to omit the -f makefile argument to the Make command. Because the CmdStan Makefile includes several other Makefiles, Make only works properly when invoked from the <cmdstan-home> directory; attempts to use this Makefile from another directory by specifying the full path to the file makefile won’t work. For example, trying to call Make from another directory by specifying the full path to the makefile results in the following set of error messages:\nmake -f ~/github/stan-dev/cmdstan/makefile\n/Users/mitzi/github/stan-dev/cmdstan/makefile:58: make/stanc: No such file or directory\n/Users/mitzi/github/stan-dev/cmdstan/makefile:59: make/program: No such file or directory\n/Users/mitzi/github/stan-dev/cmdstan/makefile:60: make/tests: No such file or directory\n/Users/mitzi/github/stan-dev/cmdstan/makefile:61: make/command: No such file or directory\nmake: *** No rule to make target `make/command'. Stop.\nThe conda-forge cmdstan package provides a solution to this problem via the cmdstan_model command which lets you run the CmdStan makefile from anywhere to compile a Stan model.\nMakefile syntax allows general pattern rules based on file suffixes. Stan programs must be stored in files with suffix .stan; the CmdStan makefile rules specify how to transform the Stan source code into a binary executable. 
For example, to compile the Stan program my_program.stan in directory ../my_dir/, the make target is ../my_dir/my_program or ../my_dir/my_program.exe (on Windows).\nTo call Make, you invoke the utility name, make, followed by, in order:\n\nzero or more Make program options, then specify any Make variables as a series of\nzero or more Make variables, described below\nzero or more target names; the set of names is determined by the Makefile rules.\n\nmake <flags> <variables> <targets>\nMakefile Variables\nMake targets can be preceded by any number of Makefile variable name=value pairs. For example, to compile ../my_dir/my_program.stan for an OpenCL (GPU) machine, set the makefile variable STAN_OPENCL to TRUE:\n> make STAN_OPENCL=TRUE ../my_dir/my_program\nMakefile variables can also be set by creating a file named local in the CmdStan make subdirectory which contains a list of <VARIABLE>=<VALUE> pairs, one per line. For example, to get the same effect as the above command every time, you would put the line STAN_OPENCL=TRUE into the file <cmdstan-home>/make/local.\nThe complete set of Makefile variables can be found in file <cmdstan-home>/cmdstan/stan/lib/stan_math/make/compiler_flags.\nMake Targets\nWhen invoked without any arguments at all, Make prints a help message:\n> make\n--------------------------------------------------------------------------------\nCmdStan v2.33.1 help\n\n Build CmdStan utilities:\n > make build\n\n This target will:\n 1. Install the Stan compiler bin/stanc from stanc3 binaries.\n 2. Build the print utility bin/print (deprecated; will be removed in v3.0)\n 3. Build the stansummary utility bin/stansummary\n 4. Build the diagnose utility bin/diagnose\n 5. 
Build all libraries and object files compile and link an executable Stan program\n\n Note: to build using multiple cores, use the -j option to make, e.g.,\n for 4 cores:\n > make build -j4\n\n\n Build a Stan program:\n\n Given a Stan program at foo/bar.stan, build an executable by typing:\n > make foo/bar\n\n This target will:\n 1. Install the Stan compiler (bin/stanc), as needed.\n 2. Use the Stan compiler to generate C++ code, foo/bar.hpp.\n 3. Compile the C++ code using cc . to generate foo/bar\n\n Additional make options:\n STANCFLAGS: defaults to \"\". These are extra options passed to bin/stanc\n when generating C++ code. If you want to allow undefined functions in the\n Stan program, either add this to make/local or the command line:\n STANCFLAGS = --allow_undefined\n USER_HEADER: when STANCFLAGS has --allow_undefined, this is the name of the\n header file that is included. This defaults to \"user_header.hpp\" in the\n directory of the Stan program.\n STANC3_VERSION: When set, uses that tagged version specified; otherwise, downloads\n the nightly version.\n STAN_CPP_OPTIMS: Turns on additonal compiler flags for performance.\n STAN_NO_RANGE_CHECKS: Removes the range checks from the model for performance.\n\n\n Example - bernoulli model: examples/bernoulli/bernoulli.stan\n\n 1. Build the model:\n > make examples/bernoulli/bernoulli\n 2. Run the sampling algorithm given the model and data:\n > examples/bernoulli/bernoulli sample data file=examples/bernoulli/bernoulli.data.R\n 3. 
Look at the posterior sample:\n > bin/stansummary output.csv\n\n\n Clean CmdStan:\n\n Remove the built CmdStan tools:\n > make clean-all\n\n--------------------------------------------------------------------------------", + "crumbs": [ + "Getting Started", + "CmdStan Installation" + ] + }, + { + "objectID": "cmdstan-guide/generate_quantities_config.html", + "href": "cmdstan-guide/generate_quantities_config.html", + "title": "Generating Quantities of Interest from a Fitted Model", + "section": "", + "text": "The generate_quantities method allows you to generate additional quantities of interest from a fitted model without re-running the sampler. Instead, you write a modified version of the original Stan program and add a generated quantities block or modify the existing one which specifies how to compute the new quantities of interest. Running the generate_quantities method on the new program together with sampler outputs (i.e., a set of draws) from the fitted model runs the generated quantities block of the new program using the existing sample by plugging in the per-draw parameter estimates for the computations in the generated quantities block.\nThis method requires the sub-argument fitted_params which takes as its value an existing Stan CSV file that contains parameter values from an equivalent model, i.e., a model with the same parameters block, conditioned on the same data.\nThe generated quantities block computes quantities of interest (QOIs) based on the data, transformed data, parameters, and transformed parameters. It can be used to:\n\ngenerate simulated data for model testing by forward sampling\ngenerate predictions for new data\ncalculate posterior event probabilities, including multiple comparisons, sign tests, etc.\ncalculate posterior expectations\ntransform parameters for reporting\napply full Bayesian decision theory\ncalculate log likelihoods, deviances, etc. 
for model comparison\n\nFor an overview of the uses of this feature, see the Stan User’s Guide section on Stand-alone generated quantities and ongoing prediction.\n\n\nTo illustrate how this works we use the generate_quantities method to do posterior predictive checks using the estimate of theta given the example bernoulli model and data, following the posterior predictive simulation procedure in the Stan User’s Guide.\nWe write a program bernoulli_ppc.stan which contains the following generated quantities block, with comments to explain the procedure:\ngenerated quantities {\n array[N] int y_sim;\n // use current estimate of theta to generate new sample\n for (n in 1:N) {\n y_sim[n] = bernoulli_rng(theta);\n }\n // estimate theta_rep from new sample\n real<lower=0, upper=1> theta_rep = sum(y_sim) * 1.0 / N;\n}\nThe rest of the program is the same as in bernoulli.stan.\nThe generate_method requires the sub-argument fitted_params which takes as its value the name of a Stan CSV file. The per-draw parameter values from the fitted_params file will be used to run the generated quantities block.\nIf we run the bernoulli.stan program for a single chain to generate a sample in file bernoulli_fit.csv:\n> ./bernoulli sample data file=bernoulli.data.json output file=bernoulli_fit.csv\nThen we can run the bernoulli_ppc.stan to carry out the posterior predictive checks:\n> ./bernoulli_ppc generate_quantities fitted_params=bernoulli_fit.csv \\\n data file=bernoulli.data.json \\\n output file=bernoulli_ppc.csv\nThe output file bernoulli_ppc.csv contains only the values for the variables declared in the generated quantities block, i.e., theta_rep and the elements of y_sim:\n# model = bernoulli_ppc_model\n# method = generate_quantities\n# generate_quantities\n# fitted_params = bernoulli_fit.csv\n# id = 1 (Default)\n# data\n# file = bernoulli.data.json\n# init = 2 (Default)\n# random\n# seed = 2983956445 (Default)\n# output\n# file = output.csv 
(Default)\ny_sim.1,y_sim.2,y_sim.3,y_sim.4,y_sim.5,y_sim.6,y_sim.7,y_sim.8,y_sim.9,y_sim.10,theta_rep\n1,1,1,0,0,0,1,1,0,1,0.6\n1,1,0,1,0,0,1,0,1,0,0.5\n1,0,1,1,1,1,1,1,0,1,0.8\n0,1,0,1,0,1,0,1,0,0,0.4\n1,0,0,0,0,0,0,0,0,0,0.1\n0,0,0,0,0,1,1,1,0,0,0.3\n0,0,1,0,1,0,0,0,0,0,0.2\n1,0,1,0,1,1,0,1,1,0,0.6\n...\nGiven the current implementation, to see the fitted parameter values for each draw, create a copy variable in the generated quantities block, e.g.:\ngenerated quantities {\n array[N] int y_sim;\n // use current estimate of theta to generate new sample\n for (n in 1:N) {\n y_sim[n] = bernoulli_rng(theta);\n }\n real<lower=0, upper=1> theta_cp = theta;\n // estimate theta_rep from new sample\n real<lower=0, upper=1> theta_rep = sum(y_sim) * 1.0 / N;\n}\nNow the output is slightly more interpretable: theta_cp is the same as the theta used to generate the values y_sim[1] through y_sim[N]. Comparing columns theta_cp and theta_rep allows us to see how the uncertainty in our estimate of theta is carried forward into our predictions:\ny_sim.1,y_sim.2,y_sim.3,y_sim.4,y_sim.5,y_sim.6,y_sim.7,y_sim.8,y_sim.9,y_sim.10,theta_cp,theta_rep\n0,1,1,0,1,0,0,1,1,0,0.545679,0.5\n1,1,1,1,1,1,0,1,1,0,0.527164,0.8\n1,1,1,1,0,1,1,1,1,0,0.529116,0.8\n1,0,1,1,1,1,0,0,1,0,0.478844,0.6\n0,1,0,0,0,0,1,0,1,0,0.238793,0.3\n0,0,0,0,0,1,1,0,0,0,0.258294,0.2\n1,1,1,0,0,0,0,0,0,0,0.258465,0.3\n\n\n\nThe fitted_params file must be a Stan CSV file; attempts to use a regular CSV file will result in an error message of the form:\nError reading fitted param names from sample csv file <filename.csv>\nThe fitted_params file must contain columns corresponding to legal values for all parameters defined in the model. If any parameters are missing, the program will exit with an error message of the form:\nError reading fitted param names from sample csv file <filename.csv>\nThe parameter values of the fitted_params are on the constrained scale and must obey all constraints. 
For example, if we modify the contents of the first reported draw in bernoulli_fit.csv so that the value of theta is outside the declared bounds real<lower=0, upper=1>, the program will return the following error message:\nException: lub_free: Bounded variable is 1.21397, but must be in the interval [0, 1] \\\n(in 'bernoulli_ppc.stan', line 5, column 2 to column 30)", + "crumbs": [ + "Running CmdStan", + "Generating Quantities of Interest from a Fitted Model" + ] + }, + { + "objectID": "cmdstan-guide/generate_quantities_config.html#example", + "href": "cmdstan-guide/generate_quantities_config.html#example", + "title": "Generating Quantities of Interest from a Fitted Model", + "section": "", + "text": "To illustrate how this works we use the generate_quantities method to do posterior predictive checks using the estimate of theta given the example bernoulli model and data, following the posterior predictive simulation procedure in the Stan User’s Guide.\nWe write a program bernoulli_ppc.stan which contains the following generated quantities block, with comments to explain the procedure:\ngenerated quantities {\n array[N] int y_sim;\n // use current estimate of theta to generate new sample\n for (n in 1:N) {\n y_sim[n] = bernoulli_rng(theta);\n }\n // estimate theta_rep from new sample\n real<lower=0, upper=1> theta_rep = sum(y_sim) * 1.0 / N;\n}\nThe rest of the program is the same as in bernoulli.stan.\nThe generate_method requires the sub-argument fitted_params which takes as its value the name of a Stan CSV file. 
The per-draw parameter values from the fitted_params file will be used to run the generated quantities block.\nIf we run the bernoulli.stan program for a single chain to generate a sample in file bernoulli_fit.csv:\n> ./bernoulli sample data file=bernoulli.data.json output file=bernoulli_fit.csv\nThen we can run the bernoulli_ppc.stan to carry out the posterior predictive checks:\n> ./bernoulli_ppc generate_quantities fitted_params=bernoulli_fit.csv \\\n data file=bernoulli.data.json \\\n output file=bernoulli_ppc.csv\nThe output file bernoulli_ppc.csv contains only the values for the variables declared in the generated quantities block, i.e., theta_rep and the elements of y_sim:\n# model = bernoulli_ppc_model\n# method = generate_quantities\n# generate_quantities\n# fitted_params = bernoulli_fit.csv\n# id = 1 (Default)\n# data\n# file = bernoulli.data.json\n# init = 2 (Default)\n# random\n# seed = 2983956445 (Default)\n# output\n# file = output.csv (Default)\ny_sim.1,y_sim.2,y_sim.3,y_sim.4,y_sim.5,y_sim.6,y_sim.7,y_sim.8,y_sim.9,y_sim.10,theta_rep\n1,1,1,0,0,0,1,1,0,1,0.6\n1,1,0,1,0,0,1,0,1,0,0.5\n1,0,1,1,1,1,1,1,0,1,0.8\n0,1,0,1,0,1,0,1,0,0,0.4\n1,0,0,0,0,0,0,0,0,0,0.1\n0,0,0,0,0,1,1,1,0,0,0.3\n0,0,1,0,1,0,0,0,0,0,0.2\n1,0,1,0,1,1,0,1,1,0,0.6\n...\nGiven the current implementation, to see the fitted parameter values for each draw, create a copy variable in the generated quantities block, e.g.:\ngenerated quantities {\n array[N] int y_sim;\n // use current estimate of theta to generate new sample\n for (n in 1:N) {\n y_sim[n] = bernoulli_rng(theta);\n }\n real<lower=0, upper=1> theta_cp = theta;\n // estimate theta_rep from new sample\n real<lower=0, upper=1> theta_rep = sum(y_sim) * 1.0 / N;\n}\nNow the output is slightly more interpretable: theta_cp is the same as the theta used to generate the values y_sim[1] through y_sim[N]. 
Comparing columns theta_cp and theta_rep allows us to see how the uncertainty in our estimate of theta is carried forward into our predictions:\ny_sim.1,y_sim.2,y_sim.3,y_sim.4,y_sim.5,y_sim.6,y_sim.7,y_sim.8,y_sim.9,y_sim.10,theta_cp,theta_rep\n0,1,1,0,1,0,0,1,1,0,0.545679,0.5\n1,1,1,1,1,1,0,1,1,0,0.527164,0.8\n1,1,1,1,0,1,1,1,1,0,0.529116,0.8\n1,0,1,1,1,1,0,0,1,0,0.478844,0.6\n0,1,0,0,0,0,1,0,1,0,0.238793,0.3\n0,0,0,0,0,1,1,0,0,0,0.258294,0.2\n1,1,1,0,0,0,0,0,0,0,0.258465,0.3", + "crumbs": [ + "Running CmdStan", + "Generating Quantities of Interest from a Fitted Model" + ] + }, + { + "objectID": "cmdstan-guide/generate_quantities_config.html#errors", + "href": "cmdstan-guide/generate_quantities_config.html#errors", + "title": "Generating Quantities of Interest from a Fitted Model", + "section": "", + "text": "The fitted_params file must be a Stan CSV file; attempts to use a regular CSV file will result in an error message of the form:\nError reading fitted param names from sample csv file <filename.csv>\nThe fitted_params file must contain columns corresponding to legal values for all parameters defined in the model. If any parameters are missing, the program will exit with an error message of the form:\nError reading fitted param names from sample csv file <filename.csv>\nThe parameter values of the fitted_params are on the constrained scale and must obey all constraints. 
For example, if we modify the contents of the first reported draw in bernoulli_fit.csv so that the value of theta is outside the declared bounds real<lower=0, upper=1>, the program will return the following error message:\nException: lub_free: Bounded variable is 1.21397, but must be in the interval [0, 1] \\\n(in 'bernoulli_ppc.stan', line 5, column 2 to column 30)", + "crumbs": [ + "Running CmdStan", + "Generating Quantities of Interest from a Fitted Model" + ] + }, + { + "objectID": "cmdstan-guide/example_model_data.html", + "href": "cmdstan-guide/example_model_data.html", + "title": "Example Model and Data", + "section": "", + "text": "The following is a simple, complete Stan program for a Bernoulli model of binary data.1 The model assumes the binary observed data y[1],...,y[N] are i.i.d. with Bernoulli chance-of-success theta.\ndata { \n int<lower=0> N; \n array[N] int<lower=0, upper=1> y;\n} \nparameters {\n real<lower=0, upper=1> theta;\n} \nmodel {\n theta ~ beta(1, 1); // uniform prior on interval 0,1\n y ~ bernoulli(theta);\n}\nThe input data file contains definitions for the two variables N and y which are specified in the data block of program bernoulli.stan (above).\nA data set of N=10 observations is included in the example Bernoulli model directory in both JSON notation and Rdump data format where 8 out of 10 trials had outcome 0 (failure) and 2 trials had outcome 1 (success). 
In JSON, this data is:\n{\n \"N\" : 10,\n \"y\" : [0,1,0,0,0,0,0,0,0,1]\n}", + "crumbs": [ + "Getting Started", + "Example Model and Data" + ] + }, + { + "objectID": "cmdstan-guide/example_model_data.html#footnotes", + "href": "cmdstan-guide/example_model_data.html#footnotes", + "title": "Example Model and Data", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe model is available with the CmdStan distribution at the path <cmdstan-home>/examples/bernoulli/bernoulli.stan.↩︎", + "crumbs": [ + "Getting Started", + "Example Model and Data" + ] + }, + { + "objectID": "cmdstan-guide/diagnose_config.html", + "href": "cmdstan-guide/diagnose_config.html", + "title": "Diagnosing HMC by Comparison of Gradients", + "section": "", + "text": "Diagnosing HMC by Comparison of Gradients\nCmdStan has a basic diagnostic feature that will calculate the gradients of the initial state and compare them with gradients calculated by finite differences. Discrepancies between the two indicate that there is a problem with the model or initial states or else there is a bug in Stan.\nTo allow for the possibility of adding other kinds of diagnostic tests, the diagnose method argument configuration has subargument test which currently only takes value gradient. There are two available gradient test configuration arguments:\n\nepsilon - The finite difference step size. Must be a positive real number. Default value is \\(10^{-6}\\)\nerror - The error threshold. Must be a positive real number. 
Default value is \\(10^{-6}\\)\n\nTo run on the different platforms with the default configuration, use one of the following.\nMac OS and Linux\n> ./my_model diagnose data file=my_data\nWindows\n> my_model diagnose data file=my_data\nTo relax the test threshold, specify the error argument as follows:\n> ./my_model diagnose test=gradient error=0.0001 data file=my_data\nTo see how this works, we run diagnostics on the example bernoulli model:\n> ./bernoulli diagnose data file=bernoulli.data.json\nExecuting this command prints output to the console and as a series of comment lines to the output csv file. The console output is:\nmethod = diagnose\n diagnose\n test = gradient (Default)\n gradient\n epsilon = 9.9999999999999995e-07 (Default)\n error = 9.9999999999999995e-07 (Default)\nid = 0 (Default)\ndata\n file = bernoulli.data.json\ninit = 2 (Default)\nrandom\n seed = 2152196153 (Default)\noutput\n file = output.csv (Default)\n diagnostic_file = (Default)\n refresh = 100 (Default)\n\nTEST GRADIENT MODE\n\n Log probability=-8.42814\n\n param idx value model finite diff error\n 0 0.0361376 -3.1084 -3.1084 -2.37554e-10\nThe same information is printed to the output file as csv comments, i.e., each line is prefixed with a pound sign #.\n\n\n\n\n Back to top", + "crumbs": [ + "Running CmdStan", + "Diagnosing HMC by Comparison of Gradients" + ] + }, + { + "objectID": "cmdstan-guide/command_line_options.html", + "href": "cmdstan-guide/command_line_options.html", + "title": "Command-Line Interface Overview", + "section": "", + "text": "A CmdStan executable is built from the Stan model concept and the CmdStan command line parser. The command line argument syntax consists of sets of keywords and keyword-value pairs. Arguments are grouped by the following keywords:\n\nmethod - specifies the kind of inference done on the model. Each kind of inference requires further configuration via sub-arguments. The method argument is required. 
It can be specified overtly as a keyword-value pair method=<inference> or implicitly as one of the following:\n\nsample - obtain a sample (set of draws) from the posterior using HMC\noptimize - penalized maximum likelihood estimation\npathfinder - quasi-Newton variational inference\nvariational - automatic differentiation variational inference (ADVI)\nlaplace - sample from a normal approximation centered at the mode\ngenerate_quantities - run model’s generated quantities block on existing sample to obtain new quantities of interest.\nlog_prob - compute the log probability and gradient of the model for one set of parameters.\ndiagnose - compute and compare sampler gradient calculations to finite differences.\n\ndata - specifies the input data file, if any.\noutput - specifies program outputs, both disk files and terminal window outputs.\ninit - specifies initial values for the model parameters, if any.\nrandom - specifies the seed for the pseudo-random number generator.\n\nThe remainder of this chapter covers the general configuration options used for all processing. The following chapters cover the per-inference configuration options.\n\n\nThe values for all variables declared in the data block of the model are read in from an input data file in either JSON or Rdump format. The syntax for the input data argument is:\ndata file=<filepath>\nThe keyword data must be followed directly by the keyword-value pair file=<filepath>. If the model doesn’t declare any data variables, this argument is ignored.\nThe input data file must contain definitions for all data variables declared in the data block. If one or more data block variables are missing from the input data file, the program prints an error message to stderr and returns a non-zero return code. For example, the model bernoulli.stan defines two data variables N and y. 
If the input data file doesn’t include both variables, or if the data variable doesn’t match the declared type and dimensions, the program will exit with an error message at the point where it first encounters missing data.\nFor example if the input data file doesn’t include the definition for variable y, the executable exits with the following message:\nException: variable does not exist; processing stage=data initialization; variable name=y; base type=int (in 'examples/bernoulli/bernoulli.stan', line 3, column 2 to column 28)\n\n\n\nThe output keyword is used to specify non-default options for output files and messages written to the terminal window. The output keyword takes several keyword-value pair sub-arguments.\nThe keyword value pair file=<filepath> specifies the location of the Stan CSV output file. If unspecified, the output file is written to a file named output.csv in the current working directory.\nThe keyword value pair diagnostic_file=<filepath> specifies the location of the auxiliary output file. By default, no auxiliary output file is produced. This option is only valid for the iterative algorithms sample and variational.\nThe keyword value pair refresh=<int> specifies the number of iterations between progress messages written to the terminal window. The default value is 100 iterations.\nThe keyword value pair sig_figs=<int> specifies the number of significant digits for all numerical values in the output files. Allowable values are between 1 and 18, which is the maximum amount of precision available for 64-bit floating point arithmetic. The default value is 8.   Note: increasing sig_figs above the default will increase the size of the output CSV files accordingly.\nThe keyword value pair profile_file=<filepath> specifies the location of the output file for profiling data. If the model uses no profiling, the output profile file is not produced. 
If the model uses profiling and profile_file is unspecified, the profiling data is written to a file named profile.csv in the current working directory.\nThe keyword value pair save_cmdstan_config=<boolean> specifies whether to save the configuration options used to run the program to a file named <output file>_config.json alongside the other output files. The default value is false, which means the configuration file is not saved. The contents of this file are similar to the comments in the Stan CSV file, but should be more portable across versions and easier to parse.\n\n\n\nInitialization is only applied to parameters defined in the parameters block. By default, all parameters are initialized to random draws from a uniform distribution over the range \\([-2, 2]\\). These values are on the unconstrained scale, so must be inverse transformed back to satisfy the constraints declared for parameters. Because zero is chosen to be a reasonable default initial value for most parameters, the interval around zero provides a fairly diffuse starting point. For instance, unconstrained variables are initialized randomly in \\((-2, 2)\\), variables constrained to be positive are initialized roughly in \\((0.14, 7.4)\\), variables constrained to fall between 0 and 1 are initialized with values roughly in \\((0.12, 0.88)\\).\nThe initialization argument is specified as keyword-value pair with keyword init. The value can be one of the following:\n\npositive real number \\(x\\). All parameters will be initialized to random draws from a uniform distribution over the range \\([-x, x]\\).\n\\(0\\) - All parameters will be initialized to zero values on the unconstrained scale. 
The transforms are arranged in such a way that zero initialization provides reasonable variable initializations: \\(0\\) for unconstrained parameters; \\(1\\) for parameters constrained to be positive; \\(0.5\\) for variables to constrained to lie between \\(0\\) and \\(1\\); a symmetric (uniform) vector for simplexes; unit matrices for both correlation and covariance matrices; and so on.\nfilepath - A data file in JSON or Rdump format containing initial parameters values for some or all of the model parameters. User specified initial values must satisfy the constraints declared in the model (i.e., they are on the constrained scale). Parameters which aren’t explicitly initialized will be initialized randomly over the range \\([-2, 2]\\).\n\n\n\n\nThe random-number generator’s behavior is determined by the unsigned seed (positive integer) it is started with. If a seed is not specified, or a seed of 0 or less is specified, the system time is used to generate a seed. The seed is recorded and included with Stan’s output regardless of whether it was specified or generated randomly from the system time.\nThe syntax for the random seed argument is:\nrandom seed=<int>\nThe keyword random must be followed directly by the keyword-value pair seed=<int>.\n\n\n\nThe chain identifier argument is used in conjunction with the random seed argument when running multiple Markov chains for sampling. The chain identifier is used to advance the random number generator a very large number of random variates so that two chains with the same seed and different identifiers draw from non-overlapping subsequences of the random-number sequence determined by the seed. Together, the seed and chain identifier determine the behavior of the random number generator.\nThe syntax for the random seed argument is:\nid=<int>\nThe default value is 1.\nWhen running a set of chains from the command line with a specified seed, this argument should be set to the chain index. 
E.g., when running 4 chains, the value should be 1,..,4, successively. When running multiple chains from a single command, Stan’s interfaces manage the chain identifier arguments automatically.\nFor complete reproducibility, every aspect of the environment needs to be locked down from the OS and version to the C++ compiler and version to the version of Stan and all dependent libraries. See the Stan Reference Manual Reproducibility chapter for further details.\n\n\n\nCmdStan provides a help and help-all mechanism that displays either the available top-level or keyword-specific key-value argument pairs. To display top-level help, call the CmdStan executable with keyword help:\n./bernoulli help\n\n\n\nCmdStan executables and utility programs use streams standard output (stdout) and standard error (stderr) to report information and error messages, respectively. Some methods also generate warning messages when the algorithm detects potential problems with the inference. Depending on the method, these messages are sent to either standard out or standard error.\nAll program executables provide a return code between 0 and 255:\n\n0 - Program ran to termination as expected.\nvalue in range [1 : 125] - Method invoked could not run due to problems with model or data.\nvalue > 128 - Fatal error during execution, process terminated by signal. To determine the signal number, subtract 128 from the return value, e.g. return code 139 results from termination signal 11 (segmentation violation).\n\nA non-zero return code or outputs sent to stderr indicate problems with the inference. 
However, a return code of zero and absence of error messages doesn’t necessarily mean that the inference is valid, it is still necessary to validate the inferences using all available summary and diagnostic techniques.", + "crumbs": [ + "Running CmdStan", + "Command-Line Interface Overview" + ] + }, + { + "objectID": "cmdstan-guide/command_line_options.html#input-data-argument", + "href": "cmdstan-guide/command_line_options.html#input-data-argument", + "title": "Command-Line Interface Overview", + "section": "", + "text": "The values for all variables declared in the data block of the model are read in from an input data file in either JSON or Rdump format. The syntax for the input data argument is:\ndata file=<filepath>\nThe keyword data must be followed directly by the keyword-value pair file=<filepath>. If the model doesn’t declare any data variables, this argument is ignored.\nThe input data file must contain definitions for all data variables declared in the data block. If one or more data block variables are missing from the input data file, the program prints an error message to stderr and returns a non-zero return code. For example, the model bernoulli.stan defines two data variables N and y. 
If the input data file doesn’t include both variables, or if the data variable doesn’t match the declared type and dimensions, the program will exit with an error message at the point where it first encounters missing data.\nFor example if the input data file doesn’t include the definition for variable y, the executable exits with the following message:\nException: variable does not exist; processing stage=data initialization; variable name=y; base type=int (in 'examples/bernoulli/bernoulli.stan', line 3, column 2 to column 28)", + "crumbs": [ + "Running CmdStan", + "Command-Line Interface Overview" + ] + }, + { + "objectID": "cmdstan-guide/command_line_options.html#output-control.section", + "href": "cmdstan-guide/command_line_options.html#output-control.section", + "title": "Command-Line Interface Overview", + "section": "", + "text": "The output keyword is used to specify non-default options for output files and messages written to the terminal window. The output keyword takes several keyword-value pair sub-arguments.\nThe keyword value pair file=<filepath> specifies the location of the Stan CSV output file. If unspecified, the output file is written to a file named output.csv in the current working directory.\nThe keyword value pair diagnostic_file=<filepath> specifies the location of the auxiliary output file. By default, no auxiliary output file is produced. This option is only valid for the iterative algorithms sample and variational.\nThe keyword value pair refresh=<int> specifies the number of iterations between progress messages written to the terminal window. The default value is 100 iterations.\nThe keyword value pair sig_figs=<int> specifies the number of significant digits for all numerical values in the output files. Allowable values are between 1 and 18, which is the maximum amount of precision available for 64-bit floating point arithmetic. The default value is 8.   
Note: increasing sig_figs above the default will increase the size of the output CSV files accordingly.\nThe keyword value pair profile_file=<filepath> specifies the location of the output file for profiling data. If the model uses no profiling, the output profile file is not produced. If the model uses profiling and profile_file is unspecified, the profiling data is written to a file named profile.csv in the current working directory.\nThe keyword value pair save_cmdstan_config=<boolean> specifies whether to save the configuration options used to run the program to a file named <output file>_config.json alongside the other output files. The default value is false, which means the configuration file is not saved. The contents of this file are similar to the comments in the Stan CSV file, but should be more portable across versions and easier to parse.", + "crumbs": [ + "Running CmdStan", + "Command-Line Interface Overview" + ] + }, + { + "objectID": "cmdstan-guide/command_line_options.html#initialize-model-parameters-argument", + "href": "cmdstan-guide/command_line_options.html#initialize-model-parameters-argument", + "title": "Command-Line Interface Overview", + "section": "", + "text": "Initialization is only applied to parameters defined in the parameters block. By default, all parameters are initialized to random draws from a uniform distribution over the range \\([-2, 2]\\). These values are on the unconstrained scale, so must be inverse transformed back to satisfy the constraints declared for parameters. Because zero is chosen to be a reasonable default initial value for most parameters, the interval around zero provides a fairly diffuse starting point. 
For instance, unconstrained variables are initialized randomly in \\((-2, 2)\\), variables constrained to be positive are initialized roughly in \\((0.14, 7.4)\\), variables constrained to fall between 0 and 1 are initialized with values roughly in \\((0.12, 0.88)\\).\nThe initialization argument is specified as a keyword-value pair with keyword init. The value can be one of the following:\n\npositive real number \\(x\\). All parameters will be initialized to random draws from a uniform distribution over the range \\([-x, x]\\).\n\\(0\\) - All parameters will be initialized to zero values on the unconstrained scale. The transforms are arranged in such a way that zero initialization provides reasonable variable initializations: \\(0\\) for unconstrained parameters; \\(1\\) for parameters constrained to be positive; \\(0.5\\) for variables constrained to lie between \\(0\\) and \\(1\\); a symmetric (uniform) vector for simplexes; unit matrices for both correlation and covariance matrices; and so on.\nfilepath - A data file in JSON or Rdump format containing initial parameter values for some or all of the model parameters. User specified initial values must satisfy the constraints declared in the model (i.e., they are on the constrained scale). Parameters which aren’t explicitly initialized will be initialized randomly over the range \\([-2, 2]\\).", + "crumbs": [ + "Running CmdStan", + "Command-Line Interface Overview" + ] + }, + { + "objectID": "cmdstan-guide/command_line_options.html#random-number-generator-arguments", + "href": "cmdstan-guide/command_line_options.html#random-number-generator-arguments", + "title": "Command-Line Interface Overview", + "section": "", + "text": "The random-number generator’s behavior is determined by the unsigned seed (positive integer) it is started with. If a seed is not specified, or a seed of 0 or less is specified, the system time is used to generate a seed. 
The seed is recorded and included with Stan’s output regardless of whether it was specified or generated randomly from the system time.\nThe syntax for the random seed argument is:\nrandom seed=<int>\nThe keyword random must be followed directly by the keyword-value pair seed=<int>.", + "crumbs": [ + "Running CmdStan", + "Command-Line Interface Overview" + ] + }, + { + "objectID": "cmdstan-guide/command_line_options.html#chain-identifier-argument-id", + "href": "cmdstan-guide/command_line_options.html#chain-identifier-argument-id", + "title": "Command-Line Interface Overview", + "section": "", + "text": "The chain identifier argument is used in conjunction with the random seed argument when running multiple Markov chains for sampling. The chain identifier is used to advance the random number generator a very large number of random variates so that two chains with the same seed and different identifiers draw from non-overlapping subsequences of the random-number sequence determined by the seed. Together, the seed and chain identifier determine the behavior of the random number generator.\nThe syntax for the chain identifier argument is:\nid=<int>\nThe default value is 1.\nWhen running a set of chains from the command line with a specified seed, this argument should be set to the chain index. E.g., when running 4 chains, the value should be 1,..,4, successively. When running multiple chains from a single command, Stan’s interfaces manage the chain identifier arguments automatically.\nFor complete reproducibility, every aspect of the environment needs to be locked down from the OS and version to the C++ compiler and version to the version of Stan and all dependent libraries. 
See the Stan Reference Manual Reproducibility chapter for further details.", + "crumbs": [ + "Running CmdStan", + "Command-Line Interface Overview" + ] + }, + { + "objectID": "cmdstan-guide/command_line_options.html#command-line-help", + "href": "cmdstan-guide/command_line_options.html#command-line-help", + "title": "Command-Line Interface Overview", + "section": "", + "text": "CmdStan provides a help and help-all mechanism that displays either the available top-level or keyword-specific key-value argument pairs. To display top-level help, call the CmdStan executable with keyword help:\n./bernoulli help", + "crumbs": [ + "Running CmdStan", + "Command-Line Interface Overview" + ] + }, + { + "objectID": "cmdstan-guide/command_line_options.html#error-messages-and-return-codes", + "href": "cmdstan-guide/command_line_options.html#error-messages-and-return-codes", + "title": "Command-Line Interface Overview", + "section": "", + "text": "CmdStan executables and utility programs use streams standard output (stdout) and standard error (stderr) to report information and error messages, respectively. Some methods also generate warning messages when the algorithm detects potential problems with the inference. Depending on the method, these messages are sent to either standard out or standard error.\nAll program executables provide a return code between 0 and 255:\n\n0 - Program ran to termination as expected.\nvalue in range [1 : 125] - Method invoked could not run due to problems with model or data.\nvalue > 128 - Fatal error during execution, process terminated by signal. To determine the signal number, subtract 128 from the return value, e.g. return code 139 results from termination signal 11 (segmentation violation).\n\nA non-zero return code or outputs sent to stderr indicate problems with the inference. 
However, a return code of zero and absence of error messages doesn’t necessarily mean that the inference is valid, it is still necessary to validate the inferences using all available summary and diagnostic techniques.", + "crumbs": [ + "Running CmdStan", + "Command-Line Interface Overview" + ] + }, + { + "objectID": "index.html", + "href": "index.html", + "title": "Stan Documentation", + "section": "", + "text": "This is the official documentation for Stan.\n\nThe Stan User’s Guide (pdf) provides example models and programming techniques for coding statistical models in Stan.\nThe Stan Reference Manual (pdf) specifies the Stan programming language and inference algorithms.\nThe Stan Functions Reference (pdf) specifies the functions built into the Stan programming language.\n\nThere are also separate installation and getting started guides for CmdStan (pdf), the command-line interface to the Stan inference engine, and the R, Python, and Julia interfaces.\n\nOlder Versions\nThis documentation is for Stan 2.39. 
Older versions of each of the documents linked above can be found in the table below:\n\n\n\nVersion\nStan Reference Manual\nStan Users Guide\nCmdStan Guide\nStan Functions Reference\n\n\n\n\n2.39\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.38\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.37\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.36\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.35\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.34\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.33\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.32\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.31\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.30\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.29\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.28\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.27\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.26\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.25\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.24\nhtml pdf\nhtml pdf\nhtml pdf\nhtml pdf\n\n\n2.23\nhtml pdf\nhtml pdf\nhtml\nhtml pdf\n\n\n2.22\nhtml pdf\nhtml pdf\n\nhtml pdf\n\n\n2.21\nhtml pdf\nhtml pdf\n\nhtml pdf\n\n\n2.20\nhtml pdf\nhtml pdf\n\nhtml pdf\n\n\n2.19\nhtml pdf\nhtml pdf\n\nhtml pdf\n\n\n2.18\nhtml pdf\nhtml pdf\n\nhtml pdf\n\n\n\nPrior to version 2.18, all documentation was part of a single document called the Stan User’s Guide and Reference Manual. 
These versions are still available for download as PDFs:\n\n\n\nVersion\nStan User’s Guide and Reference Manual\n\n\n\n\n2.17\npdf\n\n\n2.16\npdf\n\n\n2.15\npdf\n\n\n2.14\npdf\n\n\n2.13\npdf\n\n\n2.12\npdf\n\n\n2.11\npdf\n\n\n2.10\npdf\n\n\n2.9\npdf\n\n\n2.8\npdf\n\n\n2.7\npdf\n\n\n2.6\npdf\n\n\n2.5\npdf\n\n\n2.4\npdf\n\n\n2.3\npdf\n\n\n2.2\npdf\n\n\n2.1\npdf\n\n\n2.0\npdf\n\n\n1.3\npdf\n\n\n1.2\npdf\n\n\n1.1\npdf\n\n\n1.0\npdf\n\n\n\n\nCopyright and trademark\n\nCopyright 2011–2025, Stan Development Team and their assignees.\nThe Stan name and logo are registered trademarks of NumFOCUS.\n\n\n\nLicensing\n\nText content: CC-BY ND 4.0 license\nComputer code: BSD 3-clause license\nLogo: Stan logo usage guidelines\n\n\n\n\n\n\n Back to top" + }, + { + "objectID": "cmdstan-guide/bib.html", + "href": "cmdstan-guide/bib.html", + "title": "References", + "section": "", + "text": "References\n\n\n\n\n Back to top" + }, + { + "objectID": "cmdstan-guide/compiling_stan_programs.html", + "href": "cmdstan-guide/compiling_stan_programs.html", + "title": "Compiling a Stan Program", + "section": "", + "text": "A Stan program must be in a file with extension .stan. The CmdStan makefile rules specify all necessary steps to translate files with suffix .stan to a CmdStan executable program. This is a two-stage process:\n\nfirst the Stan program is translated to C++ by the stanc compiler\nthen the C++ compiler compiles all C++ sources and links them together with the CmdStan interface program and the Stan and math libraries.\n\n\n\nTo compile Stan programs, you must invoke the Make program from the <cmdstan-home> directory. The Stan program can be in a different directory, but the directory path names cannot contain spaces - this limitation is imposed by Make.\n> cd <cmdstan_home>\nIn the call to the Make program, the target is name of the CmdStan executable corresponding to the Stan program file. On Mac and Linux, this is the name of the Stan program with the .stan omitted. 
On Windows, replace .stan with .exe, and make sure that the path is given with slashes and not backslashes. To build the Bernoulli example, on Mac and Linux:\n> make examples/bernoulli/bernoulli\nOn Windows, the command is the same with the addition of .exe at the end of the target (note the use of forward slashes):\n> make examples/bernoulli/bernoulli.exe\nThe generated C++ code (bernoulli.hpp), object file (bernoulli.o) and the compiled executable will be placed in the same directory as the Stan program.\nThe compiled executable consists of the Stan model and the CmdStan command line interface which provides inference algorithms to do MCMC sampling, optimization, and variational inference. The following sections provide examples of doing inference using each method on the example model and data file.\n\n\n\nWhen executing a Make target, all its dependencies are checked to see if they are up to date, and if they are not, they are rebuilt. If you call Make with target bernoulli twice in a row, without editing bernoulli.stan or otherwise changing the system, on the second invocation, Make will determine that the executable is already newer than the Stan source file and will not recompile the program:\n> make examples/bernoulli/bernoulli\nmake: `examples/bernoulli/bernoulli' is up to date.\nIf the file containing the Stan program is updated, the next call to make will rebuild the CmdStan executable.\n\n\n\nThe Stan probabilistic programming language is a programming language with a rich syntax; as such, it is often the case that a carefully written program contains errors.\nThe simplest class of errors are simple syntax errors such as forgetting the semi-colon statement termination marker at the end of a line, or typos such as a misspelled variable name. 
For example, if in the bernoulli.stan program, we introduce a typo on line \\(9\\) by writing thata instead of theta, the Make command fails with the following\n--- Translating Stan model to C++ code ---\nbin/stanc --o=bernoulli.hpp bernoulli.stan\n\nSemantic error in 'bernoulli.stan', line 9, column 2 to column 7:\n -------------------------------------------------\n 7: }\n 8: model {\n 9: thata ~ beta(1, 1); // uniform prior on interval 0, 1\n ^\n 10: y ~ bernoulli(theta);\n 11: }\n -------------------------------------------------\n\nIdentifier 'thata' not in scope.\n\nmake: *** [bernoulli.hpp] Error 1\nStan is a strongly-typed language; and the compiler will throw an error if statements or expressions violate the type rules. The following trivial program foo.stan contains an illegal assignment statement:\ndata {\n real x;\n}\ntransformed data {\n int y = x;\n}\nThe Make command fails with the following:\nSemantic error in 'foo.stan', line 5, column 2 to column 12:\n -------------------------------------------------\n 3: }\n 4: transformed data {\n 5: int y = x;\n ^\n 6: }\n -------------------------------------------------\n\nIll-typed arguments supplied to assignment operator =:\n lhs has type int and rhs has type real\nThe Stan Reference Manual provides a complete specification of the Stan programming language. The Stan User’s Guide also contains a full description of the errors and warnings stanc can emit.\n\n\n\nIf the stanc compiler successfully translates a Stan program to C++, the resulting C++ code should be valid C++ which can be compiled into an executable. 
The stanc compiler is also a program, and while it has been extensively tested, it may still contain errors such that the generated C++ code fails to compile.\nThe Make command prints the following message to the terminal at the point when it compiles and links the C++ file:\n--- Compiling, linking C++ code ---\nIf the program fails to compile for any reason, the C++ compiler and linker will most likely print a long series of error messages to the console.\nIf this happens, please report the error, together with the Stan program on either the Stan Forums or on the Stan compiler GitHub issues tracker.\n\n\n\nUsers can set flags for the C++ compiler and linker to optimize their executables. We advise users to only do this once they are sure their basic setup of CmdStan without flags works.\nThe CXXFLAGS and LDFLAGS makefile variables can be used to set compiler and linker flags respectively. We recommend setting these in the make/local file.\nFor example:\nCXXFLAGS = -O2\nA recommended set of CXXFLAGS and LDFLAGS flags can be turned on by setting STAN_CPP_OPTIMS=true in the make/local file. These are tested compiler and link-time optimizations that can speed up execution of certain models. We have observed speedups up to 15 percent, but this depends on the model, operating system and hardware used. The use of these flags does considerably slow down compilation, so they are not used by default.\n\n\nWhen assigning or reading from vectors, row_vectors, matrices or arrays using indexing, Stan checks that a supplied index is valid (not out of range), which avoids segmentation faults and other difficult-to-debug runtime errors.\nFor some models these checks can represent a significant part of the model’s execution time. By setting the STAN_NO_RANGE_CHECKS=true makefile flag in the make/local file the range checks can be removed. Use this flag with caution (only once the indexing has been validated). 
In case of any unexpected behavior remove the flag for easier debugging.", + "crumbs": [ + "Getting Started", + "Compiling a Stan Program" + ] + }, + { + "objectID": "cmdstan-guide/compiling_stan_programs.html#invoking-the-make-utility", + "href": "cmdstan-guide/compiling_stan_programs.html#invoking-the-make-utility", + "title": "Compiling a Stan Program", + "section": "", + "text": "To compile Stan programs, you must invoke the Make program from the <cmdstan-home> directory. The Stan program can be in a different directory, but the directory path names cannot contain spaces - this limitation is imposed by Make.\n> cd <cmdstan_home>\nIn the call to the Make program, the target is name of the CmdStan executable corresponding to the Stan program file. On Mac and Linux, this is the name of the Stan program with the .stan omitted. On Windows, replace .stan with .exe, and make sure that the path is given with slashes and not backslashes. To build the Bernoulli example, on Mac and Linux:\n> make examples/bernoulli/bernoulli\nOn Windows, the command is the same with the addition of .exe at the end of the target (note the use of forward slashes):\n> make examples/bernoulli/bernoulli.exe\nThe generated C++ code (bernoulli.hpp), object file (bernoulli.o) and the compiled executable will be placed in the same directory as the Stan program.\nThe compiled executable consists of the Stan model and the CmdStan command line interface which provides inference algorithms to do MCMC sampling, optimization, and variational inference. 
The following sections provide examples of doing inference using each method on the example model and data file.", + "crumbs": [ + "Getting Started", + "Compiling a Stan Program" + ] + }, + { + "objectID": "cmdstan-guide/compiling_stan_programs.html#dependencies", + "href": "cmdstan-guide/compiling_stan_programs.html#dependencies", + "title": "Compiling a Stan Program", + "section": "", + "text": "When executing a Make target, all its dependencies are checked to see if they are up to date, and if they are not, they are rebuilt. If you call Make with target bernoulli twice in a row, without editing bernoulli.stan or otherwise changing the system, on the second invocation, Make will determine that the executable is already newer than the Stan source file and will not recompile the program:\n> make examples/bernoulli/bernoulli\nmake: `examples/bernoulli/bernoulli' is up to date.\nIf the file containing the Stan program is updated, the next call to make will rebuild the CmdStan executable.", + "crumbs": [ + "Getting Started", + "Compiling a Stan Program" + ] + }, + { + "objectID": "cmdstan-guide/compiling_stan_programs.html#compiler-errors", + "href": "cmdstan-guide/compiling_stan_programs.html#compiler-errors", + "title": "Compiling a Stan Program", + "section": "", + "text": "The Stan probabilistic programming language is a programming language with a rich syntax; as such, it is often the case that a carefully written program contains errors.\nThe simplest class of errors are simple syntax errors such as forgetting the semi-colon statement termination marker at the end of a line, or typos such as a misspelled variable name. 
For example, if in the bernoulli.stan program, we introduce a typo on line \\(9\\) by writing thata instead of theta, the Make command fails with the following\n--- Translating Stan model to C++ code ---\nbin/stanc --o=bernoulli.hpp bernoulli.stan\n\nSemantic error in 'bernoulli.stan', line 9, column 2 to column 7:\n -------------------------------------------------\n 7: }\n 8: model {\n 9: thata ~ beta(1, 1); // uniform prior on interval 0, 1\n ^\n 10: y ~ bernoulli(theta);\n 11: }\n -------------------------------------------------\n\nIdentifier 'thata' not in scope.\n\nmake: *** [bernoulli.hpp] Error 1\nStan is a strongly-typed language; and the compiler will throw an error if statements or expressions violate the type rules. The following trivial program foo.stan contains an illegal assignment statement:\ndata {\n real x;\n}\ntransformed data {\n int y = x;\n}\nThe Make command fails with the following:\nSemantic error in 'foo.stan', line 5, column 2 to column 12:\n -------------------------------------------------\n 3: }\n 4: transformed data {\n 5: int y = x;\n ^\n 6: }\n -------------------------------------------------\n\nIll-typed arguments supplied to assignment operator =:\n lhs has type int and rhs has type real\nThe Stan Reference Manual provides a complete specification of the Stan programming language. The Stan User’s Guide also contains a full description of the errors and warnings stanc can emit.", + "crumbs": [ + "Getting Started", + "Compiling a Stan Program" + ] + }, + { + "objectID": "cmdstan-guide/compiling_stan_programs.html#troubleshooting-c-compiler-or-linker-errors", + "href": "cmdstan-guide/compiling_stan_programs.html#troubleshooting-c-compiler-or-linker-errors", + "title": "Compiling a Stan Program", + "section": "", + "text": "If the stanc compiler successfully translates a Stan program to C++, the resulting C++ code should be valid C++ which can be compiled into an executable. 
The stanc compiler is also a program, and while it has been extensively tested, it may still contain errors such that the generated C++ code fails to compile.\nThe Make command prints the following message to the terminal at the point when it compiles and links the C++ file:\n--- Compiling, linking C++ code ---\nIf the program fails to compile for any reason, the C++ compiler and linker will most likely print a long series of error messages to the console.\nIf this happens, please report the error, together with the Stan program on either the Stan Forums or on the Stan compiler GitHub issues tracker.", + "crumbs": [ + "Getting Started", + "Compiling a Stan Program" + ] + }, + { + "objectID": "cmdstan-guide/compiling_stan_programs.html#c-compilation-and-linking-flags", + "href": "cmdstan-guide/compiling_stan_programs.html#c-compilation-and-linking-flags", + "title": "Compiling a Stan Program", + "section": "", + "text": "Users can set flags for the C++ compiler and linker to optimize their executables. We advise users to only do this once they are sure their basic setup of CmdStan without flags works.\nThe CXXFLAGS and LDFLAGS makefile variables can be used to set compiler and linker flags respectively. We recommend setting these in the make/local file.\nFor example:\nCXXFLAGS = -O2\nA recommended set of CXXFLAGS and LDFLAGS flags can be turned on by setting STAN_CPP_OPTIMS=true in the make/local file. These are tested compiler and link-time optimizations that can speed up execution of certain models. We have observed speedups up to 15 percent, but this depends on the model, operating system and hardware used. 
The use of these flags does considerably slow down compilation, so they are not used by default.\n\n\nWhen assigning or reading from vectors, row_vectors, matrices or arrays using indexing, Stan checks that a supplied index is valid (not out of range), which avoids segmentation faults and other difficult-to-debug runtime errors.\nFor some models these checks can represent a significant part of the model’s execution time. By setting the STAN_NO_RANGE_CHECKS=true makefile flag in the make/local file the range checks can be removed. Use this flag with caution (only once the indexing has been validated). In case of any unexpected behavior remove the flag for easier debugging.", + "crumbs": [ + "Getting Started", + "Compiling a Stan Program" + ] + }, + { + "objectID": "cmdstan-guide/diagnose_utility.html", + "href": "cmdstan-guide/diagnose_utility.html", + "title": "diagnose: Diagnosing Biased Hamiltonian Monte Carlo Inferences", + "section": "", + "text": "CmdStan is distributed with a utility that is able to read in and analyze the output of one or more Markov chains to check for the following potential problems:\n\nDivergent transitions\nTransitions that hit the maximum treedepth\nLow E-BFMI values\nLow effective sample sizes\nHigh \\(\\hat{R}\\) values\n\nThe meanings of several of these problems are discussed in https://arxiv.org/abs/1701.02434.\n\n\nThe CmdStan makefile task build compiles the diagnose utility into the bin directory. It can be compiled directly using the makefile as follows:\n> cd <cmdstan-home>\n> make bin/diagnose\n\n\n\nThe diagnose command is executed on one or more output files, which are provided as command-line arguments separated by spaces. If there are no apparent problems with the output files passed to diagnose, it outputs a message that all transitions are within treedepth limit and that no divergent transitions were found. 
If problems are detected, it outputs a summary of the problem along with possible ways to mitigate it.\nTo fully exercise the diagnose command, we run 4 chains to sample from Neal’s funnel distribution, discussed in the Stan User’s Guide reparameterization section. This program defines a distribution which exemplifies the difficulties of sampling from some hierarchical models:\nparameters {\n real y;\n vector[9] x;\n}\nmodel {\n y ~ normal(0, 3);\n x ~ normal(0, exp(y / 2));\n}\nThis program is available on GitHub: https://github.com/stan-dev/example-models/blob/master/misc/funnel/funnel.stan\nStan has trouble sampling from the region where y is small and thus x is constrained to be near 0. This is due to the fact that the density’s scale changes with y, so that a step size that works well when y is large is inefficient when y is small and vice-versa.\nRunning 4 chains produces output files output_1.csv, …, output_4.csv. We run diagnose command on this fileset:\n> bin/diagnose output_*.csv\nThe output is printed to the terminal window:\nChecking sampler transitions treedepth.\n18 of 4000 (0.45%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps.\nTrajectories that are prematurely terminated due to this limit will result in slow exploration.\nFor optimal performance, increase this limit.\n\nChecking sampler transitions for divergences.\n11 of 4000 (0.28%) transitions ended with a divergence.\nThese divergent transitions indicate that HMC is not fully able to explore the posterior distribution.\nTry increasing adapt delta closer to 1.\nIf this doesn't remove all divergences, try to reparameterize the model.\n\nChecking E-BFMI - sampler transitions HMC potential energy.\nThe E-BFMI, 0.06, is below the nominal threshold of 0.30 which suggests that HMC may have trouble exploring the target distribution.\nIf possible, try to reparameterize the model.\n\nRank-normalized split effective sample size satisfactory for all parameters.\n\nThe 
following parameters had rank-normalized split R-hat greater than 1.01:\n y, x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9]\nSuch high values indicate incomplete mixing and biased estimation.\nYou should consider regularizing your model with additional prior information or a more effective parameterization.\n\nProcessing complete.\nIn this example, changing the model to use a non-centered parameterization is the only way to correct these problems. In this second model, the parameters x_raw and y_raw are sampled as independent standard normals, which is easy for Stan.\nparameters {\n real y_raw;\n vector[9] x_raw;\n}\ntransformed parameters {\n real y;\n vector[9] x;\n\n y = 3.0 * y_raw;\n x = exp(y / 2) * x_raw;\n}\nmodel {\n y_raw ~ std_normal(); // implies y ~ normal(0, 3)\n x_raw ~ std_normal(); // implies x ~ normal(0, exp(y / 2))\n}\nThis program is available on GitHub: https://github.com/stan-dev/example-models/blob/master/misc/funnel/funnel_reparam.stan\nWe compile the program and run 4 chains, as before. Now the diagnose command doesn’t detect any problems:\nChecking sampler transitions treedepth.\nTreedepth satisfactory for all transitions.\n\nChecking sampler transitions for divergences.\nNo divergent transitions found.\n\nChecking E-BFMI - sampler transitions HMC potential energy.\nE-BFMI satisfactory.\n\nRank-normalized split effective sample size satisfactory for all parameters.\n\nRank-normalized split R-hat values satisfactory for all parameters.\n\nProcessing complete, no problems detected.\n\n\n\n\n\nStan uses Hamiltonian Monte Carlo (HMC) to explore the target distribution — the posterior defined by a Stan program + data — by simulating the evolution of a Hamiltonian system. In order to approximate the exact solution of the Hamiltonian dynamics we need to choose a step size governing how far we move each time we evolve the system forward. 
That is, the step size controls the resolution of the sampler.\nUnfortunately, for particularly hard problems there are features of the target distribution that are too small for this resolution. Consequently the sampler misses those features and returns biased estimates. Fortunately, this mismatch of scales manifests as divergences which provide a practical diagnostic. If there are any divergences after warmup, then the sample based estimates may be biased.\nIf the divergent transitions cannot be eliminated by increasing the adapt_delta parameter, we have to find a different way to write the model that is logically equivalent but simplifies the geometry of the posterior distribution. This problem occurs frequently with hierarchical models and one of the simplest examples is Neal’s Funnel, which is discussed in the reparameterization section of the Stan User’s Guide.\n\n\n\nWarnings about hitting the maximum treedepth are not as serious as warnings about divergent transitions. While divergent transitions are a validity concern, hitting the maximum treedepth is an efficiency concern. Configuring the No-U-Turn-Sampler (the variant of HMC used by Stan) requires putting a cap on the depth of the trees that it evaluates during each iteration (for details on this see the Hamiltonian Monte Carlo Sampling chapter in the Stan Reference Manual). When the maximum allowed tree depth is reached it indicates that NUTS is terminating prematurely to avoid excessively long execution time.\nThis is controlled through the max_depth argument. If the number of transitions which exceed maximum treedepth is low, increasing max_depth may correct this problem.\n\n\n\nThe sampler csv output column energy__ is used to diagnose the accuracy of any Hamiltonian Monte Carlo sampler. If the standard deviation of energy is much larger than \\(\\sqrt{D / 2}\\), where \\(D\\) is the number of unconstrained parameters, then the sampler is unlikely to be able to explore the posterior adequately. 
This is usually due to heavy-tailed posteriors and can sometimes be remedied by reparameterizing the model.\nThe warning that some number of chains had an estimated Bayesian Fraction of Missing Information (BFMI) that was too low implies that the adaptation phase of the Markov Chains did not turn out well and those chains likely did not explore the posterior distribution efficiently. For more details on this diagnostic, see https://arxiv.org/abs/1604.00695. Should this occur, you can either run the sampler for more iterations, or consider reparameterizing your model.\n\n\n\nRoughly speaking, the effective sample size (ESS) of a quantity of interest captures how many independent draws contain the same amount of information as the dependent sample obtained by the MCMC algorithm. Clearly, the higher the ESS the better. Stan uses \\(\\hat{R}\\) adjustment to use the between-chain information in computing the ESS. For example, in case of multimodal distributions with well-separated modes, this leads to an ESS estimate that is close to the number of distinct modes that are found.\nBulk-ESS refers to the effective sample size based on the rank normalized draws. This does not directly compute the ESS relevant for computing the mean of the parameter, but instead computes a quantity that is well defined even if the chains do not have finite mean or variance. Overall bulk-ESS estimates the sampling efficiency for the location of the distribution (e.g. mean and median).\nOften quite smaller ESS would be sufficient for the desired estimation accuracy, but the estimation of ESS and convergence diagnostics themselves require higher ESS. We recommend requiring that the bulk-ESS is greater than 100 times the number of chains. 
For example, when running four chains, this corresponds to having a rank-normalized effective sample size of at least 400.\n\n\n\n\\(\\hat{R}\\) (R-hat) convergence diagnostic compares the between- and within-chain estimates for model parameters and other univariate quantities of interest. If chains have not mixed well (i.e., the between- and within-chain estimates don’t agree), \\(\\hat{R}\\) is larger than 1. We recommend running at least four chains by default and only using the sample if \\(\\hat{R}\\) is less than 1.01. Stan reports \\(\\hat{R}\\) which is the maximum of rank normalized split-R-hat and rank normalized folded-split-R-hat, which works for thick tailed distributions and is sensitive also to differences in scale. For more details on this diagnostic, see https://arxiv.org/abs/1903.08008.\nThere is further discussion in https://arxiv.org/abs/1701.02434; however the correct resolution is necessarily model specific, hence all suggestions are general guidelines only.", + "crumbs": [ + "Tools and Utilities", + "`diagnose`: Diagnosing Biased Hamiltonian Monte Carlo Inferences" + ] + }, + { + "objectID": "cmdstan-guide/diagnose_utility.html#building-the-diagnose-command", + "href": "cmdstan-guide/diagnose_utility.html#building-the-diagnose-command", + "title": "diagnose: Diagnosing Biased Hamiltonian Monte Carlo Inferences", + "section": "", + "text": "The CmdStan makefile task build compiles the diagnose utility into the bin directory. 
It can be compiled directly using the makefile as follows:\n> cd <cmdstan-home>\n> make bin/diagnose", + "crumbs": [ + "Tools and Utilities", + "`diagnose`: Diagnosing Biased Hamiltonian Monte Carlo Inferences" + ] + }, + { + "objectID": "cmdstan-guide/diagnose_utility.html#running-the-diagnose-command", + "href": "cmdstan-guide/diagnose_utility.html#running-the-diagnose-command", + "title": "diagnose: Diagnosing Biased Hamiltonian Monte Carlo Inferences", + "section": "", + "text": "The diagnose command is executed on one or more output files, which are provided as command-line arguments separated by spaces. If there are no apparent problems with the output files passed to diagnose, it outputs a message that all transitions are within treedepth limit and that no divergent transitions were found. If problems are detected, it outputs a summary of the problem along with possible ways to mitigate it.\nTo fully exercise the diagnose command, we run 4 chains to sample from Neal’s funnel distribution, discussed in the Stan User’s Guide reparameterization section. This program defines a distribution which exemplifies the difficulties of sampling from some hierarchical models:\nparameters {\n real y;\n vector[9] x;\n}\nmodel {\n y ~ normal(0, 3);\n x ~ normal(0, exp(y / 2));\n}\nThis program is available on GitHub: https://github.com/stan-dev/example-models/blob/master/misc/funnel/funnel.stan\nStan has trouble sampling from the region where y is small and thus x is constrained to be near 0. This is due to the fact that the density’s scale changes with y, so that a step size that works well when y is large is inefficient when y is small and vice-versa.\nRunning 4 chains produces output files output_1.csv, …, output_4.csv. 
We run diagnose command on this fileset:\n> bin/diagnose output_*.csv\nThe output is printed to the terminal window:\nChecking sampler transitions treedepth.\n18 of 4000 (0.45%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps.\nTrajectories that are prematurely terminated due to this limit will result in slow exploration.\nFor optimal performance, increase this limit.\n\nChecking sampler transitions for divergences.\n11 of 4000 (0.28%) transitions ended with a divergence.\nThese divergent transitions indicate that HMC is not fully able to explore the posterior distribution.\nTry increasing adapt delta closer to 1.\nIf this doesn't remove all divergences, try to reparameterize the model.\n\nChecking E-BFMI - sampler transitions HMC potential energy.\nThe E-BFMI, 0.06, is below the nominal threshold of 0.30 which suggests that HMC may have trouble exploring the target distribution.\nIf possible, try to reparameterize the model.\n\nRank-normalized split effective sample size satisfactory for all parameters.\n\nThe following parameters had rank-normalized split R-hat greater than 1.01:\n y, x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9]\nSuch high values indicate incomplete mixing and biased estimation.\nYou should consider regularizing your model with additional prior information or a more effective parameterization.\n\nProcessing complete.\nIn this example, changing the model to use a non-centered parameterization is the only way to correct these problems. 
In this second model, the parameters x_raw and y_raw are sampled as independent standard normals, which is easy for Stan.\nparameters {\n real y_raw;\n vector[9] x_raw;\n}\ntransformed parameters {\n real y;\n vector[9] x;\n\n y = 3.0 * y_raw;\n x = exp(y / 2) * x_raw;\n}\nmodel {\n y_raw ~ std_normal(); // implies y ~ normal(0, 3)\n x_raw ~ std_normal(); // implies x ~ normal(0, exp(y / 2))\n}\nThis program is available on GitHub: https://github.com/stan-dev/example-models/blob/master/misc/funnel/funnel_reparam.stan\nWe compile the program and run 4 chains, as before. Now the diagnose command doesn’t detect any problems:\nChecking sampler transitions treedepth.\nTreedepth satisfactory for all transitions.\n\nChecking sampler transitions for divergences.\nNo divergent transitions found.\n\nChecking E-BFMI - sampler transitions HMC potential energy.\nE-BFMI satisfactory.\n\nRank-normalized split effective sample size satisfactory for all parameters.\n\nRank-normalized split R-hat values satisfactory for all parameters.\n\nProcessing complete, no problems detected.", + "crumbs": [ + "Tools and Utilities", + "`diagnose`: Diagnosing Biased Hamiltonian Monte Carlo Inferences" + ] + }, + { + "objectID": "cmdstan-guide/diagnose_utility.html#diagnose-warnings-and-recommendations", + "href": "cmdstan-guide/diagnose_utility.html#diagnose-warnings-and-recommendations", + "title": "diagnose: Diagnosing Biased Hamiltonian Monte Carlo Inferences", + "section": "", + "text": "Stan uses Hamiltonian Monte Carlo (HMC) to explore the target distribution — the posterior defined by a Stan program + data — by simulating the evolution of a Hamiltonian system. In order to approximate the exact solution of the Hamiltonian dynamics we need to choose a step size governing how far we move each time we evolve the system forward. 
That is, the step size controls the resolution of the sampler.\nUnfortunately, for particularly hard problems there are features of the target distribution that are too small for this resolution. Consequently the sampler misses those features and returns biased estimates. Fortunately, this mismatch of scales manifests as divergences which provide a practical diagnostic. If there are any divergences after warmup, then the sample based estimates may be biased.\nIf the divergent transitions cannot be eliminated by increasing the adapt_delta parameter, we have to find a different way to write the model that is logically equivalent but simplifies the geometry of the posterior distribution. This problem occurs frequently with hierarchical models and one of the simplest examples is Neal’s Funnel, which is discussed in the reparameterization section of the Stan User’s Guide.\n\n\n\nWarnings about hitting the maximum treedepth are not as serious as warnings about divergent transitions. While divergent transitions are a validity concern, hitting the maximum treedepth is an efficiency concern. Configuring the No-U-Turn-Sampler (the variant of HMC used by Stan) requires putting a cap on the depth of the trees that it evaluates during each iteration (for details on this see the Hamiltonian Monte Carlo Sampling chapter in the Stan Reference Manual). When the maximum allowed tree depth is reached it indicates that NUTS is terminating prematurely to avoid excessively long execution time.\nThis is controlled through the max_depth argument. If the number of transitions which exceed maximum treedepth is low, increasing max_depth may correct this problem.\n\n\n\nThe sampler csv output column energy__ is used to diagnose the accuracy of any Hamiltonian Monte Carlo sampler. If the standard deviation of energy is much larger than \\(\\sqrt{D / 2}\\), where \\(D\\) is the number of unconstrained parameters, then the sampler is unlikely to be able to explore the posterior adequately. 
This is usually due to heavy-tailed posteriors and can sometimes be remedied by reparameterizing the model.\nThe warning that some number of chains had an estimated Bayesian Fraction of Missing Information (BFMI) that was too low implies that the adaptation phase of the Markov Chains did not turn out well and those chains likely did not explore the posterior distribution efficiently. For more details on this diagnostic, see https://arxiv.org/abs/1604.00695. Should this occur, you can either run the sampler for more iterations, or consider reparameterizing your model.\n\n\n\nRoughly speaking, the effective sample size (ESS) of a quantity of interest captures how many independent draws contain the same amount of information as the dependent sample obtained by the MCMC algorithm. Clearly, the higher the ESS the better. Stan uses \\(\\hat{R}\\) adjustment to use the between-chain information in computing the ESS. For example, in case of multimodal distributions with well-separated modes, this leads to an ESS estimate that is close to the number of distinct modes that are found.\nBulk-ESS refers to the effective sample size based on the rank normalized draws. This does not directly compute the ESS relevant for computing the mean of the parameter, but instead computes a quantity that is well defined even if the chains do not have finite mean or variance. Overall bulk-ESS estimates the sampling efficiency for the location of the distribution (e.g. mean and median).\nOften quite smaller ESS would be sufficient for the desired estimation accuracy, but the estimation of ESS and convergence diagnostics themselves require higher ESS. We recommend requiring that the bulk-ESS is greater than 100 times the number of chains. 
For example, when running four chains, this corresponds to having a rank-normalized effective sample size of at least 400.\n\n\n\n\\(\\hat{R}\\) (R-hat) convergence diagnostic compares the between- and within-chain estimates for model parameters and other univariate quantities of interest. If chains have not mixed well (i.e., the between- and within-chain estimates don’t agree), \\(\\hat{R}\\) is larger than 1. We recommend running at least four chains by default and only using the sample if \\(\\hat{R}\\) is less than 1.01. Stan reports \\(\\hat{R}\\) which is the maximum of rank normalized split-R-hat and rank normalized folded-split-R-hat, which works for thick tailed distributions and is sensitive also to differences in scale. For more details on this diagnostic, see https://arxiv.org/abs/1903.08008.\nThere is further discussion in https://arxiv.org/abs/1701.02434; however the correct resolution is necessarily model specific, hence all suggestions are general guidelines only.", + "crumbs": [ + "Tools and Utilities", + "`diagnose`: Diagnosing Biased Hamiltonian Monte Carlo Inferences" + ] + }, + { + "objectID": "cmdstan-guide/external_code.html", + "href": "cmdstan-guide/external_code.html", + "title": "Using external C++ code", + "section": "", + "text": "The --allow-undefined flag can be passed to the call to stanc, which will allow undefined functions in the Stan language to be parsed without an error. We can then include a definition of the function in a C++ header file.\nThis requires specifying two makefile variables:\n\nSTANCFLAGS=--allow-undefined\nUSER_HEADER=<header_file.hpp>, where <header_file.hpp> is the name of a header file that defines a function with the same name and a compatible signature. 
This function can appear in the global namespace or in the model namespace, which is defined as the name of the model (either the file name, or the --name argument to stanc) followed by _namespace.\n\nThis is an advanced feature which is only recommended to users familiar with the internals of Stan’s Math library. Most existing C++ code will need to be modified to work with Stan, to varying degrees.\nAs an example, consider the following variant of the Bernoulli example\nfunctions {\n real make_odds(data real theta);\n}\ndata {\n int<lower=0> N;\n array[N] int<lower=0, upper=1> y;\n}\nparameters {\n real<lower=0, upper=1> theta;\n}\nmodel {\n theta ~ beta(1, 1); // uniform prior on interval 0, 1\n y ~ bernoulli(theta);\n}\ngenerated quantities {\n real odds;\n odds = make_odds(theta);\n}\nHere the make_odds function is declared but not defined, which would ordinarily result in a parser error. However, if you put STANCFLAGS = --allow-undefined into the make/local file or into the stanc call, then the stanc compiler will translate this program to C++, but the generated C++ code will not compile unless you write a file such as examples/bernoulli/make_odds.hpp with the following lines\n#include <ostream>\n\ndouble make_odds(const double& theta, std::ostream *pstream__) {\n return theta / (1 - theta);\n}\nThe signature for this function needs to fulfill all the usages in the C++ class emitted by stanc. The pstream__ argument is mandatory in the signature but need not be used if your function does not print any output. Because make_odds was declared with a data argument and only used in generated quantities, a signature which accepts and returns double is acceptable. Functions which will have parameters passed as input in the transformed parameters or model blocks will require the ability to accept Stan’s autodiff types. 
If you wish to autodiff through this function, the simplest option is to make it a template, like\ntemplate <typename T>\nT make_odds(const T &theta, std::ostream *pstream__)\n{\n return theta / (1 - theta);\n}\nGiven the above, the following make invocation should work\n> make STANCFLAGS=--allow-undefined USER_HEADER=examples/bernoulli/make_odds.hpp examples/bernoulli/bernoulli # on Windows add .exe\nAlternatively, you could put STANCFLAGS and USER_HEADER into the make/local file instead of specifying them on the command-line.\nIf the function were more complicated and involved functions in the Stan Math Library, then you would need to add #include <stan/model/model_header.hpp> and prefix the function calls with stan::math::.\n\n\nExternal C++ functions are currently the only way to encode a function with a known analytic gradient outside the Stan Math Library. This is done very similarly to how a function would be added to the Math library with a reverse-mode specialization. The following code is adapted from the Stan Math documentation.\nSuppose you have the following (nonsensical) model which relies on a function called my_dot_self. 
We will implement this as a copy of the built-in dot_self function.\nfunctions {\n // both overloads end up using the same C++ template\n real my_dot_self(vector theta);\n real my_dot_self(row_vector theta);\n}\ndata {\n int<lower=0> N;\n vector[N] input_data;\n}\ntransformed data {\n // no autodiff for data - will call using doubles\n real ds = my_dot_self(input_data);\n}\nparameters {\n row_vector[N] thetas;\n}\nmodel {\n thetas ~ normal(0,1);\n // autodiff - will call using stan::math::var types\n input_data ~ normal(thetas, my_dot_self(thetas));\n}\nIf you wanted to autodiff through this function, the following header would suffice1:\n#include <stan/model/model_header.hpp>\n#include <ostream>\n\ntemplate <typename EigVec, stan::require_eigen_vector_t<EigVec> * = nullptr>\ninline stan::value_type_t<EigVec> my_dot_self(const EigVec &x, std::ostream *pstream__)\n{\n const auto &x_ref = stan::math::to_ref(x);\n stan::value_type_t<EigVec> sum_x = 0.0;\n for (int i = 0; i < x.size(); ++i)\n {\n sum_x += x_ref.coeff(i) * x_ref.coeff(i);\n }\n return sum_x;\n}\nHowever, we know the derivative of this function directly. 
To leverage this, we could use a more complicated form which has two function templates that differentiate themselves based on whether or not derivatives are required:\n#include <stan/model/model_header.hpp>\n#include <ostream>\n\ntemplate <typename EigVec, stan::require_eigen_vector_t<EigVec> * = nullptr,\n stan::require_not_st_var<EigVec> * = nullptr>\ninline double my_dot_self(const EigVec &x, std::ostream *pstream__)\n{\n auto x_ref = stan::math::to_ref(x);\n double sum = 0.0;\n for (int i = 0; i < x.size(); ++i)\n {\n sum += x_ref.coeff(i) * x_ref.coeff(i);\n }\n return sum;\n}\n\ntemplate <typename EigVec, stan::require_eigen_vt<stan::is_var, EigVec> * = nullptr>\ninline stan::math::var my_dot_self(const EigVec &v, std::ostream *pstream__)\n{\n // (1) put v into our memory arena\n stan::arena_t<EigVec> arena_v(v);\n // (2) calculate forward pass using\n // (3) the .val() method for matrices of var types\n stan::math::var res = my_dot_self(arena_v.val(), pstream__);\n // (4) Place a callback for the reverse pass on the callback stack.\n stan::math::reverse_pass_callback(\n [res, arena_v]() mutable\n { arena_v.adj() += 2.0 * res.adj() * arena_v.val(); });\n return res;\n}\nFor more details about how to write C++ code using the Stan Math Library, see the Math library documentation at https://mc-stan.org/math/ or the paper at https://arxiv.org/abs/1509.07164.\n\n\n\nSome functions have special meanings in Stan and place additional requirements on their signatures if used in external C++.\n\nRNGs must end with _rng. They will be passed a “base RNG object” as the second to last argument, before the pointer to the ostream. We recommend making this a template, since it may change. This is currently a stan::rng_t object (a type alias to boost::rng::mixmax).\nFunctions which edit the target directly must end with _lp and will be passed a reference to lp__ and a reference to a stan::math::accumulator object as the final parameters before the ostream pointer. 
They are also expected to have a boolean template parameter propto__ which controls whether or not constant terms can be dropped.\nProbability distributions must end with _lpdf or _lpmf and will be passed a boolean template parameter propto__ which controls whether or not constant terms can be dropped.", + "crumbs": [ + "Appendices", + "Using external C++ code" + ] + }, + { + "objectID": "cmdstan-guide/external_code.html#derivative-specializations", + "href": "cmdstan-guide/external_code.html#derivative-specializations", + "title": "Using external C++ code", + "section": "", + "text": "External C++ functions are currently the only way to encode a function with a known analytic gradient outside the Stan Math Library. This is done very similarly to how a function would be added to the Math library with a reverse-mode specialization. The following code is adapted from the Stan Math documentation.\nSuppose you have the following (nonsensical) model which relies on a function called my_dot_self. 
We will implement this as a copy of the built-in dot_self function.\nfunctions {\n // both overloads end up using the same C++ template\n real my_dot_self(vector theta);\n real my_dot_self(row_vector theta);\n}\ndata {\n int<lower=0> N;\n vector[N] input_data;\n}\ntransformed data {\n // no autodiff for data - will call using doubles\n real ds = my_dot_self(input_data);\n}\nparameters {\n row_vector[N] thetas;\n}\nmodel {\n thetas ~ normal(0,1);\n // autodiff - will call using stan::math::var types\n input_data ~ normal(thetas, my_dot_self(thetas));\n}\nIf you wanted to autodiff through this function, the following header would suffice1:\n#include <stan/model/model_header.hpp>\n#include <ostream>\n\ntemplate <typename EigVec, stan::require_eigen_vector_t<EigVec> * = nullptr>\ninline stan::value_type_t<EigVec> my_dot_self(const EigVec &x, std::ostream *pstream__)\n{\n const auto &x_ref = stan::math::to_ref(x);\n stan::value_type_t<EigVec> sum_x = 0.0;\n for (int i = 0; i < x.size(); ++i)\n {\n sum_x += x_ref.coeff(i) * x_ref.coeff(i);\n }\n return sum_x;\n}\nHowever, we know the derivative of this function directly. 
To leverage this, we could use a more complicated form which has two function templates that differentiate themselves based on whether or not derivatives are required:\n#include <stan/model/model_header.hpp>\n#include <ostream>\n\ntemplate <typename EigVec, stan::require_eigen_vector_t<EigVec> * = nullptr,\n stan::require_not_st_var<EigVec> * = nullptr>\ninline double my_dot_self(const EigVec &x, std::ostream *pstream__)\n{\n auto x_ref = stan::math::to_ref(x);\n double sum = 0.0;\n for (int i = 0; i < x.size(); ++i)\n {\n sum += x_ref.coeff(i) * x_ref.coeff(i);\n }\n return sum;\n}\n\ntemplate <typename EigVec, stan::require_eigen_vt<stan::is_var, EigVec> * = nullptr>\ninline stan::math::var my_dot_self(const EigVec &v, std::ostream *pstream__)\n{\n // (1) put v into our memory arena\n stan::arena_t<EigVec> arena_v(v);\n // (2) calculate forward pass using\n // (3) the .val() method for matrices of var types\n stan::math::var res = my_dot_self(arena_v.val(), pstream__);\n // (4) Place a callback for the reverse pass on the callback stack.\n stan::math::reverse_pass_callback(\n [res, arena_v]() mutable\n { arena_v.adj() += 2.0 * res.adj() * arena_v.val(); });\n return res;\n}\nFor more details about how to write C++ code using the Stan Math Library, see the Math library documentation at https://mc-stan.org/math/ or the paper at https://arxiv.org/abs/1509.07164.", + "crumbs": [ + "Appendices", + "Using external C++ code" + ] + }, + { + "objectID": "cmdstan-guide/external_code.html#special-functions-rngs-distributions-editing-target", + "href": "cmdstan-guide/external_code.html#special-functions-rngs-distributions-editing-target", + "title": "Using external C++ code", + "section": "", + "text": "Some functions have special meanings in Stan and place additional requirements on their signatures if used in external C++.\n\nRNGs must end with _rng. They will be passed a “base RNG object” as the second to last argument, before the pointer to the ostream. 
We recommend making this a template, since it may change. This is currently a stan::rng_t object (a type alias to boost::rng::mixmax).\nFunctions which edit the target directly must end with _lp and will be passed a reference to lp__ and a reference to a stan::math::accumulator object as the final parameters before the ostream pointer. They are also expected to have a boolean template parameter propto__ which controls whether or not constant terms can be dropped.\nProbability distributions must end with _lpdf or _lpmf and will be passed a boolean template parameter propto__ which controls whether or not constant terms can be dropped.", + "crumbs": [ + "Appendices", + "Using external C++ code" + ] + }, + { + "objectID": "cmdstan-guide/external_code.html#footnotes", + "href": "cmdstan-guide/external_code.html#footnotes", + "title": "Using external C++ code", + "section": "Footnotes", + "text": "Footnotes\n\n\nDetails of programming in the Stan Math style are omitted from this section, it is presented only as an example↩︎", + "crumbs": [ + "Appendices", + "Using external C++ code" + ] + }, + { + "objectID": "cmdstan-guide/index.html", + "href": "cmdstan-guide/index.html", + "title": "CmdStan User’s Guide", + "section": "", + "text": "This document is a user’s guide for CmdStan, the command-line interface to the Stan statistical modeling language. CmdStan provides the programs and tools to compile Stan programs into C++ executables that can be run directly from the command line, together with a few utilities to check and summarize the resulting outputs.\nIn CmdStan, statistical models written in the Stan probabilistic programming language are translated into a C++ program which is then compiled together with the CmdStan routines that provide the logic needed to manage all user inputs and program outputs and the Stan inference algorithms and math library. 
The resulting command line executable program can be used to\n\ndo inference on data, producing an exact or approximate estimate of the posterior;\ngenerate new quantities of interest from an existing estimate;\ngenerate data from the model according to a given set of parameters.\n\nThe packages CmdStanR and CmdStanPy provide interfaces to CmdStan from R and Python, respectively; similarly, JuliaStan also interfaces with CmdStan.\nDownload the pdf version of this manual.\n\nBenefits of CmdStan\n\nWith every new Stan release, there is a corresponding CmdStan release, therefore CmdStan provides access to the latest version of Stan, and can be used to run the development version of Stan as well.\nOf the Stan interfaces, CmdStan has the lightest memory footprint, therefore it can fit larger and more complex models. It has the fewest dependencies, which makes it easier to run in limited environments such as clusters.\nThe output generated is in CSV format and can be post-processed using other Stan interfaces or general tools.\n\n\n\nStan documentation\n\nStan User’s Guide The Stan user’s guide provides example models and programming techniques for coding statistical models in Stan. It also serves as an example-driven introduction to Bayesian modeling and inference:\nStan Reference Manual Stan’s modeling language is shared across all of its interfaces. 
The Stan Language Reference Manual provides a concise definition of the language syntax for all elements in the language together with an overview of the inference algorithms and posterior inference tools.\nStan Functions Reference The Stan Functions Reference provides definitions and examples for all the functions defined in the Stan math library and available in the Stan programming language, including all probability distributions.\n\n\n\nCopyright and trademark\n\nCopyright 2011–2025, Stan Development Team and their assignees.\nThe Stan name and logo are registered trademarks of NumFOCUS.\n\n\n\nLicensing\n\nText content: CC-BY ND 4.0 license\nComputer code: BSD 3-clause license\nLogo: Stan logo usage guidelines\n\n\n\n\n\n Back to top", + "crumbs": [ + "CmdStan User's Guide" + ] + }, + { + "objectID": "cmdstan-guide/json_apdx.html", + "href": "cmdstan-guide/json_apdx.html", + "title": "JSON Format for CmdStan", + "section": "", + "text": "CmdStan can use JSON format for input data for both model data and parameters. Model data is read in by the model constructor. Model parameters are used to initialize the sampler and optimizer.\n\n\nYou can create the JSON file yourself using the guidelines below, but a more convenient way to create a JSON file for use with CmdStan is to use the write_stan_json() function provided by the CmdStanR interface.\n\n\n\nJSON is a data interchange notation, defined by an ECMA standard. JSON data files must be in Unicode. JSON data is a series of structural tokens, literal tokens, and values:\n\nStructural tokens are the left and right curly bracket {}, left and right square bracket [], the colon :, and the comma ,.\nLiteral tokens must always be in lowercase. There are three literal tokens: true, false, null.\nA primitive value is a single token which is either a literal, a string, or a number.\nA string consists of zero or more Unicode characters enclosed in double quotes, e.g. \"foo\". 
A backslash is used to escape the double quote character as well as the backslash itself. JSON allows the use of Unicode character escapes, e.g. \"\\\\uHHHH\" where HHHH is the Unicode code point in hex.\nNumbers are represented using either decimal notation or scientific notation. The following are examples of numbers: 17, 17.2, -17.2, -17.2e8, 17.2e-8. There is no distinction between integer and real numbers in the JSON format other than whether they have periods or scientific notation.\nThe special floating point values for positive infinity, negative infinity, and not-a-number can be represented in multiple ways. Positive infinity can be represented as the string \"Inf\", the string \"Infinity\", or the atom Infinity. Negative infinity can be represented as the string \"-Inf\", the string \"-Infinity\", or the atom -Infinity. Not-a-number can be represented as the string \"NaN\" or the atom NaN. These values may be mixed with other numerical types.\nA complex scalar is represented as a two-element array consisting of its real component followed by its imaginary component. For example, the complex number \\(2.3 - 1.83i\\) would be represented in JSON as the two-element array [2.3, -1.83].\nA JSON array is an ordered, comma-separated list of zero or more JSON values enclosed in square brackets. The elements of an array can be of any type. The following are examples of arrays: [], [1], [0.2, \"-inf\", true].\nVectors and row vectors in JSON are represented as arrays of their elements. For example, both the vector \\([1 \\quad 2]^{\\top}\\) and the row vector \\([1 \\quad 2]\\) are represented by the JSON array [1, 2].\nComplex vectors are represented as arrays of two-element arrays. For example, the complex vector \\([2.3 - 1.83i \\quad -4.8 +\n2i]^{\\top}\\) is represented as [[2.3, -1.83], [-4.8, 2]] in JSON. 
A complex row vector has the same representation as its transpose (the vector with the same elements).\nMatrices are represented as arrays of their row vectors. For example, the \\(2 \\times 3\\) matrix \\[\\begin{equation*}\n\\begin{bmatrix}\n1 & 2.7 & -9.8 \\\\\n4.2 & 1.8 & -7.3\n\\end{bmatrix}\n\\end{equation*}\\] is represented in JSON as [[1, 2.7, -9.8], [4.2, 1.8, -7.3]].\nComplex matrices are also represented as arrays of their row vectors. For example, the \\(2 \\times 3\\) complex matrix \\[\\begin{equation*}\n\\begin{bmatrix}\n1 + 2i & 3 - 4.2i & 13.1 + 2.7i \\\\\n3.1 & -5i & 0\n\\end{bmatrix}\n\\end{equation*}\\] would be represented in JSON as [[[1, 2], [3, -4.2], [13.1, 2.7]], [[3.1, 0], [0, -5], [0, 0]]].\nTuples are written as nested JSON objects where the keys are strings for the numbered slots in the tuple. For example, the tuple (1.5, 3.4) is represented in JSON as {\"1\": 1.5, \"2\": 3.4}.\nA name-value pair consists of a string followed by a colon followed by a value, either primitive or compound.\nA JSON object is a comma-separated series of zero or more name-value pairs enclosed in curly brackets. Each name-value pair is a member of the object. Membership is unordered. Member names are not required to be unique. The following are examples of objects: { }, {\"foo\": null}, {\"bar\" : 17, \"baz\" : [14,15,16.6] }.\n\n\n\n\nStan follows the JSON standard. A Stan input file in JSON notation consists of single JSON object which contains zero or more name-value pairs. This structure corresponds to a Python data dictionary object. The following is an example of JSON data for the simple Bernoulli example model:\n{ \"N\" : 10, \"y\" : [0,1,0,0,0,0,0,0,0,1] }\nMatrix data and multi-dimensional arrays are indexed in row-major order. 
For a Stan program which has the data block:\ndata {\n int d1;\n int d2;\n int d3;\n array[d1, d2, d3] int ar;\n}\nthe following JSON input would be valid:\n{ \"d1\" : 2,\n \"d2\" : 3,\n \"d3\" : 4,\n \"ar\" : [[[0,1,2,3], [4,5,6,7], [8,9,10,11]],\n [[12,13,14,15], [16,17,18,19], [20,21,22,23]]]\n}\nJSON ignores whitespace. In the above examples, the spaces and newlines are only used to improve readability and can be omitted.\nAll data inputs are encoded as name-value pairs. The following table provides more examples of JSON data. The left column contains a Stan data variable declaration and the right column contains valid JSON data inputs.\n\n\n\n\n\n\n\nStan declaration\nJSON encoding\n\n\n\n\nint i\n\"i\": 17\n\n\n\n\n\n\nreal a\n\"a\" : 17\n\n\n\n\"a\" : 17.2\n\n\n\n\"a\" : \"NaN\"\n\n\n\n\"a\" : \"+inf\"\n\n\n\n\"a\" : \"-inf\"\n\n\n\n\n\n\ncomplex z\n\"z\": [1, -2.3]\n\n\narray[5] int a\n\"a\" : [1, 2, 3, 4, 5]\n\n\narray[5] real a\n\"a\" : [ 1, 2, 3.3, \"NaN\", 5 ]\n\n\narray[2] complex b\n\"b\" : [[1, -2.3], [4.9, 0]]\n\n\nvector[5] a\n\"a\" : [1, 2, 3.3, \"NaN\", 5]\n\n\nrow_vector[5] a\n\"a\" : [1, 2, 3.3, \"NaN\", 5]\n\n\nmatrix[2, 3] a\n\"a\" : [[ 1, 2, 3 ], [ 4, 5, 6]]\n\n\ncomplex_vector[3] c\n\"c\" : [[-1.2, 3.3], [4.8, 1.9], [2.3, 0]]\n\n\ncomplex_row_vector[3] c\n\"c\" : [[-1.2, 3.3], [4.8, 1.9], [2.3, 0]]\n\n\ncomplex_matrix[2, 3] d\n\"d\" : [[[1, 1], [2, 2], [3, 3]], [[4, 4], [5, 5], [6, 6]]]\n\n\ntuple(real, array[2] int) t\n\"t\" : { \"1\": 1.4, \"2\": [1, 2]}\n\n\n\n\n\nJSON notation is not able to distinguish between multi-dimensional arrays where any dimension is \\(0\\), e.g., a 2-D array with dimensions \\((1,0)\\), i.e., an array which contains a single array which is empty, has JSON representation [ ]. To see how this works, consider the following Stan program data block:\ndata {\n int d;\n array[d] int ar_d1;\n array[d, d] int ar_d2;\n array[d, d, d] int ar_d3;\n}\nIn the case where variable d is 1, all arrays will contain a single value. 
If array variable ar_d1 contains value 7, 2-D array variable ar_d2 contains (an array which contains) value 8, and 3-D array variable ar_d3 contains (an array which contains an array which contains) value 9, the JSON representation is:\n{ \"ar_d1\" : [7],\n \"ar_d2\" : [[8]],\n \"ar_d3\" : [[[9]]]\n}\nHowever, in the case where variable d is 0, ar_d1 is empty, i.e., it contains no values, as are ar_d2 and ar_d3, and the JSON representation is\n{ \"d\" : 0,\n \"ar_d1\" : [ ],\n \"ar_d2\" : [ ],\n \"ar_d3\" : [ ]\n}", + "crumbs": [ + "Appendices", + "JSON Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/json_apdx.html#creating-json-files", + "href": "cmdstan-guide/json_apdx.html#creating-json-files", + "title": "JSON Format for CmdStan", + "section": "", + "text": "You can create the JSON file yourself using the guidelines below, but a more convenient way to create a JSON file for use with CmdStan is to use the write_stan_json() function provided by the CmdStanR interface.", + "crumbs": [ + "Appendices", + "JSON Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/json_apdx.html#json-syntax-summary", + "href": "cmdstan-guide/json_apdx.html#json-syntax-summary", + "title": "JSON Format for CmdStan", + "section": "", + "text": "JSON is a data interchange notation, defined by an ECMA standard. JSON data files must be in Unicode. JSON data is a series of structural tokens, literal tokens, and values:\n\nStructural tokens are the left and right curly bracket {}, left and right square bracket [], the colon :, and the comma ,.\nLiteral tokens must always be in lowercase. There are three literal tokens: true, false, null.\nA primitive value is a single token which is either a literal, a string, or a number.\nA string consists of zero or more Unicode characters enclosed in double quotes, e.g. \"foo\". A backslash is used to escape the double quote character as well as the backslash itself. JSON allows the use of Unicode character escapes, e.g. 
\"\\\\uHHHH\" where HHHH is the Unicode code point in hex.\nNumbers are represented using either decimal notation or scientific notation. The following are examples of numbers: 17, 17.2, -17.2, -17.2e8, 17.2e-8. There is no distinction between integer and real numbers in the JSON format other than whether they have periods or scientific notation.\nThe special floating point values for positive infinity, negative infinity, and not-a-number can be represented in multiple ways. Positive infinity can be represented as the string \"Inf\", the string \"Infinity\", or the atom Infinity. Negative infinity can be represented as the string \"-Inf\", the string \"-Infinity\", or the atom -Infinity. Not-a-number can be represented as the string \"NaN\" or the atom NaN. These values may be mixed with other numerical types.\nA complex scalar is represented as a two-element array consisting of its real component followed by its imaginary component. For example, the complex number \\(2.3 - 1.83i\\) would be represented in JSON as the two-element array [2.3, -1.83].\nA JSON array is an ordered, comma-separated list of zero or more JSON values enclosed in square brackets. The elements of an array can be of any type. The following are examples of arrays: [], [1], [0.2, \"-inf\", true].\nVectors and row vectors in JSON are representing as arrays of their elements. For example, both the vector \\([1 \\quad 2]^{\\top}\\) and the row vector \\([1 \\quad 2]\\) are represented by the JSON array [1, 2].\nComplex vectors are represented as arrays of two-element arrays. For example, the complex vector \\([2.3 - 1.83i \\quad -4.8 +\n2i]^{\\top}\\) is represented as [[2.3, -1.83], [-4.8, 2]] in JSON. A complex row vector has the same representation as its transpose (the vector with the same elements).\nMatrices are represented as arrays of their row vectors. 
For example, the \\(2 \\times 3\\) matrix \\[\\begin{equation*}\n\\begin{bmatrix}\n1 & 2.7 & -9.8 \\\\\n4.2 & 1.8 & -7.3\n\\end{bmatrix}\n\\end{equation*}\\] is represented in JSON as [[1, 2.7, -9.8], [4.2, 1.8, -7.3]].\nComplex matrices are also represented as arrays of their row vectors. For example, the \\(2 \\times 3\\) complex matrix \\[\\begin{equation*}\n\\begin{bmatrix}\n1 + 2i & 3 - 4.2i & 13.1 + 2.7i \\\\\n3.1 & -5i & 0\n\\end{bmatrix}\n\\end{equation*}\\] would be represented in JSON as [[[1, 2], [3, -4.2], [13.1, 2.7]], [[3.1, 0], [0, -5], [0, 0]]].\nTuples are written as nested JSON objects where the keys are strings for the numbered slots in the tuple. For example, the tuple (1.5, 3.4) is represented in JSON as {\"1\": 1.5, \"2\": 3.4}.\nA name-value pair consists of a string followed by a colon followed by a value, either primitive or compound.\nA JSON object is a comma-separated series of zero or more name-value pairs enclosed in curly brackets. Each name-value pair is a member of the object. Membership is unordered. Member names are not required to be unique. The following are examples of objects: { }, {\"foo\": null}, {\"bar\" : 17, \"baz\" : [14,15,16.6] }.", + "crumbs": [ + "Appendices", + "JSON Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/json_apdx.html#stan-data-types-in-json-notation", + "href": "cmdstan-guide/json_apdx.html#stan-data-types-in-json-notation", + "title": "JSON Format for CmdStan", + "section": "", + "text": "Stan follows the JSON standard. A Stan input file in JSON notation consists of single JSON object which contains zero or more name-value pairs. This structure corresponds to a Python data dictionary object. The following is an example of JSON data for the simple Bernoulli example model:\n{ \"N\" : 10, \"y\" : [0,1,0,0,0,0,0,0,0,1] }\nMatrix data and multi-dimensional arrays are indexed in row-major order. 
For a Stan program which has data block:\ndata {\n int d1;\n int d2;\n int d3;\n array[d1, d2, d3] int ar;\n}\nthe following JSON input would be valid:\n{ \"d1\" : 2,\n \"d2\" : 3,\n \"d3\" : 4,\n \"ar\" : [[[0,1,2,3], [4,5,6,7], [8,9,10,11]],\n [[12,13,14,15], [16,17,18,19], [20,21,22,23]]]\n}\nJSON ignores whitespace. In the above examples, the spaces and newlines are only used to improve readability and can be omitted.\nAll data inputs are encoded as name-value pairs. The following table provides more examples of JSON data. The left column contains a Stan data variable declaration and the right column contains valid JSON data inputs.\n\n\n\n\n\n\n\nStan declaration\nJSON encoding\n\n\n\n\nint i\n\"i\": 17\n\n\n\n\n\n\nreal a\n\"a\" : 17\n\n\n\n\"a\" : 17.2\n\n\n\n\"a\" : \"NaN\"\n\n\n\n\"a\" : \"+inf\"\n\n\n\n\"a\" : \"-inf\"\n\n\n\n\n\n\ncomplex z\n\"z\": [1, -2.3]\n\n\narray[5] int a\n\"a\" : [1, 2, 3, 4, 5]\n\n\narray[5] real a\n\"a\" : [ 1, 2, 3.3, \"NaN\", 5 ]\n\n\narray[2] complex b\n\"b\" : [[1, -2.3], [4.9, 0]]\n\n\nvector[5] a\n\"a\" : [1, 2, 3.3, \"NaN\", 5]\n\n\nrow_vector[5] a\n\"a\" : [1, 2, 3.3, \"NaN\", 5]\n\n\nmatrix[2, 3] a\n\"a\" : [[ 1, 2, 3 ], [ 4, 5, 6]]\n\n\ncomplex_vector[3] c\n\"c\" : [[-1.2, 3.3], [4.8, 1.9], [2.3, 0]]\n\n\ncomplex_row_vector[3] c\n\"c\" : [[-1.2, 3.3], [4.8, 1.9], [2.3, 0]]\n\n\ncomplex_matrix[2, 3] d\n\"d\" : [[[1, 1], [2, 2], [3, 3]], [[4, 4], [5, 5], [6, 6]]]\n\n\ntuple(real, array[2] int) t\n\"t\" : { \"1\": 1.4, \"2\": [1, 2]}\n\n\n\n\n\nJSON notation is not able to distinguish between multi-dimensional arrays where any dimension is \\(0\\), e.g., a 2-D array with dimensions \\((1,0)\\), i.e., an array which contains a single array which is empty, has JSON representation [ ]. To see how this works, consider the following Stan program data block:\ndata {\n int d;\n array[d] int ar_1d;\n array[d, d] int ar_2d;\n array[d, d, d] int ar_3d;\n}\nIn the case where variable d is 1, all arrays will contain a single value. 
If array variable ar_1d contains value 7, 2-D array variable ar_2d contains (an array which contains) value 8, and 3-D array variable ar_3d contains (an array which contains an array which contains) value 9, the JSON representation is:\n{ \"ar_1d\" : [7],\n \"ar_2d\" : [[8]],\n \"ar_3d\" : [[[9]]]\n}\nHowever, in the case where variable d is 0, ar_1d is empty, i.e., it contains no values, as are ar_2d and ar_3d, and the JSON representation is\n{ \"d\" : 0,\n \"ar_1d\" : [ ],\n \"ar_2d\" : [ ],\n \"ar_3d\" : [ ]\n}", + "crumbs": [ + "Appendices", + "JSON Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/log_prob_config.html", + "href": "cmdstan-guide/log_prob_config.html", + "title": "Extracting log probabilities and gradients for diagnostics", + "section": "", + "text": "CmdStan can return the computed log probability and the gradient with respect to a set of parameters.\nThis is similar to the diagnose subcommand, but the output format differs and the results here are not compared with those from finite differences.\nNote: Startup and data initialization costs mean that this method is not an efficient way to calculate these quantities. It is provided only for convenience and should not be used for serious computation.\n\n\nThis method takes 3 arguments:\n\njacobian - Whether or not the Jacobian adjustment for constrained parameters should be included in the gradient. Default value is true (include adjustment).\nconstrained_params - Input file of parameter values on the constrained scale. A single set of constrained parameters can be specified using JSON format. Alternatively, the input file can be a set of draws in StanCSV format.\nunconstrained_params - Input file (JSON or R dump) of parameter values on unconstrained scale. These files should contain a single variable, called params_r, which is a flattened vector of all unconstrained parameters. 
If this object is two dimensional, each entry should be a vector of the same form and the output will feature multiple rows.\n\nOnly one of constrained_params and unconstrained_params can be specified.\nFor more on the differences between constrained and unconstrained parameters, see the Stan reference manual section on variable transforms.\n\n\n\nThe output file consists of the following pieces of information:\n\nThe full set of configuration options available for the log_prob method is reported at the beginning of the output file as CSV comments.\nColumn headers, the first column is labelled lp__, and the rest are named after parameters. These will be the unconstrained parameters, regardless of whether constrained or unconstrained parameters were supplied as input.\nValues which correspond to the value of the log density (column 1) and the gradient with respect to each parameter (remaining columns).\n\nFor example, if we have a file called params.json:\n{\n \"theta\" : 0.1\n}\nWe can run the example model:\n/bernoulli log_prob constrained_params=params.json data file=bernoulli.data.json\nThis yields\n# method = log_prob\n# log_prob\n# unconstrained_params = (Default)\n# constrained_params = params.json\n# jacobian = true (Default)\n# id = 1 (Default)\n# data\n# file = bernoulli.data.json\n# init = 2 (Default)\n# random\n# seed = 2390820139 (Default)\n# output\n# file = output.csv (Default)\n# diagnostic_file = (Default)\n# refresh = 100 (Default)\n# sig_figs = 8 (Default)\n# profile_file = profile.csv (Default)\n# num_threads = 1 (Default)\nlp_,theta\n-7.856,1.8", + "crumbs": [ + "Running CmdStan", + "Extracting log probabilities and gradients for diagnostics" + ] + }, + { + "objectID": "cmdstan-guide/log_prob_config.html#configuration", + "href": "cmdstan-guide/log_prob_config.html#configuration", + "title": "Extracting log probabilities and gradients for diagnostics", + "section": "", + "text": "This method takes 3 arguments:\n\njacobian - Whether or not the 
Jacobian adjustment for constrained parameters should be included in the gradient. Default value is true (include adjustment).\nconstrained_params - Input file of parameter values on the constrained scale. A single set of constrained parameters can be specified using JSON format. Alternatively, the input file can be a set of draws in StanCSV format.\nunconstrained_params - Input file (JSON or R dump) of parameter values on unconstrained scale. These files should contain a single variable, called params_r, which is a flattened vector of all unconstrained parameters. If this object is two dimensional, each entry should be a vector of the same form and the output will feature multiple rows.\n\nOnly one of constrained_params and unconstrained_params can be specified.\nFor more on the differences between constrained and unconstrained parameters, see the Stan reference manual section on variable transforms.", + "crumbs": [ + "Running CmdStan", + "Extracting log probabilities and gradients for diagnostics" + ] + }, + { + "objectID": "cmdstan-guide/log_prob_config.html#csv-output", + "href": "cmdstan-guide/log_prob_config.html#csv-output", + "title": "Extracting log probabilities and gradients for diagnostics", + "section": "", + "text": "The output file consists of the following pieces of information:\n\nThe full set of configuration options available for the log_prob method is reported at the beginning of the output file as CSV comments.\nColumn headers, the first column is labelled lp__, and the rest are named after parameters. 
These will be the unconstrained parameters, regardless of whether constrained or unconstrained parameters were supplied as input.\nValues which correspond to the value of the log density (column 1) and the gradient with respect to each parameter (remaining columns).\n\nFor example, if we have a file called params.json:\n{\n \"theta\" : 0.1\n}\nWe can run the example model:\n/bernoulli log_prob constrained_params=params.json data file=bernoulli.data.json\nThis yields\n# method = log_prob\n# log_prob\n# unconstrained_params = (Default)\n# constrained_params = params.json\n# jacobian = true (Default)\n# id = 1 (Default)\n# data\n# file = bernoulli.data.json\n# init = 2 (Default)\n# random\n# seed = 2390820139 (Default)\n# output\n# file = output.csv (Default)\n# diagnostic_file = (Default)\n# refresh = 100 (Default)\n# sig_figs = 8 (Default)\n# profile_file = profile.csv (Default)\n# num_threads = 1 (Default)\nlp_,theta\n-7.856,1.8", + "crumbs": [ + "Running CmdStan", + "Extracting log probabilities and gradients for diagnostics" + ] + }, + { + "objectID": "cmdstan-guide/optimize_config.html", + "href": "cmdstan-guide/optimize_config.html", + "title": "Optimization", + "section": "", + "text": "The CmdStan executable can run Stan’s optimization algorithms, which provide a deterministic method to find the posterior mode. If the posterior is not convex, there is no guarantee Stan will be able to find the global optimum as opposed to a local optimum of log probability.\nThe full set of configuration options available for the optimize method is available by using the optimize help-all subcommand. The arguments with their requested values or defaults are also reported at the beginning of the optimizer console output and in the output CSV file’s comments.\nThe executable does not need to be recompiled in order to switch from sampling to optimization, and the data input format is the same. 
The following is a minimal call to Stan’s optimizer using defaults for everything but the location of the data file.\n> ./bernoulli optimize data file=bernoulli.data.json\nExecuting this command prints both output to the console and to a CSV file.\nThe first part of the console output reports on the configuration used. The above command uses all default configurations, therefore the optimizer used is the L-BFGS optimizer and its default initial stepsize and tolerances for monitoring convergence:\n ./bernoulli optimize data file=bernoulli.data.json\nmethod = optimize\n optimize\n algorithm = lbfgs (Default)\n lbfgs\n init_alpha = 0.001 (Default)\n tol_obj = 1e-12 (Default)\n tol_rel_obj = 10000 (Default)\n tol_grad = 1e-08 (Default)\n tol_rel_grad = 1e+07 (Default)\n tol_param = 1e-08 (Default)\n history_size = 5 (Default)\n jacobian = false (Default)\n iter = 2000 (Default)\n save_iterations = false (Default)\nid = 1 (Default)\ndata\n file = bernoulli.data.json\ninit = 2 (Default)\nrandom\n seed = 87122538 (Default)\noutput\n file = output.csv (Default)\n diagnostic_file = (Default)\n refresh = 100 (Default)\n sig_figs = 8 (Default)\n profile_file = profile.csv (Default)\n save_cmdstan_config = false (Default)\nnum_threads = 1 (Default)\nThe second part of the output indicates how well the algorithm fared, here converging and terminating normally. The numbers reported indicate that it took 5 iterations and 8 gradient evaluations. This is, not surprisingly, far fewer iterations than required for sampling; even fewer iterations would be used with less stringent user-specified convergence tolerances. The alpha value is for step size used. 
In the final state the change in parameters was roughly \\(0.002\\) and the length of the gradient roughly 3e-05 (\\(0.00003\\)).\nInitial log joint probability = -6.85653\n Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes\n 5 -5.00402 0.00184936 3.35074e-05 1 1 8\nOptimization terminated normally:\n Convergence detected: relative gradient magnitude is below tolerance\nThe output from optimization is written into the file output.csv by default. The output follows the same pattern as the output for sampling, first dumping the entire set of parameters used as comment lines:\n# stan_version_major = 2\n# stan_version_minor = 23\n# stan_version_patch = 0\n# model = bernoulli_model\n# method = optimize\n# optimize\n# algorithm = lbfgs (Default)\n# lbfgs\n# init_alpha = 0.001 (Default)\n# tol_obj = 1e-12 (Default)\n# tol_rel_obj = 10000 (Default)\n# tol_grad = 1e-08 (Default)\n# tol_rel_grad = 1e+07 (Default)\n# tol_param = 1e-08 (Default)\n# history_size = 5 (Default)\n# jacobian = false (Default)\n# iter = 2000 (Default)\n# save_iterations = false (Default)\nFollowing the config information are two lines of output, the CSV headers and the recorded values:\nlp__,converged__,theta\n-5.00402,31,0.200003\nNote that everything is a comment other than a line for the header, and a line for the values. Here, the header indicates the unnormalized log probability with lp__, algorithm status in converged__, and the model parameter theta. The maximum log probability is -5.0 and the posterior mode for theta is 0.20. The mode exactly matches what we would expect from the data. Because the prior was uniform, the result 0.20 represents the maximum likelihood estimate (MLE) for the very simple Bernoulli model. Note that no uncertainty is reported.\nAll of the optimizers stream per-iteration intermediate approximations to the command line console. The sub-argument save_iterations specifies whether or not to save the intermediate iterations to the output file. 
Allowed values are true or false. The default value is false, i.e., intermediate iterations are not saved to the output file. Running the optimizer with save_iterations=true writes both the initial log joint probability and values for all iterations to the output CSV file.\nRunning the example model with option save_iterations=true, i.e., the command\n> ./bernoulli optimize save_iterations=1 data file=bernoulli.data.json\nproduces CSV file output rows:\nlp__,converged__,theta\n-6.85653,0,0.493689\n-6.10128,0,0.420936\n-5.02953,0,0.22956\n-5.00517,0,0.206107\n-5.00403,0,0.200299\n-5.00402,31,0.200003\n\n\nThe converged__ column is used to indicate the status of the algorithm. It can take on the following values.\n\n\n\n\n\n\n\nNumeric value\nMeaning\n\n\n\n\n-1\nLine search failed to achieve a sufficient decrease, no more progress can be made\n\n\n0\nSuccessful step completed\n\n\n10\nConvergence detected: absolute parameter change was below tolerance\n\n\n20\nConvergence detected: absolute change in objective function was below tolerance\n\n\n21\nConvergence detected: relative change in objective function was below tolerance\n\n\n30\nConvergence detected: gradient norm is below tolerance\n\n\n31\nConvergence detected: relative gradient magnitude is below tolerance\n\n\n40\nMaximum number of iterations hit, may not be at an optima\n\n\n\nNote that not all algorithms can produce all codes, and some, such as 0, will only be observed if save_iterations is used.\nA comment explaining the final value (with the same contents as the above table) is added at the end of the CSV file.\n\n\n\nIf there are constrained parameters, Stan makes a transformation to an unconstrained space and runs the optimization algorithm in the unconstrained space.\nThe jacobian argument specifies whether or not the call to the model’s log probability function should include the log absolute Jacobian determinant of inverse parameter transforms.\nIf the Jacobian adjustment is not included (the 
default), the optimization returns parameter values that correspond to a mode of the target in the constrained space (if such mode exists). Thus this option is useful for any optimization where we want to find the mode in the original constrained parameter space.\nIf the Jacobian adjustment is included, the optimization returns parameter values that correspond to a mode in the unconstrained space. This is useful, for example, if we want to make a distributional approximation of the posterior at the mode (see Laplace sampling), as then the Jacobian adjustment needs to be included for correct results.\n\n\n\nThe algorithm argument specifies the optimization algorithm. This argument takes one of the following three values:\n\nlbfgs A quasi-Newton optimizer. This is the default optimizer and also much faster than the other optimizers.\nbfgs A quasi-Newton optimizer.\nnewton A Newton optimizer. This is the least efficient optimization algorithm, but has the advantage of setting its own stepsize.\n\nSee the Stan Reference Manual’s Optimization chapter for a description of these algorithms.\nAll of the optimizers stream per-iteration intermediate approximations to the command line console. The sub-argument save_iterations specifies whether or not to save the intermediate iterations to the output file. Allowed values are true or false. The default value is false, i.e., intermediate iterations are not saved to the output file.\n\n\n\nFor both BFGS and L-BFGS optimizers, convergence monitoring is controlled by a number of tolerance values, any one of which being satisfied causes the algorithm to terminate with a solution. See the BFGS and L-BFGS configuration section for details on the convergence tests.\nBoth BFGS and L-BFGS have the following configuration arguments:\n\ninit_alpha - The initial step size parameter. Must be a positive real number. Default value is \\(0.001\\).\ntol_obj - Convergence tolerance on changes in objective function value. 
Must be a positive real number. Default value is \\(10^{-12}\\).\ntol_rel_obj - Convergence tolerance on relative changes in objective function value. Must be a positive real number. Default value is \\(10^{4}\\).\ntol_grad - Convergence tolerance on the norm of the gradient. Must be a positive real number. Default value is \\(10^{-8}\\).\ntol_rel_grad - Convergence tolerance on the relative norm of the gradient. Must be a positive real number. Default value is \\(10^{7}\\).\ntol_param - Convergence tolerance on changes in parameter value. Must be a positive real number. Default value is \\(10^{-8}\\).\n\nThe init_alpha argument specifies the first step size to try on the initial iteration. If the first iteration takes a long time (and requires a lot of function evaluations), set init_alpha to be roughly equal to the alpha used in that first iteration. The default value is very small, which is reasonable for many problems but might be too large or too small depending on the objective function and initialization. Being too big or too small just means that the first iteration will take longer (i.e., require more gradient evaluations) before the line search finds a good step length.\nIn addition to the above, the L-BFGS algorithm has argument history_size which controls the size of the history it uses to approximate the Hessian. The value should be less than the dimensionality of the parameter space and, in general, relatively small values (\\(5\\)-\\(10\\)) are sufficient; the default value is \\(5\\).\nIf L-BFGS performs poorly but BFGS performs well, consider increasing the history size. Increasing history size will increase the memory usage, although this is unlikely to be an issue for typical Stan models.\n\n\n\nThere are no configuration parameters for the Newton optimizer. 
It is not recommended because of the slow Hessian calculation involving finite differences.", + "crumbs": [ + "Running CmdStan", + "Optimization" + ] + }, + { + "objectID": "cmdstan-guide/optimize_config.html#meaning-of-the-converged__-column", + "href": "cmdstan-guide/optimize_config.html#meaning-of-the-converged__-column", + "title": "Optimization", + "section": "", + "text": "The converged__ column is used to indicate the status of the algorithm. It can take on the following values.\n\n\n\n\n\n\n\nNumeric value\nMeaning\n\n\n\n\n-1\nLine search failed to achieve a sufficient decrease, no more progress can be made\n\n\n0\nSuccessful step completed\n\n\n10\nConvergence detected: absolute parameter change was below tolerance\n\n\n20\nConvergence detected: absolute change in objective function was below tolerance\n\n\n21\nConvergence detected: relative change in objective function was below tolerance\n\n\n30\nConvergence detected: gradient norm is below tolerance\n\n\n31\nConvergence detected: relative gradient magnitude is below tolerance\n\n\n40\nMaximum number of iterations hit, may not be at an optima\n\n\n\nNote that not all algorithms can produce all codes, and some, such as 0, will only be observed if save_iterations is used.\nA comment explaining the final value (with the same contents as the above table) is added at the end of the CSV file.", + "crumbs": [ + "Running CmdStan", + "Optimization" + ] + }, + { + "objectID": "cmdstan-guide/optimize_config.html#jacobian-adjustments", + "href": "cmdstan-guide/optimize_config.html#jacobian-adjustments", + "title": "Optimization", + "section": "", + "text": "If there are constrained parameters, Stan makes a transformation to an unconstrained space and runs the optimization algorithm in the unconstrained space.\nThe jacobian argument specifies whether or not the call to the model’s log probability function should include the log absolute Jacobian determinant of inverse parameter transforms.\nIf the Jacobian 
adjustment is not included (the default), the optimization returns parameter values that correspond to a mode of the target in the constrained space (if such mode exists). Thus this option is useful for any optimization where we want to find the mode in the original constrained parameter space.\nIf the Jacobian adjustment is included, the optimization returns parameter values that correspond to a mode in the unconstrained space. This is useful, for example, if we want to make a distributional approximation of the posterior at the mode (see Laplace sampling), as then the Jacobian adjustment needs to be included for correct results.", + "crumbs": [ + "Running CmdStan", + "Optimization" + ] + }, + { + "objectID": "cmdstan-guide/optimize_config.html#optimization-algorithms", + "href": "cmdstan-guide/optimize_config.html#optimization-algorithms", + "title": "Optimization", + "section": "", + "text": "The algorithm argument specifies the optimization algorithm. This argument takes one of the following three values:\n\nlbfgs A quasi-Newton optimizer. This is the default optimizer and also much faster than the other optimizers.\nbfgs A quasi-Newton optimizer.\nnewton A Newton optimizer. This is the least efficient optimization algorithm, but has the advantage of setting its own stepsize.\n\nSee the Stan Reference Manual’s Optimization chapter for a description of these algorithms.\nAll of the optimizers stream per-iteration intermediate approximations to the command line console. The sub-argument save_iterations specifies whether or not to save the intermediate iterations to the output file. Allowed values are true or false. 
The default value is false, i.e., intermediate iterations are not saved to the output file.", + "crumbs": [ + "Running CmdStan", + "Optimization" + ] + }, + { + "objectID": "cmdstan-guide/optimize_config.html#the-quasi-newton-optimizers", + "href": "cmdstan-guide/optimize_config.html#the-quasi-newton-optimizers", + "title": "Optimization", + "section": "", + "text": "For both BFGS and L-BFGS optimizers, convergence monitoring is controlled by a number of tolerance values, any one of which being satisfied causes the algorithm to terminate with a solution. See the BFGS and L-BFGS configuration section for details on the convergence tests.\nBoth BFGS and L-BFGS have the following configuration arguments:\n\ninit_alpha - The initial step size parameter. Must be a positive real number. Default value is \\(0.001\\).\ntol_obj - Convergence tolerance on changes in objective function value. Must be a positive real number. Default value is \\(10^{-12}\\).\ntol_rel_obj - Convergence tolerance on relative changes in objective function value. Must be a positive real number. Default value is \\(10^{4}\\).\ntol_grad - Convergence tolerance on the norm of the gradient. Must be a positive real number. Default value is \\(10^{-8}\\).\ntol_rel_grad - Convergence tolerance on the relative norm of the gradient. Must be a positive real number. Default value is \\(10^{7}\\).\ntol_param - Convergence tolerance on changes in parameter value. Must be a positive real number. Default value is \\(10^{-8}\\).\n\nThe init_alpha argument specifies the first step size to try on the initial iteration. If the first iteration takes a long time (and requires a lot of function evaluations), set init_alpha to be roughly equal to the alpha used in that first iteration. The default value is very small, which is reasonable for many problems but might be too large or too small depending on the objective function and initialization. 
Being too big or too small just means that the first iteration will take longer (i.e., require more gradient evaluations) before the line search finds a good step length.\nIn addition to the above, the L-BFGS algorithm has argument history_size which controls the size of the history it uses to approximate the Hessian. The value should be less than the dimensionality of the parameter space and, in general, relatively small values (\\(5\\)-\\(10\\)) are sufficient; the default value is \\(5\\).\nIf L-BFGS performs poorly but BFGS performs well, consider increasing the history size. Increasing history size will increase the memory usage, although this is unlikely to be an issue for typical Stan models.", + "crumbs": [ + "Running CmdStan", + "Optimization" + ] + }, + { + "objectID": "cmdstan-guide/optimize_config.html#the-newton-optimizer", + "href": "cmdstan-guide/optimize_config.html#the-newton-optimizer", + "title": "Optimization", + "section": "", + "text": "There are no configuration parameters for the Newton optimizer. It is not recommended because of the slow Hessian calculation involving finite differences.", + "crumbs": [ + "Running CmdStan", + "Optimization" + ] + }, + { + "objectID": "cmdstan-guide/pathfinder_config.html", + "href": "cmdstan-guide/pathfinder_config.html", + "title": "Pathfinder Method for Approximate Bayesian Inference", + "section": "", + "text": "The CmdStan method pathfinder uses the Pathfinder algorithm of Zhang et al. (2022), which is further described in the Stan Reference Manual.\nA single run of the Pathfinder algorithm generates a set of approximate draws. Inference is improved by running multiple Pathfinder instances and using Pareto-smoothed importance resampling (PSIS) of the resulting sets of draws. 
This better matches non-normal target densities and also eliminates minor modes.\nThe pathfinder method runs multi-path Pathfinder by default, which returns a PSIS sample over the draws from several individual (“single-path”) Pathfinder runs. Argument num_paths specifies the number of single-path Pathfinders, the default is \\(4\\). If num_paths is set to 1, then only one individual Pathfinder is run without the PSIS reweighting of the sample.\nThe full set of configuration options available for the pathfinder method is available by using the pathfinder help-all subcommand. The arguments with their requested values or defaults are also reported at the beginning of the algorithm’s console output and in the output CSV file’s comments.\nThe following is a minimal call to the Pathfinder algorithm using defaults for everything but the location of the data file.\n> ./bernoulli pathfinder data file=bernoulli.data.R\nExecuting this command prints output both to the console and to csv files.\nThe first part of the console output reports on the configuration used.\nmethod = pathfinder\n pathfinder\n init_alpha = 0.001 (Default)\n tol_obj = 1e-12 (Default)\n tol_rel_obj = 10000 (Default)\n tol_grad = 1e-08 (Default)\n tol_rel_grad = 1e+07 (Default)\n tol_param = 1e-08 (Default)\n history_size = 5 (Default)\n num_psis_draws = 1000 (Default)\n num_paths = 4 (Default)\n save_single_paths = false (Default)\n psis_resample = true (Default)\n calculate_lp = true (Default)\n max_lbfgs_iters = 1000 (Default)\n num_draws = 1000 (Default)\n num_elbo_draws = 25 (Default)\nid = 1 (Default)\ndata\n file = bernoulli.data.json\ninit = 2 (Default)\nrandom\n seed = 2790476610 (Default)\noutput\n file = output.csv (Default)\n diagnostic_file = (Default)\n refresh = 100 (Default)\n sig_figs = 8 (Default)\n profile_file = profile.csv (Default)\n save_cmdstan_config = false (Default)\nnum_threads = 1 (Default)\nThe rest of the output describes the progression of the algorithm.\nBy default, the 
Pathfinder algorithm runs 4 single-path Pathfinders in parallel, then uses importance resampling on the set of returned draws to produce the specified number of draws.\nPath [1] :Initial log joint density = -11.543343\nPath [1] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes\n 5 -6.748e+00 1.070e-03 1.707e-05 1.000e+00 1.000e+00 126 -6.220e+00 -6.220e+00\nPath [1] :Best Iter: [5] ELBO (-6.219833) evaluations: (126)\nPath [2] :Initial log joint density = -7.443345\nPath [2] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes\n 5 -6.748e+00 9.936e-05 3.738e-07 1.000e+00 1.000e+00 126 -6.164e+00 -6.164e+00\nPath [2] :Best Iter: [5] ELBO (-6.164015) evaluations: (126)\nPath [3] :Initial log joint density = -18.986308\nPath [3] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes\n 5 -6.748e+00 2.996e-04 4.018e-06 1.000e+00 1.000e+00 126 -6.201e+00 -6.201e+00\nPath [3] :Best Iter: [5] ELBO (-6.200559) evaluations: (126)\nPath [4] :Initial log joint density = -8.304453\nPath [4] : Iter log prob ||dx|| ||grad|| alpha alpha0 # evals ELBO Best ELBO Notes\n 5 -6.748e+00 2.814e-04 2.034e-06 1.000e+00 1.000e+00 126 -6.221e+00 -6.221e+00\nPath [4] :Best Iter: [3] ELBO (-6.161276) evaluations: (126)\nTotal log probability function evaluations:8404\n\n\n\nnum_psis_draws - Final number of draws from multi-path pathfinder. Must be a positive integer. Default value is \\(1000\\).\nnum_paths - Number of single pathfinders. Must be a positive integer. Default value is \\(4\\).\nsave_single_paths - When true, save outputs from single pathfinders. Valid values: [true, false]. Default is false.\nmax_lbfgs_iters - Maximum number of L-BFGS iterations. Must be a positive integer. Default value is \\(1000\\).\nnum_draws - Number of approximate posterior draws for each single pathfinder. Must be a positive integer. Default value is \\(1000\\). 
Can differ from num_psis_draws.\nnum_elbo_draws - Number of Monte Carlo draws to evaluate ELBO. Must be a positive integer. Default value is \\(25\\).\npsis_resample - If true, perform PSIS resampling on draws returned from individual pathfinders. If false, returns all num_paths * num_draws draws from the individual pathfinders. Valid values: [true, false]. Default is true.\ncalculate_lp - If true, log probabilities of the approximate draws are calculated and returned with the output. If false, each pathfinder will only calculate the lp values needed for the ELBO calculation. If false, PSIS resampling cannot be performed and the algorithm returns num_paths * num_draws samples. The output will still contain any lp values used when calculating ELBO scores within L-BFGS iterations. Valid values: [true, false]. Default is true.\n\n\n\n\nArguments init_alpha through history_size are the full set of arguments to the L-BFGS optimizer and have the same defaults for optimization.\n\n\n\nBy default, the pathfinder method uses 4 independent Pathfinder runs, each of which produces 1000 approximate draws, which are then importance resampled down to 1000 final draws. The importance resampled draws are output as a StanCSV file.\nThe CSV files have the following structure:\nThe initial CSV comment rows contain the complete set of CmdStan configuration options.\n...\n# method = pathfinder\n# pathfinder\n# init_alpha = 0.001 (Default)\n# tol_obj = 9.9999999999999998e-13 (Default)\n# tol_rel_obj = 10000 (Default)\n# tol_grad = 1e-08 (Default)\n# tol_rel_grad = 10000000 (Default)\n# tol_param = 1e-08 (Default)\n# history_size = 5 (Default)\n# num_psis_draws = 1000 (Default)\n# num_paths = 4 (Default)\n# psis_resample = 1 (Default)\n# calculate_lp = 1 (Default)\n# save_single_paths = 0 (Default)\n# max_lbfgs_iters = 1000 (Default)\n# num_draws = 1000 (Default)\n# num_elbo_draws = 25 (Default)\n...\nNext is the column header line, followed the set of approximate draws. 
The Pathfinder algorithm first outputs lp_approx__, the log density in the approximating distribution, and lp__, the log density in the target distribution, followed by estimates of the model parameters, transformed parameters, and generated quantities.\nlp_approx__,lp__,theta\n-2.4973, -8.2951, 0.0811852\n-0.87445, -7.06526, 0.160207\n-0.812285, -7.07124, 0.35819\n...\nThe final lines are comment lines which give timing information.\n# Elapsed Time: 0.016000 seconds (Pathfinders)\n# 0.003000 seconds (PSIS)\n# 0.019000 seconds (Total)\nPathfinder provides option save_single_paths which will save output from the single-path Pathfinder runs.\n\n\n\nThe boolean option save_single_paths is used to save both the draws and the ELBO iterations from the individual Pathfinder runs. When save_single_paths is true, the draws from each are saved to StanCSV files with the same format as the PSIS sample and the ELBO evaluations along the L-BFGS trajectory for each are saved as JSON. Given an output file name, CmdStan adds suffixes to the base filename to distinguish between the output files. For the default output file name output.csv and default number of runs (4), the resulting CSV files are\noutput.csv\noutput_path_1.csv\noutput_path_1.json\noutput_path_2.csv\noutput_path_2.json\noutput_path_3.csv\noutput_path_3.json\noutput_path_4.csv\noutput_path_4.json\nThe individual sample CSV files have the same structure as the PSIS sample CSV file. 
The JSON files contain information from each ELBO iteration.\nTo see how this works, we run Pathfinder on the centered-parameterization of the eight-schools model, where the posterior distribution has a funnel shape:\n> ./eight_schools pathfinder save_single_paths=true data file=eight_schools.data.json\nEach JSON file records the approximations to the target density at each point along the trajectory of the L-BFGS optimization algorithms.\n{\n \"0\": {\n \"iter\": 0,\n \"unconstrained_parameters\": [1.00595, -0.503687, 1.79367, 0.99083, 0.498077, -0.65816, 1.49176, -1.22647, 1.62911, 0.767445],\n \"grads\": [-0.868919, 0.45198, -0.107675, -0.0123304, 0.163172, 0.354362, -0.108746, 0.673306, -0.102268, -4.51445]\n },\n \"1\": {\n \"iter\": 1,\n \"unconstrained_parameters\": [1.00595, -0.503687, 1.79367, 0.99083, 0.498077, -0.65816, 1.49176, -1.22647, 1.62911, 0.767445],\n \"grads\": [-0.868919, 0.45198, -0.107675, -0.0123304, 0.163172, 0.354362, -0.108746, 0.673306, -0.102268, -4.51445],\n \"history_size\": 1,\n \"lbfgs_success\": true,\n \"pathfinder_success\": true,\n \"x_center\": [0.126047, -0.065048, 1.55708, 0.958509, 0.628075, -0.217041, 1.32032, -0.561338, 1.42988, 1.23213],\n \"logDetCholHk\": -2.6839,\n \"L_approx\": [[-0.0630456, -0.0187959], [0, 1.08328]],\n \"Qk\": [[-0.361073, 0.5624], [0.183922, -0.279474], [-0.0708175, 0.15715], [-0.00917823, 0.0215802], [0.0606019, -0.0814513], [0.164071, -0.285769], [-0.057723, 0.112428], [0.276376, -0.424348], [-0.0620524, 0.131786], [-0.846488, -0.531094]],\n \"alpha\": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n \"full\": false,\n \"lbfgs_note\": \"\"\n },\n ...,\n \"171\": {\n \"iter\": 171,\n \"unconstrained_parameters\": [1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, -35.7821],\n \"grads\": [2.66927e+15, -0.117312, -0.0639521, -2.66927e+15, -0.0445885, 0.0321579, 0.00499827, -0.163952, -0.032084, 6.4073],\n \"history_size\": 5,\n \"lbfgs_success\": true,\n \"pathfinder_success\": 
true,\n \"x_center\": [5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, -2.02979e+17],\n \"logDetCholHk\": 299.023,\n \"L_approx\": [[4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, -1.70162e+08], [0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 0, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 0, 0, 2.89552e+16]],\n \"Qk\": [],\n \"alpha\": [1.11027e-12, 2.24669e-12, 2.05603e-12, 3.71177e-12, 5.7855e-12, 1.80169e-12, 3.40291e-12, 2.29699e-12, 3.43423e-12, 1.25815e-08],\n \"full\": true,\n \"lbfgs_note\": \"\"\n },\n \"172\": {\n \"iter\": 172,\n \"unconstrained_parameters\": [1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, -35.801],\n \"grads\": [-0, -0.11731, -0.0639469, 0.0179895, -0.0445842, 0.0321643, 0.00500256, -0.163947, -0.0320824, 7],\n \"history_size\": 5,\n \"lbfgs_success\": false,\n \"pathfinder_success\": false,\n \"lbfgs_note\": \"\"\n }\n}\nOption num_paths=1 runs one single-path Pathfinder and the output CSV file contains the draws from that run without PSIS reweighting. The combination of arguments num_paths=1 save_single_paths=true creates just two output files, the CSV sample and the set of ELBO iterations. 
In this case, the default output file name is “output.csv” and the default diagnostic file name is “output.json”.", + "crumbs": [ + "Running CmdStan", + "Pathfinder Method for Approximate Bayesian Inference" + ] + }, + { + "objectID": "cmdstan-guide/pathfinder_config.html#pathfinder-configuration", + "href": "cmdstan-guide/pathfinder_config.html#pathfinder-configuration", + "title": "Pathfinder Method for Approximate Bayesian Inference", + "section": "", + "text": "num_psis_draws - Final number of draws from multi-path pathfinder. Must be a positive integer. Default value is \\(1000\\).\nnum_paths - Number of single pathfinders. Must be a positive integer. Default value is \\(4\\).\nsave_single_paths - When true, save outputs from single pathfinders. Valid values: [true, false]. Default is false.\nmax_lbfgs_iters - Maximum number of L-BFGS iterations. Must be a positive integer. Default value is \\(1000\\).\nnum_draws - Number of approximate posterior draws for each single pathfinder. Must be a positive integer. Default value is \\(1000\\). Can differ from num_psis_draws.\nnum_elbo_draws - Number of Monte Carlo draws to evaluate ELBO. Must be a positive integer. Default value is \\(25\\).\npsis_resample - If true, perform PSIS resampling on draws returned from individual pathfinders. If false, returns all num_paths * num_draws draws from the individual pathfinders. Valid values: [true, false]. Default is true.\ncalculate_lp - If true, log probabilities of the approximate draws are calculated and returned with the output. If false, each pathfinder will only calculate the lp values needed for the ELBO calculation. If false, PSIS resampling cannot be performed and the algorithm returns num_paths * num_draws samples. The output will still contain any lp values used when calculating ELBO scores within L-BFGS iterations. Valid values: [true, false]. 
Default is true.", + "crumbs": [ + "Running CmdStan", + "Pathfinder Method for Approximate Bayesian Inference" + ] + }, + { + "objectID": "cmdstan-guide/pathfinder_config.html#l-bfgs-configuration", + "href": "cmdstan-guide/pathfinder_config.html#l-bfgs-configuration", + "title": "Pathfinder Method for Approximate Bayesian Inference", + "section": "", + "text": "Arguments init_alpha through history_size are the full set of arguments to the L-BFGS optimizer and have the same defaults for optimization.", + "crumbs": [ + "Running CmdStan", + "Pathfinder Method for Approximate Bayesian Inference" + ] + }, + { + "objectID": "cmdstan-guide/pathfinder_config.html#pathfinder_csv", + "href": "cmdstan-guide/pathfinder_config.html#pathfinder_csv", + "title": "Pathfinder Method for Approximate Bayesian Inference", + "section": "", + "text": "By default, the pathfinder method uses 4 independent Pathfinder runs, each of which produces 1000 approximate draws, which are then importance resampled down to 1000 final draws. The importance resampled draws are output as a StanCSV file.\nThe CSV files have the following structure:\nThe initial CSV comment rows contain the complete set of CmdStan configuration options.\n...\n# method = pathfinder\n# pathfinder\n# init_alpha = 0.001 (Default)\n# tol_obj = 9.9999999999999998e-13 (Default)\n# tol_rel_obj = 10000 (Default)\n# tol_grad = 1e-08 (Default)\n# tol_rel_grad = 10000000 (Default)\n# tol_param = 1e-08 (Default)\n# history_size = 5 (Default)\n# num_psis_draws = 1000 (Default)\n# num_paths = 4 (Default)\n# psis_resample = 1 (Default)\n# calculate_lp = 1 (Default)\n# save_single_paths = 0 (Default)\n# max_lbfgs_iters = 1000 (Default)\n# num_draws = 1000 (Default)\n# num_elbo_draws = 25 (Default)\n...\nNext is the column header line, followed the set of approximate draws. 
The Pathfinder algorithm first outputs lp_approx__, the log density in the approximating distribution, and lp__, the log density in the target distribution, followed by estimates of the model parameters, transformed parameters, and generated quantities.\nlp_approx__,lp__,theta\n-2.4973, -8.2951, 0.0811852\n-0.87445, -7.06526, 0.160207\n-0.812285, -7.07124, 0.35819\n...\nThe final lines are comment lines which give timing information.\n# Elapsed Time: 0.016000 seconds (Pathfinders)\n# 0.003000 seconds (PSIS)\n# 0.019000 seconds (Total)\nPathfinder provides option save_single_paths which will save output from the single-path Pathfinder runs.", + "crumbs": [ + "Running CmdStan", + "Pathfinder Method for Approximate Bayesian Inference" + ] + }, + { + "objectID": "cmdstan-guide/pathfinder_config.html#single-path-pathfinder-outputs", + "href": "cmdstan-guide/pathfinder_config.html#single-path-pathfinder-outputs", + "title": "Pathfinder Method for Approximate Bayesian Inference", + "section": "", + "text": "The boolean option save_single_paths is used to save both the draws and the ELBO iterations from the individual Pathfinder runs. When save_single_paths is true, the draws from each are saved to StanCSV files with the same format as the PSIS sample and the ELBO evaluations along the L-BFGS trajectory for each are saved as JSON. Given an output file name, CmdStan adds suffixes to the base filename to distinguish between the output files. For the default output file name output.csv and default number of runs (4), the resulting CSV files are\noutput.csv\noutput_path_1.csv\noutput_path_1.json\noutput_path_2.csv\noutput_path_2.json\noutput_path_3.csv\noutput_path_3.json\noutput_path_4.csv\noutput_path_4.json\nThe individual sample CSV files have the same structure as the PSIS sample CSV file. 
The JSON files contain information from each ELBO iteration.\nTo see how this works, we run Pathfinder on the centered-parameterization of the eight-schools model, where the posterior distribution has a funnel shape:\n> ./eight_schools pathfinder save_single_paths=true data file=eight_schools.data.json\nEach JSON file records the approximations to the target density at each point along the trajectory of the L-BFGS optimization algorithms.\n{\n \"0\": {\n \"iter\": 0,\n \"unconstrained_parameters\": [1.00595, -0.503687, 1.79367, 0.99083, 0.498077, -0.65816, 1.49176, -1.22647, 1.62911, 0.767445],\n \"grads\": [-0.868919, 0.45198, -0.107675, -0.0123304, 0.163172, 0.354362, -0.108746, 0.673306, -0.102268, -4.51445]\n },\n \"1\": {\n \"iter\": 1,\n \"unconstrained_parameters\": [1.00595, -0.503687, 1.79367, 0.99083, 0.498077, -0.65816, 1.49176, -1.22647, 1.62911, 0.767445],\n \"grads\": [-0.868919, 0.45198, -0.107675, -0.0123304, 0.163172, 0.354362, -0.108746, 0.673306, -0.102268, -4.51445],\n \"history_size\": 1,\n \"lbfgs_success\": true,\n \"pathfinder_success\": true,\n \"x_center\": [0.126047, -0.065048, 1.55708, 0.958509, 0.628075, -0.217041, 1.32032, -0.561338, 1.42988, 1.23213],\n \"logDetCholHk\": -2.6839,\n \"L_approx\": [[-0.0630456, -0.0187959], [0, 1.08328]],\n \"Qk\": [[-0.361073, 0.5624], [0.183922, -0.279474], [-0.0708175, 0.15715], [-0.00917823, 0.0215802], [0.0606019, -0.0814513], [0.164071, -0.285769], [-0.057723, 0.112428], [0.276376, -0.424348], [-0.0620524, 0.131786], [-0.846488, -0.531094]],\n \"alpha\": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n \"full\": false,\n \"lbfgs_note\": \"\"\n },\n ...,\n \"171\": {\n \"iter\": 171,\n \"unconstrained_parameters\": [1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, 1.60479, -35.7821],\n \"grads\": [2.66927e+15, -0.117312, -0.0639521, -2.66927e+15, -0.0445885, 0.0321579, 0.00499827, -0.163952, -0.032084, 6.4073],\n \"history_size\": 5,\n \"lbfgs_success\": true,\n \"pathfinder_success\": 
true,\n \"x_center\": [5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, 5.58876e+15, -2.02979e+17],\n \"logDetCholHk\": 299.023,\n \"L_approx\": [[4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, 4.6852e+06, -1.70162e+08], [0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 2.19511e+13, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 0, 2.19511e+13, -7.97244e+14], [0, 0, 0, 0, 0, 0, 0, 0, 0, 2.89552e+16]],\n \"Qk\": [],\n \"alpha\": [1.11027e-12, 2.24669e-12, 2.05603e-12, 3.71177e-12, 5.7855e-12, 1.80169e-12, 3.40291e-12, 2.29699e-12, 3.43423e-12, 1.25815e-08],\n \"full\": true,\n \"lbfgs_note\": \"\"\n },\n \"172\": {\n \"iter\": 172,\n \"unconstrained_parameters\": [1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, 1.60531, -35.801],\n \"grads\": [-0, -0.11731, -0.0639469, 0.0179895, -0.0445842, 0.0321643, 0.00500256, -0.163947, -0.0320824, 7],\n \"history_size\": 5,\n \"lbfgs_success\": false,\n \"pathfinder_success\": false,\n \"lbfgs_note\": \"\"\n }\n}\nOption num_paths=1 runs one single-path Pathfinder and the output CSV file contains the draws from that run without PSIS reweighting. The combination of arguments num_paths=1 save_single_paths=true creates just two output files, the CSV sample and the set of ELBO iterations. 
In this case, the default output file name is “output.csv” and the default diagnostic file name is “output.json”.", + "crumbs": [ + "Running CmdStan", + "Pathfinder Method for Approximate Bayesian Inference" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html", + "href": "cmdstan-guide/rdump_apdx.html", + "title": "RDump Format for CmdStan", + "section": "", + "text": "NOTE: Although the RDump format is still supported, I/O with JSON is faster and recommended. See the chapter on JSON for more details.\nRDump format can be used to represent values for Stan variables. This format was introduced in SPLUS and is used in R, JAGS, and in BUGS (but with a different ordering).\nA dump file is structured as a sequence of variable definitions. Each variable is defined in terms of its dimensionality and its values. There are three kinds of variable declarations: - scalars - sequences - general arrays\n\n\nDump files can be created from R using RStan, via the rstan package function stan_rdump. Stan RDump files must be created via stan_rdump and not by R’s native dump function because R’s dump function uses a richer syntax than is supported by the underlying Stan i/o libraries.\n\n\n\nA simple scalar value can be thought of as having an empty list of dimensions. Its declaration in the dump format follows the SPLUS assignment syntax. For example, the following would constitute a valid dump file defining a single scalar variable y with value \\(17.2\\):\ny <- 17.2\n\n\n\nOne-dimensional arrays may be specified directly using the SPLUS sequence notation. The following example defines an integer-value and a real-valued sequence.\nn <- c(1,2,3) y <- c(2.0,3.0,9.7)\nArrays are provided without a declaration of dimensionality because the reader just counts the number of entries to determine the size of the array.\nSequence variables may alternatively be represented with R’s colon-based notation. 
For instance, the first example above could equivalently be written as\nn <- 1:3\nThe sequence denoted by 1:3 is of length \\(3\\), running from \\(1\\) to \\(3\\) inclusive. The colon notation allows sequences going from high to low. The following are equivalent:\nn <- 2:-2\nn <- c(2,1,0,-1,-2)\nAs a special case, a sequence of zeros can also be represented in the dump format by integer(x) and double(x), for type int and double, respectively. Here x is a non-negative integer to specify the length. If x is \\(0\\), it can be omitted. The following are some examples.\nx1 <- integer()\nx2 <- integer(0)\nx3 <- integer(2)\ny1 <- double()\ny2 <- double(0)\ny3 <- double(2)\n\n\n\nFor more than one dimension, the dump format uses a dimensionality specification. For example, the following defines a \\(2 \\times 3\\) array:\ny <- structure(c(1,2,3,4,5,6), .Dim = c(2,3))\nData is stored column-major, thus the values for y will be:\ny[1, 1] = 1\ny[1, 2] = 3\ny[1, 3] = 5\ny[2, 1] = 2\ny[2, 2] = 4\ny[2, 3] = 6\nThe structure keyword just wraps a sequence of values and a dimensionality declaration, which is itself just a sequence of non-negative integer values. The product of the dimensions must equal the length of the array.\nIf the values happen to form a contiguous sequence of integers, they may be written with colon notation. Thus the example above is equivalent to the following.\ny <- structure(1:6, .Dim = c(2,3))\nSequence notation can be used within any call to the generic c() function in R. In the above example, c(2,3) could be written as c(2:3).\nThe generalization of column-major indexing is last-index major indexing. Arrays of more than two dimensions are written in a last-index major form. 
For example,\nz <- structure(1:24, .Dim = c(2,3,4))\nproduces a three-dimensional int (assignable to real) array z with values:\nz[1, 1, 1] = 1\nz[2, 1, 1] = 2\nz[1, 2, 1] = 3\nz[2, 2, 1] = 4\nz[1, 3, 1] = 5\nz[2, 3, 1] = 6\nz[1, 1, 2] = 7\nz[2, 1, 2] = 8\nz[1, 2, 2] = 9\nz[2, 2, 2] = 10\nz[1, 3, 2] = 11\nz[2, 3, 2] = 12\nz[1, 1, 3] = 13\nz[2, 1, 3] = 14\nz[1, 2, 3] = 15\nz[2, 2, 3] = 16\nz[1, 3, 3] = 17\nz[2, 3, 3] = 18\nz[1, 1, 4] = 19\nz[2, 1, 4] = 20\nz[1, 2, 4] = 21\nz[2, 2, 4] = 22\nz[1, 3, 4] = 23\nz[2, 3, 4] = 24\nIf the underlying 3-D array is stored as a 1-D array in last-index major format, the innermost array elements will be contiguous.\nThe sequence of values inside structure can also be integer(x) or double(x). In particular, if one or more dimensions is zero, integer() can be put inside structure. For instance, the following example is supported by the dump format.\ny <- structure(integer(), .Dim = c(2,0))\n\n\n\nThe dump format for matrices and vectors, including arrays of matrices and vectors, is the same as that for arrays of the same shape.\n\n\nThe following three declarations have the same dump format for their data.\narray[K] real a;\nvector[K] b;\nrow_vector[K] c;\n\n\n\nThe following declarations have the same dump format.\narray[M, N] real a;\nmatrix[M, N] b;\n\n\n\nThe key to understanding arrays is that the array indexing comes before any of the container indexing. That is, an array of vectors is just that: each array element is a vector. 
See the chapter on array and matrix types in the user’s guide section of the language manual for more information.\nFor the dump data format, the following declarations have the same arrangement.\narray[M, N] real a;\nmatrix[M, N] b;\narray[M] vector[N] c;\narray[M] row_vector[N] d;\nSimilarly, the following also have the same dump format.\narray[P, M, N] real a;\narray[P] matrix[M, N] b;\narray[P, M] vector[N] c;\narray[P, M] row_vector[N] d;\n\n\n\n\nAt this time, there is no support for complex number input through the R dump format. As an alternative, the JSON input format supports complex numbers.\n\n\n\nThere is no declaration in a dump file that distinguishes integer versus continuous values. If a value in a dump file’s definition of a variable contains a decimal point (e.g., \\(132.3\\)) or uses scientific notation (e.g., \\(1.323e2\\)), Stan assumes that the values are real.\nFor a single value, if there is no decimal point, it may be assigned to an int or real variable in Stan. An array value may only be assigned to an int array if there is no decimal point or scientific notation in any of the values. This convention is compatible with the way R writes data.\nThe following dump file declares an integer value for y.\ny <- 2\nThis definition can be used for a Stan variable y declared as real or as int. Assigning an integer value to a real variable automatically promotes the integer value to a real value.\nInteger values may optionally be followed by L or l, denoting long integer values. The following example, where the type is explicit, is equivalent to the above.\ny <- 2L\nThe following dump file provides a real value for y.\ny <- 2.0\nEven though this is a round value, the occurrence of the decimal point in the value, \\(2.0\\), causes Stan to infer that y is real valued. This dump file may only be used for variables y declared as real in Stan.\n\n\nNumbers written in scientific notation may only be used for real values in Stan. 
R will write out the integer one million as \\(1e+06\\).\n\n\n\nStan’s reader supports infinite and not-a-number values for scalar quantities (see the section of the reference manual section of the language manual for more information on Stan’s numerical data types). Both infinite and not-a-number values are supported by Stan’s dump-format readers.\n\n\n\nValue\nPreferred Form\nAlternative Forms\n\n\n\n\npositive infinity\nInf\nInfinity, infinity\n\n\nnegative infinity\n-Inf\n-Infinity, -infinity\n\n\nnot a number\nNaN\n\n\n\n\nThese strings are not case sensitive, so inf may also be used for positive infinity, or NAN for not-a-number.\n\n\n\n\nIn order to support JAGS data files, variables may be double quoted. For instance, the following definition is legal in a dump file.\n\"y\" <- c(1,2,3) \\end{Verbatim}\n\n\n\nThe line breaks in a dump file are required to be consistent with the way R reads in data. Both of the following declarations are legal.\ny <- 2\ny <-\n3\nAlso following R, breaking before the assignment arrow are not allowed, so the following is invalid.\ny\n<- 2 # Syntax Error\nLines may also be broken in the middle of sequences declared using the c(...) notation., as well as between the comma following a sequence definition and the dimensionality declaration. 
For example, the following declaration of a \\(2 \\times 2 \\times 3\\) array is valid.\ny <-\nstructure(c(1,2,3,\n4,5,6,7,8,9,10,11,\n12), .Dim = c(2,2,\n3))\nBecause there are no decimal points in the values, the resulting dump file may be used for three-dimensional array variables declared as int or real.\n\n\n\nA more precise definition of the dump data format is provided by the following (mildly templated) Backus-Naur form grammar.\ndefinition ::= name <- value optional_semicolon\n\nname ::= char* | ''' char* ''' | '\"' char* '\"'\n\nvalue ::= value<int> | value<double>\n\nvalue<T> ::= T | seq<T> | zero_array<T> |\n'structure' '(' seq<T> ',' \".Dim\" '=' seq<int> ')' | 'structure'\n'(' zero_array<T> ',' \".Dim\" '=' seq<int> ')'\n\nseq<int> ::= int ':' int | cseq<int>\n\nzero_array<int> ::= \"integer\" '(' <non-negative int>? ')'\n\nzero_array<real> ::= \"double\" '(' <non-negative int>? ')'\n\nseq<real> ::= cseq<real>\n\ncseq<T> ::= 'c' '(' vseq<T> ')'\n\nvseq<T> ::= T | T ',' vseq<T>\nThe template parameters T will be set to either int or real. Because Stan allows promotion of integer values to real values, an integer sequence specification in the dump data format may be assigned to either an integer- or real-based variable in Stan.", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#creating-dump-files", + "href": "cmdstan-guide/rdump_apdx.html#creating-dump-files", + "title": "RDump Format for CmdStan", + "section": "", + "text": "Dump files can be created from R using RStan, via the rstan package function stan_rdump. 
Stan RDump files must be created via stan_rdump and not by R’s native dump function because R’s dump function uses a richer syntax than is supported by the underlying Stan i/o libraries.", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#scalar-variables", + "href": "cmdstan-guide/rdump_apdx.html#scalar-variables", + "title": "RDump Format for CmdStan", + "section": "", + "text": "A simple scalar value can be thought of as having an empty list of dimensions. Its declaration in the dump format follows the SPLUS assignment syntax. For example, the following would constitute a valid dump file defining a single scalar variable y with value \\(17.2\\):\ny <- 17.2", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#sequence-variables", + "href": "cmdstan-guide/rdump_apdx.html#sequence-variables", + "title": "RDump Format for CmdStan", + "section": "", + "text": "One-dimensional arrays may be specified directly using the SPLUS sequence notation. The following example defines an integer-value and a real-valued sequence.\nn <- c(1,2,3) y <- c(2.0,3.0,9.7)\nArrays are provided without a declaration of dimensionality because the reader just counts the number of entries to determine the size of the array.\nSequence variables may alternatively be represented with R’s colon-based notation. For instance, the first example above could equivalently be written as\nn <- 1:3\nThe sequence denoted by 1:3 is of length \\(3\\), running from \\(1\\) to \\(3\\) inclusive. The colon notation allows sequences going from high to low. The following are equivalent:\nn <- 2:-2\nn <- c(2,1,0,-1,-2)\nAs a special case, a sequence of zeros can also be represented in the dump format by integer(x) and double(x), for type int and double, respectively. Here x is a non-negative integer to specify the length. If x is \\(0\\), it can be omitted. 
The following are some examples.\nx1 <- integer()\nx2 <- integer(0)\nx3 <- integer(2)\ny1 <- double()\ny2 <- double(0)\ny3 <- double(2)", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#array-variables", + "href": "cmdstan-guide/rdump_apdx.html#array-variables", + "title": "RDump Format for CmdStan", + "section": "", + "text": "For more than one dimension, the dump format uses a dimensionality specification. For example, the following defines a \\(2 \\times 3\\) array:\ny <- structure(c(1,2,3,4,5,6), .Dim = c(2,3))\nData is stored column-major, thus the values for y will be:\ny[1, 1] = 1\ny[1, 2] = 3\ny[1, 3] = 5\ny[2, 1] = 2\ny[2, 2] = 4\ny[2, 3] = 6\nThe structure keyword just wraps a sequence of values and a dimensionality declaration, which is itself just a sequence of non-negative integer values. The product of the dimensions must equal the length of the array.\nIf the values happen to form a contiguous sequence of integers, they may be written with colon notation. Thus the example above is equivalent to the following.\ny <- structure(1:6, .Dim = c(2,3))\nSequence notation can be used within any call to the generic c() function in R. In the above example, c(2,3) could be written as c(2:3).\nThe generalization of column-major indexing is last-index major indexing. Arrays of more than two dimensions are written in a last-index major form. 
For example,\nz <- structure(1:24, .Dim = c(2,3,4))\nproduces a three-dimensional int (assignable to real) array z with values:\nz[1, 1, 1] = 1\nz[2, 1, 1] = 2\nz[1, 2, 1] = 3\nz[2, 2, 1] = 4\nz[1, 3, 1] = 5\nz[2, 3, 1] = 6\nz[1, 1, 2] = 7\nz[2, 1, 2] = 8\nz[1, 2, 2] = 9\nz[2, 2, 2] = 10\nz[1, 3, 2] = 11\nz[2, 3, 2] = 12\nz[1, 1, 3] = 13\nz[2, 1, 3] = 14\nz[1, 2, 3] = 15\nz[2, 2, 3] = 16\nz[1, 3, 3] = 17\nz[2, 3, 3] = 18\nz[1, 1, 4] = 19\nz[2, 1, 4] = 20\nz[1, 2, 4] = 21\nz[2, 2, 4] = 22\nz[1, 3, 4] = 23\nz[2, 3, 4] = 24\nIf the underlying 3-D array is stored as a 1-D array in last-index major format, the innermost array elements will be contiguous.\nThe sequence of values inside structure can also be integer(x) or double(x). In particular, if one or more dimensions is zero, integer() can be put inside structure. For instance, the following example is supported by the dump format.\ny <- structure(integer(), .Dim = c(2,0))", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#matrix--and-vector-valued-variables", + "href": "cmdstan-guide/rdump_apdx.html#matrix--and-vector-valued-variables", + "title": "RDump Format for CmdStan", + "section": "", + "text": "The dump format for matrices and vectors, including arrays of matrices and vectors, is the same as that for arrays of the same shape.\n\n\nThe following three declarations have the same dump format for their data.\narray[K] real a;\nvector[K] b;\nrow_vector[K] c;\n\n\n\nThe following declarations have the same dump format.\narray[M, N] real a;\nmatrix[M, N] b;\n\n\n\nThe key to understanding arrays is that the array indexing comes before any of the container indexing. That is, an array of vectors is just that: each array element is a vector. 
See the chapter on array and matrix types in the user’s guide section of the language manual for more information.\nFor the dump data format, the following declarations have the same arrangement.\narray[M, N] real a;\nmatrix[M, N] b;\narray[M] vector[N] c;\narray[M] row_vector[N] d;\nSimilarly, the following also have the same dump format.\narray[P, M, N] real a;\narray[P] matrix[M, N] b;\narray[P, M] vector[N] c;\narray[P, M] row_vector[N] d;", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#complex-valued-variables", + "href": "cmdstan-guide/rdump_apdx.html#complex-valued-variables", + "title": "RDump Format for CmdStan", + "section": "", + "text": "At this time, there is no support for complex number input through the R dump format. As an alternative, the JSON input format supports complex numbers.", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#integer--and-real-valued-variables", + "href": "cmdstan-guide/rdump_apdx.html#integer--and-real-valued-variables", + "title": "RDump Format for CmdStan", + "section": "", + "text": "There is no declaration in a dump file that distinguishes integer versus continuous values. If a value in a dump file’s definition of a variable contains a decimal point (e.g., \\(132.3\\)) or uses scientific notation (e.g., \\(1.323e2\\)), Stan assumes that the values are real.\nFor a single value, if there is no decimal point, it may be assigned to an int or real variable in Stan. An array value may only be assigned to an int array if there is no decimal point or scientific notation in any of the values. This convention is compatible with the way R writes data.\nThe following dump file declares an integer value for y.\ny <- 2\nThis definition can be used for a Stan variable y declared as real or as int. 
Assigning an integer value to a real variable automatically promotes the integer value to a real value.\nInteger values may optionally be followed by L or l, denoting long integer values. The following example, where the type is explicit, is equivalent to the above.\ny <- 2L\nThe following dump file provides a real value for y.\ny <- 2.0\nEven though this is a round value, the occurrence of the decimal point in the value, \\(2.0\\), causes Stan to infer that y is real valued. This dump file may only be used for variables y declared as real in Stan.\n\n\nNumbers written in scientific notation may only be used for real values in Stan. R will write out the integer one million as \\(1e+06\\).\n\n\n\nStan’s reader supports infinite and not-a-number values for scalar quantities (see the section of the reference manual section of the language manual for more information on Stan’s numerical data types). Both infinite and not-a-number values are supported by Stan’s dump-format readers.\n\n\n\nValue\nPreferred Form\nAlternative Forms\n\n\n\n\npositive infinity\nInf\nInfinity, infinity\n\n\nnegative infinity\n-Inf\n-Infinity, -infinity\n\n\nnot a number\nNaN\n\n\n\n\nThese strings are not case sensitive, so inf may also be used for positive infinity, or NAN for not-a-number.", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#quoted-variable-names", + "href": "cmdstan-guide/rdump_apdx.html#quoted-variable-names", + "title": "RDump Format for CmdStan", + "section": "", + "text": "In order to support JAGS data files, variables may be double quoted. 
For instance, the following definition is legal in a dump file.\n\"y\" <- c(1,2,3) \\end{Verbatim}", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#line-breaks", + "href": "cmdstan-guide/rdump_apdx.html#line-breaks", + "title": "RDump Format for CmdStan", + "section": "", + "text": "The line breaks in a dump file are required to be consistent with the way R reads in data. Both of the following declarations are legal.\ny <- 2\ny <-\n3\nAlso following R, breaking before the assignment arrow are not allowed, so the following is invalid.\ny\n<- 2 # Syntax Error\nLines may also be broken in the middle of sequences declared using the c(...) notation., as well as between the comma following a sequence definition and the dimensionality declaration. For example, the following declaration of a \\(2 \\times 2 \\times 3\\) array is valid.\ny <-\nstructure(c(1,2,3,\n4,5,6,7,8,9,10,11,\n12), .Dim = c(2,2,\n3))\nBecause there are no decimal points in the values, the resulting dump file may be used for three-dimensional array variables declared as int or real.", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/rdump_apdx.html#bnf-grammar-for-dump-data", + "href": "cmdstan-guide/rdump_apdx.html#bnf-grammar-for-dump-data", + "title": "RDump Format for CmdStan", + "section": "", + "text": "A more precise definition of the dump data format is provided by the following (mildly templated) Backus-Naur form grammar.\ndefinition ::= name <- value optional_semicolon\n\nname ::= char* | ''' char* ''' | '\"' char* '\"'\n\nvalue ::= value<int> | value<double>\n\nvalue<T> ::= T | seq<T> | zero_array<T> |\n'structure' '(' seq<T> ',' \".Dim\" '=' seq<int> ')' | 'structure'\n'(' zero_array<T> ',' \".Dim\" '=' seq<int> ')'\n\nseq<int> ::= int ':' int | cseq<int>\n\nzero_array<int> ::= \"integer\" '(' <non-negative int>? 
')'\n\nzero_array<real> ::= \"double\" '(' <non-negative int>? ')'\n\nseq<real> ::= cseq<real>\n\ncseq<T> ::= 'c' '(' vseq<T> ')'\n\nvseq<T> ::= T | T ',' vseq<T>\nThe template parameters T will be set to either int or real. Because Stan allows promotion of integer values to real values, an integer sequence specification in the dump data format may be assigned to either an integer- or real-based variable in Stan.", + "crumbs": [ + "Appendices", + "RDump Format for CmdStan" + ] + }, + { + "objectID": "cmdstan-guide/stanc.html", + "href": "cmdstan-guide/stanc.html", + "title": "stanc: Translating Stan to C++", + "section": "", + "text": "CmdStan translates Stan programs to C++ using the Stan compiler program which is included in the CmdStan release bin directory as program stanc. One can view the complete stanc documentation in the Stan User’s Guide.\nAs of release 2.22, the CmdStan Stan to C++ compiler is written in OCaml. This compiler is called “stanc3” and has has its own repository https://github.com/stan-dev/stanc3, from which pre-built binaries for Linux, Mac, and Windows can be downloaded.\n\n\nBefore the Stan compiler can be used, the binary stanc must be created. This can be done using the makefile as follows. For Mac and Linux:\nmake bin/stanc\nFor Windows:\nmake bin/stanc.exe\nThis is also done as part of the make build command.\n\n\n\nThe Stan compiler program stanc converts Stan programs to C++ concepts. If the compiler encounters syntax errors in the program, it will provide an error message indicating the location in the input where the failure occurred and reason for the failure. The following example illustrates a fully qualified call to stanc to generate the C++ translation of the example model bernoulli.stan. 
For Linux and Mac:\n> cd <cmdstan-home>\n> bin/stanc --o=bernoulli.hpp examples/bernoulli/bernoulli.stan\nFor Windows:\n> cd <cmdstan-home>\n> bin/stanc.exe --o=bernoulli.hpp examples/bernoulli/bernoulli.stan\nThe base name of the Stan program file determines the name of the C++ model class. Because this name is the name of a C++ class, it must start with an alphabetic character (a--z or A--Z) and contain only alphanumeric characters (a--z, A--Z, and 0--9) and underscores (_) and should not conflict with any C++ reserved keyword.\nThe C++ code implementing the class is written to the file bernoulli.hpp in the current directory. The final argument, bernoulli.stan, is the file from which to read the Stan program.\nIn practice, stanc is invoked indirectly, via the GNU Make utility, which contains rules that compile a Stan program to its corresponding executable. To build the simple Bernoulli model via make, we specify the name of the target executable file. On Mac and Linux, this is the name of the Stan program with the .stan omitted. On Windows, replace .stan with .exe, and make sure that the path is given with slashes and not backslashes. For Linux and Mac:\n> make examples/bernoulli/bernoulli\nFor Windows:\n> make examples/bernoulli/bernoulli.exe\nThe makefile rules first invoke the stanc compiler to translate the Stan model to C++ , then compiles and links the C++ code to a binary executable. The makefile variable STANCFLAGS can be used to to override the default arguments to stanc, e.g.,\n> make STANCFLAGS=\"--include-paths=~/foo\" examples/bernoulli/bernoulli", + "crumbs": [ + "Tools and Utilities", + "`stanc`: Translating Stan to C++" + ] + }, + { + "objectID": "cmdstan-guide/stanc.html#instantiating-the-stanc-binary", + "href": "cmdstan-guide/stanc.html#instantiating-the-stanc-binary", + "title": "stanc: Translating Stan to C++", + "section": "", + "text": "Before the Stan compiler can be used, the binary stanc must be created. 
This can be done using the makefile as follows. For Mac and Linux:\nmake bin/stanc\nFor Windows:\nmake bin/stanc.exe\nThis is also done as part of the make build command.", + "crumbs": [ + "Tools and Utilities", + "`stanc`: Translating Stan to C++" + ] + }, + { + "objectID": "cmdstan-guide/stanc.html#the-stan-compiler-program", + "href": "cmdstan-guide/stanc.html#the-stan-compiler-program", + "title": "stanc: Translating Stan to C++", + "section": "", + "text": "The Stan compiler program stanc converts Stan programs to C++ concepts. If the compiler encounters syntax errors in the program, it will provide an error message indicating the location in the input where the failure occurred and reason for the failure. The following example illustrates a fully qualified call to stanc to generate the C++ translation of the example model bernoulli.stan. For Linux and Mac:\n> cd <cmdstan-home>\n> bin/stanc --o=bernoulli.hpp examples/bernoulli/bernoulli.stan\nFor Windows:\n> cd <cmdstan-home>\n> bin/stanc.exe --o=bernoulli.hpp examples/bernoulli/bernoulli.stan\nThe base name of the Stan program file determines the name of the C++ model class. Because this name is the name of a C++ class, it must start with an alphabetic character (a--z or A--Z) and contain only alphanumeric characters (a--z, A--Z, and 0--9) and underscores (_) and should not conflict with any C++ reserved keyword.\nThe C++ code implementing the class is written to the file bernoulli.hpp in the current directory. The final argument, bernoulli.stan, is the file from which to read the Stan program.\nIn practice, stanc is invoked indirectly, via the GNU Make utility, which contains rules that compile a Stan program to its corresponding executable. To build the simple Bernoulli model via make, we specify the name of the target executable file. On Mac and Linux, this is the name of the Stan program with the .stan omitted. 
On Windows, replace .stan with .exe, and make sure that the path is given with slashes and not backslashes. For Linux and Mac:\n> make examples/bernoulli/bernoulli\nFor Windows:\n> make examples/bernoulli/bernoulli.exe\nThe makefile rules first invoke the stanc compiler to translate the Stan model to C++ , then compiles and links the C++ code to a binary executable. The makefile variable STANCFLAGS can be used to to override the default arguments to stanc, e.g.,\n> make STANCFLAGS=\"--include-paths=~/foo\" examples/bernoulli/bernoulli", + "crumbs": [ + "Tools and Utilities", + "`stanc`: Translating Stan to C++" + ] + }, + { + "objectID": "cmdstan-guide/variational_config.html", + "href": "cmdstan-guide/variational_config.html", + "title": "Variational Inference using ADVI", + "section": "", + "text": "Stan implements an automatic variational inference algorithm, called Automatic Differentiation Variational Inference (ADVI) Kucukelbir et al. (2017). ADVI uses Monte Carlo integration to approximate the variational objective function, the ELBO (evidence lower bound). ADVI optimizes the ELBO in the real-coordinate space using stochastic gradient ascent. The measures of convergence are similar to the relative tolerance scheme of Stan’s optimization algorithms.\nThe algorithm progression consists of an adaptation phase followed by a sampling phase. The adaptation phase finds a good value for the step size scaling parameter eta. The evidence lower bound (ELBO) is the variational objective function and is evaluated based on a Monte Carlo estimate. The variational inference algorithm in Stan is stochastic, which makes it challenging to assess convergence. The algorithm runs until the mean change in ELBO drops below the specified tolerance.\nThe full set of configuration options available for the variational method is available by using the variational help-all subcommand. 
The arguments with their requested values or defaults are also reported at the beginning of the algorithm’s console output and in the output CSV file’s comments.\nThe following is a minimal call to Stan’s variational inference algorithm using defaults for everything but the location of the data file.\n> ./bernoulli variational data file=bernoulli.data.R\nExecuting this command prints both output to the console and to a csv file.\nThe first part of the console output reports on the configuration used: the default option algorithm=meanfield and the default tolerances for monitoring the algorithm’s convergence.\nmethod = variational\n variational\n algorithm = meanfield (Default)\n meanfield\n iter = 10000 (Default)\n grad_samples = 1 (Default)\n elbo_samples = 100 (Default)\n eta = 1 (Default)\n adapt\n engaged = true (Default)\n iter = 50 (Default)\n tol_rel_obj = 0.01 (Default)\n eval_elbo = 100 (Default)\n output_samples = 1000 (Default)\nid = 1 (Default)\ndata\n file = bernoulli.data.json\ninit = 2 (Default)\nrandom\n seed = 2790599354 (Default)\noutput\n file = output.csv (Default)\n diagnostic_file = (Default)\n refresh = 100 (Default)\n sig_figs = 8 (Default)\n profile_file = profile.csv (Default)\n save_cmdstan_config = false (Default)\nnum_threads = 1 (Default)\nAfter the configuration has been displayed, informational and timing messages are output:\n------------------------------------------------------------\nEXPERIMENTAL ALGORITHM:\n This procedure has not been thoroughly tested and may be unstable\n or buggy. The interface is subject to change.\n------------------------------------------------------------\n\nGradient evaluation took 2.1e-05 seconds\n1000 transitions using 10 leapfrog steps per transition would take 0.21 seconds.\nAdjust your expectations accordingly!\nThe rest of the output describes the progression of the algorithm. An adaptation phase finds a good value for the step size scaling parameter eta. 
The evidence lower bound (ELBO) is the variational objective function and is evaluated based on a Monte Carlo estimate. The variational inference algorithm in Stan is stochastic, which makes it challenging to assess convergence. That is, while the algorithm appears to have converged in \\(\\sim\\) 250 iterations, the algorithm runs for another few thousand iterations until mean change in ELBO drops below the default tolerance of 0.01.\nBegin eta adaptation.\nIteration: 1 / 250 [ 0%] (Adaptation)\nIteration: 50 / 250 [ 20%] (Adaptation)\nIteration: 100 / 250 [ 40%] (Adaptation)\nIteration: 150 / 250 [ 60%] (Adaptation)\nIteration: 200 / 250 [ 80%] (Adaptation)\nSuccess! Found best value [eta = 1] earlier than expected.\n\nBegin stochastic gradient ascent.\n iter ELBO delta_ELBO_mean delta_ELBO_med notes\n 100 -6.131 1.000 1.000\n 200 -6.458 0.525 1.000\n 300 -6.300 0.359 0.051\n 400 -6.137 0.276 0.051\n 500 -6.243 0.224 0.027\n 600 -6.305 0.188 0.027\n 700 -6.289 0.162 0.025\n 800 -6.402 0.144 0.025\n 900 -6.103 0.133 0.025\n 1000 -6.314 0.123 0.027\n 1100 -6.348 0.024 0.025\n 1200 -6.244 0.020 0.018\n 1300 -6.293 0.019 0.017\n 1400 -6.250 0.017 0.017\n 1500 -6.241 0.015 0.010 MEDIAN ELBO CONVERGED\n\nDrawing a sample of size 1000 from the approximate posterior...\nCOMPLETED.\n\n\nStan implements two variational algorithms. They differ in the approximating distribution used in the unconstrained variable space. By default, ADVI uses option algorithm=meanfield. The algorithm argument specifies the variational algorithm.\n\nalgorithm=meanfield - Use a fully factorized Gaussian for the approximation. This is the default algorithm.\nalgorithm=fullrank Use a Gaussian with a full-rank covariance matrix for the approximation.\n\n\n\n\n\niter=<int> Maximum number of iterations. Must be \\(> 0\\). Default is \\(10000\\).\ngrad_samples=<int> Number of samples for Monte Carlo estimate of gradients. Must be \\(> 0\\). 
Default is \\(1\\).\nelbo_samples=<int> Number of samples for Monte Carlo estimate of ELBO (objective function). Must be \\(> 0\\). Default is \\(100\\).\neta=<double> Stepsize weighting parameter for adaptive stepsize sequence. Must be \\(> 0\\). Default is \\(1.0\\).\nadapt Warmup Adaptation keyword, takes sub-arguments:\n\nengaged=<boolean> Adaptation engaged? Valid values: [true, false]. Default is true.\niter=<int> Maximum number of adaptation iterations. Must be \\(> 0\\). Default is \\(50\\).\n\ntol_rel_obj=<double> Convergence tolerance on the relative norm of the objective. Must be \\(> 0\\). Default is \\(0.01\\).\neval_elbo=<int> Evaluate ELBO every Nth iteration. Must be \\(> 0\\). Default is 100.\noutput_samples=<int> Number of posterior samples to draw and save. Must be \\(> 0\\). Default is 1000.\n\n\n\n\nThe output file consists of the following pieces of information:\n\nThe full set of configuration options available for the variational method is reported at the beginning of the sampler output file as CSV comments.\nThe first three output columns are labelled lp__, log_p__, log_g__, the rest are the model parameters.\nThe stepsize adaptation information is output as CSV comments following column header row.\nThe following line contains the mean of the variational approximation.\nThe rest of the output contains output_samples number of draws sampled from the variational approximation.\n\nTo illustrate, we call Stan’s variational inference on the example model and data:\n> ./bernoulli variational data file=bernoulli.data.R\nBy default, the output file is output.csv.\nThe output follows the same pattern as the output for sampling, first dumping the entire set of parameters used as CSV comments:\n# stan_version_major = 2\n# stan_version_minor = 23\n# stan_version_patch = 0\n# model = bernoulli_model\n# method = variational\n# variational\n# algorithm = meanfield (Default)\n# meanfield\n# iter = 10000 (Default)\n# grad_samples = 1 (Default)\n# 
elbo_samples = 100 (Default)\n# eta = 1 (Default)\n# adapt\n# engaged = true (Default)\n# iter = 50 (Default)\n# tol_rel_obj = 0.01 (Default)\n# eval_elbo = 100 (Default)\n# output_samples = 1000 (Default)\n...\nNext, the column header row:\nlp__,log_p__,log_g__,theta\nAdditional comments provide stepsize adaptation information:\n# Stepsize adaptation complete.\n# eta = 1\nFollowed by the data rows. The first line is special — it is the mean of the variational approximation.\n0,0,0,0.214911\nThat is, the estimate for theta given the data is 0.2.\nThe rest of the output contains output_samples number of draws samples from the variational approximation.\nThe following is a sample based on this approximation:\n0,-14.0252,-5.21718,0.770397\n0,-7.05063,-0.10025,0.162061\n0,-6.75031,-0.0191099,0.241606\n...\nThe header indicates the unnormalized log probability with lp__. This is a legacy feature that we do not use for variational inference. The ELBO is not stored unless a diagnostic option is given.", + "crumbs": [ + "Running CmdStan", + "Variational Inference using ADVI" + ] + }, + { + "objectID": "cmdstan-guide/variational_config.html#variational-algorithms", + "href": "cmdstan-guide/variational_config.html#variational-algorithms", + "title": "Variational Inference using ADVI", + "section": "", + "text": "Stan implements two variational algorithms. They differ in the approximating distribution used in the unconstrained variable space. By default, ADVI uses option algorithm=meanfield. The algorithm argument specifies the variational algorithm.\n\nalgorithm=meanfield - Use a fully factorized Gaussian for the approximation. 
This is the default algorithm.\nalgorithm=fullrank Use a Gaussian with a full-rank covariance matrix for the approximation.", + "crumbs": [ + "Running CmdStan", + "Variational Inference using ADVI" + ] + }, + { + "objectID": "cmdstan-guide/variational_config.html#configuration", + "href": "cmdstan-guide/variational_config.html#configuration", + "title": "Variational Inference using ADVI", + "section": "", + "text": "iter=<int> Maximum number of iterations. Must be \\(> 0\\). Default is \\(10000\\).\ngrad_samples=<int> Number of samples for Monte Carlo estimate of gradients. Must be \\(> 0\\). Default is \\(1\\).\nelbo_samples=<int> Number of samples for Monte Carlo estimate of ELBO (objective function). Must be \\(> 0\\). Default is \\(100\\).\neta=<double> Stepsize weighting parameter for adaptive stepsize sequence. Must be \\(> 0\\). Default is \\(1.0\\).\nadapt Warmup Adaptation keyword, takes sub-arguments:\n\nengaged=<boolean> Adaptation engaged? Valid values: [true, false]. Default is true.\niter=<int> Maximum number of adaptation iterations. Must be \\(> 0\\). Default is \\(50\\).\n\ntol_rel_obj=<double> Convergence tolerance on the relative norm of the objective. Must be \\(> 0\\). Default is \\(0.01\\).\neval_elbo=<int> Evaluate ELBO every Nth iteration. Must be \\(> 0\\). Default is 100.\noutput_samples=<int> Number of posterior samples to draw and save. Must be \\(> 0\\). 
Default is 1000.", + "crumbs": [ + "Running CmdStan", + "Variational Inference using ADVI" + ] + }, + { + "objectID": "cmdstan-guide/variational_config.html#csv-output", + "href": "cmdstan-guide/variational_config.html#csv-output", + "title": "Variational Inference using ADVI", + "section": "", + "text": "The output file consists of the following pieces of information:\n\nThe full set of configuration options available for the variational method is reported at the beginning of the sampler output file as CSV comments.\nThe first three output columns are labelled lp__, log_p__, log_g__, the rest are the model parameters.\nThe stepsize adaptation information is output as CSV comments following column header row.\nThe following line contains the mean of the variational approximation.\nThe rest of the output contains output_samples number of draws sampled from the variational approximation.\n\nTo illustrate, we call Stan’s variational inference on the example model and data:\n> ./bernoulli variational data file=bernoulli.data.R\nBy default, the output file is output.csv.\nThe output follows the same pattern as the output for sampling, first dumping the entire set of parameters used as CSV comments:\n# stan_version_major = 2\n# stan_version_minor = 23\n# stan_version_patch = 0\n# model = bernoulli_model\n# method = variational\n# variational\n# algorithm = meanfield (Default)\n# meanfield\n# iter = 10000 (Default)\n# grad_samples = 1 (Default)\n# elbo_samples = 100 (Default)\n# eta = 1 (Default)\n# adapt\n# engaged = true (Default)\n# iter = 50 (Default)\n# tol_rel_obj = 0.01 (Default)\n# eval_elbo = 100 (Default)\n# output_samples = 1000 (Default)\n...\nNext, the column header row:\nlp__,log_p__,log_g__,theta\nAdditional comments provide stepsize adaptation information:\n# Stepsize adaptation complete.\n# eta = 1\nFollowed by the data rows. 
The first line is special — it is the mean of the variational approximation.\n0,0,0,0.214911\nThat is, the estimate for theta given the data is 0.2.\nThe rest of the output contains output_samples number of draws samples from the variational approximation.\nThe following is a sample based on this approximation:\n0,-14.0252,-5.21718,0.770397\n0,-7.05063,-0.10025,0.162061\n0,-6.75031,-0.0191099,0.241606\n...\nThe header indicates the unnormalized log probability with lp__. This is a legacy feature that we do not use for variational inference. The ELBO is not stored unless a diagnostic option is given.", + "crumbs": [ + "Running CmdStan", + "Variational Inference using ADVI" + ] + }, + { + "objectID": "functions-reference/binary_distributions.html", + "href": "functions-reference/binary_distributions.html", + "title": "Binary Distributions", + "section": "", + "text": "Binary probability distributions have support on \\(\\{0,1\\}\\), where 1 represents the value true and 0 the value false.\n\n\n\n\nIf \\(\\theta \\in [0,1]\\), then for \\(y \\in \\{0,1\\}\\), \\[\\begin{equation*}\n\\text{Bernoulli}(y~|~\\theta) = \\left\\{ \\begin{array}{ll} \\theta &\n\\text{if } y = 1, \\text{ and} \\\\ 1 - \\theta & \\text{if } y = 0.\n\\end{array} \\right.\n\\end{equation*}\\]\n\n\n\ny ~ bernoulli(theta)\nIncrement target log probability density with bernoulli_lupmf(y | theta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal bernoulli_lpmf(ints y | reals theta) The log Bernoulli probability mass of y given chance of success theta\nAvailable since 2.12\n \n\nreal bernoulli_lupmf(ints y | reals theta) The log Bernoulli probability mass of y given chance of success theta dropping constant additive terms\nAvailable since 2.25\n \n\nreal bernoulli_cdf(ints y | reals theta) The Bernoulli cumulative distribution function of y given chance of success theta\nAvailable since 2.0\n \n\nreal bernoulli_lcdf(ints y | reals theta) The log of the Bernoulli cumulative distribution function of y given 
chance of success theta\nAvailable since 2.12\n \n\nreal bernoulli_lccdf(ints y | reals theta) The log of the Bernoulli complementary cumulative distribution function of y given chance of success theta\nAvailable since 2.12\n \n\nints bernoulli_rng(reals theta) Generate a Bernoulli variate with chance of success theta or an array of Bernoulli variates given an array of thetas of the same dimensions; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\nStan also supplies a direct parameterization in terms of a logit-transformed chance-of-success parameter. This parameterization is more numerically stable if the chance-of-success parameter is on the logit scale, as with the linear predictor in a logistic regression.\n\n\nIf \\(\\alpha \\in \\mathbb{R}\\), then for \\(y \\in \\{0,1\\}\\), \\[\\begin{equation*}\n\\text{BernoulliLogit}(y~|~\\alpha) = \\text{Bernoulli}(y |\n\\text{logit}^{-1}(\\alpha)) = \\left\\{ \\begin{array}{ll}\n\\text{logit}^{-1}(\\alpha) & \\text{if } y = 1, \\text{ and} \\\\ 1 -\n\\text{logit}^{-1}(\\alpha) & \\text{if } y = 0. \\end{array} \\right.\n\\end{equation*}\\]\n\n\n\ny ~ bernoulli_logit(alpha)\nIncrement target log probability density with bernoulli_logit_lupmf(y | alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal bernoulli_logit_lpmf(ints y | reals alpha) The log Bernoulli probability mass of y given chance of success inv_logit(alpha)\nAvailable since 2.12\n \n\nreal bernoulli_logit_lupmf(ints y | reals alpha) The log Bernoulli probability mass of y given chance of success inv_logit(alpha) dropping constant additive terms\nAvailable since 2.25\n \n\nR bernoulli_logit_rng(reals alpha) Generate a Bernoulli variate with chance of success \\(\\text{logit}^{-1}(\\alpha)\\); may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\nStan also supplies a single function for a generalized linear model with Bernoulli distribution and logit link function, i.e. a function for a logistic regression. This provides a more efficient implementation of logistic regression than a manually written regression in terms of a Bernoulli distribution and matrix multiplication.\n\n\nIf \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta\\in\n\\mathbb{R}^m\\), then for \\(y \\in {\\{0,1\\}}^n\\), \\[\\begin{align*}\n&\\text{BernoulliLogitGLM}(y~|~x, \\alpha, \\beta) = \\prod_{1\\leq i \\leq\nn}\\text{Bernoulli}(y_i~|~\\text{logit}^{-1}(\\alpha_i + x_i\\cdot\n\\beta))\\\\ &= \\prod_{1\\leq i \\leq n} \\left\\{ \\begin{array}{ll}\n\\text{logit}^{-1}(\\alpha_i + \\sum_{1\\leq j\\leq m}x_{ij}\\cdot \\beta_j)\n& \\text{if } y_i = 1, \\text{ and} \\\\ 1 - \\text{logit}^{-1}(\\alpha_i +\n\\sum_{1\\leq j\\leq m}x_{ij}\\cdot \\beta_j) & \\text{if } y_i = 0.\n\\end{array} \\right. 
\\end{align*}\\]\n\n\n\ny ~ bernoulli_logit_glm(x, alpha, beta)\nIncrement target log probability density with bernoulli_logit_glm_lupmf(y | x, alpha, beta).\nAvailable since 2.25\n \n\n\n\n\n \n\nreal bernoulli_logit_glm_lpmf(int y | matrix x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.23\n \n\nreal bernoulli_logit_glm_lupmf(int y | matrix x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(int y | matrix x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.23\n \n\nreal bernoulli_logit_glm_lupmf(int y | matrix x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.23\n \n\nreal bernoulli_logit_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.23\n \n\nreal bernoulli_logit_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(array[] int y | matrix x, 
real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.18\n \n\nreal bernoulli_logit_glm_lupmf(array[] int y | matrix x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.18\n \n\nreal bernoulli_logit_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\narray[] int bernoulli_logit_glm_rng(matrix x, vector alpha, vector beta) Generate an array of Bernoulli variates with chances of success inv_logit(alpha + x * beta); may only be used in transformed data and generated quantities blocks.\nAvailable since 2.29\n \n\narray[] int bernoulli_logit_glm_rng(row_vector x, vector alpha, vector beta) Generate an array of Bernoulli variates with chances of success inv_logit(alpha + x * beta); may only be used in transformed data and generated quantities blocks.\nAvailable since 2.29", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Binary Distributions" + ] + }, + { + "objectID": "functions-reference/binary_distributions.html#bernoulli-distribution", + "href": "functions-reference/binary_distributions.html#bernoulli-distribution", + "title": "Binary Distributions", + "section": "", + "text": "If \\(\\theta \\in [0,1]\\), then for \\(y \\in \\{0,1\\}\\), \\[\\begin{equation*}\n\\text{Bernoulli}(y~|~\\theta) = \\left\\{ \\begin{array}{ll} \\theta &\n\\text{if } y = 1, \\text{ and} \\\\ 1 - \\theta & \\text{if } y = 0.\n\\end{array} \\right.\n\\end{equation*}\\]\n\n\n\ny ~ bernoulli(theta)\nIncrement 
target log probability density with bernoulli_lupmf(y | theta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal bernoulli_lpmf(ints y | reals theta) The log Bernoulli probability mass of y given chance of success theta\nAvailable since 2.12\n \n\nreal bernoulli_lupmf(ints y | reals theta) The log Bernoulli probability mass of y given chance of success theta dropping constant additive terms\nAvailable since 2.25\n \n\nreal bernoulli_cdf(ints y | reals theta) The Bernoulli cumulative distribution function of y given chance of success theta\nAvailable since 2.0\n \n\nreal bernoulli_lcdf(ints y | reals theta) The log of the Bernoulli cumulative distribution function of y given chance of success theta\nAvailable since 2.12\n \n\nreal bernoulli_lccdf(ints y | reals theta) The log of the Bernoulli complementary cumulative distribution function of y given chance of success theta\nAvailable since 2.12\n \n\nints bernoulli_rng(reals theta) Generate a Bernoulli variate with chance of success theta or an array of Bernoulli variates given an array of thetas of the same dimensions; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Binary Distributions" + ] + }, + { + "objectID": "functions-reference/binary_distributions.html#bernoulli-logit-distribution", + "href": "functions-reference/binary_distributions.html#bernoulli-logit-distribution", + "title": "Binary Distributions", + "section": "", + "text": "Stan also supplies a direct parameterization in terms of a logit-transformed chance-of-success parameter. 
This parameterization is more numerically stable if the chance-of-success parameter is on the logit scale, as with the linear predictor in a logistic regression.\n\n\nIf \\(\\alpha \\in \\mathbb{R}\\), then for \\(y \\in \\{0,1\\}\\), \\[\\begin{equation*}\n\\text{BernoulliLogit}(y~|~\\alpha) = \\text{Bernoulli}(y |\n\\text{logit}^{-1}(\\alpha)) = \\left\\{ \\begin{array}{ll}\n\\text{logit}^{-1}(\\alpha) & \\text{if } y = 1, \\text{ and} \\\\ 1 -\n\\text{logit}^{-1}(\\alpha) & \\text{if } y = 0. \\end{array} \\right.\n\\end{equation*}\\]\n\n\n\ny ~ bernoulli_logit(alpha)\nIncrement target log probability density with bernoulli_logit_lupmf(y | alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal bernoulli_logit_lpmf(ints y | reals alpha) The log Bernoulli probability mass of y given chance of success inv_logit(alpha)\nAvailable since 2.12\n \n\nreal bernoulli_logit_lupmf(ints y | reals alpha) The log Bernoulli probability mass of y given chance of success inv_logit(alpha) dropping constant additive terms\nAvailable since 2.25\n \n\nR bernoulli_logit_rng(reals alpha) Generate a Bernoulli variate with chance of success \\(\\text{logit}^{-1}(\\alpha)\\); may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Binary Distributions" + ] + }, + { + "objectID": "functions-reference/binary_distributions.html#bernoulli-logit-glm", + "href": "functions-reference/binary_distributions.html#bernoulli-logit-glm", + "title": "Binary Distributions", + "section": "", + "text": "Stan also supplies a single function for a generalized linear model with Bernoulli distribution and logit link function, i.e. a function for a logistic regression. 
This provides a more efficient implementation of logistic regression than a manually written regression in terms of a Bernoulli distribution and matrix multiplication.\n\n\nIf \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta\\in\n\\mathbb{R}^m\\), then for \\(y \\in {\\{0,1\\}}^n\\), \\[\\begin{align*}\n&\\text{BernoulliLogitGLM}(y~|~x, \\alpha, \\beta) = \\prod_{1\\leq i \\leq\nn}\\text{Bernoulli}(y_i~|~\\text{logit}^{-1}(\\alpha_i + x_i\\cdot\n\\beta))\\\\ &= \\prod_{1\\leq i \\leq n} \\left\\{ \\begin{array}{ll}\n\\text{logit}^{-1}(\\alpha_i + \\sum_{1\\leq j\\leq m}x_{ij}\\cdot \\beta_j)\n& \\text{if } y_i = 1, \\text{ and} \\\\ 1 - \\text{logit}^{-1}(\\alpha_i +\n\\sum_{1\\leq j\\leq m}x_{ij}\\cdot \\beta_j) & \\text{if } y_i = 0.\n\\end{array} \\right. \\end{align*}\\]\n\n\n\ny ~ bernoulli_logit_glm(x, alpha, beta)\nIncrement target log probability density with bernoulli_logit_glm_lupmf(y | x, alpha, beta).\nAvailable since 2.25\n \n\n\n\n\n \n\nreal bernoulli_logit_glm_lpmf(int y | matrix x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.23\n \n\nreal bernoulli_logit_glm_lupmf(int y | matrix x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(int y | matrix x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.23\n \n\nreal bernoulli_logit_glm_lupmf(int y | matrix x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha 
+ x * beta).\nAvailable since 2.23\n \n\nreal bernoulli_logit_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.23\n \n\nreal bernoulli_logit_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(array[] int y | matrix x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.18\n \n\nreal bernoulli_logit_glm_lupmf(array[] int y | matrix x, real alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\nreal bernoulli_logit_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta).\nAvailable since 2.18\n \n\nreal bernoulli_logit_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta) The log Bernoulli probability mass of y given chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.25\n \n\narray[] int bernoulli_logit_glm_rng(matrix x, vector alpha, vector beta) Generate an array of Bernoulli variates with chances of success inv_logit(alpha + x * beta); may only be used in transformed data and generated quantities blocks.\nAvailable since 2.29\n \n\narray[] int bernoulli_logit_glm_rng(row_vector x, vector alpha, vector beta) Generate an array of Bernoulli variates with 
chances of success inv_logit(alpha + x * beta); may only be used in transformed data and generated quantities blocks.\nAvailable since 2.29", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Binary Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html", + "href": "functions-reference/bounded_discrete_distributions.html", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "Bounded discrete probability functions have support on \\(\\{ 0, \\ldots,\nN \\}\\) for some upper bound \\(N\\).\n\n\n\n\nSuppose \\(N \\in \\mathbb{N}\\) and \\(\\theta \\in [0,1]\\), and \\(n \\in\n\\{0,\\ldots,N\\}\\). \\[\\begin{equation*} \\text{Binomial}(n~|~N,\\theta) = \\binom{N}{n}\n\\theta^n (1 - \\theta)^{N - n}. \\end{equation*}\\]\n\n\n\n\\[\\begin{eqnarray*} \\log \\text{Binomial}(n~|~N,\\theta) & = & \\log\n\\Gamma(N+1) - \\log \\Gamma(n + 1) - \\log \\Gamma(N- n + 1) \\\\[4pt] & & {\n} + n \\log \\theta + (N - n) \\log (1 - \\theta), \\end{eqnarray*}\\]\n\n\n\n\\[\\begin{equation*} \\frac{\\partial}{\\partial \\theta} \\log \\text{Binomial}(n~|~N,\\theta)\n= \\frac{n}{\\theta} - \\frac{N - n}{1 - \\theta} \\end{equation*}\\]\n\n\n\nn ~ binomial(N, theta)\nIncrement target log probability density with binomial_lupmf(n | N, theta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal binomial_lpmf(ints n | ints N, reals theta) The log binomial probability mass of n successes in N trials given chance of success theta\nAvailable since 2.12\n \n\nreal binomial_lupmf(ints n | ints N, reals theta) The log binomial probability mass of n successes in N trials given chance of success theta dropping constant additive terms\nAvailable since 2.25\n \n\nreal binomial_cdf(ints n | ints N, reals theta) The binomial cumulative distribution function of n successes in N trials given chance of success theta\nAvailable since 2.0\n \n\nreal binomial_lcdf(ints n | ints N, reals theta) The log of the binomial cumulative 
distribution function of n successes in N trials given chance of success theta\nAvailable since 2.12\n \n\nreal binomial_lccdf(ints n | ints N, reals theta) The log of the binomial complementary cumulative distribution function of n successes in N trials given chance of success theta\nAvailable since 2.12\n \n\nR binomial_rng(ints N, reals theta) Generate a binomial variate with N trials and chance of success theta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\nStan also provides a version of the binomial probability mass function distribution with the chance of success parameterized on the unconstrained logistic scale.\n\n\nSuppose \\(N \\in \\mathbb{N}\\), \\(\\alpha \\in \\mathbb{R}\\), and \\(n \\in\n\\{0,\\ldots,N\\}\\). Then \\[\\begin{eqnarray*}\n\\text{BinomialLogit}(n~|~N,\\alpha) & = &\n\\text{Binomial}(n~|~N,\\text{logit}^{-1}(\\alpha)) \\\\[6pt] & = &\n\\binom{N}{n} \\left( \\text{logit}^{-1}(\\alpha) \\right)^{n} \\left( 1 -\n\\text{logit}^{-1}(\\alpha) \\right)^{N - n}. 
\\end{eqnarray*}\\]\n\n\n\n\\[\\begin{eqnarray*} \\log \\text{BinomialLogit}(n~|~N,\\alpha) & = & \\log\n\\Gamma(N+1) - \\log \\Gamma(n + 1) - \\log \\Gamma(N- n + 1) \\\\[4pt] & &\n{ } + n \\log \\text{logit}^{-1}(\\alpha) + (N - n) \\log \\left( 1 -\n\\text{logit}^{-1}(\\alpha) \\right), \\end{eqnarray*}\\]\n\n\n\n\\[\\begin{equation*} \\frac{\\partial}{\\partial \\alpha} \\log\n\\text{BinomialLogit}(n~|~N,\\alpha) =\nn \\cdot \\text{logit}^{-1}(-\\alpha) - (N - n) \\cdot\n\\text{logit}^{-1}(\\alpha) \\end{equation*}\\]\n\n\n\nn ~ binomial_logit(N, alpha)\nIncrement target log probability density with binomial_logit_lupmf(n | N, alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal binomial_logit_lpmf(ints n | ints N, reals alpha) The log binomial probability mass of n successes in N trials given logit-scaled chance of success alpha\nAvailable since 2.12\n \n\nreal binomial_logit_lupmf(ints n | ints N, reals alpha) The log binomial probability mass of n successes in N trials given logit-scaled chance of success alpha dropping constant additive terms\nAvailable since 2.25\n\n\n\n\nStan also supplies a single function for a generalized linear model with binomial distribution and logit link function, i.e., a function for logistic regression with aggregated outcomes. This provides a more efficient implementation of logistic regression than a manually written regression in terms of a binomial distribution and matrix multiplication.\n\n\nSuppose \\(N \\in \\mathbb{N}\\), \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta \\in \\mathbb{R}^m\\), and \\(n \\in\n\\{0,\\ldots,N\\}\\). 
Then \\[\\begin{align*}\n &\\text{BinomialLogitGLM}(n~|~N, x, \\alpha, \\beta) = \\text{Binomial}(n~|~N,\\text{logit}^{-1}(\\alpha_i + x_i \\cdot \\beta)) \\\\\n &= \\binom{N}{n} \\left( \\text{logit}^{-1}(\\alpha_i + \\sum_{1\\leq j\\leq m}x_{ij}\\cdot \\beta_j) \\right)^{n} \\left( 1 - \\text{logit}^{-1}(\\alpha_i + \\sum_{1\\leq j\\leq m}x_{ij}\\cdot \\beta_j) \\right)^{N - n}.\n\\end{align*}\\]\n\n\n\nn ~ binomial_logit_glm(N, x, alpha, beta)\nIncrement target log probability density with binomial_logit_glm_lupmf(n | N, x, alpha, beta).\nAvailable since 2.34\n \n\n\n\n\n \n\nreal binomial_logit_glm_lpmf(int n | int N, matrix x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(int n | int N, matrix x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lpmf(int n | int N, matrix x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(int n | int N, matrix x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lpmf(array[] int n | array[] int N, row_vector x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(array[] int n | array[] int N, row_vector x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n 
\n\nreal binomial_logit_glm_lpmf(array[] int n | array[] int N, row_vector x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(array[] int n | array[] int N, row_vector x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lpmf(array[] int n | array[] int N, matrix x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(array[] int n | array[] int N, matrix x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lpmf(array[] int n | array[] int N, matrix x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(array[] int n | array[] int N, matrix x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n\n\n\n\n\n\nIf \\(N \\in \\mathbb{N}\\), \\(\\alpha \\in \\mathbb{R}^+\\), and \\(\\beta \\in\n\\mathbb{R}^+\\), then for \\(n \\in \\{0,\\ldots,N\\}\\), \\[\\begin{equation*}\n\\text{BetaBinomial}(n~|~N,\\alpha,\\beta) = \\binom{N}{n}\n\\frac{\\mathrm{B}(n+\\alpha, N -n + \\beta)}{\\mathrm{B}(\\alpha,\\beta)},\n\\end{equation*}\\] where the beta function \\(\\mathrm{B}(u,v)\\) is defined for \\(u \\in\n\\mathbb{R}^+\\) and \\(v \\in \\mathbb{R}^+\\) by \\[\\begin{equation*} \\mathrm{B}(u,v) =\n\\frac{\\Gamma(u) 
\\ \\Gamma(v)}{\\Gamma(u + v)}. \\end{equation*}\\]\n\n\n\nn ~ beta_binomial(N, alpha, beta)\nIncrement target log probability density with beta_binomial_lupmf(n | N, alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal beta_binomial_lpmf(ints n | ints N, reals alpha, reals beta) The log beta-binomial probability mass of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta\nAvailable since 2.12\n \n\nreal beta_binomial_lupmf(ints n | ints N, reals alpha, reals beta) The log beta-binomial probability mass of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal beta_binomial_cdf(ints n | ints N, reals alpha, reals beta) The beta-binomial cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta\nAvailable since 2.0\n \n\nreal beta_binomial_lcdf(ints n | ints N, reals alpha, reals beta) The log of the beta-binomial cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta\nAvailable since 2.12\n \n\nreal beta_binomial_lccdf(ints n | ints N, reals alpha, reals beta) The log of the beta-binomial complementary cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta\nAvailable since 2.12\n \n\nR beta_binomial_rng(ints N, reals alpha, reals beta) Generate a beta-binomial variate with N trials, prior success count (plus one) of alpha, and prior failure count (plus one) of beta; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(a \\in \\mathbb{N}\\), \\(b \\in \\mathbb{N}\\), and \\(N \\in\n\\{0,\\ldots,a+b\\}\\), then for \\(n \\in \\{\\max(0,N-b),\\ldots,\\min(a,N)\\}\\), \\[\\begin{equation*} \\text{Hypergeometric}(n~|~N,a,b) = \\frac{\\normalsize{\\binom{a}{n}\n\\binom{b}{N - n}}} {\\normalsize{\\binom{a + b}{N}}}. \\end{equation*}\\]\n\n\n\nn ~ hypergeometric(N, a, b)\nIncrement target log probability density with hypergeometric_lupmf(n | N, a, b).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal hypergeometric_lpmf(int n | int N, int a, int b) The log hypergeometric probability mass of n successes in N trials given total success count of a and total failure count of b\nAvailable since 2.12\n \n\nreal hypergeometric_lupmf(int n | int N, int a, int b) The log hypergeometric probability mass of n successes in N trials given total success count of a and total failure count of b dropping constant additive terms\nAvailable since 2.25\n \n\nint hypergeometric_rng(int N, int a, int b) Generate a hypergeometric variate with N trials, total success count of a, and total failure count of b; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(N \\in \\mathbb{N}\\), \\(N > 0\\), and if \\(\\theta \\in \\mathbb{R}^N\\) forms an \\(N\\)-simplex (i.e., has nonnegative entries summing to one), then for \\(y \\in \\{1,\\ldots,N\\}\\), \\[\\begin{equation*} \\text{Categorical}(y~|~\\theta) =\n\\theta_y. \\end{equation*}\\] In addition, Stan provides a log-odds scaled categorical distribution, \\[\\begin{equation*} \\text{CategoricalLogit}(y~|~\\beta) =\n\\text{Categorical}(y~|~\\text{softmax}(\\beta)). 
\\end{equation*}\\] See the definition of softmax for the definition of the softmax function.\n\n\n\ny ~ categorical(theta)\nIncrement target log probability density with categorical_lupmf(y | theta) dropping constant additive terms.\nAvailable since 2.0\n \n\n\n\n\ny ~ categorical_logit(beta)\nIncrement target log probability density with categorical_logit_lupmf(y | beta).\nAvailable since 2.4\n \n\n\n\n\nAll of the categorical distributions are vectorized so that the outcome y can be a single integer (type int) or an array of integers (type array[] int).\n \n\nreal categorical_lpmf(ints y | vector theta) The log categorical probability mass function with outcome(s) y in \\(1:N\\) given \\(N\\)-vector of outcome probabilities theta. The parameter theta must have non-negative entries that sum to one, but it need not be a variable declared as a simplex.\nAvailable since 2.12\n \n\nreal categorical_lupmf(ints y | vector theta) The log categorical probability mass function with outcome(s) y in \\(1:N\\) given \\(N\\)-vector of outcome probabilities theta dropping constant additive terms. 
The parameter theta must have non-negative entries that sum to one, but it need not be a variable declared as a simplex.\nAvailable since 2.25\n \n\nreal categorical_logit_lpmf(ints y | vector beta) The log categorical probability mass function with outcome(s) y in \\(1:N\\) given log-odds of outcomes beta.\nAvailable since 2.12\n \n\nreal categorical_logit_lupmf(ints y | vector beta) The log categorical probability mass function with outcome(s) y in \\(1:N\\) given log-odds of outcomes beta dropping constant additive terms.\nAvailable since 2.25\n \n\nint categorical_rng(vector theta) Generate a categorical variate with \\(N\\)-simplex distribution parameter theta; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0\n \n\nint categorical_logit_rng(vector beta) Generate a categorical variate with outcome in range \\(1:N\\) from log-odds vector beta; may only be used in transformed data and generated quantities blocks\nAvailable since 2.16\n\n\n\n\nStan also supplies a single function for a generalized linear model with categorical distribution and logit link function, i.e. a function for a softmax regression. This provides a more efficient implementation of softmax regression than a manually written regression in terms of a categorical distribution and matrix multiplication.\nNote that the implementation does not put any restrictions on the coefficient matrix \\(\\beta\\). It is up to the user to use a reference category, a suitable prior or some other means of identifiability. 
See Multi-logit in the Stan User’s Guide.\n\n\nIf \\(N,M,K \\in \\mathbb{N}\\), \\(N,M,K > 0\\), and if \\(x\\in \\mathbb{R}^{M\\times K}, \\alpha \\in \\mathbb{R}^N, \\beta\\in \\mathbb{R}^{K\\cdot N}\\), then for \\(y \\in \\{1,\\ldots,N\\}^M\\), \\[\\begin{equation*}\n\\begin{split}\n\\text{CategoricalLogitGLM}(y~|~x,\\alpha,\\beta)\n& = \\prod_{1\\leq i \\leq M}\\text{CategoricalLogit}(y_i~|~\\alpha+x_i\\cdot\\beta) \\\\[8pt]\n& = \\prod_{1\\leq i \\leq M}\\text{Categorical}(y_i~|~softmax(\\alpha+x_i\\cdot\\beta)).\n\\end{split}\n\\end{equation*}\\] See the definition of softmax for the definition of the softmax function.\n\n\n\ny ~ categorical_logit_glm(x, alpha, beta)\nIncrement target log probability density with categorical_logit_glm_lupmf(y | x, alpha, beta).\nAvailable since 2.23\n \n\n\n\n\n \n\nreal categorical_logit_glm_lpmf(int y | row_vector x, vector alpha, matrix beta) The log categorical probability mass function with outcome y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta.\nAvailable since 2.23\n \n\nreal categorical_logit_glm_lupmf(int y | row_vector x, vector alpha, matrix beta) The log categorical probability mass function with outcome y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal categorical_logit_glm_lpmf(int y | matrix x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta.\nAvailable since 2.23\n \n\nreal categorical_logit_glm_lupmf(int y | matrix x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal categorical_logit_glm_lpmf(array[] int y | row_vector x, vector alpha, matrix beta) The log categorical probability mass function 
with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta.\nAvailable since 2.23\n \n\nreal categorical_logit_glm_lupmf(array[] int y | row_vector x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal categorical_logit_glm_lpmf(array[] int y | matrix x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta.\nAvailable since 2.23\n \n\nreal categorical_logit_glm_lupmf(array[] int y | matrix x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n\n\n\n\n\n\nIf \\(l, u \\in \\mathbb{Z}\\) are lower and upper bounds (\\(l \\le u\\)), then for any integer \\(y \\in \\{l,\\ldots,u\\}\\), \\[\\begin{equation*} \\text{DiscreteRange}(y ~|~ l, u) =\n\\frac{1}{u - l + 1}. 
\\end{equation*}\\]\n\n\n\ny ~ discrete_range(l, u)\nIncrement the target log probability density with discrete_range_lupmf(y | l, u) dropping constant additive terms.\nAvailable since 2.26\n \n\n\n\n\nAll of the discrete range distributions are vectorized so that the outcome y and the bounds l, u can be a single integer (type int) or an array of integers (type array[] int).\n \n\nreal discrete_range_lpmf(ints y | ints l, ints u) The log probability mass function with outcome(s) y in \\(l:u\\).\nAvailable since 2.26\n \n\nreal discrete_range_lupmf(ints y | ints l, ints u) The log probability mass function with outcome(s) y in \\(l:u\\) dropping constant additive terms.\nAvailable since 2.26\n \n\nreal discrete_range_cdf(ints y | ints l, ints u) The discrete range cumulative distribution function for the given y, lower and upper bounds.\nAvailable since 2.26\n \n\nreal discrete_range_lcdf(ints y | ints l, ints u) The log of the discrete range cumulative distribution function for the given y, lower and upper bounds.\nAvailable since 2.26\n \n\nreal discrete_range_lccdf(ints y | ints l, ints u) The log of the discrete range complementary cumulative distribution function for the given y, lower and upper bounds.\nAvailable since 2.26\n \n\nints discrete_range_rng(ints l, ints u) Generate a discrete variate between the given lower and upper bounds; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.26\n\n\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\) with \\(K > 2\\), \\(c \\in \\mathbb{R}^{K-1}\\) such that \\(c_k < c_{k+1}\\) for \\(k \\in \\{1,\\ldots,K-2\\}\\), and \\(\\eta \\in\n\\mathbb{R}\\), then for \\(k \\in \\{1,\\ldots,K\\}\\), \\[\\begin{equation*}\n\\text{OrderedLogistic}(k~|~\\eta,c) = \\left\\{ \\begin{array}{ll} 1 -\n\\text{logit}^{-1}(\\eta - c_1) & \\text{if } k = 1, \\\\[4pt]\n\\text{logit}^{-1}(\\eta - c_{k-1}) - \\text{logit}^{-1}(\\eta - c_{k}) &\n\\text{if } 1 < k < K, \\text{and} \\\\[4pt] \\text{logit}^{-1}(\\eta -\nc_{K-1}) - 0 & \\text{if } k = K. \\end{array} \\right. \\end{equation*}\\] The \\(k=K\\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \\(k=1\\) and \\(k=K\\) edge cases can be subsumed into the general definition by setting \\(c_0 = -\\infty\\) and \\(c_K = +\\infty\\) with \\(\\text{logit}^{-1}(-\\infty) = 0\\) and \\(\\text{logit}^{-1}(\\infty) = 1\\).\n\n\n\nk ~ ordered_logistic(eta, c)\nIncrement target log probability density with ordered_logistic_lupmf(k | eta, c).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal ordered_logistic_lpmf(ints k | vector eta, vectors c) The log ordered logistic probability mass of k given linear predictors eta, and cutpoints c.\nAvailable since 2.18\n \n\nreal ordered_logistic_lupmf(ints k | vector eta, vectors c) The log ordered logistic probability mass of k given linear predictors eta, and cutpoints c dropping constant additive terms.\nAvailable since 2.25\n \n\nint ordered_logistic_rng(real eta, vector c) Generate an ordered logistic variate with linear predictor eta and cutpoints c; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0\n\n\n\n\n\n\nIf \\(N,M,K \\in \\mathbb{N}\\) with \\(N, M > 0\\), \\(K > 2\\), \\(c \\in \\mathbb{R}^{K-1}\\) such that \\(c_k < c_{k+1}\\) for \\(k \\in 
\\{1,\\ldots,K-2\\}\\), and \\(x\\in \\mathbb{R}^{N\\times M}, \\beta\\in \\mathbb{R}^M\\), then for \\(y \\in \\{1,\\ldots,K\\}^N\\), \\[\\begin{equation*}\n\\begin{split}\n\\\\\n& \\text{OrderedLogisticGLM}(y~|~x,\\beta,c) \\\\[8pt]\n& = \\prod_{1\\leq i \\leq N}\\text{OrderedLogistic}(y_i~|~x_i\\cdot \\beta,c) \\\\\n& = \\prod_{1\\leq i \\leq N} \\left\\{ \\begin{array}{ll}\n1 - \\text{logit}^{-1}(x_i\\cdot \\beta - c_1) & \\text{if } y = 1, \\\\[4pt]\n\\text{logit}^{-1}(x_i\\cdot \\beta - c_{y-1}) - \\text{logit}^{-1}(x_i\\cdot \\beta - c_{y}) & \\text{if } 1 < y < K, \\text{and} \\\\[4pt]\n\\text{logit}^{-1}(x_i\\cdot \\beta - c_{K-1}) - 0 & \\text{if } y = K.\n\\end{array} \\right.\n\\end{split}\n\\end{equation*}\\] The \\(y=K\\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \\(y=1\\) and \\(y=K\\) edge cases can be subsumed into the general definition by setting \\(c_0 = -\\infty\\) and \\(c_K = +\\infty\\) with \\(\\text{logit}^{-1}(-\\infty) = 0\\) and \\(\\text{logit}^{-1}(\\infty) = 1\\).\n\n\n\ny ~ ordered_logistic_glm(x, beta, c)\nIncrement target log probability density with ordered_logistic_glm_lupmf(y | x, beta, c).\nAvailable since 2.23\n \n\n\n\n\n \n\nreal ordered_logistic_glm_lpmf(int y | row_vector x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.\nAvailable since 2.23\n \n\nreal ordered_logistic_glm_lupmf(int y | row_vector x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.\nAvailable since 2.25\n \n\nreal ordered_logistic_glm_lpmf(int y | matrix x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. 
The cutpoints c must be ordered.\nAvailable since 2.23\n \n\nreal ordered_logistic_glm_lupmf(int y | matrix x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.\nAvailable since 2.25\n \n\nreal ordered_logistic_glm_lpmf(array[] int y | row_vector x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.\nAvailable since 2.23\n \n\nreal ordered_logistic_glm_lupmf(array[] int y | row_vector x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.\nAvailable since 2.25\n \n\nreal ordered_logistic_glm_lpmf(array[] int y | matrix x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.\nAvailable since 2.23\n \n\nreal ordered_logistic_glm_lupmf(array[] int y | matrix x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.\nAvailable since 2.25\n\n\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\) with \\(K > 2\\), \\(c \\in \\mathbb{R}^{K-1}\\) such that \\(c_k < c_{k+1}\\) for \\(k \\in \\{1,\\ldots,K-2\\}\\), and \\(\\eta \\in\n\\mathbb{R}\\), then for \\(k \\in \\{1,\\ldots,K\\}\\), \\[\\begin{equation*}\n\\text{OrderedProbit}(k~|~\\eta,c) = \\left\\{ \\begin{array}{ll} 1 -\n\\Phi(\\eta - c_1) & \\text{if } k = 1, \\\\[4pt] \\Phi(\\eta - c_{k-1}) -\n\\Phi(\\eta - c_{k}) & \\text{if } 1 < k < K, \\text{and} \\\\[4pt]\n\\Phi(\\eta - c_{K-1}) - 0 & \\text{if } k = K. \\end{array} \\right. 
\\end{equation*}\\] The \\(k=K\\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \\(k=1\\) and \\(k=K\\) edge cases can be subsumed into the general definition by setting \\(c_0 =\n-\\infty\\) and \\(c_K = +\\infty\\) with \\(\\Phi(-\\infty) = 0\\) and \\(\\Phi(\\infty) = 1\\).\n\n\n\nk ~ ordered_probit(eta, c)\nIncrement target log probability density with ordered_probit_lupmf(k | eta, c).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal ordered_probit_lpmf(ints k | vector eta, vectors c) The log ordered probit probability mass of k given linear predictors eta, and cutpoints c.\nAvailable since 2.18\n \n\nreal ordered_probit_lupmf(ints k | vector eta, vectors c) The log ordered probit probability mass of k given linear predictors eta, and cutpoints c dropping constant additive terms.\nAvailable since 2.25\n \n\nreal ordered_probit_lpmf(ints k | real eta, vectors c) The log ordered probit probability mass of k given linear predictor eta, and cutpoints c.\nAvailable since 2.19\n \n\nreal ordered_probit_lupmf(ints k | real eta, vectors c) The log ordered probit probability mass of k given linear predictor eta, and cutpoints c dropping constant additive terms.\nAvailable since 2.19\n \n\nint ordered_probit_rng(real eta, vector c) Generate an ordered probit variate with linear predictor eta and cutpoints c; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#binomial-distribution", + "href": "functions-reference/bounded_discrete_distributions.html#binomial-distribution", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "Suppose \\(N \\in \\mathbb{N}\\) and \\(\\theta \\in [0,1]\\), and \\(n \\in\n\\{0,\\ldots,N\\}\\). 
\\[\\begin{equation*} \\text{Binomial}(n~|~N,\\theta) = \\binom{N}{n}\n\\theta^n (1 - \\theta)^{N - n}. \\end{equation*}\\]\n\n\n\n\\[\\begin{eqnarray*} \\log \\text{Binomial}(n~|~N,\\theta) & = & \\log\n\\Gamma(N+1) - \\log \\Gamma(n + 1) - \\log \\Gamma(N- n + 1) \\\\[4pt] & & {\n} + n \\log \\theta + (N - n) \\log (1 - \\theta), \\end{eqnarray*}\\]\n\n\n\n\\[\\begin{equation*} \\frac{\\partial}{\\partial \\theta} \\log \\text{Binomial}(n~|~N,\\theta)\n= \\frac{n}{\\theta} - \\frac{N - n}{1 - \\theta} \\end{equation*}\\]\n\n\n\nn ~ binomial(N, theta)\nIncrement target log probability density with binomial_lupmf(n | N, theta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal binomial_lpmf(ints n | ints N, reals theta) The log binomial probability mass of n successes in N trials given chance of success theta\nAvailable since 2.12\n \n\nreal binomial_lupmf(ints n | ints N, reals theta) The log binomial probability mass of n successes in N trials given chance of success theta dropping constant additive terms\nAvailable since 2.25\n \n\nreal binomial_cdf(ints n | ints N, reals theta) The binomial cumulative distribution function of n successes in N trials given chance of success theta\nAvailable since 2.0\n \n\nreal binomial_lcdf(ints n | ints N, reals theta) The log of the binomial cumulative distribution function of n successes in N trials given chance of success theta\nAvailable since 2.12\n \n\nreal binomial_lccdf(ints n | ints N, reals theta) The log of the binomial complementary cumulative distribution function of n successes in N trials given chance of success theta\nAvailable since 2.12\n \n\nR binomial_rng(ints N, reals theta) Generate a binomial variate with N trials and chance of success theta; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#binomial-distribution-logit-parameterization", + "href": "functions-reference/bounded_discrete_distributions.html#binomial-distribution-logit-parameterization", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "Stan also provides a version of the binomial probability mass function distribution with the chance of success parameterized on the unconstrained logistic scale.\n\n\nSuppose \\(N \\in \\mathbb{N}\\), \\(\\alpha \\in \\mathbb{R}\\), and \\(n \\in\n\\{0,\\ldots,N\\}\\). Then \\[\\begin{eqnarray*}\n\\text{BinomialLogit}(n~|~N,\\alpha) & = &\n\\text{Binomial}(n~|~N,\\text{logit}^{-1}(\\alpha)) \\\\[6pt] & = &\n\\binom{N}{n} \\left( \\text{logit}^{-1}(\\alpha) \\right)^{n} \\left( 1 -\n\\text{logit}^{-1}(\\alpha) \\right)^{N - n}. 
\\end{eqnarray*}\\]\n\n\n\n\\[\\begin{eqnarray*} \\log \\text{BinomialLogit}(n~|~N,\\alpha) & = & \\log\n\\Gamma(N+1) - \\log \\Gamma(n + 1) - \\log \\Gamma(N- n + 1) \\\\[4pt] & &\n{ } + n \\log \\text{logit}^{-1}(\\alpha) + (N - n) \\log \\left( 1 -\n\\text{logit}^{-1}(\\alpha) \\right), \\end{eqnarray*}\\]\n\n\n\n\\[\\begin{equation*} \\frac{\\partial}{\\partial \\alpha} \\log\n\\text{BinomialLogit}(n~|~N,\\alpha) =\nn \\, \\text{logit}^{-1}(-\\alpha) - (N - n) \\, \\text{logit}^{-1}(\\alpha) \\end{equation*}\\]\n\n\n\nn ~ binomial_logit(N, alpha)\nIncrement target log probability density with binomial_logit_lupmf(n | N, alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal binomial_logit_lpmf(ints n | ints N, reals alpha) The log binomial probability mass of n successes in N trials given logit-scaled chance of success alpha\nAvailable since 2.12\n \n\nreal binomial_logit_lupmf(ints n | ints N, reals alpha) The log binomial probability mass of n successes in N trials given logit-scaled chance of success alpha dropping constant additive terms\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#binomial-logit-glm", + "href": "functions-reference/bounded_discrete_distributions.html#binomial-logit-glm", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "Stan also supplies a single function for a generalized linear model with binomial distribution and logit link function, i.e., a function for logistic regression with aggregated outcomes. This provides a more efficient implementation of logistic regression than a manually written regression in terms of a binomial distribution and matrix multiplication.\n\n\nSuppose \\(N \\in \\mathbb{N}\\), \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta \\in \\mathbb{R}^m\\), and \\(n \\in\n\\{0,\\ldots,N\\}\\). 
Then \\[\\begin{align*}\n &\\text{BinomialLogitGLM}(n~|~N, x, \\alpha, \\beta) = \\text{Binomial}(n~|~N,\\text{logit}^{-1}(\\alpha_i + x_i \\cdot \\beta)) \\\\\n &= \\binom{N}{n} \\left( \\text{logit}^{-1}(\\alpha_i + \\sum_{1\\leq j\\leq m}x_{ij}\\cdot \\beta_j) \\right)^{n} \\left( 1 - \\text{logit}^{-1}(\\alpha_i + \\sum_{1\\leq j\\leq m}x_{ij}\\cdot \\beta_j) \\right)^{N - n}.\n\\end{align*}\\]\n\n\n\nn ~ binomial_logit_glm(N, x, alpha, beta)\nIncrement target log probability density with binomial_logit_glm_lupmf(n | N, x, alpha, beta).\nAvailable since 2.34\n \n\n\n\n\n \n\nreal binomial_logit_glm_lpmf(int n | int N, matrix x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(int n | int N, matrix x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lpmf(int n | int N, matrix x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(int n | int N, matrix x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lpmf(array[] int n | array[] int N, row_vector x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(array[] int n | array[] int N, row_vector x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n 
\n\nreal binomial_logit_glm_lpmf(array[] int n | array[] int N, row_vector x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(array[] int n | array[] int N, row_vector x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lpmf(array[] int n | array[] int N, matrix x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(array[] int n | array[] int N, matrix x, real alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lpmf(array[] int n | array[] int N, matrix x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta).\nAvailable since 2.34\n \n\nreal binomial_logit_glm_lupmf(array[] int n | array[] int N, matrix x, vector alpha, vector beta) The log binomial probability mass of n given N trials and chance of success inv_logit(alpha + x * beta) dropping constant additive terms.\nAvailable since 2.34", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#beta-binomial-distribution", + "href": "functions-reference/bounded_discrete_distributions.html#beta-binomial-distribution", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "If \\(N \\in \\mathbb{N}\\), \\(\\alpha \\in \\mathbb{R}^+\\), and \\(\\beta \\in\n\\mathbb{R}^+\\), then for \\(n \\in 
\\{0,\\ldots,N\\}\\), \\[\\begin{equation*}\n\\text{BetaBinomial}(n~|~N,\\alpha,\\beta) = \\binom{N}{n}\n\\frac{\\mathrm{B}(n+\\alpha, N -n + \\beta)}{\\mathrm{B}(\\alpha,\\beta)},\n\\end{equation*}\\] where the beta function \\(\\mathrm{B}(u,v)\\) is defined for \\(u \\in\n\\mathbb{R}^+\\) and \\(v \\in \\mathbb{R}^+\\) by \\[\\begin{equation*} \\mathrm{B}(u,v) =\n\\frac{\\Gamma(u) \\ \\Gamma(v)}{\\Gamma(u + v)}. \\end{equation*}\\]\n\n\n\nn ~ beta_binomial(N, alpha, beta)\nIncrement target log probability density with beta_binomial_lupmf(n | N, alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal beta_binomial_lpmf(ints n | ints N, reals alpha, reals beta) The log beta-binomial probability mass of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta\nAvailable since 2.12\n \n\nreal beta_binomial_lupmf(ints n | ints N, reals alpha, reals beta) The log beta-binomial probability mass of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal beta_binomial_cdf(ints n | ints N, reals alpha, reals beta) The beta-binomial cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta\nAvailable since 2.0\n \n\nreal beta_binomial_lcdf(ints n | ints N, reals alpha, reals beta) The log of the beta-binomial cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta\nAvailable since 2.12\n \n\nreal beta_binomial_lccdf(ints n | ints N, reals alpha, reals beta) The log of the beta-binomial complementary cumulative distribution function of n successes in N trials given prior success count (plus one) of alpha and prior failure count (plus one) of beta\nAvailable since 2.12\n \n\nR beta_binomial_rng(ints N, reals alpha, reals 
beta) Generate a beta-binomial variate with N trials, prior success count (plus one) of alpha, and prior failure count (plus one) of beta; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#hypergeometric-distribution", + "href": "functions-reference/bounded_discrete_distributions.html#hypergeometric-distribution", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "If \\(a \\in \\mathbb{N}\\), \\(b \\in \\mathbb{N}\\), and \\(N \\in\n\\{0,\\ldots,a+b\\}\\), then for \\(n \\in \\{\\max(0,N-b),\\ldots,\\min(a,N)\\}\\), \\[\\begin{equation*} \\text{Hypergeometric}(n~|~N,a,b) = \\frac{\\normalsize{\\binom{a}{n}\n\\binom{b}{N - n}}} {\\normalsize{\\binom{a + b}{N}}}. 
\\end{equation*}\\]\n\n\n\nn ~ hypergeometric(N, a, b)\nIncrement target log probability density with hypergeometric_lupmf(n | N, a, b).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal hypergeometric_lpmf(int n | int N, int a, int b) The log hypergeometric probability mass of n successes in N trials given total success count of a and total failure count of b\nAvailable since 2.12\n \n\nreal hypergeometric_lupmf(int n | int N, int a, int b) The log hypergeometric probability mass of n successes in N trials given total success count of a and total failure count of b dropping constant additive terms\nAvailable since 2.25\n \n\nint hypergeometric_rng(int N, int a, int b) Generate a hypergeometric variate with N trials, total success count of a, and total failure count of b; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#categorical-distribution", + "href": "functions-reference/bounded_discrete_distributions.html#categorical-distribution", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "If \\(N \\in \\mathbb{N}\\), \\(N > 0\\), and if \\(\\theta \\in \\mathbb{R}^N\\) forms an \\(N\\)-simplex (i.e., has nonnegative entries summing to one), then for \\(y \\in \\{1,\\ldots,N\\}\\), \\[\\begin{equation*} \\text{Categorical}(y~|~\\theta) =\n\\theta_y. \\end{equation*}\\] In addition, Stan provides a log-odds scaled categorical distribution, \\[\\begin{equation*} \\text{CategoricalLogit}(y~|~\\beta) =\n\\text{Categorical}(y~|~\\text{softmax}(\\beta)). 
\\end{equation*}\\] See the definition of softmax for the definition of the softmax function.\n\n\n\ny ~ categorical(theta)\nIncrement target log probability density with categorical_lupmf(y | theta) dropping constant additive terms.\nAvailable since 2.0\n \n\n\n\n\ny ~ categorical_logit(beta)\nIncrement target log probability density with categorical_logit_lupmf(y | beta).\nAvailable since 2.4\n \n\n\n\n\nAll of the categorical distributions are vectorized so that the outcome y can be a single integer (type int) or an array of integers (type array[] int).\n \n\nreal categorical_lpmf(ints y | vector theta) The log categorical probability mass function with outcome(s) y in \\(1:N\\) given \\(N\\)-vector of outcome probabilities theta. The parameter theta must have non-negative entries that sum to one, but it need not be a variable declared as a simplex.\nAvailable since 2.12\n \n\nreal categorical_lupmf(ints y | vector theta) The log categorical probability mass function with outcome(s) y in \\(1:N\\) given \\(N\\)-vector of outcome probabilities theta dropping constant additive terms. 
The parameter theta must have non-negative entries that sum to one, but it need not be a variable declared as a simplex.\nAvailable since 2.25\n \n\nreal categorical_logit_lpmf(ints y | vector beta) The log categorical probability mass function with outcome(s) y in \\(1:N\\) given log-odds of outcomes beta.\nAvailable since 2.12\n \n\nreal categorical_logit_lupmf(ints y | vector beta) The log categorical probability mass function with outcome(s) y in \\(1:N\\) given log-odds of outcomes beta dropping constant additive terms.\nAvailable since 2.25\n \n\nint categorical_rng(vector theta) Generate a categorical variate with \\(N\\)-simplex distribution parameter theta; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0\n \n\nint categorical_logit_rng(vector beta) Generate a categorical variate with outcome in range \\(1:N\\) from log-odds vector beta; may only be used in transformed data and generated quantities blocks\nAvailable since 2.16", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#categorical-logit-glm", + "href": "functions-reference/bounded_discrete_distributions.html#categorical-logit-glm", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "Stan also supplies a single function for a generalized linear model with categorical distribution and logit link function, i.e. a function for a softmax regression. This provides a more efficient implementation of softmax regression than a manually written regression in terms of a categorical distribution and matrix multiplication.\nNote that the implementation does not put any restrictions on the coefficient matrix \\(\\beta\\). It is up to the user to use a reference category, a suitable prior or some other means of identifiability. 
See Multi-logit in the Stan User’s Guide.\n\n\nIf \\(N,M,K \\in \\mathbb{N}\\), \\(N,M,K > 0\\), and if \\(x\\in \\mathbb{R}^{M\\times K}, \\alpha \\in \\mathbb{R}^N, \\beta\\in \\mathbb{R}^{K\\cdot N}\\), then for \\(y \\in \\{1,\\ldots,N\\}^M\\), \\[\\begin{equation*}\n\\begin{split}\n\\text{CategoricalLogitGLM}(y~|~x,\\alpha,\\beta)\n& = \\prod_{1\\leq i \\leq M}\\text{CategoricalLogit}(y_i~|~\\alpha+x_i\\cdot\\beta) \\\\[8pt]\n& = \\prod_{1\\leq i \\leq M}\\text{Categorical}(y_i~|~softmax(\\alpha+x_i\\cdot\\beta)).\n\\end{split}\n\\end{equation*}\\] See the definition of softmax for the definition of the softmax function.\n\n\n\ny ~ categorical_logit_glm(x, alpha, beta)\nIncrement target log probability density with categorical_logit_glm_lupmf(y | x, alpha, beta).\nAvailable since 2.23\n \n\n\n\n\n \n\nreal categorical_logit_glm_lpmf(int y | row_vector x, vector alpha, matrix beta) The log categorical probability mass function with outcome y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta.\nAvailable since 2.23\n \n\nreal categorical_logit_glm_lupmf(int y | row_vector x, vector alpha, matrix beta) The log categorical probability mass function with outcome y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal categorical_logit_glm_lpmf(int y | matrix x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta.\nAvailable since 2.23\n \n\nreal categorical_logit_glm_lupmf(int y | matrix x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal categorical_logit_glm_lpmf(array[] int y | row_vector x, vector alpha, matrix beta) The log categorical probability mass function 
with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta.\nAvailable since 2.23\n \n\nreal categorical_logit_glm_lupmf(array[] int y | row_vector x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal categorical_logit_glm_lpmf(array[] int y | matrix x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta.\nAvailable since 2.23\n \n\nreal categorical_logit_glm_lupmf(array[] int y | matrix x, vector alpha, matrix beta) The log categorical probability mass function with outcomes y in \\(1:N\\) given \\(N\\)-vector of log-odds of outcomes alpha + x * beta dropping constant additive terms.\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#discrete-range-distribution", + "href": "functions-reference/bounded_discrete_distributions.html#discrete-range-distribution", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "If \\(l, u \\in \\mathbb{Z}\\) are lower and upper bounds (\\(l \\le u\\)), then for any integer \\(y \\in \\{l,\\ldots,u\\}\\), \\[\\begin{equation*} \\text{DiscreteRange}(y ~|~ l, u) =\n\\frac{1}{u - l + 1}. 
\\end{equation*}\\]\n\n\n\ny ~ discrete_range(l, u)\nIncrement the target log probability density with discrete_range_lupmf(y | l, u) dropping constant additive terms.\nAvailable since 2.26\n \n\n\n\n\nAll of the discrete range distributions are vectorized so that the outcome y and the bounds l, u can be a single integer (type int) or an array of integers (type array[] int).\n \n\nreal discrete_range_lpmf(ints y | ints l, ints u) The log probability mass function with outcome(s) y in \\(l:u\\).\nAvailable since 2.26\n \n\nreal discrete_range_lupmf(ints y | ints l, ints u) The log probability mass function with outcome(s) y in \\(l:u\\) dropping constant additive terms.\nAvailable since 2.26\n \n\nreal discrete_range_cdf(ints y | ints l, ints u) The discrete range cumulative distribution function for the given y, lower and upper bounds.\nAvailable since 2.26\n \n\nreal discrete_range_lcdf(ints y | ints l, ints u) The log of the discrete range cumulative distribution function for the given y, lower and upper bounds.\nAvailable since 2.26\n \n\nreal discrete_range_lccdf(ints y | ints l, ints u) The log of the discrete range complementary cumulative distribution function for the given y, lower and upper bounds.\nAvailable since 2.26\n \n\nints discrete_range_rng(ints l, ints u) Generate a discrete variate between the given lower and upper bounds; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.26", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#ordered-logistic-distribution", + "href": "functions-reference/bounded_discrete_distributions.html#ordered-logistic-distribution", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\) with \\(K > 2\\), \\(c \\in \\mathbb{R}^{K-1}\\) such that \\(c_k < c_{k+1}\\) for \\(k \\in \\{1,\\ldots,K-2\\}\\), and \\(\\eta \\in\n\\mathbb{R}\\), then for \\(k \\in \\{1,\\ldots,K\\}\\), \\[\\begin{equation*}\n\\text{OrderedLogistic}(k~|~\\eta,c) = \\left\\{ \\begin{array}{ll} 1 -\n\\text{logit}^{-1}(\\eta - c_1) & \\text{if } k = 1, \\\\[4pt]\n\\text{logit}^{-1}(\\eta - c_{k-1}) - \\text{logit}^{-1}(\\eta - c_{k}) &\n\\text{if } 1 < k < K, \\text{and} \\\\[4pt] \\text{logit}^{-1}(\\eta -\nc_{K-1}) - 0 & \\text{if } k = K. \\end{array} \\right. 
\\end{equation*}\\] The \\(k=K\\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \\(k=1\\) and \\(k=K\\) edge cases can be subsumed into the general definition by setting \\(c_0 = -\\infty\\) and \\(c_K = +\\infty\\) with \\(\\text{logit}^{-1}(-\\infty) = 0\\) and \\(\\text{logit}^{-1}(\\infty) = 1\\).\n\n\n\nk ~ ordered_logistic(eta, c)\nIncrement target log probability density with ordered_logistic_lupmf(k | eta, c).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal ordered_logistic_lpmf(ints k | vector eta, vectors c) The log ordered logistic probability mass of k given linear predictors eta, and cutpoints c.\nAvailable since 2.18\n \n\nreal ordered_logistic_lupmf(ints k | vector eta, vectors c) The log ordered logistic probability mass of k given linear predictors eta, and cutpoints c dropping constant additive terms.\nAvailable since 2.25\n \n\nint ordered_logistic_rng(real eta, vector c) Generate an ordered logistic variate with linear predictor eta and cutpoints c; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#ordered-logistic-generalized-linear-model-ordinal-regression", + "href": "functions-reference/bounded_discrete_distributions.html#ordered-logistic-generalized-linear-model-ordinal-regression", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "If \\(N,M,K \\in \\mathbb{N}\\) with \\(N, M > 0\\), \\(K > 2\\), \\(c \\in \\mathbb{R}^{K-1}\\) such that \\(c_k < c_{k+1}\\) for \\(k \\in \\{1,\\ldots,K-2\\}\\), and \\(x\\in \\mathbb{R}^{N\\times M}, \\beta\\in \\mathbb{R}^M\\), then for \\(y \\in \\{1,\\ldots,K\\}^N\\), \\[\\begin{equation*}\n\\begin{split}\n\\\\\n& \\text{OrderedLogisticGLM}(y~|~x,\\beta,c) \\\\[8pt]\n& = \\prod_{1\\leq i \\leq 
N}\\text{OrderedLogistic}(y_i~|~x_i\\cdot \\beta,c) \\\\\n& = \\prod_{1\\leq i \\leq N} \\left\\{ \\begin{array}{ll}\n1 - \\text{logit}^{-1}(x_i\\cdot \\beta - c_1) & \\text{if } y = 1, \\\\[4pt]\n\\text{logit}^{-1}(x_i\\cdot \\beta - c_{y-1}) - \\text{logit}^{-1}(x_i\\cdot \\beta - c_{y}) & \\text{if } 1 < y < K, \\text{and} \\\\[4pt]\n\\text{logit}^{-1}(x_i\\cdot \\beta - c_{K-1}) - 0 & \\text{if } y = K.\n\\end{array} \\right.\n\\end{split}\n\\end{equation*}\\] The \\(y=K\\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \\(y=1\\) and \\(y=K\\) edge cases can be subsumed into the general definition by setting \\(c_0 = -\\infty\\) and \\(c_K = +\\infty\\) with \\(\\text{logit}^{-1}(-\\infty) = 0\\) and \\(\\text{logit}^{-1}(\\infty) = 1\\).\n\n\n\ny ~ ordered_logistic_glm(x, beta, c)\nIncrement target log probability density with ordered_logistic_glm_lupmf(y | x, beta, c).\nAvailable since 2.23\n \n\n\n\n\n \n\nreal ordered_logistic_glm_lpmf(int y | row_vector x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.\nAvailable since 2.23\n \n\nreal ordered_logistic_glm_lupmf(int y | row_vector x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.\nAvailable since 2.25\n \n\nreal ordered_logistic_glm_lpmf(int y | matrix x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.\nAvailable since 2.23\n \n\nreal ordered_logistic_glm_lupmf(int y | matrix x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. 
The cutpoints c must be ordered.\nAvailable since 2.25\n \n\nreal ordered_logistic_glm_lpmf(array[] int y | row_vector x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.\nAvailable since 2.23\n \n\nreal ordered_logistic_glm_lupmf(array[] int y | row_vector x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.\nAvailable since 2.25\n \n\nreal ordered_logistic_glm_lpmf(array[] int y | matrix x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c. The cutpoints c must be ordered.\nAvailable since 2.23\n \n\nreal ordered_logistic_glm_lupmf(array[] int y | matrix x, vector beta, vector c) The log ordered logistic probability mass of y, given linear predictors x * beta, and cutpoints c dropping constant additive terms. The cutpoints c must be ordered.\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/bounded_discrete_distributions.html#ordered-probit-distribution", + "href": "functions-reference/bounded_discrete_distributions.html#ordered-probit-distribution", + "title": "Bounded Discrete Distributions", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\) with \\(K > 2\\), \\(c \\in \\mathbb{R}^{K-1}\\) such that \\(c_k < c_{k+1}\\) for \\(k \\in \\{1,\\ldots,K-2\\}\\), and \\(\\eta \\in\n\\mathbb{R}\\), then for \\(k \\in \\{1,\\ldots,K\\}\\), \\[\\begin{equation*}\n\\text{OrderedProbit}(k~|~\\eta,c) = \\left\\{ \\begin{array}{ll} 1 -\n\\Phi(\\eta - c_1) & \\text{if } k = 1, \\\\[4pt] \\Phi(\\eta - c_{k-1}) -\n\\Phi(\\eta - c_{k}) & \\text{if } 1 < k < K, \\text{and} \\\\[4pt]\n\\Phi(\\eta - c_{K-1}) - 0 & \\text{if } k = K. 
\\end{array} \\right. \\end{equation*}\\] The \\(k=K\\) case is written with the redundant subtraction of zero to illustrate the parallelism of the cases; the \\(k=1\\) and \\(k=K\\) edge cases can be subsumed into the general definition by setting \\(c_0 =\n-\\infty\\) and \\(c_K = +\\infty\\) with \\(\\Phi(-\\infty) = 0\\) and \\(\\Phi(\\infty) = 1\\).\n\n\n\nk ~ ordered_probit(eta, c)\nIncrement target log probability density with ordered_probit_lupmf(k | eta, c).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal ordered_probit_lpmf(ints k | vector eta, vectors c) The log ordered probit probability mass of k given linear predictors eta, and cutpoints c.\nAvailable since 2.18\n \n\nreal ordered_probit_lupmf(ints k | vector eta, vectors c) The log ordered probit probability mass of k given linear predictors eta, and cutpoints c dropping constant additive terms.\nAvailable since 2.25\n \n\nreal ordered_probit_lpmf(ints k | real eta, vectors c) The log ordered probit probability mass of k given linear predictor eta, and cutpoints c.\nAvailable since 2.19\n \n\nreal ordered_probit_lupmf(ints k | real eta, vectors c) The log ordered probit probability mass of k given linear predictor eta, and cutpoints c dropping constant additive terms.\nAvailable since 2.19\n \n\nint ordered_probit_rng(real eta, vector c) Generate an ordered probit variate with linear predictor eta and cutpoints c; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Bounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html", + "href": "functions-reference/complex-valued_basic_functions.html", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "This chapter describes built-in functions that operate on complex numbers, either as an argument type or a return type. 
This includes the arithmetic operators generalized to complex numbers.\n\n\nJust as integers may be assigned to real variables, real variables may be assigned to complex numbers, with the result being a zero imaginary component.\nint n = 5; // n = 5\nreal x = n; // x = 5.0\ncomplex z1 = n; // z1 = 5.0 + 0.0i\ncomplex z2 = x; // z2 = 5.0 + 0.0i\n\n\nFunction arguments of type int or real may be promoted to type complex. The complex version of functions in this chapter are only used if one of the arguments is complex. For example, if z is complex, then pow(z, 2) will call the complex version of the power function and the integer 2 will be promoted to a complex number with a real component of 2 and an imaginary component of 0. The same goes for binary operators like addition and subtraction, where z + 2 will be legal and produce a complex result. Functions such as arg and conj that are only available for complex numbers can accept integer or real arguments, promoting them to complex before applying the function.\n\n\n\n\n\n\nVariables and constants of type complex are constructed from zero, one, or two real numbers.\ncomplex z1 = to_complex(); // z1 = 0.0 + 0.0i\nreal re = -2.9;\ncomplex z2 = to_complex(re); // z2 = -2.9 + 0.0i\nreal im = 1.3;\ncomplex z3 = to_complex(re, im); // z3 = -2.9 + 1.3i\n \n\ncomplex to_complex() Return complex number with real part 0.0 and imaginary part 0.0.\nAvailable since 2.28\n \n\ncomplex to_complex(real re) Return complex number with real part re and imaginary part 0.0.\nAvailable since 2.28\n \n\ncomplex to_complex(real re, real im) Return complex number with real part re and imaginary part im.\nAvailable since 2.28\n \n\nZ to_complex(T1 re, T2 im) Vectorized implementation of the to_complex function.\nT1 and T2 can either be real containers of the same size, or a real container and a real, in which case the real value is used for the corresponding component in all elements of the output.\nAvailable since 2.30\n\n\n\nGiven a complex 
number, its real and imaginary parts can be extracted with the following functions.\n \n\nreal get_real(complex z) Return the real part of the complex number z.\nAvailable since 2.28\n \n\nreal get_imag(complex z) Return the imaginary part of the complex number z.\nAvailable since 2.28\n\n\n\n\nThe arithmetic operators have the same precedence for complex and real arguments. The complex form of an operator will be selected if at least one of its arguments is of type complex. If there are two arguments and only one is of type complex, then the other will be promoted to type complex before performing the operation.\n\n\n \n\ncomplex operator+(complex z) Return the complex argument z, \\[\\begin{equation*} +z = z. \\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator-(complex z) Return the negation of the complex argument z, which for \\(z = x + yi\\) is \\[\\begin{equation*} -z = -x - yi. \\end{equation*}\\]\nAvailable since 2.28\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of complex numbers, -x is the same shape array where each individual value is negated.\nAvailable since 2.31\n\n\n\n \n\ncomplex operator+(complex x, complex y) Return the sum of x and y, \\[\\begin{equation*} (x + y) = \\text{operator+}(x, y) = x + y. \\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator-(complex x, complex y) Return the difference between x and y, \\[\\begin{equation*} (x - y) =\n\\text{operator-}(x, y) = x - y. \\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator*(complex x, complex y) Return the product of x and y, \\[\\begin{equation*} (x \\, * \\, y) = \\text{operator*}(x, y) = x\n\\times y. 
\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator/(complex x, complex y) Return the quotient of x and y, \\[\\begin{equation*} (x / y) = \\text{operator/}(x,y) =\n\\frac{x}{y} \\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator^(complex x, complex y) Return x raised to the power of y, \\[\\begin{equation*}\n(x^\\mathrm{\\wedge}y)= \\text{operator}^\\mathrm{\\wedge}(x,y)\n= \\textrm{exp}(y \\, \\log(x)).\n\\end{equation*}\\]\nAvailable since 2.28\n\n\n\n\nComplex numbers are equal if and only if both their real and imaginary components are equal. That is, the conditional\nz1 == z2\nis equivalent to\nget_real(z1) == get_real(z2) && get_imag(z1) == get_imag(z2)\nAs with other complex functions, if one of the arguments is of type real or int, it will be promoted to type complex before comparison. For example, if z is of type complex, then z == 0 will be true if z has real component equal to 0.0 and imaginary component equal to 0.0.\nWarning: As with real values, it is usually a mistake to compare complex numbers for equality because their parts are implemented using floating-point arithmetic, which suffers from precision errors, rendering algebraically equivalent expressions not equal after evaluation.\n \n\nint operator==(complex x, complex y) Return 1 if x is equal to y and 0 otherwise, \\[\\begin{equation*}\n(x \\,\\text{==}\\, y)\n\\ = \\ \\text{operator==}(x,y)\n\\ = \\ \\begin{cases} 1 & \\text{if $x = y$}, \\ \\text{and} \\\\ 0 & \\text{otherwise.}\n\\end{cases}\n\\end{equation*}\\]\nAvailable since 2.28\n \n\nint operator!=(complex x, complex y) Return 1 if x is not equal to y and 0 otherwise, \\[\\begin{equation*}\n(x \\,\\text{!=}\\, y)\n\\ = \\ \\text{operator!=}(x,y)\n\\ = \\ \\begin{cases} 1 & \\text{if $x \\neq y$}, \\ \\text{and} \\\\ 0 &\n\\text{otherwise.} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.28\n\n\n\nThe assignment operator only serves as a component in the assignment statement and is thus not technically a 
function in the Stan language. With that caveat, it is documented here for completeness.\nAssignment of complex numbers works elementwise. If an expression of type int or real is assigned to a complex number, it will be promoted before assignment as if calling to_complex(), so that the imaginary component is 0.0.\n \n\nvoid operator=(complex x, complex y) x = y; assigns a (copy of) the value of y to x.\nAvailable since 2.28\n \n\nvoid operator+=(complex x, complex y) x += y; is equivalent to x = x + y;.\nAvailable since 2.28\n \n\nvoid operator-=(complex x, complex y) x -= y; is equivalent to x = x - y;.\nAvailable since 2.28\n \n\nvoid operator*=(complex x, complex y) x *= y; is equivalent to x = x * y;.\nAvailable since 2.28\n \n\nvoid operator/=(complex x, complex y) x /= y; is equivalent to x = x / y;.\nAvailable since 2.28\n\n\n\nThe following functions are specific to complex numbers other than absolute value, which has a specific meaning for complex numbers.\n \n\nreal abs(complex z) Return the absolute value of z, also known as the modulus or magnitude, which for \\(z = x + yi\\) is \\[\\begin{equation*}\n\\textrm{abs}(z) = \\sqrt{x^2 + y^2}.\n\\end{equation*}\\]\nThis function works elementwise over containers, returning the same shape and kind of the input container but holding reals. 
For example, a complex_vector[n] input will return a vector[n] output, with each element transformed by the above equation.\nAvailable since 2.28, vectorized in 2.30\n \n\nreal arg(complex z) Return the phase angle (in radians) of z, which for \\(z = x + yi\\) is \\[\\begin{equation*}\n\\textrm{arg}(z) = \\textrm{atan2}(y, x) = \\textrm{atan}(y / x).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\nreal norm(complex z) Return the Euclidean norm of z, which is its absolute value squared, and which for \\(z = x + yi\\) is \\[\\begin{equation*}\n\\textrm{norm}(z) = \\textrm{abs}^2(z) = x^2 + y^2.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex conj(complex z) Return the complex conjugate of z, which negates the imaginary component, so that if \\(z = x + yi\\), \\[\\begin{equation*}\n\\textrm{conj}(z) = x - yi.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\nZ conj(Z z) Vectorized version of conj. This will apply the conj function to each element of a complex array, vector, or matrix.\nAvailable since 2.31\n \n\ncomplex proj(complex z) Return the projection of z onto the Riemann sphere, which for \\(z = x\n+ yi\\) is \\[\\begin{equation*}\n\\textrm{proj}(z)\n= \\begin{cases}\n z & \\textrm{if} \\ z \\ \\textrm{is finite, and} \\\\\n 0 + \\textrm{sign}(y)i & \\textrm{otherwise,}\n\\end{cases}\n\\end{equation*}\\] where \\(\\textrm{sign}(y)\\) is -1 if \\(y\\) is negative and 1 otherwise.\nAvailable since 2.28\n \n\ncomplex polar(real r, real theta) Return the complex number with magnitude (absolute value) r and phase angle theta.\nAvailable since 2.28\n\n\n\nThe exponential, log, and power functions may be supplied with complex arguments with specialized meanings that generalize their real counterparts. 
These versions are only called when the argument is complex.\n \n\ncomplex exp(complex z) Return the complex natural exponential of z, which for \\(z = x + yi\\) is \\[\\begin{equation*}\n\\exp z = \\exp(x) \\textrm{cis}(y) = \\exp(x) (\\cos(y) + i \\sin(y)).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex log(complex z) Return the complex natural logarithm of z, which for \\(z = \\textrm{polar}(r,\n\\theta)\\) is \\[\\begin{equation*}\n\\log z = \\log r + \\theta i.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex log10(complex z) Return the complex common logarithm of z, \\[\\begin{equation*}\n\\log_{10} z = \\frac{\\log z}{\\log 10}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex pow(complex x, complex y) Return x raised to the power of y, \\[\\begin{equation*}\n\\text{pow}(x,y) = \\textrm{exp}(y \\, \\log(x)).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\nZ pow(T1 x, T2 y) Vectorized implementation of the pow function\nAvailable since 2.30\n \n\ncomplex sqrt(complex x) Return the complex square root of x with branch cut along the negative real axis. 
For finite inputs, the result will be in the right half-plane.\nAvailable since 2.28\n\n\n\nThe standard trigonometric functions are supported for complex numbers.\n \n\ncomplex cos(complex z) Return the complex cosine of z, which is \\[\\begin{equation*}\n\\cos(z)\n= \\textrm{cosh}(z \\, i)\n= \\frac{\\displaystyle \\exp(z \\, i) + \\exp(-z \\, i)}\n {\\displaystyle 2}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex sin(complex z) Return the complex sine of z, \\[\\begin{equation*}\n\\sin(z)\n= -\\textrm{sinh}(z \\, i) \\, i\n= \\frac{\\displaystyle \\exp(z \\, i) - \\exp(-z \\, i)}\n {\\displaystyle 2 \\, i}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex tan(complex z) Return the complex tangent of z, \\[\\begin{equation*}\n\\tan(z)\n= -\\textrm{tanh}(z \\, i) \\, i\n= \\frac{(\\exp(-z \\, i) - \\exp(z \\, i)) \\, i}\n {\\exp(-z \\, i) + \\exp(z \\, i)}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex acos(complex z) Return the complex arc (inverse) cosine of z, \\[\\begin{equation*}\n\\textrm{acos}(z)\n= \\frac{1}{2} \\pi + \\log (z \\, i + \\sqrt{1 - z^2}) \\, i.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex asin(complex z) Return the complex arc (inverse) sine of z, \\[\\begin{equation*}\n\\text{asin}(z)\n= -\\log(z \\, i + \\sqrt{1 - z^2}) \\, i.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex atan(complex z) Return the complex arc (inverse) tangent of z, \\[\\begin{equation*}\n\\text{atan}(z)\n= - \\frac{1}{2} (\\log(1 - z \\, i) - \\log(1 + z \\, i)) \\, i.\n\\end{equation*}\\]\nAvailable since 2.28\n\n\n\nThe standard hyperbolic trigonometric functions are supported for complex numbers.\n \n\ncomplex cosh(complex z) Return the complex hyperbolic cosine of z, \\[\\begin{equation*}\n\\textrm{cosh}(z)\n= \\frac{\\exp(z) + \\exp(-z)}\n {2}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex sinh(complex z) Return the complex hyperbolic sine of z, \\[\\begin{equation*}\n\\textrm{sinh}(z)\n= 
\\frac{\\displaystyle \\exp(z) - \\exp(-z)}\n {\\displaystyle 2}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex tanh(complex z) Return the complex hyperbolic tangent of z, \\[\\begin{equation*}\n\\textrm{tanh}(z)\n\\ = \\ \\frac{\\textrm{sinh}(z)}\n {\\textrm{cosh}(z)}\n\\ = \\ \\frac{\\displaystyle \\exp(z) - \\exp(-z)}\n {\\displaystyle \\exp(z) + \\exp(-z)}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex acosh(complex z) Return the complex hyperbolic arc (inverse) cosine of z, \\[\\begin{equation*}\n\\textrm{acosh}(z)\n= \\log(z + \\sqrt{(z + 1)(z - 1)}).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex asinh(complex z) Return the complex hyperbolic arc (inverse) sine of z, \\[\\begin{equation*}\n\\textrm{asinh}(z)\n= \\log(z + \\sqrt{1 + z^2}).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex atanh(complex z) Return the complex hyperbolic arc (inverse) tangent of z, \\[\\begin{equation*}\n\\textrm{atanh}(z)\n= \\frac{\\log(1 + z) - \\log(1 - z)}\n {2}.\n\\end{equation*}\\]\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-assign-promote", + "href": "functions-reference/complex-valued_basic_functions.html#complex-assign-promote", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "Just as integers may be assigned to real variables, real variables may be assigned to complex numbers, with the result being a zero imaginary component.\nint n = 5; // n = 5\nreal x = n; // x = 5.0\ncomplex z1 = n; // z1 = 5.0 + 0.0i\ncomplex z2 = x; // z2 = 5.0 + 0.0i\n\n\nFunction arguments of type int or real may be promoted to type complex. The complex version of functions in this chapter are only used if one of the arguments is complex. 
For example, if z is complex, then pow(z, 2) will call the complex version of the power function and the integer 2 will be promoted to a complex number with a real component of 2 and an imaginary component of 0. The same goes for binary operators like addition and subtraction, where z + 2 will be legal and produce a complex result. Functions such as arg and conj that are only available for complex numbers can accept integer or real arguments, promoting them to complex before applying the function.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-constructors", + "href": "functions-reference/complex-valued_basic_functions.html#complex-constructors", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "Variables and constants of type complex are constructed from zero, one, or two real numbers.\ncomplex z1 = to_complex(); // z1 = 0.0 + 0.0i\nreal re = -2.9;\ncomplex z2 = to_complex(re); // z2 = -2.9 + 0.0i\nreal im = 1.3;\ncomplex z3 = to_complex(re, im); // z3 = -2.9 + 1.3i\n \n\ncomplex to_complex() Return complex number with real part 0.0 and imaginary part 0.0.\nAvailable since 2.28\n \n\ncomplex to_complex(real re) Return complex number with real part re and imaginary part 0.0.\nAvailable since 2.28\n \n\ncomplex to_complex(real re, real im) Return complex number with real part re and imaginary part im.\nAvailable since 2.28\n \n\nZ to_complex(T1 re, T2 im) Vectorized implementation of the to_complex function.\nT1 and T2 can either be real containers of the same size, or a real container and a real, in which case the real value is used for the corresponding component in all elements of the output.\nAvailable since 2.30\n\n\n\nGiven a complex number, its real and imaginary parts can be extracted with the following functions.\n \n\nreal get_real(complex z) Return the real part of the complex number 
z.\nAvailable since 2.28\n \n\nreal get_imag(complex z) Return the imaginary part of the complex number z.\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-arithmetic", + "href": "functions-reference/complex-valued_basic_functions.html#complex-arithmetic", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "The arithmetic operators have the same precedence for complex and real arguments. The complex form of an operator will be selected if at least one of its arguments is of type complex. If there are two arguments and only one is of type complex, then the other will be promoted to type complex before performing the operation.\n\n\n \n\ncomplex operator+(complex z) Return the complex argument z, \\[\\begin{equation*} +z = z. \\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator-(complex z) Return the negation of the complex argument z, which for \\(z = x + yi\\) is \\[\\begin{equation*} -z = -x - yi. \\end{equation*}\\]\nAvailable since 2.28\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of complex numbers, -x is the same shape array where each individual value is negated.\nAvailable since 2.31\n\n\n\n \n\ncomplex operator+(complex x, complex y) Return the sum of x and y, \\[\\begin{equation*} (x + y) = \\text{operator+}(x, y) = x + y. \\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator-(complex x, complex y) Return the difference between x and y, \\[\\begin{equation*} (x - y) =\n\\text{operator-}(x, y) = x - y. \\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator*(complex x, complex y) Return the product of x and y, \\[\\begin{equation*} (x \\, * \\, y) = \\text{operator*}(x, y) = x\n\\times y. 
\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator/(complex x, complex y) Return the quotient of x and y, \\[\\begin{equation*} (x / y) = \\text{operator/}(x,y) =\n\\frac{x}{y} \\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex operator^(complex x, complex y) Return x raised to the power of y, \\[\\begin{equation*}\n(x^\\mathrm{\\wedge}y)= \\text{operator}^\\mathrm{\\wedge}(x,y)\n= \\textrm{exp}(y \\, \\log(x)).\n\\end{equation*}\\]\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-comparison", + "href": "functions-reference/complex-valued_basic_functions.html#complex-comparison", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "Complex numbers are equal if and only if both their real and imaginary components are equal. That is, the conditional\nz1 == z2\nis equivalent to\nget_real(z1) == get_real(z2) && get_imag(z1) == get_imag(z2)\nAs with other complex functions, if one of the arguments is of type real or int, it will be promoted to type complex before comparison. 
For example, if z is of type complex, then z == 0 will be true if z has real component equal to 0.0 and imaginary component equal to 0.0.\nWarning: As with real values, it is usually a mistake to compare complex numbers for equality because their parts are implemented using floating-point arithmetic, which suffers from precision errors, rendering algebraically equivalent expressions not equal after evaluation.\n \n\nint operator==(complex x, complex y) Return 1 if x is equal to y and 0 otherwise, \\[\\begin{equation*}\n(x \\,\\text{==}\\, y)\n\\ = \\ \\text{operator==}(x,y)\n\\ = \\ \\begin{cases} 1 & \\text{if $x = y$}, \\ \\text{and} \\\\ 0 & \\text{otherwise.}\n\\end{cases}\n\\end{equation*}\\]\nAvailable since 2.28\n \n\nint operator!=(complex x, complex y) Return 1 if x is not equal to y and 0 otherwise, \\[\\begin{equation*}\n(x \\,\\text{!=}\\, y)\n\\ = \\ \\text{operator!=}(x,y)\n\\ = \\ \\begin{cases} 1 & \\text{if $x \\neq y$}, \\ \\text{and} \\\\ 0 &\n\\text{otherwise.} \\end{cases}\n\\end{equation*}\\]\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-assignment", + "href": "functions-reference/complex-valued_basic_functions.html#complex-assignment", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "The assignment operator only serves as a component in the assignment statement and is thus not technically a function in the Stan language. With that caveat, it is documented here for completeness.\nAssignment of complex numbers works elementwise. 
If an expression of type int or real is assigned to a complex number, it will be promoted before assignment as if calling to_complex(), so that the imaginary component is 0.0.\n \n\nvoid operator=(complex x, complex y) x = y; assigns a (copy of) the value of y to x.\nAvailable since 2.28\n \n\nvoid operator+=(complex x, complex y) x += y; is equivalent to x = x + y;.\nAvailable since 2.28\n \n\nvoid operator-=(complex x, complex y) x -= y; is equivalent to x = x - y;.\nAvailable since 2.28\n \n\nvoid operator*=(complex x, complex y) x *= y; is equivalent to x = x * y;.\nAvailable since 2.28\n \n\nvoid operator/=(complex x, complex y) x /= y; is equivalent to x = x / y;.\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-special", + "href": "functions-reference/complex-valued_basic_functions.html#complex-special", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "The following functions are specific to complex numbers other than absolute value, which has a specific meaning for complex numbers.\n \n\nreal abs(complex z) Return the absolute value of z, also known as the modulus or magnitude, which for \\(z = x + yi\\) is \\[\\begin{equation*}\n\\textrm{abs}(z) = \\sqrt{x^2 + y^2}.\n\\end{equation*}\\]\nThis function works elementwise over containers, returning the same shape and kind of the input container but holding reals. 
For example, a complex_vector[n] input will return a vector[n] output, with each element transformed by the above equation.\nAvailable since 2.28, vectorized in 2.30\n \n\nreal arg(complex z) Return the phase angle (in radians) of z, which for \\(z = x + yi\\) is \\[\\begin{equation*}\n\\textrm{arg}(z) = \\textrm{atan2}(y, x) = \\textrm{atan}(y / x).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\nreal norm(complex z) Return the Euclidean norm of z, which is its absolute value squared, and which for \\(z = x + yi\\) is \\[\\begin{equation*}\n\\textrm{norm}(z) = \\textrm{abs}^2(z) = x^2 + y^2.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex conj(complex z) Return the complex conjugate of z, which negates the imaginary component, so that if \\(z = x + yi\\), \\[\\begin{equation*}\n\\textrm{conj}(z) = x - yi.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\nZ conj(Z z) Vectorized version of conj. This will apply the conj function to each element of a complex array, vector, or matrix.\nAvailable since 2.31\n \n\ncomplex proj(complex z) Return the projection of z onto the Riemann sphere, which for \\(z = x\n+ yi\\) is \\[\\begin{equation*}\n\\textrm{proj}(z)\n= \\begin{cases}\n z & \\textrm{if} \\ z \\ \\textrm{is finite, and} \\\\\n 0 + \\textrm{sign}(y)i & \\textrm{otherwise,}\n\\end{cases}\n\\end{equation*}\\] where \\(\\textrm{sign}(y)\\) is -1 if \\(y\\) is negative and 1 otherwise.\nAvailable since 2.28\n \n\ncomplex polar(real r, real theta) Return the complex number with magnitude (absolute value) r and phase angle theta.\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-exponential", + "href": "functions-reference/complex-valued_basic_functions.html#complex-exponential", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "The exponential, log, and power functions may be 
supplied with complex arguments with specialized meanings that generalize their real counterparts. These versions are only called when the argument is complex.\n \n\ncomplex exp(complex z) Return the complex natural exponential of z, which for \\(z = x + yi\\) is \\[\\begin{equation*}\n\\exp z = \\exp(x) \\textrm{cis}(y) = \\exp(x) (\\cos(y) + i \\sin(y)).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex log(complex z) Return the complex natural logarithm of z, which for \\(z = \\textrm{polar}(r,\n\\theta)\\) is \\[\\begin{equation*}\n\\log z = \\log r + \\theta i.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex log10(complex z) Return the complex common logarithm of z, \\[\\begin{equation*}\n\\log_{10} z = \\frac{\\log z}{\\log 10}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex pow(complex x, complex y) Return x raised to the power of y, \\[\\begin{equation*}\n\\text{pow}(x,y) = \\textrm{exp}(y \\, \\log(x)).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\nZ pow(T1 x, T2 y) Vectorized implementation of the pow function\nAvailable since 2.30\n \n\ncomplex sqrt(complex x) Return the complex square root of x with branch cut along the negative real axis. 
For finite inputs, the result will be in the right half-plane.\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-trig", + "href": "functions-reference/complex-valued_basic_functions.html#complex-trig", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "The standard trigonometric functions are supported for complex numbers.\n \n\ncomplex cos(complex z) Return the complex cosine of z, which is \\[\\begin{equation*}\n\\cos(z)\n= \\textrm{cosh}(z \\, i)\n= \\frac{\\displaystyle \\exp(z \\, i) + \\exp(-z \\, i)}\n {\\displaystyle 2}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex sin(complex z) Return the complex sine of z, \\[\\begin{equation*}\n\\sin(z)\n= -\\textrm{sinh}(z \\, i) \\, i\n= \\frac{\\displaystyle \\exp(z \\, i) - \\exp(-z \\, i)}\n {\\displaystyle 2 \\, i}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex tan(complex z) Return the complex tangent of z, \\[\\begin{equation*}\n\\tan(z)\n= -\\textrm{tanh}(z \\, i) \\, i\n= \\frac{(\\exp(-z \\, i) - \\exp(z \\, i)) \\, i}\n {\\exp(-z \\, i) + \\exp(z \\, i)}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex acos(complex z) Return the complex arc (inverse) cosine of z, \\[\\begin{equation*}\n\\textrm{acos}(z)\n= \\frac{1}{2} \\pi + \\log (z \\, i + \\sqrt{1 - z^2}) \\, i.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex asin(complex z) Return the complex arc (inverse) sine of z, \\[\\begin{equation*}\n\\text{asin}(z)\n= -\\log(z \\, i + \\sqrt{1 - z^2}) \\, i.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex atan(complex z) Return the complex arc (inverse) tangent of z, \\[\\begin{equation*}\n\\text{atan}(z)\n= - \\frac{1}{2} (\\log(1 - z \\, i) - \\log(1 + z \\, i)) \\, i.\n\\end{equation*}\\]\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + 
"Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/complex-valued_basic_functions.html#complex-h-trig", + "href": "functions-reference/complex-valued_basic_functions.html#complex-h-trig", + "title": "Complex-Valued Basic Functions", + "section": "", + "text": "The standard hyperbolic trigonometric functions are supported for complex numbers.\n \n\ncomplex cosh(complex z) Return the complex hyperbolic cosine of z, \\[\\begin{equation*}\n\\textrm{cosh}(z)\n= \\frac{\\exp(z) + \\exp(-z)}\n {2}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex sinh(complex z) Return the complex hyperbolic sine of z, \\[\\begin{equation*}\n\\textrm{sinh}(z)\n= \\frac{\\displaystyle \\exp(z) - \\exp(-z)}\n {\\displaystyle 2}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex tanh(complex z) Return the complex hyperbolic tangent of z, \\[\\begin{equation*}\n\\textrm{tanh}(z)\n\\ = \\ \\frac{\\textrm{sinh}(z)}\n {\\textrm{cosh}(z)}\n\\ = \\ \\frac{\\displaystyle \\exp(z) - \\exp(-z)}\n {\\displaystyle \\exp(z) + \\exp(-z)}.\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex acosh(complex z) Return the complex hyperbolic arc (inverse) cosine of z, \\[\\begin{equation*}\n\\textrm{acosh}(z)\n= \\log(z + \\sqrt{(z + 1)(z - 1)}).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex asinh(complex z) Return the complex hyperbolic arc (inverse) sine of z, \\[\\begin{equation*}\n\\textrm{asinh}(z)\n= \\log(z + \\sqrt{1 + z^2}).\n\\end{equation*}\\]\nAvailable since 2.28\n \n\ncomplex atanh(complex z) Return the complex hyperbolic arc (inverse) tangent of z, \\[\\begin{equation*}\n\\textrm{atanh}(z)\n= \\frac{\\log(1 + z) - \\log(1 - z)}\n {2}.\n\\end{equation*}\\]\nAvailable since 2.28", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Complex-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/compound_arithmetic_and_assignment.html", + "href": "functions-reference/compound_arithmetic_and_assignment.html", 
+ "title": "Compound Arithmetic and Assignment", + "section": "", + "text": "Compound arithmetic and assignment statements combine an arithmetic operation and assignment, replacing a statement such as\n x = x op y;\nwith the more compact compound form\n x op= y;\nFor example, x = x + 1; may be replaced with x += 1;. This works for all types that support arithmetic, including the scalar types int, real, complex, the real matrix types vector, row_vector, and matrix, and the complex matrix types, complex_vector, complex_row_vector, and complex_matrix.\n\n\nCompound addition and assignment works wherever the corresponding addition and assignment would be well formed.\n \n\nvoid operator+=(T x, U y) x += y is equivalent to x = x + y. Defined for all types T and U where T = T + U is well formed.\nAvailable since 2.17, complex signatures added in 2.30\n\n\n\nCompound subtraction and assignment works wherever the corresponding subtraction and assignment would be well formed.\n \n\nvoid operator-=(T x, U y) x -= y is equivalent to x = x - y. Defined for all types T and U where T = T - U is well formed.\nAvailable since 2.17, complex signatures added in 2.30\n\n\n\nCompound multiplication and assignment works wherever the corresponding multiplication and assignment would be well formed.\n \n\nvoid operator*=(T x, U y) x *= y is equivalent to x = x * y. Defined for all types T and U where T = T * U is well formed.\nAvailable since 2.17, complex signatures added in 2.30\n\n\n\nCompound division and assignment works wherever the corresponding division and assignment would be well formed.\n \n\nvoid operator/=(T x, U y) x /= y is equivalent to x = x / y. Defined for all types T and U where T = T / U is well formed.\nAvailable since 2.17, complex signatures added in 2.30\n\n\n\nCompound elementwise multiplication and assignment works wherever the corresponding multiplication and assignment would be well formed.\n \n\nvoid operator.*=(T x, U y) x .*= y is equivalent to x = x .* y. 
Defined for all types T and U where T = T .* U is well formed.\nAvailable since 2.17, complex signatures added in 2.30\n\n\n\nCompound elementwise division and assignment works wherever the corresponding division and assignment would be well formed.\n \n\nvoid operator./=(T x, U y) x ./= y is equivalent to x = x ./ y. Defined for all types T and U where T = T ./ U is well formed.\nAvailable since 2.17, complex signatures added in 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Compound Arithmetic and Assignment" + ] + }, + { + "objectID": "functions-reference/compound_arithmetic_and_assignment.html#compound-addition-and-assignment", + "href": "functions-reference/compound_arithmetic_and_assignment.html#compound-addition-and-assignment", + "title": "Compound Arithmetic and Assignment", + "section": "", + "text": "Compound addition and assignment works wherever the corresponding addition and assignment would be well formed.\n \n\nvoid operator+=(T x, U y) x += y is equivalent to x = x + y. Defined for all types T and U where T = T + U is well formed.\nAvailable since 2.17, complex signatures added in 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Compound Arithmetic and Assignment" + ] + }, + { + "objectID": "functions-reference/compound_arithmetic_and_assignment.html#compound-subtraction-and-assignment", + "href": "functions-reference/compound_arithmetic_and_assignment.html#compound-subtraction-and-assignment", + "title": "Compound Arithmetic and Assignment", + "section": "", + "text": "Compound subtraction and assignment works wherever the corresponding subtraction and assignment would be well formed.\n \n\nvoid operator-=(T x, U y) x -= y is equivalent to x = x - y. 
Defined for all types T and U where T = T - U is well formed.\nAvailable since 2.17, complex signatures added in 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Compound Arithmetic and Assignment" + ] + }, + { + "objectID": "functions-reference/compound_arithmetic_and_assignment.html#compound-multiplication-and-assignment", + "href": "functions-reference/compound_arithmetic_and_assignment.html#compound-multiplication-and-assignment", + "title": "Compound Arithmetic and Assignment", + "section": "", + "text": "Compound multiplication and assignment works wherever the corresponding multiplication and assignment would be well formed.\n \n\nvoid operator*=(T x, U y) x *= y is equivalent to x = x * y. Defined for all types T and U where T = T * U is well formed.\nAvailable since 2.17, complex signatures added in 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Compound Arithmetic and Assignment" + ] + }, + { + "objectID": "functions-reference/compound_arithmetic_and_assignment.html#compound-division-and-assignment", + "href": "functions-reference/compound_arithmetic_and_assignment.html#compound-division-and-assignment", + "title": "Compound Arithmetic and Assignment", + "section": "", + "text": "Compound division and assignment works wherever the corresponding division and assignment would be well formed.\n \n\nvoid operator/=(T x, U y) x /= y is equivalent to x = x / y. 
Defined for all types T and U where T = T / U is well formed.\nAvailable since 2.17, complex signatures added in 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Compound Arithmetic and Assignment" + ] + }, + { + "objectID": "functions-reference/compound_arithmetic_and_assignment.html#compound-elementwise-multiplication-and-assignment", + "href": "functions-reference/compound_arithmetic_and_assignment.html#compound-elementwise-multiplication-and-assignment", + "title": "Compound Arithmetic and Assignment", + "section": "", + "text": "Compound elementwise multiplication and assignment works wherever the corresponding multiplication and assignment would be well formed.\n \n\nvoid operator.*=(T x, U y) x .*= y is equivalent to x = x .* y. Defined for all types T and U where T = T .* U is well formed.\nAvailable since 2.17, complex signatures added in 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Compound Arithmetic and Assignment" + ] + }, + { + "objectID": "functions-reference/compound_arithmetic_and_assignment.html#compound-elementwise-division-and-assignment", + "href": "functions-reference/compound_arithmetic_and_assignment.html#compound-elementwise-division-and-assignment", + "title": "Compound Arithmetic and Assignment", + "section": "", + "text": "Compound elementwise division and assignment works wherever the corresponding division and assignment would be well formed.\n \n\nvoid operator./=(T x, U y) x ./= y is equivalent to x = x ./ y. 
Defined for all types T and U where T = T ./ U is well formed.\nAvailable since 2.17, complex signatures added in 2.30", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Compound Arithmetic and Assignment" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html", + "href": "functions-reference/conventions_for_probability_functions.html", + "title": "Conventions for Probability Functions", + "section": "", + "text": "Functions associated with distributions are set up to follow the same naming conventions for both built-in distributions and for user-defined distributions.\n\n\nThe suffix is determined by the type of function according to the following table.\n\n\n\n\n\n\n\n\nfunction\noutcome\nsuffix\n\n\n\n\nlog probability mass function\ndiscrete\n_lpmf\n\n\nlog probability density function\ncontinuous\n_lpdf\n\n\nlog cumulative distribution function\nany\n_lcdf\n\n\nlog complementary cumulative distribution function\nany\n_lccdf\n\n\nrandom number generator\nany\n_rng\n\n\n\nFor example, normal_lpdf is the log of the normal probability density function (pdf) and bernoulli_lpmf is the log of the bernoulli probability mass function (pmf). The log of the corresponding cumulative distribution functions (cdf) use the same suffix, normal_lcdf and bernoulli_lcdf.\n\n\n\nEach probability function has a specific outcome value and a number of parameters. Following conditional probability notation, probability density and mass functions use a vertical bar to separate the outcome from the parameters of the distribution. For example, normal_lpdf(y | mu, sigma) returns the value of mathematical formula \\(\\log \\text{Normal}(y \\, | \\, \\mu, \\sigma)\\). 
Cumulative distribution functions separate the outcome from the parameters in the same way (e.g., normal_lcdf(y_low | mu, sigma)).\n\n\n\nThe notation\n y ~ normal(mu, sigma);\nprovides the same (proportional) contribution to the model log density as the explicit target density increment,\n target += normal_lpdf(y | mu, sigma);\nIn both cases, the effect is to add terms to the target log density. The only difference is that the example with the sampling (~) notation drops all additive constants in the log density; the constants are not necessary for any of Stan’s sampling, approximation, or optimization algorithms.\n\n\n\nAll of the distribution functions are configured to throw exceptions (effectively rejecting iterations or optimization steps) when they are supplied with non-finite arguments. The two cases of non-finite arguments are the infinite values and not-a-number value—these are standard in floating-point arithmetic.\n\n\n\nMany distributions are defined with support or constraints on parameters forming an open interval. For example, the normal density function accepts a scale parameter \\(\\sigma > 0\\). If \\(\\sigma = 0\\), the probability function will throw an exception.\nThis is true even for (complementary) cumulative distribution functions, which will throw exceptions when given input that is out of the support.\n\n\n\nFor most of the probability functions, there is a matching pseudorandom number generator (PRNG) with the suffix _rng. For example, the function normal_rng(real, real) accepts two real arguments, an unconstrained location \\(\\mu\\) and positive scale \\(\\sigma > 0\\), and returns an unconstrained pseudorandom value drawn from \\(\\text{Normal}(\\mu,\\sigma)\\). 
There are also vectorized forms of random number generators which return more than one random variate at a time.\n\n\nUnlike regular functions, the PRNG functions may only be used in the transformed data or generated quantities blocks.\n\n\n\nUnlike the probability functions, only some of the PRNG functions are vectorized.\n\n\n\n\nFor most of the univariate probability functions, there is a corresponding cumulative distribution function, log cumulative distribution function, and log complementary cumulative distribution function.\nFor a univariate random variable \\(Y\\) with probability function \\(p_Y(y \\, | \\, \\theta)\\), the cumulative distribution function (CDF) \\(F_Y\\) is defined by \\[\\begin{equation*}\nF_Y(y) \\ = \\ \\text{Pr}[Y \\le y] \\ = \\ \\int_{-\\infty}^y p(y\\, | \\, \\theta) \\ \\text{d}y.\n\\end{equation*}\\] The complementary cumulative distribution function (CCDF) is defined as \\[\\begin{equation*}\n\\text{Pr}[Y > y] \\ = \\ 1 - F_Y(y).\n\\end{equation*}\\] The reason to use CCDFs instead of CDFs in floating-point arithmetic is that it is possible to represent numbers very close to 0 (the closest you can get is roughly \\(10^{-300}\\)), but not numbers very close to 1 (the closest you can get is roughly \\(1 - 10^{-15}\\)).\nIn Stan, there is a cumulative distribution function for each probability function. For instance, normal_cdf(y | mu, sigma) is defined by \\[\\begin{equation*}\n\\int_{-\\infty}^y \\text{Normal}(y \\, | \\, \\mu, \\sigma) \\ \\text{d}y.\n\\end{equation*}\\] There are also log forms of the CDF and CCDF for most univariate distributions. 
For example, normal_lcdf(y | mu, sigma) is defined by \\[\\begin{equation*}\n\\log \\left( \\int_{-\\infty}^y \\text{Normal}(y \\, | \\, \\mu, \\sigma) \\ \\text{d}y \\right)\n\\end{equation*}\\] and normal_lccdf(y | mu, sigma) is defined by \\[\\begin{equation*}\n\\log \\left( 1 - \\int_{-\\infty}^y \\text{Normal}(y \\, | \\, \\mu, \\sigma) \\ \\text{d}y \\right).\n\\end{equation*}\\]\n\n\n\nStan’s univariate log probability functions, including the log density functions, log mass functions, log CDFs, and log CCDFs, all support vectorized function application, with results defined to be the sum of the elementwise application of the function. Some of the PRNG functions support vectorization, see section vectorized PRNG functions for more details.\nIn all cases, matrix operations are at least as fast and usually faster than loops and vectorized log probability functions are faster than their equivalent form defined with loops. This isn’t because loops are slow in Stan, but because more efficient automatic differentiation can be used. The efficiency comes from the fact that a vectorized log probability function only introduces one new node into the expression graph, thus reducing the number of virtual function calls required to compute gradients in C++, as well as from allowing caching of repeated computations.\nStan also overloads the multivariate normal distribution, including the Cholesky-factor form, allowing arrays of row vectors or vectors for the variate and location parameter. This is a huge savings in speed because the work required to solve the linear system for the covariance matrix is only done once.\nStan also overloads some scalar functions, such as log and exp, to apply to vectors (arrays) and return vectors (arrays). 
These vectorizations are defined elementwise and unlike the probability functions, provide only minimal efficiency speedups over repeated application and assignment in a loop.\n\n\n\n\nThe normal probability function is specified with the signature\n normal_lpdf(reals | reals, reals);\nThe pseudotype reals is used to indicate that an argument position may be vectorized. Argument positions declared as reals may be filled with a real, a one-dimensional array, a vector, or a row-vector. If there is more than one array or vector argument, their types can be anything but their size must match. For instance, it is legal to use normal_lpdf(row_vector | vector, real) as long as the vector and row vector have the same size.\n\n\n\nThe multivariate normal distribution accepting vector or array of vector arguments is written as\n multi_normal_lpdf(vectors | vectors, matrix);\nThese arguments may be row vectors, column vectors, or arrays of row vectors or column vectors.\n\n\n\nThe pseudotype ints is used for vectorized integer arguments. Where it appears either an integer or array of integers may be used.\n\n\n\n\nThe result of a vectorized log probability function is equivalent to the sum of the evaluations on each element. Any non-vector argument, namely real or int, is repeated. For instance, if y is a vector of size N, mu is a vector of size N, and sigma is a scalar, then\n ll = normal_lpdf(y | mu, sigma);\nis just a more efficient way to write\n ll = 0;\n for (n in 1:N) {\n ll = ll + normal_lpdf(y[n] | mu[n], sigma);\n }\nWith the same arguments, the vectorized sampling statement\n y ~ normal(mu, sigma);\nhas the same effect on the total log probability as\n for (n in 1:N) {\n y[n] ~ normal(mu[n], sigma);\n }\n\n\n\nSome PRNG functions accept sequences as well as scalars as arguments. Such functions are indicated by argument pseudotypes reals or ints. In cases of sequence arguments, the output will also be a sequence. 
For example, the following is allowed in the transformed data and generated quantities blocks.\n vector[3] mu = // ...\n array[3] real x = normal_rng(mu, 3);\n\n\nIn the case of PRNG functions, arguments marked ints may be integers or integer arrays, whereas arguments marked reals may be integers or reals, integer or real arrays, vectors, or row vectors.\n\n\n\n\n\n\n\npseudotype\nallowable PRNG arguments\n\n\n\n\nints\nint, array[] int\n\n\nreals\nint, array[] int, real, array[] real, vector, row_vector\n\n\n\n\n\n\nIn general, if there are multiple non-scalar arguments, they must all have the same dimensions, but need not have the same type. For example, the normal_rng function may be called with one vector argument and one real array argument as long as they have the same number of elements.\n vector[3] mu = // ...\n array[3] real sigma = // ...\n array[3] real x = normal_rng(mu, sigma);\n\n\n\nThe result of a vectorized PRNG function depends on the size of the arguments and the distribution’s support. If all arguments are scalars, then the return type is a scalar. For a continuous distribution, if there are any non-scalar arguments, the return type is a real array (array[] real) matching the size of any of the non-scalar arguments, as all non-scalar arguments must have matching size. Discrete distributions return ints and continuous distributions return reals, each of appropriate size. 
The symbol R denotes such a return type.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html#suffix-marks-type-of-function", + "href": "functions-reference/conventions_for_probability_functions.html#suffix-marks-type-of-function", + "title": "Conventions for Probability Functions", + "section": "", + "text": "The suffix is determined by the type of function according to the following table.\n\n\n\n\n\n\n\n\nfunction\noutcome\nsuffix\n\n\n\n\nlog probability mass function\ndiscrete\n_lpmf\n\n\nlog probability density function\ncontinuous\n_lpdf\n\n\nlog cumulative distribution function\nany\n_lcdf\n\n\nlog complementary cumulative distribution function\nany\n_lccdf\n\n\nrandom number generator\nany\n_rng\n\n\n\nFor example, normal_lpdf is the log of the normal probability density function (pdf) and bernoulli_lpmf is the log of the bernoulli probability mass function (pmf). The log of the corresponding cumulative distribution functions (cdf) use the same suffix, normal_lcdf and bernoulli_lcdf.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html#argument-order-and-the-vertical-bar", + "href": "functions-reference/conventions_for_probability_functions.html#argument-order-and-the-vertical-bar", + "title": "Conventions for Probability Functions", + "section": "", + "text": "Each probability function has a specific outcome value and a number of parameters. Following conditional probability notation, probability density and mass functions use a vertical bar to separate the outcome from the parameters of the distribution. For example, normal_lpdf(y | mu, sigma) returns the value of mathematical formula \\(\\log \\text{Normal}(y \\, | \\, \\mu, \\sigma)\\). 
Cumulative distribution functions separate the outcome from the parameters in the same way (e.g., normal_lcdf(y_low | mu, sigma)).", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html#sampling-notation", + "href": "functions-reference/conventions_for_probability_functions.html#sampling-notation", + "title": "Conventions for Probability Functions", + "section": "", + "text": "The notation\n y ~ normal(mu, sigma);\nprovides the same (proportional) contribution to the model log density as the explicit target density increment,\n target += normal_lpdf(y | mu, sigma);\nIn both cases, the effect is to add terms to the target log density. The only difference is that the example with the sampling (~) notation drops all additive constants in the log density; the constants are not necessary for any of Stan’s sampling, approximation, or optimization algorithms.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html#finite-inputs", + "href": "functions-reference/conventions_for_probability_functions.html#finite-inputs", + "title": "Conventions for Probability Functions", + "section": "", + "text": "All of the distribution functions are configured to throw exceptions (effectively rejecting iterations or optimization steps) when they are supplied with non-finite arguments. 
The two cases of non-finite arguments are the infinite values and not-a-number value—these are standard in floating-point arithmetic.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html#boundary-conditions", + "href": "functions-reference/conventions_for_probability_functions.html#boundary-conditions", + "title": "Conventions for Probability Functions", + "section": "", + "text": "Many distributions are defined with support or constraints on parameters forming an open interval. For example, the normal density function accepts a scale parameter \\(\\sigma > 0\\). If \\(\\sigma = 0\\), the probability function will throw an exception.\nThis is true even for (complementary) cumulative distribution functions, which will throw exceptions when given input that is out of the support.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html#distributions-prng", + "href": "functions-reference/conventions_for_probability_functions.html#distributions-prng", + "title": "Conventions for Probability Functions", + "section": "", + "text": "For most of the probability functions, there is a matching pseudorandom number generator (PRNG) with the suffix _rng. For example, the function normal_rng(real, real) accepts two real arguments, an unconstrained location \\(\\mu\\) and positive scale \\(\\sigma > 0\\), and returns an unconstrained pseudorandom value drawn from \\(\\text{Normal}(\\mu,\\sigma)\\). 
There are also vectorized forms of random number generators which return more than one random variate at a time.\n\n\nUnlike regular functions, the PRNG functions may only be used in the transformed data or generated quantities blocks.\n\n\n\nUnlike the probability functions, only some of the PRNG functions are vectorized.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html#cumulative-distribution-functions", + "href": "functions-reference/conventions_for_probability_functions.html#cumulative-distribution-functions", + "title": "Conventions for Probability Functions", + "section": "", + "text": "For most of the univariate probability functions, there is a corresponding cumulative distribution function, log cumulative distribution function, and log complementary cumulative distribution function.\nFor a univariate random variable \\(Y\\) with probability function \\(p_Y(y \\, | \\, \\theta)\\), the cumulative distribution function (CDF) \\(F_Y\\) is defined by \\[\\begin{equation*}\nF_Y(y) \\ = \\ \\text{Pr}[Y \\le y] \\ = \\ \\int_{-\\infty}^y p(y\\, | \\, \\theta) \\ \\text{d}y.\n\\end{equation*}\\] The complementary cumulative distribution function (CCDF) is defined as \\[\\begin{equation*}\n\\text{Pr}[Y > y] \\ = \\ 1 - F_Y(y).\n\\end{equation*}\\] The reason to use CCDFs instead of CDFs in floating-point arithmetic is that it is possible to represent numbers very close to 0 (the closest you can get is roughly \\(10^{-300}\\)), but not numbers very close to 1 (the closest you can get is roughly \\(1 - 10^{-15}\\)).\nIn Stan, there is a cumulative distribution function for each probability function. 
For instance, normal_cdf(y | mu, sigma) is defined by \\[\\begin{equation*}\n\\int_{-\\infty}^y \\text{Normal}(y \\, | \\, \\mu, \\sigma) \\ \\text{d}y.\n\\end{equation*}\\] There are also log forms of the CDF and CCDF for most univariate distributions. For example, normal_lcdf(y | mu, sigma) is defined by \\[\\begin{equation*}\n\\log \\left( \\int_{-\\infty}^y \\text{Normal}(y \\, | \\, \\mu, \\sigma) \\ \\text{d}y \\right)\n\\end{equation*}\\] and normal_lccdf(y | mu, sigma) is defined by \\[\\begin{equation*}\n\\log \\left( 1 - \\int_{-\\infty}^y \\text{Normal}(y \\, | \\, \\mu, \\sigma) \\ \\text{d}y \\right).\n\\end{equation*}\\]", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/conventions_for_probability_functions.html#vectorization", + "href": "functions-reference/conventions_for_probability_functions.html#vectorization", + "title": "Conventions for Probability Functions", + "section": "", + "text": "Stan’s univariate log probability functions, including the log density functions, log mass functions, log CDFs, and log CCDFs, all support vectorized function application, with results defined to be the sum of the elementwise application of the function. Some of the PRNG functions support vectorization, see section vectorized PRNG functions for more details.\nIn all cases, matrix operations are at least as fast and usually faster than loops and vectorized log probability functions are faster than their equivalent form defined with loops. This isn’t because loops are slow in Stan, but because more efficient automatic differentiation can be used. 
The efficiency comes from the fact that a vectorized log probability function only introduces one new node into the expression graph, thus reducing the number of virtual function calls required to compute gradients in C++, as well as from allowing caching of repeated computations.\nStan also overloads the multivariate normal distribution, including the Cholesky-factor form, allowing arrays of row vectors or vectors for the variate and location parameter. This is a huge savings in speed because the work required to solve the linear system for the covariance matrix is only done once.\nStan also overloads some scalar functions, such as log and exp, to apply to vectors (arrays) and return vectors (arrays). These vectorizations are defined elementwise and unlike the probability functions, provide only minimal efficiency speedups over repeated application and assignment in a loop.\n\n\n\n\nThe normal probability function is specified with the signature\n normal_lpdf(reals | reals, reals);\nThe pseudotype reals is used to indicate that an argument position may be vectorized. Argument positions declared as reals may be filled with a real, a one-dimensional array, a vector, or a row-vector. If there is more than one array or vector argument, their types can be anything but their size must match. For instance, it is legal to use normal_lpdf(row_vector | vector, real) as long as the vector and row vector have the same size.\n\n\n\nThe multivariate normal distribution accepting vector or array of vector arguments is written as\n multi_normal_lpdf(vectors | vectors, matrix);\nThese arguments may be row vectors, column vectors, or arrays of row vectors or column vectors.\n\n\n\nThe pseudotype ints is used for vectorized integer arguments. Where it appears either an integer or array of integers may be used.\n\n\n\n\nThe result of a vectorized log probability function is equivalent to the sum of the evaluations on each element. 
Any non-vector argument, namely real or int, is repeated. For instance, if y is a vector of size N, mu is a vector of size N, and sigma is a scalar, then\n ll = normal_lpdf(y | mu, sigma);\nis just a more efficient way to write\n ll = 0;\n for (n in 1:N) {\n ll = ll + normal_lpdf(y[n] | mu[n], sigma);\n }\nWith the same arguments, the vectorized sampling statement\n y ~ normal(mu, sigma);\nhas the same effect on the total log probability as\n for (n in 1:N) {\n y[n] ~ normal(mu[n], sigma);\n }\n\n\n\nSome PRNG functions accept sequences as well as scalars as arguments. Such functions are indicated by argument pseudotypes reals or ints. In cases of sequence arguments, the output will also be a sequence. For example, the following is allowed in the transformed data and generated quantities blocks.\n vector[3] mu = // ...\n array[3] real x = normal_rng(mu, 3);\n\n\nIn the case of PRNG functions, arguments marked ints may be integers or integer arrays, whereas arguments marked reals may be integers or reals, integer or real arrays, vectors, or row vectors.\n\n\n\n\n\n\n\npseudotype\nallowable PRNG arguments\n\n\n\n\nints\nint, array[] int\n\n\nreals\nint, array[] int, real, array[] real, vector, row_vector\n\n\n\n\n\n\nIn general, if there are multiple non-scalar arguments, they must all have the same dimensions, but need not have the same type. For example, the normal_rng function may be called with one vector argument and one real array argument as long as they have the same number of elements.\n vector[3] mu = // ...\n array[3] real sigma = // ...\n array[3] real x = normal_rng(mu, sigma);\n\n\n\nThe result of a vectorized PRNG function depends on the size of the arguments and the distribution’s support. If all arguments are scalars, then the return type is a scalar. 
For a continuous distribution, if there are any non-scalar arguments, the return type is a real array (array[] real) matching the size of any of the non-scalar arguments, as all non-scalar arguments must have matching size. Discrete distributions return ints and continuous distributions return reals, each of appropriate size. The symbol R denotes such a return type.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Conventions for Probability Functions" + ] + }, + { + "objectID": "functions-reference/covariance_matrix_distributions.html", + "href": "functions-reference/covariance_matrix_distributions.html", + "title": "Covariance Matrix Distributions", + "section": "", + "text": "The covariance matrix distributions have support on symmetric, positive-definite \\(K \\times K\\) matrices or their Cholesky factors (square, lower triangular matrices with positive diagonal elements).\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\nu \\in (K-1,\\infty)\\), and \\(S \\in\n\\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for symmetric and positive-definite \\(W \\in \\mathbb{R}^{K \\times K}\\), \\[\\begin{equation*}\n\\text{Wishart}(W \\mid \\nu,S) = \\frac{1}{2^{\\nu K / 2}}\n\\ \\frac{1}{\\Gamma_K \\! \\left( \\frac{\\nu}{2} \\right)}\n\\ \\left| S \\right|^{-\\nu/2} \\ \\left| W \\right|^{(\\nu - K - 1)/2}\n\\ \\exp \\! \\left(- \\frac{1}{2} \\ \\text{tr}\\left( S^{-1} W \\right) \\right) \\! 
,\n\\end{equation*}\\] where \\(\\text{tr}()\\) is the matrix trace function, and \\(\\Gamma_K()\\) is the multivariate Gamma function, \\[\\begin{equation*}\n\\Gamma_K(x) = \\frac{1}{\\pi^{K(K-1)/4}} \\ \\prod_{k=1}^K \\Gamma \\left( x + \\frac{1 - k}{2} \\right) \\!.\n\\end{equation*}\\]\n\n\n\nW ~ wishart(nu, Sigma)\nIncrement target log probability density with wishart_lupdf(W | nu, Sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal wishart_lpdf(matrix W | real nu, matrix Sigma) Return the log of the Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma.\nAvailable since 2.12\n \n\nreal wishart_lupdf(matrix W | real nu, matrix Sigma) Return the log of the Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nmatrix wishart_rng(real nu, matrix Sigma) Generate a Wishart variate with degrees of freedom nu and symmetric and positive-definite scale matrix Sigma; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.0\n\n\n\n\nThe Cholesky parameterization of the Wishart distribution uses a Cholesky factor for both the variate and the parameter. 
If \\(S\\) and \\(W\\) are positive definite matrices with Cholesky factors \\(L_S\\) and \\(L_W\\) (i.e., \\(S = L_S L_S^{\\top}\\) and \\(W = L_W L_W^{\\top}\\)), then the Cholesky parameterization is defined so that \\[\\begin{equation*}\nL_W \\sim \\textrm{WishartCholesky}(\\nu, L_S)\n\\end{equation*}\\] if and only if \\[\\begin{equation*}\nW \\sim \\textrm{Wishart}(\\nu, S).\n\\end{equation*}\\]\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\nu \\in (K-1, \\infty)\\), and \\(L_S, L_W \\in \\mathbb{R}^{K \\times K}\\) are lower triangular matrixes with positive diagonal elements, then the Cholesky parameterized Wishart density is \\[\\begin{equation*}\n\\text{WishartCholesky}(L_W \\mid \\nu,L_S)\n= \\text{Wishart}(L_W L_W^{\\top} \\mid \\nu,L_S L_S^{\\top}) \\, \\left| J_{f^{-1}} \\right|,\n\\end{equation*}\\] where \\(J_{f^{-1}}\\) is the Jacobian of the (inverse) transform of the variate, \\(f^{-1}(L_W) = L_W L_W^{\\top}\\). The log absolute determinant is \\[\\begin{equation*}\n\\log \\left| J_{f^{-1}} \\right|\n= K \\log(2) + \\sum_{k=1}^K (K - k + 1) \\log {(L_W)_{k,\\, k}}.\n\\end{equation*}\\]\nThe probability functions will raise errors if \\(\\nu \\leq K - 1\\) or if \\(L_S\\) and \\(L_W\\) are not Cholesky factors (square, lower-triangular matrices with positive diagonal elements) of the same size.\n\n\n\n \n\nreal wishart_cholesky_lpdf(matrix L_W | real nu, matrix L_S) Return the log of the Wishart density for lower-triangular Cholesky factor L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S.\nAvailable since 2.30\n \n\nreal wishart_cholesky_lupdf(matrix L_W | real nu, matrix L_S) Return the log of the Wishart density for lower-triangular Cholesky factor of L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S dropping constant additive terms.\nAvailable since 2.30\n \n\nmatrix wishart_cholesky_rng(real nu, matrix L_S) Generate the Cholesky factor of a Wishart variate with 
degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S; may only be used in transformed data and generated quantities blocks\nAvailable since 2.30\n\n\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\nu \\in (K-1,\\infty)\\), and \\(S \\in\n\\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for symmetric and positive-definite \\(W \\in \\mathbb{R}^{K \\times K}\\), \\[\\begin{equation*}\n\\text{InvWishart}(W \\mid \\nu,S) = \\frac{1}{2^{\\nu K / 2}} \\ \\frac{1}{\\Gamma_K \\! \\left( \\frac{\\nu}{2} \\right)}\n\\ \\left| S \\right|^{\\nu/2} \\ \\left| W \\right|^{-(\\nu + K + 1)/2}\n\\ \\exp \\! \\left( - \\frac{1}{2} \\ \\text{tr}(SW^{-1}) \\right) \\! .\n\\end{equation*}\\]\n\n\n\nW ~ inv_wishart(nu, Sigma)\nIncrement target log probability density with inv_wishart_lupdf(W | nu, Sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal inv_wishart_lpdf(matrix W | real nu, matrix Sigma) Return the log of the inverse Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma.\nAvailable since 2.12\n \n\nreal inv_wishart_lupdf(matrix W | real nu, matrix Sigma) Return the log of the inverse Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nmatrix inv_wishart_rng(real nu, matrix Sigma) Generate an inverse Wishart variate with degrees of freedom nu and symmetric and positive-definite scale matrix Sigma; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.0\n\n\n\n\nThe Cholesky parameterization of the inverse Wishart distribution uses a Cholesky factor for both the variate and the parameter. 
If \\(S\\) and \\(W\\) are positive definite matrices with Cholesky factors \\(L_S\\) and \\(L_W\\) (i.e., \\(S = L_S L_S^{\\top}\\) and \\(W = L_W L_W^{\\top}\\)), then the Cholesky parameterization is defined so that \\[\\begin{equation*}\nL_W \\sim \\textrm{InvWishartCholesky}(\\nu, L_S)\n\\end{equation*}\\] if and only if \\[\\begin{equation*}\nW \\sim \\textrm{InvWishart}(\\nu, S).\n\\end{equation*}\\]\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\nu \\in (K-1, \\infty)\\), and \\(L_S, L_W \\in\n\\mathbb{R}^{K \\times K}\\) are lower triangular matrixes with positive diagonal elements, then the Cholesky parameterized inverse Wishart density is \\[\\begin{equation*}\n\\text{InvWishartCholesky}(L_W \\mid \\nu,L_S) =\n\\text{InvWishart}(L_WL_W^{\\top} \\mid \\nu, L_S L_S^{\\top}) \\, \\left| J_{f^{-1}} \\right|,\n\\end{equation*}\\] where \\(J_{f^{-1}}\\) is the Jacobian of the (inverse) transform of the variate, \\(f^{-1}(L_W) = L_W L_W^{\\top}\\). The log absolute determinant is \\[\\begin{equation*}\n\\log \\left| J_{f^{-1}} \\right|\n= K \\log(2) + \\sum_{k=1}^K (K - k + 1) \\log {(L_W)_{k,\\, k}}.\n\\end{equation*}\\]\nThe probability functions will raise errors if \\(\\nu \\leq K - 1\\) or if \\(L_S\\) and \\(L_W\\) are not Cholesky factors (square, lower-triangular matrices with positive diagonal elements) of the same size.\n\n\n\n \n\nreal inv_wishart_cholesky_lpdf(matrix L_W | real nu, matrix L_S) Return the log of the inverse Wishart density for lower-triangular Cholesky factor L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S.\nAvailable since 2.30\n \n\nreal inv_wishart_cholesky_lupdf(matrix L_W | real nu, matrix L_S) Return the log of the inverse Wishart density for lower-triangular Cholesky factor of L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S dropping constant additive terms.\nAvailable since 2.30\n \n\nmatrix inv_wishart_cholesky_rng(real nu, matrix L_S) Generate 
the Cholesky factor of an inverse Wishart variate with degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Covariance Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/covariance_matrix_distributions.html#wishart-distribution", + "href": "functions-reference/covariance_matrix_distributions.html#wishart-distribution", + "title": "Covariance Matrix Distributions", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\), \\(\\nu \\in (K-1,\\infty)\\), and \\(S \\in\n\\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for symmetric and positive-definite \\(W \\in \\mathbb{R}^{K \\times K}\\), \\[\\begin{equation*}\n\\text{Wishart}(W \\mid \\nu,S) = \\frac{1}{2^{\\nu K / 2}}\n\\ \\frac{1}{\\Gamma_K \\! \\left( \\frac{\\nu}{2} \\right)}\n\\ \\left| S \\right|^{-\\nu/2} \\ \\left| W \\right|^{(\\nu - K - 1)/2}\n\\ \\exp \\! \\left(- \\frac{1}{2} \\ \\text{tr}\\left( S^{-1} W \\right) \\right) \\! 
,\n\\end{equation*}\\] where \\(\\text{tr}()\\) is the matrix trace function, and \\(\\Gamma_K()\\) is the multivariate Gamma function, \\[\\begin{equation*}\n\\Gamma_K(x) = \\frac{1}{\\pi^{K(K-1)/4}} \\ \\prod_{k=1}^K \\Gamma \\left( x + \\frac{1 - k}{2} \\right) \\!.\n\\end{equation*}\\]\n\n\n\nW ~ wishart(nu, Sigma)\nIncrement target log probability density with wishart_lupdf(W | nu, Sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal wishart_lpdf(matrix W | real nu, matrix Sigma) Return the log of the Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma.\nAvailable since 2.12\n \n\nreal wishart_lupdf(matrix W | real nu, matrix Sigma) Return the log of the Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nmatrix wishart_rng(real nu, matrix Sigma) Generate a Wishart variate with degrees of freedom nu and symmetric and positive-definite scale matrix Sigma; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Covariance Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/covariance_matrix_distributions.html#wishart-cholesky-distribution", + "href": "functions-reference/covariance_matrix_distributions.html#wishart-cholesky-distribution", + "title": "Covariance Matrix Distributions", + "section": "", + "text": "The Cholesky parameterization of the Wishart distribution uses a Cholesky factor for both the variate and the parameter. 
If \\(S\\) and \\(W\\) are positive definite matrices with Cholesky factors \\(L_S\\) and \\(L_W\\) (i.e., \\(S = L_S L_S^{\\top}\\) and \\(W = L_W L_W^{\\top}\\)), then the Cholesky parameterization is defined so that \\[\\begin{equation*}\nL_W \\sim \\textrm{WishartCholesky}(\\nu, L_S)\n\\end{equation*}\\] if and only if \\[\\begin{equation*}\nW \\sim \\textrm{Wishart}(\\nu, S).\n\\end{equation*}\\]\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\nu \\in (K-1, \\infty)\\), and \\(L_S, L_W \\in \\mathbb{R}^{K \\times K}\\) are lower triangular matrixes with positive diagonal elements, then the Cholesky parameterized Wishart density is \\[\\begin{equation*}\n\\text{WishartCholesky}(L_W \\mid \\nu,L_S)\n= \\text{Wishart}(L_W L_W^{\\top} \\mid \\nu,L_S L_S^{\\top}) \\, \\left| J_{f^{-1}} \\right|,\n\\end{equation*}\\] where \\(J_{f^{-1}}\\) is the Jacobian of the (inverse) transform of the variate, \\(f^{-1}(L_W) = L_W L_W^{\\top}\\). The log absolute determinant is \\[\\begin{equation*}\n\\log \\left| J_{f^{-1}} \\right|\n= K \\log(2) + \\sum_{k=1}^K (K - k + 1) \\log {(L_W)_{k,\\, k}}.\n\\end{equation*}\\]\nThe probability functions will raise errors if \\(\\nu \\leq K - 1\\) or if \\(L_S\\) and \\(L_W\\) are not Cholesky factors (square, lower-triangular matrices with positive diagonal elements) of the same size.\n\n\n\n \n\nreal wishart_cholesky_lpdf(matrix L_W | real nu, matrix L_S) Return the log of the Wishart density for lower-triangular Cholesky factor L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S.\nAvailable since 2.30\n \n\nreal wishart_cholesky_lupdf(matrix L_W | real nu, matrix L_S) Return the log of the Wishart density for lower-triangular Cholesky factor of L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S dropping constant additive terms.\nAvailable since 2.30\n \n\nmatrix wishart_cholesky_rng(real nu, matrix L_S) Generate the Cholesky factor of a Wishart variate with 
degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S; may only be used in transformed data and generated quantities blocks\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Covariance Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/covariance_matrix_distributions.html#inverse-wishart-distribution", + "href": "functions-reference/covariance_matrix_distributions.html#inverse-wishart-distribution", + "title": "Covariance Matrix Distributions", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\), \\(\\nu \\in (K-1,\\infty)\\), and \\(S \\in\n\\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for symmetric and positive-definite \\(W \\in \\mathbb{R}^{K \\times K}\\), \\[\\begin{equation*}\n\\text{InvWishart}(W \\mid \\nu,S) = \\frac{1}{2^{\\nu K / 2}} \\ \\frac{1}{\\Gamma_K \\! \\left( \\frac{\\nu}{2} \\right)}\n\\ \\left| S \\right|^{\\nu/2} \\ \\left| W \\right|^{-(\\nu + K + 1)/2}\n\\ \\exp \\! \\left( - \\frac{1}{2} \\ \\text{tr}(SW^{-1}) \\right) \\! 
.\n\\end{equation*}\\]\n\n\n\nW ~ inv_wishart(nu, Sigma)\nIncrement target log probability density with inv_wishart_lupdf(W | nu, Sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal inv_wishart_lpdf(matrix W | real nu, matrix Sigma) Return the log of the inverse Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma.\nAvailable since 2.12\n \n\nreal inv_wishart_lupdf(matrix W | real nu, matrix Sigma) Return the log of the inverse Wishart density for symmetric and positive-definite matrix W given degrees of freedom nu and symmetric and positive-definite scale matrix Sigma dropping constant additive terms.\nAvailable since 2.25\n \n\nmatrix inv_wishart_rng(real nu, matrix Sigma) Generate an inverse Wishart variate with degrees of freedom nu and symmetric and positive-definite scale matrix Sigma; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Covariance Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/covariance_matrix_distributions.html#inverse-wishart-cholesky-distribution", + "href": "functions-reference/covariance_matrix_distributions.html#inverse-wishart-cholesky-distribution", + "title": "Covariance Matrix Distributions", + "section": "", + "text": "The Cholesky parameterization of the inverse Wishart distribution uses a Cholesky factor for both the variate and the parameter. 
If \\(S\\) and \\(W\\) are positive definite matrices with Cholesky factors \\(L_S\\) and \\(L_W\\) (i.e., \\(S = L_S L_S^{\\top}\\) and \\(W = L_W L_W^{\\top}\\)), then the Cholesky parameterization is defined so that \\[\\begin{equation*}\nL_W \\sim \\textrm{InvWishartCholesky}(\\nu, L_S)\n\\end{equation*}\\] if and only if \\[\\begin{equation*}\nW \\sim \\textrm{InvWishart}(\\nu, S).\n\\end{equation*}\\]\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\nu \\in (K-1, \\infty)\\), and \\(L_S, L_W \\in\n\\mathbb{R}^{K \\times K}\\) are lower triangular matrixes with positive diagonal elements, then the Cholesky parameterized inverse Wishart density is \\[\\begin{equation*}\n\\text{InvWishartCholesky}(L_W \\mid \\nu,L_S) =\n\\text{InvWishart}(L_WL_W^{\\top} \\mid \\nu, L_S L_S^{\\top}) \\, \\left| J_{f^{-1}} \\right|,\n\\end{equation*}\\] where \\(J_{f^{-1}}\\) is the Jacobian of the (inverse) transform of the variate, \\(f^{-1}(L_W) = L_W L_W^{\\top}\\). The log absolute determinant is \\[\\begin{equation*}\n\\log \\left| J_{f^{-1}} \\right|\n= K \\log(2) + \\sum_{k=1}^K (K - k + 1) \\log {(L_W)_{k,\\, k}}.\n\\end{equation*}\\]\nThe probability functions will raise errors if \\(\\nu \\leq K - 1\\) or if \\(L_S\\) and \\(L_W\\) are not Cholesky factors (square, lower-triangular matrices with positive diagonal elements) of the same size.\n\n\n\n \n\nreal inv_wishart_cholesky_lpdf(matrix L_W | real nu, matrix L_S) Return the log of the inverse Wishart density for lower-triangular Cholesky factor L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S.\nAvailable since 2.30\n \n\nreal inv_wishart_cholesky_lupdf(matrix L_W | real nu, matrix L_S) Return the log of the inverse Wishart density for lower-triangular Cholesky factor of L_W given degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S dropping constant additive terms.\nAvailable since 2.30\n \n\nmatrix inv_wishart_cholesky_rng(real nu, matrix L_S) Generate 
the Cholesky factor of an inverse Wishart variate with degrees of freedom nu and lower-triangular Cholesky factor of the scale matrix L_S; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Covariance Matrix Distributions" + ] + }, + { + "objectID": "functions-reference/distributions_over_unbounded_vectors.html", + "href": "functions-reference/distributions_over_unbounded_vectors.html", + "title": "Distributions over Unbounded Vectors", + "section": "", + "text": "The unbounded vector probability distributions have support on all of \\(\\mathbb{R}^K\\) for some fixed \\(K\\).\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(\\Sigma \\in\n\\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for \\(y \\in \\mathbb{R}^K\\), \\[\\begin{equation*}\n\\text{MultiNormal}(y|\\mu,\\Sigma) =\n\\frac{1}{\\left( 2 \\pi \\right)^{K/2}} \\ \\frac{1}{\\sqrt{|\\Sigma|}}\n\\ \\exp \\! \\left( \\! - \\frac{1}{2} (y - \\mu)^{\\top} \\, \\Sigma^{-1} \\, (y - \\mu) \\right) \\! ,\n\\end{equation*}\\] where \\(|\\Sigma|\\) is the absolute determinant of \\(\\Sigma\\).\n\n\n\ny ~ multi_normal(mu, Sigma)\nIncrement target log probability density with multi_normal_lupdf(y | mu, Sigma).\nAvailable since 2.0\n \n\n\n\n\nThe multivariate normal probability function is overloaded to allow the variate vector \\(y\\) and location vector \\(\\mu\\) to be vectors or row vectors (or to mix the two types). 
The density function is also vectorized, so it allows arrays of row vectors or vectors as arguments; see section vectorized function signatures for a description of vectorization.\n \n\nreal multi_normal_lpdf(vectors y | vectors mu, matrix Sigma) The log of the multivariate normal density of vector(s) y given location vector(s) mu and covariance matrix Sigma\nAvailable since 2.12\n \n\nreal multi_normal_lupdf(vectors y | vectors mu, matrix Sigma) The log of the multivariate normal density of vector(s) y given location vector(s) mu and covariance matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_lpdf(vectors y | row_vectors mu, matrix Sigma) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and covariance matrix Sigma\nAvailable since 2.12\n \n\nreal multi_normal_lupdf(vectors y | row_vectors mu, matrix Sigma) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and covariance matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_lpdf(row_vectors y | vectors mu, matrix Sigma) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and covariance matrix Sigma\nAvailable since 2.12\n \n\nreal multi_normal_lupdf(row_vectors y | vectors mu, matrix Sigma) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and covariance matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_lpdf(row_vectors y | row_vectors mu, matrix Sigma) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and covariance matrix Sigma\nAvailable since 2.12\n \n\nreal multi_normal_lupdf(row_vectors y | row_vectors mu, matrix Sigma) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and covariance matrix Sigma dropping constant additive terms\nAvailable since 
2.25\nAlthough there is a direct multi-normal RNG function, if more than one result is required, it’s much more efficient to Cholesky factor the covariance matrix and call multi_normal_cholesky_rng; see section multi-variate normal, cholesky parameterization.\n \n\nvector multi_normal_rng(vector mu, matrix Sigma) Generate a multivariate normal variate with location mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0\n \n\nvector multi_normal_rng(row_vector mu, matrix Sigma) Generate a multivariate normal variate with location mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_normal_rng(vectors mu, matrix Sigma) Generate an array of multivariate normal variates with locations mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_normal_rng(row_vectors mu, matrix Sigma) Generate an array of multivariate normal variates with locations mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(\\Omega \\in\n\\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for \\(y \\in \\mathbb{R}^K\\), \\[\\begin{equation*} \\text{MultiNormalPrecision}(y|\\mu,\\Omega)\n= \\text{MultiNormal}(y|\\mu,\\Omega^{-1}) \\end{equation*}\\]\n\n\n\ny ~ multi_normal_prec(mu, Omega)\nIncrement target log probability density with multi_normal_prec_lupdf(y | mu, Omega).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal multi_normal_prec_lpdf(vectors y | vectors mu, matrix Omega) The log of the multivariate normal density of vector(s) y given location vector(s) mu and positive definite precision matrix Omega\nAvailable since 2.18\n \n\nreal multi_normal_prec_lupdf(vectors y | vectors mu, matrix 
Omega) The log of the multivariate normal density of vector(s) y given location vector(s) mu and positive definite precision matrix Omega dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_prec_lpdf(vectors y | row_vectors mu, matrix Omega) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and positive definite precision matrix Omega\nAvailable since 2.18\n \n\nreal multi_normal_prec_lupdf(vectors y | row_vectors mu, matrix Omega) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and positive definite precision matrix Omega dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_prec_lpdf(row_vectors y | vectors mu, matrix Omega) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and positive definite precision matrix Omega\nAvailable since 2.18\n \n\nreal multi_normal_prec_lupdf(row_vectors y | vectors mu, matrix Omega) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and positive definite precision matrix Omega dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_prec_lpdf(row_vectors y | row_vectors mu, matrix Omega) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and positive definite precision matrix Omega\nAvailable since 2.18\n \n\nreal multi_normal_prec_lupdf(row_vectors y | row_vectors mu, matrix Omega) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and positive definite precision matrix Omega dropping constant additive terms\nAvailable since 2.25\n\n\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(L \\in\n\\mathbb{R}^{K \\times K}\\) is lower triangular and such that \\(LL^{\\top}\\) is positive definite, then for \\(y \\in \\mathbb{R}^K\\), \\[\\begin{equation*}\n\\text{MultiNormalCholesky}(y|\\mu,L) 
=\n\\text{MultiNormal}(y|\\mu,LL^{\\top}). \\end{equation*}\\] If \\(L\\) is lower triangular and \\(LL^{\\top}\\) is a \\(K \\times K\\) positive definite matrix, then \\(L_{k,k}\\) must be strictly positive for \\(k \\in 1{:}K\\). If an \\(L\\) is provided that is not the Cholesky factor of a positive-definite matrix, the probability functions will raise errors.\n\n\n\ny ~ multi_normal_cholesky(mu, L)\nIncrement target log probability density with multi_normal_cholesky_lupdf(y | mu, L).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal multi_normal_cholesky_lpdf(vectors y | vectors mu, matrix L) The log of the multivariate normal density of vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L\nAvailable since 2.18\n \n\nreal multi_normal_cholesky_lupdf(vectors y | vectors mu, matrix L) The log of the multivariate normal density of vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_cholesky_lpdf(vectors y | row_vectors mu, matrix L) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L\nAvailable since 2.18\n \n\nreal multi_normal_cholesky_lupdf(vectors y | row_vectors mu, matrix L) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_cholesky_lpdf(row_vectors y | vectors mu, matrix L) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L\nAvailable since 2.18\n \n\nreal multi_normal_cholesky_lupdf(row_vectors y | vectors mu, matrix L) The log of the multivariate normal density of row vector(s) y given location vector(s)
mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_cholesky_lpdf(row_vectors y | row_vectors mu, matrix L) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L\nAvailable since 2.18\n \n\nreal multi_normal_cholesky_lupdf(row_vectors y | row_vectors mu, matrix L) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms\nAvailable since 2.25\n \n\nvector multi_normal_cholesky_rng(vector mu, matrix L) Generate a multivariate normal variate with location mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks\nAvailable since 2.3\n \n\nvector multi_normal_cholesky_rng(row_vector mu, matrix L) Generate a multivariate normal variate with location mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_normal_cholesky_rng(vectors mu, matrix L) Generate an array of multivariate normal variates with locations mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_normal_cholesky_rng(row_vectors mu, matrix L) Generate an array of multivariate normal variates with locations mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(K,N \\in \\mathbb{N}\\), \\(\\Sigma \\in \\mathbb{R}^{N \\times N}\\) is symmetric, positive definite kernel matrix and \\(w \\in \\mathbb{R}^{K}\\) is a vector of positive inverse 
scales, then for \\(y \\in \\mathbb{R}^{K\n\\times N}\\), \\[\\begin{equation*} \\text{MultiGP}(y|\\Sigma,w) = \\prod_{i=1}^{K}\n\\text{MultiNormal}(y_i|0,w_i^{-1} \\Sigma), \\end{equation*}\\] where \\(y_i\\) is the \\(i\\)th row of \\(y\\). This is used to efficiently handle Gaussian Processes with multi-variate outputs where only the output dimensions share a kernel function but vary based on their scale. Note that this function does not take into account the mean prediction.\n\n\n\ny ~ multi_gp(Sigma, w)\nIncrement target log probability density with multi_gp_lupdf(y | Sigma, w).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal multi_gp_lpdf(matrix y | matrix Sigma, vector w) The log of the multivariate GP density of matrix y given kernel matrix Sigma and inverse scales w\nAvailable since 2.12\n \n\nreal multi_gp_lupdf(matrix y | matrix Sigma, vector w) The log of the multivariate GP density of matrix y given kernel matrix Sigma and inverse scales w dropping constant additive terms\nAvailable since 2.25\n\n\n\n\n\n\nIf \\(K,N \\in \\mathbb{N}\\), \\(L \\in \\mathbb{R}^{N \\times N}\\) is lower triangular and such that \\(LL^{\\top}\\) is a positive definite kernel matrix (implying \\(L_{n,n} > 0\\) for \\(n \\in 1{:}N\\)), and \\(w \\in\n\\mathbb{R}^{K}\\) is a vector of positive inverse scales, then for \\(y\n\\in \\mathbb{R}^{K \\times N}\\), \\[\\begin{equation*} \\text{MultiGPCholesky}(y \\, | \\ L,w)\n= \\prod_{i=1}^{K} \\text{MultiNormal}(y_i|0,w_i^{-1} LL^{\\top}), \\end{equation*}\\] where \\(y_i\\) is the \\(i\\)th row of \\(y\\). This is used to efficiently handle Gaussian Processes with multi-variate outputs where only the output dimensions share a kernel function but vary based on their scale. If the model allows parameterization in terms of Cholesky factor of the kernel matrix, this distribution is also more efficient than \\(\\text{MultiGP}()\\).
Note that this function does not take into account the mean prediction.\n\n\n\ny ~ multi_gp_cholesky(L, w)\nIncrement target log probability density with multi_gp_cholesky_lupdf(y | L, w).\nAvailable since 2.5\n \n\n\n\n\n \n\nreal multi_gp_cholesky_lpdf(matrix y | matrix L, vector w) The log of the multivariate GP density of matrix y given lower-triangular Cholesky factor of the kernel matrix L and inverse scales w\nAvailable since 2.12\n \n\nreal multi_gp_cholesky_lupdf(matrix y | matrix L, vector w) The log of the multivariate GP density of matrix y given lower-triangular Cholesky factor of the kernel matrix L and inverse scales w dropping constant additive terms\nAvailable since 2.25\n\n\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(\\nu \\in \\mathbb{R}^+\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(\\Sigma \\in \\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for \\(y \\in \\mathbb{R}^K\\), \\[\\begin{equation*} \\begin{array}{l}\n\\text{MultiStudentT}(y\\,|\\,\\nu,\\,\\mu,\\,\\Sigma) \\\\ =\n\\frac{1}{\\pi^{K/2}} \\ \\frac{1}{\\nu^{K/2}} \\ \\frac{\\Gamma\\!\\left((\\nu +\nK)/2\\right)} {\\Gamma(\\nu/2)} \\ \\frac{1}{\\sqrt{\\left| \\Sigma\n\\right|}} \\ \\left( 1 + \\frac{1}{\\nu} \\, \\left(y - \\mu\\right)^{\\top} \\,\n\\Sigma^{-1} \\, \\left(y - \\mu\\right) \\right)^{-(\\nu + K)/2} \\!
.\n\\end{array} \\end{equation*}\\]\n\n\n\ny ~ multi_student_t(nu, mu, Sigma)\nIncrement target log probability density with multi_student_t_lupdf(y | nu, mu, Sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal multi_student_t_lpdf(vectors y | real nu, vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma\nAvailable since 2.18\n \n\nreal multi_student_t_lupdf(vectors y | real nu, vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_student_t_lpdf(vectors y | real nu, row_vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma\nAvailable since 2.18\n \n\nreal multi_student_t_lupdf(vectors y | real nu, row_vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_student_t_lpdf(row_vectors y | real nu, vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of row vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma\nAvailable since 2.18\n \n\nreal multi_student_t_lupdf(row_vectors y | real nu, vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of row vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_student_t_lpdf(row_vectors y | real nu, row_vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of row vector(s) y given degrees of freedom nu, location row 
vector(s) mu, and scale matrix Sigma\nAvailable since 2.18\n \n\nreal multi_student_t_lupdf(row_vectors y | real nu, row_vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of row vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nvector multi_student_t_rng(real nu, vector mu, matrix Sigma) Generate a multivariate Student-\\(t\\) variate with degrees of freedom nu, location mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0\n \n\nvector multi_student_t_rng(real nu, row_vector mu, matrix Sigma) Generate a multivariate Student-\\(t\\) variate with degrees of freedom nu, location mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_student_t_rng(real nu, vectors mu, matrix Sigma) Generate an array of multivariate Student-\\(t\\) variates with degrees of freedom nu, locations mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_student_t_rng(real nu, row_vectors mu, matrix Sigma) Generate an array of multivariate Student-\\(t\\) variates with degrees of freedom nu, locations mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n\n\n\n\n\n\nLet \\(K \\in \\mathbb{N}\\), \\(\\nu \\in \\mathbb{R}^+\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(L\\) a \\(K \\times K\\) lower-triangular matrix with strictly positive, finite diagonal then \\[\\begin{equation*}\n\\begin{array}{l}\n\\text{MultiStudentTCholesky}(y\\,\\mid \\nu,\\,\\mu,\\,L) \\\\ =\n\\frac{1}{\\pi^{K/2}} \\ \\frac{1}{\\nu^{K/2}} \\ \\frac{\\Gamma\\!\\left((\\nu +\nK)/2\\right)} {\\Gamma(\\nu/2)} \\ \\frac{1}{\\left| L\n\\right|} \\ \\left( 1 + \\frac{1}{\\nu} \\, \\left(y -
\\mu\\right)^{\\top} \\,\nL^{-T}L^{-1} \\, \\left(y - \\mu\\right) \\right)^{-(\\nu + K)/2} \\! .\n\\end{array}\n\\end{equation*}\\]\n\n\n\ny ~ multi_student_t_cholesky(nu, mu, L)\nIncrement target log probability density with multi_student_t_cholesky_lupdf(y | nu, mu, L).\nAvailable since 2.30\n \n\n\n\n\n \n\nreal multi_student_t_cholesky_lpdf(vectors y | real nu, vectors mu, matrix L) The log of the multivariate Student-\\(t\\) density of vector or array of vectors y given degrees of freedom nu, location vector or array of vectors mu, and Cholesky factor of the scale matrix L. For a definition of the arguments compatible with the vectors type, see the probability vectorization section.\nAvailable since 2.30\n \n\nreal multi_student_t_cholesky_lupdf(vectors y | real nu, vectors mu, matrix L) The log of the multivariate Student-\\(t\\) density of vector or vector array y given degrees of freedom nu, location vector or vector array mu, and Cholesky factor of the scale matrix L, dropping constant additive terms. 
For a definition of arguments compatible with the vectors type, see the probability vectorization section.\nAvailable since 2.30\n \n\nvector multi_student_t_cholesky_rng(real nu, vector mu, matrix L) Generate a multivariate Student-\\(t\\) variate with degrees of freedom nu, location mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.30\n \n\narray[] vector multi_student_t_cholesky_rng(real nu, array[] vector mu, matrix L) Generate an array of multivariate Student-\\(t\\) variates with degrees of freedom nu, location array mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.30\n \n\narray[] vector multi_student_t_cholesky_rng(real nu, array[] row_vector mu, matrix L) Generate an array of multivariate Student-\\(t\\) variates with degrees of freedom nu, location array mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.30\n\n\n\n\nA Gaussian Dynamic Linear model is defined as follows. For \\(t \\in 1,\n\\dots, T\\), \\[\\begin{equation*} \\begin{aligned}[t] y_{t} &\\sim N(F' \\theta_{t}, V)\n\\\\ \\theta_{t} &\\sim N(G \\theta_{t - 1}, W) \\\\ \\theta_{0} &\\sim\nN(m_{0}, C_{0}) \\end{aligned} \\end{equation*}\\] where \\(y\\) is an \\(n \\times T\\) matrix where rows are variables and columns are observations. These functions calculate the log-density of the observations marginalizing over the latent states (\\(p(y | F, G, V, W, m_{0}, C_{0})\\)). This log-density is a system that is calculated using the Kalman Filter. If \\(V\\) is diagonal, then a more efficient algorithm which sequentially processes observations and avoids matrix inversions can be used (Durbin and Koopman 2001, sec.
6.4).\n\n\ny ~ gaussian_dlm_obs(F, G, V, W, m0, C0)\nIncrement target log probability density with gaussian_dlm_obs_lupdf(y | F, G, V, W, m0, C0).\nAvailable since 2.0\n \n\n\n\n\nThe following two functions differ in the type of their V, the first taking a full observation covariance matrix V and the second a vector V representing the diagonal of the observation covariance matrix. The sampling statement defined in the previous section works with either type of observation V.\n \n\nreal gaussian_dlm_obs_lpdf(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0) The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0.\nAvailable since 2.12\n \n\nreal gaussian_dlm_obs_lupdf(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0) The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0. 
This function drops constant additive terms.\nAvailable since 2.25\n \n\nreal gaussian_dlm_obs_lpdf(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0) The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix with diagonal V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0.\nAvailable since 2.12\n \n\nreal gaussian_dlm_obs_lupdf(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0) The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix with diagonal V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0. This function drops constant additive terms.\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-normal-distribution", + "href": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-normal-distribution", + "title": "Distributions over Unbounded Vectors", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(\\Sigma \\in\n\\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for \\(y \\in \\mathbb{R}^K\\), \\[\\begin{equation*}\n\\text{MultiNormal}(y|\\mu,\\Sigma) =\n\\frac{1}{\\left( 2 \\pi \\right)^{K/2}} \\ \\frac{1}{\\sqrt{|\\Sigma|}}\n\\ \\exp \\! \\left( \\! - \\frac{1}{2} (y - \\mu)^{\\top} \\, \\Sigma^{-1} \\, (y - \\mu) \\right) \\! 
,\n\\end{equation*}\\] where \\(|\\Sigma|\\) is the absolute determinant of \\(\\Sigma\\).\n\n\n\ny ~ multi_normal(mu, Sigma)\nIncrement target log probability density with multi_normal_lupdf(y | mu, Sigma).\nAvailable since 2.0\n \n\n\n\n\nThe multivariate normal probability function is overloaded to allow the variate vector \\(y\\) and location vector \\(\\mu\\) to be vectors or row vectors (or to mix the two types). The density function is also vectorized, so it allows arrays of row vectors or vectors as arguments; see section vectorized function signatures for a description of vectorization.\n \n\nreal multi_normal_lpdf(vectors y | vectors mu, matrix Sigma) The log of the multivariate normal density of vector(s) y given location vector(s) mu and covariance matrix Sigma\nAvailable since 2.12\n \n\nreal multi_normal_lupdf(vectors y | vectors mu, matrix Sigma) The log of the multivariate normal density of vector(s) y given location vector(s) mu and covariance matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_lpdf(vectors y | row_vectors mu, matrix Sigma) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and covariance matrix Sigma\nAvailable since 2.12\n \n\nreal multi_normal_lupdf(vectors y | row_vectors mu, matrix Sigma) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and covariance matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_lpdf(row_vectors y | vectors mu, matrix Sigma) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and covariance matrix Sigma\nAvailable since 2.12\n \n\nreal multi_normal_lupdf(row_vectors y | vectors mu, matrix Sigma) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and covariance matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_lpdf(row_vectors y | 
row_vectors mu, matrix Sigma) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and covariance matrix Sigma\nAvailable since 2.12\n \n\nreal multi_normal_lupdf(row_vectors y | row_vectors mu, matrix Sigma) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and covariance matrix Sigma dropping constant additive terms\nAvailable since 2.25\nAlthough there is a direct multi-normal RNG function, if more than one result is required, it’s much more efficient to Cholesky factor the covariance matrix and call multi_normal_cholesky_rng; see section multi-variate normal, cholesky parameterization.\n \n\nvector multi_normal_rng(vector mu, matrix Sigma) Generate a multivariate normal variate with location mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0\n \n\nvector multi_normal_rng(row_vector mu, matrix Sigma) Generate a multivariate normal variate with location mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_normal_rng(vectors mu, matrix Sigma) Generate an array of multivariate normal variates with locations mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_normal_rng(row_vectors mu, matrix Sigma) Generate an array of multivariate normal variates with locations mu and covariance matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-normal-distribution-precision-parameterization", + "href": 
"functions-reference/distributions_over_unbounded_vectors.html#multivariate-normal-distribution-precision-parameterization", + "title": "Distributions over Unbounded Vectors", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(\\Omega \\in\n\\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for \\(y \\in \\mathbb{R}^K\\), \\[\\begin{equation*} \\text{MultiNormalPrecision}(y|\\mu,\\Omega)\n= \\text{MultiNormal}(y|\\mu,\\Omega^{-1}) \\end{equation*}\\]\n\n\n\ny ~ multi_normal_prec(mu, Omega)\nIncrement target log probability density with multi_normal_prec_lupdf(y | mu, Omega).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal multi_normal_prec_lpdf(vectors y | vectors mu, matrix Omega) The log of the multivariate normal density of vector(s) y given location vector(s) mu and positive definite precision matrix Omega\nAvailable since 2.18\n \n\nreal multi_normal_prec_lupdf(vectors y | vectors mu, matrix Omega) The log of the multivariate normal density of vector(s) y given location vector(s) mu and positive definite precision matrix Omega dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_prec_lpdf(vectors y | row_vectors mu, matrix Omega) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and positive definite precision matrix Omega\nAvailable since 2.18\n \n\nreal multi_normal_prec_lupdf(vectors y | row_vectors mu, matrix Omega) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and positive definite precision matrix Omega dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_prec_lpdf(row_vectors y | vectors mu, matrix Omega) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and positive definite precision matrix Omega\nAvailable since 2.18\n \n\nreal multi_normal_prec_lupdf(row_vectors y | vectors mu, matrix Omega) The log of the multivariate 
normal density of row vector(s) y given location vector(s) mu and positive definite precision matrix Omega dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_prec_lpdf(row_vectors y | row_vectors mu, matrix Omega) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and positive definite precision matrix Omega\nAvailable since 2.18\n \n\nreal multi_normal_prec_lupdf(row_vectors y | row_vectors mu, matrix Omega) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and positive definite precision matrix Omega dropping constant additive terms\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": "functions-reference/distributions_over_unbounded_vectors.html#multi-normal-cholesky-fun", + "href": "functions-reference/distributions_over_unbounded_vectors.html#multi-normal-cholesky-fun", + "title": "Distributions over Unbounded Vectors", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(L \\in\n\\mathbb{R}^{K \\times K}\\) is lower triangular and such that \\(LL^{\\top}\\) is positive definite, then for \\(y \\in \\mathbb{R}^K\\), \\[\\begin{equation*}\n\\text{MultiNormalCholesky}(y|\\mu,L) =\n\\text{MultiNormal}(y|\\mu,LL^{\\top}). \\end{equation*}\\] If \\(L\\) is lower triangular and \\(LL^{\\top}\\) is a \\(K \\times K\\) positive definite matrix, then \\(L_{k,k}\\) must be strictly positive for \\(k \\in 1{:}K\\). 
If an \\(L\\) is provided that is not the Cholesky factor of a positive-definite matrix, the probability functions will raise errors.\n\n\n\ny ~ multi_normal_cholesky(mu, L)\nIncrement target log probability density with multi_normal_cholesky_lupdf(y | mu, L).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal multi_normal_cholesky_lpdf(vectors y | vectors mu, matrix L) The log of the multivariate normal density of vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L\nAvailable since 2.18\n \n\nreal multi_normal_cholesky_lupdf(vectors y | vectors mu, matrix L) The log of the multivariate normal density of vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_cholesky_lpdf(vectors y | row_vectors mu, matrix L) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L\nAvailable since 2.18\n \n\nreal multi_normal_cholesky_lupdf(vectors y | row_vectors mu, matrix L) The log of the multivariate normal density of vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_cholesky_lpdf(row_vectors y | vectors mu, matrix L) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L\nAvailable since 2.18\n \n\nreal multi_normal_cholesky_lupdf(row_vectors y | vectors mu, matrix L) The log of the multivariate normal density of row vector(s) y given location vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_normal_cholesky_lpdf(row_vectors y | row_vectors mu, matrix L) The log of the multivariate 
normal density of row vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L\nAvailable since 2.18\n \n\nreal multi_normal_cholesky_lupdf(row_vectors y | row_vectors mu, matrix L) The log of the multivariate normal density of row vector(s) y given location row vector(s) mu and lower-triangular Cholesky factor of the covariance matrix L dropping constant additive terms\nAvailable since 2.25\n \n\nvector multi_normal_cholesky_rng(vector mu, matrix L) Generate a multivariate normal variate with location mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks\nAvailable since 2.3\n \n\nvector multi_normal_cholesky_rng(row_vector mu, matrix L) Generate a multivariate normal variate with location mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_normal_cholesky_rng(vectors mu, matrix L) Generate an array of multivariate normal variates with locations mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_normal_cholesky_rng(row_vectors mu, matrix L) Generate an array of multivariate normal variates with locations mu and lower-triangular Cholesky factor of the covariance matrix L; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-gaussian-process-distribution", + "href": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-gaussian-process-distribution", + "title": "Distributions over Unbounded Vectors", + "section": 
"", + "text": "If \\(K,N \\in \\mathbb{N}\\), \\(\\Sigma \\in \\mathbb{R}^{N \\times N}\\) is symmetric, positive definite kernel matrix and \\(w \\in \\mathbb{R}^{K}\\) is a vector of positive inverse scales, then for \\(y \\in \\mathbb{R}^{K\n\\times N}\\), \\[\\begin{equation*} \\text{MultiGP}(y|\\Sigma,w) = \\prod_{i=1}^{K}\n\\text{MultiNormal}(y_i|0,w_i^{-1} \\Sigma), \\end{equation*}\\] where \\(y_i\\) is the \\(i\\)th row of \\(y\\). This is used to efficiently handle Gaussian Processes with multi-variate outputs where only the output dimensions share a kernel function but vary based on their scale. Note that this function does not take into account the mean prediction.\n\n\n\ny ~ multi_gp(Sigma, w)\nIncrement target log probability density with multi_gp_lupdf(y | Sigma, w).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal multi_gp_lpdf(matrix y | matrix Sigma, vector w) The log of the multivariate GP density of matrix y given kernel matrix Sigma and inverses scales w\nAvailable since 2.12\n \n\nreal multi_gp_lupdf(matrix y | matrix Sigma, vector w) The log of the multivariate GP density of matrix y given kernel matrix Sigma and inverses scales w dropping constant additive terms\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-gaussian-process-distribution-cholesky-parameterization", + "href": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-gaussian-process-distribution-cholesky-parameterization", + "title": "Distributions over Unbounded Vectors", + "section": "", + "text": "If \\(K,N \\in \\mathbb{N}\\), \\(L \\in \\mathbb{R}^{N \\times N}\\) is lower triangular and such that \\(LL^{\\top}\\) is positive definite kernel matrix (implying \\(L_{n,n} > 0\\) for \\(n \\in 1{:}N\\)), and \\(w \\in\n\\mathbb{R}^{K}\\) is a vector of positive inverse scales, 
then for \\(y\n\\in \\mathbb{R}^{K \\times N}\\), \\[\\begin{equation*} \\text{MultiGPCholesky}(y \\, | \\ L,w)\n= \\prod_{i=1}^{K} \\text{MultiNormal}(y_i|0,w_i^{-1} LL^{\\top}), \\end{equation*}\\] where \\(y_i\\) is the \\(i\\)th row of \\(y\\). This is used to efficiently handle Gaussian Processes with multi-variate outputs where only the output dimensions share a kernel function but vary based on their scale. If the model allows parameterization in terms of the Cholesky factor of the kernel matrix, this distribution is also more efficient than \\(\\text{MultiGP}()\\). Note that this function does not take into account the mean prediction.\n\n\n\ny ~ multi_gp_cholesky(L, w)\nIncrement target log probability density with multi_gp_cholesky_lupdf(y | L, w).\nAvailable since 2.5\n \n\n\n\n\n \n\nreal multi_gp_cholesky_lpdf(matrix y | matrix L, vector w) The log of the multivariate GP density of matrix y given lower-triangular Cholesky factor of the kernel matrix L and inverse scales w\nAvailable since 2.12\n \n\nreal multi_gp_cholesky_lupdf(matrix y | matrix L, vector w) The log of the multivariate GP density of matrix y given lower-triangular Cholesky factor of the kernel matrix L and inverse scales w dropping constant additive terms\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-student-t-distribution", + "href": "functions-reference/distributions_over_unbounded_vectors.html#multivariate-student-t-distribution", + "title": "Distributions over Unbounded Vectors", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\), \\(\\nu \\in \\mathbb{R}^+\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(\\Sigma \\in \\mathbb{R}^{K \\times K}\\) is symmetric and positive definite, then for \\(y \\in \\mathbb{R}^K\\), \\[\\begin{equation*} 
\\begin{array}{l}\n\\text{MultiStudentT}(y\\,|\\,\\nu,\\,\\mu,\\,\\Sigma) \\\\ =\n\\frac{1}{\\pi^{K/2}} \\ \\frac{1}{\\nu^{K/2}} \\ \\frac{\\Gamma\\!\\left((\\nu +\nK)/2\\right)} {\\Gamma(\\nu/2)} \\ \\frac{1}{\\sqrt{\\left| \\Sigma\n\\right|}} \\ \\left( 1 + \\frac{1}{\\nu} \\, \\left(y - \\mu\\right)^{\\top} \\,\n\\Sigma^{-1} \\, \\left(y - \\mu\\right) \\right)^{-(\\nu + K)/2} \\! .\n\\end{array} \\end{equation*}\\]\n\n\n\ny ~ multi_student_t(nu, mu, Sigma)\nIncrement target log probability density with multi_student_t_lupdf(y | nu, mu, Sigma).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal multi_student_t_lpdf(vectors y | real nu, vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma\nAvailable since 2.18\n \n\nreal multi_student_t_lupdf(vectors y | real nu, vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_student_t_lpdf(vectors y | real nu, row_vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma\nAvailable since 2.18\n \n\nreal multi_student_t_lupdf(vectors y | real nu, row_vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_student_t_lpdf(row_vectors y | real nu, vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of row vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma\nAvailable since 2.18\n \n\nreal multi_student_t_lupdf(row_vectors y | real nu, vectors mu, matrix Sigma) The log of the 
multivariate Student-\\(t\\) density of row vector(s) y given degrees of freedom nu, location vector(s) mu, and scale matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nreal multi_student_t_lpdf(row_vectors y | real nu, row_vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of row vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma\nAvailable since 2.18\n \n\nreal multi_student_t_lupdf(row_vectors y | real nu, row_vectors mu, matrix Sigma) The log of the multivariate Student-\\(t\\) density of row vector(s) y given degrees of freedom nu, location row vector(s) mu, and scale matrix Sigma dropping constant additive terms\nAvailable since 2.25\n \n\nvector multi_student_t_rng(real nu, vector mu, matrix Sigma) Generate a multivariate Student-\\(t\\) variate with degrees of freedom nu, location mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0\n \n\nvector multi_student_t_rng(real nu, row_vector mu, matrix Sigma) Generate a multivariate Student-\\(t\\) variate with degrees of freedom nu, location mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_student_t_rng(real nu, vectors mu, matrix Sigma) Generate an array of multivariate Student-\\(t\\) variates with degrees of freedom nu, locations mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18\n \n\nvectors multi_student_t_rng(real nu, row_vectors mu, matrix Sigma) Generate an array of multivariate Student-\\(t\\) variates with degrees of freedom nu, locations mu, and scale matrix Sigma; may only be used in transformed data and generated quantities blocks\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": 
"functions-reference/distributions_over_unbounded_vectors.html#multi-student-t-cholesky-fun", + "href": "functions-reference/distributions_over_unbounded_vectors.html#multi-student-t-cholesky-fun", + "title": "Distributions over Unbounded Vectors", + "section": "", + "text": "Let \\(K \\in \\mathbb{N}\\), \\(\\nu \\in \\mathbb{R}^+\\), \\(\\mu \\in \\mathbb{R}^K\\), and \\(L\\) a \\(K \\times K\\) lower-triangular matrix with strictly positive, finite diagonal then \\[\\begin{equation*}\n\\begin{array}{l}\n\\text{MultiStudentTCholesky}(y\\,\\mid \\nu,\\,\\mu,\\,L) \\\\ =\n\\frac{1}{\\pi^{K/2}} \\ \\frac{1}{\\nu^{K/2}} \\ \\frac{\\Gamma\\!\\left((\\nu +\nK)/2\\right)} {\\Gamma(\\nu/2)} \\ \\frac{1}{\\left| L\n\\right|} \\ \\left( 1 + \\frac{1}{\\nu} \\, \\left(y - \\mu\\right)^{\\top} \\,\nL^{-T}L^{-1} \\, \\left(y - \\mu\\right) \\right)^{-(\\nu + K)/2} \\! .\n\\end{array}\n\\end{equation*}\\]\n\n\n\ny ~ multi_student_t_cholesky(nu, mu, L)\nIncrement target log probability density with multi_student_t_cholesky_lupdf(y | nu, mu, L).\nAvailable since 2.30\n \n\n\n\n\n \n\nreal multi_student_t_cholesky_lpdf(vectors y | real nu, vectors mu, matrix L) The log of the multivariate Student-\\(t\\) density of vector or array of vectors y given degrees of freedom nu, location vector or array of vectors mu, and Cholesky factor of the scale matrix L. For a definition of the arguments compatible with the vectors type, see the probability vectorization section.\nAvailable since 2.30\n \n\nreal multi_student_t_cholesky_lupdf(vectors y | real nu, vectors mu, matrix L) The log of the multivariate Student-\\(t\\) density of vector or vector array y given degrees of freedom nu, location vector or vector array mu, and Cholesky factor of the scale matrix L, dropping constant additive terms. 
For a definition of arguments compatible with the vectors type, see the probability vectorization section.\nAvailable since 2.30\n \n\nvector multi_student_t_cholesky_rng(real nu, vector mu, matrix L) Generate a multivariate Student-\\(t\\) variate with degrees of freedom nu, location mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.30\n \n\narray[] vector multi_student_t_cholesky_rng(real nu, array[] vector mu, matrix L) Generate a multivariate Student-\\(t\\) variate with degrees of freedom nu, location array mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.30\n \n\narray[] vector multi_student_t_cholesky_rng(real nu, array[] row_vector mu, matrix L) Generate an array of multivariate Student-\\(t\\) variate with degrees of freedom nu, location array mu, and Cholesky factor of the scale matrix L; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.30", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": "functions-reference/distributions_over_unbounded_vectors.html#gaussian-dynamic-linear-models", + "href": "functions-reference/distributions_over_unbounded_vectors.html#gaussian-dynamic-linear-models", + "title": "Distributions over Unbounded Vectors", + "section": "", + "text": "A Gaussian Dynamic Linear model is defined as follows, For \\(t \\in 1,\n\\dots, T\\), \\[\\begin{equation*} \\begin{aligned}[t] y_{t} &\\sim N(F' \\theta_{t}, V)\n\\\\ \\theta_{t} &\\sim N(G \\theta_{t - 1}, W) \\\\ \\theta_{0} &\\sim\nN(m_{0}, C_{0}) \\end{aligned} \\end{equation*}\\] where \\(y\\) is \\(n \\times T\\) matrix where rows are variables and columns are observations. 
These functions calculate the log-density of the observations marginalizing over the latent states (\\(p(y | F, G, V, W, m_{0}, C_{0})\\)). This log-density is a system that is calculated using the Kalman Filter. If \\(V\\) is diagonal, then a more efficient algorithm which sequentially processes observations and avoids matrix inversions can be used (Durbin and Koopman 2001, sec. 6.4).\n\n\ny ~ gaussian_dlm_obs(F, G, V, W, m0, C0)\nIncrement target log probability density with gaussian_dlm_obs_lupdf(y | F, G, V, W, m0, C0).\nAvailable since 2.0\n \n\n\n\n\nThe following two functions differ in the type of their V, the first taking a full observation covariance matrix V and the second a vector V representing the diagonal of the observation covariance matrix. The sampling statement defined in the previous section works with either type of observation V.\n \n\nreal gaussian_dlm_obs_lpdf(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0) The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0.\nAvailable since 2.12\n \n\nreal gaussian_dlm_obs_lupdf(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0) The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0. 
This function drops constant additive terms.\nAvailable since 2.25\n \n\nreal gaussian_dlm_obs_lpdf(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0) The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix with diagonal V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0.\nAvailable since 2.12\n \n\nreal gaussian_dlm_obs_lupdf(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0) The log of the density of the Gaussian Dynamic Linear model with observation matrix y in which rows are variables and columns are observations, design matrix F, transition matrix G, observation covariance matrix with diagonal V, system covariance matrix W, and the initial state is distributed normal with mean m0 and covariance C0. This function drops constant additive terms.\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Distributions over Unbounded Vectors" + ] + }, + { + "objectID": "functions-reference/functions_index.html", + "href": "functions-reference/functions_index.html", + "title": "Alphabetical Index", + "section": "", + "text": "abs:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\n(T x) : T (integer-valued_basic_functions.html)\n\n\n(T x) : T (real-valued_basic_functions.html)\n\n\nacos:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nacosh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nadd_diag:\n\n\n(complex_matrix m, complex_real d) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix m, complex_row_vector d) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix m, complex_vector d) 
: complex_matrix (complex_matrix_operations.html)\n\n\n(matrix m, real d) : matrix (matrix_operations.html)\n\n\n(matrix m, row_vector d) : matrix (matrix_operations.html)\n\n\n(matrix m, vector d) : matrix (matrix_operations.html)\n\n\nalgebra_solver:\n\n\n(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps) : vector (deprecated_functions.html)\n\n\nalgebra_solver_newton:\n\n\n(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i) : vector (deprecated_functions.html)\n\n\n(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps) : vector (deprecated_functions.html)\n\n\nappend_array:\n\n\n(T x, T y) : T (array_operations.html)\n\n\nappend_col:\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, vector y) : matrix (matrix_operations.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, matrix y) : matrix (matrix_operations.html)\n\n\n(vector x, vector y) : matrix 
(matrix_operations.html)\n\n\nappend_row:\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_row_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, row_vector y) : matrix (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, matrix y) : matrix (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : matrix (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\narg:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\nasin:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nasinh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\natan:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\natan2:\n\n\n(T y, T x) : R (real-valued_basic_functions.html)\n\n\natanh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\n\n\n\nbernoulli:\n\n\ndistribution statement (binary_distributions.html)\n\n\nbernoulli_cdf:\n\n\n(ints y | reals theta) : real (binary_distributions.html)\n\n\nbernoulli_lccdf:\n\n\n(ints y | reals theta) : real 
(binary_distributions.html)\n\n\nbernoulli_lcdf:\n\n\n(ints y | reals theta) : real (binary_distributions.html)\n\n\nbernoulli_logit:\n\n\ndistribution statement (binary_distributions.html)\n\n\nbernoulli_logit_glm:\n\n\ndistribution statement (binary_distributions.html)\n\n\nbernoulli_logit_glm_lpmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\nbernoulli_logit_glm_lupmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\nbernoulli_logit_glm_rng:\n\n\n(matrix x, vector alpha, vector beta) : array[] int (binary_distributions.html)\n\n\n(row_vector x, vector alpha, vector beta) : array[] int (binary_distributions.html)\n\n\nbernoulli_logit_lpmf:\n\n\n(ints y | reals alpha) : real (binary_distributions.html)\n\n\nbernoulli_logit_lupmf:\n\n\n(ints y | reals alpha) : real (binary_distributions.html)\n\n\nbernoulli_logit_rng:\n\n\n(reals alpha) : R (binary_distributions.html)\n\n\nbernoulli_lpmf:\n\n\n(ints y | reals theta) : real 
(binary_distributions.html)\n\n\nbernoulli_lupmf:\n\n\n(ints y | reals theta) : real (binary_distributions.html)\n\n\nbernoulli_rng:\n\n\n(reals theta) : R (binary_distributions.html)\n\n\nbessel_first_kind:\n\n\n(int v, real x) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nbessel_second_kind:\n\n\n(int v, real x) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nbeta:\n\n\n(real alpha, real beta) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\ndistribution statement (continuous_distributions_on_0_1.html)\n\n\nbeta_binomial:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nbeta_binomial_cdf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_lccdf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_lcdf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_lpmf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_lupmf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_rng:\n\n\n(ints N, reals alpha, reals beta) : R (bounded_discrete_distributions.html)\n\n\nbeta_cdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_lccdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_lcdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_lpdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_lupdf:\n\n\n(reals theta | reals alpha, reals beta) : real 
(continuous_distributions_on_0_1.html)\n\n\nbeta_neg_binomial:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_cdf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_lccdf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_lcdf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_lpmf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_lupmf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_rng:\n\n\n(reals r, reals alpha, reals beta) : R (unbounded_discrete_distributions.html)\n\n\nbeta_proportion:\n\n\ndistribution statement (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_lccdf:\n\n\n(reals theta | reals mu, reals kappa) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_lcdf:\n\n\n(reals theta | reals mu, reals kappa) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_lpdf:\n\n\n(reals theta | reals mu, reals kappa) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_lupdf:\n\n\n(reals theta | reals mu, reals kappa) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_rng:\n\n\n(reals mu, reals kappa) : R (continuous_distributions_on_0_1.html)\n\n\nbeta_rng:\n\n\n(reals alpha, reals beta) : R (continuous_distributions_on_0_1.html)\n\n\nbinary_log_loss:\n\n\n(int y, real y_hat) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nbinomial:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nbinomial_cdf:\n\n\n(ints n | ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_lccdf:\n\n\n(ints n | ints N, reals theta) : 
real (bounded_discrete_distributions.html)\n\n\nbinomial_lcdf:\n\n\n(ints n | ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_logit:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nbinomial_logit_glm:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nbinomial_logit_glm_lpmf:\n\n\n(array[] int n | array[] int N, matrix x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, matrix x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, row_vector x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, row_vector x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(int n | int N, matrix x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(int n | int N, matrix x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_logit_glm_lupmf:\n\n\n(array[] int n | array[] int N, matrix x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, matrix x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, row_vector x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, row_vector x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(int n | int N, matrix x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(int n | int N, matrix x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_logit_lpmf:\n\n\n(ints n | ints N, reals alpha) : real (bounded_discrete_distributions.html)\n\n\nbinomial_logit_lupmf:\n\n\n(ints n | ints N, reals alpha) : real (bounded_discrete_distributions.html)\n\n\nbinomial_lpmf:\n\n\n(ints n 
| ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_lupmf:\n\n\n(ints n | ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_rng:\n\n\n(ints N, reals theta) : R (bounded_discrete_distributions.html)\n\n\nblock:\n\n\n(complex_matrix x, int i, int j, int n_rows, int n_cols) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix x, int i, int j, int n_rows, int n_cols) : matrix (matrix_operations.html)\n\n\n\n\n\ncategorical:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\ncategorical_logit:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\ncategorical_logit_glm:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\ncategorical_logit_glm_lpmf:\n\n\n(array[] int y | matrix x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(int y | row_vector x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_logit_glm_lupmf:\n\n\n(array[] int y | matrix x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(int y | row_vector x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_logit_lpmf:\n\n\n(ints y | vector beta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_logit_lupmf:\n\n\n(ints y | vector beta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_logit_rng:\n\n\n(vector beta) : int (bounded_discrete_distributions.html)\n\n\ncategorical_lpmf:\n\n\n(ints y | vector theta) : real 
(bounded_discrete_distributions.html)\n\n\ncategorical_lupmf:\n\n\n(ints y | vector theta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_rng:\n\n\n(vector theta) : int (bounded_discrete_distributions.html)\n\n\ncauchy:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\ncauchy_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\ncbrt:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nceil:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nchi_square:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nchi_square_cdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_lccdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_lcdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_lpdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_lupdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_rng:\n\n\n(reals nu) : R (positive_continuous_distributions.html)\n\n\nchol2inv:\n\n\n(matrix L) : matrix (matrix_operations.html)\n\n\ncholesky_decompose:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\ncholesky_factor_corr_constrain:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncholesky_factor_corr_jacobian:\n\n\n(vectors y, int K) : 
matrices (transform_functions.html)\n\n\ncholesky_factor_corr_unconstrain:\n\n\n(matrices x) : vectors (transform_functions.html)\n\n\ncholesky_factor_cov_constrain:\n\n\n(vectors y, int M, int N) : matrices (transform_functions.html)\n\n\ncholesky_factor_cov_jacobian:\n\n\n(vectors y, int M, int N) : matrices (transform_functions.html)\n\n\ncholesky_factor_cov_unconstrain:\n\n\n(matrices x) : vectors (transform_functions.html)\n\n\nchoose:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\ncol:\n\n\n(complex_matrix x, int n) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, int n) : vector (matrix_operations.html)\n\n\ncols:\n\n\n(complex_matrix x) : int (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : int (complex_matrix_operations.html)\n\n\n(complex_vector x) : int (complex_matrix_operations.html)\n\n\n(matrix x) : int (matrix_operations.html)\n\n\n(row_vector x) : int (matrix_operations.html)\n\n\n(vector x) : int (matrix_operations.html)\n\n\ncolumns_dot_product:\n\n\n(complex_matrix x, complex_matrix y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, vector y) : row_vector (matrix_operations.html)\n\n\ncolumns_dot_self:\n\n\n(complex_matrix x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x) : row_vector (matrix_operations.html)\n\n\n(row_vector x) : row_vector (matrix_operations.html)\n\n\n(vector x) : row_vector 
(matrix_operations.html)\n\n\ncomplex_schur_decompose:\n\n\n(complex_matrix A) : tuple(complex_matrix, complex_matrix) (complex_matrix_operations.html)\n\n\n(matrix A) : tuple(complex_matrix, complex_matrix) (complex_matrix_operations.html)\n\n\ncomplex_schur_decompose_t:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\ncomplex_schur_decompose_u:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\nconj:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(Z z) : Z (complex-valued_basic_functions.html)\n\n\ncorr_matrix_constrain:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncorr_matrix_jacobian:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncorr_matrix_unconstrain:\n\n\n(matrices x) : vectors (transform_functions.html)\n\n\ncos:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ncosh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ncov_exp_quad:\n\n\n(array[] real x, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(array[] real x1, array[] real x2, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(row_vectors x, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(row_vectors x1, row_vectors x2, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(vectors x, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(vectors x1, vectors x2, real alpha, real rho) : matrix (removed_functions.html)\n\n\ncov_matrix_constrain:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncov_matrix_jacobian:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncov_matrix_unconstrain:\n\n\n(matrices x) : vectors 
(transform_functions.html)\n\n\ncrossprod:\n\n\n(matrix x) : matrix (matrix_operations.html)\n\n\ncsr_extract:\n\n\n(matrix a) : tuple(vector, array[] int, array[] int) (sparse_matrix_operations.html)\n\n\ncsr_extract_u:\n\n\n(matrix a) : array[] int (sparse_matrix_operations.html)\n\n\ncsr_extract_v:\n\n\n(matrix a) : array[] int (sparse_matrix_operations.html)\n\n\ncsr_extract_w:\n\n\n(matrix a) : vector (sparse_matrix_operations.html)\n\n\ncsr_matrix_times_vector:\n\n\n(int m, int n, vector w, array[] int v, array[] int u, vector b) : vector (sparse_matrix_operations.html)\n\n\ncsr_to_dense_matrix:\n\n\n(int m, int n, vector w, array[] int v, array[] int u) : matrix (sparse_matrix_operations.html)\n\n\ncumulative_sum:\n\n\n(array[] complex x) : array[] complex (complex_matrix_operations.html)\n\n\n(array[] int x) : array[] int (matrix_operations.html)\n\n\n(array[] real x) : array[] real (matrix_operations.html)\n\n\n(complex_row_vector rv) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector rv) : row_vector (matrix_operations.html)\n\n\n(vector v) : vector (matrix_operations.html)\n\n\n\n\n\ndae:\n\n\n(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, ...) : array[] vector (higher-order_functions.html)\n\n\ndae_tol:\n\n\n(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) 
: array[] vector (higher-order_functions.html)\n\n\ndeterminant:\n\n\n(matrix A) : real (matrix_operations.html)\n\n\ndiag_matrix:\n\n\n(complex_vector x) : complex_matrix (complex_matrix_operations.html)\n\n\n(vector x) : matrix (matrix_operations.html)\n\n\ndiag_post_multiply:\n\n\n(complex_matrix m, complex_row_vector v) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix m, complex_vector v) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix m, row_vector rv) : matrix (matrix_operations.html)\n\n\n(matrix m, vector v) : matrix (matrix_operations.html)\n\n\ndiag_pre_multiply:\n\n\n(complex_row_vector v, complex_matrix m) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_vector v, complex_matrix m) : complex_matrix (complex_matrix_operations.html)\n\n\n(row_vector rv, matrix m) : matrix (matrix_operations.html)\n\n\n(vector v, matrix m) : matrix (matrix_operations.html)\n\n\ndiagonal:\n\n\n(complex_matrix x) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x) : vector (matrix_operations.html)\n\n\ndigamma:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ndims:\n\n\n(T x) : array[] int (array_operations.html)\n\n\ndirichlet:\n\n\ndistribution statement (simplex_distributions.html)\n\n\ndirichlet_lpdf:\n\n\n(vectors theta | vectors alpha) : real (simplex_distributions.html)\n\n\ndirichlet_lupdf:\n\n\n(vectors theta | vectors alpha) : real (simplex_distributions.html)\n\n\ndirichlet_multinomial:\n\n\ndistribution statement (multivariate_discrete_distributions.html)\n\n\ndirichlet_multinomial_lpmf:\n\n\n(array[] int y | vector alpha) : real (multivariate_discrete_distributions.html)\n\n\ndirichlet_multinomial_lupmf:\n\n\n(array[] int y | vector alpha) : real (multivariate_discrete_distributions.html)\n\n\ndirichlet_multinomial_rng:\n\n\n(vector alpha, int N) : array[] int (multivariate_discrete_distributions.html)\n\n\ndirichlet_rng:\n\n\n(vector alpha) : vector 
(simplex_distributions.html)\n\n\ndiscrete_range:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\ndiscrete_range_cdf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_lccdf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_lcdf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_lpmf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_lupmf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_rng:\n\n\n(ints l, ints u) : ints (bounded_discrete_distributions.html)\n\n\ndistance:\n\n\n(row_vector x, row_vector y) : real (array_operations.html)\n\n\n(row_vector x, vector y) : real (array_operations.html)\n\n\n(vector x, row_vector y) : real (array_operations.html)\n\n\n(vector x, vector y) : real (array_operations.html)\n\n\ndot_product:\n\n\n(complex_row_vector x, complex_row_vector y) : complex (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_vector y) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_row_vector y) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex (complex_matrix_operations.html)\n\n\n(row_vector x, row_vector y) : real (matrix_operations.html)\n\n\n(row_vector x, vector y) : real (matrix_operations.html)\n\n\n(vector x, row_vector y) : real (matrix_operations.html)\n\n\n(vector x, vector y) : real (matrix_operations.html)\n\n\ndot_self:\n\n\n(complex_row_vector x) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex (complex_matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\ndouble_exponential:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_cdf:\n\n\n(reals y | reals 
mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\n\n\n\ne:\n\n\n() : real (real-valued_basic_functions.html)\n\n\neigendecompose:\n\n\n(complex_matrix A) : tuple(complex_matrix, complex_vector) (complex_matrix_operations.html)\n\n\n(matrix A) : tuple(complex_matrix, complex_vector) (matrix_operations.html)\n\n\neigendecompose_sym:\n\n\n(complex_matrix A) : tuple(complex_matrix, complex_vector) (complex_matrix_operations.html)\n\n\n(matrix A) : tuple(matrix, vector) (matrix_operations.html)\n\n\neigenvalues:\n\n\n(complex_matrix A) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix A) : complex_vector (matrix_operations.html)\n\n\neigenvalues_sym:\n\n\n(complex_matrix A) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix A) : vector (matrix_operations.html)\n\n\neigenvectors:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : complex_matrix (matrix_operations.html)\n\n\neigenvectors_sym:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nerf:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nerfc:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nexp:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nexp2:\n\n\n(T x) : R 
(real-valued_basic_functions.html)\n\n\nexp_mod_normal:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_cdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_lccdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_lcdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_lpdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_lupdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_rng:\n\n\n(reals mu, reals sigma, reals lambda) : R (unbounded_continuous_distributions.html)\n\n\nexpm1:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nexponential:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nexponential_cdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_lccdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_lcdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_lpdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_lupdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_rng:\n\n\n(reals beta) : R (positive_continuous_distributions.html)\n\n\n\n\n\nfalling_factorial:\n\n\n(real x, real n) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfatal_error:\n\n\n(T1 x1,..., TN xN) : void (void_functions.html)\n\n\nfdim:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfft:\n\n\n(complex_vector v) 
: complex_vector (complex_matrix_operations.html)\n\n\nfft2:\n\n\n(complex_matrix m) : complex_matrix (complex_matrix_operations.html)\n\n\nfloor:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nfma:\n\n\n(real x, real y, real z) : real (real-valued_basic_functions.html)\n\n\nfmax:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfmin:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfmod:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfrechet:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nfrechet_cdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_lccdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_lcdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_lpdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_lupdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_rng:\n\n\n(reals alpha, reals sigma) : R (positive_continuous_distributions.html)\n\n\n\n\n\ngamma:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\ngamma_cdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_lccdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_lcdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_lpdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_lupdf:\n\n\n(reals y | reals alpha, reals beta) : real 
(positive_continuous_distributions.html)\n\n\ngamma_p:\n\n\n(real a, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\ngamma_q:\n\n\n(real a, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\ngamma_rng:\n\n\n(reals alpha, reals beta) : R (positive_continuous_distributions.html)\n\n\ngaussian_dlm_obs:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\ngaussian_dlm_obs_lpdf:\n\n\n(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0) : real (distributions_over_unbounded_vectors.html)\n\n\n(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0) : real (distributions_over_unbounded_vectors.html)\n\n\ngaussian_dlm_obs_lupdf:\n\n\n(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0) : real (distributions_over_unbounded_vectors.html)\n\n\n(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0) : real (distributions_over_unbounded_vectors.html)\n\n\ngeneralized_inverse:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\ngenerate_laplace_options:\n\n\n(int dimension) : tuple(vector, real, int, int, int, int) (embedded_laplace.html)\n\n\n(vector theta_init) : tuple(vector, real, int, int, int, int) (embedded_laplace.html)\n\n\nget_imag:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\n(T x) : T (complex_matrix_operations.html)\n\n\nget_real:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\n(T x) : T (complex_matrix_operations.html)\n\n\ngp_dot_prod_cov:\n\n\n(array[] real x, real sigma) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma) : matrix (matrix_operations.html)\n\n\ngp_exp_quad_cov:\n\n\n(array[] real x, real sigma, real length_scale) 
: matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\ngp_exponential_cov:\n\n\n(array[] real x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\ngp_matern23_cov:\n\n\n(array[] real x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\ngp_matern52_cov:\n\n\n(array[] real x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors 
x2, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\ngp_periodic_cov:\n\n\n(array[] real x, real sigma, real length_scale, real period) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale, real period) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale, real period) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, real length_scale, real period) : matrix (matrix_operations.html)\n\n\ngumbel:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\ngumbel_cdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_lccdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_lcdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_lpdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_lupdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_rng:\n\n\n(reals mu, reals beta) : R (unbounded_continuous_distributions.html)\n\n\n\n\n\nhead:\n\n\n(array[] T sv, int n) : array[] T (matrix_operations.html)\n\n\n(complex_row_vector rv, int n) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v, int n) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector rv, int n) : row_vector (matrix_operations.html)\n\n\n(vector v, int n) : vector (matrix_operations.html)\n\n\nhmm_hidden_state_prob:\n\n\n(matrix log_omega, matrix Gamma, vector rho) : matrix (hidden_markov_models.html)\n\n\nhmm_latent_rng:\n\n\n(matrix log_omega, matrix Gamma, vector rho) : array[] int (hidden_markov_models.html)\n\n\nhmm_marginal:\n\n\n(matrix log_omega, matrix Gamma, vector rho) : real 
(hidden_markov_models.html)\n\n\nhypergeometric:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nhypergeometric_1F0:\n\n\n(real a, real z) : real (real-valued_basic_functions.html)\n\n\nhypergeometric_2F1:\n\n\n(real a1, real a2, real b1, real z) : real (real-valued_basic_functions.html)\n\n\nhypergeometric_3F2:\n\n\n(T1 a, T2 b, real z) : real (real-valued_basic_functions.html)\n\n\nhypergeometric_lpmf:\n\n\n(int n | int N, int a, int b) : real (bounded_discrete_distributions.html)\n\n\nhypergeometric_lupmf:\n\n\n(int n | int N, int a, int b) : real (bounded_discrete_distributions.html)\n\n\nhypergeometric_pFq:\n\n\n(T1 a, T2 b, real z) : real (real-valued_basic_functions.html)\n\n\nhypergeometric_rng:\n\n\n(int N, int a, int2 b) : int (bounded_discrete_distributions.html)\n\n\nhypot:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\n\n\n\nidentity_matrix:\n\n\n(int k) : matrix (matrix_operations.html)\n\n\ninc_beta:\n\n\n(real alpha, real beta, real x) : real (real-valued_basic_functions.html)\n\n\nint_step:\n\n\n(int x) : int (integer-valued_basic_functions.html)\n\n\n(real x) : int (integer-valued_basic_functions.html)\n\n\nintegrate_1d:\n\n\n(function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i) : real (higher-order_functions.html)\n\n\n(function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i), real relative_tolerance) : real (higher-order_functions.html)\n\n\nintegrate_ode:\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) : array[,] real (deprecated_functions.html)\n\n\nintegrate_ode_adams:\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) : array[,] real (deprecated_functions.html)\n\n\n(function ode, array[] 
real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i, real rel_tol, real abs_tol, int max_num_steps) : array[,] real (deprecated_functions.html)\n\n\nintegrate_ode_bdf:\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) : array[,] real (deprecated_functions.html)\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i, real rel_tol, real abs_tol, int max_num_steps) : array[,] real (deprecated_functions.html)\n\n\nintegrate_ode_rk45:\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) : array[,] real (deprecated_functions.html)\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i, real rel_tol, real abs_tol, int max_num_steps) : array[,] real (deprecated_functions.html)\n\n\ninv:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_chi_square:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\ninv_chi_square_cdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_lccdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_lcdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_lpdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_lupdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_rng:\n\n\n(reals nu) : R (positive_continuous_distributions.html)\n\n\ninv_cloglog:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_erfc:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_fft:\n\n\n(complex_vector u) 
: complex_vector (complex_matrix_operations.html)\n\n\ninv_fft2:\n\n\n(complex_matrix m) : complex_matrix (complex_matrix_operations.html)\n\n\ninv_gamma:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\ninv_gamma_cdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_lccdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_lcdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_lpdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_lupdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_rng:\n\n\n(reals alpha, reals beta) : R (positive_continuous_distributions.html)\n\n\ninv_inc_beta:\n\n\n(real alpha, real beta, real p) : real (real-valued_basic_functions.html)\n\n\ninv_logit:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_Phi:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_sqrt:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_square:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_wishart:\n\n\ndistribution statement (covariance_matrix_distributions.html)\n\n\ninv_wishart_cholesky_lpdf:\n\n\n(matrix L_W | real nu, matrix L_S) : real (covariance_matrix_distributions.html)\n\n\ninv_wishart_cholesky_lupdf:\n\n\n(matrix L_W | real nu, matrix L_S) : real (covariance_matrix_distributions.html)\n\n\ninv_wishart_cholesky_rng:\n\n\n(real nu, matrix L_S) : matrix (covariance_matrix_distributions.html)\n\n\ninv_wishart_lpdf:\n\n\n(matrix W | real nu, matrix Sigma) : real (covariance_matrix_distributions.html)\n\n\ninv_wishart_lupdf:\n\n\n(matrix W | real nu, matrix Sigma) : real (covariance_matrix_distributions.html)\n\n\ninv_wishart_rng:\n\n\n(real nu, matrix Sigma) : matrix 
(covariance_matrix_distributions.html)\n\n\ninverse:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\ninverse_spd:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nis_inf:\n\n\n(real x) : int (real-valued_basic_functions.html)\n\n\nis_nan:\n\n\n(real x) : int (real-valued_basic_functions.html)\n\n\n\n\n\nlambert_w0:\n\n\n(reals x) : R (real-valued_basic_functions.html)\n\n\nlambert_wm1:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlaplace_latent_bernoulli_logit_rng:\n\n\n(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : vector (embedded_laplace.html)\n\n\nlaplace_latent_neg_binomial_2_log_rng:\n\n\n(array[] int y, array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : vector (embedded_laplace.html)\n\n\nlaplace_latent_poisson_log_rng:\n\n\n(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : vector (embedded_laplace.html)\n\n\nlaplace_latent_rng:\n\n\n(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : vector (embedded_laplace.html)\n\n\nlaplace_latent_rng_tol:\n\n\n(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : vector (embedded_laplace.html)\n\n\nlaplace_latent_tol_bernoulli_logit_rng:\n\n\n(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : vector (embedded_laplace.html)\n\n\nlaplace_latent_tol_neg_binomial_2_log_rng:\n\n\n(array[] int y, array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : vector (embedded_laplace.html)\n\n\nlaplace_latent_tol_poisson_log_rng:\n\n\n(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : vector (embedded_laplace.html)\n\n\nlaplace_marginal:\n\n\n(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_bernoulli_logit:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_bernoulli_logit_lpmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_bernoulli_logit_lupmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_neg_binomial_2_log:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_neg_binomial_2_log_lpmf:\n\n\n(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_neg_binomial_2_log_lupmf:\n\n\n(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_poisson_log:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_poisson_log_lpmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_poisson_log_lupmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol:\n\n\n(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_bernoulli_logit:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_tol_bernoulli_logit_lpmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_bernoulli_logit_lupmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_neg_binomial_2_log:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_tol_neg_binomial_2_log_lpmf:\n\n\n(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_neg_binomial_2_log_lupmf:\n\n\n(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_poisson_log:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_tol_poisson_log_lpmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_poisson_log_lupmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlbeta:\n\n\n(real alpha, real beta) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlchoose:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nldexp:\n\n\n(real x, int y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlgamma:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlinspaced_array:\n\n\n(int n, data real lower, data real upper) : array[] real (matrix_operations.html)\n\n\nlinspaced_int_array:\n\n\n(int n, int lower, int upper) : array[] int (matrix_operations.html)\n\n\nlinspaced_row_vector:\n\n\n(int n, data real lower, data real upper) : row_vector (matrix_operations.html)\n\n\nlinspaced_vector:\n\n\n(int n, data real lower, data real upper) : vector (matrix_operations.html)\n\n\nlkj_corr:\n\n\ndistribution statement 
(correlation_matrix_distributions.html)\n\n\nlkj_corr_cholesky:\n\n\ndistribution statement (correlation_matrix_distributions.html)\n\n\nlkj_corr_cholesky_lpdf:\n\n\n(matrix L | real eta) : real (correlation_matrix_distributions.html)\n\n\nlkj_corr_cholesky_lupdf:\n\n\n(matrix L | real eta) : real (correlation_matrix_distributions.html)\n\n\nlkj_corr_cholesky_rng:\n\n\n(int K, real eta) : matrix (correlation_matrix_distributions.html)\n\n\nlkj_corr_lpdf:\n\n\n(matrix y | real eta) : real (correlation_matrix_distributions.html)\n\n\nlkj_corr_lupdf:\n\n\n(matrix y | real eta) : real (correlation_matrix_distributions.html)\n\n\nlkj_corr_rng:\n\n\n(int K, real eta) : matrix (correlation_matrix_distributions.html)\n\n\nlmgamma:\n\n\n(int n, real x) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlmultiply:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog10:\n\n\n() : real (real-valued_basic_functions.html)\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1m:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1m_exp:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1m_inv_logit:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1p:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1p_exp:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog2:\n\n\n() : real (real-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog_determinant:\n\n\n(matrix A) : real (matrix_operations.html)\n\n\nlog_diff_exp:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog_falling_factorial:\n\n\n(real x, real n) : real 
(real-valued_basic_functions.html)\n\n\nlog_inv_logit:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog_inv_logit_diff:\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog_mix:\n\n\n(real theta, real lp1, real lp2) : real (real-valued_basic_functions.html)\n\n\n(T1 thetas, T2 lps) : R (real-valued_basic_functions.html)\n\n\nlog_modified_bessel_first_kind:\n\n\n(real v, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog_rising_factorial:\n\n\n(real x, real n) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog_softmax:\n\n\n(vector x) : vector (matrix_operations.html)\n\n\nlog_sum_exp:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nlogistic:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nlogistic_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\nlogit:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nloglogistic:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nloglogistic_cdf:\n\n\n(reals y | reals alpha, reals beta) : real 
(positive_continuous_distributions.html)\n\n\nloglogistic_lpdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\nloglogistic_rng:\n\n\n(reals mu, reals sigma) : R (positive_continuous_distributions.html)\n\n\nlognormal:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nlognormal_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_rng:\n\n\n(reals mu, reals sigma) : R (positive_continuous_distributions.html)\n\n\nlower_bound_constrain:\n\n\n(reals y, reals lb) : reals (transform_functions.html)\n\n\nlower_bound_jacobian:\n\n\n(reals y, reals lb) : reals (transform_functions.html)\n\n\nlower_bound_unconstrain:\n\n\n(reals x, reals lb) : reals (transform_functions.html)\n\n\nlower_upper_bound_constrain:\n\n\n(reals y, reals lb, reals ub) : reals (transform_functions.html)\n\n\nlower_upper_bound_jacobian:\n\n\n(reals y, reals lb, reals ub) : reals (transform_functions.html)\n\n\nlower_upper_bound_unconstrain:\n\n\n(reals x, reals lb, reals ub) : reals (transform_functions.html)\n\n\n\n\n\nmachine_precision:\n\n\n() : real (real-valued_basic_functions.html)\n\n\nmap_rect:\n\n\n(F f, vector phi, array[] vector theta, data array[,] real x_r, data array[,] int x_i) : vector (higher-order_functions.html)\n\n\nmatrix_exp:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nmatrix_exp_multiply:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\nmatrix_power:\n\n\n(matrix A, int B) : matrix 
(matrix_operations.html)\n\n\nmax:\n\n\n(array[] int x) : int (array_operations.html)\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nmdivide_left_spd:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector b) : vector (matrix_operations.html)\n\n\nmdivide_left_tri_low:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector b) : vector (matrix_operations.html)\n\n\nmdivide_right_spd:\n\n\n(matrix B, matrix A) : matrix (matrix_operations.html)\n\n\n(row_vector b, matrix A) : row_vector (matrix_operations.html)\n\n\nmdivide_right_tri_low:\n\n\n(matrix B, matrix A) : matrix (matrix_operations.html)\n\n\n(row_vector b, matrix A) : row_vector (matrix_operations.html)\n\n\nmean:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nmin:\n\n\n(array[] int x) : int (array_operations.html)\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nmodified_bessel_first_kind:\n\n\n(int v, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nmodified_bessel_second_kind:\n\n\n(int v, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nmulti_gp:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_cholesky:\n\n\ndistribution statement 
(distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_cholesky_lpdf:\n\n\n(matrix y | matrix L, vector w) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_cholesky_lupdf:\n\n\n(matrix y | matrix L, vector w) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_lpdf:\n\n\n(matrix y | matrix Sigma, vector w) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_lupdf:\n\n\n(matrix y | matrix Sigma, vector w) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_cholesky:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_cholesky_lpdf:\n\n\n(row_vectors y | row_vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_cholesky_lupdf:\n\n\n(row_vectors y | row_vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_cholesky_rng:\n\n\n(row_vector mu, matrix L) : vector (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors mu, matrix L) : vectors (distributions_over_unbounded_vectors.html)\n\n\n(vector mu, matrix L) : vector (distributions_over_unbounded_vectors.html)\n\n\n(vectors mu, matrix L) : vectors (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_lpdf:\n\n\n(row_vectors y | row_vectors mu, matrix Sigma) : real 
(distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_lupdf:\n\n\n(row_vectors y | row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_prec:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_prec_lpdf:\n\n\n(row_vectors y | row_vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_prec_lupdf:\n\n\n(row_vectors y | row_vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_rng:\n\n\n(row_vector mu, matrix Sigma) : vector (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors mu, matrix Sigma) : vectors (distributions_over_unbounded_vectors.html)\n\n\n(vector mu, matrix Sigma) : vector 
(distributions_over_unbounded_vectors.html)\n\n\n(vectors mu, matrix Sigma) : vectors (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_cholesky_t_rng:\n\n\n(real nu, vector mu, matrix L) : vector (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_cholesky:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_cholesky_lpdf:\n\n\n(vectors y | real nu, vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_cholesky_lupdf:\n\n\n(vectors y | real nu, vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_cholesky_rng:\n\n\n(real nu, array[] row_vector mu, matrix L) : array[] vector (distributions_over_unbounded_vectors.html)\n\n\n(real nu, array[] vector mu, matrix L) : array[] vector (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_lpdf:\n\n\n(row_vectors y | real nu, row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | real nu, vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | real nu, row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | real nu, vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_lupdf:\n\n\n(row_vectors y | real nu, row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | real nu, vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | real nu, row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | real nu, vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_rng:\n\n\n(real nu, row_vector mu, matrix Sigma) : vector 
(distributions_over_unbounded_vectors.html)\n\n\n(real nu, row_vectors mu, matrix Sigma) : vectors (distributions_over_unbounded_vectors.html)\n\n\n(real nu, vector mu, matrix Sigma) : vector (distributions_over_unbounded_vectors.html)\n\n\n(real nu, vectors mu, matrix Sigma) : vectors (distributions_over_unbounded_vectors.html)\n\n\nmultinomial:\n\n\ndistribution statement (multivariate_discrete_distributions.html)\n\n\nmultinomial_logit:\n\n\ndistribution statement (multivariate_discrete_distributions.html)\n\n\nmultinomial_logit_lpmf:\n\n\n(array[] int y | vector gamma) : real (multivariate_discrete_distributions.html)\n\n\nmultinomial_logit_lupmf:\n\n\n(array[] int y | vector gamma) : real (multivariate_discrete_distributions.html)\n\n\nmultinomial_logit_rng:\n\n\n(vector gamma, int N) : array[] int (multivariate_discrete_distributions.html)\n\n\nmultinomial_lpmf:\n\n\n(array[] int y | vector theta) : real (multivariate_discrete_distributions.html)\n\n\nmultinomial_lupmf:\n\n\n(array[] int y | vector theta) : real (multivariate_discrete_distributions.html)\n\n\nmultinomial_rng:\n\n\n(vector theta, int N) : array[] int (multivariate_discrete_distributions.html)\n\n\nmultiply_lower_tri_self_transpose:\n\n\n(matrix x) : matrix (matrix_operations.html)\n\n\n\n\n\nneg_binomial:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_cdf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_lccdf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_lcdf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_glm:\n\n\ndistribution statement 
(unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_glm_lpmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_glm_lupmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_lpmf:\n\n\n(ints n | reals eta, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_lupmf:\n\n\n(ints n | reals eta, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_rng:\n\n\n(reals eta, reals phi) : R (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_lpmf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_lupmf:\n\n\n(ints n | reals mu, reals phi) : real 
(unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_rng:\n\n\n(reals mu, reals phi) : R (unbounded_discrete_distributions.html)\n\n\nneg_binomial_cdf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_lccdf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_lcdf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_lpmf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_lupmf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_rng:\n\n\n(reals alpha, reals beta) : R (unbounded_discrete_distributions.html)\n\n\nnegative_infinity:\n\n\n() : real (real-valued_basic_functions.html)\n\n\nnorm:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\nnorm1:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(row_vector x) : real (array_operations.html)\n\n\n(vector x) : real (array_operations.html)\n\n\nnorm2:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(row_vector x) : real (array_operations.html)\n\n\n(vector x) : real (array_operations.html)\n\n\nnormal:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nnormal_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_id_glm:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nnormal_id_glm_lpdf:\n\n\n(real y | matrix x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, real alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, vector alpha, vector beta, vector sigma) : real 
(unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, real alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, vector alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | row_vector x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | row_vector x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_id_glm_lupdf:\n\n\n(real y | matrix x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, real alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, vector alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, real alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, vector alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | row_vector x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | row_vector x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real 
(unbounded_continuous_distributions.html)\n\n\nnormal_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\nnot_a_number:\n\n\n() : real (real-valued_basic_functions.html)\n\n\nnum_elements:\n\n\n(array[] T x) : int (array_operations.html)\n\n\n(complex_matrix x) : int (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : int (complex_matrix_operations.html)\n\n\n(complex_vector x) : int (complex_matrix_operations.html)\n\n\n(matrix x) : int (matrix_operations.html)\n\n\n(row_vector x) : int (matrix_operations.html)\n\n\n(vector x) : int (matrix_operations.html)\n\n\n\n\n\node_adams:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, ...) : array[] vector (higher-order_functions.html)\n\n\node_adams_tol:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\node_adjoint_tol_ctl:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol_forward, data vector abs_tol_forward, data real rel_tol_backward, data vector abs_tol_backward, data real rel_tol_quadrature, data real abs_tol_quadrature, int max_num_steps, int num_steps_between_checkpoints, int interpolation_polynomial, int solver_forward, int solver_backward, ...) : array[] vector (higher-order_functions.html)\n\n\node_bdf:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, ...) 
: array[] vector (higher-order_functions.html)\n\n\node_bdf_tol:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\node_ckrk:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, ...) : array[] vector (higher-order_functions.html)\n\n\node_ckrk_tol:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\node_rk45:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, ...) : array[] vector (higher-order_functions.html)\n\n\node_rk45_tol:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\noffset_multiplier_constrain:\n\n\n(reals y, reals offset, reals mult) : reals (transform_functions.html)\n\n\noffset_multiplier_jacobian:\n\n\n(reals y, reals offset, reals mult) : reals (transform_functions.html)\n\n\noffset_multiplier_unconstrain:\n\n\n(reals x, reals offset, reals mult) : reals (transform_functions.html)\n\n\none_hot_array:\n\n\n(int n, int k) : array[] real (matrix_operations.html)\n\n\none_hot_int_array:\n\n\n(int n, int k) : array[] int (matrix_operations.html)\n\n\none_hot_row_vector:\n\n\n(int n, int k) : row_vector (matrix_operations.html)\n\n\none_hot_vector:\n\n\n(int K, int k) : vector (matrix_operations.html)\n\n\nones_array:\n\n\n(int n) : array[] real (matrix_operations.html)\n\n\nones_int_array:\n\n\n(int n) : array[] int (matrix_operations.html)\n\n\nones_row_vector:\n\n\n(int n) : row_vector (matrix_operations.html)\n\n\nones_vector:\n\n\n(int n) : vector (matrix_operations.html)\n\n\noperator!:\n\n\n(int x) : int (real-valued_basic_functions.html)\n\n\n(real x) : int 
(real-valued_basic_functions.html)\n\n\noperator!=:\n\n\n(complex x, complex y) : int (complex-valued_basic_functions.html)\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator%:\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\noperator%/%:\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\noperator&&:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator':\n\n\n(complex_matrix x) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x) : matrix (matrix_operations.html)\n\n\n(row_vector x) : vector (matrix_operations.html)\n\n\n(vector x) : row_vector (matrix_operations.html)\n\n\noperator*:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_matrix y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_vector y) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector 
(complex_matrix_operations.html)\n\n\n(complex_vector x, complex_row_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(matrix x, vector y) : vector (matrix_operations.html)\n\n\n(real x, matrix y) : matrix (matrix_operations.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, matrix y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, vector y) : real (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, row_vector y) : matrix (matrix_operations.html)\n\n\noperator*=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator+:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, 
complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(int x) : int (integer-valued_basic_functions.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x) : real (real-valued_basic_functions.html)\n\n\n(real x, matrix y) : matrix (matrix_operations.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator+=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator-:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(complex_matrix x) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x) : 
complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(int x) : int (integer-valued_basic_functions.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x) : matrix (matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x) : real (real-valued_basic_functions.html)\n\n\n(real x, matrix y) : matrix (matrix_operations.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x) : row_vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(T x) : T (complex-valued_basic_functions.html)\n\n\n(T x) : T (complex_matrix_operations.html)\n\n\n(T x) : T (integer-valued_basic_functions.html)\n\n\n(T x) : T (matrix_operations.html)\n\n\n(T x) : T (real-valued_basic_functions.html)\n\n\n(vector x) : vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator-=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator.*:\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector 
(matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator.*=:\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator./:\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x, matrix y) : matrix (matrix_operations.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator./=:\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator.^:\n\n\n( complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n( complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector 
(complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x, matrix y) : matrix (matrix_operations.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator/:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(complex_matrix B, complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector b, complex_matrix A) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix B, matrix A) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(row_vector b, matrix A) : row_vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(vector x, real y) 
: vector (matrix_operations.html)\n\n\noperator/=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator<:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator<=:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\noperator==:\n\n\n(complex x, complex y) : int (complex-valued_basic_functions.html)\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator>:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator>=:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator\\:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector b) : vector (matrix_operations.html)\n\n\noperator^:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\noperator||:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\nordered_constrain:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nordered_jacobian:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nordered_logistic:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nordered_logistic_glm_lpmf:\n\n\n(array[] int y | matrix x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector 
beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(int y | row_vector x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\nordered_logistic_glm_lupmf:\n\n\n(array[] int y | matrix x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(int y | row_vector x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\nordered_logistic_lpmf:\n\n\n(ints k | vector eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\nordered_logistic_lupmf:\n\n\n(ints k | vector eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\nordered_logistic_rng:\n\n\n(real eta, vector c) : int (bounded_discrete_distributions.html)\n\n\nordered_probit:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nordered_probit_lpmf:\n\n\n(ints k | real eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\n(ints k | vector eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\nordered_probit_lupmf:\n\n\n(ints k | real eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\n(ints k | vector eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\nordered_probit_rng:\n\n\n(real eta, vector c) : int (bounded_discrete_distributions.html)\n\n\nordered_unconstrain:\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\nowens_t:\n\n\n(real h, real a) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\n\n\n\npareto:\n\n\ndistribution statement (positive_lower-bounded_distributions.html)\n\n\npareto_cdf:\n\n\n(reals y | reals y_min, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_lccdf:\n\n\n(reals y | reals y_min, reals alpha) : real 
(positive_lower-bounded_distributions.html)\n\n\npareto_lcdf:\n\n\n(reals y | reals y_min, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_lpdf:\n\n\n(reals y | reals y_min, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_lupdf:\n\n\n(reals y | reals y_min, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_rng:\n\n\n(reals y_min, reals alpha) : R (positive_lower-bounded_distributions.html)\n\n\npareto_type_2:\n\n\ndistribution statement (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_cdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_lccdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_lcdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_lpdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_lupdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_rng:\n\n\n(reals mu, reals lambda, reals alpha) : R (positive_lower-bounded_distributions.html)\n\n\nPhi:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nPhi_approx:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\npi:\n\n\n() : real (real-valued_basic_functions.html)\n\n\npoisson:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\npoisson_cdf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_lccdf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_lcdf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log:\n\n\ndistribution statement 
(unbounded_discrete_distributions.html)\n\n\npoisson_log_glm:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\npoisson_log_glm_lpmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log_glm_lupmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log_lpmf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log_lupmf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log_rng:\n\n\n(reals alpha) : R (unbounded_discrete_distributions.html)\n\n\npoisson_lpmf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_lupmf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_rng:\n\n\n(reals lambda) : R 
(unbounded_discrete_distributions.html)\n\n\npolar:\n\n\n(real r, real theta) : complex (complex-valued_basic_functions.html)\n\n\npositive_infinity:\n\n\n() : real (real-valued_basic_functions.html)\n\n\npositive_ordered_constrain:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\npositive_ordered_jacobian:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\npositive_ordered_unconstrain:\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\npow:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : Z (complex-valued_basic_functions.html)\n\n\nprint:\n\n\n(T1 x1,..., TN xN) : void (void_functions.html)\n\n\nprod:\n\n\n(array[] int x) : real (array_operations.html)\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(complex_matrix x) : complex (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex (complex_matrix_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nproj:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n\n\n\nqr:\n\n\n(matrix A) : tuple(matrix, matrix) (matrix_operations.html)\n\n\nqr_Q:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nqr_R:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nqr_thin:\n\n\n(matrix A) : tuple(matrix, matrix) (matrix_operations.html)\n\n\nqr_thin_Q:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nqr_thin_R:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nquad_form:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector B) : real (matrix_operations.html)\n\n\nquad_form_diag:\n\n\n(matrix m, row_vector rv) : matrix (matrix_operations.html)\n\n\n(matrix m, vector v) : 
matrix (matrix_operations.html)\n\n\nquad_form_sym:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector B) : real (matrix_operations.html)\n\n\nquantile:\n\n\n(data array[] real x, data array[] real p) : array[] real (array_operations.html)\n\n\n(data array[] real x, data real p) : real (array_operations.html)\n\n\n(data row_vector x, data array[] real p) : array[] real (matrix_operations.html)\n\n\n(data row_vector x, data real p) : real (matrix_operations.html)\n\n\n(data vector x, data array[] real p) : array[] real (matrix_operations.html)\n\n\n(data vector x, data real p) : real (matrix_operations.html)\n\n\n\n\n\nrank:\n\n\n(array[] int v, int s) : int (array_operations.html)\n\n\n(array[] real v, int s) : int (array_operations.html)\n\n\n(row_vector v, int s) : int (matrix_operations.html)\n\n\n(vector v, int s) : int (matrix_operations.html)\n\n\nrayleigh:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nrayleigh_cdf:\n\n\n(real y | real sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_lccdf:\n\n\n(real y | real sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_lcdf:\n\n\n(real y | real sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_lpdf:\n\n\n(reals y | reals sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_lupdf:\n\n\n(reals y | reals sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_rng:\n\n\n(reals sigma) : R (positive_continuous_distributions.html)\n\n\nreduce_sum:\n\n\n(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...) : real (higher-order_functions.html)\n\n\nreduce_sum_static:\n\n\n(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...) 
: real (higher-order_functions.html)\n\n\nreject:\n\n\n(T1 x1,..., TN xN) : void (void_functions.html)\n\n\nrep_array:\n\n\n(T x, int k, int m, int n) : array[,,] T (array_operations.html)\n\n\n(T x, int m, int n) : array[,] T (array_operations.html)\n\n\n(T x, int n) : array[] T (array_operations.html)\n\n\nrep_matrix:\n\n\n(complex z, int m, int n) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector rv, int m) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_vector v, int n) : complex_matrix (complex_matrix_operations.html)\n\n\n(real x, int m, int n) : matrix (matrix_operations.html)\n\n\n(row_vector rv, int m) : matrix (matrix_operations.html)\n\n\n(vector v, int n) : matrix (matrix_operations.html)\n\n\nrep_row_vector:\n\n\n(complex z, int n) : complex_row_vector (complex_matrix_operations.html)\n\n\n(real x, int n) : row_vector (matrix_operations.html)\n\n\nrep_vector:\n\n\n(complex z, int m) : complex_vector (complex_matrix_operations.html)\n\n\n(real x, int m) : vector (matrix_operations.html)\n\n\nreverse:\n\n\n(array[] T v) : array[] T (array_operations.html)\n\n\n(complex_row_vector v) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector v) : row_vector (matrix_operations.html)\n\n\n(vector v) : vector (matrix_operations.html)\n\n\nrising_factorial:\n\n\n(real x, int n) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nround:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nrow:\n\n\n(complex_matrix x, int m) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x, int m) : row_vector (matrix_operations.html)\n\n\nrows:\n\n\n(complex_matrix x) : int (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : int (complex_matrix_operations.html)\n\n\n(complex_vector x) : int (complex_matrix_operations.html)\n\n\n(matrix x) : int 
(matrix_operations.html)\n\n\n(row_vector x) : int (matrix_operations.html)\n\n\n(vector x) : int (matrix_operations.html)\n\n\nrows_dot_product:\n\n\n(complex_matrix x, complex_matrix y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\nrows_dot_self:\n\n\n(complex_matrix x) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x) : vector (matrix_operations.html)\n\n\n(row_vector x) : vector (matrix_operations.html)\n\n\n(vector x) : vector (matrix_operations.html)\n\n\n\n\n\nscale_matrix_exp_multiply:\n\n\n(real t, matrix A, matrix B) : matrix (matrix_operations.html)\n\n\nscaled_inv_chi_square:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_cdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_lccdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_lcdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_lpdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_lupdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_rng:\n\n\n(reals nu, reals sigma) : R (positive_continuous_distributions.html)\n\n\nsd:\n\n\n(array[] real x) : real 
(array_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nsegment:\n\n\n(array[] T sv, int i, int n) : array[] T (matrix_operations.html)\n\n\n(complex_row_vector rv, int i, int n) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v, int i, int n) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector rv, int i, int n) : row_vector (matrix_operations.html)\n\n\n(vector v, int i, int n) : vector (matrix_operations.html)\n\n\nsimplex_constrain:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nsimplex_jacobian:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nsimplex_unconstrain:\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\nsin:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nsingular_values:\n\n\n(complex_matrix A) : vector (complex_matrix_operations.html)\n\n\n(matrix A) : vector (matrix_operations.html)\n\n\nsinh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nsize:\n\n\n(array[] T x) : int (array_operations.html)\n\n\n(complex_row_vector x) : int (complex_matrix_operations.html)\n\n\n(complex_vector x) : int (complex_matrix_operations.html)\n\n\n(int x) : int (integer-valued_basic_functions.html)\n\n\n(matrix x) : int (complex_matrix_operations.html)\n\n\n(matrix x) : int (matrix_operations.html)\n\n\n(real x) : int (integer-valued_basic_functions.html)\n\n\n(row_vector x) : int (matrix_operations.html)\n\n\n(vector x) : int (matrix_operations.html)\n\n\nskew_double_exponential:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_cdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_lccdf:\n\n\n(reals y | reals 
mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_lcdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_lpdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_lupdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\nskew_normal:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nskew_normal_cdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_lccdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_lcdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_lpdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_lupdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_rng:\n\n\n(reals xi, reals omega, real alpha) : R (unbounded_continuous_distributions.html)\n\n\nsoftmax:\n\n\n(vector x) : vector (matrix_operations.html)\n\n\nsolve_newton:\n\n\n(function algebra_system, vector y_guess, ...) : vector (higher-order_functions.html)\n\n\nsolve_newton_tol:\n\n\n(function algebra_system, vector y_guess, data real scaling_step, data real f_tol, int max_steps, ...) : vector (higher-order_functions.html)\n\n\nsolve_powell:\n\n\n(function algebra_system, vector y_guess, ...) : vector (higher-order_functions.html)\n\n\nsolve_powell_tol:\n\n\n(function algebra_system, vector y_guess, data real rel_tol, data real f_tol, int max_steps, ...) 
: vector (higher-order_functions.html)\n\n\nsort_asc:\n\n\n(array[] int v) : array[] int (array_operations.html)\n\n\n(array[] real v) : array[] real (array_operations.html)\n\n\n(row_vector v) : row_vector (matrix_operations.html)\n\n\n(vector v) : vector (matrix_operations.html)\n\n\nsort_desc:\n\n\n(array[] int v) : array[] int (array_operations.html)\n\n\n(array[] real v) : array[] real (array_operations.html)\n\n\n(row_vector v) : row_vector (matrix_operations.html)\n\n\n(vector v) : vector (matrix_operations.html)\n\n\nsort_indices_asc:\n\n\n(array[] int v) : array[] int (array_operations.html)\n\n\n(array[] real v) : array[] int (array_operations.html)\n\n\n(row_vector v) : array[] int (matrix_operations.html)\n\n\n(vector v) : array[] int (matrix_operations.html)\n\n\nsort_indices_desc:\n\n\n(array[] int v) : array[] int (array_operations.html)\n\n\n(array[] real v) : array[] int (array_operations.html)\n\n\n(row_vector v) : array[] int (matrix_operations.html)\n\n\n(vector v) : array[] int (matrix_operations.html)\n\n\nsqrt:\n\n\n(complex x) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nsqrt2:\n\n\n() : real (real-valued_basic_functions.html)\n\n\nsquare:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nsquared_distance:\n\n\n(row_vector x, row_vector y) : real (array_operations.html)\n\n\n(row_vector x, vector y) : real (array_operations.html)\n\n\n(vector x, row_vector y) : real (array_operations.html)\n\n\n(vector x, vector y) : real (array_operations.html)\n\n\nstd_normal:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nstd_normal_cdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_lccdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_lcdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_log_qf:\n\n\n(T x) : R 
(unbounded_continuous_distributions.html)\n\n\nstd_normal_lpdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_lupdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_qf:\n\n\n(T x) : R (unbounded_continuous_distributions.html)\n\n\nstd_normal_rng:\n\n\n() : real (unbounded_continuous_distributions.html)\n\n\nstep:\n\n\n(real x) : real (real-valued_basic_functions.html)\n\n\nstochastic_column_constrain:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\nstochastic_column_jacobian:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\nstochastic_column_unconstrain:\n\n\n(matrices x) : matrices (transform_functions.html)\n\n\nstochastic_row_constrain:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\nstochastic_row_jacobian:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\nstochastic_row_unconstrain:\n\n\n(matrices x) : matrices (transform_functions.html)\n\n\nstudent_t:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nstudent_t_cdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_lccdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_lcdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_lpdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_lupdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_rng:\n\n\n(reals nu, reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\nsub_col:\n\n\n(complex_matrix x, int i, int j, int n_rows) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, int i, int j, int n_rows) : vector (matrix_operations.html)\n\n\nsub_row:\n\n\n(complex_matrix x, int i, int j, 
int n_cols) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x, int i, int j, int n_cols) : row_vector (matrix_operations.html)\n\n\nsum:\n\n\n(array[] complex x) : complex (array_operations.html)\n\n\n(array[] int x) : int (array_operations.html)\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(complex_matrix x) : complex (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex (complex_matrix_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nsum_to_zero_constrain:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nsum_to_zero_jacobian:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nsum_to_zero_unconstrain:\n\n\n(matrices x) : matrices (transform_functions.html)\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\nsvd:\n\n\n(complex_matrix A) : tuple(complex_matrix, vector, complex_matrix) (complex_matrix_operations.html)\n\n\n(matrix A) : tuple(matrix, vector, matrix) (matrix_operations.html)\n\n\nsvd_U:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nsvd_V:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nsymmetrize_from_lower_tri:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\n\n\n\ntail:\n\n\n(array[] T sv, int n) : array[] T (matrix_operations.html)\n\n\n(complex_row_vector rv, int n) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v, int n) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector rv, int n) : row_vector 
(matrix_operations.html)\n\n\n(vector v, int n) : vector (matrix_operations.html)\n\n\ntan:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ntanh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ntarget:\n\n\n() : real (real-valued_basic_functions.html)\n\n\ntcrossprod:\n\n\n(matrix x) : matrix (matrix_operations.html)\n\n\ntgamma:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nto_array_1d:\n\n\n(array[...] complex a) : array[] complex (mixed_operations.html)\n\n\n(array[...] int a) : array[] int (mixed_operations.html)\n\n\n(array[...] real a) : array[] real (mixed_operations.html)\n\n\n(complex_matrix m) : array[] complex (mixed_operations.html)\n\n\n(complex_row_vector v) : array[] complex (mixed_operations.html)\n\n\n(complex_vector v) : array[] real (mixed_operations.html)\n\n\n(matrix m) : array[] real (mixed_operations.html)\n\n\n(row_vector v) : array[] real (mixed_operations.html)\n\n\n(vector v) : array[] real (mixed_operations.html)\n\n\nto_array_2d:\n\n\n(complex_matrix m) : array[,] real (mixed_operations.html)\n\n\n(matrix m) : array[,] real (mixed_operations.html)\n\n\nto_complex:\n\n\n() : complex (complex-valued_basic_functions.html)\n\n\n(real re) : complex (complex-valued_basic_functions.html)\n\n\n(real re, real im) : complex (complex-valued_basic_functions.html)\n\n\n(T1 re, T2 im) : Z (complex-valued_basic_functions.html)\n\n\nto_int:\n\n\n(data real x) : int (integer-valued_basic_functions.html)\n\n\nto_matrix:\n\n\n(array[,] complex a ) : complex_matrix (mixed_operations.html)\n\n\n(array[,] int a) : matrix (mixed_operations.html)\n\n\n(array[,] real a) : matrix (mixed_operations.html)\n\n\n(array[] complex a, int m, int n) : complex_matrix (mixed_operations.html)\n\n\n(array[] complex a, int m, int n, int col_major) : complex_matrix (mixed_operations.html)\n\n\n(array[] complex_row_vector vs) 
: complex_matrix (mixed_operations.html)\n\n\n(array[] int a, int m, int n) : matrix (mixed_operations.html)\n\n\n(array[] int a, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\n(array[] real a, int m, int n) : matrix (mixed_operations.html)\n\n\n(array[] real a, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\n(array[] row_vector vs) : matrix (mixed_operations.html)\n\n\n(complex_matrix A, int m, int n, int col_major) : complex_matrix (mixed_operations.html)\n\n\n(complex_matrix m) : complex_matrix (mixed_operations.html)\n\n\n(complex_matrix M, int m, int n) : complex_matrix (mixed_operations.html)\n\n\n(complex_row_vector v) : complex_matrix (mixed_operations.html)\n\n\n(complex_row_vector v, int m, int n) : complex_matrix (mixed_operations.html)\n\n\n(complex_row_vector v, int m, int n, int col_major) : complex_matrix (mixed_operations.html)\n\n\n(complex_vector v) : complex_matrix (mixed_operations.html)\n\n\n(complex_vector v, int m, int n) : complex_matrix (mixed_operations.html)\n\n\n(complex_vector v, int m, int n, int col_major) : complex_matrix (mixed_operations.html)\n\n\n(matrix A, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\n(matrix m) : matrix (mixed_operations.html)\n\n\n(matrix M, int m, int n) : matrix (mixed_operations.html)\n\n\n(row_vector v) : matrix (mixed_operations.html)\n\n\n(row_vector v, int m, int n) : matrix (mixed_operations.html)\n\n\n(row_vector v, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\n(vector v) : matrix (mixed_operations.html)\n\n\n(vector v, int m, int n) : matrix (mixed_operations.html)\n\n\n(vector v, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\nto_row_vector:\n\n\n(array[] complex a) : complex_row_vector (mixed_operations.html)\n\n\n(array[] int a) : row_vector (mixed_operations.html)\n\n\n(array[] real a) : row_vector (mixed_operations.html)\n\n\n(complex_matrix m) : complex_row_vector 
(mixed_operations.html)\n\n\n(complex_row_vector v) : complex_row_vector (mixed_operations.html)\n\n\n(complex_vector v) : complex_row_vector (mixed_operations.html)\n\n\n(matrix m) : row_vector (mixed_operations.html)\n\n\n(row_vector v) : row_vector (mixed_operations.html)\n\n\n(vector v) : row_vector (mixed_operations.html)\n\n\nto_vector:\n\n\n(array[] complex a) : complex_vector (mixed_operations.html)\n\n\n(array[] int a) : vector (mixed_operations.html)\n\n\n(array[] real a) : vector (mixed_operations.html)\n\n\n(complex_matrix m) : complex_vector (mixed_operations.html)\n\n\n(complex_row_vector v) : complex_vector (mixed_operations.html)\n\n\n(complex_vector v) : complex_vector (mixed_operations.html)\n\n\n(matrix m) : vector (mixed_operations.html)\n\n\n(row_vector v) : vector (mixed_operations.html)\n\n\n(vector v) : vector (mixed_operations.html)\n\n\ntrace:\n\n\n(complex_matrix A) : complex (complex_matrix_operations.html)\n\n\n(matrix A) : real (matrix_operations.html)\n\n\ntrace_dot:\n\n\n(matrix A, matrix B) : real (matrix_operations.html)\n\n\ntrace_gen_quad_form:\n\n\n(matrix D ,matrix A, matrix B) : real (matrix_operations.html)\n\n\ntrace_quad_form:\n\n\n(matrix A, matrix B) : real (matrix_operations.html)\n\n\n(matrix A, vector B) : real (matrix_operations.html)\n\n\ntrigamma:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ntrunc:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\n\n\n\nuniform:\n\n\ndistribution statement (bounded_continuous_distributions.html)\n\n\nuniform_cdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_lccdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_lcdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_lpdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_lupdf:\n\n\n(reals y | reals alpha, 
reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_rng:\n\n\n(reals alpha, reals beta) : R (bounded_continuous_distributions.html)\n\n\nuniform_simplex:\n\n\n(int n) : vector (matrix_operations.html)\n\n\nunit_vectors_constrain:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nunit_vectors_jacobian:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nunit_vectors_unconstrain:\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\nupper_bound_constrain:\n\n\n(reals y, reals ub) : reals (transform_functions.html)\n\n\nupper_bound_jacobian:\n\n\n(reals x, reals ub) : reals (transform_functions.html)\n\n\nupper_bound_unconstrain:\n\n\n(reals x, reals ub) : reals (transform_functions.html)\n\n\n\n\n\nvariance:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nvon_mises:\n\n\ndistribution statement (circular_distributions.html)\n\n\nvon_mises_cdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_lccdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_lcdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_lpdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_lupdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_rng:\n\n\n(reals mu, reals kappa) : R (circular_distributions.html)\n\n\n\n\n\nweibull:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nweibull_cdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nweibull_lccdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nweibull_lcdf:\n\n\n(reals y | reals alpha, reals sigma) : real 
(positive_continuous_distributions.html)\n\n\nweibull_lpdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nweibull_lupdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nweibull_rng:\n\n\n(reals alpha, reals sigma) : R (positive_continuous_distributions.html)\n\n\nwiener:\n\n\ndistribution statement (positive_lower-bounded_distributions.html)\n\n\nwiener_lccdf_unnorm:\n\n\n(real y, real alpha, real tau, real beta, real delta) : real (positive_lower-bounded_distributions.html)\n\n\n(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) : real (positive_lower-bounded_distributions.html)\n\n\nwiener_lcdf_unnorm:\n\n\n(real y, real alpha, real tau, real beta, real delta) : real (positive_lower-bounded_distributions.html)\n\n\n(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) : real (positive_lower-bounded_distributions.html)\n\n\nwiener_lpdf:\n\n\n(real y | real alpha, real tau, real beta, real delta, real var_delta) : real (positive_lower-bounded_distributions.html)\n\n\n(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) : real (positive_lower-bounded_distributions.html)\n\n\n(reals y | reals alpha, reals tau, reals beta, reals delta) : real (positive_lower-bounded_distributions.html)\n\n\nwiener_lupdf:\n\n\n(real y | real alpha, real tau, real beta, real delta, real var_delta) : real (positive_lower-bounded_distributions.html)\n\n\n(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) : real (positive_lower-bounded_distributions.html)\n\n\n(reals y | reals alpha, reals tau, reals beta, reals delta) : real (positive_lower-bounded_distributions.html)\n\n\nwishart:\n\n\ndistribution statement (covariance_matrix_distributions.html)\n\n\nwishart_cholesky_lpdf:\n\n\n(matrix L_W | real nu, matrix 
L_S) : real (covariance_matrix_distributions.html)\n\n\nwishart_cholesky_lupdf:\n\n\n(matrix L_W | real nu, matrix L_S) : real (covariance_matrix_distributions.html)\n\n\nwishart_cholesky_rng:\n\n\n(real nu, matrix L_S) : matrix (covariance_matrix_distributions.html)\n\n\nwishart_lpdf:\n\n\n(matrix W | real nu, matrix Sigma) : real (covariance_matrix_distributions.html)\n\n\nwishart_lupdf:\n\n\n(matrix W | real nu, matrix Sigma) : real (covariance_matrix_distributions.html)\n\n\nwishart_rng:\n\n\n(real nu, matrix Sigma) : matrix (covariance_matrix_distributions.html)\n\n\n\n\n\nyule_simon:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nyule_simon_cdf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_lccdf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_lcdf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_lpmf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_lupmf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_rng:\n\n\n(reals alpha) : R (unbounded_discrete_distributions.html)\n\n\n\n\n\nzeros_array:\n\n\n(int n) : array[] real (matrix_operations.html)\n\n\nzeros_int_array:\n\n\n(int n) : array[] int (matrix_operations.html)\n\n\nzeros_row_vector:\n\n\n(int n) : row_vector (matrix_operations.html)\n\n\nzeros_vector:\n\n\n(int n) : vector (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#a", + "href": "functions-reference/functions_index.html#a", + "title": "Alphabetical Index", + "section": "", + "text": "abs:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\n(T x) : T (integer-valued_basic_functions.html)\n\n\n(T x) : T (real-valued_basic_functions.html)\n\n\nacos:\n\n\n(complex z) : 
complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nacosh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nadd_diag:\n\n\n(complex_matrix m, complex_real d) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix m, complex_row_vector d) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix m, complex_vector d) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix m, real d) : matrix (matrix_operations.html)\n\n\n(matrix m, row_vector d) : matrix (matrix_operations.html)\n\n\n(matrix m, vector d) : matrix (matrix_operations.html)\n\n\nalgebra_solver:\n\n\n(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps) : vector (deprecated_functions.html)\n\n\nalgebra_solver_newton:\n\n\n(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i) : vector (deprecated_functions.html)\n\n\n(function algebra_system, vector y_guess, vector theta, data array[] real x_r, array[] int x_i, data real rel_tol, data real f_tol, int max_steps) : vector (deprecated_functions.html)\n\n\nappend_array:\n\n\n(T x, T y) : T (array_operations.html)\n\n\nappend_col:\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_matrix 
(complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, vector y) : matrix (matrix_operations.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, matrix y) : matrix (matrix_operations.html)\n\n\n(vector x, vector y) : matrix (matrix_operations.html)\n\n\nappend_row:\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_row_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, row_vector y) : matrix (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, matrix y) : matrix (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : matrix (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\narg:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\nasin:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nasinh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\natan:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R 
(real-valued_basic_functions.html)\n\n\natan2:\n\n\n(T y, T x) : R (real-valued_basic_functions.html)\n\n\natanh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#b", + "href": "functions-reference/functions_index.html#b", + "title": "Alphabetical Index", + "section": "", + "text": "bernoulli:\n\n\ndistribution statement (binary_distributions.html)\n\n\nbernoulli_cdf:\n\n\n(ints y | reals theta) : real (binary_distributions.html)\n\n\nbernoulli_lccdf:\n\n\n(ints y | reals theta) : real (binary_distributions.html)\n\n\nbernoulli_lcdf:\n\n\n(ints y | reals theta) : real (binary_distributions.html)\n\n\nbernoulli_logit:\n\n\ndistribution statement (binary_distributions.html)\n\n\nbernoulli_logit_glm:\n\n\ndistribution statement (binary_distributions.html)\n\n\nbernoulli_logit_glm_lpmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\nbernoulli_logit_glm_lupmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta) : real (binary_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\n(int y | matrix x, real 
alpha, vector beta) : real (binary_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta) : real (binary_distributions.html)\n\n\nbernoulli_logit_glm_rng:\n\n\n(matrix x, vector alpha, vector beta) : array[] int (binary_distributions.html)\n\n\n(row_vector x, vector alpha, vector beta) : array[] int (binary_distributions.html)\n\n\nbernoulli_logit_lpmf:\n\n\n(ints y | reals alpha) : real (binary_distributions.html)\n\n\nbernoulli_logit_lupmf:\n\n\n(ints y | reals alpha) : real (binary_distributions.html)\n\n\nbernoulli_logit_rng:\n\n\n(reals alpha) : R (binary_distributions.html)\n\n\nbernoulli_lpmf:\n\n\n(ints y | reals theta) : real (binary_distributions.html)\n\n\nbernoulli_lupmf:\n\n\n(ints y | reals theta) : real (binary_distributions.html)\n\n\nbernoulli_rng:\n\n\n(reals theta) : R (binary_distributions.html)\n\n\nbessel_first_kind:\n\n\n(int v, real x) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nbessel_second_kind:\n\n\n(int v, real x) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nbeta:\n\n\n(real alpha, real beta) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\ndistribution statement (continuous_distributions_on_0_1.html)\n\n\nbeta_binomial:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nbeta_binomial_cdf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_lccdf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_lcdf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_lpmf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real (bounded_discrete_distributions.html)\n\n\nbeta_binomial_lupmf:\n\n\n(ints n | ints N, reals alpha, reals beta) : real 
(bounded_discrete_distributions.html)\n\n\nbeta_binomial_rng:\n\n\n(ints N, reals alpha, reals beta) : R (bounded_discrete_distributions.html)\n\n\nbeta_cdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_lccdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_lcdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_lpdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_lupdf:\n\n\n(reals theta | reals alpha, reals beta) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_neg_binomial:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_cdf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_lccdf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_lcdf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_lpmf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_lupmf:\n\n\n(ints n | reals r, reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nbeta_neg_binomial_rng:\n\n\n(reals r, reals alpha, reals beta) : R (unbounded_discrete_distributions.html)\n\n\nbeta_proportion:\n\n\ndistribution statement (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_lccdf:\n\n\n(reals theta | reals mu, reals kappa) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_lcdf:\n\n\n(reals theta | reals mu, reals kappa) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_lpdf:\n\n\n(reals theta | reals mu, reals kappa) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_lupdf:\n\n\n(reals theta | 
reals mu, reals kappa) : real (continuous_distributions_on_0_1.html)\n\n\nbeta_proportion_rng:\n\n\n(reals mu, reals kappa) : R (continuous_distributions_on_0_1.html)\n\n\nbeta_rng:\n\n\n(reals alpha, reals beta) : R (continuous_distributions_on_0_1.html)\n\n\nbinary_log_loss:\n\n\n(int y, real y_hat) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nbinomial:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nbinomial_cdf:\n\n\n(ints n | ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_lccdf:\n\n\n(ints n | ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_lcdf:\n\n\n(ints n | ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_logit:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nbinomial_logit_glm:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nbinomial_logit_glm_lpmf:\n\n\n(array[] int n | array[] int N, matrix x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, matrix x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, row_vector x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, row_vector x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(int n | int N, matrix x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(int n | int N, matrix x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_logit_glm_lupmf:\n\n\n(array[] int n | array[] int N, matrix x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, matrix x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, row_vector x, 
real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int n | array[] int N, row_vector x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(int n | int N, matrix x, real alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\n(int n | int N, matrix x, vector alpha, vector beta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_logit_lpmf:\n\n\n(ints n | ints N, reals alpha) : real (bounded_discrete_distributions.html)\n\n\nbinomial_logit_lupmf:\n\n\n(ints n | ints N, reals alpha) : real (bounded_discrete_distributions.html)\n\n\nbinomial_lpmf:\n\n\n(ints n | ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_lupmf:\n\n\n(ints n | ints N, reals theta) : real (bounded_discrete_distributions.html)\n\n\nbinomial_rng:\n\n\n(ints N, reals theta) : R (bounded_discrete_distributions.html)\n\n\nblock:\n\n\n(complex_matrix x, int i, int j, int n_rows, int n_cols) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix x, int i, int j, int n_rows, int n_cols) : matrix (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#c", + "href": "functions-reference/functions_index.html#c", + "title": "Alphabetical Index", + "section": "", + "text": "categorical:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\ncategorical_logit:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\ncategorical_logit_glm:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\ncategorical_logit_glm_lpmf:\n\n\n(array[] int y | matrix x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(int y 
| row_vector x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_logit_glm_lupmf:\n\n\n(array[] int y | matrix x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\n(int y | row_vector x, vector alpha, matrix beta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_logit_lpmf:\n\n\n(ints y | vector beta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_logit_lupmf:\n\n\n(ints y | vector beta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_logit_rng:\n\n\n(vector beta) : int (bounded_discrete_distributions.html)\n\n\ncategorical_lpmf:\n\n\n(ints y | vector theta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_lupmf:\n\n\n(ints y | vector theta) : real (bounded_discrete_distributions.html)\n\n\ncategorical_rng:\n\n\n(vector theta) : int (bounded_discrete_distributions.html)\n\n\ncauchy:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\ncauchy_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ncauchy_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\ncbrt:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nceil:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nchi_square:\n\n\ndistribution statement 
(positive_continuous_distributions.html)\n\n\nchi_square_cdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_lccdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_lcdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_lpdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_lupdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\nchi_square_rng:\n\n\n(reals nu) : R (positive_continuous_distributions.html)\n\n\nchol2inv:\n\n\n(matrix L) : matrix (matrix_operations.html)\n\n\ncholesky_decompose:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\ncholesky_factor_corr_constrain:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncholesky_factor_corr_jacobian:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncholesky_factor_corr_unconstrain:\n\n\n(matrices x) : vectors (transform_functions.html)\n\n\ncholesky_factor_cov_constrain:\n\n\n(vectors y, int M, int N) : matrices (transform_functions.html)\n\n\ncholesky_factor_cov_jacobian:\n\n\n(vectors y, int M, int N) : matrices (transform_functions.html)\n\n\ncholesky_factor_cov_unconstrain:\n\n\n(matrices x) : vectors (transform_functions.html)\n\n\nchoose:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\ncol:\n\n\n(complex_matrix x, int n) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, int n) : vector (matrix_operations.html)\n\n\ncols:\n\n\n(complex_matrix x) : int (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : int (complex_matrix_operations.html)\n\n\n(complex_vector x) : int (complex_matrix_operations.html)\n\n\n(matrix x) : int (matrix_operations.html)\n\n\n(row_vector x) : int (matrix_operations.html)\n\n\n(vector x) : int 
(matrix_operations.html)\n\n\ncolumns_dot_product:\n\n\n(complex_matrix x, complex_matrix y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, vector y) : row_vector (matrix_operations.html)\n\n\ncolumns_dot_self:\n\n\n(complex_matrix x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x) : row_vector (matrix_operations.html)\n\n\n(row_vector x) : row_vector (matrix_operations.html)\n\n\n(vector x) : row_vector (matrix_operations.html)\n\n\ncomplex_schur_decompose:\n\n\n(complex_matrix A) : tuple(complex_matrix, complex_matrix) (complex_matrix_operations.html)\n\n\n(matrix A) : tuple(complex_matrix, complex_matrix) (complex_matrix_operations.html)\n\n\ncomplex_schur_decompose_t:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\ncomplex_schur_decompose_u:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\nconj:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(Z z) : Z (complex-valued_basic_functions.html)\n\n\ncorr_matrix_constrain:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncorr_matrix_jacobian:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncorr_matrix_unconstrain:\n\n\n(matrices x) : vectors (transform_functions.html)\n\n\ncos:\n\n\n(complex z) : complex 
(complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ncosh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ncov_exp_quad:\n\n\n(array[] real x, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(array[] real x1, array[] real x2, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(row_vectors x, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(row_vectors x1, row_vectors x2, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(vectors x, real alpha, real rho) : matrix (removed_functions.html)\n\n\n(vectors x1, vectors x2, real alpha, real rho) : matrix (removed_functions.html)\n\n\ncov_matrix_constrain:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncov_matrix_jacobian:\n\n\n(vectors y, int K) : matrices (transform_functions.html)\n\n\ncov_matrix_unconstrain:\n\n\n(matrices x) : vectors (transform_functions.html)\n\n\ncrossprod:\n\n\n(matrix x) : matrix (matrix_operations.html)\n\n\ncsr_extract:\n\n\n(matrix a) : tuple(vector, array[] int, array[] int) (sparse_matrix_operations.html)\n\n\ncsr_extract_u:\n\n\n(matrix a) : array[] int (sparse_matrix_operations.html)\n\n\ncsr_extract_v:\n\n\n(matrix a) : array[] int (sparse_matrix_operations.html)\n\n\ncsr_extract_w:\n\n\n(matrix a) : vector (sparse_matrix_operations.html)\n\n\ncsr_matrix_times_vector:\n\n\n(int m, int n, vector w, array[] int v, array[] int u, vector b) : vector (sparse_matrix_operations.html)\n\n\ncsr_to_dense_matrix:\n\n\n(int m, int n, vector w, array[] int v, array[] int u) : matrix (sparse_matrix_operations.html)\n\n\ncumulative_sum:\n\n\n(array[] complex x) : array[] complex (complex_matrix_operations.html)\n\n\n(array[] int x) : array[] int (matrix_operations.html)\n\n\n(array[] real x) : array[] real (matrix_operations.html)\n\n\n(complex_row_vector rv) : complex_row_vector 
(complex_matrix_operations.html)\n\n\n(complex_vector v) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector rv) : row_vector (matrix_operations.html)\n\n\n(vector v) : vector (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#d", + "href": "functions-reference/functions_index.html#d", + "title": "Alphabetical Index", + "section": "", + "text": "dae:\n\n\n(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, ...) : array[] vector (higher-order_functions.html)\n\n\ndae_tol:\n\n\n(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\ndeterminant:\n\n\n(matrix A) : real (matrix_operations.html)\n\n\ndiag_matrix:\n\n\n(complex_vector x) : complex_matrix (complex_matrix_operations.html)\n\n\n(vector x) : matrix (matrix_operations.html)\n\n\ndiag_post_multiply:\n\n\n(complex_matrix m, complex_row_vector v) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix m, complex_vector v) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix m, row_vector rv) : matrix (matrix_operations.html)\n\n\n(matrix m, vector v) : matrix (matrix_operations.html)\n\n\ndiag_pre_multiply:\n\n\n(complex_row_vector v, complex_matrix m) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_vector v, complex_matrix m) : complex_matrix (complex_matrix_operations.html)\n\n\n(row_vector rv, matrix m) : matrix (matrix_operations.html)\n\n\n(vector v, matrix m) : matrix (matrix_operations.html)\n\n\ndiagonal:\n\n\n(complex_matrix x) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x) : vector (matrix_operations.html)\n\n\ndigamma:\n\n\n(T x) : R 
(real-valued_basic_functions.html)\n\n\ndims:\n\n\n(T x) : array[] int (array_operations.html)\n\n\ndirichlet:\n\n\ndistribution statement (simplex_distributions.html)\n\n\ndirichlet_lpdf:\n\n\n(vectors theta | vectors alpha) : real (simplex_distributions.html)\n\n\ndirichlet_lupdf:\n\n\n(vectors theta | vectors alpha) : real (simplex_distributions.html)\n\n\ndirichlet_multinomial:\n\n\ndistribution statement (multivariate_discrete_distributions.html)\n\n\ndirichlet_multinomial_lpmf:\n\n\n(array[] int y | vector alpha) : real (multivariate_discrete_distributions.html)\n\n\ndirichlet_multinomial_lupmf:\n\n\n(array[] int y | vector alpha) : real (multivariate_discrete_distributions.html)\n\n\ndirichlet_multinomial_rng:\n\n\n(vector alpha, int N) : array[] int (multivariate_discrete_distributions.html)\n\n\ndirichlet_rng:\n\n\n(vector alpha) : vector (simplex_distributions.html)\n\n\ndiscrete_range:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\ndiscrete_range_cdf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_lccdf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_lcdf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_lpmf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_lupmf:\n\n\n(ints y | ints l, ints u) : real (bounded_discrete_distributions.html)\n\n\ndiscrete_range_rng:\n\n\n(ints l, ints u) : ints (bounded_discrete_distributions.html)\n\n\ndistance:\n\n\n(row_vector x, row_vector y) : real (array_operations.html)\n\n\n(row_vector x, vector y) : real (array_operations.html)\n\n\n(vector x, row_vector y) : real (array_operations.html)\n\n\n(vector x, vector y) : real (array_operations.html)\n\n\ndot_product:\n\n\n(complex_row_vector x, complex_row_vector y) : complex (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_vector y) : 
complex (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_row_vector y) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex (complex_matrix_operations.html)\n\n\n(row_vector x, row_vector y) : real (matrix_operations.html)\n\n\n(row_vector x, vector y) : real (matrix_operations.html)\n\n\n(vector x, row_vector y) : real (matrix_operations.html)\n\n\n(vector x, vector y) : real (matrix_operations.html)\n\n\ndot_self:\n\n\n(complex_row_vector x) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex (complex_matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\ndouble_exponential:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\ndouble_exponential_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#e", + "href": "functions-reference/functions_index.html#e", + "title": "Alphabetical Index", + "section": "", + "text": "e:\n\n\n() : real (real-valued_basic_functions.html)\n\n\neigendecompose:\n\n\n(complex_matrix A) : tuple(complex_matrix, complex_vector) (complex_matrix_operations.html)\n\n\n(matrix A) : tuple(complex_matrix, 
complex_vector) (matrix_operations.html)\n\n\neigendecompose_sym:\n\n\n(complex_matrix A) : tuple(complex_matrix, complex_vector) (complex_matrix_operations.html)\n\n\n(matrix A) : tuple(matrix, vector) (matrix_operations.html)\n\n\neigenvalues:\n\n\n(complex_matrix A) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix A) : complex_vector (matrix_operations.html)\n\n\neigenvalues_sym:\n\n\n(complex_matrix A) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix A) : vector (matrix_operations.html)\n\n\neigenvectors:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : complex_matrix (matrix_operations.html)\n\n\neigenvectors_sym:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nerf:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nerfc:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nexp:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nexp2:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nexp_mod_normal:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_cdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_lccdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_lcdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_lpdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_lupdf:\n\n\n(reals y | reals mu, reals sigma, reals lambda) : real (unbounded_continuous_distributions.html)\n\n\nexp_mod_normal_rng:\n\n\n(reals mu, reals sigma, reals lambda) : R (unbounded_continuous_distributions.html)\n\n\nexpm1:\n\n\n(T x) : 
R (real-valued_basic_functions.html)\n\n\nexponential:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nexponential_cdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_lccdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_lcdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_lpdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_lupdf:\n\n\n(reals y | reals beta) : real (positive_continuous_distributions.html)\n\n\nexponential_rng:\n\n\n(reals beta) : R (positive_continuous_distributions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#f", + "href": "functions-reference/functions_index.html#f", + "title": "Alphabetical Index", + "section": "", + "text": "falling_factorial:\n\n\n(real x, real n) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfatal_error:\n\n\n(T1 x1,..., TN xN) : void (void_functions.html)\n\n\nfdim:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfft:\n\n\n(complex_vector v) : complex_vector (complex_matrix_operations.html)\n\n\nfft2:\n\n\n(complex_matrix m) : complex_matrix (complex_matrix_operations.html)\n\n\nfloor:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nfma:\n\n\n(real x, real y, real z) : real (real-valued_basic_functions.html)\n\n\nfmax:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfmin:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nfmod:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R 
(real-valued_basic_functions.html)\n\n\nfrechet:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nfrechet_cdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_lccdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_lcdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_lpdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_lupdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nfrechet_rng:\n\n\n(reals alpha, reals sigma) : R (positive_continuous_distributions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#g", + "href": "functions-reference/functions_index.html#g", + "title": "Alphabetical Index", + "section": "", + "text": "gamma:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\ngamma_cdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_lccdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_lcdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_lpdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_lupdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ngamma_p:\n\n\n(real a, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\ngamma_q:\n\n\n(real a, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\ngamma_rng:\n\n\n(reals alpha, reals beta) : R 
(positive_continuous_distributions.html)\n\n\ngaussian_dlm_obs:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\ngaussian_dlm_obs_lpdf:\n\n\n(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0) : real (distributions_over_unbounded_vectors.html)\n\n\n(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0) : real (distributions_over_unbounded_vectors.html)\n\n\ngaussian_dlm_obs_lupdf:\n\n\n(matrix y | matrix F, matrix G, matrix V, matrix W, vector m0, matrix C0) : real (distributions_over_unbounded_vectors.html)\n\n\n(matrix y | matrix F, matrix G, vector V, matrix W, vector m0, matrix C0) : real (distributions_over_unbounded_vectors.html)\n\n\ngeneralized_inverse:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\ngenerate_laplace_options:\n\n\n(int dimension) : tuple(vector, real, int, int, int, int) (embedded_laplace.html)\n\n\n(vector theta_init) : tuple(vector, real, int, int, int, int) (embedded_laplace.html)\n\n\nget_imag:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\n(T x) : T (complex_matrix_operations.html)\n\n\nget_real:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\n(T x) : T (complex_matrix_operations.html)\n\n\ngp_dot_prod_cov:\n\n\n(array[] real x, real sigma) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma) : matrix (matrix_operations.html)\n\n\ngp_exp_quad_cov:\n\n\n(array[] real x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, array[] 
real length_scale) : matrix (matrix_operations.html)\n\n\ngp_exponential_cov:\n\n\n(array[] real x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\ngp_matern23_cov:\n\n\n(array[] real x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\ngp_matern52_cov:\n\n\n(array[] real x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, array[] real length_scale) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, real length_scale) : matrix (matrix_operations.html)\n\n\ngp_periodic_cov:\n\n\n(array[] real x, real sigma, real length_scale, real period) : matrix (matrix_operations.html)\n\n\n(array[] real x1, array[] real x2, real sigma, real 
length_scale, real period) : matrix (matrix_operations.html)\n\n\n(vectors x, real sigma, real length_scale, real period) : matrix (matrix_operations.html)\n\n\n(vectors x1, vectors x2, real sigma, real length_scale, real period) : matrix (matrix_operations.html)\n\n\ngumbel:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\ngumbel_cdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_lccdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_lcdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_lpdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_lupdf:\n\n\n(reals y | reals mu, reals beta) : real (unbounded_continuous_distributions.html)\n\n\ngumbel_rng:\n\n\n(reals mu, reals beta) : R (unbounded_continuous_distributions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#h", + "href": "functions-reference/functions_index.html#h", + "title": "Alphabetical Index", + "section": "", + "text": "head:\n\n\n(array[] T sv, int n) : array[] T (matrix_operations.html)\n\n\n(complex_row_vector rv, int n) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v, int n) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector rv, int n) : row_vector (matrix_operations.html)\n\n\n(vector v, int n) : vector (matrix_operations.html)\n\n\nhmm_hidden_state_prob:\n\n\n(matrix log_omega, matrix Gamma, vector rho) : matrix (hidden_markov_models.html)\n\n\nhmm_latent_rng:\n\n\n(matrix log_omega, matrix Gamma, vector rho) : array[] int (hidden_markov_models.html)\n\n\nhmm_marginal:\n\n\n(matrix log_omega, matrix Gamma, vector rho) : real (hidden_markov_models.html)\n\n\nhypergeometric:\n\n\ndistribution statement 
(bounded_discrete_distributions.html)\n\n\nhypergeometric_1F0:\n\n\n(real a, real z) : real (real-valued_basic_functions.html)\n\n\nhypergeometric_2F1:\n\n\n(real a1, real a2, real b1, real z) : real (real-valued_basic_functions.html)\n\n\nhypergeometric_3F2:\n\n\n(T1 a, T2 b, real z) : real (real-valued_basic_functions.html)\n\n\nhypergeometric_lpmf:\n\n\n(int n | int N, int a, int b) : real (bounded_discrete_distributions.html)\n\n\nhypergeometric_lupmf:\n\n\n(int n | int N, int a, int b) : real (bounded_discrete_distributions.html)\n\n\nhypergeometric_pFq:\n\n\n(T1 a, T2 b, real z) : real (real-valued_basic_functions.html)\n\n\nhypergeometric_rng:\n\n\n(int N, int a, int2 b) : int (bounded_discrete_distributions.html)\n\n\nhypot:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#i", + "href": "functions-reference/functions_index.html#i", + "title": "Alphabetical Index", + "section": "", + "text": "identity_matrix:\n\n\n(int k) : matrix (matrix_operations.html)\n\n\ninc_beta:\n\n\n(real alpha, real beta, real x) : real (real-valued_basic_functions.html)\n\n\nint_step:\n\n\n(int x) : int (integer-valued_basic_functions.html)\n\n\n(real x) : int (integer-valued_basic_functions.html)\n\n\nintegrate_1d:\n\n\n(function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i) : real (higher-order_functions.html)\n\n\n(function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i), real relative_tolerance) : real (higher-order_functions.html)\n\n\nintegrate_ode:\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) : array[,] real (deprecated_functions.html)\n\n\nintegrate_ode_adams:\n\n\n(function ode, array[] real 
initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) : array[,] real (deprecated_functions.html)\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i, real rel_tol, real abs_tol, int max_num_steps) : array[,] real (deprecated_functions.html)\n\n\nintegrate_ode_bdf:\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) : array[,] real (deprecated_functions.html)\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i, real rel_tol, real abs_tol, int max_num_steps) : array[,] real (deprecated_functions.html)\n\n\nintegrate_ode_rk45:\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i) : array[,] real (deprecated_functions.html)\n\n\n(function ode, array[] real initial_state, real initial_time, array[] real times, array[] real theta, array[] real x_r, array[] int x_i, real rel_tol, real abs_tol, int max_num_steps) : array[,] real (deprecated_functions.html)\n\n\ninv:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_chi_square:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\ninv_chi_square_cdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_lccdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_lcdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_lpdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_lupdf:\n\n\n(reals y | reals nu) : real (positive_continuous_distributions.html)\n\n\ninv_chi_square_rng:\n\n\n(reals nu) : R 
(positive_continuous_distributions.html)\n\n\ninv_cloglog:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_erfc:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_fft:\n\n\n(complex_vector u) : complex_vector (complex_matrix_operations.html)\n\n\ninv_fft2:\n\n\n(complex_matrix m) : complex_matrix (complex_matrix_operations.html)\n\n\ninv_gamma:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\ninv_gamma_cdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_lccdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_lcdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_lpdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_lupdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\ninv_gamma_rng:\n\n\n(reals alpha, reals beta) : R (positive_continuous_distributions.html)\n\n\ninv_inc_beta:\n\n\n(real alpha, real beta, real p) : real (real-valued_basic_functions.html)\n\n\ninv_logit:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_Phi:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_sqrt:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_square:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ninv_wishart:\n\n\ndistribution statement (covariance_matrix_distributions.html)\n\n\ninv_wishart_cholesky_lpdf:\n\n\n(matrix L_W | real nu, matrix L_S) : real (covariance_matrix_distributions.html)\n\n\ninv_wishart_cholesky_lupdf:\n\n\n(matrix L_W | real nu, matrix L_S) : real (covariance_matrix_distributions.html)\n\n\ninv_wishart_cholesky_rng:\n\n\n(real nu, matrix L_S) : matrix (covariance_matrix_distributions.html)\n\n\ninv_wishart_lpdf:\n\n\n(matrix W | real nu, matrix Sigma) : real 
(covariance_matrix_distributions.html)\n\n\ninv_wishart_lupdf:\n\n\n(matrix W | real nu, matrix Sigma) : real (covariance_matrix_distributions.html)\n\n\ninv_wishart_rng:\n\n\n(real nu, matrix Sigma) : matrix (covariance_matrix_distributions.html)\n\n\ninverse:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\ninverse_spd:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nis_inf:\n\n\n(real x) : int (real-valued_basic_functions.html)\n\n\nis_nan:\n\n\n(real x) : int (real-valued_basic_functions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#l", + "href": "functions-reference/functions_index.html#l", + "title": "Alphabetical Index", + "section": "", + "text": "lambert_w0:\n\n\n(reals x) : R (real-valued_basic_functions.html)\n\n\nlambert_wm1:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlaplace_latent_bernoulli_logit_rng:\n\n\n(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : vector (embedded_laplace.html)\n\n\nlaplace_latent_neg_binomial_2_log_rng:\n\n\n(array[] int y, array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : vector (embedded_laplace.html)\n\n\nlaplace_latent_poisson_log_rng:\n\n\n(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : vector (embedded_laplace.html)\n\n\nlaplace_latent_rng:\n\n\n(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : vector (embedded_laplace.html)\n\n\nlaplace_latent_rng_tol:\n\n\n(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : vector (embedded_laplace.html)\n\n\nlaplace_latent_tol_bernoulli_logit_rng:\n\n\n(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : vector (embedded_laplace.html)\n\n\nlaplace_latent_tol_neg_binomial_2_log_rng:\n\n\n(array[] int y, array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : vector (embedded_laplace.html)\n\n\nlaplace_latent_tol_poisson_log_rng:\n\n\n(array[] int y, array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : vector (embedded_laplace.html)\n\n\nlaplace_marginal:\n\n\n(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_bernoulli_logit:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_bernoulli_logit_lpmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_bernoulli_logit_lupmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_neg_binomial_2_log:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_neg_binomial_2_log_lpmf:\n\n\n(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_neg_binomial_2_log_lupmf:\n\n\n(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments) : real (embedded_laplace.html)\n\n\nlaplace_marginal_poisson_log:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_poisson_log_lpmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments : real (embedded_laplace.html)\n\n\nlaplace_marginal_poisson_log_lupmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol:\n\n\n(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_bernoulli_logit:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_tol_bernoulli_logit_lpmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_bernoulli_logit_lupmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_neg_binomial_2_log:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_tol_neg_binomial_2_log_lpmf:\n\n\n(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_neg_binomial_2_log_lupmf:\n\n\n(array[] int y | array[] int y_index, real eta, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_poisson_log:\n\n\ndistribution statement (embedded_laplace.html)\n\n\nlaplace_marginal_tol_poisson_log_lpmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlaplace_marginal_tol_poisson_log_lupmf:\n\n\n(array[] int y | array[] int y_index, vector m, data int hessian_block_size, function covariance_function, tuple(...) 
covariance_arguments, tuple(vector, real, int, int, int, int) tolerances) : real (embedded_laplace.html)\n\n\nlbeta:\n\n\n(real alpha, real beta) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlchoose:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nldexp:\n\n\n(real x, int y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlgamma:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlinspaced_array:\n\n\n(int n, data real lower, data real upper) : array[] real (matrix_operations.html)\n\n\nlinspaced_int_array:\n\n\n(int n, int lower, int upper) : array[] int (matrix_operations.html)\n\n\nlinspaced_row_vector:\n\n\n(int n, data real lower, data real upper) : row_vector (matrix_operations.html)\n\n\nlinspaced_vector:\n\n\n(int n, data real lower, data real upper) : vector (matrix_operations.html)\n\n\nlkj_corr:\n\n\ndistribution statement (correlation_matrix_distributions.html)\n\n\nlkj_corr_cholesky:\n\n\ndistribution statement (correlation_matrix_distributions.html)\n\n\nlkj_corr_cholesky_lpdf:\n\n\n(matrix L | real eta) : real (correlation_matrix_distributions.html)\n\n\nlkj_corr_cholesky_lupdf:\n\n\n(matrix L | real eta) : real (correlation_matrix_distributions.html)\n\n\nlkj_corr_cholesky_rng:\n\n\n(int K, real eta) : matrix (correlation_matrix_distributions.html)\n\n\nlkj_corr_lpdf:\n\n\n(matrix y | real eta) : real (correlation_matrix_distributions.html)\n\n\nlkj_corr_lupdf:\n\n\n(matrix y | real eta) : real (correlation_matrix_distributions.html)\n\n\nlkj_corr_rng:\n\n\n(int K, real eta) : matrix (correlation_matrix_distributions.html)\n\n\nlmgamma:\n\n\n(int n, real x) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlmultiply:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R 
(real-valued_basic_functions.html)\n\n\nlog:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog10:\n\n\n() : real (real-valued_basic_functions.html)\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1m:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1m_exp:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1m_inv_logit:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1p:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog1p_exp:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog2:\n\n\n() : real (real-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog_determinant:\n\n\n(matrix A) : real (matrix_operations.html)\n\n\nlog_diff_exp:\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog_falling_factorial:\n\n\n(real x, real n) : real (real-valued_basic_functions.html)\n\n\nlog_inv_logit:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nlog_inv_logit_diff:\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog_mix:\n\n\n(real theta, real lp1, real lp2) : real (real-valued_basic_functions.html)\n\n\n(T1 thetas, T2 lps) : R (real-valued_basic_functions.html)\n\n\nlog_modified_bessel_first_kind:\n\n\n(real v, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog_rising_factorial:\n\n\n(real x, real n) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nlog_softmax:\n\n\n(vector x) : vector (matrix_operations.html)\n\n\nlog_sum_exp:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(T1 x, T2 y) : R 
(real-valued_basic_functions.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nlogistic:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nlogistic_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nlogistic_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\nlogit:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nloglogistic:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nloglogistic_cdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\nloglogistic_lpdf:\n\n\n(reals y | reals alpha, reals beta) : real (positive_continuous_distributions.html)\n\n\nloglogistic_rng:\n\n\n(reals mu, reals sigma) : R (positive_continuous_distributions.html)\n\n\nlognormal:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nlognormal_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nlognormal_rng:\n\n\n(reals mu, reals sigma) : R 
(positive_continuous_distributions.html)\n\n\nlower_bound_constrain:\n\n\n(reals y, reals lb) : reals (transform_functions.html)\n\n\nlower_bound_jacobian:\n\n\n(reals y, reals lb) : reals (transform_functions.html)\n\n\nlower_bound_unconstrain:\n\n\n(reals x, reals lb) : reals (transform_functions.html)\n\n\nlower_upper_bound_constrain:\n\n\n(reals y, reals lb, reals ub) : reals (transform_functions.html)\n\n\nlower_upper_bound_jacobian:\n\n\n(reals y, reals lb, reals ub) : reals (transform_functions.html)\n\n\nlower_upper_bound_unconstrain:\n\n\n(reals x, reals lb, reals ub) : reals (transform_functions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#m", + "href": "functions-reference/functions_index.html#m", + "title": "Alphabetical Index", + "section": "", + "text": "machine_precision:\n\n\n() : real (real-valued_basic_functions.html)\n\n\nmap_rect:\n\n\n(F f, vector phi, array[] vector theta, data array[,] real x_r, data array[,] int x_i) : vector (higher-order_functions.html)\n\n\nmatrix_exp:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nmatrix_exp_multiply:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\nmatrix_power:\n\n\n(matrix A, int B) : matrix (matrix_operations.html)\n\n\nmax:\n\n\n(array[] int x) : int (array_operations.html)\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nmdivide_left_spd:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector b) : vector (matrix_operations.html)\n\n\nmdivide_left_tri_low:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector b) : vector (matrix_operations.html)\n\n\nmdivide_right_spd:\n\n\n(matrix B, matrix A) : 
matrix (matrix_operations.html)\n\n\n(row_vector b, matrix A) : row_vector (matrix_operations.html)\n\n\nmdivide_right_tri_low:\n\n\n(matrix B, matrix A) : matrix (matrix_operations.html)\n\n\n(row_vector b, matrix A) : row_vector (matrix_operations.html)\n\n\nmean:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nmin:\n\n\n(array[] int x) : int (array_operations.html)\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nmodified_bessel_first_kind:\n\n\n(int v, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nmodified_bessel_second_kind:\n\n\n(int v, real z) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nmulti_gp:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_cholesky:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_cholesky_lpdf:\n\n\n(matrix y | matrix L, vector w) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_cholesky_lupdf:\n\n\n(matrix y | matrix L, vector w) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_lpdf:\n\n\n(matrix y | matrix Sigma, vector w) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_gp_lupdf:\n\n\n(matrix y | matrix Sigma, vector w) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_cholesky:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_cholesky_lpdf:\n\n\n(row_vectors y | 
row_vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_cholesky_lupdf:\n\n\n(row_vectors y | row_vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_cholesky_rng:\n\n\n(row_vector mu, matrix L) : vector (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors mu, matrix L) : vectors (distributions_over_unbounded_vectors.html)\n\n\n(vector mu, matrix L) : vector (distributions_over_unbounded_vectors.html)\n\n\n(vectors mu, matrix L) : vectors (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_lpdf:\n\n\n(row_vectors y | row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_lupdf:\n\n\n(row_vectors y | row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix Sigma) : real 
(distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_prec:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_prec_lpdf:\n\n\n(row_vectors y | row_vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_prec_lupdf:\n\n\n(row_vectors y | row_vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | row_vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | vectors mu, matrix Omega) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_normal_rng:\n\n\n(row_vector mu, matrix Sigma) : vector (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors mu, matrix Sigma) : vectors (distributions_over_unbounded_vectors.html)\n\n\n(vector mu, matrix Sigma) : vector (distributions_over_unbounded_vectors.html)\n\n\n(vectors mu, matrix Sigma) : vectors (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_cholesky_t_rng:\n\n\n(real nu, vector mu, matrix L) : vector (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_cholesky:\n\n\ndistribution statement (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_cholesky_lpdf:\n\n\n(vectors y | real nu, vectors mu, matrix L) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_cholesky_lupdf:\n\n\n(vectors y | real nu, vectors mu, matrix L) : real 
(distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_cholesky_rng:\n\n\n(real nu, array[] row_vector mu, matrix L) : array[] vector (distributions_over_unbounded_vectors.html)\n\n\n(real nu, array[] vector mu, matrix L) : array[] vector (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_lpdf:\n\n\n(row_vectors y | real nu, row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | real nu, vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | real nu, row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | real nu, vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_lupdf:\n\n\n(row_vectors y | real nu, row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(row_vectors y | real nu, vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | real nu, row_vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\n(vectors y | real nu, vectors mu, matrix Sigma) : real (distributions_over_unbounded_vectors.html)\n\n\nmulti_student_t_rng:\n\n\n(real nu, row_vector mu, matrix Sigma) : vector (distributions_over_unbounded_vectors.html)\n\n\n(real nu, row_vectors mu, matrix Sigma) : vectors (distributions_over_unbounded_vectors.html)\n\n\n(real nu, vector mu, matrix Sigma) : vector (distributions_over_unbounded_vectors.html)\n\n\n(real nu, vectors mu, matrix Sigma) : vectors (distributions_over_unbounded_vectors.html)\n\n\nmultinomial:\n\n\ndistribution statement (multivariate_discrete_distributions.html)\n\n\nmultinomial_logit:\n\n\ndistribution statement (multivariate_discrete_distributions.html)\n\n\nmultinomial_logit_lpmf:\n\n\n(array[] int y | vector gamma) : real (multivariate_discrete_distributions.html)\n\n\nmultinomial_logit_lupmf:\n\n\n(array[] int y | vector gamma) : real 
(multivariate_discrete_distributions.html)\n\n\nmultinomial_logit_rng:\n\n\n(vector gamma, int N) : array[] int (multivariate_discrete_distributions.html)\n\n\nmultinomial_lpmf:\n\n\n(array[] int y | vector theta) : real (multivariate_discrete_distributions.html)\n\n\nmultinomial_lupmf:\n\n\n(array[] int y | vector theta) : real (multivariate_discrete_distributions.html)\n\n\nmultinomial_rng:\n\n\n(vector theta, int N) : array[] int (multivariate_discrete_distributions.html)\n\n\nmultiply_lower_tri_self_transpose:\n\n\n(matrix x) : matrix (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#n", + "href": "functions-reference/functions_index.html#n", + "title": "Alphabetical Index", + "section": "", + "text": "neg_binomial:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_cdf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_lccdf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_lcdf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_glm:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_glm_lpmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta, real phi) : real 
(unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_glm_lupmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta, real phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_lpmf:\n\n\n(ints n | reals eta, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_lupmf:\n\n\n(ints n | reals eta, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_log_rng:\n\n\n(reals eta, reals phi) : R (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_lpmf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_lupmf:\n\n\n(ints n | reals mu, reals phi) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_2_rng:\n\n\n(reals mu, reals phi) : R (unbounded_discrete_distributions.html)\n\n\nneg_binomial_cdf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_lccdf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_lcdf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_lpmf:\n\n\n(ints n | reals 
alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_lupmf:\n\n\n(ints n | reals alpha, reals beta) : real (unbounded_discrete_distributions.html)\n\n\nneg_binomial_rng:\n\n\n(reals alpha, reals beta) : R (unbounded_discrete_distributions.html)\n\n\nnegative_infinity:\n\n\n() : real (real-valued_basic_functions.html)\n\n\nnorm:\n\n\n(complex z) : real (complex-valued_basic_functions.html)\n\n\nnorm1:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(row_vector x) : real (array_operations.html)\n\n\n(vector x) : real (array_operations.html)\n\n\nnorm2:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(row_vector x) : real (array_operations.html)\n\n\n(vector x) : real (array_operations.html)\n\n\nnormal:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nnormal_cdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_id_glm:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nnormal_id_glm_lpdf:\n\n\n(real y | matrix x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, real alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, vector alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, real alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, vector alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | row_vector x, real 
alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | row_vector x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_id_glm_lupdf:\n\n\n(real y | matrix x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, real alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(real y | matrix x, vector alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, real alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | matrix x, vector alpha, vector beta, vector sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | row_vector x, real alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\n(vector y | row_vector x, vector alpha, vector beta, real sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_lccdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_lcdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_lpdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_lupdf:\n\n\n(reals y | reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nnormal_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\nnot_a_number:\n\n\n() : real (real-valued_basic_functions.html)\n\n\nnum_elements:\n\n\n(array[] T x) 
: int (array_operations.html)\n\n\n(complex_matrix x) : int (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : int (complex_matrix_operations.html)\n\n\n(complex_vector x) : int (complex_matrix_operations.html)\n\n\n(matrix x) : int (matrix_operations.html)\n\n\n(row_vector x) : int (matrix_operations.html)\n\n\n(vector x) : int (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#o", + "href": "functions-reference/functions_index.html#o", + "title": "Alphabetical Index", + "section": "", + "text": "ode_adams:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, ...) : array[] vector (higher-order_functions.html)\n\n\node_adams_tol:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\node_adjoint_tol_ctl:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol_forward, data vector abs_tol_forward, data real rel_tol_backward, data vector abs_tol_backward, data real rel_tol_quadrature, data real abs_tol_quadrature, int max_num_steps, int num_steps_between_checkpoints, int interpolation_polynomial, int solver_forward, int solver_backward,...) : array[] vector (higher-order_functions.html)\n\n\node_bdf:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, ...) : array[] vector (higher-order_functions.html)\n\n\node_bdf_tol:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\node_ckrk:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, ...) 
: array[] vector (higher-order_functions.html)\n\n\node_ckrk_tol:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\node_rk45:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, ...) : array[] vector (higher-order_functions.html)\n\n\node_rk45_tol:\n\n\n(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) : array[] vector (higher-order_functions.html)\n\n\noffset_multiplier_constrain:\n\n\n(reals y, reals offset, reals mult) : reals (transform_functions.html)\n\n\noffset_multiplier_jacobian:\n\n\n(reals y, reals offset, reals mult) : reals (transform_functions.html)\n\n\noffset_multiplier_unconstrain:\n\n\n(reals x, reals offset, reals mult) : reals (transform_functions.html)\n\n\none_hot_array:\n\n\n(int n, int k) : array[] real (matrix_operations.html)\n\n\none_hot_int_array:\n\n\n(int n, int k) : array[] int (matrix_operations.html)\n\n\none_hot_row_vector:\n\n\n(int n, int k) : row_vector (matrix_operations.html)\n\n\none_hot_vector:\n\n\n(int K, int k) : vector (matrix_operations.html)\n\n\nones_array:\n\n\n(int n) : array[] real (matrix_operations.html)\n\n\nones_int_array:\n\n\n(int n) : array[] int (matrix_operations.html)\n\n\nones_row_vector:\n\n\n(int n) : row_vector (matrix_operations.html)\n\n\nones_vector:\n\n\n(int n) : vector (matrix_operations.html)\n\n\noperator!:\n\n\n(int x) : int (real-valued_basic_functions.html)\n\n\n(real x) : int (real-valued_basic_functions.html)\n\n\noperator!=:\n\n\n(complex x, complex y) : int (complex-valued_basic_functions.html)\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator%:\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\noperator%/%:\n\n\n(int x, int 
y) : int (integer-valued_basic_functions.html)\n\n\noperator&&:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator':\n\n\n(complex_matrix x) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x) : matrix (matrix_operations.html)\n\n\n(row_vector x) : vector (matrix_operations.html)\n\n\n(vector x) : row_vector (matrix_operations.html)\n\n\noperator*:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_matrix y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_vector y) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_row_vector y) : complex_matrix (complex_matrix_operations.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(matrix x, vector y) : vector (matrix_operations.html)\n\n\n(real x, 
matrix y) : matrix (matrix_operations.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, matrix y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, vector y) : real (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, row_vector y) : matrix (matrix_operations.html)\n\n\noperator*=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator+:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(int x) : int (integer-valued_basic_functions.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x) : real (real-valued_basic_functions.html)\n\n\n(real x, matrix y) : 
matrix (matrix_operations.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator+=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator-:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(complex_matrix x) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(int x) : int (integer-valued_basic_functions.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix x) : matrix 
(matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x) : real (real-valued_basic_functions.html)\n\n\n(real x, matrix y) : matrix (matrix_operations.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x) : row_vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(T x) : T (complex-valued_basic_functions.html)\n\n\n(T x) : T (complex_matrix_operations.html)\n\n\n(T x) : T (integer-valued_basic_functions.html)\n\n\n(T x) : T (matrix_operations.html)\n\n\n(T x) : T (real-valued_basic_functions.html)\n\n\n(vector x) : vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator-=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator.*:\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator.*=:\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator./:\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, 
complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x, matrix y) : matrix (matrix_operations.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator./=:\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator.^:\n\n\n( complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n( complex_matrix x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_matrix y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector 
(complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x, matrix y) : matrix (matrix_operations.html)\n\n\n(real x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(real x, vector y) : vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : row_vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\noperator/:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(complex_matrix B, complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_matrix x, complex y) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector b, complex_matrix A) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex y) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex y) : complex_vector (complex_matrix_operations.html)\n\n\n(int x, int y) : int (integer-valued_basic_functions.html)\n\n\n(matrix B, matrix A) : matrix (matrix_operations.html)\n\n\n(matrix x, real y) : matrix (matrix_operations.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(row_vector b, matrix A) : row_vector (matrix_operations.html)\n\n\n(row_vector x, real y) : row_vector (matrix_operations.html)\n\n\n(vector x, real y) : vector (matrix_operations.html)\n\n\noperator/=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\n(T x, U y) : void (compound_arithmetic_and_assignment.html)\n\n\noperator<:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator<=:\n\n\n(int x, int y) : 
int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator=:\n\n\n(complex x, complex y) : void (complex-valued_basic_functions.html)\n\n\noperator==:\n\n\n(complex x, complex y) : int (complex-valued_basic_functions.html)\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator>:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator>=:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\noperator\\:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector b) : vector (matrix_operations.html)\n\n\noperator^:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\noperator||:\n\n\n(int x, int y) : int (real-valued_basic_functions.html)\n\n\n(real x, real y) : int (real-valued_basic_functions.html)\n\n\nordered_constrain:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nordered_jacobian:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nordered_logistic:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nordered_logistic_glm_lpmf:\n\n\n(array[] int y | matrix x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(int y | row_vector x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\nordered_logistic_glm_lupmf:\n\n\n(array[] int y | matrix x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector beta, vector c) : real 
(bounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\n(int y | row_vector x, vector beta, vector c) : real (bounded_discrete_distributions.html)\n\n\nordered_logistic_lpmf:\n\n\n(ints k | vector eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\nordered_logistic_lupmf:\n\n\n(ints k | vector eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\nordered_logistic_rng:\n\n\n(real eta, vector c) : int (bounded_discrete_distributions.html)\n\n\nordered_probit:\n\n\ndistribution statement (bounded_discrete_distributions.html)\n\n\nordered_probit_lpmf:\n\n\n(ints k | real eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\n(ints k | vector eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\nordered_probit_lupmf:\n\n\n(ints k | real eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\n(ints k | vector eta, vectors c) : real (bounded_discrete_distributions.html)\n\n\nordered_probit_rng:\n\n\n(real eta, vector c) : int (bounded_discrete_distributions.html)\n\n\nordered_unconstrain:\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\nowens_t:\n\n\n(real h, real a) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#p", + "href": "functions-reference/functions_index.html#p", + "title": "Alphabetical Index", + "section": "", + "text": "pareto:\n\n\ndistribution statement (positive_lower-bounded_distributions.html)\n\n\npareto_cdf:\n\n\n(reals y | reals y_min, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_lccdf:\n\n\n(reals y | reals y_min, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_lcdf:\n\n\n(reals y | reals y_min, reals alpha) : real 
(positive_lower-bounded_distributions.html)\n\n\npareto_lpdf:\n\n\n(reals y | reals y_min, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_lupdf:\n\n\n(reals y | reals y_min, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_rng:\n\n\n(reals y_min, reals alpha) : R (positive_lower-bounded_distributions.html)\n\n\npareto_type_2:\n\n\ndistribution statement (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_cdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_lccdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_lcdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_lpdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_lupdf:\n\n\n(reals y | reals mu, reals lambda, reals alpha) : real (positive_lower-bounded_distributions.html)\n\n\npareto_type_2_rng:\n\n\n(reals mu, reals lambda, reals alpha) : R (positive_lower-bounded_distributions.html)\n\n\nPhi:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nPhi_approx:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\npi:\n\n\n() : real (real-valued_basic_functions.html)\n\n\npoisson:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\npoisson_cdf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_lccdf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_lcdf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\npoisson_log_glm:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\npoisson_log_glm_lpmf:\n\n\n(array[] int y | 
matrix x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log_glm_lupmf:\n\n\n(array[] int y | matrix x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | matrix x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(array[] int y | row_vector x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, real alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\n(int y | matrix x, vector alpha, vector beta) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log_lpmf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log_lupmf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\npoisson_log_rng:\n\n\n(reals alpha) : R (unbounded_discrete_distributions.html)\n\n\npoisson_lpmf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_lupmf:\n\n\n(ints n | reals lambda) : real (unbounded_discrete_distributions.html)\n\n\npoisson_rng:\n\n\n(reals lambda) : R (unbounded_discrete_distributions.html)\n\n\npolar:\n\n\n(real r, real theta) : complex (complex-valued_basic_functions.html)\n\n\npositive_infinity:\n\n\n() : real 
(real-valued_basic_functions.html)\n\n\npositive_ordered_constrain:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\npositive_ordered_jacobian:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\npositive_ordered_unconstrain:\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\npow:\n\n\n(complex x, complex y) : complex (complex-valued_basic_functions.html)\n\n\n(real x, real y) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : Z (complex-valued_basic_functions.html)\n\n\nprint:\n\n\n(T1 x1,..., TN xN) : void (void_functions.html)\n\n\nprod:\n\n\n(array[] int x) : real (array_operations.html)\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(complex_matrix x) : complex (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex (complex_matrix_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nproj:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#q", + "href": "functions-reference/functions_index.html#q", + "title": "Alphabetical Index", + "section": "", + "text": "qr:\n\n\n(matrix A) : tuple(matrix, matrix) (matrix_operations.html)\n\n\nqr_Q:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nqr_R:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nqr_thin:\n\n\n(matrix A) : tuple(matrix, matrix) (matrix_operations.html)\n\n\nqr_thin_Q:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nqr_thin_R:\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nquad_form:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector B) : real 
(matrix_operations.html)\n\n\nquad_form_diag:\n\n\n(matrix m, row_vector rv) : matrix (matrix_operations.html)\n\n\n(matrix m, vector v) : matrix (matrix_operations.html)\n\n\nquad_form_sym:\n\n\n(matrix A, matrix B) : matrix (matrix_operations.html)\n\n\n(matrix A, vector B) : real (matrix_operations.html)\n\n\nquantile:\n\n\n(data array[] real x, data array[] real p) : array[] real (array_operations.html)\n\n\n(data array[] real x, data real p) : real (array_operations.html)\n\n\n(data row_vector x, data array[] real p) : array[] real (matrix_operations.html)\n\n\n(data row_vector x, data real p) : real (matrix_operations.html)\n\n\n(data vector x, data array[] real p) : array[] real (matrix_operations.html)\n\n\n(data vector x, data real p) : real (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#r", + "href": "functions-reference/functions_index.html#r", + "title": "Alphabetical Index", + "section": "", + "text": "rank:\n\n\n(array[] int v, int s) : int (array_operations.html)\n\n\n(array[] real v, int s) : int (array_operations.html)\n\n\n(row_vector v, int s) : int (matrix_operations.html)\n\n\n(vector v, int s) : int (matrix_operations.html)\n\n\nrayleigh:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nrayleigh_cdf:\n\n\n(real y | real sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_lccdf:\n\n\n(real y | real sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_lcdf:\n\n\n(real y | real sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_lpdf:\n\n\n(reals y | reals sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_lupdf:\n\n\n(reals y | reals sigma) : real (positive_continuous_distributions.html)\n\n\nrayleigh_rng:\n\n\n(reals sigma) : R (positive_continuous_distributions.html)\n\n\nreduce_sum:\n\n\n(F f, array[] T x, int grainsize, T1 s1, 
T2 s2, ...) : real (higher-order_functions.html)\n\n\nreduce_sum_static:\n\n\n(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...) : real (higher-order_functions.html)\n\n\nreject:\n\n\n(T1 x1,..., TN xN) : void (void_functions.html)\n\n\nrep_array:\n\n\n(T x, int k, int m, int n) : array[,,] T (array_operations.html)\n\n\n(T x, int m, int n) : array[,] T (array_operations.html)\n\n\n(T x, int n) : array[] T (array_operations.html)\n\n\nrep_matrix:\n\n\n(complex z, int m, int n) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_row_vector rv, int m) : complex_matrix (complex_matrix_operations.html)\n\n\n(complex_vector v, int n) : complex_matrix (complex_matrix_operations.html)\n\n\n(real x, int m, int n) : matrix (matrix_operations.html)\n\n\n(row_vector rv, int m) : matrix (matrix_operations.html)\n\n\n(vector v, int n) : matrix (matrix_operations.html)\n\n\nrep_row_vector:\n\n\n(complex z, int n) : complex_row_vector (complex_matrix_operations.html)\n\n\n(real x, int n) : row_vector (matrix_operations.html)\n\n\nrep_vector:\n\n\n(complex z, int m) : complex_vector (complex_matrix_operations.html)\n\n\n(real x, int m) : vector (matrix_operations.html)\n\n\nreverse:\n\n\n(array[] T v) : array[] T (array_operations.html)\n\n\n(complex_row_vector v) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector v) : row_vector (matrix_operations.html)\n\n\n(vector v) : vector (matrix_operations.html)\n\n\nrising_factorial:\n\n\n(real x, int n) : real (real-valued_basic_functions.html)\n\n\n(T1 x, T2 y) : R (real-valued_basic_functions.html)\n\n\nround:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nrow:\n\n\n(complex_matrix x, int m) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x, int m) : row_vector (matrix_operations.html)\n\n\nrows:\n\n\n(complex_matrix x) : int (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : int 
(complex_matrix_operations.html)\n\n\n(complex_vector x) : int (complex_matrix_operations.html)\n\n\n(matrix x) : int (matrix_operations.html)\n\n\n(row_vector x) : int (matrix_operations.html)\n\n\n(vector x) : int (matrix_operations.html)\n\n\nrows_dot_product:\n\n\n(complex_matrix x, complex_matrix y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x, complex_row_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x, complex_vector y) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, matrix y) : vector (matrix_operations.html)\n\n\n(row_vector x, row_vector y) : vector (matrix_operations.html)\n\n\n(vector x, vector y) : vector (matrix_operations.html)\n\n\nrows_dot_self:\n\n\n(complex_matrix x) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex_vector (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x) : vector (matrix_operations.html)\n\n\n(row_vector x) : vector (matrix_operations.html)\n\n\n(vector x) : vector (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#s", + "href": "functions-reference/functions_index.html#s", + "title": "Alphabetical Index", + "section": "", + "text": "scale_matrix_exp_multiply:\n\n\n(real t, matrix A, matrix B) : matrix (matrix_operations.html)\n\n\nscaled_inv_chi_square:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_cdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_lccdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_lcdf:\n\n\n(reals y | reals nu, reals sigma) : real 
(positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_lpdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_lupdf:\n\n\n(reals y | reals nu, reals sigma) : real (positive_continuous_distributions.html)\n\n\nscaled_inv_chi_square_rng:\n\n\n(reals nu, reals sigma) : R (positive_continuous_distributions.html)\n\n\nsd:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nsegment:\n\n\n(array[] T sv, int i, int n) : array[] T (matrix_operations.html)\n\n\n(complex_row_vector rv, int i, int n) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v, int i, int n) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector rv, int i, int n) : row_vector (matrix_operations.html)\n\n\n(vector v, int i, int n) : vector (matrix_operations.html)\n\n\nsimplex_constrain:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nsimplex_jacobian:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nsimplex_unconstrain:\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\nsin:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nsingular_values:\n\n\n(complex_matrix A) : vector (complex_matrix_operations.html)\n\n\n(matrix A) : vector (matrix_operations.html)\n\n\nsinh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nsize:\n\n\n(array[] T x) : int (array_operations.html)\n\n\n(complex_row_vector x) : int (complex_matrix_operations.html)\n\n\n(complex_vector x) : int (complex_matrix_operations.html)\n\n\n(int x) : int (integer-valued_basic_functions.html)\n\n\n(matrix x) : int (complex_matrix_operations.html)\n\n\n(matrix x) : int (matrix_operations.html)\n\n\n(real x) : int 
(integer-valued_basic_functions.html)\n\n\n(row_vector x) : int (matrix_operations.html)\n\n\n(vector x) : int (matrix_operations.html)\n\n\nskew_double_exponential:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_cdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_lccdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_lcdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_lpdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_lupdf:\n\n\n(reals y | reals mu, reals sigma, reals tau) : real (unbounded_continuous_distributions.html)\n\n\nskew_double_exponential_rng:\n\n\n(reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\nskew_normal:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nskew_normal_cdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_lccdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_lcdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_lpdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_lupdf:\n\n\n(reals y | reals xi, reals omega, reals alpha) : real (unbounded_continuous_distributions.html)\n\n\nskew_normal_rng:\n\n\n(reals xi, reals omega, real alpha) : R (unbounded_continuous_distributions.html)\n\n\nsoftmax:\n\n\n(vector x) : vector (matrix_operations.html)\n\n\nsolve_newton:\n\n\n(function algebra_system, vector y_guess, ...) 
: vector (higher-order_functions.html)\n\n\nsolve_newton_tol:\n\n\n(function algebra_system, vector y_guess, data real scaling_step, data real f_tol, int max_steps, ...) : vector (higher-order_functions.html)\n\n\nsolve_powell:\n\n\n(function algebra_system, vector y_guess, ...) : vector (higher-order_functions.html)\n\n\nsolve_powell_tol:\n\n\n(function algebra_system, vector y_guess, data real rel_tol, data real f_tol, int max_steps, ...) : vector (higher-order_functions.html)\n\n\nsort_asc:\n\n\n(array[] int v) : array[] int (array_operations.html)\n\n\n(array[] real v) : array[] real (array_operations.html)\n\n\n(row_vector v) : row_vector (matrix_operations.html)\n\n\n(vector v) : vector (matrix_operations.html)\n\n\nsort_desc:\n\n\n(array[] int v) : array[] int (array_operations.html)\n\n\n(array[] real v) : array[] real (array_operations.html)\n\n\n(row_vector v) : row_vector (matrix_operations.html)\n\n\n(vector v) : vector (matrix_operations.html)\n\n\nsort_indices_asc:\n\n\n(array[] int v) : array[] int (array_operations.html)\n\n\n(array[] real v) : array[] int (array_operations.html)\n\n\n(row_vector v) : array[] int (matrix_operations.html)\n\n\n(vector v) : array[] int (matrix_operations.html)\n\n\nsort_indices_desc:\n\n\n(array[] int v) : array[] int (array_operations.html)\n\n\n(array[] real v) : array[] int (array_operations.html)\n\n\n(row_vector v) : array[] int (matrix_operations.html)\n\n\n(vector v) : array[] int (matrix_operations.html)\n\n\nsqrt:\n\n\n(complex x) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nsqrt2:\n\n\n() : real (real-valued_basic_functions.html)\n\n\nsquare:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nsquared_distance:\n\n\n(row_vector x, row_vector y) : real (array_operations.html)\n\n\n(row_vector x, vector y) : real (array_operations.html)\n\n\n(vector x, row_vector y) : real (array_operations.html)\n\n\n(vector x, vector y) : real 
(array_operations.html)\n\n\nstd_normal:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nstd_normal_cdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_lccdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_lcdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_log_qf:\n\n\n(T x) : R (unbounded_continuous_distributions.html)\n\n\nstd_normal_lpdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_lupdf:\n\n\n(reals y) : real (unbounded_continuous_distributions.html)\n\n\nstd_normal_qf:\n\n\n(T x) : R (unbounded_continuous_distributions.html)\n\n\nstd_normal_rng:\n\n\n() : real (unbounded_continuous_distributions.html)\n\n\nstep:\n\n\n(real x) : real (real-valued_basic_functions.html)\n\n\nstochastic_column_constrain:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\nstochastic_column_jacobian:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\nstochastic_column_unconstrain:\n\n\n(matrices x) : matrices (transform_functions.html)\n\n\nstochastic_row_constrain:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\nstochastic_row_jacobian:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\nstochastic_row_unconstrain:\n\n\n(matrices x) : matrices (transform_functions.html)\n\n\nstudent_t:\n\n\ndistribution statement (unbounded_continuous_distributions.html)\n\n\nstudent_t_cdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_lccdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_lcdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_lpdf:\n\n\n(reals y | reals nu, reals mu, reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_lupdf:\n\n\n(reals y | reals nu, reals mu, 
reals sigma) : real (unbounded_continuous_distributions.html)\n\n\nstudent_t_rng:\n\n\n(reals nu, reals mu, reals sigma) : R (unbounded_continuous_distributions.html)\n\n\nsub_col:\n\n\n(complex_matrix x, int i, int j, int n_rows) : complex_vector (complex_matrix_operations.html)\n\n\n(matrix x, int i, int j, int n_rows) : vector (matrix_operations.html)\n\n\nsub_row:\n\n\n(complex_matrix x, int i, int j, int n_cols) : complex_row_vector (complex_matrix_operations.html)\n\n\n(matrix x, int i, int j, int n_cols) : row_vector (matrix_operations.html)\n\n\nsum:\n\n\n(array[] complex x) : complex (array_operations.html)\n\n\n(array[] int x) : int (array_operations.html)\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(complex_matrix x) : complex (complex_matrix_operations.html)\n\n\n(complex_row_vector x) : complex (complex_matrix_operations.html)\n\n\n(complex_vector x) : complex (complex_matrix_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nsum_to_zero_constrain:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nsum_to_zero_jacobian:\n\n\n(matrices y) : matrices (transform_functions.html)\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nsum_to_zero_unconstrain:\n\n\n(matrices x) : matrices (transform_functions.html)\n\n\n(vectors x) : vectors (transform_functions.html)\n\n\nsvd:\n\n\n(complex_matrix A) : tuple(complex_matrix, vector, complex_matrix) (complex_matrix_operations.html)\n\n\n(matrix A) : tuple(matrix, vector, matrix) (matrix_operations.html)\n\n\nsvd_U:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : matrix (matrix_operations.html)\n\n\nsvd_V:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : matrix 
(matrix_operations.html)\n\n\nsymmetrize_from_lower_tri:\n\n\n(complex_matrix A) : complex_matrix (complex_matrix_operations.html)\n\n\n(matrix A) : matrix (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#t", + "href": "functions-reference/functions_index.html#t", + "title": "Alphabetical Index", + "section": "", + "text": "tail:\n\n\n(array[] T sv, int n) : array[] T (matrix_operations.html)\n\n\n(complex_row_vector rv, int n) : complex_row_vector (complex_matrix_operations.html)\n\n\n(complex_vector v, int n) : complex_vector (complex_matrix_operations.html)\n\n\n(row_vector rv, int n) : row_vector (matrix_operations.html)\n\n\n(vector v, int n) : vector (matrix_operations.html)\n\n\ntan:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ntanh:\n\n\n(complex z) : complex (complex-valued_basic_functions.html)\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ntarget:\n\n\n() : real (real-valued_basic_functions.html)\n\n\ntcrossprod:\n\n\n(matrix x) : matrix (matrix_operations.html)\n\n\ntgamma:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\nto_array_1d:\n\n\n(array[...] complex a) : array[] complex (mixed_operations.html)\n\n\n(array[...] int a) : array[] int (mixed_operations.html)\n\n\n(array[...] 
real a) : array[] real (mixed_operations.html)\n\n\n(complex_matrix m) : array[] complex (mixed_operations.html)\n\n\n(complex_row_vector v) : array[] complex (mixed_operations.html)\n\n\n(complex_vector v) : array[] real (mixed_operations.html)\n\n\n(matrix m) : array[] real (mixed_operations.html)\n\n\n(row_vector v) : array[] real (mixed_operations.html)\n\n\n(vector v) : array[] real (mixed_operations.html)\n\n\nto_array_2d:\n\n\n(complex_matrix m) : array[,] real (mixed_operations.html)\n\n\n(matrix m) : array[,] real (mixed_operations.html)\n\n\nto_complex:\n\n\n() : complex (complex-valued_basic_functions.html)\n\n\n(real re) : complex (complex-valued_basic_functions.html)\n\n\n(real re, real im) : complex (complex-valued_basic_functions.html)\n\n\n(T1 re, T2 im) : Z (complex-valued_basic_functions.html)\n\n\nto_int:\n\n\n(data real x) : int (integer-valued_basic_functions.html)\n\n\nto_matrix:\n\n\n(array[,] complex a ) : complex_matrix (mixed_operations.html)\n\n\n(array[,] int a) : matrix (mixed_operations.html)\n\n\n(array[,] real a) : matrix (mixed_operations.html)\n\n\n(array[] complex a, int m, int n) : complex_matrix (mixed_operations.html)\n\n\n(array[] complex a, int m, int n, int col_major) : complex_matrix (mixed_operations.html)\n\n\n(array[] complex_row_vector vs) : complex_matrix (mixed_operations.html)\n\n\n(array[] int a, int m, int n) : matrix (mixed_operations.html)\n\n\n(array[] int a, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\n(array[] real a, int m, int n) : matrix (mixed_operations.html)\n\n\n(array[] real a, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\n(array[] row_vector vs) : matrix (mixed_operations.html)\n\n\n(complex_matrix A, int m, int n, int col_major) : complex_matrix (mixed_operations.html)\n\n\n(complex_matrix m) : complex_matrix (mixed_operations.html)\n\n\n(complex_matrix M, int m, int n) : complex_matrix (mixed_operations.html)\n\n\n(complex_row_vector v) : complex_matrix 
(mixed_operations.html)\n\n\n(complex_row_vector v, int m, int n) : complex_matrix (mixed_operations.html)\n\n\n(complex_row_vector v, int m, int n, int col_major) : complex_matrix (mixed_operations.html)\n\n\n(complex_vector v) : complex_matrix (mixed_operations.html)\n\n\n(complex_vector v, int m, int n) : complex_matrix (mixed_operations.html)\n\n\n(complex_vector v, int m, int n, int col_major) : complex_matrix (mixed_operations.html)\n\n\n(matrix A, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\n(matrix m) : matrix (mixed_operations.html)\n\n\n(matrix M, int m, int n) : matrix (mixed_operations.html)\n\n\n(row_vector v) : matrix (mixed_operations.html)\n\n\n(row_vector v, int m, int n) : matrix (mixed_operations.html)\n\n\n(row_vector v, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\n(vector v) : matrix (mixed_operations.html)\n\n\n(vector v, int m, int n) : matrix (mixed_operations.html)\n\n\n(vector v, int m, int n, int col_major) : matrix (mixed_operations.html)\n\n\nto_row_vector:\n\n\n(array[] complex a) : complex_row_vector (mixed_operations.html)\n\n\n(array[] int a) : row_vector (mixed_operations.html)\n\n\n(array[] real a) : row_vector (mixed_operations.html)\n\n\n(complex_matrix m) : complex_row_vector (mixed_operations.html)\n\n\n(complex_row_vector v) : complex_row_vector (mixed_operations.html)\n\n\n(complex_vector v) : complex_row_vector (mixed_operations.html)\n\n\n(matrix m) : row_vector (mixed_operations.html)\n\n\n(row_vector v) : row_vector (mixed_operations.html)\n\n\n(vector v) : row_vector (mixed_operations.html)\n\n\nto_vector:\n\n\n(array[] complex a) : complex_vector (mixed_operations.html)\n\n\n(array[] int a) : vector (mixed_operations.html)\n\n\n(array[] real a) : vector (mixed_operations.html)\n\n\n(complex_matrix m) : complex_vector (mixed_operations.html)\n\n\n(complex_row_vector v) : complex_vector (mixed_operations.html)\n\n\n(complex_vector v) : complex_vector 
(mixed_operations.html)\n\n\n(matrix m) : vector (mixed_operations.html)\n\n\n(row_vector v) : vector (mixed_operations.html)\n\n\n(vector v) : vector (mixed_operations.html)\n\n\ntrace:\n\n\n(complex_matrix A) : complex (complex_matrix_operations.html)\n\n\n(matrix A) : real (matrix_operations.html)\n\n\ntrace_dot:\n\n\n(matrix A, matrix B) : real (matrix_operations.html)\n\n\ntrace_gen_quad_form:\n\n\n(matrix D ,matrix A, matrix B) : real (matrix_operations.html)\n\n\ntrace_quad_form:\n\n\n(matrix A, matrix B) : real (matrix_operations.html)\n\n\n(matrix A, vector B) : real (matrix_operations.html)\n\n\ntrigamma:\n\n\n(T x) : R (real-valued_basic_functions.html)\n\n\ntrunc:\n\n\n(T x) : R (real-valued_basic_functions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#u", + "href": "functions-reference/functions_index.html#u", + "title": "Alphabetical Index", + "section": "", + "text": "uniform:\n\n\ndistribution statement (bounded_continuous_distributions.html)\n\n\nuniform_cdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_lccdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_lcdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_lpdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_lupdf:\n\n\n(reals y | reals alpha, reals beta) : real (bounded_continuous_distributions.html)\n\n\nuniform_rng:\n\n\n(reals alpha, reals beta) : R (bounded_continuous_distributions.html)\n\n\nuniform_simplex:\n\n\n(int n) : vector (matrix_operations.html)\n\n\nunit_vectors_constrain:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nunit_vectors_jacobian:\n\n\n(vectors y) : vectors (transform_functions.html)\n\n\nunit_vectors_unconstrain:\n\n\n(vectors x) : 
vectors (transform_functions.html)\n\n\nupper_bound_constrain:\n\n\n(reals y, reals ub) : reals (transform_functions.html)\n\n\nupper_bound_jacobian:\n\n\n(reals x, reals ub) : reals (transform_functions.html)\n\n\nupper_bound_unconstrain:\n\n\n(reals x, reals ub) : reals (transform_functions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#v", + "href": "functions-reference/functions_index.html#v", + "title": "Alphabetical Index", + "section": "", + "text": "variance:\n\n\n(array[] real x) : real (array_operations.html)\n\n\n(matrix x) : real (matrix_operations.html)\n\n\n(row_vector x) : real (matrix_operations.html)\n\n\n(vector x) : real (matrix_operations.html)\n\n\nvon_mises:\n\n\ndistribution statement (circular_distributions.html)\n\n\nvon_mises_cdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_lccdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_lcdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_lpdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_lupdf:\n\n\n(reals y | reals mu, reals kappa) : real (circular_distributions.html)\n\n\nvon_mises_rng:\n\n\n(reals mu, reals kappa) : R (circular_distributions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#w", + "href": "functions-reference/functions_index.html#w", + "title": "Alphabetical Index", + "section": "", + "text": "weibull:\n\n\ndistribution statement (positive_continuous_distributions.html)\n\n\nweibull_cdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nweibull_lccdf:\n\n\n(reals y | reals alpha, reals sigma) : real 
(positive_continuous_distributions.html)\n\n\nweibull_lcdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nweibull_lpdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nweibull_lupdf:\n\n\n(reals y | reals alpha, reals sigma) : real (positive_continuous_distributions.html)\n\n\nweibull_rng:\n\n\n(reals alpha, reals sigma) : R (positive_continuous_distributions.html)\n\n\nwiener:\n\n\ndistribution statement (positive_lower-bounded_distributions.html)\n\n\nwiener_lccdf_unnorm:\n\n\n(real y, real alpha, real tau, real beta, real delta) : real (positive_lower-bounded_distributions.html)\n\n\n(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) : real (positive_lower-bounded_distributions.html)\n\n\nwiener_lcdf_unnorm:\n\n\n(real y, real alpha, real tau, real beta, real delta) : real (positive_lower-bounded_distributions.html)\n\n\n(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) : real (positive_lower-bounded_distributions.html)\n\n\nwiener_lpdf:\n\n\n(real y | real alpha, real tau, real beta, real delta, real var_delta) : real (positive_lower-bounded_distributions.html)\n\n\n(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) : real (positive_lower-bounded_distributions.html)\n\n\n(reals y | reals alpha, reals tau, reals beta, reals delta) : real (positive_lower-bounded_distributions.html)\n\n\nwiener_lupdf:\n\n\n(real y | real alpha, real tau, real beta, real delta, real var_delta) : real (positive_lower-bounded_distributions.html)\n\n\n(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) : real (positive_lower-bounded_distributions.html)\n\n\n(reals y | reals alpha, reals tau, reals beta, reals delta) : real (positive_lower-bounded_distributions.html)\n\n\nwishart:\n\n\ndistribution 
statement (covariance_matrix_distributions.html)\n\n\nwishart_cholesky_lpdf:\n\n\n(matrix L_W | real nu, matrix L_S) : real (covariance_matrix_distributions.html)\n\n\nwishart_cholesky_lupdf:\n\n\n(matrix L_W | real nu, matrix L_S) : real (covariance_matrix_distributions.html)\n\n\nwishart_cholesky_rng:\n\n\n(real nu, matrix L_S) : matrix (covariance_matrix_distributions.html)\n\n\nwishart_lpdf:\n\n\n(matrix W | real nu, matrix Sigma) : real (covariance_matrix_distributions.html)\n\n\nwishart_lupdf:\n\n\n(matrix W | real nu, matrix Sigma) : real (covariance_matrix_distributions.html)\n\n\nwishart_rng:\n\n\n(real nu, matrix Sigma) : matrix (covariance_matrix_distributions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#y", + "href": "functions-reference/functions_index.html#y", + "title": "Alphabetical Index", + "section": "", + "text": "yule_simon:\n\n\ndistribution statement (unbounded_discrete_distributions.html)\n\n\nyule_simon_cdf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_lccdf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_lcdf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_lpmf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_lupmf:\n\n\n(ints n | reals alpha) : real (unbounded_discrete_distributions.html)\n\n\nyule_simon_rng:\n\n\n(reals alpha) : R (unbounded_discrete_distributions.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/functions_index.html#z", + "href": "functions-reference/functions_index.html#z", + "title": "Alphabetical Index", + "section": "", + "text": "zeros_array:\n\n\n(int n) : array[] real (matrix_operations.html)\n\n\nzeros_int_array:\n\n\n(int n) : array[] int 
(matrix_operations.html)\n\n\nzeros_row_vector:\n\n\n(int n) : row_vector (matrix_operations.html)\n\n\nzeros_vector:\n\n\n(int n) : vector (matrix_operations.html)", + "crumbs": [ + "Functions Reference", + "Appendix", + "Alphabetical Index" + ] + }, + { + "objectID": "functions-reference/higher-order_functions.html", + "href": "functions-reference/higher-order_functions.html", + "title": "Higher-Order Functions", + "section": "", + "text": "Stan provides a few higher-order functions that act on other functions. In all cases, the function arguments to the higher-order functions are defined as functions within the Stan language and passed by name to the higher-order functions.\n\n\nStan provides two built-in algebraic equation solvers, respectively based on the Newton method and the Powell “dog leg” hybrid method. Empirically the Newton method is found to be faster and its use is recommended for most problems.\nAn algebraic solver is a higher-order function, i.e. it takes another function as one of its arguments. Other functions in Stan which share this feature are the differential equation solvers (see section Ordinary Differential Equation (ODE) Solvers and Differential Algebraic Equation (DAE) solver). Ordinary Stan functions do not allow functions as arguments.\n\n\nAn algebraic system is specified as an ordinary function in Stan within the function block. The function must return a vector and takes in, as its first argument, the unknowns \\(y\\) we wish to solve for, also passed as a vector. This argument is followed by additional arguments as specified by the user; we call such arguments variadic arguments and denote them .... The signature of the algebraic system is then:\n vector algebra_system (vector y, ...)\nThere is no type restriction for the variadic arguments and each argument can be passed as data or parameter. However users should use parameter arguments only when necessary and mark data arguments with the keyword data. 
In the below example, the last variadic argument, \\(x\\), is restricted to being data:\n vector algebra_system (vector y, vector theta, data vector x)\nDistinguishing data and parameter is important for computational reasons. Augmenting the total number of parameters increases the cost of propagating derivatives through the solution to the algebraic equation, and ultimately the computational cost of evaluating the gradients.\n\n\n\n \n vector solve_newton(function algebra_system, vector y_guess, ...) Solves the algebraic system, given an initial guess, using Newton’s method.\nAvailable since 2.31\n \n vector solve_newton_tol(function algebra_system, vector y_guess, data real scaling_step, data real f_tol, int max_steps, ...) Solves the algebraic system, given an initial guess, using Newton’s method with additional control parameters for the solver.\nAvailable since 2.31\n \n vector solve_powell(function algebra_system, vector y_guess, ...) Solves the algebraic system, given an initial guess, using Powell’s hybrid method.\nAvailable since 2.31\n \n vector solve_powell_tol(function algebra_system, vector y_guess, data real rel_tol, data real f_tol, int max_steps, ...) Solves the algebraic system, given an initial guess, using Powell’s hybrid method with additional control parameters for the solver.\nAvailable since 2.31\n\n\nThe arguments to the algebraic solvers are as follows:\n\nalgebra_system: function literal referring to a function specifying the system of algebraic equations with signature (vector, ...):vector. The arguments represent (1) unknowns, (2) additional parameter and/or data arguments, and the return value contains the value of the algebraic function, which goes to 0 when we plug in the solution to the algebraic system,\ny_guess: initial guess for the solution, type vector,\n...: variadic arguments.\n\nThe algebraic solvers admit control parameters. While Stan provides default values, the user should be prepared to adjust the control parameters. 
The following controls are available:\n\nscaling_step: for the Newton solver only, the scaled-step stopping tolerance, type real, data only. If a Newton step is smaller than the scaling step tolerance, the code breaks, assuming the solver is no longer making significant progress. If set to 0, this constraint is ignored. Default value is \\(10^{-3}\\).\nrel_tol: for the Powell solver only, the relative tolerance, type real, data only. The relative tolerance is the estimated relative error of the solver and serves to test if a satisfactory solution has been found. Default value is \\(10^{-10}\\).\nfunction_tol: function tolerance for the algebraic solver, type real, data only. After convergence of the solver, the proposed solution is plugged into the algebraic system and its norm is compared to the function tolerance. If the norm is below the function tolerance, the solution is deemed acceptable. Default value is \\(10^{-6}\\).\nmax_num_steps: maximum number of steps to take in the algebraic solver, type int, data only. If the solver reaches this number of steps, it breaks and returns an error message. Default value is \\(200\\).\n\nThe difference in which control parameters are available has to do with the underlying implementations for the solvers and the control parameters these implementations support. The Newton solver is based on KINSOL from the SUNDIALS suite, while the Powell solver uses a module from the Eigen library.\n\n\n\nThe return value for the algebraic solver is an object of type vector, with values which, when plugged in as y make the algebraic function go to 0 (approximately, within the specified function tolerance).\n\n\n\nCertain sizes have to be consistent. The initial guess, return value of the solver, and return value of the algebraic function must all be the same size.\n\n\n\nStan offers two methods to solve algebraic equations. solve_newton and solve_newton_tol use the Newton method, a first-order derivative based numerical solver. 
The Stan code builds on the implementation in KINSOL from the SUNDIALS suite (Hindmarsh et al. 2005). For many problems, we find that the Newton method is faster than the Powell method. If however Newton’s method performs poorly, either failing to or requiring an excessively long time to converge, the user should be prepared to switch to the Powell method.\nsolve_powell and solve_powell_tol are based on the Powell hybrid method (Powell 1970), which also uses first-order derivatives. The Stan code builds on the implementation of the hybrid solver in the unsupported module for nonlinear optimization problems of the Eigen library (Guennebaud, Jacob, et al. 2010). This solver is in turn based on the algorithm developed for the package MINPACK-1 (Jorge J. More 1980).\nFor both solvers, derivatives are propagated through the solution to the algebraic solution using the implicit function theorem and an adjoint method of automatic differentiation; for a discussion on this topic, see (Gaebler 2021) and (Margossian and Betancourt 2022).\n\n\n\n\n\nStan provides several higher order functions for solving initial value problems specified as Ordinary Differential Equations (ODEs).\nSolving an initial value ODE means given a set of differential equations \\(y'(t, \\theta) = f(t, y, \\theta)\\) and initial conditions \\(y(t_0, \\theta)\\), solving for \\(y\\) at a sequence of times \\(t_0 < t_1 < t_2, \\cdots < t_n\\). \\(f(t, y, \\theta)\\) is referred to here as the ODE system function.\n\\(f(t, y, \\theta)\\) will be defined as a function with a certain signature and provided along with the initial conditions and output times to one of the ODE solver functions.\nTo make it easier to write ODEs, the solve functions take extra arguments that are passed along unmodified to the user-supplied system function. Because there can be any number of these arguments and they can be of different types, they are denoted below as .... The types of the arguments represented by ... 
in the ODE solve function call must match the types of the arguments represented by ... in the user-supplied system function.\n\n\n \n\narray[] vector ode_rk45(function ode, vector initial_state, real initial_time, array[] real times, ...) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.\nAvailable since 2.24\n \n\narray[] vector ode_rk45_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method with additional control parameters for the solver.\nAvailable since 2.24\n \n\narray[] vector ode_ckrk(function ode, vector initial_state, real initial_time, array[] real times, ...) Solves the ODE system for the times provided using the Cash-Karp algorithm, a 4th/5th order explicit Runge-Kutta method.\nAvailable since 2.27\n \n\narray[] vector ode_ckrk_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the ODE system for the times provided using the Cash-Karp algorithm, a 4th/5th order explicit Runge-Kutta method with additional control parameters for the solver.\nAvailable since 2.27\n \n\narray[] vector ode_adams(function ode, vector initial_state, real initial_time, array[] real times, ...) Solves the ODE system for the times provided using the Adams-Moulton method.\nAvailable since 2.24\n \n\narray[] vector ode_adams_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) 
Solves the ODE system for the times provided using the Adams-Moulton method with additional control parameters for the solver.\nAvailable since 2.24\n\n\n\n \n\narray[] vector ode_bdf(function ode, vector initial_state, real initial_time, array[] real times, ...) Solves the ODE system for the times provided using the backward differentiation formula (BDF) method.\nAvailable since 2.24\n \n\narray[] vector ode_bdf_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the ODE system for the times provided using the backward differentiation formula (BDF) method with additional control parameters for the solver.\nAvailable since 2.24\n\n\n\n \n\narray[] vector ode_adjoint_tol_ctl(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol_forward, data vector abs_tol_forward, data real rel_tol_backward, data vector abs_tol_backward, int max_num_steps, int num_steps_between_checkpoints, int interpolation_polynomial, int solver_forward, int solver_backward, ...)\nSolves the ODE system for the times provided using the adjoint ODE solver method from CVODES. The adjoint ODE solver requires a checkpointed forward in time ODE integration, a backwards in time integration that makes use of an interpolated version of the forward solution, and the solution of a quadrature problem (the number of which depends on the number of parameters passed to the solve). The tolerances and numeric methods used for the forward solve, backward solve, quadratures, and interpolation can all be configured.\nAvailable since 2.27\n\n\n\nThe first argument to one of the ODE solvers is always the ODE system function. The ODE system function must have a vector return type, and the first two arguments must be a real and vector in that order. 
These two arguments are followed by the variadic arguments that are passed through from the ODE solve function call:\n vector ode(real time, vector state, ...)\nThe ODE system function should return the derivative of the state with respect to time at the time and state provided. The length of the returned vector must match the length of the state input into the function.\nThe arguments to this function are:\n\ntime, the time to evaluate the ODE system\nstate, the state of the ODE system at the time specified\n..., sequence of arguments passed unmodified from the ODE solve function call. The types here must match the types in the ... arguments of the ODE solve function call.\n\n\n\n\nThe arguments to the ODE solvers in both the stiff and non-stiff solvers are the same. The arguments to the adjoint ODE solver are different; see Arguments to the adjoint ODE solver.\n\node: ODE system function,\ninitial_state: initial state, type vector,\ninitial_time: initial time, type real,\ntimes: solution times, type array[] real,\n...: sequence of arguments that will be passed through unmodified to the ODE system function. The types here must match the types in the ... arguments of the ODE system function.\n\nFor the versions of the ode solver functions ending in _tol, these three parameters must be provided after times and before the ... arguments:\n\ndata rel_tol: relative tolerance for the ODE solver, type real, data only,\ndata abs_tol: absolute tolerance for the ODE solver, type real, data only, and\nmax_num_steps: maximum number of steps to take between output times in the ODE solver, type int, data only.\n\nBecause the tolerances are data arguments, they must be defined in either the data or transformed data blocks. 
They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.\n\n\n\nThe arguments to the adjoint ODE solver are different from those for the other functions (for those see Arguments to the ODE solvers).\n\node: ODE system function,\ninitial_state: initial state, type vector,\ninitial_time: initial time, type real,\ntimes: solution times, type array[] real,\ndata rel_tol_forward: Relative tolerance for forward solve, type real, data only,\ndata abs_tol_forward: Absolute tolerance vector for each state for forward solve, type vector, data only,\ndata rel_tol_backward: Relative tolerance for backward solve, type real, data only,\ndata abs_tol_backward: Absolute tolerance vector for each state for backward solve, type vector, data only,\ndata rel_tol_quadrature: Relative tolerance for backward quadrature, type real, data only,\ndata abs_tol_quadrature: Absolute tolerance for backward quadrature, type real, data only,\ndata max_num_steps: Maximum number of time-steps to take in integrating the ODE solution between output time points for forward and backward solve, type int, data only,\nnum_steps_between_checkpoints: number of steps between checkpointing forward solution, type int, data only,\ninterpolation_polynomial: can be 1 for hermite or 2 for polynomial interpolation method of CVODES, type int, data only,\nsolver_forward: solver used for forward ODE problem: 1=Adams (non-stiff), 2=BDF (stiff), type int, data only,\nsolver_backward: solver used for backward ODE problem: 1=Adams (non-stiff), 2=BDF (stiff), type int, data only.\n...: sequence of arguments that will be passed through unmodified to the ODE system function. The types here must match the types in the ... arguments of the ODE system function.\n\nBecause the tolerances are data arguments, they must be defined in either the data or transformed data blocks. 
They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.\n\n\nThe return value for the ODE solvers is an array of vectors (type array[] vector), one vector representing the state of the system at every time specified in the times argument.\n\n\n\nThe sizes must match, and in particular, the following groups are of the same size:\n\nstate variables passed into the system function, derivatives returned by the system function, initial state passed into the solver, and length of each vector in the output,\nnumber of solution times and number of vectors in the output.\n\n\n\n\n\n\nStan provides two higher order functions for solving initial value problems specified as Differential-Algebraic Equations (DAEs) with index-1 (Serban et al. 2021).\nSolving an initial value DAE means given a set of residual functions \\(r(y'(t, \\theta), y(t, \\theta), t)\\) and initial conditions \\((y(t_0, \\theta), y'(t_0, \\theta))\\), solving for \\(y\\) at a sequence of times \\(t_0 < t_1 \\leq t_2, \\cdots \\leq t_n\\). The residual function \\(r(y', y, t, \\theta)\\) will be defined as a function with a certain signature and provided along with the initial conditions and output times to one of the DAE solver functions.\nSimilar to ODE solvers, the DAE solver function takes extra arguments that are passed along unmodified to the user-supplied system function. Because there can be any number of these arguments and they can be of different types, they are denoted below as ..., and the types of these arguments, also represented by ... in the DAE solver call, must match the types of the arguments represented by ... in the user-supplied system function.\n\n\n \n\narray[] vector dae(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, ...) Solves the DAE system using the backward differentiation formula (BDF) method (Serban et al. 
2021).\nAvailable since 2.29\n \n\narray[] vector dae_tol(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the DAE system for the times provided using the backward differentiation formula (BDF) method with additional control parameters for the solver.\nAvailable since 2.29\n\n\n\nThe first argument to the DAE solver is the DAE residual function. The DAE residual function must have a vector return type, and the first three arguments must be a real, vector, and vector, in that order. These three arguments are followed by the variadic arguments that are passed through from the DAE solver function call:\n vector residual(real time, vector state, vector state_derivative, ...)\nThe DAE residual function should return the residuals at the time and state provided. The length of the returned vector must match the length of the state input into the function.\nThe arguments to this function are:\n\ntime, the time to evaluate the DAE system\nstate, the state of the DAE system at the time specified\nstate_derivative, the time derivatives of the state of the DAE system at the time specified\n..., sequence of arguments passed unmodified from the DAE solve function call. The types here must match the types in the ... arguments of the DAE solve function call.\n\n\n\n\nThe arguments to the DAE solver are\n\nresidual: DAE residual function,\ninitial_state: initial state, type vector,\ninitial_state_derivative: time derivative of the initial state, type vector,\ninitial_time: initial time, type data real,\ntimes: solution times, type data array[] real,\n...: sequence of arguments that will be passed through unmodified to the DAE residual function. The types here must match the types in the ... arguments of the DAE residual function.\n\nFor dae_tol, the following three parameters must be provided after times and before the ... 
arguments:\n\ndata rel_tol: relative tolerance for the DAE solver, type real, data only,\ndata abs_tol: absolute tolerance for the DAE solver, type real, data only, and\nmax_num_steps: maximum number of steps to take between output times in the DAE solver, type int, data only.\n\nBecause the tolerances are data arguments, they must be supplied as primitive numerics or defined in either the data or transformed data blocks. They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.\n\n\nThe user is responsible to ensure the residual function becomes zero at the initial time, t0, when the arguments initial_state and initial_state_derivative are introduced as state and state_derivative, respectively.\n\n\n\nThe return value for the DAE solvers is an array of vectors (type array[] vector), one vector representing the state of the system at every time specified in the times argument.\n\n\n\nThe sizes must match, and in particular, the following groups are of the same size:\n\nstate variables and state derivatives passed into the residual function, the residual returned by the residual function, initial state and initial state derivatives passed into the solver, and length of each vector in the output,\nnumber of solution times and number of vectors in the output.\n\n\n\n\n\n\nStan provides a built-in mechanism to perform 1D integration of a function via quadrature methods.\nIt operates similarly to the algebraic solver and the ordinary differential equations solver in that it allows as an argument a function.\nLike both of those utilities, some of the arguments are limited to data only expressions. These expressions must not contain variables other than those declared in the data or transformed data blocks.\n\n\nPerforming a 1D integration requires the integrand to be specified somehow. 
This is done by defining a function in the Stan functions block with the special signature:\nreal integrand(real x, real xc, array[] real theta,\n array[] real x_r, array[] int x_i)\nThe function should return the value of the integrand evaluated at the point x.\nThe arguments to this function are:\n\nx, the independent variable being integrated over\nxc, a high precision version of the distance from x to the nearest endpoint in a definite integral (for more info see section Precision Loss).\ntheta, parameter values used to evaluate the integral\nx_r, data values used to evaluate the integral\nx_i, integer data used to evaluate the integral\n\nLike the algebraic solver and the differential equations solver, the 1D integrator separates parameter values, theta, from data values, x_r.\n\n\n\n \n\nreal integrate_1d (function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i) Integrates the integrand from a to b.\nAvailable since 2.23\n \n\nreal integrate_1d (function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i, real relative_tolerance) Integrates the integrand from a to b with the given relative tolerance.\nAvailable since 2.23\n\n\nThe arguments to the 1D integrator are as follows:\n\nintegrand: function literal referring to a function specifying the integrand with signature (real, real, array[] real, array[] real, array[] int):real The arguments represent\n\n\nwhere integrand is evaluated,\n\n\ndistance from evaluation point to integration limit for definite integrals,\n\n\nparameters,\n\n\nreal data\n\n\ninteger data, and the return value is the integrand evaluated at the given point,\n\n\na: left limit of integration, may be negative infinity, type real,\nb: right limit of integration, may be positive infinity, type real,\ntheta: parameters only, type array[] real,\nx_r: real data only, type array[] real,\nx_i: integer data only, type array[] int.\n\nA relative_tolerance argument can optionally be 
provided for more control over the algorithm:\n\nrelative_tolerance: relative tolerance for the 1d integrator, type real, data only.\n\n\n\n\nThe return value for the 1D integrator is a real, the value of the integral.\n\n\n\nFor numeric stability, integrals on the (possibly infinite) interval \\((a, b)\\) that cross zero are split into two integrals, one from \\((a, 0)\\) and one from \\((0, b)\\). Each integral is separately integrated to the given relative_tolerance.\n\n\n\nWhen integrating certain definite integrals, there can be significant precision loss in evaluating the integrand near the endpoints. This has to do with the breakdown in precision of double precision floating point values when adding or subtracting a small number from a number much larger than it in magnitude (for instance, 1.0 - x). xc (as passed to the integrand) is a high-precision version of the distance between x and the definite integral endpoints and can be used to address this issue. More information (and an example where this is useful) is given in the User’s Guide. For zero crossing integrals, xc will be a high precision version of the distance to the endpoints of the two smaller integrals. For any integral with an endpoint at negative infinity or positive infinity, xc is set to NaN.\n\n\n\nInternally the 1D integrator uses the double-exponential methods in the Boost 1D quadrature library. Boost in turn makes use of quadrature methods developed in (Takahasi and Mori 1974), (Mori 1978), (Bailey, Jeyabalan, and Li 2005), and (Tanaka et al. 2009).\nThe gradients of the integral are computed in accordance with the Leibniz integral rule. Gradients of the integrand are computed internally with Stan’s automatic differentiation.\n\n\n\n\n\nStan provides a higher-order reduce function for summation. A function which returns a scalar g: U -> real is mapped to every element of a list of type array[] U, { x1, x2, ... 
} and all the results are accumulated,\ng(x1) + g(x2) + ...\nFor efficiency reasons the reduce function doesn’t work with the element-wise evaluated function g itself, but instead works through evaluating partial sums, f: array[] U -> real, where:\nf({ x1 }) = g(x1)\nf({ x1, x2 }) = g(x1) + g(x2)\nf({ x1, x2, ... }) = g(x1) + g(x2) + ...\nMathematically the summation reduction is associative and forming arbitrary partial sums in an arbitrary order will not change the result. However, floating point numerics on computers only have a limited precision such that associativity does not hold exactly. This implies that the order of summation determines the exact numerical result. For this reason, the higher-order reduce function is available in two variants:\n\nreduce_sum: Automatically choose partial sums partitioning based on a dynamic scheduling algorithm.\nreduce_sum_static: Compute the same sum as reduce_sum, but partition the input in the same way for given data set (in reduce_sum this partitioning might change depending on computer load). This should result in stable numerical evaluations.\n\n\n\nThe higher-order reduce function takes a partial sum function f, an array argument x (with one array element for each term in the sum), a recommended grainsize, and a set of shared arguments. This representation allows parallelization of the resultant sum.\n \n\nreal reduce_sum(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)\n \n real reduce_sum_static(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)\nReturns the equivalent of f(x, 1, size(x), s1, s2, ...), but computes the result in parallel by breaking the array x into independent partial sums. s1, s2, ... are shared between all terms in the sum.\nAvailable since 2.23\n\nf: function literal referring to a function specifying the partial sum operation. 
Refer to the partial sum function.\nx: array of T, one for each term of the reduction, T can be any type,\ngrainsize: For reduce_sum, grainsize is the recommended size of the partial sum (grainsize = 1 means pick totally automatically). For reduce_sum_static, grainsize determines the maximum size of the partial sums, type int,\ns1: first (optional) shared argument, type T1, where T1 can be any type\ns2: second (optional) shared argument, type T2, where T2 can be any type,\n...: remainder of shared arguments, each of which can be any type.\n\n\n\n\nThe partial sum function must have the following signature where the type T, and the types of all the shared arguments (T1, T2, …) match those of the original reduce_sum (reduce_sum_static) call.\n(array[] T x_subset, int start, int end, T1 s1, T2 s2, ...):real\nThe partial sum function returns the sum of the start to end terms (inclusive) of the overall calculations. The arguments to the partial sum function are:\n\nx_subset, the subset of x a given partial sum is responsible for computing, type array[] T, where T matches the type of x in reduce_sum (reduce_sum_static)\nstart, the index of the first term of the partial sum, type int\nend, the index of the last term of the partial sum (inclusive), type int\ns1, first shared argument, type T1, matching type of s1 in reduce_sum (reduce_sum_static)\ns2, second shared argument, type T2, matching type of s2 in reduce_sum (reduce_sum_static)\n..., remainder of shared arguments, with types matching those in reduce_sum (reduce_sum_static)\n\n\n\n\n\nStan provides a higher-order map function. This allows map-reduce functionality to be coded in Stan as described in the user’s guide.\n\n\nThe function being mapped must have a signature identical to that of the function f in the following declaration.\n vector f(vector phi, vector theta,\n data array[] real x_r, data array[] int x_i);\nThe map function returns the sequence of results for the particular shard being evaluated. 
The arguments to the mapped function are:\n\nphi, the sequence of parameters shared across shards\ntheta, the sequence of parameters specific to this shard\nx_r, sequence of real-valued data\nx_i, sequence of integer data\n\nAll input for the mapped function must be packed into these sequences and all output from the mapped function must be packed into a single vector. The vector of output from each mapped function is concatenated into the final result.\n\n\n\nThe rectangular map function operates on rectangular (not ragged) data structures, with parallel data structures for job-specific parameters, job-specific real data, and job-specific integer data.\n \n\nvector map_rect(F f, vector phi, array[] vector theta, data array[,] real x_r, data array[,] int x_i) Return the concatenation of the results of applying the function f, of type (vector, vector, array[] real, array[] int):vector elementwise, i.e., f(phi, theta[n], x_r[n], x_i[n]) for each n in 1:N, where N is the size of the parallel arrays of job-specific/local parameters theta, real data x_r, and integer data x_i. The shared/global parameters phi are passed to each invocation of f.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Higher-Order Functions" + ] + }, + { + "objectID": "functions-reference/higher-order_functions.html#functions-algebraic-solver", + "href": "functions-reference/higher-order_functions.html#functions-algebraic-solver", + "title": "Higher-Order Functions", + "section": "", + "text": "Stan provides two built-in algebraic equation solvers, respectively based on the Newton method and the Powell “dog leg” hybrid method. Empirically the Newton method is found to be faster and its use is recommended for most problems.\nAn algebraic solver is a higher-order function, i.e. it takes another function as one of its arguments. 
Other functions in Stan which share this feature are the differential equation solvers (see section Ordinary Differential Equation (ODE) Solvers and Differential Algebraic Equation (DAE) solver). Ordinary Stan functions do not allow functions as arguments.\n\n\nAn algebraic system is specified as an ordinary function in Stan within the function block. The function must return a vector and takes in, as its first argument, the unknowns \\(y\\) we wish to solve for, also passed as a vector. This argument is followed by additional arguments as specified by the user; we call such arguments variadic arguments and denote them .... The signature of the algebraic system is then:\n vector algebra_system (vector y, ...)\nThere is no type restriction for the variadic arguments and each argument can be passed as data or parameter. However users should use parameter arguments only when necessary and mark data arguments with the keyword data. In the below example, the last variadic argument, \\(x\\), is restricted to being data:\n vector algebra_system (vector y, vector theta, data vector x)\nDistinguishing data and parameter is important for computational reasons. Augmenting the total number of parameters increases the cost of propagating derivatives through the solution to the algebraic equation, and ultimately the computational cost of evaluating the gradients.\n\n\n\n \n vector solve_newton(function algebra_system, vector y_guess, ...) Solves the algebraic system, given an initial guess, using Newton’s method.\nAvailable since 2.31\n \n vector solve_newton_tol(function algebra_system, vector y_guess, data real scaling_step, data real f_tol, int max_steps, ...) Solves the algebraic system, given an initial guess, using Newton’s method with additional control parameters for the solver.\nAvailable since 2.31\n \n vector solve_powell(function algebra_system, vector y_guess, ...) 
Solves the algebraic system, given an initial guess, using Powell’s hybrid method.\nAvailable since 2.31\n \n vector solve_powell_tol(function algebra_system, vector y_guess, data real rel_tol, data real f_tol, int max_steps, ...) Solves the algebraic system, given an initial guess, using Powell’s hybrid method with additional control parameters for the solver.\nAvailable since 2.31\n\n\nThe arguments to the algebraic solvers are as follows:\n\nalgebra_system: function literal referring to a function specifying the system of algebraic equations with signature (vector, ...):vector. The arguments represent (1) unknowns, (2) additional parameter and/or data arguments, and the return value contains the value of the algebraic function, which goes to 0 when we plug in the solution to the algebraic system,\ny_guess: initial guess for the solution, type vector,\n...: variadic arguments.\n\nThe algebraic solvers admit control parameters. While Stan provides default values, the user should be prepared to adjust the control parameters. The following controls are available:\n\nscaling_step: for the Newton solver only, the scaled-step stopping tolerance, type real, data only. If a Newton step is smaller than the scaling step tolerance, the code breaks, assuming the solver is no longer making significant progress. If set to 0, this constraint is ignored. Default value is \\(10^{-3}\\).\nrel_tol: for the Powell solver only, the relative tolerance, type real, data only. The relative tolerance is the estimated relative error of the solver and serves to test if a satisfactory solution has been found. Default value is \\(10^{-10}\\).\nfunction_tol: function tolerance for the algebraic solver, type real, data only. After convergence of the solver, the proposed solution is plugged into the algebraic system and its norm is compared to the function tolerance. If the norm is below the function tolerance, the solution is deemed acceptable. 
Default value is \\(10^{-6}\\).\nmax_num_steps: maximum number of steps to take in the algebraic solver, type int, data only. If the solver reaches this number of steps, it breaks and returns an error message. Default value is \\(200\\).\n\nThe difference in which control parameters are available has to do with the underlying implementations for the solvers and the control parameters these implementations support. The Newton solver is based on KINSOL from the SUNDIALS suite, while the Powell solver uses a module from the Eigen library.\n\n\n\nThe return value for the algebraic solver is an object of type vector, with values which, when plugged in as y make the algebraic function go to 0 (approximately, within the specified function tolerance).\n\n\n\nCertain sizes have to be consistent. The initial guess, return value of the solver, and return value of the algebraic function must all be the same size.\n\n\n\nStan offers two methods to solve algebraic equations. solve_newton and solve_newton_tol use the Newton method, a first-order derivative based numerical solver. The Stan code builds on the implementation in KINSOL from the SUNDIALS suite (Hindmarsh et al. 2005). For many problems, we find that the Newton method is faster than the Powell method. If however Newton’s method performs poorly, either failing to or requiring an excessively long time to converge, the user should be prepared to switch to the Powell method.\nsolve_powell and solve_powell_tol are based on the Powell hybrid method (Powell 1970), which also uses first-order derivatives. The Stan code builds on the implementation of the hybrid solver in the unsupported module for nonlinear optimization problems of the Eigen library (Guennebaud, Jacob, et al. 2010). This solver is in turn based on the algorithm developed for the package MINPACK-1 (Jorge J. 
More 1980).\nFor both solvers, derivatives are propagated through the solution to the algebraic solution using the implicit function theorem and an adjoint method of automatic differentiation; for a discussion on this topic, see (Gaebler 2021) and (Margossian and Betancourt 2022).", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Higher-Order Functions" + ] + }, + { + "objectID": "functions-reference/higher-order_functions.html#functions-ode-solver", + "href": "functions-reference/higher-order_functions.html#functions-ode-solver", + "title": "Higher-Order Functions", + "section": "", + "text": "Stan provides several higher order functions for solving initial value problems specified as Ordinary Differential Equations (ODEs).\nSolving an initial value ODE means given a set of differential equations \\(y'(t, \\theta) = f(t, y, \\theta)\\) and initial conditions \\(y(t_0, \\theta)\\), solving for \\(y\\) at a sequence of times \\(t_0 < t_1 < t_2, \\cdots < t_n\\). \\(f(t, y, \\theta)\\) is referred to here as the ODE system function.\n\\(f(t, y, \\theta)\\) will be defined as a function with a certain signature and provided along with the initial conditions and output times to one of the ODE solver functions.\nTo make it easier to write ODEs, the solve functions take extra arguments that are passed along unmodified to the user-supplied system function. Because there can be any number of these arguments and they can be of different types, they are denoted below as .... The types of the arguments represented by ... in the ODE solve function call must match the types of the arguments represented by ... in the user-supplied system function.\n\n\n \n\narray[] vector ode_rk45(function ode, vector initial_state, real initial_time, array[] real times, ...) 
Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method.\nAvailable since 2.24\n \n\narray[] vector ode_rk45_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the ODE system for the times provided using the Dormand-Prince algorithm, a 4th/5th order Runge-Kutta method with additional control parameters for the solver.\nAvailable since 2.24\n \n\narray[] vector ode_ckrk(function ode, vector initial_state, real initial_time, array[] real times, ...) Solves the ODE system for the times provided using the Cash-Karp algorithm, a 4th/5th order explicit Runge-Kutta method.\nAvailable since 2.27\n \n\narray[] vector ode_ckrk_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the ODE system for the times provided using the Cash-Karp algorithm, a 4th/5th order explicit Runge-Kutta method with additional control parameters for the solver.\nAvailable since 2.27\n \n\narray[] vector ode_adams(function ode, vector initial_state, real initial_time, array[] real times, ...) Solves the ODE system for the times provided using the Adams-Moulton method.\nAvailable since 2.24\n \n\narray[] vector ode_adams_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the ODE system for the times provided using the Adams-Moulton method with additional control parameters for the solver.\nAvailable since 2.24\n\n\n\n \n\narray[] vector ode_bdf(function ode, vector initial_state, real initial_time, array[] real times, ...) 
Solves the ODE system for the times provided using the backward differentiation formula (BDF) method.\nAvailable since 2.24\n \n\narray[] vector ode_bdf_tol(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the ODE system for the times provided using the backward differentiation formula (BDF) method with additional control parameters for the solver.\nAvailable since 2.24\n\n\n\n \n\narray[] vector ode_adjoint_tol_ctl(function ode, vector initial_state, real initial_time, array[] real times, data real rel_tol_forward, data vector abs_tol_forward, data real rel_tol_backward, data vector abs_tol_backward, int max_num_steps, int num_steps_between_checkpoints, int interpolation_polynomial, int solver_forward, int solver_backward, ...)\nSolves the ODE system for the times provided using the adjoint ODE solver method from CVODES. The adjoint ODE solver requires a checkpointed forward in time ODE integration, a backwards in time integration that makes use of an interpolated version of the forward solution, and the solution of a quadrature problem (the number of which depends on the number of parameters passed to the solve). The tolerances and numeric methods used for the forward solve, backward solve, quadratures, and interpolation can all be configured.\nAvailable since 2.27\n\n\n\nThe first argument to one of the ODE solvers is always the ODE system function. The ODE system function must have a vector return type, and the first two arguments must be a real and vector in that order. These two arguments are followed by the variadic arguments that are passed through from the ODE solve function call:\n vector ode(real time, vector state, ...)\nThe ODE system function should return the derivative of the state with respect to time at the time and state provided. 
The length of the returned vector must match the length of the state input into the function.\nThe arguments to this function are:\n\ntime, the time to evaluate the ODE system\nstate, the state of the ODE system at the time specified\n..., sequence of arguments passed unmodified from the ODE solve function call. The types here must match the types in the ... arguments of the ODE solve function call.\n\n\n\n\nThe arguments to the ODE solvers in both the stiff and non-stiff solvers are the same. The arguments to the adjoint ODE solver are different; see Arguments to the adjoint ODE solver.\n\node: ODE system function,\ninitial_state: initial state, type vector,\ninitial_time: initial time, type real,\ntimes: solution times, type array[] real,\n...: sequence of arguments that will be passed through unmodified to the ODE system function. The types here must match the types in the ... arguments of the ODE system function.\n\nFor the versions of the ode solver functions ending in _tol, these three parameters must be provided after times and before the ... arguments:\n\ndata rel_tol: relative tolerance for the ODE solver, type real, data only,\ndata abs_tol: absolute tolerance for the ODE solver, type real, data only, and\nmax_num_steps: maximum number of steps to take between output times in the ODE solver, type int, data only.\n\nBecause the tolerances are data arguments, they must be defined in either the data or transformed data blocks. 
They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.\n\n\n\nThe arguments to the adjoint ODE solver are different from those for the other functions (for those see Arguments to the ODE solvers).\n\node: ODE system function,\ninitial_state: initial state, type vector,\ninitial_time: initial time, type real,\ntimes: solution times, type array[] real,\ndata rel_tol_forward: Relative tolerance for forward solve, type real, data only,\ndata abs_tol_forward: Absolute tolerance vector for each state for forward solve, type vector, data only,\ndata rel_tol_backward: Relative tolerance for backward solve, type real, data only,\ndata abs_tol_backward: Absolute tolerance vector for each state for backward solve, type vector, data only,\ndata rel_tol_quadrature: Relative tolerance for backward quadrature, type real, data only,\ndata abs_tol_quadrature: Absolute tolerance for backward quadrature, type real, data only,\ndata max_num_steps: Maximum number of time-steps to take in integrating the ODE solution between output time points for forward and backward solve, type int, data only,\nnum_steps_between_checkpoints: number of steps between checkpointing forward solution, type int, data only,\ninterpolation_polynomial: can be 1 for hermite or 2 for polynomial interpolation method of CVODES, type int, data only,\nsolver_forward: solver used for forward ODE problem: 1=Adams (non-stiff), 2=BDF (stiff), type int, data only,\nsolver_backward: solver used for backward ODE problem: 1=Adams (non-stiff), 2=BDF (stiff), type int, data only.\n...: sequence of arguments that will be passed through unmodified to the ODE system function. The types here must match the types in the ... arguments of the ODE system function.\n\nBecause the tolerances are data arguments, they must be defined in either the data or transformed data blocks. 
They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.\n\n\nThe return value for the ODE solvers is an array of vectors (type array[] vector), one vector representing the state of the system at every time specified in the times argument.\n\n\n\nThe sizes must match, and in particular, the following groups are of the same size:\n\nstate variables passed into the system function, derivatives returned by the system function, initial state passed into the solver, and length of each vector in the output,\nnumber of solution times and number of vectors in the output.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Higher-Order Functions" + ] + }, + { + "objectID": "functions-reference/higher-order_functions.html#functions-dae-solver", + "href": "functions-reference/higher-order_functions.html#functions-dae-solver", + "title": "Higher-Order Functions", + "section": "", + "text": "Stan provides two higher order functions for solving initial value problems specified as Differential-Algebraic Equations (DAEs) with index-1 (Serban et al. 2021).\nSolving an initial value DAE means given a set of residual functions \\(r(y'(t, \\theta), y(t, \\theta), t)\\) and initial conditions \\((y(t_0, \\theta), y'(t_0, \\theta))\\), solving for \\(y\\) at a sequence of times \\(t_0 < t_1 \\leq t_2, \\cdots \\leq t_n\\). The residual function \\(r(y', y, t, \\theta)\\) will be defined as a function with a certain signature and provided along with the initial conditions and output times to one of the DAE solver functions.\nSimilar to ODE solvers, the DAE solver function takes extra arguments that are passed along unmodified to the user-supplied system function. Because there can be any number of these arguments and they can be of different types, they are denoted below as ..., and the types of these arguments, also represented by ... in the DAE solver call, must match the types of the arguments represented by ... 
in the user-supplied system function.\n\n\n \n\narray[] vector dae(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, ...) Solves the DAE system using the backward differentiation formula (BDF) method (Serban et al. 2021).\nAvailable since 2.29\n \n\narray[] vector dae_tol(function residual, vector initial_state, vector initial_state_derivative, data real initial_time, data array[] real times, data real rel_tol, data real abs_tol, int max_num_steps, ...) Solves the DAE system for the times provided using the backward differentiation formula (BDF) method with additional control parameters for the solver.\nAvailable since 2.29\n\n\n\nThe first argument to the DAE solver is the DAE residual function. The DAE residual function must have a vector return type, and the first three arguments must be a real, vector, and vector, in that order. These three arguments are followed by the variadic arguments that are passed through from the DAE solver function call:\n vector residual(real time, vector state, vector state_derivative, ...)\nThe DAE residual function should return the residuals at the time and state provided. The length of the returned vector must match the length of the state input into the function.\nThe arguments to this function are:\n\ntime, the time to evaluate the DAE system\nstate, the state of the DAE system at the time specified\nstate_derivative, the time derivatives of the state of the DAE system at the time specified\n..., sequence of arguments passed unmodified from the DAE solve function call. The types here must match the types in the ... 
arguments of the DAE solve function call.\n\n\n\n\nThe arguments to the DAE solver are\n\nresidual: DAE residual function,\ninitial_state: initial state, type vector,\ninitial_state_derivative: time derivative of the initial state, type vector,\ninitial_time: initial time, type data real,\ntimes: solution times, type data array[] real,\n...: sequence of arguments that will be passed through unmodified to the DAE residual function. The types here must match the types in the ... arguments of the DAE residual function.\n\nFor dae_tol, the following three parameters must be provided after times and before the ... arguments:\n\ndata rel_tol: relative tolerance for the DAE solver, type real, data only,\ndata abs_tol: absolute tolerance for the DAE solver, type real, data only, and\nmax_num_steps: maximum number of steps to take between output times in the DAE solver, type int, data only.\n\nBecause the tolerances are data arguments, they must be supplied as primitive numerics or defined in either the data or transformed data blocks. 
They cannot be parameters, transformed parameters or functions of parameters or transformed parameters.\n\n\nThe user is responsible to ensure the residual function becomes zero at the initial time, t0, when the arguments initial_state and initial_state_derivative are introduced as state and state_derivative, respectively.\n\n\n\nThe return value for the DAE solvers is an array of vectors (type array[] vector), one vector representing the state of the system at every time specified in the times argument.\n\n\n\nThe sizes must match, and in particular, the following groups are of the same size:\n\nstate variables and state derivatives passed into the residual function, the residual returned by the residual function, initial state and initial state derivatives passed into the solver, and length of each vector in the output,\nnumber of solution times and number of vectors in the output.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Higher-Order Functions" + ] + }, + { + "objectID": "functions-reference/higher-order_functions.html#functions-1d-integrator", + "href": "functions-reference/higher-order_functions.html#functions-1d-integrator", + "title": "Higher-Order Functions", + "section": "", + "text": "Stan provides a built-in mechanism to perform 1D integration of a function via quadrature methods.\nIt operates similarly to the algebraic solver and the ordinary differential equations solver in that it allows as an argument a function.\nLike both of those utilities, some of the arguments are limited to data only expressions. These expressions must not contain variables other than those declared in the data or transformed data blocks.\n\n\nPerforming a 1D integration requires the integrand to be specified somehow. 
This is done by defining a function in the Stan functions block with the special signature:\nreal integrand(real x, real xc, array[] real theta,\n array[] real x_r, array[] int x_i)\nThe function should return the value of the integrand evaluated at the point x.\nThe arguments to this function are:\n\nx, the independent variable being integrated over\nxc, a high precision version of the distance from x to the nearest endpoint in a definite integral (for more info, see section Precision Loss).\ntheta, parameter values used to evaluate the integral\nx_r, data values used to evaluate the integral\nx_i, integer data used to evaluate the integral\n\nLike the algebraic solver and the differential equations solver, the 1D integrator separates parameter values, theta, from data values, x_r.\n\n\n\n \n\nreal integrate_1d (function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i) Integrates the integrand from a to b.\nAvailable since 2.23\n \n\nreal integrate_1d (function integrand, real a, real b, array[] real theta, array[] real x_r, array[] int x_i, real relative_tolerance) Integrates the integrand from a to b with the given relative tolerance.\nAvailable since 2.23\n\n\nThe arguments to the 1D integrator are as follows:\n\nintegrand: function literal referring to a function specifying the integrand with signature (real, real, array[] real, array[] real, array[] int):real The arguments represent\n\n\nwhere integrand is evaluated,\n\n\ndistance from evaluation point to integration limit for definite integrals,\n\n\nparameters,\n\n\nreal data\n\n\ninteger data, and the return value is the integrand evaluated at the given point,\n\n\na: left limit of integration, may be negative infinity, type real,\nb: right limit of integration, may be positive infinity, type real,\ntheta: parameters only, type array[] real,\nx_r: real data only, type array[] real,\nx_i: integer data only, type array[] int.\n\nA relative_tolerance argument can optionally be 
provided for more control over the algorithm:\n\nrelative_tolerance: relative tolerance for the 1d integrator, type real, data only.\n\n\n\n\nThe return value for the 1D integrator is a real, the value of the integral.\n\n\n\nFor numeric stability, integrals on the (possibly infinite) interval \\((a, b)\\) that cross zero are split into two integrals, one from \\((a, 0)\\) and one from \\((0, b)\\). Each integral is separately integrated to the given relative_tolerance.\n\n\n\nWhen integrating certain definite integrals, there can be significant precision loss in evaluating the integrand near the endpoints. This has to do with the breakdown in precision of double precision floating point values when adding or subtracting a small number from a number much larger than it in magnitude (for instance, 1.0 - x). xc (as passed to the integrand) is a high-precision version of the distance between x and the definite integral endpoints and can be used to address this issue. More information (and an example where this is useful) is given in the User’s Guide. For zero crossing integrals, xc will be a high precision version of the distance to the endpoints of the two smaller integrals. For any integral with an endpoint at negative infinity or positive infinity, xc is set to NaN.\n\n\n\nInternally the 1D integrator uses the double-exponential methods in the Boost 1D quadrature library. Boost in turn makes use of quadrature methods developed in (Takahasi and Mori 1974), (Mori 1978), (Bailey, Jeyabalan, and Li 2005), and (Tanaka et al. 2009).\nThe gradients of the integral are computed in accordance with the Leibniz integral rule. 
Gradients of the integrand are computed internally with Stan’s automatic differentiation.", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Higher-Order Functions" + ] + }, + { + "objectID": "functions-reference/higher-order_functions.html#functions-reduce", + "href": "functions-reference/higher-order_functions.html#functions-reduce", + "title": "Higher-Order Functions", + "section": "", + "text": "Stan provides a higher-order reduce function for summation. A function which returns a scalar g: U -> real is mapped to every element of a list of type array[] U, { x1, x2, ... } and all the results are accumulated,\ng(x1) + g(x2) + ...\nFor efficiency reasons the reduce function doesn’t work with the element-wise evaluated function g itself, but instead works through evaluating partial sums, f: array[] U -> real, where:\nf({ x1 }) = g(x1)\nf({ x1, x2 }) = g(x1) + g(x2)\nf({ x1, x2, ... }) = g(x1) + g(x2) + ...\nMathematically the summation reduction is associative and forming arbitrary partial sums in an arbitrary order will not change the result. However, floating point numerics on computers only have a limited precision such that associativity does not hold exactly. This implies that the order of summation determines the exact numerical result. For this reason, the higher-order reduce function is available in two variants:\n\nreduce_sum: Automatically choose partial sums partitioning based on a dynamic scheduling algorithm.\nreduce_sum_static: Compute the same sum as reduce_sum, but partition the input in the same way for given data set (in reduce_sum this partitioning might change depending on computer load). This should result in stable numerical evaluations.\n\n\n\nThe higher-order reduce function takes a partial sum function f, an array argument x (with one array element for each term in the sum), a recommended grainsize, and a set of shared arguments. 
This representation allows parallelization of the resultant sum.\n \n\nreal reduce_sum(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)\n \n real reduce_sum_static(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)\nReturns the equivalent of f(x, 1, size(x), s1, s2, ...), but computes the result in parallel by breaking the array x into independent partial sums. s1, s2, ... are shared between all terms in the sum.\nAvailable since 2.23\n\nf: function literal referring to a function specifying the partial sum operation. Refer to the partial sum function.\nx: array of T, one for each term of the reduction, T can be any type,\ngrainsize: For reduce_sum, grainsize is the recommended size of the partial sum (grainsize = 1 means pick totally automatically). For reduce_sum_static, grainsize determines the maximum size of the partial sums, type int,\ns1: first (optional) shared argument, type T1, where T1 can be any type\ns2: second (optional) shared argument, type T2, where T2 can be any type,\n...: remainder of shared arguments, each of which can be any type.\n\n\n\n\nThe partial sum function must have the following signature where the type T, and the types of all the shared arguments (T1, T2, …) match those of the original reduce_sum (reduce_sum_static) call.\n(array[] T x_subset, int start, int end, T1 s1, T2 s2, ...):real\nThe partial sum function returns the sum of the start to end terms (inclusive) of the overall calculations. 
The arguments to the partial sum function are:\n\nx_subset, the subset of x a given partial sum is responsible for computing, type array[] T, where T matches the type of x in reduce_sum (reduce_sum_static)\nstart, the index of the first term of the partial sum, type int\nend, the index of the last term of the partial sum (inclusive), type int\ns1, first shared argument, type T1, matching type of s1 in reduce_sum (reduce_sum_static)\ns2, second shared argument, type T2, matching type of s2 in reduce_sum (reduce_sum_static)\n..., remainder of shared arguments, with types matching those in reduce_sum (reduce_sum_static)", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Higher-Order Functions" + ] + }, + { + "objectID": "functions-reference/higher-order_functions.html#functions-map", + "href": "functions-reference/higher-order_functions.html#functions-map", + "title": "Higher-Order Functions", + "section": "", + "text": "Stan provides a higher-order map function. This allows map-reduce functionality to be coded in Stan as described in the user’s guide.\n\n\nThe function being mapped must have a signature identical to that of the function f in the following declaration.\n vector f(vector phi, vector theta,\n data array[] real x_r, data array[] int x_i);\nThe map function returns the sequence of results for the particular shard being evaluated. The arguments to the mapped function are:\n\nphi, the sequence of parameters shared across shards\ntheta, the sequence of parameters specific to this shard\nx_r, sequence of real-valued data\nx_i, sequence of integer data\n\nAll input for the mapped function must be packed into these sequences and all output from the mapped function must be packed into a single vector. 
The vector of output from each mapped function is concatenated into the final result.\n\n\n\nThe rectangular map function operates on rectangular (not ragged) data structures, with parallel data structures for job-specific parameters, job-specific real data, and job-specific integer data.\n \n\nvector map_rect(F f, vector phi, array[] vector theta, data array[,] real x_r, data array[,] int x_i) Return the concatenation of the results of applying the function f, of type (vector, vector, array[] real, array[] int):vector elementwise, i.e., f(phi, theta[n], x_r[n], x_i[n]) for each n in 1:N, where N is the size of the parallel arrays of job-specific/local parameters theta, real data x_r, and integer data x_i. The shared/global parameters phi are passed to each invocation of f.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Higher-Order Functions" + ] + }, + { + "objectID": "functions-reference/integer-valued_basic_functions.html", + "href": "functions-reference/integer-valued_basic_functions.html", + "title": "Integer-Valued Basic Functions", + "section": "", + "text": "This chapter describes Stan’s built-in functions that take various types of arguments and return integer values.\n\n\nStan’s arithmetic is based on standard double-precision C++ integer and floating-point arithmetic. If the arguments to an arithmetic operator are both integers, as in 2 + 2, integer arithmetic is used. If one argument is an integer and the other a floating-point value, as in 2.0 + 2 and 2 + 2.0, then the integer is promoted to a floating point value and floating-point arithmetic is used.\nInteger arithmetic behaves slightly differently than floating point arithmetic. The first difference is how overflow is treated. If the sum or product of two integers overflows the maximum integer representable, the result is an undesirable wraparound behavior at the bit level. 
If the integers were first promoted to real numbers, they would not overflow a floating-point representation. There are no extra checks in Stan to flag overflows, so it is up to the user to make sure it does not occur.\nSecondly, because the set of integers is not closed under division and there is no special infinite value for integers, integer division implicitly rounds the result. If both arguments are positive, the result is rounded down. For example, 1 / 2 evaluates to 0 and 5 / 3 evaluates to 1.\nIf one of the integer arguments to division is negative, the latest C++ specification (C++11) requires rounding toward zero. This would have 1 / 2 and -1 / 2 evaluate to 0, -7 / 2 evaluate to -3, and 7 / 2 evaluate to 3. Before the C++11 specification, the behavior was platform dependent, allowing rounding up or down. All compilers recent enough to be able to deal with Stan’s templating should follow the C++11 specification, but it may be worth testing if you are not sure and plan to use integer division with negative values.\nUnlike floating point division, where 1.0 / 0.0 produces the special positive infinite value, integer division by zero, as in 1 / 0, has undefined behavior in the C++ standard. For example, the clang++ compiler on Mac OS X returns 3764, whereas the g++ compiler throws an exception and aborts the program with a warning. As with overflow, it is up to the user to make sure integer divide-by-zero does not occur.\n\n\nOperators are described using the C++ syntax. For instance, the binary operator of addition, written X + Y, would have the Stan signature int operator+(int, int) indicating it takes two integer arguments and returns an integer value. 
As noted previously, the value of integer division is platform-dependent when rounding is platform dependent before C++11; the descriptions below provide the C++11 definition.\n \n\nint operator+(int x, int y) The sum of the addends x and y \\[\\begin{equation*} \\text{operator+}(x,y) = (x + y) \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator-(int x, int y) The difference between the minuend x and subtrahend y \\[\\begin{equation*}\n\\text{operator-}(x,y) = (x - y) \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator*(int x, int y) The product of the factors x and y \\[\\begin{equation*} \\text{operator*}(x,y) = (x\n\\times y) \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator/(int x, int y) The integer quotient of the dividend x and divisor y \\[\\begin{equation*}\n\\text{operator/}(x,y) = \\begin{cases} \\lfloor x / y \\rfloor & \\text{if\n} x / y \\geq 0 \\\\ - \\lfloor \\text{floor}(-x / y) \\rfloor & \\text{if }\nx / y < 0. \\end{cases} \\end{equation*}\\] Deprecated; use operator%/% instead.\nAvailable since 2.0, deprecated in 2.24\n \n\nint operator%/%(int x, int y) The integer quotient of the dividend x and divisor y \\[\\begin{equation*}\n\\text{operator\\%/\\%}(x,y) = \\begin{cases} \\lfloor x / y \\rfloor & \\text{if\n} x / y \\geq 0 \\\\ - \\lfloor \\text{floor}(-x / y) \\rfloor & \\text{if }\nx / y < 0. \\end{cases} \\end{equation*}\\]\nAvailable since 2.24\n \n\nint operator%(int x, int y) x modulo y, which is the positive remainder after dividing x by y. If both x and y are non-negative, so is the result; otherwise, the sign of the result is platform dependent. 
\\[\\begin{equation*} \\mathrm{operator\\%}(x, y) \\ =\n\\ x \\ \\text{mod} \\ y \\ = \\ x - y * \\lfloor x / y \\rfloor \\end{equation*}\\]\nAvailable since 2.13\n\n\n\n \n\nint operator-(int x) The negation of the subtrahend x \\[\\begin{equation*} \\text{operator-}(x) = -x \\end{equation*}\\]\nAvailable since 2.0\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of integers, -x is the same shape array where each individual integer is negated.\nAvailable since 2.31\n \n\nint operator+(int x) This is a no-op. \\[\\begin{equation*} \\text{operator+}(x) = x \\end{equation*}\\]\nAvailable since 2.0\n\n\n\n\n \n\nT abs(T x) The absolute value of x.\nThis function works elementwise over containers such as vectors. Given a type T which is int, or an array of ints, abs returns the same type where each element has had its absolute value taken.\nAvailable since 2.0, vectorized in 2.30\n \n\nint int_step(int x)\n \n\nint int_step(real x) Return the step function of x as an integer, \\[\\begin{equation*} \\mathrm{int\\_step}(x)\n= \\begin{cases} 1 & \\text{if } x > 0 \\\\ 0 & \\text{if } x \\leq 0 \\text{\nor } x \\text{ is } NaN \\end{cases} \\end{equation*}\\] Warning: int_step(0) and int_step(NaN) return 0 whereas step(0) and step(NaN) return 1.\nSee the warning in section step functions about the dangers of step functions applied to anything other than data.\nAvailable since 2.0\n\n\n\n \n\nint min(int x, int y) Return the minimum of x and y. \\[\\begin{equation*} \\text{min}(x, y) = \\begin{cases} x &\n\\text{if } x < y\\\\ y & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint max(int x, int y) Return the maximum of x and y. 
\\[\\begin{equation*} \\text{max}(x, y) = \\begin{cases} x &\n\\text{if } x > y\\\\ y & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n\n\n\n \n\nint size(int x)\n \n\nint size(real x)\nReturn the size of x which for scalar-valued x is 1\nAvailable since 2.26\n\n\n\nIt is possible to cast real numbers to integers as long as the real value is data. See data only qualifiers in the Stan Reference Manual.\n \n\nint to_int(data real x)\nReturn the value x truncated to an integer. This will throw an error if the value of x is too big to represent as a 32-bit signed integer.\nThis is similar to trunc (see Rounding functions) but the return type is of type int. For example, to_int(3.9) is 3, and to_int(-3.9) is -3.\nAvailable since 2.31\n \n\nI to_int(data T x)\nThe vectorized version of to_int. This function accepts a (possibly nested) array of reals and returns an array of the same shape where each element has been truncated to an integer.\nAvailable since 2.31", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Integer-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/integer-valued_basic_functions.html#int-arithmetic", + "href": "functions-reference/integer-valued_basic_functions.html#int-arithmetic", + "title": "Integer-Valued Basic Functions", + "section": "", + "text": "Stan’s arithmetic is based on standard double-precision C++ integer and floating-point arithmetic. If the arguments to an arithmetic operator are both integers, as in 2 + 2, integer arithmetic is used. If one argument is an integer and the other a floating-point value, as in 2.0 + 2 and 2 + 2.0, then the integer is promoted to a floating point value and floating-point arithmetic is used.\nInteger arithmetic behaves slightly differently than floating point arithmetic. The first difference is how overflow is treated. 
If the sum or product of two integers overflows the maximum integer representable, the result is an undesirable wraparound behavior at the bit level. If the integers were first promoted to real numbers, they would not overflow a floating-point representation. There are no extra checks in Stan to flag overflows, so it is up to the user to make sure it does not occur.\nSecondly, because the set of integers is not closed under division and there is no special infinite value for integers, integer division implicitly rounds the result. If both arguments are positive, the result is rounded down. For example, 1 / 2 evaluates to 0 and 5 / 3 evaluates to 1.\nIf one of the integer arguments to division is negative, the latest C++ specification (C++11) requires rounding toward zero. This would have 1 / 2 and -1 / 2 evaluate to 0, -7 / 2 evaluate to -3, and 7 / 2 evaluate to 3. Before the C++11 specification, the behavior was platform dependent, allowing rounding up or down. All compilers recent enough to be able to deal with Stan’s templating should follow the C++11 specification, but it may be worth testing if you are not sure and plan to use integer division with negative values.\nUnlike floating point division, where 1.0 / 0.0 produces the special positive infinite value, integer division by zero, as in 1 / 0, has undefined behavior in the C++ standard. For example, the clang++ compiler on Mac OS X returns 3764, whereas the g++ compiler throws an exception and aborts the program with a warning. As with overflow, it is up to the user to make sure integer divide-by-zero does not occur.\n\n\nOperators are described using the C++ syntax. For instance, the binary operator of addition, written X + Y, would have the Stan signature int operator+(int, int) indicating it takes two integer arguments and returns an integer value. 
As noted previously, the value of integer division is platform-dependent when rounding is platform dependent before C++11; the descriptions below provide the C++11 definition.\n \n\nint operator+(int x, int y) The sum of the addends x and y \\[\\begin{equation*} \\text{operator+}(x,y) = (x + y) \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator-(int x, int y) The difference between the minuend x and subtrahend y \\[\\begin{equation*}\n\\text{operator-}(x,y) = (x - y) \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator*(int x, int y) The product of the factors x and y \\[\\begin{equation*} \\text{operator*}(x,y) = (x\n\\times y) \\end{equation*}\\]\nAvailable since 2.0\n \n\nint operator/(int x, int y) The integer quotient of the dividend x and divisor y \\[\\begin{equation*}\n\\text{operator/}(x,y) = \\begin{cases} \\lfloor x / y \\rfloor & \\text{if\n} x / y \\geq 0 \\\\ - \\lfloor \\text{floor}(-x / y) \\rfloor & \\text{if }\nx / y < 0. \\end{cases} \\end{equation*}\\] Deprecated; use operator%/% instead.\nAvailable since 2.0, deprecated in 2.24\n \n\nint operator%/%(int x, int y) The integer quotient of the dividend x and divisor y \\[\\begin{equation*}\n\\text{operator\\%/\\%}(x,y) = \\begin{cases} \\lfloor x / y \\rfloor & \\text{if\n} x / y \\geq 0 \\\\ - \\lfloor \\text{floor}(-x / y) \\rfloor & \\text{if }\nx / y < 0. \\end{cases} \\end{equation*}\\]\nAvailable since 2.24\n \n\nint operator%(int x, int y) x modulo y, which is the positive remainder after dividing x by y. If both x and y are non-negative, so is the result; otherwise, the sign of the result is platform dependent. 
\\[\\begin{equation*} \\mathrm{operator\\%}(x, y) \\ =\n\\ x \\ \\text{mod} \\ y \\ = \\ x - y * \\lfloor x / y \\rfloor \\end{equation*}\\]\nAvailable since 2.13\n\n\n\n \n\nint operator-(int x) The negation of the subtrahend x \\[\\begin{equation*} \\text{operator-}(x) = -x \\end{equation*}\\]\nAvailable since 2.0\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of integers, -x is the same shape array where each individual integer is negated.\nAvailable since 2.31\n \n\nint operator+(int x) This is a no-op. \\[\\begin{equation*} \\text{operator+}(x) = x \\end{equation*}\\]\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Integer-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/integer-valued_basic_functions.html#absolute-functions", + "href": "functions-reference/integer-valued_basic_functions.html#absolute-functions", + "title": "Integer-Valued Basic Functions", + "section": "", + "text": "T abs(T x) The absolute value of x.\nThis function works elementwise over containers such as vectors. 
Given a type T which is int, or an array of ints, abs returns the same type where each element has had its absolute value taken.\nAvailable since 2.0, vectorized in 2.30\n \n\nint int_step(int x)\n \n\nint int_step(real x) Return the step function of x as an integer, \\[\\begin{equation*} \\mathrm{int\\_step}(x)\n= \\begin{cases} 1 & \\text{if } x > 0 \\\\ 0 & \\text{if } x \\leq 0 \\text{\nor } x \\text{ is } NaN \\end{cases} \\end{equation*}\\] Warning: int_step(0) and int_step(NaN) return 0 whereas step(0) and step(NaN) return 1.\nSee the warning in section step functions about the dangers of step functions applied to anything other than data.\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Integer-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/integer-valued_basic_functions.html#bound-functions", + "href": "functions-reference/integer-valued_basic_functions.html#bound-functions", + "title": "Integer-Valued Basic Functions", + "section": "", + "text": "int min(int x, int y) Return the minimum of x and y. \\[\\begin{equation*} \\text{min}(x, y) = \\begin{cases} x &\n\\text{if } x < y\\\\ y & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0\n \n\nint max(int x, int y) Return the maximum of x and y. 
\\[\\begin{equation*} \\text{max}(x, y) = \\begin{cases} x &\n\\text{if } x > y\\\\ y & \\text{otherwise} \\end{cases} \\end{equation*}\\]\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Integer-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/integer-valued_basic_functions.html#size-functions", + "href": "functions-reference/integer-valued_basic_functions.html#size-functions", + "title": "Integer-Valued Basic Functions", + "section": "", + "text": "int size(int x)\n \n\nint size(real x)\nReturn the size of x which for scalar-valued x is 1\nAvailable since 2.26", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Integer-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/integer-valued_basic_functions.html#casting-functions", + "href": "functions-reference/integer-valued_basic_functions.html#casting-functions", + "title": "Integer-Valued Basic Functions", + "section": "", + "text": "It is possible to cast real numbers to integers as long as the real value is data. See data only qualifiers in the Stan Reference Manual.\n \n\nint to_int(data real x)\nReturn the value x truncated to an integer. This will throw an error if the value of x is too big to represent as a 32-bit signed integer.\nThis is similar to trunc (see Rounding functions) but the return type is of type int. For example, to_int(3.9) is 3, and to_int(-3.9) is -3.\nAvailable since 2.31\n \n\nI to_int(data T x)\nThe vectorized version of to_int. 
This function accepts a (possibly nested) array of reals and returns an array of the same shape where each element has been truncated to an integer.\nAvailable since 2.31", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Integer-Valued Basic Functions" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html", + "href": "functions-reference/matrix_operations.html", + "title": "Matrix Operations", + "section": "", + "text": "int num_elements(vector x) The total number of elements in the vector x (same as function rows)\nAvailable since 2.5\n \n\nint num_elements(row_vector x) The total number of elements in the vector x (same as function cols)\nAvailable since 2.5\n \n\nint num_elements(matrix x) The total number of elements in the matrix x. For example, if x is a \\(5 \\times 3\\) matrix, then num_elements(x) is 15\nAvailable since 2.5\n \n\nint rows(vector x) The number of rows in the vector x\nAvailable since 2.0\n \n\nint rows(row_vector x) The number of rows in the row vector x, namely 1\nAvailable since 2.0\n \n\nint rows(matrix x) The number of rows in the matrix x\nAvailable since 2.0\n \n\nint cols(vector x) The number of columns in the vector x, namely 1\nAvailable since 2.0\n \n\nint cols(row_vector x) The number of columns in the row vector x\nAvailable since 2.0\n \n\nint cols(matrix x) The number of columns in the matrix x\nAvailable since 2.0\n \n\nint size(vector x) The size of x, i.e., the number of elements\nAvailable since 2.26\n \n\nint size(row_vector x) The size of x, i.e., the number of elements\nAvailable since 2.26\n \n\nint size(matrix x) The size of the matrix x. For example, if x is a \\(5 \\times 3\\) matrix, then size(x) is 15\nAvailable since 2.26\n\n\n\nStan supports the basic matrix operations using infix, prefix and postfix operations. 
This section lists the operations supported by Stan along with their argument and result types.\n\n\n \n\nvector operator-(vector x) The negation of the vector x.\nAvailable since 2.0\n \n\nrow_vector operator-(row_vector x) The negation of the row vector x.\nAvailable since 2.0\n \n\nmatrix operator-(matrix x) The negation of the matrix x.\nAvailable since 2.0\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of matrix types, -x is the same shape array where each individual value is negated.\nAvailable since 2.31\n\n\n\n \n\nvector operator+(vector x, vector y) The sum of the vectors x and y.\nAvailable since 2.0\n \n\nrow_vector operator+(row_vector x, row_vector y) The sum of the row vectors x and y.\nAvailable since 2.0\n \n\nmatrix operator+(matrix x, matrix y) The sum of the matrices x and y\nAvailable since 2.0\n \n\nvector operator-(vector x, vector y) The difference between the vectors x and y.\nAvailable since 2.0\n \n\nrow_vector operator-(row_vector x, row_vector y) The difference between the row vectors x and y\nAvailable since 2.0\n \n\nmatrix operator-(matrix x, matrix y) The difference between the matrices x and y\nAvailable since 2.0\n \n\nvector operator*(real x, vector y) The product of the scalar x and vector y\nAvailable since 2.0\n \n\nrow_vector operator*(real x, row_vector y) The product of the scalar x and the row vector y\nAvailable since 2.0\n \n\nmatrix operator*(real x, matrix y) The product of the scalar x and the matrix y\nAvailable since 2.0\n \n\nvector operator*(vector x, real y) The product of the scalar y and vector x\nAvailable since 2.0\n \n\nmatrix operator*(vector x, row_vector y) The product of the vector x and row vector y\nAvailable since 2.0\n \n\nrow_vector operator*(row_vector x, real y) The product of the scalar y and row vector x\nAvailable since 2.0\n \n\nreal operator*(row_vector x, vector y) The product of the row vector x and vector y\nAvailable since 2.0\n \n\nrow_vector 
operator*(row_vector x, matrix y) The product of the row vector x and matrix y\nAvailable since 2.0\n \n\nmatrix operator*(matrix x, real y) The product of the scalar y and matrix x\nAvailable since 2.0\n \n\nvector operator*(matrix x, vector y) The product of the matrix x and vector y\nAvailable since 2.0\n \n\nmatrix operator*(matrix x, matrix y) The product of the matrices x and y\nAvailable since 2.0\n\n\n\n \n\nvector operator+(vector x, real y) The result of adding y to every entry in the vector x\nAvailable since 2.0\n \n\nvector operator+(real x, vector y) The result of adding x to every entry in the vector y\nAvailable since 2.0\n \n\nrow_vector operator+(row_vector x, real y) The result of adding y to every entry in the row vector x\nAvailable since 2.0\n \n\nrow_vector operator+(real x, row_vector y) The result of adding x to every entry in the row vector y\nAvailable since 2.0\n \n\nmatrix operator+(matrix x, real y) The result of adding y to every entry in the matrix x\nAvailable since 2.0\n \n\nmatrix operator+(real x, matrix y) The result of adding x to every entry in the matrix y\nAvailable since 2.0\n \n\nvector operator-(vector x, real y) The result of subtracting y from every entry in the vector x\nAvailable since 2.0\n \n\nvector operator-(real x, vector y) The result of adding x to every entry in the negation of the vector y\nAvailable since 2.0\n \n\nrow_vector operator-(row_vector x, real y) The result of subtracting y from every entry in the row vector x\nAvailable since 2.0\n \n\nrow_vector operator-(real x, row_vector y) The result of adding x to every entry in the negation of the row vector y\nAvailable since 2.0\n \n\nmatrix operator-(matrix x, real y) The result of subtracting y from every entry in the matrix x\nAvailable since 2.0\n \n\nmatrix operator-(real x, matrix y) The result of adding x to every entry in negation of the matrix y\nAvailable since 2.0\n \n\nvector operator/(vector x, real y) The result of dividing each entry in 
the vector x by y\nAvailable since 2.0\n \n\nrow_vector operator/(row_vector x, real y) The result of dividing each entry in the row vector x by y\nAvailable since 2.0\n \n\nmatrix operator/(matrix x, real y) The result of dividing each entry in the matrix x by y\nAvailable since 2.0\n\n\n\n\nMatrix transposition is represented using a postfix operator.\n \n\nmatrix operator'(matrix x) The transpose of the matrix x, written as x'\nAvailable since 2.0\n \n\nrow_vector operator'(vector x) The transpose of the vector x, written as x'\nAvailable since 2.0\n \n\nvector operator'(row_vector x) The transpose of the row vector x, written as x'\nAvailable since 2.0\n\n\n\nElementwise functions apply a function to each element of a vector or matrix, returning a result of the same shape as the argument. There are many functions that are vectorized in addition to the ad hoc cases listed in this section; see section function vectorization for the general cases.\n \n\nvector operator.*(vector x, vector y) The elementwise product of y and x\nAvailable since 2.0\n \n\nrow_vector operator.*(row_vector x, row_vector y) The elementwise product of y and x\nAvailable since 2.0\n \n\nmatrix operator.*(matrix x, matrix y) The elementwise product of y and x\nAvailable since 2.0\n \n\nvector operator./(vector x, vector y) The elementwise quotient of y and x\nAvailable since 2.0\n \n\nvector operator./(vector x, real y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nvector operator./(real x, vector y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nrow_vector operator./(row_vector x, row_vector y) The elementwise quotient of y and x\nAvailable since 2.0\n \n\nrow_vector operator./(row_vector x, real y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nrow_vector operator./(real x, row_vector y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nmatrix operator./(matrix x, matrix y) The elementwise quotient of y and x\nAvailable since 
2.0\n \n\nmatrix operator./(matrix x, real y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nmatrix operator./(real x, matrix y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nvector operator.^(vector x, vector y) The elementwise power of y and x\nAvailable since 2.24\n \n\nvector operator.^(vector x, real y) The elementwise power of y and x\nAvailable since 2.24\n \n\nvector operator.^(real x, vector y) The elementwise power of y and x\nAvailable since 2.24\n \n\nrow_vector operator.^(row_vector x, row_vector y) The elementwise power of y and x\nAvailable since 2.24\n \n\nrow_vector operator.^(row_vector x, real y) The elementwise power of y and x\nAvailable since 2.24\n \n\nrow_vector operator.^(real x, row_vector y) The elementwise power of y and x\nAvailable since 2.24\n \n\nmatrix operator.^(matrix x, matrix y) The elementwise power of y and x\nAvailable since 2.24\n \n\nmatrix operator.^(matrix x, real y) The elementwise power of y and x\nAvailable since 2.24\n \n\nmatrix operator.^(real x, matrix y) The elementwise power of y and x\nAvailable since 2.24\n\n\n\n \n\nreal dot_product(vector x, vector y) The dot product of x and y\nAvailable since 2.0\n \n\nreal dot_product(vector x, row_vector y) The dot product of x and y\nAvailable since 2.0\n \n\nreal dot_product(row_vector x, vector y) The dot product of x and y\nAvailable since 2.0\n \n\nreal dot_product(row_vector x, row_vector y) The dot product of x and y\nAvailable since 2.0\n \n\nrow_vector columns_dot_product(vector x, vector y) The dot product of the columns of x and y\nAvailable since 2.0\n \n\nrow_vector columns_dot_product(row_vector x, row_vector y) The dot product of the columns of x and y\nAvailable since 2.0\n \n\nrow_vector columns_dot_product(matrix x, matrix y) The dot product of the columns of x and y\nAvailable since 2.0\n \n\nvector rows_dot_product(vector x, vector y) The dot product of the rows of x and y\nAvailable since 2.0\n \n\nvector 
rows_dot_product(row_vector x, row_vector y) The dot product of the rows of x and y\nAvailable since 2.0\n \n\nvector rows_dot_product(matrix x, matrix y) The dot product of the rows of x and y\nAvailable since 2.0\n \n\nreal dot_self(vector x) The dot product of the vector x with itself\nAvailable since 2.0\n \n\nreal dot_self(row_vector x) The dot product of the row vector x with itself\nAvailable since 2.0\n \n\nrow_vector columns_dot_self(vector x) The dot product of the columns of x with themselves\nAvailable since 2.0\n \n\nrow_vector columns_dot_self(row_vector x) The dot product of the columns of x with themselves\nAvailable since 2.0\n \n\nrow_vector columns_dot_self(matrix x) The dot product of the columns of x with themselves\nAvailable since 2.0\n \n\nvector rows_dot_self(vector x) The dot product of the rows of x with themselves\nAvailable since 2.0\n \n\nvector rows_dot_self(row_vector x) The dot product of the rows of x with themselves\nAvailable since 2.0\n \n\nvector rows_dot_self(matrix x) The dot product of the rows of x with themselves\nAvailable since 2.0\n\n\n \n\nmatrix tcrossprod(matrix x) The product of x postmultiplied by its own transpose, similar to the tcrossprod(x) function in R. The result is a symmetric matrix \\(\\text{x}\\,\\text{x}^{\\top}\\).\nAvailable since 2.0\n \n\nmatrix crossprod(matrix x) The product of x premultiplied by its own transpose, similar to the crossprod(x) function in R. 
The result is a symmetric matrix \\(\\text{x}^{\\top}\\,\\text{x}\\).\nAvailable since 2.0\nThe following functions all provide shorthand forms for common expressions, which are also much more efficient.\n \n\nmatrix quad_form(matrix A, matrix B) The quadratic form, i.e., B' * A * B.\nAvailable since 2.0\n \n\nreal quad_form(matrix A, vector B) The quadratic form, i.e., B' * A * B.\nAvailable since 2.0\n \n\nmatrix quad_form_diag(matrix m, vector v) The quadratic form using the column vector v as a diagonal matrix, i.e., diag_matrix(v) * m * diag_matrix(v).\nAvailable since 2.3\n \n\nmatrix quad_form_diag(matrix m, row_vector rv) The quadratic form using the row vector rv as a diagonal matrix, i.e., diag_matrix(rv) * m * diag_matrix(rv).\nAvailable since 2.3\n \n\nmatrix quad_form_sym(matrix A, matrix B) Similarly to quad_form, gives B' * A * B, but additionally checks if A is symmetric and ensures that the result is also symmetric.\nAvailable since 2.3\n \n\nreal quad_form_sym(matrix A, vector B) Similarly to quad_form, gives B' * A * B, but additionally checks if A is symmetric and ensures that the result is also symmetric.\nAvailable since 2.3\n \n\nreal trace_dot(matrix A, matrix B) The trace of the matrix product, i.e., trace(A * B).\nAvailable since 2.39\n \n\nreal trace_quad_form(matrix A, matrix B) The trace of the quadratic form, i.e., trace(B' * A * B).\nAvailable since 2.0\n \n\nreal trace_quad_form(matrix A, vector B) The trace of the quadratic form, i.e., trace(B' * A * B).\nAvailable since 2.0\n \n\nreal trace_gen_quad_form(matrix D, matrix A, matrix B) The trace of a generalized quadratic form, i.e., trace(D * B' * A * B).\nAvailable since 2.0\n \n\nmatrix multiply_lower_tri_self_transpose(matrix x) The product of the lower triangular portion of x (including the diagonal) times its own transpose; that is, if L is a matrix of the same dimensions as x with L(m,n) equal to x(m,n) for \\(\\text{n}\n\\leq \\text{m}\\) and L(m,n) equal to 0 if \\(\\text{n} 
> \\text{m}\\), the result is the symmetric matrix \\(\\text{L}\\,\\text{L}^{\\top}\\). This is a specialization of tcrossprod(x) for lower-triangular matrices. The input matrix does not need to be square.\nAvailable since 2.0\n \n\nmatrix diag_pre_multiply(vector v, matrix m) Return the product of the diagonal matrix formed from the vector v and the matrix m, i.e., diag_matrix(v) * m.\nAvailable since 2.0\n \n\nmatrix diag_pre_multiply(row_vector rv, matrix m) Return the product of the diagonal matrix formed from the vector rv and the matrix m, i.e., diag_matrix(rv) * m.\nAvailable since 2.0\n \n\nmatrix diag_post_multiply(matrix m, vector v) Return the product of the matrix m and the diagonal matrix formed from the vector v, i.e., m * diag_matrix(v).\nAvailable since 2.0\n \n\nmatrix diag_post_multiply(matrix m, row_vector rv) Return the product of the matrix m and the diagonal matrix formed from the the row vector rv, i.e., m * diag_matrix(rv).\nAvailable since 2.0\n\n\n\n\n\n\n \n\nreal log_sum_exp(vector x) The natural logarithm of the sum of the exponentials of the elements in x\nAvailable since 2.0\n \n\nreal log_sum_exp(row_vector x) The natural logarithm of the sum of the exponentials of the elements in x\nAvailable since 2.0\n \n\nreal log_sum_exp(matrix x) The natural logarithm of the sum of the exponentials of the elements in x\nAvailable since 2.0\n\n\n\n \n\nreal min(vector x) The minimum value in x, or \\(+\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal min(row_vector x) The minimum value in x, or \\(+\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal min(matrix x) The minimum value in x, or \\(+\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal max(vector x) The maximum value in x, or \\(-\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal max(row_vector x) The maximum value in x, or \\(-\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal max(matrix x) The maximum value in x, or \\(-\\infty\\) if x is empty\nAvailable 
since 2.0\n\n\n\n \n\nreal sum(vector x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.0\n \n\nreal sum(row_vector x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.0\n \n\nreal sum(matrix x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.0\n \n\nreal prod(vector x) The product of the values in x, or 1 if x is empty\nAvailable since 2.0\n \n\nreal prod(row_vector x) The product of the values in x, or 1 if x is empty\nAvailable since 2.0\n \n\nreal prod(matrix x) The product of the values in x, or 1 if x is empty\nAvailable since 2.0\n\n\n\nFull definitions are provided for sample moments in section array reductions.\n \n\nreal mean(vector x) The sample mean of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal mean(row_vector x) The sample mean of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal mean(matrix x) The sample mean of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal variance(vector x) The sample variance of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal variance(row_vector x) The sample variance of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal variance(matrix x) The sample variance of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal sd(vector x) The sample standard deviation of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal sd(row_vector x) The sample standard deviation of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal sd(matrix x) The sample standard deviation of the values in x; see section array reductions for details.\nAvailable since 2.0\n\n\n\nProduces sample quantiles corresponding to the given probabilities. 
The smallest observation corresponds to a probability of 0 and the largest to a probability of 1.\nImplements algorithm 7 from Hyndman, R. J. and Fan, Y., Sample quantiles in Statistical Packages (R’s default quantile function).\n \n\nreal quantile(data vector x, data real p) The p-th quantile of x\nAvailable since 2.27\n \n\narray[] real quantile(data vector x, data array[] real p) An array containing the quantiles of x given by the array of probabilities p\nAvailable since 2.27\n \n\nreal quantile(data row_vector x, data real p) The p-th quantile of x\nAvailable since 2.27\n \n\narray[] real quantile(data row_vector x, data array[] real p) An array containing the quantiles of x given by the array of probabilities p\nAvailable since 2.27\n\n\n\n\nThe following broadcast functions allow vectors, row vectors and matrices to be created by copying a single element into all of their cells. Matrices may also be created by stacking copies of row vectors vertically or stacking copies of column vectors horizontally.\n \n\nvector rep_vector(real x, int m) Return the size m (column) vector consisting of copies of x.\nAvailable since 2.0\n \n\nrow_vector rep_row_vector(real x, int n) Return the size n row vector consisting of copies of x.\nAvailable since 2.0\n \n\nmatrix rep_matrix(real x, int m, int n) Return the m by n matrix consisting of copies of x.\nAvailable since 2.0\n \n\nmatrix rep_matrix(vector v, int n) Return the m by n matrix consisting of n copies of the (column) vector v of size m.\nAvailable since 2.0\n \n\nmatrix rep_matrix(row_vector rv, int m) Return the m by n matrix consisting of m copies of the row vector rv of size n.\nAvailable since 2.0\nUnlike the situation with array broadcasting (see section array broadcasting), where there is a distinction between integer and real arguments, the following two statements produce the same result for vector broadcasting; row vector and matrix broadcasting behave similarly.\n vector[3] x;\n x = rep_vector(1, 3);\n x 
= rep_vector(1.0, 3);\nThere are no integer vector or matrix types, so integer values are automatically promoted.\n\n\n \n\nmatrix symmetrize_from_lower_tri(matrix A)\nConstruct a symmetric matrix from the lower triangle of A.\nAvailable since 2.26\n\n\n\n\n \n\nmatrix add_diag(matrix m, row_vector d) Add row_vector d to the diagonal of matrix m.\nAvailable since 2.21\n \n\nmatrix add_diag(matrix m, vector d) Add vector d to the diagonal of matrix m.\nAvailable since 2.21\n \n\nmatrix add_diag(matrix m, real d) Add scalar d to every diagonal element of matrix m.\nAvailable since 2.21\n \n\nvector diagonal(matrix x) The diagonal of the matrix x\nAvailable since 2.0\n \n\nmatrix diag_matrix(vector x) The diagonal matrix with diagonal x\nAvailable since 2.0\nAlthough the diag_matrix function is available, it is unlikely to ever show up in an efficient Stan program. For example, rather than converting a diagonal to a full matrix for use as a covariance matrix,\n y ~ multi_normal(mu, diag_matrix(square(sigma)));\nit is much more efficient to just use a univariate normal, which produces the same density,\n y ~ normal(mu, sigma);\nRather than writing m * diag_matrix(v) where m is a matrix and v is a vector, it is much more efficient to write diag_post_multiply(m, v) (and similarly for pre-multiplication). By the same token, it is better to use quad_form_diag(m, v) rather than quad_form(m, diag_matrix(v)).\n \n\nmatrix identity_matrix(int k) Create an identity matrix of size \\(k \\times k\\)\nAvailable since 2.26\n\n\n\n \n\narray[] real linspaced_array(int n, data real lower, data real upper) Create a real array of length n of equidistantly-spaced elements between lower and upper\nAvailable since 2.24\n \n\narray[] int linspaced_int_array(int n, int lower, int upper) Create a regularly spaced, increasing integer array of length n between lower and upper, inclusively. If (upper - lower) / (n - 1) is less than one, repeat each output (n - 1) / (upper - lower) times. 
If neither (upper - lower) / (n - 1) or (n - 1) / (upper - lower) are integers, upper is reduced until one of these is true.\nAvailable since 2.26\n \n\nvector linspaced_vector(int n, data real lower, data real upper) Create an n-dimensional vector of equidistantly-spaced elements between lower and upper\nAvailable since 2.24\n \n\nrow_vector linspaced_row_vector(int n, data real lower, data real upper) Create an n-dimensional row-vector of equidistantly-spaced elements between lower and upper\nAvailable since 2.24\n \n\narray[] int one_hot_int_array(int n, int k) Create a one-hot encoded int array of length n with array[k] = 1\nAvailable since 2.26\n \n\narray[] real one_hot_array(int n, int k) Create a one-hot encoded real array of length n with array[k] = 1\nAvailable since 2.24\n \n\nvector one_hot_vector(int n, int k) Create an n-dimensional one-hot encoded vector with vector[k] = 1\nAvailable since 2.24\n \n\nrow_vector one_hot_row_vector(int n, int k) Create an n-dimensional one-hot encoded row-vector with row_vector[k] = 1\nAvailable since 2.24\n \n\narray[] int ones_int_array(int n) Create an int array of length n of all ones\nAvailable since 2.26\n \n\narray[] real ones_array(int n) Create a real array of length n of all ones\nAvailable since 2.26\n \n\nvector ones_vector(int n) Create an n-dimensional vector of all ones\nAvailable since 2.26\n \n\nrow_vector ones_row_vector(int n) Create an n-dimensional row-vector of all ones\nAvailable since 2.26\n \n\narray[] int zeros_int_array(int n) Create an int array of length n of all zeros\nAvailable since 2.26\n \n\narray[] real zeros_array(int n) Create a real array of length n of all zeros\nAvailable since 2.24\n \n\nvector zeros_vector(int n) Create an n-dimensional vector of all zeros\nAvailable since 2.24\n \n\nrow_vector zeros_row_vector(int n) Create an n-dimensional row-vector of all zeros\nAvailable since 2.24\n \n\nvector uniform_simplex(int n) Create an n-dimensional simplex with elements vector[i] 
= 1 / n for all \\(i \\in 1, \\dots, n\\)\nAvailable since 2.24\n\n\n\nStan provides several functions for generating slices or blocks or diagonal entries for matrices.\n\n\n \n\nvector col(matrix x, int n) The n-th column of matrix x\nAvailable since 2.0\n \n\nrow_vector row(matrix x, int m) The m-th row of matrix x\nAvailable since 2.0\nThe row function is special in that it may be used as an lvalue in an assignment statement (i.e., something to which a value may be assigned). The row function is also special in that the indexing notation x[m] is just an alternative way of writing row(x,m). The col function may not be used as an lvalue, nor is there an indexing-based shorthand for it.\n\n\n\n\n\nBlock operations may be used to extract a sub-block of a matrix.\n \n\nmatrix block(matrix x, int i, int j, int n_rows, int n_cols) Return the submatrix of x that starts at row i and column j and extends n_rows rows and n_cols columns.\nAvailable since 2.0\nThe sub-row and sub-column operations may be used to extract a slice of row or column from a matrix\n \n\nvector sub_col(matrix x, int i, int j, int n_rows) Return the sub-column of x that starts at row i and column j and extends n_rows rows and 1 column.\nAvailable since 2.0\n \n\nrow_vector sub_row(matrix x, int i, int j, int n_cols) Return the sub-row of x that starts at row i and column j and extends 1 row and n_cols columns.\nAvailable since 2.0\n\n\n\nThe head operation extracts the first \\(n\\) elements of a vector and the tail operation the last.
The segment operation extracts an arbitrary subvector.\n \n\nvector head(vector v, int n) Return the vector consisting of the first n elements of v.\nAvailable since 2.0\n \n\nrow_vector head(row_vector rv, int n) Return the row vector consisting of the first n elements of rv.\nAvailable since 2.0\n \n\narray[] T head(array[] T sv, int n) Return the array consisting of the first n elements of sv; applies to up to three-dimensional arrays containing any type of elements T.\nAvailable since 2.0\n \n\nvector tail(vector v, int n) Return the vector consisting of the last n elements of v.\nAvailable since 2.0\n \n\nrow_vector tail(row_vector rv, int n) Return the row vector consisting of the last n elements of rv.\nAvailable since 2.0\n \n\narray[] T tail(array[] T sv, int n) Return the array consisting of the last n elements of sv; applies to up to three-dimensional arrays containing any type of elements T.\nAvailable since 2.0\n \n\nvector segment(vector v, int i, int n) Return the vector consisting of the n elements of v starting at i; i.e., elements i through i + n - 1.\nAvailable since 2.0\n \n\nrow_vector segment(row_vector rv, int i, int n) Return the row vector consisting of the n elements of rv starting at i; i.e., elements i through i + n - 1.\nAvailable since 2.10\n \n\narray[] T segment(array[] T sv, int i, int n) Return the array consisting of the n elements of sv starting at i; i.e., elements i through i + n - 1. Applies to up to three-dimensional arrays containing any type of elements T.\nAvailable since 2.0\n\n\n\n\n\nStan’s matrix and vector concatenation operations append_col and append_row are like the operations cbind and rbind in R.\n\n\n \n\nmatrix append_col(matrix x, matrix y) Combine matrices x and y by column. The matrices must have the same number of rows.\nAvailable since 2.5\n \n\nmatrix append_col(matrix x, vector y) Combine matrix x and vector y by column.
The matrix and the vector must have the same number of rows.\nAvailable since 2.5\n \n\nmatrix append_col(vector x, matrix y) Combine vector x and matrix y by column. The vector and the matrix must have the same number of rows.\nAvailable since 2.5\n \n\nmatrix append_col(vector x, vector y) Combine vectors x and y by column. The vectors must have the same number of rows.\nAvailable since 2.5\n \n\nrow_vector append_col(row_vector x, row_vector y) Combine row vectors x and y of any size into another row vector by appending y to the end of x.\nAvailable since 2.5\n \n\nrow_vector append_col(real x, row_vector y) Append x to the front of y, returning another row vector.\nAvailable since 2.12\n \n\nrow_vector append_col(row_vector x, real y) Append y to the end of x, returning another row vector.\nAvailable since 2.12\n\n\n\n \n\nmatrix append_row(matrix x, matrix y) Combine matrices x and y by row. The matrices must have the same number of columns.\nAvailable since 2.5\n \n\nmatrix append_row(matrix x, row_vector y) Combine matrix x and row vector y by row. The matrix and the row vector must have the same number of columns.\nAvailable since 2.5\n \n\nmatrix append_row(row_vector x, matrix y) Combine row vector x and matrix y by row. The row vector and the matrix must have the same number of columns.\nAvailable since 2.5\n \n\nmatrix append_row(row_vector x, row_vector y) Combine row vectors x and y by row. 
The row vectors must have the same number of columns.\nAvailable since 2.5\n \n\nvector append_row(vector x, vector y) Concatenate vectors x and y of any size into another vector.\nAvailable since 2.5\n \n\nvector append_row(real x, vector y) Append x to the top of y, returning another vector.\nAvailable since 2.12\n \n\nvector append_row(vector x, real y) Append y to the bottom of x, returning another vector.\nAvailable since 2.12\n\n\n\n\n\n\nThe softmax function maps1 \\(y \\in \\mathbb{R}^K\\) to the \\(K\\)-simplex by \\[\\begin{equation*} \\text{softmax}(y) = \\frac{\\exp(y)}\n{\\sum_{k=1}^K \\exp(y_k)}, \\end{equation*}\\] where \\(\\exp(y)\\) is the componentwise exponentiation of \\(y\\). Softmax is usually calculated on the log scale, \\[\\begin{eqnarray*} \\log \\text{softmax}(y) & = & \\ y - \\log \\sum_{k=1}^K\n\\exp(y_k) \\\\[4pt] & = & y - \\mathrm{log\\_sum\\_exp}(y). \\end{eqnarray*}\\] where the vector \\(y\\) minus the scalar \\(\\mathrm{log\\_sum\\_exp}(y)\\) subtracts the scalar from each component of \\(y\\).\nStan provides the following functions for softmax and its log.\n \n\nvector softmax(vector x) The softmax of x\nAvailable since 2.0\n \n\nvector log_softmax(vector x) The natural logarithm of the softmax of x\nAvailable since 2.0\n\n\n\nThe cumulative sum of a sequence \\(x_1,\\ldots,x_N\\) is the sequence \\(y_1,\\ldots,y_N\\), where \\[\\begin{equation*} y_n = \\sum_{m = 1}^{n} x_m. 
\\end{equation*}\\]\n \n\narray[] int cumulative_sum(array[] int x) The cumulative sum of x\nAvailable since 2.30\n \n\narray[] real cumulative_sum(array[] real x) The cumulative sum of x\nAvailable since 2.0\n \n\nvector cumulative_sum(vector v) The cumulative sum of v\nAvailable since 2.0\n \n\nrow_vector cumulative_sum(row_vector rv) The cumulative sum of rv\nAvailable since 2.0\n\n\n\n\nThe Gaussian process covariance functions compute the covariance between observations in an input data set or the cross-covariance between two input data sets.\nFor one dimensional GPs, the input data sets are arrays of scalars. The covariance matrix is given by \\(K_{ij} = k(x_i, x_j)\\) (where \\(x_i\\) is the \\(i^{th}\\) element of the array \\(x\\)) and the cross-covariance is given by \\(K_{ij} = k(x_i, y_j)\\).\nFor multi-dimensional GPs, the input data sets are arrays of vectors. The covariance matrix is given by \\(K_{ij} = k(\\mathbf{x}_i, \\mathbf{x}_j)\\) (where \\(\\mathbf{x}_i\\) is the \\(i^{th}\\) vector in the array \\(x\\)) and the cross-covariance is given by \\(K_{ij} = k(\\mathbf{x}_i, \\mathbf{y}_j)\\).\n\n\nWith magnitude \\(\\sigma\\) and length scale \\(l\\), the exponentiated quadratic kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\exp \\left( -\\frac{|\\mathbf{x}_i - \\mathbf{x}_j|^2}{2l^2} \\right)\n\\]\n \n\nmatrix gp_exp_quad_cov(array[] real x, real sigma, real length_scale)\nGaussian process covariance with exponentiated quadratic kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(array[] real x1, array[] real x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(vectors x, real sigma, real length_scale)\nGaussian process covariance with exponentiated quadratic kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(vectors x, real sigma, array[] 
real length_scale)\nGaussian process covariance with exponentiated quadratic kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(vectors x1, vectors x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n\n\n\nWith bias \\(\\sigma_0\\) the dot product kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma_0^2 + \\mathbf{x}_i^T \\mathbf{x}_j\n\\]\n \n\nmatrix gp_dot_prod_cov(array[] real x, real sigma)\nGaussian process covariance with dot product kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_dot_prod_cov(array[] real x1, array[] real x2, real sigma)\nGaussian process cross-covariance of x1 and x2 with dot product kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_dot_prod_cov(vectors x, real sigma)\nGaussian process covariance with dot product kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_dot_prod_cov(vectors x1, vectors x2, real sigma)\nGaussian process cross-covariance of x1 and x2 with dot product kernel in multiple dimensions.\nAvailable since 2.20\n\n\n\nWith magnitude \\(\\sigma\\) and length scale \\(l\\), the exponential kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\exp \\left( -\\frac{|\\mathbf{x}_i - \\mathbf{x}_j|}{l} \\right)\n\\]\n \n\nmatrix gp_exponential_cov(array[] real x, real sigma, real length_scale)\nGaussian process covariance with exponential kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(array[] real x1, array[] real x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with 
exponential kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(vectors x, real sigma, real length_scale)\nGaussian process covariance with exponential kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(vectors x, real sigma, array[] real length_scale)\nGaussian process covariance with exponential kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(vectors x1, vectors x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponential kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponential kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n\n\n\nWith magnitude \\(\\sigma\\) and length scale \\(l\\), the Matern 3/2 kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\left( 1 + \\frac{\\sqrt{3}|\\mathbf{x}_i - \\mathbf{x}_j|}{l} \\right) \\exp \\left( -\\frac{\\sqrt{3}|\\mathbf{x}_i - \\mathbf{x}_j|}{l} \\right)\n\\]\n \n\nmatrix gp_matern32_cov(array[] real x, real sigma, real length_scale)\nGaussian process covariance with Matern 3/2 kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(array[] real x1, array[] real x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(vectors x, real sigma, real length_scale)\nGaussian process covariance with Matern 3/2 kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(vectors x, real sigma, array[] real length_scale)\nGaussian process covariance with Matern 3/2 kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(vectors x1, 
vectors x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n\n\n\nWith magnitude \\(\\sigma\\) and length scale \\(l\\), the Matern 5/2 kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\left( 1 + \\frac{\\sqrt{5}|\\mathbf{x}_i - \\mathbf{x}_j|}{l} + \\frac{5 |\\mathbf{x}_i - \\mathbf{x}_j|^2}{3l^2} \\right)\n\\exp \\left( -\\frac{\\sqrt{5} |\\mathbf{x}_i - \\mathbf{x}_j|}{l} \\right)\n\\]\n \n\nmatrix gp_matern52_cov(array[] real x, real sigma, real length_scale)\nGaussian process covariance with Matern 5/2 kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(array[] real x1, array[] real x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(vectors x, real sigma, real length_scale)\nGaussian process covariance with Matern 5/2 kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(vectors x, real sigma, array[] real length_scale)\nGaussian process covariance with Matern 5/2 kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(vectors x1, vectors x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n\n\n\nWith magnitude \\(\\sigma\\), 
length scale \\(l\\), and period \\(p\\), the periodic kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\exp \\left(-\\frac{2 \\sin^2 \\left( \\pi \\frac{|\\mathbf{x}_i - \\mathbf{x}_j|}{p} \\right) }{l^2} \\right)\n\\]\n \n\nmatrix gp_periodic_cov(array[] real x, real sigma, real length_scale, real period)\nGaussian process covariance with periodic kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_periodic_cov(array[] real x1, array[] real x2, real sigma, real length_scale, real period)\nGaussian process cross-covariance of x1 and x2 with periodic kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_periodic_cov(vectors x, real sigma, real length_scale, real period)\nGaussian process covariance with periodic kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_periodic_cov(vectors x1, vectors x2, real sigma, real length_scale, real period)\nGaussian process cross-covariance of x1 and x2 with periodic kernel in multiple dimensions.\nAvailable since 2.20\n\n\n\n\n\n\nIn general, it is much more efficient and also more arithmetically stable to use matrix division than to multiply by an inverse. There are specialized forms for lower triangular matrices and for symmetric, positive-definite matrices.\n\n\n \n\nrow_vector operator/(row_vector b, matrix A) The right division of b by A; equivalently b * inverse(A)\nAvailable since 2.0\n \n\nmatrix operator/(matrix B, matrix A) The right division of B by A; equivalently B * inverse(A)\nAvailable since 2.5\n \n\nvector operator\\(matrix A, vector b) The left division of b by A; equivalently inverse(A) * b\nAvailable since 2.18\n \n\nmatrix operator\\(matrix A, matrix B) The left division of B by A; equivalently inverse(A) * B\nAvailable since 2.18\n\n\n\nThere are four division functions which use lower triangular views of a matrix.
The lower triangular view of a matrix \\(\\text{tri}(A)\\) is used in the definitions and defined by \\[\\begin{equation*} \\text{tri}(A)[m,n] = \\left\\{\n\\begin{array}{ll} A[m,n] & \\text{if } m \\geq n, \\text{ and} \\\\[4pt] 0\n& \\text{otherwise}. \\end{array} \\right. \\end{equation*}\\] When a lower triangular view of a matrix is used, the elements above the diagonal are ignored.\n \n\nvector mdivide_left_tri_low(matrix A, vector b) The left division of b by a lower-triangular view of A; algebraically equivalent to the less efficient and stable form inverse(tri(A)) * b, where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.\nAvailable since 2.12\n \n\nmatrix mdivide_left_tri_low(matrix A, matrix B) The left division of B by a triangular view of A; algebraically equivalent to the less efficient and stable form inverse(tri(A)) * B, where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.\nAvailable since 2.5\n \n\nrow_vector mdivide_right_tri_low(row_vector b, matrix A) The right division of b by a triangular view of A; algebraically equivalent to the less efficient and stable form b * inverse(tri(A)), where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.\nAvailable since 2.12\n \n\nmatrix mdivide_right_tri_low(matrix B, matrix A) The right division of B by a triangular view of A; algebraically equivalent to the less efficient and stable form B * inverse(tri(A)), where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.\nAvailable since 2.5\n\n\n\n\nThere are four division functions which are specialized for efficiency and stability for symmetric positive-definite matrix dividends. 
If the matrix dividend argument is not symmetric and positive definite, these will reject and print warnings.\n \n\nvector mdivide_left_spd(matrix A, vector b) The left division of b by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form inverse(A) * b.\nAvailable since 2.12\n \n\nmatrix mdivide_left_spd(matrix A, matrix B) The left division of B by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form inverse(A) * B.\nAvailable since 2.12\n \n\nrow_vector mdivide_right_spd(row_vector b, matrix A) The right division of b by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form b * inverse(A).\nAvailable since 2.12\n \n\nmatrix mdivide_right_spd(matrix B, matrix A) The right division of B by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form B * inverse(A).\nAvailable since 2.12\n\n\n\nThe exponential of the matrix \\(A\\) is formally defined by the convergent power series: \\[\\begin{equation*} e^A = \\sum_{n=0}^{\\infty} \\dfrac{A^n}{n!}\n\\end{equation*}\\]\n \n\nmatrix matrix_exp(matrix A) The matrix exponential of A\nAvailable since 2.13\n \n\nmatrix matrix_exp_multiply(matrix A, matrix B) The multiplication of matrix exponential of A and matrix B; algebraically equivalent to the less efficient form matrix_exp(A) * B.\nAvailable since 2.18\n \n\nmatrix scale_matrix_exp_multiply(real t, matrix A, matrix B) The multiplication of matrix exponential of tA and matrix B; algebraically equivalent to the less efficient form matrix_exp(t * A) * B.\nAvailable since 2.18\n\n\n\nReturns the nth power of the specific matrix: \\[\\begin{equation*} M^n = M_1 * ...
* M_n \\end{equation*}\\]\n \n\nmatrix matrix_power(matrix A, int B) Matrix A raised to the power B.\nAvailable since 2.24\n\n\n\n\n\n \n\nreal trace(matrix A) The trace of A, or 0 if A is empty; A is not required to be diagonal\nAvailable since 2.0\n\n\n\n \n\nreal determinant(matrix A) The determinant of A\nAvailable since 2.0\n \n\nreal log_determinant(matrix A) The log of the absolute value of the determinant of A\nAvailable since 2.0\nreal log_determinant_spd(matrix A) The log of the absolute value of the determinant of the symmetric, positive-definite matrix A.\nAvailable since 2.30\n\n\n\nIt is almost never a good idea to use matrix inverses directly because they are both inefficient and arithmetically unstable compared to the alternatives. Rather than inverting a matrix m and post-multiplying by a vector or matrix a, as in inverse(m) * a, it is better to code this using matrix division, as in m \\ a. The pre-multiplication case is similar, with b * inverse(m) being more efficiently coded as b / m. There are also useful special cases for triangular and symmetric, positive-definite matrices that use more efficient solvers.\nWarning: The function inv(m) is the elementwise inverse function, which returns 1 / m[i, j] for each element.\n \n\nmatrix inverse(matrix A) Compute the inverse of A\nAvailable since 2.0\n \n\nmatrix inverse_spd(matrix A) Compute the inverse of A where A is symmetric, positive definite. This version is faster and more arithmetically stable when the input is symmetric and positive definite.\nAvailable since 2.0\n \n\nmatrix chol2inv(matrix L) Compute the inverse of the matrix whose cholesky factorization is L. That is, for \\(A = L L^T\\), return \\(A^{-1}\\).\nAvailable since 2.26\n\n\n\nThe generalized inverse \\(M^+\\) of a matrix \\(M\\) is a matrix that satisfies \\(M M^+ M = M\\). For an invertible, square matrix \\(M\\), \\(M^+\\) is equivalent to \\(M^{-1}\\).
The dimensions of \\(M^+\\) are equivalent to the dimensions of \\(M^T\\). The generalized inverse exists for any matrix, so the \\(M\\) may be singular or less than full rank.\nEven though the generalized inverse exists for any arbitrary matrix, the derivatives of this function only exist on matrices of locally constant rank (Golub and Pereyra 1973), meaning, the derivatives do not exist if small perturbations make the matrix change rank. For example, consider the rank of the matrix \\(A\\) as a function of \\(\\epsilon\\):\n\\[\nA = \\left(\n \\begin{array}{cccc}\n 1 + \\epsilon & 2 & 1 \\\\\n 2 & 4 & 2\n \\end{array}\n \\right)\n\\]\nWhen \\(\\epsilon = 0\\), \\(A\\) is rank 1 because the second row is twice the first (and so there is only one linearly independent row). If \\(\\epsilon \\neq 0\\), the rows are no longer linearly dependent, and the matrix is rank 2. This matrix does not have locally constant rank at \\(\\epsilon = 0\\), and so the derivatives do not exist at zero. Because HMC depends on the derivatives existing, this lack of differentiability creates undefined behavior.\n \n\nmatrix generalized_inverse(matrix A) The generalized inverse of A\nAvailable since 2.26\n\n\n\n \n\ncomplex_vector eigenvalues(matrix A) The complex-valued vector of eigenvalues of the matrix A. The eigenvalues are repeated according to their algebraic multiplicity, so there are as many eigenvalues as rows in the matrix. The eigenvalues are not sorted in any particular order.\nAvailable since 2.30\n \n\ncomplex_matrix eigenvectors(matrix A) The matrix with the complex-valued (column) eigenvectors of the matrix A in the same order as returned by the function eigenvalues\nAvailable since 2.30\n \n\ntuple(complex_matrix, complex_vector) eigendecompose(matrix A) Return the matrix of (column) eigenvectors and vector of eigenvalues of the matrix A.
This function is equivalent to (eigenvectors(A), eigenvalues(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\n \n\nvector eigenvalues_sym(matrix A) The vector of eigenvalues of a symmetric matrix A in ascending order\nAvailable since 2.0\n \n\nmatrix eigenvectors_sym(matrix A) The matrix with the (column) eigenvectors of symmetric matrix A in the same order as returned by the function eigenvalues_sym\nAvailable since 2.0\n \n\ntuple(matrix, vector) eigendecompose_sym(matrix A) Return the matrix of (column) eigenvectors and vector of eigenvalues of the symmetric matrix A. This function is equivalent to (eigenvectors_sym(A), eigenvalues_sym(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\nBecause multiplying an eigenvector by \\(-1\\) results in an eigenvector, eigenvectors returned by a decomposition are only identified up to a sign change. In order to compare the eigenvectors produced by Stan’s eigendecomposition to others, signs may need to be normalized in some way, such as by fixing the sign of a component, or doing comparisons allowing a multiplication by \\(-1\\).\nThe condition number of a symmetric matrix is defined to be the ratio of the largest eigenvalue to the smallest eigenvalue. 
Large condition numbers lead to difficulty in numerical algorithms such as computing inverses, and matrices with large condition numbers are thus known as “ill conditioned.” The ratio can even be infinite in the case of singular matrices (i.e., those with eigenvalues of 0).\n\n\n\n \n\nmatrix qr_thin_Q(matrix A) The orthogonal matrix in the thin QR decomposition of A, which implies that the resulting matrix has the same dimensions as A\nAvailable since 2.18\n \n\nmatrix qr_thin_R(matrix A) The upper triangular matrix in the thin QR decomposition of A, which implies that the resulting matrix is square with the same number of columns as A\nAvailable since 2.18\n \n\ntuple(matrix, matrix) qr_thin(matrix A) Returns both portions of the QR decomposition of A. The first element (“Q”) is the orthonormal matrix in the thin QR decomposition and the second element (“R”) is upper triangular. This function is equivalent to (qr_thin_Q(A), qr_thin_R(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\n \n\nmatrix qr_Q(matrix A) The orthogonal matrix in the fat QR decomposition of A, which implies that the resulting matrix is square with the same number of rows as A\nAvailable since 2.3\n \n\nmatrix qr_R(matrix A) The upper trapezoidal matrix in the fat QR decomposition of A, which implies that the resulting matrix will be rectangular with the same dimensions as A\nAvailable since 2.3\n \n tuple(matrix, matrix) qr(matrix A) Returns both portions of the QR decomposition of A. The first element (“Q”) is the orthogonal matrix in the fat QR decomposition and the second element (“R”) is upper trapezoidal. This function is equivalent to (qr_Q(A), qr_R(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\nThe thin QR decomposition is always preferable because it will consume much less memory when the input matrix is large than will the fat QR decomposition.
Both versions of the decomposition represent the input matrix as \\[\\begin{equation*} A = Q \\, R. \\end{equation*}\\] Multiplying a column of an orthogonal matrix by \\(-1\\) still results in an orthogonal matrix, and you can multiply the corresponding row of the upper trapezoidal matrix by \\(-1\\) without changing the product. Thus, Stan adopts the normalization that the diagonal elements of the upper trapezoidal matrix are strictly positive and the columns of the orthogonal matrix are reflected if necessary. Also, these QR decomposition algorithms do not utilize pivoting and thus may be numerically unstable on input matrices that have less than full rank.\n\n\n\nEvery symmetric, positive-definite matrix (such as a correlation or covariance matrix) has a Cholesky decomposition. If \\(\\Sigma\\) is a symmetric, positive-definite matrix, its Cholesky decomposition is the lower-triangular matrix \\(L\\) such that \\[\\begin{equation*} \\Sigma = L \\, L^{\\top}. \\end{equation*}\\]\n \n\nmatrix cholesky_decompose(matrix A) The lower-triangular Cholesky factor of the symmetric positive-definite matrix A\nAvailable since 2.0\n\n\n\nThe matrix A can be decomposed into a diagonal matrix of singular values, D, and matrices of its left and right singular vectors, U and V, \\[\\begin{equation*} A = U D V^{\\top}. \\end{equation*}\\] The matrices of singular vectors here are thin. That is, for an \\(N\\) by \\(P\\) input A with \\(M = \\min(N, P)\\), U is size \\(N\\) by \\(M\\) and V is size \\(P\\) by \\(M\\).\n \n\nvector singular_values(matrix A) The singular values of A in descending order\nAvailable since 2.0\n \n\nmatrix svd_U(matrix A) The left-singular vectors of A\nAvailable since 2.26\n \n\nmatrix svd_V(matrix A) The right-singular vectors of A\nAvailable since 2.26\n \n\ntuple(matrix, vector, matrix) svd(matrix A) Returns a tuple containing the left-singular vectors of A, the singular values of A in descending order, and the right-singular vectors of A. 
This function is equivalent to (svd_U(A), singular_values(A), svd_V(A)) but with a lower computational cost due to the shared work between the different components.\nAvailable since 2.33\n\n\n\n\n\nSee the sorting functions section for examples of how the functions work.\n \n\nvector sort_asc(vector v) Sort the elements of v in ascending order\nAvailable since 2.0\n \n\nrow_vector sort_asc(row_vector v) Sort the elements of v in ascending order\nAvailable since 2.0\n \n\nvector sort_desc(vector v) Sort the elements of v in descending order\nAvailable since 2.0\n \n\nrow_vector sort_desc(row_vector v) Sort the elements of v in descending order\nAvailable since 2.0\n \n\narray[] int sort_indices_asc(vector v) Return an array of indices between 1 and the size of v, sorted to index v in ascending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_asc(row_vector v) Return an array of indices between 1 and the size of v, sorted to index v in ascending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_desc(vector v) Return an array of indices between 1 and the size of v, sorted to index v in descending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_desc(row_vector v) Return an array of indices between 1 and the size of v, sorted to index v in descending order.\nAvailable since 2.3\n \n\nint rank(vector v, int s) Number of components of v less than v[s]\nAvailable since 2.0\n \n\nint rank(row_vector v, int s) Number of components of v less than v[s]\nAvailable since 2.0\n\n\n\n \n\nvector reverse(vector v) Return a new vector containing the elements of the argument in reverse order.\nAvailable since 2.23\n \n\nrow_vector reverse(row_vector v) Return a new row vector containing the elements of the argument in reverse order.\nAvailable since 2.23", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#integer-valued-matrix-size-functions", + 
"href": "functions-reference/matrix_operations.html#integer-valued-matrix-size-functions", + "title": "Matrix Operations", + "section": "", + "text": "int num_elements(vector x) The total number of elements in the vector x (same as function rows)\nAvailable since 2.5\n \n\nint num_elements(row_vector x) The total number of elements in the vector x (same as function cols)\nAvailable since 2.5\n \n\nint num_elements(matrix x) The total number of elements in the matrix x. For example, if x is a \\(5 \\times 3\\) matrix, then num_elements(x) is 15\nAvailable since 2.5\n \n\nint rows(vector x) The number of rows in the vector x\nAvailable since 2.0\n \n\nint rows(row_vector x) The number of rows in the row vector x, namely 1\nAvailable since 2.0\n \n\nint rows(matrix x) The number of rows in the matrix x\nAvailable since 2.0\n \n\nint cols(vector x) The number of columns in the vector x, namely 1\nAvailable since 2.0\n \n\nint cols(row_vector x) The number of columns in the row vector x\nAvailable since 2.0\n \n\nint cols(matrix x) The number of columns in the matrix x\nAvailable since 2.0\n \n\nint size(vector x) The size of x, i.e., the number of elements\nAvailable since 2.26\n \n\nint size(row_vector x) The size of x, i.e., the number of elements\nAvailable since 2.26\n \n\nint size(matrix x) The size of the matrix x. For example, if x is a \\(5 \\times 3\\) matrix, then size(x) is 15\nAvailable since 2.26", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#matrix-arithmetic-operators", + "href": "functions-reference/matrix_operations.html#matrix-arithmetic-operators", + "title": "Matrix Operations", + "section": "", + "text": "Stan supports the basic matrix operations using infix, prefix and postfix operations. 
This section lists the operations supported by Stan along with their argument and result types.\n\n\n \n\nvector operator-(vector x) The negation of the vector x.\nAvailable since 2.0\n \n\nrow_vector operator-(row_vector x) The negation of the row vector x.\nAvailable since 2.0\n \n\nmatrix operator-(matrix x) The negation of the matrix x.\nAvailable since 2.0\n \n\nT operator-(T x) Vectorized version of operator-. If T x is a (possibly nested) array of matrix types, -x is the same shape array where each individual value is negated.\nAvailable since 2.31\n\n\n\n \n\nvector operator+(vector x, vector y) The sum of the vectors x and y.\nAvailable since 2.0\n \n\nrow_vector operator+(row_vector x, row_vector y) The sum of the row vectors x and y.\nAvailable since 2.0\n \n\nmatrix operator+(matrix x, matrix y) The sum of the matrices x and y\nAvailable since 2.0\n \n\nvector operator-(vector x, vector y) The difference between the vectors x and y.\nAvailable since 2.0\n \n\nrow_vector operator-(row_vector x, row_vector y) The difference between the row vectors x and y\nAvailable since 2.0\n \n\nmatrix operator-(matrix x, matrix y) The difference between the matrices x and y\nAvailable since 2.0\n \n\nvector operator*(real x, vector y) The product of the scalar x and vector y\nAvailable since 2.0\n \n\nrow_vector operator*(real x, row_vector y) The product of the scalar x and the row vector y\nAvailable since 2.0\n \n\nmatrix operator*(real x, matrix y) The product of the scalar x and the matrix y\nAvailable since 2.0\n \n\nvector operator*(vector x, real y) The product of the scalar y and vector x\nAvailable since 2.0\n \n\nmatrix operator*(vector x, row_vector y) The product of the vector x and row vector y\nAvailable since 2.0\n \n\nrow_vector operator*(row_vector x, real y) The product of the scalar y and row vector x\nAvailable since 2.0\n \n\nreal operator*(row_vector x, vector y) The product of the row vector x and vector y\nAvailable since 2.0\n \n\nrow_vector 
operator*(row_vector x, matrix y) The product of the row vector x and matrix y\nAvailable since 2.0\n \n\nmatrix operator*(matrix x, real y) The product of the scalar y and matrix x\nAvailable since 2.0\n \n\nvector operator*(matrix x, vector y) The product of the matrix x and vector y\nAvailable since 2.0\n \n\nmatrix operator*(matrix x, matrix y) The product of the matrices x and y\nAvailable since 2.0\n\n\n\n \n\nvector operator+(vector x, real y) The result of adding y to every entry in the vector x\nAvailable since 2.0\n \n\nvector operator+(real x, vector y) The result of adding x to every entry in the vector y\nAvailable since 2.0\n \n\nrow_vector operator+(row_vector x, real y) The result of adding y to every entry in the row vector x\nAvailable since 2.0\n \n\nrow_vector operator+(real x, row_vector y) The result of adding x to every entry in the row vector y\nAvailable since 2.0\n \n\nmatrix operator+(matrix x, real y) The result of adding y to every entry in the matrix x\nAvailable since 2.0\n \n\nmatrix operator+(real x, matrix y) The result of adding x to every entry in the matrix y\nAvailable since 2.0\n \n\nvector operator-(vector x, real y) The result of subtracting y from every entry in the vector x\nAvailable since 2.0\n \n\nvector operator-(real x, vector y) The result of adding x to every entry in the negation of the vector y\nAvailable since 2.0\n \n\nrow_vector operator-(row_vector x, real y) The result of subtracting y from every entry in the row vector x\nAvailable since 2.0\n \n\nrow_vector operator-(real x, row_vector y) The result of adding x to every entry in the negation of the row vector y\nAvailable since 2.0\n \n\nmatrix operator-(matrix x, real y) The result of subtracting y from every entry in the matrix x\nAvailable since 2.0\n \n\nmatrix operator-(real x, matrix y) The result of adding x to every entry in the negation of the matrix y\nAvailable since 2.0\n \n\nvector operator/(vector x, real y) The result of dividing each entry in 
the vector x by y\nAvailable since 2.0\n \n\nrow_vector operator/(row_vector x, real y) The result of dividing each entry in the row vector x by y\nAvailable since 2.0\n \n\nmatrix operator/(matrix x, real y) The result of dividing each entry in the matrix x by y\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#transposition-operator", + "href": "functions-reference/matrix_operations.html#transposition-operator", + "title": "Matrix Operations", + "section": "", + "text": "Matrix transposition is represented using a postfix operator.\n \n\nmatrix operator'(matrix x) The transpose of the matrix x, written as x'\nAvailable since 2.0\n \n\nrow_vector operator'(vector x) The transpose of the vector x, written as x'\nAvailable since 2.0\n \n\nvector operator'(row_vector x) The transpose of the row vector x, written as x'\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#elementwise-functions", + "href": "functions-reference/matrix_operations.html#elementwise-functions", + "title": "Matrix Operations", + "section": "", + "text": "Elementwise functions apply a function to each element of a vector or matrix, returning a result of the same shape as the argument. 
There are many functions that are vectorized in addition to the ad hoc cases listed in this section; see section function vectorization for the general cases.\n \n\nvector operator.*(vector x, vector y) The elementwise product of y and x\nAvailable since 2.0\n \n\nrow_vector operator.*(row_vector x, row_vector y) The elementwise product of y and x\nAvailable since 2.0\n \n\nmatrix operator.*(matrix x, matrix y) The elementwise product of y and x\nAvailable since 2.0\n \n\nvector operator./(vector x, vector y) The elementwise quotient of y and x\nAvailable since 2.0\n \n\nvector operator./(vector x, real y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nvector operator./(real x, vector y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nrow_vector operator./(row_vector x, row_vector y) The elementwise quotient of y and x\nAvailable since 2.0\n \n\nrow_vector operator./(row_vector x, real y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nrow_vector operator./(real x, row_vector y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nmatrix operator./(matrix x, matrix y) The elementwise quotient of y and x\nAvailable since 2.0\n \n\nmatrix operator./(matrix x, real y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nmatrix operator./(real x, matrix y) The elementwise quotient of y and x\nAvailable since 2.4\n \n\nvector operator.^(vector x, vector y) The elementwise power of y and x\nAvailable since 2.24\n \n\nvector operator.^(vector x, real y) The elementwise power of y and x\nAvailable since 2.24\n \n\nvector operator.^(real x, vector y) The elementwise power of y and x\nAvailable since 2.24\n \n\nrow_vector operator.^(row_vector x, row_vector y) The elementwise power of y and x\nAvailable since 2.24\n \n\nrow_vector operator.^(row_vector x, real y) The elementwise power of y and x\nAvailable since 2.24\n \n\nrow_vector operator.^(real x, row_vector y) The elementwise power of y and x\nAvailable 
since 2.24\n \n\nmatrix operator.^(matrix x, matrix y) The elementwise power of y and x\nAvailable since 2.24\n \n\nmatrix operator.^(matrix x, real y) The elementwise power of y and x\nAvailable since 2.24\n \n\nmatrix operator.^(real x, matrix y) The elementwise power of y and x\nAvailable since 2.24", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#dot-products-and-specialized-products", + "href": "functions-reference/matrix_operations.html#dot-products-and-specialized-products", + "title": "Matrix Operations", + "section": "", + "text": "real dot_product(vector x, vector y) The dot product of x and y\nAvailable since 2.0\n \n\nreal dot_product(vector x, row_vector y) The dot product of x and y\nAvailable since 2.0\n \n\nreal dot_product(row_vector x, vector y) The dot product of x and y\nAvailable since 2.0\n \n\nreal dot_product(row_vector x, row_vector y) The dot product of x and y\nAvailable since 2.0\n \n\nrow_vector columns_dot_product(vector x, vector y) The dot product of the columns of x and y\nAvailable since 2.0\n \n\nrow_vector columns_dot_product(row_vector x, row_vector y) The dot product of the columns of x and y\nAvailable since 2.0\n \n\nrow_vector columns_dot_product(matrix x, matrix y) The dot product of the columns of x and y\nAvailable since 2.0\n \n\nvector rows_dot_product(vector x, vector y) The dot product of the rows of x and y\nAvailable since 2.0\n \n\nvector rows_dot_product(row_vector x, row_vector y) The dot product of the rows of x and y\nAvailable since 2.0\n \n\nvector rows_dot_product(matrix x, matrix y) The dot product of the rows of x and y\nAvailable since 2.0\n \n\nreal dot_self(vector x) The dot product of the vector x with itself\nAvailable since 2.0\n \n\nreal dot_self(row_vector x) The dot product of the row vector x with itself\nAvailable since 2.0\n \n\nrow_vector columns_dot_self(vector x) The dot product of 
the columns of x with themselves\nAvailable since 2.0\n \n\nrow_vector columns_dot_self(row_vector x) The dot product of the columns of x with themselves\nAvailable since 2.0\n \n\nrow_vector columns_dot_self(matrix x) The dot product of the columns of x with themselves\nAvailable since 2.0\n \n\nvector rows_dot_self(vector x) The dot product of the rows of x with themselves\nAvailable since 2.0\n \n\nvector rows_dot_self(row_vector x) The dot product of the rows of x with themselves\nAvailable since 2.0\n \n\nvector rows_dot_self(matrix x) The dot product of the rows of x with themselves\nAvailable since 2.0\n\n\n \n\nmatrix tcrossprod(matrix x) The product of x postmultiplied by its own transpose, similar to the tcrossprod(x) function in R. The result is a symmetric matrix \\(\\text{x}\\,\\text{x}^{\\top}\\).\nAvailable since 2.0\n \n\nmatrix crossprod(matrix x) The product of x premultiplied by its own transpose, similar to the crossprod(x) function in R. The result is a symmetric matrix \\(\\text{x}^{\\top}\\,\\text{x}\\).\nAvailable since 2.0\nThe following functions all provide shorthand forms for common expressions, which are also much more efficient.\n \n\nmatrix quad_form(matrix A, matrix B) The quadratic form, i.e., B' * A * B.\nAvailable since 2.0\n \n\nreal quad_form(matrix A, vector B) The quadratic form, i.e., B' * A * B.\nAvailable since 2.0\n \n\nmatrix quad_form_diag(matrix m, vector v) The quadratic form using the column vector v as a diagonal matrix, i.e., diag_matrix(v) * m * diag_matrix(v).\nAvailable since 2.3\n \n\nmatrix quad_form_diag(matrix m, row_vector rv) The quadratic form using the row vector rv as a diagonal matrix, i.e., diag_matrix(rv) * m * diag_matrix(rv).\nAvailable since 2.3\n \n\nmatrix quad_form_sym(matrix A, matrix B) Similarly to quad_form, gives B' * A * B, but additionally checks if A is symmetric and ensures that the result is also symmetric.\nAvailable since 2.3\n \n\nreal quad_form_sym(matrix A, vector B) Similarly to 
quad_form, gives B' * A * B, but additionally checks if A is symmetric and ensures that the result is also symmetric.\nAvailable since 2.3\n \n\nreal trace_dot(matrix A, matrix B) The trace of the matrix product, i.e., trace(A * B).\nAvailable since 2.39\n \n\nreal trace_quad_form(matrix A, matrix B) The trace of the quadratic form, i.e., trace(B' * A * B).\nAvailable since 2.0\n \n\nreal trace_quad_form(matrix A, vector B) The trace of the quadratic form, i.e., trace(B' * A * B).\nAvailable since 2.0\n \n\nreal trace_gen_quad_form(matrix D, matrix A, matrix B) The trace of a generalized quadratic form, i.e., trace(D * B' * A * B).\nAvailable since 2.0\n \n\nmatrix multiply_lower_tri_self_transpose(matrix x) The product of the lower triangular portion of x (including the diagonal) times its own transpose; that is, if L is a matrix of the same dimensions as x with L(m,n) equal to x(m,n) for \\(\\text{n}\n\\leq \\text{m}\\) and L(m,n) equal to 0 if \\(\\text{n} > \\text{m}\\), the result is the symmetric matrix \\(\\text{L}\\,\\text{L}^{\\top}\\). This is a specialization of tcrossprod(x) for lower-triangular matrices. 
The input matrix does not need to be square.\nAvailable since 2.0\n \n\nmatrix diag_pre_multiply(vector v, matrix m) Return the product of the diagonal matrix formed from the vector v and the matrix m, i.e., diag_matrix(v) * m.\nAvailable since 2.0\n \n\nmatrix diag_pre_multiply(row_vector rv, matrix m) Return the product of the diagonal matrix formed from the row vector rv and the matrix m, i.e., diag_matrix(rv) * m.\nAvailable since 2.0\n \n\nmatrix diag_post_multiply(matrix m, vector v) Return the product of the matrix m and the diagonal matrix formed from the vector v, i.e., m * diag_matrix(v).\nAvailable since 2.0\n \n\nmatrix diag_post_multiply(matrix m, row_vector rv) Return the product of the matrix m and the diagonal matrix formed from the row vector rv, i.e., m * diag_matrix(rv).\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#reductions", + "href": "functions-reference/matrix_operations.html#reductions", + "title": "Matrix Operations", + "section": "", + "text": "real log_sum_exp(vector x) The natural logarithm of the sum of the exponentials of the elements in x\nAvailable since 2.0\n \n\nreal log_sum_exp(row_vector x) The natural logarithm of the sum of the exponentials of the elements in x\nAvailable since 2.0\n \n\nreal log_sum_exp(matrix x) The natural logarithm of the sum of the exponentials of the elements in x\nAvailable since 2.0\n\n\n\n \n\nreal min(vector x) The minimum value in x, or \\(+\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal min(row_vector x) The minimum value in x, or \\(+\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal min(matrix x) The minimum value in x, or \\(+\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal max(vector x) The maximum value in x, or \\(-\\infty\\) if x is empty\nAvailable since 2.0\n \n\nreal max(row_vector x) The maximum value in x, or \\(-\\infty\\) if x is 
empty\nAvailable since 2.0\n \n\nreal max(matrix x) The maximum value in x, or \\(-\\infty\\) if x is empty\nAvailable since 2.0\n\n\n\n \n\nreal sum(vector x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.0\n \n\nreal sum(row_vector x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.0\n \n\nreal sum(matrix x) The sum of the values in x, or 0 if x is empty\nAvailable since 2.0\n \n\nreal prod(vector x) The product of the values in x, or 1 if x is empty\nAvailable since 2.0\n \n\nreal prod(row_vector x) The product of the values in x, or 1 if x is empty\nAvailable since 2.0\n \n\nreal prod(matrix x) The product of the values in x, or 1 if x is empty\nAvailable since 2.0\n\n\n\nFull definitions are provided for sample moments in section array reductions.\n \n\nreal mean(vector x) The sample mean of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal mean(row_vector x) The sample mean of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal mean(matrix x) The sample mean of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal variance(vector x) The sample variance of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal variance(row_vector x) The sample variance of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal variance(matrix x) The sample variance of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal sd(vector x) The sample standard deviation of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal sd(row_vector x) The sample standard deviation of the values in x; see section array reductions for details.\nAvailable since 2.0\n \n\nreal sd(matrix x) The sample standard deviation of the values in x; see section array reductions for details.\nAvailable since 
2.0\n\n\n\nProduces sample quantiles corresponding to the given probabilities. The smallest observation corresponds to a probability of 0 and the largest to a probability of 1.\nImplements algorithm 7 from Hyndman, R. J. and Fan, Y., Sample quantiles in Statistical Packages (R’s default quantile function).\n \n\nreal quantile(data vector x, data real p) The p-th quantile of x\nAvailable since 2.27\n \n\narray[] real quantile(data vector x, data array[] real p) An array containing the quantiles of x given by the array of probabilities p\nAvailable since 2.27\n \n\nreal quantile(data row_vector x, data real p) The p-th quantile of x\nAvailable since 2.27\n \n\narray[] real quantile(data row_vector x, data array[] real p) An array containing the quantiles of x given by the array of probabilities p\nAvailable since 2.27", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#matrix-broadcast", + "href": "functions-reference/matrix_operations.html#matrix-broadcast", + "title": "Matrix Operations", + "section": "", + "text": "The following broadcast functions allow vectors, row vectors and matrices to be created by copying a single element into all of their cells. 
Matrices may also be created by stacking copies of row vectors vertically or stacking copies of column vectors horizontally.\n \n\nvector rep_vector(real x, int m) Return the size m (column) vector consisting of copies of x.\nAvailable since 2.0\n \n\nrow_vector rep_row_vector(real x, int n) Return the size n row vector consisting of copies of x.\nAvailable since 2.0\n \n\nmatrix rep_matrix(real x, int m, int n) Return the m by n matrix consisting of copies of x.\nAvailable since 2.0\n \n\nmatrix rep_matrix(vector v, int n) Return the m by n matrix consisting of n copies of the (column) vector v of size m.\nAvailable since 2.0\n \n\nmatrix rep_matrix(row_vector rv, int m) Return the m by n matrix consisting of m copies of the row vector rv of size n.\nAvailable since 2.0\nUnlike the situation with array broadcasting (see section array broadcasting), where there is a distinction between integer and real arguments, the following two statements produce the same result for vector broadcasting; row vector and matrix broadcasting behave similarly.\n vector[3] x;\n x = rep_vector(1, 3);\n x = rep_vector(1.0, 3);\nThere are no integer vector or matrix types, so integer values are automatically promoted.\n\n\n \n\nmatrix symmetrize_from_lower_tri(matrix A)\nConstruct a symmetric matrix from the lower triangle of A.\nAvailable since 2.26", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#diagonal-matrix-functions", + "href": "functions-reference/matrix_operations.html#diagonal-matrix-functions", + "title": "Matrix Operations", + "section": "", + "text": "matrix add_diag(matrix m, row_vector d) Add row_vector d to the diagonal of matrix m.\nAvailable since 2.21\n \n\nmatrix add_diag(matrix m, vector d) Add vector d to the diagonal of matrix m.\nAvailable since 2.21\n \n\nmatrix add_diag(matrix m, real d) Add scalar d to every diagonal element of matrix m.\nAvailable 
since 2.21\n \n\nvector diagonal(matrix x) The diagonal of the matrix x\nAvailable since 2.0\n \n\nmatrix diag_matrix(vector x) The diagonal matrix with diagonal x\nAvailable since 2.0\nAlthough the diag_matrix function is available, it is unlikely to ever show up in an efficient Stan program. For example, rather than converting a diagonal to a full matrix for use as a covariance matrix,\n y ~ multi_normal(mu, diag_matrix(square(sigma)));\nit is much more efficient to just use a univariate normal, which produces the same density,\n y ~ normal(mu, sigma);\nRather than writing m * diag_matrix(v) where m is a matrix and v is a vector, it is much more efficient to write diag_post_multiply(m, v) (and similarly for pre-multiplication). By the same token, it is better to use quad_form_diag(m, v) rather than quad_form(m, diag_matrix(v)).\n \n\nmatrix identity_matrix(int k) Create an identity matrix of size \\(k \\times k\\)\nAvailable since 2.26", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#container-construction", + "href": "functions-reference/matrix_operations.html#container-construction", + "title": "Matrix Operations", + "section": "", + "text": "array[] real linspaced_array(int n, data real lower, data real upper) Create a real array of length n of equidistantly-spaced elements between lower and upper\nAvailable since 2.24\n \n\narray[] int linspaced_int_array(int n, int lower, int upper) Create a regularly spaced, increasing integer array of length n between lower and upper, inclusively. If (upper - lower) / (n - 1) is less than one, repeat each output (n - 1) / (upper - lower) times. 
If neither (upper - lower) / (n - 1) nor (n - 1) / (upper - lower) is an integer, upper is reduced until one of these is true.\nAvailable since 2.26\n \n\nvector linspaced_vector(int n, data real lower, data real upper) Create an n-dimensional vector of equidistantly-spaced elements between lower and upper\nAvailable since 2.24\n \n\nrow_vector linspaced_row_vector(int n, data real lower, data real upper) Create an n-dimensional row-vector of equidistantly-spaced elements between lower and upper\nAvailable since 2.24\n \n\narray[] int one_hot_int_array(int n, int k) Create a one-hot encoded int array of length n with array[k] = 1\nAvailable since 2.26\n \n\narray[] real one_hot_array(int n, int k) Create a one-hot encoded real array of length n with array[k] = 1\nAvailable since 2.24\n \n\nvector one_hot_vector(int n, int k) Create an n-dimensional one-hot encoded vector with vector[k] = 1\nAvailable since 2.24\n \n\nrow_vector one_hot_row_vector(int n, int k) Create an n-dimensional one-hot encoded row-vector with row_vector[k] = 1\nAvailable since 2.24\n \n\narray[] int ones_int_array(int n) Create an int array of length n of all ones\nAvailable since 2.26\n \n\narray[] real ones_array(int n) Create a real array of length n of all ones\nAvailable since 2.26\n \n\nvector ones_vector(int n) Create an n-dimensional vector of all ones\nAvailable since 2.26\n \n\nrow_vector ones_row_vector(int n) Create an n-dimensional row-vector of all ones\nAvailable since 2.26\n \n\narray[] int zeros_int_array(int n) Create an int array of length n of all zeros\nAvailable since 2.26\n \n\narray[] real zeros_array(int n) Create a real array of length n of all zeros\nAvailable since 2.24\n \n\nvector zeros_vector(int n) Create an n-dimensional vector of all zeros\nAvailable since 2.24\n \n\nrow_vector zeros_row_vector(int n) Create an n-dimensional row-vector of all zeros\nAvailable since 2.24\n \n\nvector uniform_simplex(int n) Create an n-dimensional simplex with elements vector[i] 
= 1 / n for all \\(i \\in 1, \\dots, n\\)\nAvailable since 2.24", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#slicing-and-blocking-functions", + "href": "functions-reference/matrix_operations.html#slicing-and-blocking-functions", + "title": "Matrix Operations", + "section": "", + "text": "Stan provides several functions for generating slices or blocks or diagonal entries for matrices.\n\n\n \n\nvector col(matrix x, int n) The n-th column of matrix x\nAvailable since 2.0\n \n\nrow_vector row(matrix x, int m) The m-th row of matrix x\nAvailable since 2.0\nThe row function is special in that it may be used as an lvalue in an assignment statement (i.e., something to which a value may be assigned). The row function is also special in that the indexing notation x[m] is just an alternative way of writing row(x,m). The col function may not be used as an lvalue, nor is there an indexing-based shorthand for it.\n\n\n\n\n\nBlock operations may be used to extract a sub-block of a matrix.\n \n\nmatrix block(matrix x, int i, int j, int n_rows, int n_cols) Return the submatrix of x that starts at row i and column j and extends n_rows rows and n_cols columns.\nAvailable since 2.0\nThe sub-row and sub-column operations may be used to extract a slice of a row or column from a matrix\n \n\nvector sub_col(matrix x, int i, int j, int n_rows) Return the sub-column of x that starts at row i and column j and extends n_rows rows and 1 column.\nAvailable since 2.0\n \n\nrow_vector sub_row(matrix x, int i, int j, int n_cols) Return the sub-row of x that starts at row i and column j and extends 1 row and n_cols columns.\nAvailable since 2.0\n\n\n\nThe head operation extracts the first \\(n\\) elements of a vector and the tail operation the last. 
The segment operation extracts an arbitrary subvector.\n \n\nvector head(vector v, int n) Return the vector consisting of the first n elements of v.\nAvailable since 2.0\n \n\nrow_vector head(row_vector rv, int n) Return the row vector consisting of the first n elements of rv.\nAvailable since 2.0\n \n\narray[] T head(array[] T sv, int n) Return the array consisting of the first n elements of sv; applies to up to three-dimensional arrays containing any type of elements T.\nAvailable since 2.0\n \n\nvector tail(vector v, int n) Return the vector consisting of the last n elements of v.\nAvailable since 2.0\n \n\nrow_vector tail(row_vector rv, int n) Return the row vector consisting of the last n elements of rv.\nAvailable since 2.0\n \n\narray[] T tail(array[] T sv, int n) Return the array consisting of the last n elements of sv; applies to up to three-dimensional arrays containing any type of elements T.\nAvailable since 2.0\n \n\nvector segment(vector v, int i, int n) Return the vector consisting of the n elements of v starting at i; i.e., elements i through i + n - 1.\nAvailable since 2.0\n \n\nrow_vector segment(row_vector rv, int i, int n) Return the row vector consisting of the n elements of rv starting at i; i.e., elements i through i + n - 1.\nAvailable since 2.10\n \n\narray[] T segment(array[] T sv, int i, int n) Return the array consisting of the n elements of sv starting at i; i.e., elements i through i + n - 1. 
Applies to up to three-dimensional arrays containing any type of elements T.\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#matrix-concatenation", + "href": "functions-reference/matrix_operations.html#matrix-concatenation", + "title": "Matrix Operations", + "section": "", + "text": "Stan’s matrix and vector concatenation operations append_col and append_row are like the operations cbind and rbind in R.\n\n\n \n\nmatrix append_col(matrix x, matrix y) Combine matrices x and y by column. The matrices must have the same number of rows.\nAvailable since 2.5\n \n\nmatrix append_col(matrix x, vector y) Combine matrix x and vector y by column. The matrix and the vector must have the same number of rows.\nAvailable since 2.5\n \n\nmatrix append_col(vector x, matrix y) Combine vector x and matrix y by column. The vector and the matrix must have the same number of rows.\nAvailable since 2.5\n \n\nmatrix append_col(vector x, vector y) Combine vectors x and y by column. The vectors must have the same number of rows.\nAvailable since 2.5\n \n\nrow_vector append_col(row_vector x, row_vector y) Combine row vectors x and y of any size into another row vector by appending y to the end of x.\nAvailable since 2.5\n \n\nrow_vector append_col(real x, row_vector y) Append x to the front of y, returning another row vector.\nAvailable since 2.12\n \n\nrow_vector append_col(row_vector x, real y) Append y to the end of x, returning another row vector.\nAvailable since 2.12\n\n\n\n \n\nmatrix append_row(matrix x, matrix y) Combine matrices x and y by row. The matrices must have the same number of columns.\nAvailable since 2.5\n \n\nmatrix append_row(matrix x, row_vector y) Combine matrix x and row vector y by row. 
The matrix and the row vector must have the same number of columns.\nAvailable since 2.5\n \n\nmatrix append_row(row_vector x, matrix y) Combine row vector x and matrix y by row. The row vector and the matrix must have the same number of columns.\nAvailable since 2.5\n \n\nmatrix append_row(row_vector x, row_vector y) Combine row vectors x and y by row. The row vectors must have the same number of columns.\nAvailable since 2.5\n \n\nvector append_row(vector x, vector y) Concatenate vectors x and y of any size into another vector.\nAvailable since 2.5\n \n\nvector append_row(real x, vector y) Append x to the top of y, returning another vector.\nAvailable since 2.12\n \n\nvector append_row(vector x, real y) Append y to the bottom of x, returning another vector.\nAvailable since 2.12", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#softmax", + "href": "functions-reference/matrix_operations.html#softmax", + "title": "Matrix Operations", + "section": "", + "text": "The softmax function maps1 \\(y \\in \\mathbb{R}^K\\) to the \\(K\\)-simplex by \\[\\begin{equation*} \\text{softmax}(y) = \\frac{\\exp(y)}\n{\\sum_{k=1}^K \\exp(y_k)}, \\end{equation*}\\] where \\(\\exp(y)\\) is the componentwise exponentiation of \\(y\\). Softmax is usually calculated on the log scale, \\[\\begin{eqnarray*} \\log \\text{softmax}(y) & = & \\ y - \\log \\sum_{k=1}^K\n\\exp(y_k) \\\\[4pt] & = & y - \\mathrm{log\\_sum\\_exp}(y). 
\\end{eqnarray*}\\] where the vector \\(y\\) minus the scalar \\(\\mathrm{log\\_sum\\_exp}(y)\\) subtracts the scalar from each component of \\(y\\).\nStan provides the following functions for softmax and its log.\n \n\nvector softmax(vector x) The softmax of x\nAvailable since 2.0\n \n\nvector log_softmax(vector x) The natural logarithm of the softmax of x\nAvailable since 2.0\n\n\n\nThe cumulative sum of a sequence \\(x_1,\\ldots,x_N\\) is the sequence \\(y_1,\\ldots,y_N\\), where \\[\\begin{equation*} y_n = \\sum_{m = 1}^{n} x_m. \\end{equation*}\\]\n \n\narray[] int cumulative_sum(array[] int x) The cumulative sum of x\nAvailable since 2.30\n \n\narray[] real cumulative_sum(array[] real x) The cumulative sum of x\nAvailable since 2.0\n \n\nvector cumulative_sum(vector v) The cumulative sum of v\nAvailable since 2.0\n \n\nrow_vector cumulative_sum(row_vector rv) The cumulative sum of rv\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#gaussian-process-covariance-functions", + "href": "functions-reference/matrix_operations.html#gaussian-process-covariance-functions", + "title": "Matrix Operations", + "section": "", + "text": "The Gaussian process covariance functions compute the covariance between observations in an input data set or the cross-covariance between two input data sets.\nFor one dimensional GPs, the input data sets are arrays of scalars. The covariance matrix is given by \\(K_{ij} = k(x_i, x_j)\\) (where \\(x_i\\) is the \\(i^{th}\\) element of the array \\(x\\)) and the cross-covariance is given by \\(K_{ij} = k(x_i, y_j)\\).\nFor multi-dimensional GPs, the input data sets are arrays of vectors. 
The covariance matrix is given by \\(K_{ij} = k(\\mathbf{x}_i, \\mathbf{x}_j)\\) (where \\(\\mathbf{x}_i\\) is the \\(i^{th}\\) vector in the array \\(x\\)) and the cross-covariance is given by \\(K_{ij} = k(\\mathbf{x}_i, \\mathbf{y}_j)\\).\n\n\nWith magnitude \\(\\sigma\\) and length scale \\(l\\), the exponentiated quadratic kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\exp \\left( -\\frac{|\\mathbf{x}_i - \\mathbf{x}_j|^2}{2l^2} \\right)\n\\]\n \n\nmatrix gp_exp_quad_cov(array[] real x, real sigma, real length_scale)\nGaussian process covariance with exponentiated quadratic kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(array[] real x1, array[] real x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(vectors x, real sigma, real length_scale)\nGaussian process covariance with exponentiated quadratic kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(vectors x, real sigma, array[] real length_scale)\nGaussian process covariance with exponentiated quadratic kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(vectors x1, vectors x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_exp_quad_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponentiated quadratic kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n\n\n\nWith bias \\(\\sigma_0\\) the dot product kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma_0^2 + \\mathbf{x}_i^T \\mathbf{x}_j\n\\]\n \n\nmatrix gp_dot_prod_cov(array[] real x, real sigma)\nGaussian process covariance with dot 
product kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_dot_prod_cov(array[] real x1, array[] real x2, real sigma)\nGaussian process cross-covariance of x1 and x2 with dot product kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_dot_prod_cov(vectors x, real sigma)\nGaussian process covariance with dot product kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_dot_prod_cov(vectors x1, vectors x2, real sigma)\nGaussian process cross-covariance of x1 and x2 with dot product kernel in multiple dimensions.\nAvailable since 2.20\n\n\n\nWith magnitude \\(\\sigma\\) and length scale \\(l\\), the exponential kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\exp \\left( -\\frac{|\\mathbf{x}_i - \\mathbf{x}_j|}{l} \\right)\n\\]\n \n\nmatrix gp_exponential_cov(array[] real x, real sigma, real length_scale)\nGaussian process covariance with exponential kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(array[] real x1, array[] real x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponential kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(vectors x, real sigma, real length_scale)\nGaussian process covariance with exponential kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(vectors x, real sigma, array[] real length_scale)\nGaussian process covariance with exponential kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(vectors x1, vectors x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponential kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_exponential_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)\nGaussian process cross-covariance of x1 and x2 with exponential kernel in multiple dimensions with a length scale for each 
dimension.\nAvailable since 2.20\n\n\n\nWith magnitude \\(\\sigma\\) and length scale \\(l\\), the Matern 3/2 kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\left( 1 + \\frac{\\sqrt{3}|\\mathbf{x}_i - \\mathbf{x}_j|}{l} \\right) \\exp \\left( -\\frac{\\sqrt{3}|\\mathbf{x}_i - \\mathbf{x}_j|}{l} \\right)\n\\]\n \n\nmatrix gp_matern32_cov(array[] real x, real sigma, real length_scale)\nGaussian process covariance with Matern 3/2 kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(array[] real x1, array[] real x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(vectors x, real sigma, real length_scale)\nGaussian process covariance with Matern 3/2 kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(vectors x, real sigma, array[] real length_scale)\nGaussian process covariance with Matern 3/2 kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(vectors x1, vectors x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_matern32_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 3/2 kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n\n\n\nWith magnitude \\(\\sigma\\) and length scale \\(l\\), the Matern 5/2 kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\left( 1 + \\frac{\\sqrt{5}|\\mathbf{x}_i - \\mathbf{x}_j|}{l} + \\frac{5 |\\mathbf{x}_i - \\mathbf{x}_j|^2}{3l^2} \\right)\n\\exp \\left( -\\frac{\\sqrt{5} |\\mathbf{x}_i - \\mathbf{x}_j|}{l} \\right)\n\\]\n \n\nmatrix gp_matern52_cov(array[] real x, real sigma, real length_scale)\nGaussian process covariance with Matern 5/2 
kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(array[] real x1, array[] real x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(vectors x, real sigma, real length_scale)\nGaussian process covariance with Matern 5/2 kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(vectors x, real sigma, array[] real length_scale)\nGaussian process covariance with Matern 5/2 kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(vectors x1, vectors x2, real sigma, real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_matern52_cov(vectors x1, vectors x2, real sigma, array[] real length_scale)\nGaussian process cross-covariance of x1 and x2 with Matern 5/2 kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20\n\n\n\nWith magnitude \\(\\sigma\\), length scale \\(l\\), and period \\(p\\), the periodic kernel is:\n\\[\nk(\\mathbf{x}_i, \\mathbf{x}_j) = \\sigma^2 \\exp \\left(-\\frac{2 \\sin^2 \\left( \\pi \\frac{|\\mathbf{x}_i - \\mathbf{x}_j|}{p} \\right) }{l^2} \\right)\n\\]\n \n\nmatrix gp_periodic_cov(array[] real x, real sigma, real length_scale, real period)\nGaussian process covariance with periodic kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_periodic_cov(array[] real x1, array[] real x2, real sigma, real length_scale, real period)\nGaussian process cross-covariance of x1 and x2 with periodic kernel in one dimension.\nAvailable since 2.20\n \n\nmatrix gp_periodic_cov(vectors x, real sigma, real length_scale, real period)\nGaussian process covariance with periodic kernel in multiple dimensions.\nAvailable since 2.20\n \n\nmatrix gp_periodic_cov(vectors x1, vectors x2, real sigma, 
real length_scale, real period)\nGaussian process cross-covariance of x1 and x2 with periodic kernel in multiple dimensions with a length scale for each dimension.\nAvailable since 2.20", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#linear-algebra-functions-and-solvers", + "href": "functions-reference/matrix_operations.html#linear-algebra-functions-and-solvers", + "title": "Matrix Operations", + "section": "", + "text": "In general, it is much more efficient and also more arithmetically stable to use matrix division than to multiply by an inverse. There are specialized forms for lower triangular matrices and for symmetric, positive-definite matrices.\n\n\n \n\nrow_vector operator/(row_vector b, matrix A) The right division of b by A; equivalently b * inverse(A)\nAvailable since 2.0\n \n\nmatrix operator/(matrix B, matrix A) The right division of B by A; equivalently B * inverse(A)\nAvailable since 2.5\n \n\nvector operator\\(matrix A, vector b) The left division of A by b; equivalently inverse(A) * b\nAvailable since 2.18\n \n\nmatrix operator\\(matrix A, matrix B) The left division of A by B; equivalently inverse(A) * B\nAvailable since 2.18\n\n\n\nThere are four division functions which use lower triangular views of a matrix. The lower triangular view of a matrix \\(\\text{tri}(A)\\) is used in the definitions and defined by \\[\\begin{equation*} \\text{tri}(A)[m,n] = \\left\\{\n\\begin{array}{ll} A[m,n] & \\text{if } m \\geq n, \\text{ and} \\\\[4pt] 0\n& \\text{otherwise}. \\end{array} \\right. 
\\end{equation*}\\] When a lower triangular view of a matrix is used, the elements above the diagonal are ignored.\n \n\nvector mdivide_left_tri_low(matrix A, vector b) The left division of b by a lower-triangular view of A; algebraically equivalent to the less efficient and stable form inverse(tri(A)) * b, where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.\nAvailable since 2.12\n \n\nmatrix mdivide_left_tri_low(matrix A, matrix B) The left division of B by a triangular view of A; algebraically equivalent to the less efficient and stable form inverse(tri(A)) * B, where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.\nAvailable since 2.5\n \n\nrow_vector mdivide_right_tri_low(row_vector b, matrix A) The right division of b by a triangular view of A; algebraically equivalent to the less efficient and stable form b * inverse(tri(A)), where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.\nAvailable since 2.12\n \n\nmatrix mdivide_right_tri_low(matrix B, matrix A) The right division of B by a triangular view of A; algebraically equivalent to the less efficient and stable form B * inverse(tri(A)), where tri(A) is the lower-triangular portion of A with the above-diagonal entries set to zero.\nAvailable since 2.5\n\n\n\n\nThere are four division functions which are specialized for efficiency and stability for symmetric positive-definite matrix dividends. 
If the matrix dividend argument is not symmetric and positive definite, these will reject and print warnings.\n \n\nvector mdivide_left_spd(matrix A, vector b) The left division of b by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form inverse(A) * b.\nAvailable since 2.12\n \n\nmatrix mdivide_left_spd(matrix A, matrix B) The left division of B by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form inverse(A) * B.\nAvailable since 2.12\n \n\nrow_vector mdivide_right_spd(row_vector b, matrix A) The right division of b by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form b * inverse(A).\nAvailable since 2.12\n \n\nmatrix mdivide_right_spd(matrix B, matrix A) The right division of B by the symmetric, positive-definite matrix A; algebraically equivalent to the less efficient and stable form B * inverse(A).\nAvailable since 2.12\n\n\n\nThe exponential of the matrix \\(A\\) is formally defined by the convergent power series: \\[\\begin{equation*} e^A = \\sum_{n=0}^{\\infty} \\dfrac{A^n}{n!}\n\\end{equation*}\\]\n \n\nmatrix matrix_exp(matrix A) The matrix exponential of A\nAvailable since 2.13\n \n\nmatrix matrix_exp_multiply(matrix A, matrix B) The multiplication of matrix exponential of A and matrix B; algebraically equivalent to the less efficient form matrix_exp(A) * B.\nAvailable since 2.18\n \n\nmatrix scale_matrix_exp_multiply(real t, matrix A, matrix B) The multiplication of matrix exponential of tA and matrix B; algebraically equivalent to the less efficient form matrix_exp(t * A) * B.\nAvailable since 2.18\n\n\n\nReturns the nth power of the specific matrix: \\[\\begin{equation*} M^n = M_1 * ... 
* M_n \\end{equation*}\\]\n \n\nmatrix matrix_power(matrix A, int B) Matrix A raised to the power B.\nAvailable since 2.24\n\n\n\n\n\n \n\nreal trace(matrix A) The trace of A, or 0 if A is empty; A is not required to be diagonal\nAvailable since 2.0\n\n\n\n \n\nreal determinant(matrix A) The determinant of A\nAvailable since 2.0\n \n\nreal log_determinant(matrix A) The log of the absolute value of the determinant of A\nAvailable since 2.0\nreal log_determinant_spd(matrix A) The log of the absolute value of the determinant of the symmetric, positive-definite matrix A.\nAvailable since 2.30\n\n\n\nIt is almost never a good idea to use matrix inverses directly because they are both inefficient and arithmetically unstable compared to the alternatives. Rather than inverting a matrix m and post-multiplying by a vector or matrix a, as in inverse(m) * a, it is better to code this using matrix division, as in m \ a. The pre-multiplication case is similar, with b * inverse(m) being more efficiently coded as b / m. There are also useful special cases for triangular and symmetric, positive-definite matrices that use more efficient solvers.\nWarning: The function inv(m) is the elementwise inverse function, which returns 1 / m[i, j] for each element.\n \n\nmatrix inverse(matrix A) Compute the inverse of A\nAvailable since 2.0\n \n\nmatrix inverse_spd(matrix A) Compute the inverse of A where A is symmetric, positive definite. This version is faster and more arithmetically stable when the input is symmetric and positive definite.\nAvailable since 2.0\n \n\nmatrix chol2inv(matrix L) Compute the inverse of the matrix whose cholesky factorization is L. That is, for \\(A = L L^T\\), return \\(A^{-1}\\).\nAvailable since 2.26\n\n\n\nThe generalized inverse \\(M^+\\) of a matrix \\(M\\) is a matrix that satisfies \\(M M^+ M = M\\). For an invertible, square matrix \\(M\\), \\(M^+\\) is equivalent to \\(M^{-1}\\). 
The dimensions of \\(M^+\\) are equivalent to the dimensions of \\(M^T\\). The generalized inverse exists for any matrix, so \\(M\\) may be singular or less than full rank.\nEven though the generalized inverse exists for any arbitrary matrix, the derivatives of this function only exist on matrices of locally constant rank (Golub and Pereyra 1973), meaning, the derivatives do not exist if small perturbations make the matrix change rank. For example, consider the rank of the matrix \\(A\\) as a function of \\(\\epsilon\\):\n\\[\nA = \\left(\n \\begin{array}{cccc}\n 1 + \\epsilon & 2 & 1 \\\\\n 2 & 4 & 2\n \\end{array}\n \\right)\n\\]\nWhen \\(\\epsilon = 0\\), \\(A\\) is rank 1 because the second row is twice the first (and so there is only one linearly independent row). If \\(\\epsilon \\neq 0\\), the rows are no longer linearly dependent, and the matrix is rank 2. This matrix does not have locally constant rank at \\(\\epsilon = 0\\), and so the derivatives do not exist at zero. Because HMC depends on the derivatives existing, this lack of differentiability creates undefined behavior.\n \n\nmatrix generalized_inverse(matrix A) The generalized inverse of A\nAvailable since 2.26\n\n\n\n \n\ncomplex_vector eigenvalues(matrix A) The complex-valued vector of eigenvalues of the matrix A. The eigenvalues are repeated according to their algebraic multiplicity, so there are as many eigenvalues as rows in the matrix. The eigenvalues are not sorted in any particular order.\nAvailable since 2.30\n \n\ncomplex_matrix eigenvectors(matrix A) The matrix with the complex-valued (column) eigenvectors of the matrix A in the same order as returned by the function eigenvalues\nAvailable since 2.30\n \n\ntuple(complex_matrix, complex_vector) eigendecompose(matrix A) Return the matrix of (column) eigenvectors and vector of eigenvalues of the matrix A. 
This function is equivalent to (eigenvectors(A), eigenvalues(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\n \n\nvector eigenvalues_sym(matrix A) The vector of eigenvalues of a symmetric matrix A in ascending order\nAvailable since 2.0\n \n\nmatrix eigenvectors_sym(matrix A) The matrix with the (column) eigenvectors of symmetric matrix A in the same order as returned by the function eigenvalues_sym\nAvailable since 2.0\n \n\ntuple(matrix, vector) eigendecompose_sym(matrix A) Return the matrix of (column) eigenvectors and vector of eigenvalues of the symmetric matrix A. This function is equivalent to (eigenvectors_sym(A), eigenvalues_sym(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\nBecause multiplying an eigenvector by \\(-1\\) results in an eigenvector, eigenvectors returned by a decomposition are only identified up to a sign change. In order to compare the eigenvectors produced by Stan’s eigendecomposition to others, signs may need to be normalized in some way, such as by fixing the sign of a component, or doing comparisons allowing a multiplication by \\(-1\\).\nThe condition number of a symmetric matrix is defined to be the ratio of the largest eigenvalue to the smallest eigenvalue. 
Large condition numbers lead to difficulty in numerical algorithms such as computing inverses, and thus known as “ill conditioned.” The ratio can even be infinite in the case of singular matrices (i.e., those with eigenvalues of 0).\n\n\n\n \n\nmatrix qr_thin_Q(matrix A) The orthogonal matrix in the thin QR decomposition of A, which implies that the resulting matrix has the same dimensions as A\nAvailable since 2.18\n \n\nmatrix qr_thin_R(matrix A) The upper triangular matrix in the thin QR decomposition of A, which implies that the resulting matrix is square with the same number of columns as A\nAvailable since 2.18\n \n\ntuple(matrix, matrix) qr_thin(matrix A) Returns both portions of the QR decomposition of A. The first element (“Q”) is the orthonormal matrix in the thin QR decomposition and the second element (“R”) is upper triangular. This function is equivalent to (qr_thin_Q(A), qr_thin_R(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\n \n\nmatrix qr_Q(matrix A) The orthogonal matrix in the fat QR decomposition of A, which implies that the resulting matrix is square with the same number of rows as A\nAvailable since 2.3\n \n\nmatrix qr_R(matrix A) The upper trapezoidal matrix in the fat QR decomposition of A, which implies that the resulting matrix will be rectangular with the same dimensions as A\nAvailable since 2.3\n \n tuple(matrix, matrix) qr(matrix A) Returns both portions of the QR decomposition of A. The first element (“Q”) is the orthogonal matrix in the fat QR decomposition and the second element (“R”) is upper trapezoidal. This function is equivalent to (qr_Q(A), qr_R(A)) but with a lower computational cost due to the shared work between the two results.\nAvailable since 2.33\nThe thin QR decomposition is always preferable because it will consume much less memory when the input matrix is large than will the fat QR decomposition. 
Both versions of the decomposition represent the input matrix as \\[\\begin{equation*} A = Q \\, R. \\end{equation*}\\] Multiplying a column of an orthogonal matrix by \\(-1\\) still results in an orthogonal matrix, and you can multiply the corresponding row of the upper trapezoidal matrix by \\(-1\\) without changing the product. Thus, Stan adopts the normalization that the diagonal elements of the upper trapezoidal matrix are strictly positive and the columns of the orthogonal matrix are reflected if necessary. Also, these QR decomposition algorithms do not utilize pivoting and thus may be numerically unstable on input matrices that have less than full rank.\n\n\n\nEvery symmetric, positive-definite matrix (such as a correlation or covariance matrix) has a Cholesky decomposition. If \\(\\Sigma\\) is a symmetric, positive-definite matrix, its Cholesky decomposition is the lower-triangular matrix \\(L\\) such that \\[\\begin{equation*} \\Sigma = L \\, L^{\\top}. \\end{equation*}\\]\n \n\nmatrix cholesky_decompose(matrix A) The lower-triangular Cholesky factor of the symmetric positive-definite matrix A\nAvailable since 2.0\n\n\n\nThe matrix A can be decomposed into a diagonal matrix of singular values, D, and matrices of its left and right singular vectors, U and V, \\[\\begin{equation*} A = U D V^T. \\end{equation*}\\] The matrices of singular vectors here are thin. That is for an \\(N\\) by \\(P\\) input A, \\(M = min(N, P)\\), U is size \\(N\\) by \\(M\\) and V is size \\(P\\) by \\(M\\).\n \n\nvector singular_values(matrix A) The singular values of A in descending order\nAvailable since 2.0\n \n\nmatrix svd_U(matrix A) The left-singular vectors of A\nAvailable since 2.26\n \n\nmatrix svd_V(matrix A) The right-singular vectors of A\nAvailable since 2.26\n \n\ntuple(matrix, vector, matrix) svd(matrix A) Returns a tuple containing the left-singular vectors of A, the singular values of A in descending order, and the right-singular vectors of A. 
This function is equivalent to (svd_U(A), singular_values(A), svd_V(A)) but with a lower computational cost due to the shared work between the different components.\nAvailable since 2.33", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#sort-functions", + "href": "functions-reference/matrix_operations.html#sort-functions", + "title": "Matrix Operations", + "section": "", + "text": "See the sorting functions section for examples of how the functions work.\n \n\nvector sort_asc(vector v) Sort the elements of v in ascending order\nAvailable since 2.0\n \n\nrow_vector sort_asc(row_vector v) Sort the elements of v in ascending order\nAvailable since 2.0\n \n\nvector sort_desc(vector v) Sort the elements of v in descending order\nAvailable since 2.0\n \n\nrow_vector sort_desc(row_vector v) Sort the elements of v in descending order\nAvailable since 2.0\n \n\narray[] int sort_indices_asc(vector v) Return an array of indices between 1 and the size of v, sorted to index v in ascending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_asc(row_vector v) Return an array of indices between 1 and the size of v, sorted to index v in ascending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_desc(vector v) Return an array of indices between 1 and the size of v, sorted to index v in descending order.\nAvailable since 2.3\n \n\narray[] int sort_indices_desc(row_vector v) Return an array of indices between 1 and the size of v, sorted to index v in descending order.\nAvailable since 2.3\n \n\nint rank(vector v, int s) Number of components of v less than v[s]\nAvailable since 2.0\n \n\nint rank(row_vector v, int s) Number of components of v less than v[s]\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#reverse-functions", + "href": 
"functions-reference/matrix_operations.html#reverse-functions", + "title": "Matrix Operations", + "section": "", + "text": "vector reverse(vector v) Return a new vector containing the elements of the argument in reverse order.\nAvailable since 2.23\n \n\nrow_vector reverse(row_vector v) Return a new row vector containing the elements of the argument in reverse order.\nAvailable since 2.23", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/matrix_operations.html#footnotes", + "href": "functions-reference/matrix_operations.html#footnotes", + "title": "Matrix Operations", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe softmax function is so called because in the limit as \\(y_n \\rightarrow \\infty\\) with \\(y_m\\) for \\(m \\neq n\\) held constant, the result tends toward the “one-hot” vector \\(\\theta\\) with \\(\\theta_n\n= 1\\) and \\(\\theta_m = 0\\) for \\(m \\neq n\\), thus providing a “soft” version of the maximum function.↩︎", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Matrix Operations" + ] + }, + { + "objectID": "functions-reference/multivariate_discrete_distributions.html", + "href": "functions-reference/multivariate_discrete_distributions.html", + "title": "Multivariate Discrete Distributions", + "section": "", + "text": "The multivariate discrete distributions are over multiple integer values, which are expressed in Stan as arrays.\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(N \\in \\mathbb{N}\\), and \\(\\theta \\in \\text{$K$-simplex}\\), then for \\(y \\in \\mathbb{N}^K\\) such that \\(\\sum_{k=1}^K y_k = N\\), \\[\\begin{equation*}\n\\text{Multinomial}(y|\\theta) = \\binom{N}{y_1,\\ldots,y_K} \\prod_{k=1}^K \\theta_k^{y_k},\n\\end{equation*}\\] where the multinomial coefficient is defined by \\[\\begin{equation*}\n\\binom{N}{y_1,\\ldots,y_k} = \\frac{N!}{\\prod_{k=1}^K y_k!}.\n\\end{equation*}\\]\n\n\n\ny ~ multinomial(theta)\nIncrement 
target log probability density with multinomial_lupmf(y | theta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal multinomial_lpmf(array[] int y | vector theta) The log multinomial probability mass function with outcome array y of size \\(K\\) given the \\(K\\)-simplex distribution parameter theta and (implicit) total count N = sum(y)\nAvailable since 2.12\n \n\nreal multinomial_lupmf(array[] int y | vector theta) The log multinomial probability mass function with outcome array y of size \\(K\\) given the \\(K\\)-simplex distribution parameter theta and (implicit) total count N = sum(y) dropping constant additive terms\nAvailable since 2.25\n \n\narray[] int multinomial_rng(vector theta, int N) Generate a multinomial variate with simplex distribution parameter theta and total count \\(N\\); may only be used in transformed data and generated quantities blocks\nAvailable since 2.8\n\n\n\n\nStan also provides a version of the multinomial probability mass function distribution with the \\(\\text{$K$-simplex}\\) for the event count probabilities per category given on the unconstrained logistic scale.\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(N \\in \\mathbb{N}\\), and \\(\\text{softmax}(\\theta) \\in \\text{$K$-simplex}\\), then for \\(y \\in \\mathbb{N}^K\\) such that \\(\\sum_{k=1}^K y_k = N\\), \\[\\begin{equation*}\n\\begin{split}\n\\text{MultinomialLogit}(y \\mid \\gamma) & = \\text{Multinomial}(y \\mid \\text{softmax}(\\gamma)) \\\\\n& = \\binom{N}{y_1,\\ldots,y_K} \\prod_{k=1}^K [\\text{softmax}(\\gamma_k)]^{y_k},\n\\end{split}\n\\end{equation*}\\] where the multinomial coefficient is defined by \\[\\begin{equation*}\n\\binom{N}{y_1,\\ldots,y_k} = \\frac{N!}{\\prod_{k=1}^K y_k!}.\n\\end{equation*}\\]\n\n\n\ny ~ multinomial_logit(gamma)\nIncrement target log probability density with multinomial_logit_lupmf(y | gamma).\nAvailable since 2.24\n \n\n\n\n\n \n\nreal multinomial_logit_lpmf(array[] int y | vector gamma) The log multinomial probability mass function with outcome 
array y of size \\(K\\) given the log \\(K\\)-simplex distribution parameter \\(\\gamma\\) and (implicit) total count N = sum(y)\nAvailable since 2.24\n \n\nreal multinomial_logit_lupmf(array[] int y | vector gamma) The log multinomial probability mass function with outcome array y of size \\(K\\) given the log \\(K\\)-simplex distribution parameter \\(\\gamma\\) and (implicit) total count N = sum(y) dropping constant additive terms\nAvailable since 2.25\n \n\narray[] int multinomial_logit_rng(vector gamma, int N) Generate a variate from a multinomial distribution with probabilities softmax(gamma) and total count N; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.24\n\n\n\n\nStan also provides the Dirichlet-multinomial distribution, which generalizes the Beta-binomial distribution to more than two categories. As such, it is an overdispersed version of the multinomial distribution.\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(N \\in \\mathbb{N}\\), and \\(\\alpha \\in\n\\mathbb{R}_{+}^K\\), then for \\(y \\in \\mathbb{N}^K\\) such that \\(\\sum_{k=1}^K y_k = N\\), the PMF of the Dirichlet-multinomial distribution is defined as \\[\\begin{equation*}\n\\text{DirMult}(y|\\alpha) =\n\\frac{\\Gamma(\\alpha_0)\\Gamma(N+1)}{\\Gamma(N+\\alpha_0)} \\prod_{k=1}^K \\frac{\\Gamma(y_k + \\alpha_k)}{\\Gamma(\\alpha_k)\\Gamma(y_k+1)},\n\\end{equation*}\\] where \\(\\alpha_0\\) is defined as \\(\\alpha_0 = \\sum_{k=1}^K \\alpha_k\\).\n\n\n\ny ~ dirichlet_multinomial(alpha)\nIncrement target log probability density with dirichlet_multinomial_lupmf(y | alpha).\nAvailable since 2.34\n \n\n\n\n\n \n\nreal dirichlet_multinomial_lpmf(array[] int y | vector alpha) The log multinomial probability mass function with outcome array y with \\(K\\) elements given the positive \\(K\\)-vector distribution parameter alpha and (implicit) total count N = sum(y).\nAvailable since 2.34\n \n\nreal dirichlet_multinomial_lupmf(array[] int y | vector alpha) The log 
multinomial probability mass function with outcome array y with \\(K\\) elements, given the positive \\(K\\)-vector distribution parameter alpha and (implicit) total count N = sum(y) dropping constant additive terms.\nAvailable since 2.34\n \n\narray[] int dirichlet_multinomial_rng(vector alpha, int N) Generate a multinomial variate with positive vector distribution parameter alpha and total count N; may only be used in transformed data and generated quantities blocks. This is equivalent to multinomial_rng(dirichlet_rng(alpha), N).\nAvailable since 2.34", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Multivariate Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/multivariate_discrete_distributions.html#multinomial-distribution", + "href": "functions-reference/multivariate_discrete_distributions.html#multinomial-distribution", + "title": "Multivariate Discrete Distributions", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\), \\(N \\in \\mathbb{N}\\), and \\(\\theta \\in \\text{$K$-simplex}\\), then for \\(y \\in \\mathbb{N}^K\\) such that \\(\\sum_{k=1}^K y_k = N\\), \\[\\begin{equation*}\n\\text{Multinomial}(y|\\theta) = \\binom{N}{y_1,\\ldots,y_K} \\prod_{k=1}^K \\theta_k^{y_k},\n\\end{equation*}\\] where the multinomial coefficient is defined by \\[\\begin{equation*}\n\\binom{N}{y_1,\\ldots,y_k} = \\frac{N!}{\\prod_{k=1}^K y_k!}.\n\\end{equation*}\\]\n\n\n\ny ~ multinomial(theta)\nIncrement target log probability density with multinomial_lupmf(y | theta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal multinomial_lpmf(array[] int y | vector theta) The log multinomial probability mass function with outcome array y of size \\(K\\) given the \\(K\\)-simplex distribution parameter theta and (implicit) total count N = sum(y)\nAvailable since 2.12\n \n\nreal multinomial_lupmf(array[] int y | vector theta) The log multinomial probability mass function with outcome array y of size \\(K\\) given the \\(K\\)-simplex 
distribution parameter theta and (implicit) total count N = sum(y) dropping constant additive terms\nAvailable since 2.25\n \n\narray[] int multinomial_rng(vector theta, int N) Generate a multinomial variate with simplex distribution parameter theta and total count \\(N\\); may only be used in transformed data and generated quantities blocks\nAvailable since 2.8", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Multivariate Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/multivariate_discrete_distributions.html#multinomial-distribution-logit-parameterization", + "href": "functions-reference/multivariate_discrete_distributions.html#multinomial-distribution-logit-parameterization", + "title": "Multivariate Discrete Distributions", + "section": "", + "text": "Stan also provides a version of the multinomial probability mass function distribution with the \\(\\text{$K$-simplex}\\) for the event count probabilities per category given on the unconstrained logistic scale.\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(N \\in \\mathbb{N}\\), and \\(\\text{softmax}(\\theta) \\in \\text{$K$-simplex}\\), then for \\(y \\in \\mathbb{N}^K\\) such that \\(\\sum_{k=1}^K y_k = N\\), \\[\\begin{equation*}\n\\begin{split}\n\\text{MultinomialLogit}(y \\mid \\gamma) & = \\text{Multinomial}(y \\mid \\text{softmax}(\\gamma)) \\\\\n& = \\binom{N}{y_1,\\ldots,y_K} \\prod_{k=1}^K [\\text{softmax}(\\gamma_k)]^{y_k},\n\\end{split}\n\\end{equation*}\\] where the multinomial coefficient is defined by \\[\\begin{equation*}\n\\binom{N}{y_1,\\ldots,y_k} = \\frac{N!}{\\prod_{k=1}^K y_k!}.\n\\end{equation*}\\]\n\n\n\ny ~ multinomial_logit(gamma)\nIncrement target log probability density with multinomial_logit_lupmf(y | gamma).\nAvailable since 2.24\n \n\n\n\n\n \n\nreal multinomial_logit_lpmf(array[] int y | vector gamma) The log multinomial probability mass function with outcome array y of size \\(K\\) given the log \\(K\\)-simplex distribution parameter 
\\(\\gamma\\) and (implicit) total count N = sum(y)\nAvailable since 2.24\n \n\nreal multinomial_logit_lupmf(array[] int y | vector gamma) The log multinomial probability mass function with outcome array y of size \\(K\\) given the log \\(K\\)-simplex distribution parameter \\(\\gamma\\) and (implicit) total count N = sum(y) dropping constant additive terms\nAvailable since 2.25\n \n\narray[] int multinomial_logit_rng(vector gamma, int N) Generate a variate from a multinomial distribution with probabilities softmax(gamma) and total count N; may only be used in transformed data and generated quantities blocks.\nAvailable since 2.24", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Multivariate Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/multivariate_discrete_distributions.html#dirichlet-multinomial-distribution", + "href": "functions-reference/multivariate_discrete_distributions.html#dirichlet-multinomial-distribution", + "title": "Multivariate Discrete Distributions", + "section": "", + "text": "Stan also provides the Dirichlet-multinomial distribution, which generalizes the Beta-binomial distribution to more than two categories. 
As such, it is an overdispersed version of the multinomial distribution.\n\n\nIf \\(K \\in \\mathbb{N}\\), \\(N \\in \\mathbb{N}\\), and \\(\\alpha \\in\n\\mathbb{R}_{+}^K\\), then for \\(y \\in \\mathbb{N}^K\\) such that \\(\\sum_{k=1}^K y_k = N\\), the PMF of the Dirichlet-multinomial distribution is defined as \\[\\begin{equation*}\n\\text{DirMult}(y|\\alpha) =\n\\frac{\\Gamma(\\alpha_0)\\Gamma(N+1)}{\\Gamma(N+\\alpha_0)} \\prod_{k=1}^K \\frac{\\Gamma(y_k + \\alpha_k)}{\\Gamma(\\alpha_k)\\Gamma(y_k+1)},\n\\end{equation*}\\] where \\(\\alpha_0\\) is defined as \\(\\alpha_0 = \\sum_{k=1}^K \\alpha_k\\).\n\n\n\ny ~ dirichlet_multinomial(alpha)\nIncrement target log probability density with dirichlet_multinomial_lupmf(y | alpha).\nAvailable since 2.34\n \n\n\n\n\n \n\nreal dirichlet_multinomial_lpmf(array[] int y | vector alpha) The log multinomial probability mass function with outcome array y with \\(K\\) elements given the positive \\(K\\)-vector distribution parameter alpha and (implicit) total count N = sum(y).\nAvailable since 2.34\n \n\nreal dirichlet_multinomial_lupmf(array[] int y | vector alpha) The log multinomial probability mass function with outcome array y with \\(K\\) elements, given the positive \\(K\\)-vector distribution parameter alpha and (implicit) total count N = sum(y) dropping constant additive terms.\nAvailable since 2.34\n \n\narray[] int dirichlet_multinomial_rng(vector alpha, int N) Generate a multinomial variate with positive vector distribution parameter alpha and total count N; may only be used in transformed data and generated quantities blocks. 
This is equivalent to multinomial_rng(dirichlet_rng(alpha), N).\nAvailable since 2.34", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Multivariate Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/positive_lower-bounded_distributions.html", + "href": "functions-reference/positive_lower-bounded_distributions.html", + "title": "Positive Lower-Bounded Distributions", + "section": "", + "text": "The positive lower-bounded probabilities have support on real values above some positive minimum value.\n\n\n\n\nIf \\(y_{\\text{min}} \\in \\mathbb{R}^+\\) and \\(\\alpha \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\) with \\(y \\geq y_{\\text{min}}\\), \\[\\begin{equation*}\n\\text{Pareto}(y|y_{\\text{min}},\\alpha) = \\frac{\\displaystyle \\alpha\\,y_{\\text{min}}^\\alpha}{\\displaystyle y^{\\alpha+1}}.\n\\end{equation*}\\]\n\n\n\ny ~ pareto(y_min, alpha)\nIncrement target log probability density with pareto_lupdf(y | y_min, alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal pareto_lpdf(reals y | reals y_min, reals alpha) The log of the Pareto density of y given positive minimum value y_min and shape alpha\nAvailable since 2.12\n \n\nreal pareto_lupdf(reals y | reals y_min, reals alpha) The log of the Pareto density of y given positive minimum value y_min and shape alpha dropping constant additive terms\nAvailable since 2.25\n \n\nreal pareto_cdf(reals y | reals y_min, reals alpha) The Pareto cumulative distribution function of y given positive minimum value y_min and shape alpha\nAvailable since 2.0\n \n\nreal pareto_lcdf(reals y | reals y_min, reals alpha) The log of the Pareto cumulative distribution function of y given positive minimum value y_min and shape alpha\nAvailable since 2.12\n \n\nreal pareto_lccdf(reals y | reals y_min, reals alpha) The log of the Pareto complementary cumulative distribution function of y given positive minimum value y_min and shape alpha\nAvailable since 2.12\n \n\nR pareto_rng(reals 
y_min, reals alpha) Generate a Pareto variate with positive minimum value y_min and shape alpha; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\n\n\nIf \\(\\mu \\in \\mathbb{R}\\), \\(\\lambda \\in \\mathbb{R}^+\\), and \\(\\alpha \\in \\mathbb{R}^+\\), then for \\(y \\geq \\mu\\), \\[\\begin{equation*}\n\\mathrm{Pareto\\_Type\\_2}(y|\\mu,\\lambda,\\alpha) =\n\\ \\frac{\\alpha}{\\lambda} \\, \\left( 1+\\frac{y-\\mu}{\\lambda} \\right)^{-(\\alpha+1)} \\! .\n\\end{equation*}\\]\nNote that the Lomax distribution is a Pareto Type 2 distribution with \\(\\mu=0\\).\n\n\n\ny ~ pareto_type_2(mu, lambda, alpha)\nIncrement target log probability density with pareto_type_2_lupdf(y | mu, lambda, alpha).\nAvailable since 2.5\n \n\n\n\n\n \n\nreal pareto_type_2_lpdf(reals y | reals mu, reals lambda, reals alpha) The log of the Pareto Type 2 density of y given location mu, scale lambda, and shape alpha\nAvailable since 2.18\n \n\nreal pareto_type_2_lupdf(reals y | reals mu, reals lambda, reals alpha) The log of the Pareto Type 2 density of y given location mu, scale lambda, and shape alpha dropping constant additive terms\nAvailable since 2.25\n \n\nreal pareto_type_2_cdf(reals y | reals mu, reals lambda, reals alpha) The Pareto Type 2 cumulative distribution function of y given location mu, scale lambda, and shape alpha\nAvailable since 2.5\n \n\nreal pareto_type_2_lcdf(reals y | reals mu, reals lambda, reals alpha) The log of the Pareto Type 2 cumulative distribution function of y given location mu, scale lambda, and shape alpha\nAvailable since 2.18\n \n\nreal pareto_type_2_lccdf(reals y | reals mu, reals lambda, reals alpha) The log of the Pareto Type 2 complementary cumulative distribution function of y given location mu, scale lambda, and shape alpha\nAvailable since 2.18\n \n\nR pareto_type_2_rng(reals mu, reals lambda, reals alpha) 
Generate a Pareto Type 2 variate with location mu, scale lambda, and shape alpha; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18\n\n\n\n\nFor an extended explanation of how to use the wiener_lpdf and wiener_l[c]cdf_unnorm functions, see Henrich et al. (2024).\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\), \\(\\tau \\in \\mathbb{R}^+\\), \\(\\beta \\in (0, 1)\\), \\(\\delta \\in \\mathbb{R}\\), \\(s_{\\delta} \\in \\mathbb{R}^{\\geq 0}\\), \\(s_{\\beta} \\in [0, 1)\\), and \\(s_{\\tau} \\in \\mathbb{R}^{\\geq 0}\\) then for \\(y > \\tau\\),\n\\[\\begin{equation*}\n\\begin{split}\n&\\text{Wiener}(y\\mid \\alpha,\\tau,\\beta,\\delta,s_{\\delta},s_{\\beta},s_{\\tau}) =\n\\\\\n&\\frac{1}{s_{\\tau}}\\int_{\\tau}^{\\tau+s_{\\tau}}\\frac{1}{s_{\\beta}}\\int_{\\beta-\\frac{1}{2}s_{\\beta}}^{\\beta+\\frac{1}{2}s_{\\beta}}\\int_{-\\infty}^{\\infty} p_3(y-{\\tau_0}\\mid \\alpha,\\nu,\\omega)\n\\\\\n&\\times \\frac{1}{\\sqrt{2\\pi s_{\\delta}^2}}\\exp\\Bigl(-\\frac{(\\nu-\\delta)^2}{2s_{\\delta}^2}\\Bigr) \\,d\\nu \\,d\\omega \\,d{\\tau_0}=\n\\\\\n&\\frac{1}{s_{\\tau}}\\int_{\\tau}^{\\tau+s_{\\tau}}\\frac{1}{s_{\\beta}}\\int_{\\beta-\\frac{1}{2}s_{\\beta}}^{\\beta+\\frac{1}{2}s_{\\beta}} M\\times p_3(y-{\\tau_0}\\mid \\alpha,\\nu,\\omega) \\,d\\omega \\,d{\\tau_0},\n\\end{split}\n\\end{equation*}\\]\nwhere \\(p()\\) denotes the density function, and \\(M\\) and \\(p_3()\\) are defined, by using \\(t:=y-{\\tau_0}\\), as\n\\[\\begin{equation*}\nM \\coloneqq \\frac{1}{\\sqrt{1+s_{\\delta}^2t}}\\exp\\Bigl(\\alpha{\\delta}\\omega+\\frac{\\delta^2t}{2}+\\frac{s_{\\delta}^2\\alpha^2\\omega^2-2\\alpha{\\delta}\\omega-\\delta^2t}{2(1+s_{\\delta}^2t)}\\Bigr)\\text{ and}\n\\end{equation*}\\]\n\\[\\begin{equation*}\np_3(t\\mid \\alpha,\\delta,\\beta) \\coloneqq 
\\frac{1}{\\alpha^2}\\exp\\Bigl(-\\alpha\\delta\\beta-\\frac{\\delta^2t}{2}\\Bigr)f(\\frac{t}{\\alpha^2}\\mid 0,1,\\beta),\n\\end{equation*}\\]\nwhere \\(f(t^*=\\frac{t}{\\alpha^2}\\mid0,1,\\beta)\\) can be specified in two ways:\n\\[\\begin{equation*}\nf_l(t^*\\mid 0,1,\\beta) = \\sum_{k=1}^\\infty k\\pi \\exp\\Bigl(-\\frac{k^2\\pi^2t^*}{2}\\Bigr)\\sin(k\\pi \\beta)\\text{ and}\n\\end{equation*}\\]\n\\[\\begin{equation*}\nf_s(t^*\\mid0,1,\\beta) = \\sum_{k=-\\infty}^\\infty \\frac{1}{\\sqrt{2\\pi(t^*)^3}}(\\beta+2k) \\exp\\Bigl(-\\frac{(\\beta+2k)^2}{2t^*}\\Bigr).\n\\end{equation*}\\]\nWhich of these is used in the computations depends on which expression requires the smaller number of components \\(k\\) to guarantee a pre-specified precision\nIn the case where \\(s_{\\delta}\\), \\(s_{\\beta}\\), and \\(s_{\\tau}\\) are all \\(0\\), this simplifies to one representation that converges fast for small reaction-time values (“small time expansion”): \\[\\begin{equation*}\n\\text{Wiener}(y|\\alpha, \\tau, \\beta, \\delta) =\n\\frac{\\alpha}{(y-\\tau)^{3/2}} \\exp \\! \\left(- \\delta \\alpha \\beta -\n\\frac{\\delta^2(y-\\tau)}{2}\\right) \\sum_{k = - \\infty}^{\\infty} (2k +\n\\beta) \\phi \\! \\left(\\frac{(2k + \\beta)\\alpha }{\\sqrt{y - \\tau}}\\right),\n\\end{equation*}\\] where \\(\\phi(x)\\) denotes the standard normal density function, and one representation that converges fast for large reaction-time values (“large time expansion”): \\[\\begin{equation*}\n\\text{Wiener}(y|\\alpha, \\tau, \\beta, \\delta) =\n\\frac{\\pi}{\\alpha^2} \\exp \\! \\left(- \\delta \\alpha \\beta -\n\\frac{\\delta^2(y-\\tau)}{2}\\right) \\sum_{k = 1}^{\\infty} k \\exp \\! 
\\left(-\\frac{k^2\\pi^2(y-\\tau)}\n{2\\alpha^2}\\right) \\sin \\!(k\\pi\\beta)\n\\end{equation*}\\] see (Feller 1968), (Navarro and Fuss 2009).\n\n\n\nFor the cumulative distribution function (cdf) there also exist two expressions depending on the reaction time.\nLet \\(\\alpha\\), \\(\\tau\\), \\(\\beta\\), \\(\\delta\\), \\(s_{\\delta}\\), \\(s_{\\beta}\\), \\(s_{\\tau}\\) and \\(y\\) be as above.\nThe formula for the large-time cdf of decision times (excluding the additive reaction time components summarized in \\(\\tau\\) for the time being) at the upper boundary is stated as follows:\n\\[\\begin{equation}\nF(y|\\alpha, \\beta, \\delta) = P(\\alpha, \\beta, \\delta) -\n \\exp\\left(\\delta\\alpha(1-\\beta)-\\frac{\\delta^2 y}{2}\\right)F_l(y|\\alpha,\\beta,\\delta),\n\\end{equation}\\] where \\(P(\\alpha,\\beta,\\delta)\\) is the probability to hit the upper boundary, defined as\n\\[\\begin{equation}\nP(\\alpha, \\beta, \\delta) =\n\\begin{cases}\n \\frac{1-\\exp(2\\delta \\alpha \\beta)}{\\exp(-2\\delta \\alpha(1-\\beta)) - \\exp(2\\delta \\alpha \\beta)}, & \\text{for } \\delta\\neq 0 \\\\\n \\beta, & \\text{for } \\delta=0,\n\\end{cases}\n\\end{equation}\\]\nand\n\\[\\begin{equation}\nF_l(y|\\alpha, \\beta, \\delta) =\n \\frac{2\\pi}{\\alpha^2}\\sum_{k=1}^{\\infty}{\\frac{k\\sin{k\\pi(1-\\beta)}}{\\delta^2+(k\\pi)^2/\\alpha^2}}\\exp(-\\frac{k^2\\pi^2y}{2\\alpha^2}).\n\\end{equation}\\]\nThe formula for the small-time cdf at the upper boundary is stated as follows:\n\\[\\begin{equation}\nF(y|\\alpha,\\beta,\\delta) = \\exp\\left(\\delta \\alpha(1-\\beta) -\\frac{\\delta^2y}{2}\\right)F_s(y|\\alpha, \\beta,\\delta),\n\\end{equation}\\] where\n\\[\\begin{equation}\nF_s(y|\\alpha,\\beta,\\delta) = \\sum_{k=0}^{\\infty}(-1)^k\\phi\\left(\\frac{\\alpha(k+\\beta^{*}_k)}\n {\\sqrt{y}} \\right) \\times \\left( R \\left(\\frac{\\alpha(k+\\beta^{*}_k)+\\delta y}{\\sqrt{y}} \\right)\n + R \\left(\\frac{\\alpha(k+\\beta^*_k)-\\delta y}{\\sqrt{y}} 
\\right)\\right),\n\\end{equation}\\]\nwhere \\(\\beta^*_k=(1-\\beta)\\) for \\(k\\) even, \\(\\beta^*_k=\\beta\\) for \\(k\\) odd, and \\(R\\) is Mill’s ratio.\nThe cdf for the lower boundary is \\(F(y|\\alpha,1-\\beta,-\\delta)\\)\n\n\n\ny ~ wiener(alpha, tau, beta, delta)\nIncrement target log probability density with wiener_lupdf(y | alpha, tau, beta, delta).\nAvailable since 2.7\ny ~ wiener(alpha, tau, beta, delta, var_delta) Increment target log probability density with wiener_lupdf(y | alpha, tau, beta, delta, var_delta).\nAvailable since 2.35\ny ~ wiener(alpha, tau, beta, delta, var_delta, var_beta, var_tau) Increment target log probability density with wiener_lupdf(y | alpha, tau, beta, delta, var_delta, var_beta, var_tau).\nAvailable since 2.35\n \n\n\n\n\n \n\nreal wiener_lpdf(reals y | reals alpha, reals tau, reals beta, reals delta) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.\nAvailable since 2.18\n \n\nreal wiener_lpdf(real y | real alpha, real tau, real beta, real delta, real var_delta) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, and inter-trial drift rate variability var_delta.\nSetting var_delta to 0 recovers the 4-parameter signature above.\nAvailable since 2.35\n \n\nreal wiener_lpdf(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.\nSetting var_delta, var_beta, and var_tau to 0 recovers the 4-parameter signature above.\nAvailable since 2.35\n \n\nreal wiener_lupdf(reals y | 
reals alpha, reals tau, reals beta, reals delta) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta, dropping constant additive terms\nAvailable since 2.25\n \n\nreal wiener_lupdf(real y | real alpha, real tau, real beta, real delta, real var_delta) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, and inter-trial drift rate variability var_delta, dropping constant additive terms.\nSetting var_delta to 0 recovers the 4-parameter signature above.\nAvailable since 2.35\n \n\nreal wiener_lupdf(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau, dropping constant additive terms.\nSetting var_delta, var_beta, and var_tau to 0 recovers the 4-parameter signature above.\nAvailable since 2.35\n\nNote: The lcdf and lccdf functions for the wiener distribution are conditional and unnormalized, meaning that the cdf does not asymptote at 1, but rather at the probability to hit the upper boundary.\nSimilarly, the ccdf is defined as the probability to hit the upper boundary less the value of the cdf, as opposed to the more typical \\(1 - \\textrm{cdf}\\).\n \n\nreal wiener_lcdf_unnorm(real y, real alpha, real tau, real beta, real delta)\nThe log of the cumulative distribution function (cdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.\nAvailable since 2.38\n \n\nreal wiener_lccdf_unnorm(real y, real alpha, real tau, real beta, real 
delta)\nThe log of the complementary cumulative distribution function (ccdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.\nAvailable since 2.38\n \n\nreal wiener_lcdf_unnorm(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau)\nThe log of the cumulative distribution function (cdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.\nAvailable since 2.38\n \n\nreal wiener_lccdf_unnorm(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau)\nThe log of the complementary cumulative distribution function (ccdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.\nAvailable since 2.38\n\n\n\nStan returns the first passage time of the accumulation process over the upper boundary only. To get the result for the lower boundary, use \\[\\begin{equation*}\n\\text{Wiener}(y | \\alpha, \\tau, 1 - \\beta, - \\delta)\n\\end{equation*}\\] For more details, see the appendix of Vandekerckhove and Wabersich (2014).\n\n\n\nThe 5- and 7-argument forms of the wiener distribution functions (listed above as receiving only real) are implemented in such a way that they can be fully vectorized, but currently only versions that accept all real and all vector arguments are exposed by Stan. 
If there are additional signatures that would prove useful, please request them by opening an issue.\n\n\n\nThe 5- and 7-argument forms of the wiener distribution functions can also accept an additional data real argument controlling the required precision of the gradient calculation of the function. If omitted, this defaults to 1e-4 for the density and 1e-8 for the cdf functions.", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Lower-Bounded Distributions" + ] + }, + { + "objectID": "functions-reference/positive_lower-bounded_distributions.html#pareto-distribution", + "href": "functions-reference/positive_lower-bounded_distributions.html#pareto-distribution", + "title": "Positive Lower-Bounded Distributions", + "section": "", + "text": "If \\(y_{\\text{min}} \\in \\mathbb{R}^+\\) and \\(\\alpha \\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{R}^+\\) with \\(y \\geq y_{\\text{min}}\\), \\[\\begin{equation*}\n\\text{Pareto}(y|y_{\\text{min}},\\alpha) = \\frac{\\displaystyle \\alpha\\,y_{\\text{min}}^\\alpha}{\\displaystyle y^{\\alpha+1}}.\n\\end{equation*}\\]\n\n\n\ny ~ pareto(y_min, alpha)\nIncrement target log probability density with pareto_lupdf(y | y_min, alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal pareto_lpdf(reals y | reals y_min, reals alpha) The log of the Pareto density of y given positive minimum value y_min and shape alpha\nAvailable since 2.12\n \n\nreal pareto_lupdf(reals y | reals y_min, reals alpha) The log of the Pareto density of y given positive minimum value y_min and shape alpha dropping constant additive terms\nAvailable since 2.25\n \n\nreal pareto_cdf(reals y | reals y_min, reals alpha) The Pareto cumulative distribution function of y given positive minimum value y_min and shape alpha\nAvailable since 2.0\n \n\nreal pareto_lcdf(reals y | reals y_min, reals alpha) The log of the Pareto cumulative distribution function of y given positive minimum value y_min and shape alpha\nAvailable since 2.12\n 
\n\nreal pareto_lccdf(reals y | reals y_min, reals alpha) The log of the Pareto complementary cumulative distribution function of y given positive minimum value y_min and shape alpha\nAvailable since 2.12\n \n\nR pareto_rng(reals y_min, reals alpha) Generate a Pareto variate with positive minimum value y_min and shape alpha; may only be used in transformed data and generated quantities blocks. For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Lower-Bounded Distributions" + ] + }, + { + "objectID": "functions-reference/positive_lower-bounded_distributions.html#pareto-type-2-distribution", + "href": "functions-reference/positive_lower-bounded_distributions.html#pareto-type-2-distribution", + "title": "Positive Lower-Bounded Distributions", + "section": "", + "text": "If \\(\\mu \\in \\mathbb{R}\\), \\(\\lambda \\in \\mathbb{R}^+\\), and \\(\\alpha \\in \\mathbb{R}^+\\), then for \\(y \\geq \\mu\\), \\[\\begin{equation*}\n\\mathrm{Pareto\\_Type\\_2}(y|\\mu,\\lambda,\\alpha) =\n\\ \\frac{\\alpha}{\\lambda} \\, \\left( 1+\\frac{y-\\mu}{\\lambda} \\right)^{-(\\alpha+1)} \\! 
.\n\\end{equation*}\\]\nNote that the Lomax distribution is a Pareto Type 2 distribution with \\(\\mu=0\\).\n\n\n\ny ~ pareto_type_2(mu, lambda, alpha)\nIncrement target log probability density with pareto_type_2_lupdf(y | mu, lambda, alpha).\nAvailable since 2.5\n \n\n\n\n\n \n\nreal pareto_type_2_lpdf(reals y | reals mu, reals lambda, reals alpha) The log of the Pareto Type 2 density of y given location mu, scale lambda, and shape alpha\nAvailable since 2.18\n \n\nreal pareto_type_2_lupdf(reals y | reals mu, reals lambda, reals alpha) The log of the Pareto Type 2 density of y given location mu, scale lambda, and shape alpha dropping constant additive terms\nAvailable since 2.25\n \n\nreal pareto_type_2_cdf(reals y | reals mu, reals lambda, reals alpha) The Pareto Type 2 cumulative distribution function of y given location mu, scale lambda, and shape alpha\nAvailable since 2.5\n \n\nreal pareto_type_2_lcdf(reals y | reals mu, reals lambda, reals alpha) The log of the Pareto Type 2 cumulative distribution function of y given location mu, scale lambda, and shape alpha\nAvailable since 2.18\n \n\nreal pareto_type_2_lccdf(reals y | reals mu, reals lambda, reals alpha) The log of the Pareto Type 2 complementary cumulative distribution function of y given location mu, scale lambda, and shape alpha\nAvailable since 2.18\n \n\nR pareto_type_2_rng(reals mu, reals lambda, reals alpha) Generate a Pareto Type 2 variate with location mu, scale lambda, and shape alpha; may only be used in transformed data and generated quantities blocks. 
For a description of argument and return types, see section vectorized PRNG functions.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Lower-Bounded Distributions" + ] + }, + { + "objectID": "functions-reference/positive_lower-bounded_distributions.html#wiener-first-passage-time-distribution", + "href": "functions-reference/positive_lower-bounded_distributions.html#wiener-first-passage-time-distribution", + "title": "Positive Lower-Bounded Distributions", + "section": "", + "text": "For an extended explanation of how to use the wiener_lpdf and wiener_l[c]cdf_unnorm functions, see Henrich et al. (2024).\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\), \\(\\tau \\in \\mathbb{R}^+\\), \\(\\beta \\in (0, 1)\\), \\(\\delta \\in \\mathbb{R}\\), \\(s_{\\delta} \\in \\mathbb{R}^{\\geq 0}\\), \\(s_{\\beta} \\in [0, 1)\\), and \\(s_{\\tau} \\in \\mathbb{R}^{\\geq 0}\\) then for \\(y > \\tau\\),\n\\[\\begin{equation*}\n\\begin{split}\n&\\text{Wiener}(y\\mid \\alpha,\\tau,\\beta,\\delta,s_{\\delta},s_{\\beta},s_{\\tau}) =\n\\\\\n&\\frac{1}{s_{\\tau}}\\int_{\\tau}^{\\tau+s_{\\tau}}\\frac{1}{s_{\\beta}}\\int_{\\beta-\\frac{1}{2}s_{\\beta}}^{\\beta+\\frac{1}{2}s_{\\beta}}\\int_{-\\infty}^{\\infty} p_3(y-{\\tau_0}\\mid \\alpha,\\nu,\\omega)\n\\\\\n&\\times \\frac{1}{\\sqrt{2\\pi s_{\\delta}^2}}\\exp\\Bigl(-\\frac{(\\nu-\\delta)^2}{2s_{\\delta}^2}\\Bigr) \\,d\\nu \\,d\\omega \\,d{\\tau_0}=\n\\\\\n&\\frac{1}{s_{\\tau}}\\int_{\\tau}^{\\tau+s_{\\tau}}\\frac{1}{s_{\\beta}}\\int_{\\beta-\\frac{1}{2}s_{\\beta}}^{\\beta+\\frac{1}{2}s_{\\beta}} M\\times p_3(y-{\\tau_0}\\mid \\alpha,\\nu,\\omega) \\,d\\omega \\,d{\\tau_0},\n\\end{split}\n\\end{equation*}\\]\nwhere \\(p()\\) denotes the density function, and \\(M\\) and \\(p_3()\\) are defined, by using \\(t:=y-{\\tau_0}\\), as\n\\[\\begin{equation*}\nM \\coloneqq 
\\frac{1}{\\sqrt{1+s_{\\delta}^2t}}\\exp\\Bigl(\\alpha{\\delta}\\omega+\\frac{\\delta^2t}{2}+\\frac{s_{\\delta}^2\\alpha^2\\omega^2-2\\alpha{\\delta}\\omega-\\delta^2t}{2(1+s_{\\delta}^2t)}\\Bigr)\\text{ and}\n\\end{equation*}\\]\n\\[\\begin{equation*}\np_3(t\\mid \\alpha,\\delta,\\beta) \\coloneqq \\frac{1}{\\alpha^2}\\exp\\Bigl(-\\alpha\\delta\\beta-\\frac{\\delta^2t}{2}\\Bigr)f(\\frac{t}{\\alpha^2}\\mid 0,1,\\beta),\n\\end{equation*}\\]\nwhere \\(f(t^*=\\frac{t}{\\alpha^2}\\mid0,1,\\beta)\\) can be specified in two ways:\n\\[\\begin{equation*}\nf_l(t^*\\mid 0,1,\\beta) = \\sum_{k=1}^\\infty k\\pi \\exp\\Bigl(-\\frac{k^2\\pi^2t^*}{2}\\Bigr)\\sin(k\\pi \\beta)\\text{ and}\n\\end{equation*}\\]\n\\[\\begin{equation*}\nf_s(t^*\\mid0,1,\\beta) = \\sum_{k=-\\infty}^\\infty \\frac{1}{\\sqrt{2\\pi(t^*)^3}}(\\beta+2k) \\exp\\Bigl(-\\frac{(\\beta+2k)^2}{2t^*}\\Bigr).\n\\end{equation*}\\]\nWhich of these is used in the computations depends on which expression requires the smaller number of components \\(k\\) to guarantee a pre-specified precision\nIn the case where \\(s_{\\delta}\\), \\(s_{\\beta}\\), and \\(s_{\\tau}\\) are all \\(0\\), this simplifies to one representation that converges fast for small reaction-time values (“small time expansion”): \\[\\begin{equation*}\n\\text{Wiener}(y|\\alpha, \\tau, \\beta, \\delta) =\n\\frac{\\alpha}{(y-\\tau)^{3/2}} \\exp \\! \\left(- \\delta \\alpha \\beta -\n\\frac{\\delta^2(y-\\tau)}{2}\\right) \\sum_{k = - \\infty}^{\\infty} (2k +\n\\beta) \\phi \\! \\left(\\frac{(2k + \\beta)\\alpha }{\\sqrt{y - \\tau}}\\right),\n\\end{equation*}\\] where \\(\\phi(x)\\) denotes the standard normal density function, and one representation that converges fast for large reaction-time values (“large time expansion”): \\[\\begin{equation*}\n\\text{Wiener}(y|\\alpha, \\tau, \\beta, \\delta) =\n\\frac{\\pi}{\\alpha^2} \\exp \\! \\left(- \\delta \\alpha \\beta -\n\\frac{\\delta^2(y-\\tau)}{2}\\right) \\sum_{k = 1}^{\\infty} k \\exp \\! 
\\left(-\\frac{k^2\\pi^2(y-\\tau)}\n{2\\alpha^2}\\right) \\sin \\!(k\\pi\\beta)\n\\end{equation*}\\] see (Feller 1968), (Navarro and Fuss 2009).\n\n\n\nFor the cumulative distribution function (cdf) there also exist two expressions depending on the reaction time.\nLet \\(\\alpha\\), \\(\\tau\\), \\(\\beta\\), \\(\\delta\\), \\(s_{\\delta}\\), \\(s_{\\beta}\\), \\(s_{\\tau}\\) and \\(y\\) be as above.\nThe formula for the large-time cdf of decision times (excluding the additive reaction time components summarized in \\(\\tau\\) for the time being) at the upper boundary is stated as follows:\n\\[\\begin{equation}\nF(y|\\alpha, \\beta, \\delta) = P(\\alpha, \\beta, \\delta) -\n \\exp\\left(\\delta\\alpha(1-\\beta)-\\frac{\\delta^2 y}{2}\\right)F_l(y|\\alpha,\\beta,\\delta),\n\\end{equation}\\] where \\(P(\\alpha,\\beta,\\delta)\\) is the probability to hit the upper boundary, defined as\n\\[\\begin{equation}\nP(\\alpha, \\beta, \\delta) =\n\\begin{cases}\n \\frac{1-\\exp(2\\delta \\alpha \\beta)}{\\exp(-2\\delta \\alpha(1-\\beta)) - \\exp(2\\delta \\alpha \\beta)}, & \\text{for } \\delta\\neq 0 \\\\\n \\beta, & \\text{for } \\delta=0,\n\\end{cases}\n\\end{equation}\\]\nand\n\\[\\begin{equation}\nF_l(y|\\alpha, \\beta, \\delta) =\n \\frac{2\\pi}{\\alpha^2}\\sum_{k=1}^{\\infty}{\\frac{k\\sin{k\\pi(1-\\beta)}}{\\delta^2+(k\\pi)^2/\\alpha^2}}\\exp(-\\frac{k^2\\pi^2y}{2\\alpha^2}).\n\\end{equation}\\]\nThe formula for the small-time cdf at the upper boundary is stated as follows:\n\\[\\begin{equation}\nF(y|\\alpha,\\beta,\\delta) = \\exp\\left(\\delta \\alpha(1-\\beta) -\\frac{\\delta^2y}{2}\\right)F_s(y|\\alpha, \\beta,\\delta),\n\\end{equation}\\] where\n\\[\\begin{equation}\nF_s(y|\\alpha,\\beta,\\delta) = \\sum_{k=0}^{\\infty}(-1)^k\\phi\\left(\\frac{\\alpha(k+\\beta^{*}_k)}\n {\\sqrt{y}} \\right) \\times \\left( R \\left(\\frac{\\alpha(k+\\beta^{*}_k)+\\delta y}{\\sqrt{y}} \\right)\n + R \\left(\\frac{\\alpha(k+\\beta^*_k)-\\delta y}{\\sqrt{y}} 
\\right)\\right),\n\\end{equation}\\]\nwhere \\(\\beta^*_k=(1-\\beta)\\) for \\(k\\) even, \\(\\beta^*_k=\\beta\\) for \\(k\\) odd, and \\(R\\) is Mill’s ratio.\nThe cdf for the lower boundary is \\(F(y|\\alpha,1-\\beta,-\\delta)\\)\n\n\n\ny ~ wiener(alpha, tau, beta, delta)\nIncrement target log probability density with wiener_lupdf(y | alpha, tau, beta, delta).\nAvailable since 2.7\ny ~ wiener(alpha, tau, beta, delta, var_delta) Increment target log probability density with wiener_lupdf(y | alpha, tau, beta, delta, var_delta).\nAvailable since 2.35\ny ~ wiener(alpha, tau, beta, delta, var_delta, var_beta, var_tau) Increment target log probability density with wiener_lupdf(y | alpha, tau, beta, delta, var_delta, var_beta, var_tau).\nAvailable since 2.35\n \n\n\n\n\n \n\nreal wiener_lpdf(reals y | reals alpha, reals tau, reals beta, reals delta) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.\nAvailable since 2.18\n \n\nreal wiener_lpdf(real y | real alpha, real tau, real beta, real delta, real var_delta) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, and inter-trial drift rate variability var_delta.\nSetting var_delta to 0 recovers the 4-parameter signature above.\nAvailable since 2.35\n \n\nreal wiener_lpdf(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.\nSetting var_delta, var_beta, and var_tau to 0 recovers the 4-parameter signature above.\nAvailable since 2.35\n \n\nreal wiener_lupdf(reals y | 
reals alpha, reals tau, reals beta, reals delta) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta, dropping constant additive terms\nAvailable since 2.25\n \n\nreal wiener_lupdf(real y | real alpha, real tau, real beta, real delta, real var_delta) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, and inter-trial drift rate variability var_delta, dropping constant additive terms.\nSetting var_delta to 0 recovers the 4-parameter signature above.\nAvailable since 2.35\n \n\nreal wiener_lupdf(real y | real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau) The log of the Wiener first passage time density of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau, dropping constant additive terms.\nSetting var_delta, var_beta, and var_tau to 0 recovers the 4-parameter signature above.\nAvailable since 2.35\n\nNote: The lcdf and lccdf functions for the wiener distribution are conditional and unnormalized, meaning that the cdf does not asymptote at 1, but rather at the probability to hit the upper boundary.\nSimilarly, the ccdf is defined as the probability to hit the upper boundary less the value of the cdf, as opposed to the more typical \\(1 - \\textrm{cdf}\\).\n \n\nreal wiener_lcdf_unnorm(real y, real alpha, real tau, real beta, real delta)\nThe log of the cumulative distribution function (cdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.\nAvailable since 2.38\n \n\nreal wiener_lccdf_unnorm(real y, real alpha, real tau, real beta, real 
delta)\nThe log of the complementary cumulative distribution function (ccdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, and drift rate delta.\nAvailable since 2.38\n \n\nreal wiener_lcdf_unnorm(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau)\nThe log of the cumulative distribution function (cdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.\nAvailable since 2.38\n \n\nreal wiener_lccdf_unnorm(real y, real alpha, real tau, real beta, real delta, real var_delta, real var_beta, real var_tau)\nThe log of the complementary cumulative distribution function (ccdf) of the Wiener distribution of y given boundary separation alpha, non-decision time tau, starting point beta, drift rate delta, inter-trial drift rate variability var_delta, inter-trial variability of the starting point (bias) var_beta, and inter-trial variability of the non-decision time var_tau.\nAvailable since 2.38\n\n\n\nStan returns the first passage time of the accumulation process over the upper boundary only. To get the result for the lower boundary, use \\[\\begin{equation*}\n\\text{Wiener}(y | \\alpha, \\tau, 1 - \\beta, - \\delta)\n\\end{equation*}\\] For more details, see the appendix of Vandekerckhove and Wabersich (2014).\n\n\n\nThe 5- and 7-argument forms of the wiener distribution functions (listed above as receiving only real) are implemented in such a way that they can be fully vectorized, but currently only versions that accept all real and all vector arguments are exposed by Stan. 
If there are additional signatures that would prove useful, please request them by opening an issue.\n\n\n\nThe 5- and 7-argument forms of the wiener distribution functions can also accept an additional data real argument controlling the required precision of the gradient calculation of the function. If omitted, this defaults to 1e-4 for the density and 1e-8 for the cdf functions.", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Positive Lower-Bounded Distributions" + ] + }, + { + "objectID": "functions-reference/references.html", + "href": "functions-reference/references.html", + "title": "References", + "section": "", + "text": "References\n\n\n\n\n Back to top" + }, + { + "objectID": "functions-reference/simplex_distributions.html", + "href": "functions-reference/simplex_distributions.html", + "title": "Simplex Distributions", + "section": "", + "text": "The simplex probabilities have support on the unit \\(K\\)-simplex for a specified \\(K\\). A \\(K\\)-dimensional vector \\(\\theta\\) is a unit \\(K\\)-simplex if \\(\\theta_k \\geq 0\\) for \\(k \\in \\{1,\\ldots,K\\}\\) and \\(\\sum_{k = 1}^K \\theta_k = 1\\).\n\n\n\n\nIf \\(K \\in \\mathbb{N}\\) and \\(\\alpha \\in (\\mathbb{R}^+)^{K}\\), then for \\(\\theta \\in \\text{$K$-simplex}\\),\n\\[\\begin{equation*}\n\\text{Dirichlet}(\\theta|\\alpha) =\n\\frac{\\Gamma \\! \\left( \\sum_{k=1}^K \\alpha_k \\right)}{\\prod_{k=1}^K \\Gamma(\\alpha_k)} \\prod_{k=1}^K \\theta_k^{\\alpha_k -1}\n\\end{equation*}\\]\nWarning: If any of the components of \\(\\theta\\) satisfies \\(\\theta_i = 0\\) or \\(\\theta_i = 1\\), then the probability is \\(0\\) and the log probability is \\(-\\infty\\). Similarly, the distribution requires strictly positive parameters, with \\(\\alpha_i > 0\\) for each \\(i\\).\n\n\n\nA symmetric Dirichlet prior is \\([\\alpha, \\ldots, \\alpha]^{\\top}\\). 
To code this in Stan,\n data {\n int<lower=1> K;\n real<lower=0> alpha;\n }\n generated quantities {\n vector[K] theta = dirichlet_rng(rep_vector(alpha, K));\n }\nTaking \\(K = 10\\), here are the first five draws for \\(\\alpha = 1\\). For \\(\\alpha = 1\\), the distribution is uniform over simplexes.\n 1) 0.17 0.05 0.07 0.17 0.03 0.13 0.03 0.03 0.27 0.05\n 2) 0.08 0.02 0.12 0.07 0.52 0.01 0.07 0.04 0.01 0.06\n 3) 0.02 0.03 0.22 0.29 0.17 0.10 0.09 0.00 0.05 0.03\n 4) 0.04 0.03 0.21 0.13 0.04 0.01 0.10 0.04 0.22 0.18\n 5) 0.11 0.22 0.02 0.01 0.06 0.18 0.33 0.04 0.01 0.01\nThat does not mean it’s uniform over the marginal probabilities of each element. As the size of the simplex grows, the marginal draws become more and more concentrated below (not around) \\(1/K\\). When one component of the simplex is large, the others must all be relatively small to compensate. For example, in a uniform distribution on \\(10\\)-simplexes, the probability that a component is greater than the mean of \\(1/10\\) is only 39%. Most of the posterior marginal probability mass for each component is in the interval \\((0, 0.1)\\).\nWhen the \\(\\alpha\\) value is small, the draws gravitate to the corners of the simplex. Here are the first five draws for \\(\\alpha = 0.001\\).\n 1) 3e-203 0e+00 2e-298 9e-106 1e+000 0e+00 0e+000 1e-047 0e+00 4e-279\n 2) 1e+000 0e+00 5e-279 2e-014 1e-275 0e+00 3e-285 9e-147 0e+00 0e+000\n 3) 1e-308 0e+00 1e-213 0e+000 0e+000 8e-75 0e+000 1e+000 4e-58 7e-112\n 4) 6e-166 5e-65 3e-068 3e-147 0e+000 1e+00 3e-249 0e+000 0e+00 0e+000\n 5) 2e-091 0e+00 0e+000 0e+000 1e-060 0e+00 4e-312 1e+000 0e+00 0e+000\nEach row denotes a draw. Each draw has a single value that rounds to one and other values that are very close to zero or rounded down to zero.\nAs \\(\\alpha\\) increases, the draws become increasingly uniform. 
For \\(\\alpha = 1000\\),\n 1) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10\n 2) 0.10 0.10 0.09 0.10 0.10 0.10 0.11 0.10 0.10 0.10\n 3) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10\n 4) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10\n 5) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10\n\n\n\ntheta ~ dirichlet(alpha)\nIncrement target log probability density with dirichlet_lupdf(theta | alpha).\nAvailable since 2.0\n \n\n\n\n\nThe Dirichlet probability functions are overloaded to allow the simplex \\(\\theta\\) and prior counts (plus one) \\(\\alpha\\) to be vectors or row vectors (or to mix the two types). The density functions are also vectorized, so they allow arrays of row vectors or vectors as arguments; see section vectorized function signatures for a description of vectorization.\n \n\nreal dirichlet_lpdf(vectors theta | vectors alpha) The log of the Dirichlet density for simplex(es) theta given prior counts (plus one) alpha\nAvailable since 2.12, vectorized in 2.21\n \n\nreal dirichlet_lupdf(vectors theta | vectors alpha) The log of the Dirichlet density for simplex(es) theta given prior counts (plus one) alpha dropping constant additive terms\nAvailable since 2.25\n \n\nvector dirichlet_rng(vector alpha) Generate a Dirichlet variate with prior counts (plus one) alpha; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Simplex Distributions" + ] + }, + { + "objectID": "functions-reference/simplex_distributions.html#dirichlet-distribution", + "href": "functions-reference/simplex_distributions.html#dirichlet-distribution", + "title": "Simplex Distributions", + "section": "", + "text": "If \\(K \\in \\mathbb{N}\\) and \\(\\alpha \\in (\\mathbb{R}^+)^{K}\\), then for \\(\\theta \\in \\text{$K$-simplex}\\),\n\\[\\begin{equation*}\n\\text{Dirichlet}(\\theta|\\alpha) =\n\\frac{\\Gamma \\! 
\\left( \\sum_{k=1}^K \\alpha_k \\right)}{\\prod_{k=1}^K \\Gamma(\\alpha_k)} \\prod_{k=1}^K \\theta_k^{\\alpha_k -1}\n\\end{equation*}\\]\nWarning: If any of the components of \\(\\theta\\) satisfies \\(\\theta_i = 0\\) or \\(\\theta_i = 1\\), then the probability is \\(0\\) and the log probability is \\(-\\infty\\). Similarly, the distribution requires strictly positive parameters, with \\(\\alpha_i > 0\\) for each \\(i\\).\n\n\n\nA symmetric Dirichlet prior is \\([\\alpha, \\ldots, \\alpha]^{\\top}\\). To code this in Stan,\n data {\n int<lower=1> K;\n real<lower=0> alpha;\n }\n generated quantities {\n vector[K] theta = dirichlet_rng(rep_vector(alpha, K));\n }\nTaking \\(K = 10\\), here are the first five draws for \\(\\alpha = 1\\). For \\(\\alpha = 1\\), the distribution is uniform over simplexes.\n 1) 0.17 0.05 0.07 0.17 0.03 0.13 0.03 0.03 0.27 0.05\n 2) 0.08 0.02 0.12 0.07 0.52 0.01 0.07 0.04 0.01 0.06\n 3) 0.02 0.03 0.22 0.29 0.17 0.10 0.09 0.00 0.05 0.03\n 4) 0.04 0.03 0.21 0.13 0.04 0.01 0.10 0.04 0.22 0.18\n 5) 0.11 0.22 0.02 0.01 0.06 0.18 0.33 0.04 0.01 0.01\nThat does not mean it’s uniform over the marginal probabilities of each element. As the size of the simplex grows, the marginal draws become more and more concentrated below (not around) \\(1/K\\). When one component of the simplex is large, the others must all be relatively small to compensate. For example, in a uniform distribution on \\(10\\)-simplexes, the probability that a component is greater than the mean of \\(1/10\\) is only 39%. Most of the posterior marginal probability mass for each component is in the interval \\((0, 0.1)\\).\nWhen the \\(\\alpha\\) value is small, the draws gravitate to the corners of the simplex. 
Here are the first five draws for \\(\\alpha = 0.001\\).\n 1) 3e-203 0e+00 2e-298 9e-106 1e+000 0e+00 0e+000 1e-047 0e+00 4e-279\n 2) 1e+000 0e+00 5e-279 2e-014 1e-275 0e+00 3e-285 9e-147 0e+00 0e+000\n 3) 1e-308 0e+00 1e-213 0e+000 0e+000 8e-75 0e+000 1e+000 4e-58 7e-112\n 4) 6e-166 5e-65 3e-068 3e-147 0e+000 1e+00 3e-249 0e+000 0e+00 0e+000\n 5) 2e-091 0e+00 0e+000 0e+000 1e-060 0e+00 4e-312 1e+000 0e+00 0e+000\nEach row denotes a draw. Each draw has a single value that rounds to one and other values that are very close to zero or rounded down to zero.\nAs \\(\\alpha\\) increases, the draws become increasingly uniform. For \\(\\alpha = 1000\\),\n 1) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10\n 2) 0.10 0.10 0.09 0.10 0.10 0.10 0.11 0.10 0.10 0.10\n 3) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10\n 4) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10\n 5) 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10\n\n\n\ntheta ~ dirichlet(alpha)\nIncrement target log probability density with dirichlet_lupdf(theta | alpha).\nAvailable since 2.0\n \n\n\n\n\nThe Dirichlet probability functions are overloaded to allow the simplex \\(\\theta\\) and prior counts (plus one) \\(\\alpha\\) to be vectors or row vectors (or to mix the two types). 
The density functions are also vectorized, so they allow arrays of row vectors or vectors as arguments; see section vectorized function signatures for a description of vectorization.\n \n\nreal dirichlet_lpdf(vectors theta | vectors alpha) The log of the Dirichlet density for simplex(es) theta given prior counts (plus one) alpha\nAvailable since 2.12, vectorized in 2.21\n \n\nreal dirichlet_lupdf(vectors theta | vectors alpha) The log of the Dirichlet density for simplex(es) theta given prior counts (plus one) alpha dropping constant additive terms\nAvailable since 2.25\n \n\nvector dirichlet_rng(vector alpha) Generate a Dirichlet variate with prior counts (plus one) alpha; may only be used in transformed data and generated quantities blocks\nAvailable since 2.0", + "crumbs": [ + "Functions Reference", + "Continuous Distributions", + "Simplex Distributions" + ] + }, + { + "objectID": "functions-reference/transform_functions.html", + "href": "functions-reference/transform_functions.html", + "title": "Variable Transformation Functions", + "section": "", + "text": "Variable transformation functions provide implementations of the built-in constraining and unconstraining transforms defined in Stan Reference Manual.\nFor each of the built-in variable transforms there are three functions named after the transform with differing suffixes. A _unconstrain function that maps from the constrained space back to free variables (the “transform”), A _constrain function that maps from free variables to constrained variables (the “inverse transform”), and a _jacobian function, which computes the same value as the _constrain function while also incrementing the Jacobian accumulator with the log Jacobian determinant.\nFor this page, variables named y are unconstrained, while variables named x are in the constrained space. 
The unconstraining functions will reject if their input does not satisfy the declared constraint.\n\n\nThese transformations take unconstrained values on the real number line and either constrain them to a subset of the real line with a lower bound, upper bound, or both, or provide an affine map that does not constrain values but can help with shifting and scaling them so they are more standardized.\nThe functions are all overloaded to apply to containers elementwise. If the y argument is a container, the others must be either scalars or containers of exactly the same size.\n\n\nThese functions perform the transform and inverse transform described in the Lower Bounded Scalar section.\n \n\nreals lower_bound_constrain(reals y, reals lb) Takes a value y and lower bound lb and returns the corresponding value which is greater than lb (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nAvailable since 2.37\n \n\nreals lower_bound_jacobian(reals y, reals lb) Takes a value y and lower bound lb and returns the corresponding value which is greater than lb (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nreals lower_bound_unconstrain(reals x, reals lb) Takes a value x which is greater than lb and returns the corresponding unconstrained value.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Upper Bounded Scalar section.\n \n\nreals upper_bound_constrain(reals y, reals ub) Takes a value y and upper bound ub and returns the corresponding value which is less than ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nAvailable since 2.37\n \n\nreals upper_bound_jacobian(reals y, 
reals ub) Takes a value y and upper bound ub and returns the corresponding value which is less than ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nreals upper_bound_unconstrain(reals x, reals ub) Takes a value x which is less than ub and returns the corresponding unconstrained value.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Lower and Upper Bounded Scalar section.\n \n\nreals lower_upper_bound_constrain(reals y, reals lb, reals ub) Takes a value y, lower bound lb, and upper bound ub and returns the corresponding value which is bounded between lb and ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nAvailable since 2.37\n \n\nreals lower_upper_bound_jacobian(reals y, reals lb, reals ub) Takes a value y, lower bound lb, and upper bound ub and returns the corresponding value which is bounded between lb and ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nreals lower_upper_bound_unconstrain(reals x, reals lb, reals ub) Takes a value x which is bounded between lb and ub and returns the corresponding unconstrained value.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Affinely Transformed Scalar section.\n \n\nreals offset_multiplier_constrain(reals y, reals offset, reals mult) Takes a value y, shift offset, and scale mult and returns a rescaled and shifted value.\nAvailable since 2.37\n \n\nreals offset_multiplier_jacobian(reals y, reals offset, reals 
mult) Takes a value y, shift offset, and scale mult and returns a rescaled and shifted value.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nreals offset_multiplier_unconstrain(reals x, reals offset, reals mult) Takes a value x, shift offset, and scale mult and returns a value which has been un-scaled and un-shifted.\nAvailable since 2.37\n\n\n\n\nThese functions constrain entire vectors holistically. Some transforms also change the length of the vector, as noted in the documentation.\nWhere vectors is used, this indicates that either a vector or a (possibly multidimensional) array of vectors may be provided. The array will be processed element by element.\n\n\nThese functions perform the transform and inverse transform described in the Ordered Vector section.\n \n\nvectors ordered_constrain(vectors y) Takes a free vector y and returns a vector with elements in ascending order.\nAvailable since 2.37\n \n\nvectors ordered_jacobian(vectors y) Takes a free vector y and returns a vector with elements in ascending order.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors ordered_unconstrain(vectors x) Takes an ordered vector x and returns the corresponding free vector.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Positive Ordered Vector section.\n \n\nvectors positive_ordered_constrain(vectors y) Takes a free vector y and returns a vector with positive elements in ascending order.\nAvailable since 2.37\n \n\nvectors positive_ordered_jacobian(vectors y) Takes a free vector y and returns a vector with positive elements in ascending order.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors positive_ordered_unconstrain(vectors x) Takes an 
ordered vector x with positive entries and returns the corresponding free vector.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Unit Simplex section.\n \n\nvectors simplex_constrain(vectors y) Takes a free vector y and returns a simplex (a vector such that each element is between 0 and 1, and the sum of the elements is 1, up to rounding errors).\nThis returned vector will have one extra element compared to the input y.\nAvailable since 2.37\n \n\nvectors simplex_jacobian(vectors y) Takes a free vector y and returns a simplex (a vector such that each element is between 0 and 1, and the sum of the elements is 1, up to rounding errors).\nThis returned vector will have one extra element compared to the input y.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors simplex_unconstrain(vectors x) Takes a simplex x and returns the corresponding free vector.\nThis returned vector will have one fewer elements compared to the input x.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Zero Sum Vector section.\n \n\nvectors sum_to_zero_constrain(vectors y) Takes a free vector y and returns a vector such that the elements sum to 0.\nThis returned vector will have one extra element compared to the input y.\nAvailable since 2.37\n \n\nvectors sum_to_zero_jacobian(vectors y) Takes a free vector y and returns a vector such that the elements sum to 0.\nThe returned vector will have one extra element compared to the input y.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors sum_to_zero_unconstrain(vectors x) Takes a vector x with elements that sum to 0 and returns the corresponding free vector.\nThis returned vector will have one fewer elements compared to the input x.\nAvailable 
since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Unit Vector section.\n \n\nvectors unit_vectors_constrain(vectors y) Takes a free vector y and returns a vector with unit length, i.e., norm2(unit_vectors_constrain(y)) == 1 for any y that has a positive and finite norm itself (if y does not, the function rejects). Note that, in particular, this implies the function rejects if given a vector of all zeros.\nAvailable since 2.37\n \n\nvectors unit_vectors_jacobian(vectors y) Takes a free vector y and returns a vector with unit length. This function rejects if given a vector of all zeros.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors unit_vectors_unconstrain(vectors x) Takes a vector x of unit length and returns the corresponding free vector.\nAvailable since 2.37\n\n\n\n\nSimilarly to the above, vectors means a vector or array thereof, and matrices means a matrix or array thereof.\n\n\nThese functions perform the transform and inverse transform described in the Cholesky Factors of Correlation Matrices section.\n \n\nmatrices cholesky_factor_corr_constrain(vectors y, int K) Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K Cholesky factor of a correlation matrix. This matrix is a Cholesky factor of a covariance matrix (i.e., a lower triangular matrix with a strictly positive diagonal), but with the additional constraint that each row is of unit length.\nAvailable since 2.37\n \n Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K Cholesky factor of a correlation matrix.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment. 
matrices cholesky_factor_corr_jacobian(vectors y, int K)\nAvailable since 2.37\n \n\nvectors cholesky_factor_corr_unconstrain(matrices x) Takes x, a (\\(K \\times K\\)) matrix which is the Cholesky factor of a correlation matrix (a lower triangular matrix with a strictly positive diagonal and each row having unit length), and returns the corresponding free vector of length \\(\\binom{K}{2}\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Cholesky Factors of Covariance Matrices section.\n \n\nmatrices cholesky_factor_cov_constrain(vectors y, int M, int N) Takes a free vector y and integers M and N and returns the M by N Cholesky factor of a covariance matrix. This matrix is a lower triangular matrix \\(L\\), with a strictly positive diagonal, such that \\(L^T L\\) is positive definite.\nNote that y must have length N + choose(N, 2) + (M - N) * N, and M must be greater than or equal to N.\nAvailable since 2.37\n \n\nmatrices cholesky_factor_cov_jacobian(vectors y, int M, int N) Takes a free vector y and integers M and N and returns the M by N Cholesky factor of a covariance matrix. 
This matrix is a lower triangular matrix \\(L\\), with a strictly positive diagonal, such that \\(L^T L\\) is positive definite.\nNote that y must have length N + choose(N, 2) + (M - N) * N, and M must be greater than or equal to N.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors cholesky_factor_cov_unconstrain(matrices x) Takes a \\(M \\times N\\) matrix x which is a Cholesky factor of a covariance matrix (a matrix \\(L\\) such that \\(L^T L\\) is positive definite) and returns the corresponding free vector of length \\(N + \\binom{N}{2} + (M - N)N\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Correlation Matrices section.\n \n\nmatrices corr_matrix_constrain(vectors y, int K) Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K correlation matrix (a positive definite matrix with a unit diagonal).\nAvailable since 2.37\n \n\nmatrices corr_matrix_jacobian(vectors y, int K) Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K correlation matrix (a positive definite matrix with a unit diagonal).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors corr_matrix_unconstrain(matrices x) Takes a \\(K \\times K\\) matrix x which is a correlation matrix (a positive definite matrix with a unit diagonal) and returns the corresponding free vector of size \\(\\binom{K}{2}\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Covariance Matrices section.\n \n\nmatrices cov_matrix_constrain(vectors y, int K) Takes a vector y and integer K, where length(y) == K + choose(K, 2), and returns a K by K covariance matrix (a positive definite matrix).\nAvailable since 2.37\n \n\nmatrices cov_matrix_jacobian(vectors 
y, int K) Takes a vector y and integer K, where length(y) == K + choose(K, 2), and returns a K by K covariance matrix (a positive definite matrix).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors cov_matrix_unconstrain(matrices x) Takes a \\(K \\times K\\) positive definite matrix x and returns the corresponding free vector of size \\(K + \\binom{K}{2}\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Stochastic Matrix section for column (left) stochastic matrices.\n \n\nmatrices stochastic_column_constrain(matrices y) Takes a free matrix y of size \\(N \\times M\\) and returns a left stochastic matrix (a matrix where each column is a simplex) of size \\(N+1 \\times M\\).\nAvailable since 2.37\n \n\nmatrices stochastic_column_jacobian(matrices y) Takes a free matrix y of size \\(N \\times M\\) and returns a left stochastic matrix (a matrix where each column is a simplex) of size \\(N+1 \\times M\\).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nmatrices stochastic_column_unconstrain(matrices x) Takes a left stochastic matrix x of size \\(N+1 \\times M\\) and returns the corresponding free matrix of size \\(N \\times M\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Stochastic Matrix section for row (right) stochastic matrices.\n \n\nmatrices stochastic_row_constrain(matrices y) Takes a free matrix y of size \\(N \\times M\\) and returns a right stochastic matrix (a matrix where each row is a simplex) of size \\(N \\times M+1\\).\nAvailable since 2.37\n \n\nmatrices stochastic_row_jacobian(matrices y) Takes a free matrix y of size \\(N \\times M\\) and returns a right stochastic matrix (a matrix where each row is a simplex) of size \\(N \\times M+1\\).\nThis 
function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nmatrices stochastic_row_unconstrain(matrices x) Takes a right stochastic matrix x of size \\(N \\times M+1\\) and returns the corresponding free matrix of size \\(N \\times M\\).\nAvailable since 2.37\n\n\n\nThe sum-to-zero matrix transforms map between unconstrained values and matrices whose rows and columns sum to zero; full definitions of the function and Jacobian can be found in the sum-to-zero matrix section of the Reference Manual.\n \n\nmatrices sum_to_zero_constrain(matrices y) The constraining function maps an unconstrained N x M matrix to an (N + 1) x (M + 1) matrix for which the rows and columns all sum to zero. This function covers the incrementation of the log Jacobian because the incrementation is zero.\nThis returned matrix will have one extra row and column compared to the input y.\nAvailable since 2.37\n \n\nmatrices sum_to_zero_jacobian(matrices y) The constraining function maps an unconstrained N x M matrix to an (N + 1) x (M + 1) matrix for which the rows and columns all sum to zero. 
Because the log Jacobian incrementation is zero, this is identical to sum_to_zero_constrain.\nThis returned matrix will have one extra row and column compared to the input y.\nAvailable since 2.37\n \n\nmatrices sum_to_zero_unconstrain(matrices x) This function maps a matrix with rows that sum to zero and columns that sum to zero to an unconstrained matrix with one fewer row and one fewer column.\nAvailable since 2.37", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Variable Transformation Functions" + ] + }, + { + "objectID": "functions-reference/transform_functions.html#transforms-for-scalars", + "href": "functions-reference/transform_functions.html#transforms-for-scalars", + "title": "Variable Transformation Functions", + "section": "", + "text": "These transformations take unconstrained values on the real number line and either constrain them to a subset of the real line with a lower bound, upper bound, or both, or provide an affine map that does not constrain values but can help with shifting and scaling them so they are more standardized.\nThe functions are all overloaded to apply to containers elementwise. 
If the y argument is a container, the others must be either scalars or containers of exactly the same size.\n\n\nThese functions perform the transform and inverse transform described in the Lower Bounded Scalar section.\n \n\nreals lower_bound_constrain(reals y, reals lb) Takes a value y and lower bound lb and returns the corresponding value which is greater than lb (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nAvailable since 2.37\n \n\nreals lower_bound_jacobian(reals y, reals lb) Takes a value y and lower bound lb and returns the corresponding value which is greater than lb (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nreals lower_bound_unconstrain(reals x, reals lb) Takes a value x which is greater than lb and returns the corresponding unconstrained value.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Upper Bounded Scalar section.\n \n\nreals upper_bound_constrain(reals y, reals ub) Takes a value y and upper bound ub and returns the corresponding value which is less than ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nAvailable since 2.37\n \n\nreals upper_bound_jacobian(reals y, reals ub) Takes a value y and upper bound ub and returns the corresponding value which is less than ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nreals upper_bound_unconstrain(reals x, reals ub) Takes a value x which is less than ub and returns the corresponding 
unconstrained value.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Lower and Upper Bounded Scalar section.\n \n\nreals lower_upper_bound_constrain(reals y, reals lb, reals ub) Takes a value y, lower bound lb, and upper bound ub and returns the corresponding value which is bounded between lb and ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nAvailable since 2.37\n \n\nreals lower_upper_bound_jacobian(reals y, reals lb, reals ub) Takes a value y, lower bound lb, and upper bound ub and returns the corresponding value which is bounded between lb and ub (except for the possibility of rounding due to numeric precision issues, in which case it will be equal to the bound).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nreals lower_upper_bound_unconstrain(reals x, reals lb, reals ub) Takes a value x which is bounded between lb and ub and returns the corresponding unconstrained value.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Affinely Transformed Scalar section.\n \n\nreals offset_multiplier_constrain(reals y, reals offset, reals mult) Takes a value y, shift offset, and scale mult and returns a rescaled and shifted value.\nAvailable since 2.37\n \n\nreals offset_multiplier_jacobian(reals y, reals offset, reals mult) Takes a value y, shift offset, and scale mult and returns a rescaled and shifted value.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nreals offset_multiplier_unconstrain(reals x, reals offset, reals mult) Takes a value x, shift offset, and scale mult and returns a value which has been un-scaled and un-shifted.\nAvailable since 2.37", + "crumbs": [ + "Functions Reference", + "Built-in 
Functions", + "Variable Transformation Functions" + ] + }, + { + "objectID": "functions-reference/transform_functions.html#transforms-for-constrained-vectors", + "href": "functions-reference/transform_functions.html#transforms-for-constrained-vectors", + "title": "Variable Transformation Functions", + "section": "", + "text": "These functions constrain entire vectors holistically. Some transforms also change the length of the vector, as noted in the documentation.\nWhere vectors is used, this indicates that either a vector or a (possibly multidimensional) array of vectors may be provided. The array will be processed element by element.\n\n\nThese functions perform the transform and inverse transform described in the Ordered Vector section.\n \n\nvectors ordered_constrain(vectors y) Takes a free vector y and returns a vector with elements in ascending order.\nAvailable since 2.37\n \n\nvectors ordered_jacobian(vectors y) Takes a free vector y and returns a vector with elements in ascending order.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors ordered_unconstrain(vectors x) Takes an ordered vector x and returns the corresponding free vector.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Positive Ordered Vector section.\n \n\nvectors positive_ordered_constrain(vectors y) Takes a free vector y and returns a vector with positive elements in ascending order.\nAvailable since 2.37\n \n\nvectors positive_ordered_jacobian(vectors y) Takes a free vector y and returns a vector with positive elements in ascending order.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors positive_ordered_unconstrain(vectors x) Takes an ordered vector x with positive entries and returns the corresponding free vector.\nAvailable since 
2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Unit Simplex section.\n \n\nvectors simplex_constrain(vectors y) Takes a free vector y and returns a simplex (a vector such that each element is between 0 and 1, and the sum of the elements is 1, up to rounding errors).\nThis returned vector will have one extra element compared to the input y.\nAvailable since 2.37\n \n\nvectors simplex_jacobian(vectors y) Takes a free vector y and returns a simplex (a vector such that each element is between 0 and 1, and the sum of the elements is 1, up to rounding errors).\nThis returned vector will have one extra element compared to the input y.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors simplex_unconstrain(vectors x) Takes a simplex x and returns the corresponding free vector.\nThis returned vector will have one fewer element compared to the input x.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Zero Sum Vector section.\n \n\nvectors sum_to_zero_constrain(vectors y) Takes a free vector y and returns a vector such that the elements sum to 0.\nThis returned vector will have one extra element compared to the input y.\nAvailable since 2.37\n \n\nvectors sum_to_zero_jacobian(vectors y) Takes a free vector y and returns a vector such that the elements sum to 0.\nThe returned vector will have one extra element compared to the input y.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors sum_to_zero_unconstrain(vectors x) Takes a vector x with elements that sum to 0 and returns the corresponding free vector.\nThis returned vector will have one fewer element compared to the input x.\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Unit 
Vector section.\n \n\nvectors unit_vectors_constrain(vectors y) Takes a free vector y and returns a vector with unit length, i.e., norm2(unit_vectors_constrain(y)) == 1 for any y that has a positive and finite norm itself (if y does not, the function rejects). Note that, in particular, this implies the function rejects if given a vector of all zeros.\nAvailable since 2.37\n \n\nvectors unit_vectors_jacobian(vectors y) Takes a free vector y and returns a vector with unit length. This function rejects if given a vector of all zeros.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors unit_vectors_unconstrain(vectors x) Takes a vector x of unit length and returns the corresponding free vector.\nAvailable since 2.37", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Variable Transformation Functions" + ] + }, + { + "objectID": "functions-reference/transform_functions.html#transforms-for-constrained-matrices", + "href": "functions-reference/transform_functions.html#transforms-for-constrained-matrices", + "title": "Variable Transformation Functions", + "section": "", + "text": "Similarly to the above, vectors means a vector or array thereof, and matrices means a matrix or array thereof.\n\n\nThese functions perform the transform and inverse transform described in the Cholesky Factors of Correlation Matrices section.\n \n\nmatrices cholesky_factor_corr_constrain(vectors y, int K) Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K Cholesky factor of a correlation matrix. 
This matrix is a Cholesky factor of a covariance matrix (i.e., a lower triangular matrix with a strictly positive diagonal), but with the additional constraint that each row is of unit length.\nAvailable since 2.37\n \n\nmatrices cholesky_factor_corr_jacobian(vectors y, int K) Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K Cholesky factor of a correlation matrix.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors cholesky_factor_corr_unconstrain(matrices x) Takes x, a (\\(K \\times K\\)) matrix which is the Cholesky factor of a correlation matrix (a lower triangular matrix with a strictly positive diagonal and each row having unit length), and returns the corresponding free vector of length \\(\\binom{K}{2}\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Cholesky Factors of Covariance Matrices section.\n \n\nmatrices cholesky_factor_cov_constrain(vectors y, int M, int N) Takes a free vector y and integers M and N and returns the M by N Cholesky factor of a covariance matrix. This matrix is a lower triangular matrix \\(L\\), with a strictly positive diagonal, such that \\(L^T L\\) is positive definite.\nNote that y must have length N + choose(N, 2) + (M - N) * N, and M must be greater than or equal to N.\nAvailable since 2.37\n \n\nmatrices cholesky_factor_cov_jacobian(vectors y, int M, int N) Takes a free vector y and integers M and N and returns the M by N Cholesky factor of a covariance matrix. 
This matrix is a lower triangular matrix \\(L\\), with a strictly positive diagonal, such that \\(L^T L\\) is positive definite.\nNote that y must have length N + choose(N, 2) + (M - N) * N, and M must be greater than or equal to N.\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors cholesky_factor_cov_unconstrain(matrices x) Takes a \\(M \\times N\\) matrix x which is a Cholesky factor of a covariance matrix (a matrix \\(L\\) such that \\(L^T L\\) is positive definite) and returns the corresponding free vector of length \\(N + \\binom{N}{2} + (M - N)N\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Correlation Matrices section.\n \n\nmatrices corr_matrix_constrain(vectors y, int K) Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K correlation matrix (a positive definite matrix with a unit diagonal).\nAvailable since 2.37\n \n\nmatrices corr_matrix_jacobian(vectors y, int K) Takes a vector y and integer K, where length(y) == choose(K, 2), and returns a K by K correlation matrix (a positive definite matrix with a unit diagonal).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors corr_matrix_unconstrain(matrices x) Takes a \\(K \\times K\\) matrix x which is a correlation matrix (a positive definite matrix with a unit diagonal) and returns the corresponding free vector of size \\(\\binom{K}{2}\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Covariance Matrices section.\n \n\nmatrices cov_matrix_constrain(vectors y, int K) Takes a vector y and integer K, where length(y) == K + choose(K, 2), and returns a K by K covariance matrix (a positive definite matrix).\nAvailable since 2.37\n \n\nmatrices cov_matrix_jacobian(vectors 
y, int K) Takes a vector y and integer K, where length(y) == K + choose(K, 2), and returns a K by K covariance matrix (a positive definite matrix).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nvectors cov_matrix_unconstrain(matrices x) Takes a \\(K \\times K\\) positive definite matrix x and returns the corresponding free vector of size \\(K + \\binom{K}{2}\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Stochastic Matrix section for column (left) stochastic matrices.\n \n\nmatrices stochastic_column_constrain(matrices y) Takes a free matrix y of size \\(N \\times M\\) and returns a left stochastic matrix (a matrix where each column is a simplex) of size \\(N+1 \\times M\\).\nAvailable since 2.37\n \n\nmatrices stochastic_column_jacobian(matrices y) Takes a free matrix y of size \\(N \\times M\\) and returns a left stochastic matrix (a matrix where each column is a simplex) of size \\(N+1 \\times M\\).\nThis function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nmatrices stochastic_column_unconstrain(matrices x) Takes a left stochastic matrix x of size \\(N+1 \\times M\\) and returns the corresponding free matrix of size \\(N \\times M\\).\nAvailable since 2.37\n\n\n\nThese functions perform the transform and inverse transform described in the Stochastic Matrix section for row (right) stochastic matrices.\n \n\nmatrices stochastic_row_constrain(matrices y) Takes a free matrix y of size \\(N \\times M\\) and returns a right stochastic matrix (a matrix where each row is a simplex) of size \\(N \\times M+1\\).\nAvailable since 2.37\n \n\nmatrices stochastic_row_jacobian(matrices y) Takes a free matrix y of size \\(N \\times M\\) and returns a right stochastic matrix (a matrix where each row is a simplex) of size \\(N \\times M+1\\).\nThis 
function also increments the Jacobian accumulator with the corresponding change of variables adjustment.\nAvailable since 2.37\n \n\nmatrices stochastic_row_unconstrain(matrices x) Takes a right stochastic matrix x of size \\(N \\times M+1\\) and returns the corresponding free matrix of size \\(N \\times M\\).\nAvailable since 2.37\n\n\n\nThe sum-to-zero matrix transforms map between unconstrained values and matrices whose rows and columns sum to zero; full definitions of the function and Jacobian can be found in the sum-to-zero matrix section of the Reference Manual.\n \n\nmatrices sum_to_zero_constrain(matrices y) The constraining function maps an unconstrained N x M matrix to an (N + 1) x (M + 1) matrix for which the rows and columns all sum to zero. This function covers the incrementation of the log Jacobian because the incrementation is zero.\nThis returned matrix will have one extra row and column compared to the input y.\nAvailable since 2.37\n \n\nmatrices sum_to_zero_jacobian(matrices y) The constraining function maps an unconstrained N x M matrix to an (N + 1) x (M + 1) matrix for which the rows and columns all sum to zero. 
Because the log Jacobian incrementation is zero, this is identical to sum_to_zero_constrain.\nThis returned matrix will have one extra row and column compared to the input y.\nAvailable since 2.37\n \n\nmatrices sum_to_zero_unconstrain(matrices x) This function maps a matrix with rows that sum to zero and columns that sum to zero to an unconstrained matrix with one fewer row and one fewer column.\nAvailable since 2.37", + "crumbs": [ + "Functions Reference", + "Built-in Functions", + "Variable Transformation Functions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html", + "href": "functions-reference/unbounded_discrete_distributions.html", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "The unbounded discrete distributions have support over the natural numbers (i.e., the non-negative integers).\n\n\nFor the negative binomial distribution Stan uses the parameterization described in Gelman et al. (2013). For alternative parameterizations, see section negative binomial glm.\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(n\n\\in \\mathbb{N}\\), \\[\\begin{equation*} \\text{NegBinomial}(n~|~\\alpha,\\beta) = \\binom{n +\n\\alpha - 1}{\\alpha - 1} \\, \\left( \\frac{\\beta}{\\beta+1}\n\\right)^{\\!\\alpha} \\, \\left( \\frac{1}{\\beta + 1} \\right)^{\\!n} \\!. \\end{equation*}\\]\nThe mean and variance of a random variable \\(n \\sim\n\\text{NegBinomial}(\\alpha,\\beta)\\) are given by \\[\\begin{equation*} \\mathbb{E}[n] =\n\\frac{\\alpha}{\\beta} \\ \\ \\text{ and } \\ \\ \\text{Var}[n] =\n\\frac{\\alpha}{\\beta^2} (\\beta + 1). 
\\end{equation*}\\]\n\n\n\nn ~ neg_binomial(alpha, beta)\nIncrement target log probability density with neg_binomial_lupmf(n | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal neg_binomial_lpmf(ints n | reals alpha, reals beta) The log negative binomial probability mass of n given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nreal neg_binomial_lupmf(ints n | reals alpha, reals beta) The log negative binomial probability mass of n given shape alpha and inverse scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal neg_binomial_cdf(ints n | reals alpha, reals beta) The negative binomial cumulative distribution function of n given shape alpha and inverse scale beta\nAvailable since 2.0\n \n\nreal neg_binomial_lcdf(ints n | reals alpha, reals beta) The log of the negative binomial cumulative distribution function of n given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nreal neg_binomial_lccdf(ints n | reals alpha, reals beta) The log of the negative binomial complementary cumulative distribution function of n given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nR neg_binomial_rng(reals alpha, reals beta) Generate a negative binomial variate with shape alpha and inverse scale beta; may only be used in transformed data and generated quantities blocks. alpha \\(/\\) beta must be less than \\(2 ^ {29}\\). For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18\n\n\n\n\nStan also provides an alternative parameterization of the negative binomial distribution directly using a mean (i.e., location) parameter and a parameter that controls overdispersion relative to the square of the mean. 
The section on the log alternative parameterization, below, provides a second alternative parameterization directly in terms of the log mean.\n\n\nThe first parameterization is for \\(\\mu \\in \\mathbb{R}^+\\) and \\(\\phi \\in\n\\mathbb{R}^+\\), which for \\(n \\in \\mathbb{N}\\) is defined as \\[\\begin{equation*}\n\\text{NegBinomial2}(n \\, | \\, \\mu, \\phi)\n= \\binom{n + \\phi - 1}{n} \\, \\left( \\frac{\\mu}{\\mu+\\phi} \\right)^{\\!n} \\, \\left( \\frac{\\phi}{\\mu+\\phi} \\right)^{\\!\\phi} \\!.\n\\end{equation*}\\]\nThe mean and variance of a random variable \\(n \\sim\n\\text{NegBinomial2}(n~|~\\mu,\\phi)\\) are \\[\\begin{equation*}\n\\mathbb{E}[n] = \\mu \\ \\ \\ \\text{ and } \\ \\ \\ \\text{Var}[n] = \\mu + \\frac{\\mu^2}{\\phi}.\n\\end{equation*}\\] Recall that \\(\\text{Poisson}(\\mu)\\) has variance \\(\\mu\\), so \\(\\mu^2 / \\phi > 0\\) is the additional variance of the negative binomial above that of the Poisson with mean \\(\\mu\\). So the inverse of parameter \\(\\phi\\) controls the overdispersion, scaled by the square of the mean, \\(\\mu^2\\).\n\n\n\nn ~ neg_binomial_2(mu, phi)\nIncrement target log probability density with neg_binomial_2_lupmf(n | mu, phi).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal neg_binomial_2_lpmf(ints n | reals mu, reals phi) The log negative binomial probability mass of n given location mu and precision phi.\nAvailable since 2.20\n \n\nreal neg_binomial_2_lupmf(ints n | reals mu, reals phi) The log negative binomial probability mass of n given location mu and precision phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_cdf(ints n | reals mu, reals phi) The negative binomial cumulative distribution function of n given location mu and precision phi.\nAvailable since 2.6\n \n\nreal neg_binomial_2_lcdf(ints n | reals mu, reals phi) The log of the negative binomial cumulative distribution function of n given location mu and precision phi.\nAvailable since 2.12\n \n\nreal neg_binomial_2_lccdf(ints n | 
reals mu, reals phi) The log of the negative binomial complementary cumulative distribution function of n given location mu and precision phi.\nAvailable since 2.12\n \n\nR neg_binomial_2_rng(reals mu, reals phi) Generate a negative binomial variate with location mu and precision phi; may only be used in transformed data and generated quantities blocks. mu must be less than \\(2 ^ {29}\\). For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18\n\n\n\n\nRelated to the parameterization in section negative binomial, alternative parameterization, the following parameterization uses a log mean parameter \\(\\eta = \\log(\\mu)\\), defined for \\(\\eta \\in \\mathbb{R}\\), \\(\\phi \\in \\mathbb{R}^+\\), so that for \\(n \\in\n\\mathbb{N}\\), \\[\\begin{equation*} \\text{NegBinomial2Log}(n \\, | \\, \\eta, \\phi) =\n\\text{NegBinomial2}(n | \\exp(\\eta), \\phi). \\end{equation*}\\] This alternative may be used for sampling, as a function, and for random number generation, but as of yet, there are no CDFs implemented for it. This is especially useful for log-linear negative binomial regressions.\n\n\nn ~ neg_binomial_2_log(eta, phi)\nIncrement target log probability density with neg_binomial_2_log_lupmf(n | eta, phi).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal neg_binomial_2_log_lpmf(ints n | reals eta, reals phi) The log negative binomial probability mass of n given log-location eta and inverse overdispersion parameter phi.\nAvailable since 2.20\n \n\nreal neg_binomial_2_log_lupmf(ints n | reals eta, reals phi) The log negative binomial probability mass of n given log-location eta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nR neg_binomial_2_log_rng(reals eta, reals phi) Generate a negative binomial variate with log-location eta and inverse overdispersion control phi; may only be used in transformed data and generated quantities blocks. 
eta must be less than \\(29 \\log 2\\). For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18\n\n\n\n\nStan also supplies a single function for a generalized linear model with negative binomial distribution and log link function, i.e. a function for a negative binomial regression. This provides a more efficient implementation of negative binomial regression than a manually written regression in terms of a negative binomial distribution and matrix multiplication.\n\n\nIf \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta\\in\n\\mathbb{R}^m, \\phi\\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{N}^n\\), \\[\\begin{equation*}\n\\text{NegBinomial2LogGLM}(y~|~x, \\alpha, \\beta, \\phi) = \\prod_{1\\leq i\n\\leq n}\\text{NegBinomial2}(y_i~|~\\exp(\\alpha_i + x_i\\cdot \\beta),\n\\phi). \\end{equation*}\\]\n\n\n\ny ~ neg_binomial_2_log_glm(x, alpha, beta, phi)\nIncrement target log probability density with neg_binomial_2_log_glm_lupmf(y | x, alpha, beta, phi).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal neg_binomial_2_log_glm_lpmf(int y | matrix x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.23\n \n\nreal neg_binomial_2_log_glm_lupmf(int y | matrix x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(int y | matrix x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.23\n \n\nreal neg_binomial_2_log_glm_lupmf(int y | matrix x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location 
alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.23\n \n\nreal neg_binomial_2_log_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.23\n \n\nreal neg_binomial_2_log_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(array[] int y | matrix x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.18\n \n\nreal neg_binomial_2_log_glm_lupmf(array[] int y | matrix x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.18\n \n\nreal 
neg_binomial_2_log_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n\n\n\n\n\n\nIf \\(\\lambda \\in \\mathbb{R}^+\\), then for \\(n \\in \\mathbb{N}\\), \\[\\begin{equation*}\n\\text{Poisson}(n|\\lambda) = \\frac{1}{n!} \\, \\lambda^n \\,\n\\exp(-\\lambda). \\end{equation*}\\]\n\n\n\nn ~ poisson(lambda)\nIncrement target log probability density with poisson_lupmf(n | lambda).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal poisson_lpmf(ints n | reals lambda) The log Poisson probability mass of n given rate lambda\nAvailable since 2.12\n \n\nreal poisson_lupmf(ints n | reals lambda) The log Poisson probability mass of n given rate lambda dropping constant additive terms\nAvailable since 2.25\n \n\nreal poisson_cdf(ints n | reals lambda) The Poisson cumulative distribution function of n given rate lambda\nAvailable since 2.0\n \n\nreal poisson_lcdf(ints n | reals lambda) The log of the Poisson cumulative distribution function of n given rate lambda\nAvailable since 2.12\n \n\nreal poisson_lccdf(ints n | reals lambda) The log of the Poisson complementary cumulative distribution function of n given rate lambda\nAvailable since 2.12\n \n\nR poisson_rng(reals lambda) Generate a Poisson variate with rate lambda; may only be used in transformed data and generated quantities blocks. lambda must be less than \\(2^{30}\\). For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18\n\n\n\n\nStan also provides a parameterization of the Poisson using the log rate \\(\\alpha = \\log \\lambda\\) as a parameter. 
This is useful for log-linear Poisson regressions so that the predictor does not need to be exponentiated and passed into the standard Poisson probability function.\n\n\nIf \\(\\alpha \\in \\mathbb{R}\\), then for \\(n \\in \\mathbb{N}\\), \\[\\begin{equation*}\n\\text{PoissonLog}(n|\\alpha) = \\frac{1}{n!} \\, \\exp \\left(n\\alpha -\n\\exp(\\alpha) \\right). \\end{equation*}\\]\n\n\n\nn ~ poisson_log(alpha)\nIncrement target log probability density with poisson_log_lupmf(n | alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal poisson_log_lpmf(ints n | reals alpha) The log Poisson probability mass of n given log rate alpha\nAvailable since 2.12\n \n\nreal poisson_log_lupmf(ints n | reals alpha) The log Poisson probability mass of n given log rate alpha dropping constant additive terms\nAvailable since 2.25\n \n\nR poisson_log_rng(reals alpha) Generate a Poisson variate with log rate alpha; may only be used in transformed data and generated quantities blocks. alpha must be less than \\(30 \\log 2\\). For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18\n\n\n\n\nStan also supplies a single function for a generalized linear model with Poisson distribution and log link function, i.e. a function for a Poisson regression. This provides a more efficient implementation of Poisson regression than a manually written regression in terms of a Poisson distribution and matrix multiplication.\n\n\nIf \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta\\in\n\\mathbb{R}^m\\), then for \\(y \\in \\mathbb{N}^n\\), \\[\\begin{equation*}\n\\text{PoissonLogGLM}(y|x, \\alpha, \\beta) = \\prod_{1\\leq i \\leq\nn}\\text{Poisson}(y_i|\\exp(\\alpha_i + x_i\\cdot \\beta)). 
\\end{equation*}\\]\n\n\n\ny ~ poisson_log_glm(x, alpha, beta)\nIncrement target log probability density with poisson_log_glm_lupmf(y | x, alpha, beta).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal poisson_log_glm_lpmf(int y | matrix x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.23\n \n\nreal poisson_log_glm_lupmf(int y | matrix x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(int y | matrix x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.23\n \n\nreal poisson_log_glm_lupmf(int y | matrix x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.23\n \n\nreal poisson_log_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.23\n \n\nreal poisson_log_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(array[] int y | matrix x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.18\n \n\nreal poisson_log_glm_lupmf(array[] int y | matrix x, real 
alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.18\n \n\nreal poisson_log_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n\n\n\n\n\n\nIf \\(r \\in \\mathbb{R}^+\\), \\(\\alpha \\in \\mathbb{R}^+\\), and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(n \\in \\mathbb{N}\\), \\[\\begin{equation*}\n\\text{BetaNegBinomial}(n|r,\\alpha,\\beta) = \\frac {\\Gamma (n+r )}{n!\\;\\Gamma (r )}\n\\frac {\\mathrm {B} (\\beta+n,\\alpha +r )}{\\mathrm {B} (\\beta,\\alpha )}. \\end{equation*}\\]\n\n\n\nn ~ beta_neg_binomial(r,alpha,beta)\nIncrement target log probability density with beta_neg_binomial_lupmf(n | r, alpha, beta).\nAvailable since 2.36\n \n\n\n\n\n \n\nreal beta_neg_binomial_lpmf(ints n | reals r, reals alpha, reals beta) The log beta negative binomial probability mass of n given parameters r, alpha and beta.\nAvailable since 2.36\n \n\nreal beta_neg_binomial_lupmf(ints n | reals r, reals alpha, reals beta) The log beta negative binomial probability mass of n given parameters r, alpha and beta dropping constant additive terms.\nAvailable since 2.36\n \n\nreal beta_neg_binomial_cdf(ints n | reals r, reals alpha, reals beta) The beta negative binomial cumulative distribution function of n given parameters r, alpha and beta.\nAvailable since 2.36\n \n\nreal beta_neg_binomial_lcdf(ints n | reals r, reals alpha, reals beta) The log of the beta negative binomial cumulative distribution function of n given parameters r, alpha and beta.\nAvailable since 2.36\n \n\nreal beta_neg_binomial_lccdf(ints n | reals r, reals alpha, reals beta) The log of the beta 
negative binomial complementary cumulative distribution function of n given parameters r, alpha and beta.\nAvailable since 2.36\n \n\nR beta_neg_binomial_rng(reals r, reals alpha, reals beta) Generate a beta negative binomial variate with parameters r, alpha and beta; may only be used in transformed data and generated quantities blocks. r \\(\\cdot\\) beta \\(/\\) (alpha\\(-1\\)) must be less than \\(2 ^ {29}\\). For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.36\n\n\n\n\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\), then for \\(n \\in \\mathbb{N}^+=\\{1,2,...\\}\\), \\[\\begin{equation*}\n\\text{YuleSimon}(n|\\alpha) = \\alpha \\, \\mathrm{B}(\\alpha + 1, n) = \\alpha \\, \\frac{\\Gamma(n) \\, \\Gamma(\\alpha + 1)}{\\Gamma(n + \\alpha + 1)}.\n\\end{equation*}\\]\n\n\n\nn ~ yule_simon(alpha)\nIncrement target log probability density with yule_simon_lupmf(n | alpha).\nAvailable since 2.39\n \n\n\n\n\n \n\nreal yule_simon_lpmf(ints n | reals alpha) The log Yule Simon probability mass of n given parameter alpha.\nAvailable since 2.39\n \n\nreal yule_simon_lupmf(ints n | reals alpha) The log Yule Simon probability mass of n given parameter alpha dropping constant additive terms.\nAvailable since 2.39\n \n\nreal yule_simon_cdf(ints n | reals alpha) The Yule Simon cumulative distribution function of n given parameter alpha.\nAvailable since 2.39\n \n\nreal yule_simon_lcdf(ints n | reals alpha) The log of the Yule Simon cumulative distribution function of n given parameter alpha.\nAvailable since 2.39\n \n\nreal yule_simon_lccdf(ints n | reals alpha) The log of the Yule Simon complementary cumulative distribution function of n given parameter alpha.\nAvailable since 2.39\n \n\nR yule_simon_rng(reals alpha) Generate a Yule Simon variate with parameter alpha; may only be used in transformed data and generated quantities blocks. alpha \\(/\\) (alpha\\(-1\\)) must be less than \\(2 ^ {29}\\). 
For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.39", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#negative-binomial-distribution", + "href": "functions-reference/unbounded_discrete_distributions.html#negative-binomial-distribution", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "For the negative binomial distribution Stan uses the parameterization described in Gelman et al. (2013). For alternative parameterizations, see section negative binomial glm.\n\n\nIf \\(\\alpha \\in \\mathbb{R}^+\\) and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(n\n\\in \\mathbb{N}\\), \\[\\begin{equation*} \\text{NegBinomial}(n~|~\\alpha,\\beta) = \\binom{n +\n\\alpha - 1}{\\alpha - 1} \\, \\left( \\frac{\\beta}{\\beta+1}\n\\right)^{\\!\\alpha} \\, \\left( \\frac{1}{\\beta + 1} \\right)^{\\!n} \\!. \\end{equation*}\\]\nThe mean and variance of a random variable \\(n \\sim\n\\text{NegBinomial}(\\alpha,\\beta)\\) are given by \\[\\begin{equation*} \\mathbb{E}[n] =\n\\frac{\\alpha}{\\beta} \\ \\ \\text{ and } \\ \\ \\text{Var}[n] =\n\\frac{\\alpha}{\\beta^2} (\\beta + 1). 
\\end{equation*}\\]\n\n\n\nn ~ neg_binomial(alpha, beta)\nIncrement target log probability density with neg_binomial_lupmf(n | alpha, beta).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal neg_binomial_lpmf(ints n | reals alpha, reals beta) The log negative binomial probability mass of n given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nreal neg_binomial_lupmf(ints n | reals alpha, reals beta) The log negative binomial probability mass of n given shape alpha and inverse scale beta dropping constant additive terms\nAvailable since 2.25\n \n\nreal neg_binomial_cdf(ints n | reals alpha, reals beta) The negative binomial cumulative distribution function of n given shape alpha and inverse scale beta\nAvailable since 2.0\n \n\nreal neg_binomial_lcdf(ints n | reals alpha, reals beta) The log of the negative binomial cumulative distribution function of n given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nreal neg_binomial_lccdf(ints n | reals alpha, reals beta) The log of the negative binomial complementary cumulative distribution function of n given shape alpha and inverse scale beta\nAvailable since 2.12\n \n\nR neg_binomial_rng(reals alpha, reals beta) Generate a negative binomial variate with shape alpha and inverse scale beta; may only be used in transformed data and generated quantities blocks. alpha \\(/\\) beta must be less than \\(2 ^ {29}\\). 
For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#nbalt", + "href": "functions-reference/unbounded_discrete_distributions.html#nbalt", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "Stan also provides an alternative parameterization of the negative binomial distribution directly using a mean (i.e., location) parameter and a parameter that controls overdispersion relative to the square of the mean. The section on the log alternative parameterization, below, provides a second alternative parameterization directly in terms of the log mean.\n\n\nThe first parameterization is for \\(\\mu \\in \\mathbb{R}^+\\) and \\(\\phi \\in\n\\mathbb{R}^+\\), which for \\(n \\in \\mathbb{N}\\) is defined as \\[\\begin{equation*}\n\\text{NegBinomial2}(n \\, | \\, \\mu, \\phi)\n= \\binom{n + \\phi - 1}{n} \\, \\left( \\frac{\\mu}{\\mu+\\phi} \\right)^{\\!n} \\, \\left( \\frac{\\phi}{\\mu+\\phi} \\right)^{\\!\\phi} \\!.\n\\end{equation*}\\]\nThe mean and variance of a random variable \\(n \\sim\n\\text{NegBinomial2}(n~|~\\mu,\\phi)\\) are \\[\\begin{equation*}\n\\mathbb{E}[n] = \\mu \\ \\ \\ \\text{ and } \\ \\ \\ \\text{Var}[n] = \\mu + \\frac{\\mu^2}{\\phi}.\n\\end{equation*}\\] Recall that \\(\\text{Poisson}(\\mu)\\) has variance \\(\\mu\\), so \\(\\mu^2 / \\phi > 0\\) is the additional variance of the negative binomial above that of the Poisson with mean \\(\\mu\\). 
So the inverse of parameter \\(\\phi\\) controls the overdispersion, scaled by the square of the mean, \\(\\mu^2\\).\n\n\n\nn ~ neg_binomial_2(mu, phi)\nIncrement target log probability density with neg_binomial_2_lupmf(n | mu, phi).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal neg_binomial_2_lpmf(ints n | reals mu, reals phi) The log negative binomial probability mass of n given location mu and precision phi.\nAvailable since 2.20\n \n\nreal neg_binomial_2_lupmf(ints n | reals mu, reals phi) The log negative binomial probability mass of n given location mu and precision phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_cdf(ints n | reals mu, reals phi) The negative binomial cumulative distribution function of n given location mu and precision phi.\nAvailable since 2.6\n \n\nreal neg_binomial_2_lcdf(ints n | reals mu, reals phi) The log of the negative binomial cumulative distribution function of n given location mu and precision phi.\nAvailable since 2.12\n \n\nreal neg_binomial_2_lccdf(ints n | reals mu, reals phi) The log of the negative binomial complementary cumulative distribution function of n given location mu and precision phi.\nAvailable since 2.12\n \n\nR neg_binomial_2_rng(reals mu, reals phi) Generate a negative binomial variate with location mu and precision phi; may only be used in transformed data and generated quantities blocks. mu must be less than \\(2 ^ {29}\\). 
For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#neg-binom-2-log", + "href": "functions-reference/unbounded_discrete_distributions.html#neg-binom-2-log", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "Related to the parameterization in section negative binomial, alternative parameterization, the following parameterization uses a log mean parameter \\(\\eta = \\log(\\mu)\\), defined for \\(\\eta \\in \\mathbb{R}\\), \\(\\phi \\in \\mathbb{R}^+\\), so that for \\(n \\in\n\\mathbb{N}\\), \\[\\begin{equation*} \\text{NegBinomial2Log}(n \\, | \\, \\eta, \\phi) =\n\\text{NegBinomial2}(n | \\exp(\\eta), \\phi). \\end{equation*}\\] This alternative may be used for sampling, as a function, and for random number generation, but as of yet, there are no CDFs implemented for it. This is especially useful for log-linear negative binomial regressions.\n\n\nn ~ neg_binomial_2_log(eta, phi)\nIncrement target log probability density with neg_binomial_2_log_lupmf(n | eta, phi).\nAvailable since 2.3\n \n\n\n\n\n \n\nreal neg_binomial_2_log_lpmf(ints n | reals eta, reals phi) The log negative binomial probability mass of n given log-location eta and inverse overdispersion parameter phi.\nAvailable since 2.20\n \n\nreal neg_binomial_2_log_lupmf(ints n | reals eta, reals phi) The log negative binomial probability mass of n given log-location eta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nR neg_binomial_2_log_rng(reals eta, reals phi) Generate a negative binomial variate with log-location eta and inverse overdispersion control phi; may only be used in transformed data and generated quantities blocks. eta must be less than \\(29 \\log 2\\). 
For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#neg-binom-2-log-glm", + "href": "functions-reference/unbounded_discrete_distributions.html#neg-binom-2-log-glm", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "Stan also supplies a single function for a generalized linear model with negative binomial distribution and log link function, i.e. a function for a negative binomial regression. This provides a more efficient implementation of negative binomial regression than a manually written regression in terms of a negative binomial distribution and matrix multiplication.\n\n\nIf \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta\\in\n\\mathbb{R}^m, \\phi\\in \\mathbb{R}^+\\), then for \\(y \\in \\mathbb{N}^n\\), \\[\\begin{equation*}\n\\text{NegBinomial2LogGLM}(y~|~x, \\alpha, \\beta, \\phi) = \\prod_{1\\leq i\n\\leq n}\\text{NegBinomial2}(y_i~|~\\exp(\\alpha_i + x_i\\cdot \\beta),\n\\phi). 
\\end{equation*}\\]\n\n\n\ny ~ neg_binomial_2_log_glm(x, alpha, beta, phi)\nIncrement target log probability density with neg_binomial_2_log_glm_lupmf(y | x, alpha, beta, phi).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal neg_binomial_2_log_glm_lpmf(int y | matrix x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.23\n \n\nreal neg_binomial_2_log_glm_lupmf(int y | matrix x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(int y | matrix x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.23\n \n\nreal neg_binomial_2_log_glm_lupmf(int y | matrix x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.23\n \n\nreal neg_binomial_2_log_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion 
parameter phi.\nAvailable since 2.23\n \n\nreal neg_binomial_2_log_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(array[] int y | matrix x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.18\n \n\nreal neg_binomial_2_log_glm_lupmf(array[] int y | matrix x, real alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25\n \n\nreal neg_binomial_2_log_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi.\nAvailable since 2.18\n \n\nreal neg_binomial_2_log_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta, real phi) The log negative binomial probability mass of y given log-location alpha + x * beta and inverse overdispersion parameter phi dropping constant additive terms.\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#poisson", + "href": "functions-reference/unbounded_discrete_distributions.html#poisson", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "If \\(\\lambda \\in \\mathbb{R}^+\\), then for \\(n \\in \\mathbb{N}\\), \\[\\begin{equation*}\n\\text{Poisson}(n|\\lambda) = \\frac{1}{n!} \\, \\lambda^n \\,\n\\exp(-\\lambda). 
\\end{equation*}\\]\n\n\n\nn ~ poisson(lambda)\nIncrement target log probability density with poisson_lupmf(n | lambda).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal poisson_lpmf(ints n | reals lambda) The log Poisson probability mass of n given rate lambda\nAvailable since 2.12\n \n\nreal poisson_lupmf(ints n | reals lambda) The log Poisson probability mass of n given rate lambda dropping constant additive terms\nAvailable since 2.25\n \n\nreal poisson_cdf(ints n | reals lambda) The Poisson cumulative distribution function of n given rate lambda\nAvailable since 2.0\n \n\nreal poisson_lcdf(ints n | reals lambda) The log of the Poisson cumulative distribution function of n given rate lambda\nAvailable since 2.12\n \n\nreal poisson_lccdf(ints n | reals lambda) The log of the Poisson complementary cumulative distribution function of n given rate lambda\nAvailable since 2.12\n \n\nR poisson_rng(reals lambda) Generate a Poisson variate with rate lambda; may only be used in transformed data and generated quantities blocks. lambda must be less than \\(2^{30}\\). For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#poisson-distribution-log-parameterization", + "href": "functions-reference/unbounded_discrete_distributions.html#poisson-distribution-log-parameterization", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "Stan also provides a parameterization of the Poisson using the log rate \\(\\alpha = \\log \\lambda\\) as a parameter. 
This is useful for log-linear Poisson regressions so that the predictor does not need to be exponentiated and passed into the standard Poisson probability function.\n\n\nIf \\(\\alpha \\in \\mathbb{R}\\), then for \\(n \\in \\mathbb{N}\\), \\[\\begin{equation*}\n\\text{PoissonLog}(n|\\alpha) = \\frac{1}{n!} \\, \\exp \\left(n\\alpha -\n\\exp(\\alpha) \\right). \\end{equation*}\\]\n\n\n\nn ~ poisson_log(alpha)\nIncrement target log probability density with poisson_log_lupmf(n | alpha).\nAvailable since 2.0\n \n\n\n\n\n \n\nreal poisson_log_lpmf(ints n | reals alpha) The log Poisson probability mass of n given log rate alpha\nAvailable since 2.12\n \n\nreal poisson_log_lupmf(ints n | reals alpha) The log Poisson probability mass of n given log rate alpha dropping constant additive terms\nAvailable since 2.25\n \n\nR poisson_log_rng(reals alpha) Generate a Poisson variate with log rate alpha; may only be used in transformed data and generated quantities blocks. alpha must be less than \\(30 \\log 2\\). For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.18", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#poisson-log-glm", + "href": "functions-reference/unbounded_discrete_distributions.html#poisson-log-glm", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "Stan also supplies a single function for a generalized linear model with Poisson distribution and log link function, i.e. a function for a Poisson regression. 
This provides a more efficient implementation of Poisson regression than a manually written regression in terms of a Poisson distribution and matrix multiplication.\n\n\nIf \\(x\\in \\mathbb{R}^{n\\cdot m}, \\alpha \\in \\mathbb{R}^n, \\beta\\in\n\\mathbb{R}^m\\), then for \\(y \\in \\mathbb{N}^n\\), \\[\\begin{equation*}\n\\text{PoissonLogGLM}(y|x, \\alpha, \\beta) = \\prod_{1\\leq i \\leq\nn}\\text{Poisson}(y_i|\\exp(\\alpha_i + x_i\\cdot \\beta)). \\end{equation*}\\]\n\n\n\ny ~ poisson_log_glm(x, alpha, beta)\nIncrement target log probability density with poisson_log_glm_lupmf(y | x, alpha, beta).\nAvailable since 2.19\n \n\n\n\n\n \n\nreal poisson_log_glm_lpmf(int y | matrix x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.23\n \n\nreal poisson_log_glm_lupmf(int y | matrix x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(int y | matrix x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.23\n \n\nreal poisson_log_glm_lupmf(int y | matrix x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(array[] int y | row_vector x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.23\n \n\nreal poisson_log_glm_lupmf(array[] int y | row_vector x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(array[] int y | row_vector x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.23\n \n\nreal 
poisson_log_glm_lupmf(array[] int y | row_vector x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(array[] int y | matrix x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.18\n \n\nreal poisson_log_glm_lupmf(array[] int y | matrix x, real alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25\n \n\nreal poisson_log_glm_lpmf(array[] int y | matrix x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta.\nAvailable since 2.18\n \n\nreal poisson_log_glm_lupmf(array[] int y | matrix x, vector alpha, vector beta) The log Poisson probability mass of y given the log-rate alpha + x * beta dropping constant additive terms.\nAvailable since 2.25", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#beta-neg-binomial", + "href": "functions-reference/unbounded_discrete_distributions.html#beta-neg-binomial", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "If \\(r \\in \\mathbb{R}^+\\), \\(\\alpha \\in \\mathbb{R}^+\\), and \\(\\beta \\in \\mathbb{R}^+\\), then for \\(n \\in \\mathbb{N}\\), \\[\\begin{equation*}\n\\text{BetaNegBinomial}(n|r,\\alpha,\\beta) = \\frac {\\Gamma (n+r )}{n!\\;\\Gamma (r )}\n\\frac {\\mathrm {B} (\\beta+n,\\alpha +r )}{\\mathrm {B} (\\beta,\\alpha )}. 
\\end{equation*}\\]\n\n\n\nn ~ beta_neg_binomial(r,alpha,beta)\nIncrement target log probability density with beta_neg_binomial_lupmf(n | r, alpha, beta).\nAvailable since 2.36\n \n\n\n\n\n \n\nreal beta_neg_binomial_lpmf(ints n | reals r, reals alpha, reals beta) The log beta negative binomial probability mass of n given parameters r, alpha and beta.\nAvailable since 2.36\n \n\nreal beta_neg_binomial_lupmf(ints n | reals r, reals alpha, reals beta) The log beta negative binomial probability mass of n given parameters r, alpha and beta dropping constant additive terms.\nAvailable since 2.36\n \n\nreal beta_neg_binomial_cdf(ints n | reals r, reals alpha, reals beta) The beta negative binomial cumulative distribution function of n given parameters r, alpha and beta.\nAvailable since 2.36\n \n\nreal beta_neg_binomial_lcdf(ints n | reals r, reals alpha, reals beta) The log of the beta negative binomial cumulative distribution function of n given parameters r, alpha and beta.\nAvailable since 2.36\n \n\nreal beta_neg_binomial_lccdf(ints n | reals r, reals alpha, reals beta) The log of the beta negative binomial complementary cumulative distribution function of n given parameters r, alpha and beta.\nAvailable since 2.36\n \n\nR beta_neg_binomial_rng(reals r, reals alpha, reals beta) Generate a beta negative binomial variate with parameters r, alpha and beta; may only be used in transformed data and generated quantities blocks. r \\(\\cdot\\) beta \\(/\\) (alpha\\(-1\\)) must be less than \\(2 ^ {29}\\). 
For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.36", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "functions-reference/unbounded_discrete_distributions.html#yule-simon", + "href": "functions-reference/unbounded_discrete_distributions.html#yule-simon", + "title": "Unbounded Discrete Distributions", + "section": "", + "text": "If \\(\\alpha \\in \\mathbb{R}^+\\), then for \\(n \\in \\mathbb{N}^+=\\{1,2,...\\}\\), \\[\\begin{equation*}\n\\text{YuleSimon}(n|\\alpha) = \\alpha \\, \\mathrm{B}(\\alpha + 1, n) = \\alpha \\, \\frac{\\Gamma(n) \\, \\Gamma(\\alpha + 1)}{\\Gamma(n + \\alpha + 1)}.\n\\end{equation*}\\]\n\n\n\nn ~ yule_simon(alpha)\nIncrement target log probability density with yule_simon_lupmf(n | alpha).\nAvailable since 2.39\n \n\n\n\n\n \n\nreal yule_simon_lpmf(ints n | reals alpha) The log Yule Simon probability mass of n given parameter alpha.\nAvailable since 2.39\n \n\nreal yule_simon_lupmf(ints n | reals alpha) The log Yule Simon probability mass of n given parameter alpha dropping constant additive terms.\nAvailable since 2.39\n \n\nreal yule_simon_cdf(ints n | reals alpha) The Yule Simon cumulative distribution function of n given parameter alpha.\nAvailable since 2.39\n \n\nreal yule_simon_lcdf(ints n | reals alpha) The log of the Yule Simon cumulative distribution function of n given parameter alpha.\nAvailable since 2.39\n \n\nreal yule_simon_lccdf(ints n | reals alpha) The log of the Yule Simon complementary cumulative distribution function of n given parameter alpha.\nAvailable since 2.39\n \n\nR yule_simon_rng(reals alpha) Generate a Yule Simon variate with parameter alpha; may only be used in transformed data and generated quantities blocks. alpha \\(/\\) (alpha\\(-1\\)) must be less than \\(2 ^ {29}\\). 
For a description of argument and return types, see section vectorized function signatures.\nAvailable since 2.39", + "crumbs": [ + "Functions Reference", + "Discrete Distributions", + "Unbounded Discrete Distributions" + ] + }, + { + "objectID": "reference-manual/analysis.html", + "href": "reference-manual/analysis.html", + "title": "Posterior Analysis", + "section": "", + "text": "Stan uses Markov chain Monte Carlo (MCMC) techniques to generate draws from the posterior distribution for full Bayesian inference. Markov chain Monte Carlo (MCMC) methods were developed for situations in which it is not straightforward to make independent draws Metropolis et al. (1953).\nStan’s variational inference algorithm provides draws from the variational approximation to the posterior which may be analyzed just as any other MCMC output, despite the fact that it is not actually a Markov chain.\nStan’s Laplace algorithm produces a sample from a normal approximation centered at the mode of a distribution in the unconstrained space. If the mode is a maximum a posteriori (MAP) estimate, the sample provides an estimate of the mean and standard deviation of the posterior distribution. If the mode is a maximum likelihood estimate (MLE), the sample provides an estimate of the standard error of the likelihood.\n\n\nA Markov chain is a sequence of random variables \\(\\theta^{(1)},\n\\theta^{(2)},\\ldots\\) where each variable is conditionally independent of all other variables given the value of the previous value. 
Thus if \\(\\theta = \\theta^{(1)}, \\theta^{(2)},\\ldots, \\theta^{(N)}\\), then\n\\[\np(\\theta) = p(\\theta^{(1)}) \\prod_{n=2}^N p(\\theta^{(n)}|\\theta^{(n-1)}).\n\\]\nStan uses Hamiltonian Monte Carlo to generate a next state in a manner described in the Hamiltonian Monte Carlo chapter.\nThe Markov chains Stan and other MCMC samplers generate are ergodic in the sense required by the Markov chain central limit theorem, meaning roughly that there is a reasonable chance of reaching one value of \\(\\theta\\) from another. The Markov chains are also stationary, meaning that the transition probabilities do not change at different positions in the chain, so that for \\(n, n' \\geq 0\\), the probability function \\(p(\\theta^{(n+1)}|\\theta^{(n)})\\) is the same as \\(p(\\theta^{(n'+1)}|\\theta^{(n')})\\) (following the convention of overloading random and bound variables and picking out a probability function by its arguments).\nStationary Markov chains have an equilibrium distribution on states in which each has the same marginal probability function, so that \\(p(\\theta^{(n)})\\) is the same probability function as \\(p(\\theta^{(n+1)})\\). In Stan, this equilibrium distribution \\(p(\\theta^{(n)})\\) is the target density \\(p(\\theta)\\) defined by a Stan program, which is typically a proper Bayesian posterior density \\(p(\\theta | y)\\) defined on the log scale up to a constant.\nUsing MCMC methods introduces two difficulties that are not faced by independent sample Monte Carlo methods. The first problem is determining when a randomly initialized Markov chain has converged to its equilibrium distribution. The second problem is that the draws from a Markov chain may be correlated or even anti-correlated, and thus the central limit theorem’s bound on estimation error no longer applies. 
These problems are addressed in the next two sections.\nStan’s posterior analysis tools compute a number of summary statistics, estimates, and diagnostics for a Markov chain Monte Carlo (MCMC) sample. Stan’s estimators and diagnostics are more robust in the face of non-convergence, antithetical sampling, and long-term Markov chain correlations than most of the other tools available. The algorithms Stan uses to achieve this are described in this chapter.\n\n\n\nBy definition, a Markov chain samples from the target distribution only after it has converged to equilibrium (i.e., equilibrium is defined as being achieved when \\(p(\\theta^{(n)})\\) is the target density). The following point cannot be expressed strongly enough:\n\nIn theory, convergence is only guaranteed asymptotically as the number of draws grows without bound.\nIn practice, diagnostics must be applied to monitor convergence for the finite number of draws actually available.\n\n\n\n\nTo establish basic notation, suppose a target Bayesian posterior density \\(p(\\theta | y)\\) given real-valued vectors of parameters \\(\\theta\\) and real- and discrete-valued data \\(y\\).1\nAn MCMC sample consists of a set of \\(M\\) Markov chains, each consisting of an ordered sequence of \\(N\\) draws from the posterior.2 The sample thus consists of \\(M \\times N\\) draws from the posterior.\n\n\nOne way to monitor whether a chain has converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. This is the motivation for the Gelman and Rubin (1992) potential scale reduction statistic, \\(\\hat{R}\\). The \\(\\hat{R}\\) statistic measures the ratio of the average variance of draws within each chain to the variance of the pooled draws across chains; if all chains are at equilibrium, these will be the same and \\(\\hat{R}\\) will be one. 
If the chains have not converged to a common distribution, the \\(\\hat{R}\\) statistic will be greater than one.\nGelman and Rubin’s recommendation is that the independent Markov chains be initialized with diffuse starting values for the parameters and sampled until all values for \\(\\hat{R}\\) are below some threshold. Vehtari et al. (2021) suggest in general to use a threshold \\(1.01\\), but other thresholds can be used depending on the use case. Stan allows users to specify initial values for parameters and it is also able to draw diffuse random initializations automatically satisfying the declared parameter constraints.\nThe \\(\\hat{R}\\) statistic is defined for a set of \\(M\\) Markov chains, \\(\\theta_m\\), each of which has \\(N\\) draws \\(\\theta^{(n)}_m\\). The between-chain variance estimate is\n\\[\nB\n=\n\\frac{N}{M-1}\n\\,\n\\sum_{m=1}^M (\\bar{\\theta}^{(\\bullet)}_{m}\n - \\bar{\\theta}^{(\\bullet)}_{\\bullet})^2,\n\\]\nwhere\n\\[\n\\bar{\\theta}_m^{(\\bullet)}\n= \\frac{1}{N} \\sum_{n = 1}^N \\theta_m^{(n)}\n\\]\nand\n\\[\n\\bar{\\theta}^{(\\bullet)}_{\\bullet}\n= \\frac{1}{M} \\, \\sum_{m=1}^M \\bar{\\theta}_m^{(\\bullet)}.\n\\]\nThe within-chain variance is averaged over the chains,\n\\[\nW = \\frac{1}{M} \\, \\sum_{m=1}^M s_m^2,\n\\]\nwhere\n\\[\ns_m^2\n=\n\\frac{1}{N-1}\n\\, \\sum_{n=1}^N (\\theta^{(n)}_m - \\bar{\\theta}^{(\\bullet)}_m)^2.\n\\]\nThe variance estimator is a mixture of the within-chain and cross-chain sample variances,\n\\[\n\\widehat{\\mbox{var}}^{+}\\!(\\theta|y)\n= \\frac{N-1}{N}\\, W \\, + \\, \\frac{1}{N} \\, B.\n\\]\nFinally, the potential scale reduction statistic is defined by\n\\[\n\\hat{R}\n\\, = \\,\n\\sqrt{\\frac{\\widehat{\\mbox{var}}^{+}\\!(\\theta|y)}{W}}.\n\\]\n\n\n\nBefore Stan calculates the potential-scale-reduction statistic \\(\\hat{R}\\), each chain is split into two halves. This provides an additional means to detect non-stationarity in the individual chains. 
If one chain involves gradually increasing values and one involves gradually decreasing values, they have not mixed well, but they can have \\(\\hat{R}\\) values near unity. In this case, splitting each chain into two parts leads to \\(\\hat{R}\\) values substantially greater than 1 because the first half of each chain has not mixed with the second half.\n\n\n\nSplit R-hat and the effective sample size (ESS) are well defined only if the marginal posteriors have finite mean and variance. Therefore, following Vehtari et al. (2021), we compute the rank normalized parameter values and then feed them into the formulas for split R-hat and ESS.\nRank normalization proceeds as follows:\n\nFirst, replace each value \\(\\theta^{(nm)}\\) by its rank \\(r^{(nm)}\\) within the pooled draws from all chains. Average ranks for ties are used to conserve the number of unique values of discrete quantities.\nSecond, transform ranks to normal scores using the inverse normal transformation and a fractional offset:\n\n\\[\nz_{(nm)} = \\Phi^{-1} \\left( \\frac{r_{(nm)} - 3/8}{S - 1/4} \\right)\n\\]\nTo further improve sensitivity to chains having different scales,\nrank normalized R-hat is computed also for the corresponding folded draws \\(\\zeta^{(mn)}\\), absolute deviations from the median, \\[\n\\label{zeta}\n\\zeta^{(mn)} = \\left|\\theta^{(nm)}-{\\rm median}(\\theta)\\right|.\n\\] The rank normalized split-\\(\\widehat{R}\\) measure computed on the \\(\\zeta^{(mn)}\\) values is called folded-split-\\(\\widehat{R}\\). This measures convergence in the tails rather than in the bulk of the distribution.\nTo obtain a single conservative \\(\\widehat{R}\\) estimate, we propose to report the maximum of rank normalized split-\\(\\widehat{R}\\) and rank normalized folded-split-\\(\\widehat{R}\\) for each parameter.\nBulk-ESS is defined as ESS for rank normalized split chains. Tail-ESS is defined as the minimum ESS for the 5% and 95% quantiles. 
See the Effective Sample Size section for details on how ESS is estimated.\n\n\n\nA question that often arises is whether it is acceptable to monitor convergence of only a subset of the parameters or generated quantities. The short answer is “no,” but this is elaborated further in this section.\nFor example, consider the value lp__, which is the log posterior density (up to a constant).3\nIt is thus a mistake to declare convergence in any practical sense if lp__ has not converged, because different chains are really in different parts of the space. Yet measuring convergence for lp__ is particularly tricky, as noted below.\n\n\nMarkov chain convergence is a global property in the sense that it does not depend on the choice of function of the parameters that is monitored. There is no hard cutoff between pre-convergence “transience” and post-convergence “equilibrium.” What happens is that as the number of states in the chain approaches infinity, the distribution of possible states in the chain approaches the target distribution and in that limit the expected value of the Monte Carlo estimator of any integrable function converges to the true expectation. There is nothing like warmup here, because in the limit, the effects of initial state are completely washed out.\n\n\n\nThe \\(\\hat{R}\\) statistic considers the composition of a Markov chain and a function, and if the Markov chain has converged then each Markov chain and function composition will have converged. Multivariate functions converge when all of their margins have converged by the Cramer-Wold theorem.\nThe transformation from unconstrained space to constrained space is just another function, so does not affect convergence.\nDifferent functions may have different autocorrelations, but if the Markov chain has equilibrated then all Markov chain plus function compositions should be consistent with convergence. 
Formally, any function that appears inconsistent is of concern and although it would be unreasonable to test every function, lp__ and other measured quantities should at least be consistent.\nThe obvious difference in lp__ is that it tends to vary quickly with position and is consequently susceptible to outliers.\n\n\n\nThe question is what happens for finite numbers of states? If we can prove a strong geometric ergodicity property (which depends on the sampler and the target distribution), then one can show that there exists a finite time after which the chain forgets its initial state with a large probability. This is both the autocorrelation time and the warmup time. But even if you can show it exists and is finite (which is nigh impossible) you can’t compute an actual value analytically.\nSo what we do in practice is hope that the finite number of draws is large enough for the expectations to be reasonably accurate. Removing warmup iterations improves the accuracy of the expectations but there is no guarantee that removing any finite number of draws will be enough.\n\n\n\nFirstly, as noted above, for any finite number of draws, there will always be some residual effect of the initial state, which typically manifests as some small (or large if the autocorrelation time is huge) probability of having a large outlier. Functions robust to such outliers (say, quantiles) will appear more stable and have better \\(\\hat{R}\\). Functions vulnerable to such outliers may show fragility.\nSecondly, use of the \\(\\hat{R}\\) statistic makes very strong assumptions. In particular, it assumes that the functions being considered are Gaussian or it only uses the first two moments and assumes some kind of independence. The point is that strong assumptions are made that do not always hold. 
In particular, the distribution for the log posterior density (lp__) almost never looks Gaussian, instead it features long tails that can lead to large \\(\\hat{R}\\) even in the large \\(N\\) limit. Tweaks to \\(\\hat{R}\\), such as using quantiles in place of raw values, have the flavor of making the sample of interest more Gaussian and hence the \\(\\hat{R}\\) statistic more accurate.\n\n\n\n“Convergence” is a global property and holds for all integrable functions at once, but employing the \\(\\hat{R}\\) statistic requires additional assumptions and thus may not work for all functions equally well.\nNote that if you just compare the expectations between chains then we can rely on the Markov chain asymptotics for Gaussian distributions and can apply the standard tests.\n\n\n\n\n\nThe second technical difficulty posed by MCMC methods is that the draws will typically be autocorrelated (or anticorrelated) within a chain. This increases (or reduces) the uncertainty of the estimation of posterior quantities of interest, such as means, variances, or quantiles; see Charles J. Geyer (2011).\nStan estimates an effective sample size for each parameter, which plays the role in the Markov chain Monte Carlo central limit theorem (MCMC CLT) as the number of independent draws plays in the standard central limit theorem (CLT).\nUnlike most packages, the particular calculations used by Stan follow those for split-\\(\\hat{R}\\), which involve both cross-chain (mean) and within-chain calculations (autocorrelation); see Gelman et al. (2013) and Vehtari et al. (2021).\n\n\nThe amount by which autocorrelation within the chains increases uncertainty in estimates can be measured by effective sample size (ESS). Given independent sample (with finite variance), the central limit theorem bounds uncertainty in estimates based on the sample size \\(N\\). 
Given dependent sample, the sample size is replaced with the effective sample size \\(N_{\\mathrm{eff}}\\).\nFor example, Monte Carlo standard error (MCSE) is proportional to \\(1 / \\sqrt{N_{\\mathrm{eff}}}\\) rather than \\(1/\\sqrt{N}\\).\nThe effective sample size of a sequence is defined in terms of the autocorrelations within the sequence at different lags. The autocorrelation \\(\\rho_t\\) at lag \\(t \\geq 0\\) for a chain with joint probability function \\(p(\\theta)\\) with mean \\(\\mu\\) and variance \\(\\sigma^2\\) is defined to be\n\\[\n\\rho_t\n=\n\\frac{1}{\\sigma^2} \\, \\int_{\\Theta} (\\theta^{(n)} - \\mu)\n(\\theta^{(n+t)} - \\mu) \\, p(\\theta) \\, d\\theta.\n\\]\nThis is the correlation between the two chains offset by \\(t\\) positions (i.e., a lag in time-series terminology). Because we know \\(\\theta^{(n)}\\) and \\(\\theta^{(n+t)}\\) have the same marginal distribution in an MCMC setting, multiplying the two difference terms and reducing yields\n\\[\n\\rho_t\n= \\frac{1}{\\sigma^2}\n\\, \\int_{\\Theta}\n \\theta^{(n)} \\, \\theta^{(n+t)} \\, p(\\theta)\n \\, d\\theta - \\frac{\\mu^2}{\\sigma^2}.\n\\]\nThe effective sample size of \\(N\\) draws generated by a process with autocorrelations \\(\\rho_t\\) is defined by \\[\nN_{\\mathrm{eff}}\n\\ = \\\n\\frac{N}{\\sum_{t = -\\infty}^{\\infty} \\rho_t}\n\\ = \\\n\\frac{N}{1 + 2 \\sum_{t = 1}^{\\infty} \\rho_t}.\n\\]\nFor independent draws, the effective sample size is just the number of iterations. For correlated draws, the effective sample size is usually lower than the number of iterations, but in case of anticorrelated draws, the effective sample size can be larger than the number of iterations. In this latter case, MCMC can work better than independent sampling for some estimation problems. 
Hamiltonian Monte Carlo, including the no-U-turn sampler used by default in Stan, can produce anticorrelated draws if the posterior is close to Gaussian with little posterior correlation.\n\n\n\nIn practice, the probability function in question cannot be tractably integrated and thus the autocorrelation cannot be calculated, nor the effective sample size. Instead, these quantities must be estimated from the draws themselves. The rest of this section describes an autocorrelation and split-\\(\\hat{R}\\) based effective sample size estimator, based on multiple chains. As before, each chain \\(\\theta_m\\) will be assumed to be of length \\(N\\).\nStan carries out the autocorrelation computations for all lags simultaneously using Eigen’s fast Fourier transform (FFT) package with appropriate padding; see Charles J. Geyer (2011) for more detail on using FFT for autocorrelation calculations. The autocorrelation estimates \\(\\hat{\\rho}_{t,m}\\) at lag \\(t\\) from multiple chains \\(m \\in\n(1,\\ldots,M)\\) are combined with within-sample variance estimate \\(W\\) and multi-chain variance estimate \\(\\widehat{\\mbox{var}}^{+}\\) introduced in the previous section to compute the combined autocorrelation at lag \\(t\\) as\n\\[\n\\hat{\\rho}_t\n= 1 - \\frac{\\displaystyle W\n - \\textstyle \\frac{1}{M}\\sum_{m=1}^M s_m^2 \\hat{\\rho}_{t,m}}\n {\\widehat{\\mbox{var}}^{+}}.\n\\]\nIf the chains have not converged, the variance estimator \\(\\widehat{\\mbox{var}}^{+}\\) will overestimate variance, leading to an overestimate of autocorrelation and an underestimate of effective sample size.\nBecause of the noise in the correlation estimates \\(\\hat{\\rho}_t\\) as \\(t\\) increases, a typical truncated sum of \\(\\hat{\\rho}_t\\) is used. Negative autocorrelations may occur only on odd lags and by summing over pairs starting from lag 0, the paired autocorrelation is guaranteed to be positive, monotone and convex modulo estimator noise Charles J. Geyer (1992), Charles J. 
Geyer (2011). Stan uses Geyer’s initial monotone sequence criterion. The effective sample size estimator is defined as\n\\[\n\\hat{N}_{\\mathrm{eff}} = \\frac{M \\cdot N}{\\hat{\\tau}},\n\\]\nwhere\n\\[\n\\hat{\\tau} = 1 + 2 \\sum_{t=1}^{2m+1} \\hat{\\rho}_t = -1 + 2 \\sum_{t'=0}^{m} \\hat{P}_{t'},\n\\]\nwhere \\(\\hat{P}_{t'}=\\hat{\\rho}_{2t'}+\\hat{\\rho}_{2t'+1}\\). The initial positive sequence estimator is obtained by choosing the largest \\(m\\) such that \\(\\hat{P}_{t'}>0, \\quad t' = 1,\\ldots,m\\). The initial monotone sequence is obtained by further reducing \\(\\hat{P}_{t'}\\) to the minimum of the preceding ones so that the estimated sequence is monotone.\n\n\n\nThe posterior standard deviation of a parameter \\(\\theta_n\\) conditioned on observed data \\(y\\) is just the standard deviation of the posterior density \\(p(\\theta_n | y)\\). This is estimated by the standard deviation of the combined posterior draws across chains,\n\\[\n\\hat{\\sigma}_n = \\mathrm{sd}(\\theta^{(1)}_n, \\ldots, \\theta^{(m)}_n).\n\\]\nThe previous section showed how to estimate \\(N_{\\mathrm{eff}}\\) for a parameter \\(\\theta_n\\) based on multiple chains of posterior draws.\nThe mean of the posterior draws of \\(\\theta_n\\) \\[\n\\hat{\\theta}_n\n= \\mathrm{mean}(\\theta^{(1)}_n, \\ldots, \\theta^{(m)}_n)\n\\]\nis treated as an estimator of the true posterior mean,\n\\[\n\\mathbb{E}[\\theta_n \\mid y]\n\\ = \\\n\\int_{-\\infty}^{\\infty}\n \\, \\theta \\, p(\\theta | y)\n\\, \\mathrm{d}\\theta_n,\n\\]\nbased on the observed data \\(y\\).\nThe standard error for the estimator \\(\\hat{\\theta}_n\\) is given by the posterior standard deviation divided by the square root of the effective sample size. This standard error is itself estimated as \\(\\hat{\\sigma}_n / \\sqrt{N_{\\mathrm{eff}}}\\). The smaller the standard error, the closer the estimate \\(\\hat{\\theta}_n\\) is expected to be to the true value. This is just the MCMC CLT applied to an estimator; see Charles J. 
Geyer (2011) for more details of the MCMC central limit theorem.\n\n\n\nIn complex posteriors, draws are almost always positively correlated. In these situations, the autocorrelation at lag \\(t\\), \\(\\rho_t\\), decreases as the lag, \\(t\\), increases. In this situation, thinning the sample by keeping only every \\(N\\)-th draw will reduce the autocorrelation of the resulting chain. This is particularly useful if we need to save storage or re-use the draws for inference.\nFor instance, consider generating one thousand posterior draws in one of the following two ways.\n\nGenerate 1000 draws after convergence and save all of them.\nGenerate 10,000 draws after convergence and save every tenth draw.\n\nEven though both produce a sample consisting of one thousand draws, the second approach with thinning can produce a higher effective sample size when the draws are positively correlated. That’s because the autocorrelation \\(\\rho_t\\) for the thinned sequence is equivalent to \\(\\rho_{10t}\\) in the unthinned sequence, so the sum of the autocorrelations usually will be lower and thus the effective sample size higher.\nNow contrast the second approach above with the unthinned alternative,\n\nGenerate 10,000 draws after convergence and save every draw.\n\nThis will typically have a higher effective sample size than the thinned sample consisting of every tenth draw. But the gap might not be very large. 
To summarize, the only reason to thin a sample is to reduce memory requirements.\nIf draws are anticorrelated, then thinning will increase correlation and further reduce the overall effective sample size.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Posterior Analysis" + ] + }, + { + "objectID": "reference-manual/analysis.html#markov-chains", + "href": "reference-manual/analysis.html#markov-chains", + "title": "Posterior Analysis", + "section": "", + "text": "A Markov chain is a sequence of random variables \\(\\theta^{(1)},\n\\theta^{(2)},\\ldots\\) where each variable is conditionally independent of all other variables given the value of the previous value. Thus if \\(\\theta = \\theta^{(1)}, \\theta^{(2)},\\ldots, \\theta^{(N)}\\), then\n\\[\np(\\theta) = p(\\theta^{(1)}) \\prod_{n=2}^N p(\\theta^{(n)}|\\theta^{(n-1)}).\n\\]\nStan uses Hamiltonian Monte Carlo to generate a next state in a manner described in the Hamiltonian Monte Carlo chapter.\nThe Markov chains Stan and other MCMC samplers generate are ergodic in the sense required by the Markov chain central limit theorem, meaning roughly that there is a reasonable chance of reaching one value of \\(\\theta\\) from another. The Markov chains are also stationary, meaning that the transition probabilities do not change at different positions in the chain, so that for \\(n, n' \\geq 0\\), the probability function \\(p(\\theta^{(n+1)}|\\theta^{(n)})\\) is the same as \\(p(\\theta^{(n'+1)}|\\theta^{(n')})\\) (following the convention of overloading random and bound variables and picking out a probability function by its arguments).\nStationary Markov chains have an equilibrium distribution on states in which each has the same marginal probability function, so that \\(p(\\theta^{(n)})\\) is the same probability function as \\(p(\\theta^{(n+1)})\\). 
In Stan, this equilibrium distribution \\(p(\\theta^{(n)})\\) is the target density \\(p(\\theta)\\) defined by a Stan program, which is typically a proper Bayesian posterior density \\(p(\\theta | y)\\) defined on the log scale up to a constant.\nUsing MCMC methods introduces two difficulties that are not faced by independent sample Monte Carlo methods. The first problem is determining when a randomly initialized Markov chain has converged to its equilibrium distribution. The second problem is that the draws from a Markov chain may be correlated or even anti-correlated, and thus the central limit theorem’s bound on estimation error no longer applies. These problems are addressed in the next two sections.\nStan’s posterior analysis tools compute a number of summary statistics, estimates, and diagnostics for Markov chain Monte Carlo (MCMC) sample. Stan’s estimators and diagnostics are more robust in the face of non-convergence, antithetical sampling, and long-term Markov chain correlations than most of the other tools available. The algorithms Stan uses to achieve this are described in this chapter.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Posterior Analysis" + ] + }, + { + "objectID": "reference-manual/analysis.html#convergence", + "href": "reference-manual/analysis.html#convergence", + "title": "Posterior Analysis", + "section": "", + "text": "By definition, a Markov chain samples from the target distribution only after it has converged to equilibrium (i.e., equilibrium is defined as being achieved when \\(p(\\theta^{(n)})\\) is the target density). 
The following point cannot be expressed strongly enough:\n\nIn theory, convergence is only guaranteed asymptotically as the number of draws grows without bound.\nIn practice, diagnostics must be applied to monitor convergence for the finite number of draws actually available.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Posterior Analysis" + ] + }, + { + "objectID": "reference-manual/analysis.html#notation-for-samples-chains-and-draws", + "href": "reference-manual/analysis.html#notation-for-samples-chains-and-draws", + "title": "Posterior Analysis", + "section": "", + "text": "To establish basic notation, suppose a target Bayesian posterior density \\(p(\\theta | y)\\) given real-valued vectors of parameters \\(\\theta\\) and real- and discrete-valued data \\(y\\).1\nAn MCMC sample consists of a set of \\(M\\) Markov chains, each consisting of an ordered sequence of \\(N\\) draws from the posterior.2 The sample thus consists of \\(M \\times N\\) draws from the posterior.\n\n\nOne way to monitor whether a chain has converged to the equilibrium distribution is to compare its behavior to other randomly initialized chains. This is the motivation for the Gelman and Rubin (1992) potential scale reduction statistic, \\(\\hat{R}\\). The \\(\\hat{R}\\) statistic measures the ratio of the average variance of draws within each chain to the variance of the pooled draws across chains; if all chains are at equilibrium, these will be the same and \\(\\hat{R}\\) will be one. If the chains have not converged to a common distribution, the \\(\\hat{R}\\) statistic will be greater than one.\nGelman and Rubin’s recommendation is that the independent Markov chains be initialized with diffuse starting values for the parameters and sampled until all values for \\(\\hat{R}\\) are below some threshold. Vehtari et al. (2021) suggest in general to use a threshold \\(1.01\\), but other thresholds can be used depending on the use case. 
Stan allows users to specify initial values for parameters and it is also able to draw diffuse random initializations automatically satisfying the declared parameter constraints.\nThe \\(\\hat{R}\\) statistic is defined for a set of \\(M\\) Markov chains, \\(\\theta_m\\), each of which has \\(N\\) draws \\(\\theta^{(n)}_m\\). The between-chain variance estimate is\n\\[\nB\n=\n\\frac{N}{M-1}\n\\,\n\\sum_{m=1}^M (\\bar{\\theta}^{(\\bullet)}_{m}\n - \\bar{\\theta}^{(\\bullet)}_{\\bullet})^2,\n\\]\nwhere\n\\[\n\\bar{\\theta}_m^{(\\bullet)}\n= \\frac{1}{N} \\sum_{n = 1}^N \\theta_m^{(n)}\n\\]\nand\n\\[\n\\bar{\\theta}^{(\\bullet)}_{\\bullet}\n= \\frac{1}{M} \\, \\sum_{m=1}^M \\bar{\\theta}_m^{(\\bullet)}.\n\\]\nThe within-chain variance is averaged over the chains,\n\\[\nW = \\frac{1}{M} \\, \\sum_{m=1}^M s_m^2,\n\\]\nwhere\n\\[\ns_m^2\n=\n\\frac{1}{N-1}\n\\, \\sum_{n=1}^N (\\theta^{(n)}_m - \\bar{\\theta}^{(\\bullet)}_m)^2.\n\\]\nThe variance estimator is a mixture of the within-chain and cross-chain sample variances,\n\\[\n\\widehat{\\mbox{var}}^{+}\\!(\\theta|y)\n= \\frac{N-1}{N}\\, W \\, + \\, \\frac{1}{N} \\, B.\n\\]\nFinally, the potential scale reduction statistic is defined by\n\\[\n\\hat{R}\n\\, = \\,\n\\sqrt{\\frac{\\widehat{\\mbox{var}}^{+}\\!(\\theta|y)}{W}}.\n\\]\n\n\n\nBefore Stan calculates the potential-scale-reduction statistic \\(\\hat{R}\\), each chain is split into two halves. This provides an additional means to detect non-stationarity in the individual chains. If one chain involves gradually increasing values and one involves gradually decreasing values, they have not mixed well, but they can have \\(\\hat{R}\\) values near unity. In this case, splitting each chain into two parts leads to \\(\\hat{R}\\) values substantially greater than 1 because the first half of each chain has not mixed with the second half.\n\n\n\nSplit R-hat and the effective sample size (ESS) are well defined only if the marginal posteriors have finite mean and variance. 
Therefore, following Vehtari et al. (2021), we compute the rank normalized parameter values and then feed them into the formulas for split R-hat and ESS.\nRank normalization proceeds as follows:\n\nFirst, replace each value \\(\\theta^{(nm)}\\) by its rank \\(r^{(nm)}\\) within the pooled draws from all chains. Average ranks for ties are used to conserve the number of unique values of discrete quantities.\nSecond, transform ranks to normal scores using the inverse normal transformation and a fractional offset:\n\n\\[\nz_{(nm)} = \\Phi^{-1} \\left( \\frac{r_{(nm)} - 3/8}{S - 1/4} \\right)\n\\]\nTo further improve sensitivity to chains having different scales,\nrank normalized R-hat is computed also for the corresponding folded draws \\(\\zeta^{(mn)}\\), absolute deviations from the median, \\[\n\\label{zeta}\n\\zeta^{(mn)} = \\left|\\theta^{(nm)}-{\\rm median}(\\theta)\\right|.\n\\] The rank normalized split-\\(\\widehat{R}\\) measure computed on the \\(\\zeta^{(mn)}\\) values is called folded-split-\\(\\widehat{R}\\). This measures convergence in the tails rather than in the bulk of the distribution.\nTo obtain a single conservative \\(\\widehat{R}\\) estimate, we propose to report the maximum of rank normalized split-\\(\\widehat{R}\\) and rank normalized folded-split-\\(\\widehat{R}\\) for each parameter.\nBulk-ESS is defined as ESS for rank normalized split chains. Tail-ESS is defined as the minimum ESS for the 5% and 95% quantiles. See the Effective Sample Size section for details on how ESS is estimated.\n\n\n\nA question that often arises is whether it is acceptable to monitor convergence of only a subset of the parameters or generated quantities. 
The short answer is “no,” but this is elaborated further in this section.\nFor example, consider the value lp__, which is the log posterior density (up to a constant).3\nIt is thus a mistake to declare convergence in any practical sense if lp__ has not converged, because different chains are really in different parts of the space. Yet measuring convergence for lp__ is particularly tricky, as noted below.\n\n\nMarkov chain convergence is a global property in the sense that it does not depend on the choice of function of the parameters that is monitored. There is no hard cutoff between pre-convergence “transience” and post-convergence “equilibrium.” What happens is that as the number of states in the chain approaches infinity, the distribution of possible states in the chain approaches the target distribution and in that limit the expected value of the Monte Carlo estimator of any integrable function converges to the true expectation. There is nothing like warmup here, because in the limit, the effects of initial state are completely washed out.\n\n\n\nThe \\(\\hat{R}\\) statistic considers the composition of a Markov chain and a function, and if the Markov chain has converged then each Markov chain and function composition will have converged. Multivariate functions converge when all of their margins have converged by the Cramer-Wold theorem.\nThe transformation from unconstrained space to constrained space is just another function, so does not affect convergence.\nDifferent functions may have different autocorrelations, but if the Markov chain has equilibrated then all Markov chain plus function compositions should be consistent with convergence. 
Formally, any function that appears inconsistent is of concern and although it would be unreasonable to test every function, lp__ and other measured quantities should at least be consistent.\nThe obvious difference in lp__ is that it tends to vary quickly with position and is consequently susceptible to outliers.\n\n\n\nThe question is what happens for finite numbers of states? If we can prove a strong geometric ergodicity property (which depends on the sampler and the target distribution), then one can show that there exists a finite time after which the chain forgets its initial state with a large probability. This is both the autocorrelation time and the warmup time. But even if you can show it exists and is finite (which is nigh impossible) you can’t compute an actual value analytically.\nSo what we do in practice is hope that the finite number of draws is large enough for the expectations to be reasonably accurate. Removing warmup iterations improves the accuracy of the expectations but there is no guarantee that removing any finite number of draws will be enough.\n\n\n\nFirstly, as noted above, for any finite number of draws, there will always be some residual effect of the initial state, which typically manifests as some small (or large if the autocorrelation time is huge) probability of having a large outlier. Functions robust to such outliers (say, quantiles) will appear more stable and have better \\(\\hat{R}\\). Functions vulnerable to such outliers may show fragility.\nSecondly, use of the \\(\\hat{R}\\) statistic makes very strong assumptions. In particular, it assumes that the functions being considered are Gaussian or it only uses the first two moments and assumes some kind of independence. The point is that strong assumptions are made that do not always hold. 
In particular, the distribution for the log posterior density (lp__) almost never looks Gaussian, instead it features long tails that can lead to large \\(\\hat{R}\\) even in the large \\(N\\) limit. Tweaks to \\(\\hat{R}\\), such as using quantiles in place of raw values, have the flavor of making the sample of interest more Gaussian and hence the \\(\\hat{R}\\) statistic more accurate.\n\n\n\n“Convergence” is a global property and holds for all integrable functions at once, but employing the \\(\\hat{R}\\) statistic requires additional assumptions and thus may not work for all functions equally well.\nNote that if you just compare the expectations between chains then we can rely on the Markov chain asymptotics for Gaussian distributions and can apply the standard tests.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Posterior Analysis" + ] + }, + { + "objectID": "reference-manual/analysis.html#effective-sample-size.section", + "href": "reference-manual/analysis.html#effective-sample-size.section", + "title": "Posterior Analysis", + "section": "", + "text": "The second technical difficulty posed by MCMC methods is that the draws will typically be autocorrelated (or anticorrelated) within a chain. This increases (or reduces) the uncertainty of the estimation of posterior quantities of interest, such as means, variances, or quantiles; see Charles J. Geyer (2011).\nStan estimates an effective sample size for each parameter, which plays the role in the Markov chain Monte Carlo central limit theorem (MCMC CLT) as the number of independent draws plays in the standard central limit theorem (CLT).\nUnlike most packages, the particular calculations used by Stan follow those for split-\\(\\hat{R}\\), which involve both cross-chain (mean) and within-chain calculations (autocorrelation); see Gelman et al. (2013) and Vehtari et al. 
(2021).\n\n\nThe amount by which autocorrelation within the chains increases uncertainty in estimates can be measured by effective sample size (ESS). Given independent sample (with finite variance), the central limit theorem bounds uncertainty in estimates based on the sample size \\(N\\). Given dependent sample, the sample size is replaced with the effective sample size \\(N_{\\mathrm{eff}}\\).\nFor example, Monte Carlo standard error (MCSE) is proportional to \\(1 / \\sqrt{N_{\\mathrm{eff}}}\\) rather than \\(1/\\sqrt{N}\\).\nThe effective sample size of a sequence is defined in terms of the autocorrelations within the sequence at different lags. The autocorrelation \\(\\rho_t\\) at lag \\(t \\geq 0\\) for a chain with joint probability function \\(p(\\theta)\\) with mean \\(\\mu\\) and variance \\(\\sigma^2\\) is defined to be\n\\[\n\\rho_t\n=\n\\frac{1}{\\sigma^2} \\, \\int_{\\Theta} (\\theta^{(n)} - \\mu)\n(\\theta^{(n+t)} - \\mu) \\, p(\\theta) \\, d\\theta.\n\\]\nThis is the correlation between the two chains offset by \\(t\\) positions (i.e., a lag in time-series terminology). Because we know \\(\\theta^{(n)}\\) and \\(\\theta^{(n+t)}\\) have the same marginal distribution in an MCMC setting, multiplying the two difference terms and reducing yields\n\\[\n\\rho_t\n= \\frac{1}{\\sigma^2}\n\\, \\int_{\\Theta}\n \\theta^{(n)} \\, \\theta^{(n+t)} \\, p(\\theta)\n \\, d\\theta - \\frac{\\mu^2}{\\sigma^2}.\n\\]\nThe effective sample size of \\(N\\) draws generated by a process with autocorrelations \\(\\rho_t\\) is defined by \\[\nN_{\\mathrm{eff}}\n\\ = \\\n\\frac{N}{\\sum_{t = -\\infty}^{\\infty} \\rho_t}\n\\ = \\\n\\frac{N}{1 + 2 \\sum_{t = 1}^{\\infty} \\rho_t}.\n\\]\nFor independent draws, the effective sample size is just the number of iterations. For correlated draws, the effective sample size is usually lower than the number of iterations, but in case of anticorrelated draws, the effective sample size can be larger than the number of iterations. 
In this latter case, MCMC can work better than independent sampling for some estimation problems. Hamiltonian Monte Carlo, including the no-U-turn sampler used by default in Stan, can produce anticorrelated draws if the posterior is close to Gaussian with little posterior correlation.\n\n\n\nIn practice, the probability function in question cannot be tractably integrated and thus the autocorrelation cannot be calculated, nor the effective sample size. Instead, these quantities must be estimated from the draws themselves. The rest of this section describes a autocorrelations and split-\\(\\hat{R}\\) based effective sample size estimator, based on multiple chains. As before, each chain \\(\\theta_m\\) will be assumed to be of length \\(N\\).\nStan carries out the autocorrelation computations for all lags simultaneously using Eigen’s fast Fourier transform (FFT) package with appropriate padding; see Charles J. Geyer (2011) for more detail on using FFT for autocorrelation calculations. The autocorrelation estimates \\(\\hat{\\rho}_{t,m}\\) at lag \\(t\\) from multiple chains \\(m \\in\n(1,\\ldots,M)\\) are combined with within-sample variance estimate \\(W\\) and multi-chain variance estimate \\(\\widehat{\\mbox{var}}^{+}\\) introduced in the previous section to compute the combined autocorrelation at lag \\(t\\) as\n\\[\n\\hat{\\rho}_t\n= 1 - \\frac{\\displaystyle W\n - \\textstyle \\frac{1}{M}\\sum_{m=1}^M s_m^2 \\hat{\\rho}_{t,m}}\n {\\widehat{\\mbox{var}}^{+}}.\n\\]\nIf the chains have not converged, the variance estimator \\(\\widehat{\\mbox{var}}^{+}\\) will overestimate variance, leading to an overestimate of autocorrelation and an underestimate effective sample size.\nBecause of the noise in the correlation estimates \\(\\hat{\\rho}_t\\) as \\(t\\) increases, a typical truncated sum of \\(\\hat{\\rho}_t\\) is used. 
Negative autocorrelations may occur only on odd lags and by summing over pairs starting from lag 0, the paired autocorrelation is guaranteed to be positive, monotone and convex modulo estimator noise Charles J. Geyer (1992), Charles J. Geyer (2011). Stan uses Geyer’s initial monotone sequence criterion. The effective sample size estimator is defined as\n\\[\n\\hat{N}_{\\mathrm{eff}} = \\frac{M \\cdot N}{\\hat{\\tau}},\n\\]\nwhere\n\\[\n\\hat{\\tau} = 1 + 2 \\sum_{t=1}^{2m+1} \\hat{\\rho}_t = -1 + 2 \\sum_{t'=0}^{m} \\hat{P}_{t'},\n\\]\nwhere \\(\\hat{P}_{t'}=\\hat{\\rho}_{2t'}+\\hat{\\rho}_{2t'+1}\\). Initial positive sequence estimators is obtained by choosing the largest \\(m\\) such that \\(\\hat{P}_{t'}>0, \\quad t' = 1,\\ldots,m\\). The initial monotone sequence is obtained by further reducing \\(\\hat{P}_{t'}\\) to the minimum of the preceding ones so that the estimated sequence is monotone.\n\n\n\nThe posterior standard deviation of a parameter \\(\\theta_n\\) conditioned on observed data \\(y\\) is just the standard deviation of the posterior density \\(p(\\theta_n | y)\\). This is estimated by the standard deviation of the combined posterior draws across chains,\n\\[\n\\hat{\\sigma}_n = \\mathrm{sd}(\\theta^{(1)}_n, \\ldots, \\theta^{(m)}_n).\n\\]\nThe previous section showed how to estimate \\(N_{\\mathrm{eff}}\\) for a parameter \\(\\theta_n\\) based on multiple chains of posterior draws.\nThe mean of the posterior draws of \\(\\theta_n\\) \\[\n\\hat{\\theta}_n\n= \\mathrm{mean}(\\theta^{(1)}_n, \\ldots, \\theta^{(m)}_n)\n\\]\nis treated as an estimator of the true posterior mean,\n\\[\n\\mathbb{E}[\\theta_n \\mid y]\n\\ = \\\n\\int_{-\\infty}^{\\infty}\n \\, \\theta \\, p(\\theta | y)\n\\, \\mathrm{d}\\theta_n,\n\\]\nbased the observed data \\(y\\).\nThe standard error for the estimator \\(\\hat{\\theta}_n\\) is given by the posterior standard deviation divided by the square root of the effective sample size. 
This standard error is itself estimated as \\(\\hat{\\sigma}_n / \\sqrt{N_{\\mathrm{eff}}}\\). The smaller the standard error, the closer the estimate \\(\\hat{\\theta}_n\\) is expected to be to the true value. This is just the MCMC CLT applied to an estimator; see Charles J. Geyer (2011) for more details of the MCMC central limit theorem.\n\n\n\nIn complex posteriors, draws are almost always positively correlated. In these situations, the autocorrelation at lag \\(t\\), \\(\\rho_t\\), decreases as the lag, \\(t\\), increases. In this situation, thinning the sample by keeping only every \\(N\\)-th draw will reduce the autocorrelation of the resulting chain. This is particularly useful if we need to save storage or re-use the draws for inference.\nFor instance, consider generating one thousand posterior draws in one of the following two ways.\n\nGenerate 1000 draws after convergence and save all of them.\nGenerate 10,000 draws after convergence and save every tenth draw.\n\nEven though both produce a sample consisting of one thousand draws, the second approach with thinning can produce a higher effective sample size when the draws are positively correlated. That’s because the autocorrelation \\(\\rho_t\\) for the thinned sequence is equivalent to \\(\\rho_{10t}\\) in the unthinned sequence, so the sum of the autocorrelations usually will be lower and thus the effective sample size higher.\nNow contrast the second approach above with the unthinned alternative,\n\nGenerate 10,000 draws after convergence and save every draw.\n\nThis will typically have a higher effective sample size than the thinned sample consisting of every tenth draw. But the gap might not be very large. 
To summarize, the only reason to thin a sample is to reduce memory requirements.\nIf draws are anticorrelated, then thinning will increase correlation and further reduce the overall effective sample size.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Posterior Analysis" + ] + }, + { + "objectID": "reference-manual/analysis.html#footnotes", + "href": "reference-manual/analysis.html#footnotes", + "title": "Posterior Analysis", + "section": "Footnotes", + "text": "Footnotes\n\n\nUsing vectors simplifies high level exposition at the expense of collapsing structure.↩︎\nThe structure is assumed to be rectangular; in the future, this needs to be generalized to ragged samples.↩︎\nThe lp__ value also represents the potential energy in the Hamiltonian system and is rate bounded by the randomly supplied kinetic energy each iteration, which follows a Chi-square distribution in the number of parameters.↩︎", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Posterior Analysis" + ] + }, + { + "objectID": "reference-manual/comments.html", + "href": "reference-manual/comments.html", + "title": "Comments", + "section": "", + "text": "Stan supports C++-style line-based and bracketed comments. Comments may be used anywhere whitespace is allowed in a Stan program.\n\n\nAny characters on a line following two forward slashes (//) is ignored along with the slashes. 
These may be used, for example, to document variables,\ndata {\n int<lower=0> N; // number of observations\n array[N] real y; // observations\n}\n\n\n\nFor bracketed comments, any text between a forward-slash and asterisk pair (/*) and an asterisk and forward-slash pair (*/) is ignored.", + "crumbs": [ + "Reference Manual", + "Language", + "Comments" + ] + }, + { + "objectID": "reference-manual/comments.html#line-based-comments", + "href": "reference-manual/comments.html#line-based-comments", + "title": "Comments", + "section": "", + "text": "Any characters on a line following two forward slashes (//) is ignored along with the slashes. These may be used, for example, to document variables,\ndata {\n int<lower=0> N; // number of observations\n array[N] real y; // observations\n}", + "crumbs": [ + "Reference Manual", + "Language", + "Comments" + ] + }, + { + "objectID": "reference-manual/comments.html#bracketed-comments", + "href": "reference-manual/comments.html#bracketed-comments", + "title": "Comments", + "section": "", + "text": "For bracketed comments, any text between a forward-slash and asterisk pair (/*) and an asterisk and forward-slash pair (*/) is ignored.", + "crumbs": [ + "Reference Manual", + "Language", + "Comments" + ] + }, + { + "objectID": "reference-manual/diagnostics.html", + "href": "reference-manual/diagnostics.html", + "title": "Diagnostic Mode", + "section": "", + "text": "Stan’s diagnostic mode runs a Stan program with data, initializing parameters either randomly or with user-specified initial values, and then evaluates the log probability and its gradients. The gradients computed by the Stan program are compared to values calculated by finite differences.\nDiagnostic mode may be configured with two parameters.\nDiagnostic Mode Configuration Table. 
The diagnostic model configuration parameters, constraints, and default values.\n\n\n\n\n\n\n\n\n\nparameter\ndescription\nconstraints\ndefault\n\n\n\n\nepsilon\nfinite difference size\n(0, infty)\n1e–6\n\n\nerror\nerror threshold for matching\n(0, infty)\n1e–6\n\n\n\nIf the difference between the Stan program’s gradient value and that calculated by finite difference is higher than the specified threshold, the argument will be flagged.\n\n\nDiagnostic mode prints the log posterior density (up to a proportion) calculated by the Stan program for the specified initial values. For each parameter, it prints the gradient at the initial parameter values calculated by Stan’s program and by finite differences over Stan’s program for the log probability.\n\n\nThe output is for the variable values and their gradients are on the unconstrained scale, which means each variable is a vector of size corresponding to the number of unconstrained variables required to define it. For example, an \\(N \\times N\\) correlation matrix, requires \\(\\binom{N}{2}\\) unconstrained parameters. The transformations from constrained to unconstrained parameters are based on the constraints in the parameter declarations and described in the reference manual chapter on transforms.\n\n\n\nThe log density includes the Jacobian adjustment implied by the constraints declared on variables. The Jacobian adjustment for constrained parameter transforms may be turned off for optimization, but there is as of yet no way to turn it off in diagnostic mode.\n\n\n\n\nThe general configuration options for diagnostics are the same as those for MCMC. Initial values may be specified, or they may be drawn at random. Setting the random number generator will only have an effect if a random initialization is specified.\n\n\n\nDue to the application of finite differences, the computation time grows linearly with the number of parameters. 
This can require a very long time, especially in models with latent parameters that grow with the data size. It can be helpful to diagnose a model with smaller data sizes in such cases.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Diagnostic Mode" + ] + }, + { + "objectID": "reference-manual/diagnostics.html#diagnostic-mode-output", + "href": "reference-manual/diagnostics.html#diagnostic-mode-output", + "title": "Diagnostic Mode", + "section": "", + "text": "Diagnostic mode prints the log posterior density (up to a proportion) calculated by the Stan program for the specified initial values. For each parameter, it prints the gradient at the initial parameter values calculated by Stan’s program and by finite differences over Stan’s program for the log probability.\n\n\nThe output for the variable values and their gradients is on the unconstrained scale, which means each variable is a vector of size corresponding to the number of unconstrained variables required to define it. For example, an \\(N \\times N\\) correlation matrix requires \\(\\binom{N}{2}\\) unconstrained parameters. The transformations from constrained to unconstrained parameters are based on the constraints in the parameter declarations and described in the reference manual chapter on transforms.\n\n\n\nThe log density includes the Jacobian adjustment implied by the constraints declared on variables. The Jacobian adjustment for constrained parameter transforms may be turned off for optimization, but there is as of yet no way to turn it off in diagnostic mode.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Diagnostic Mode" + ] + }, + { + "objectID": "reference-manual/diagnostics.html#configuration-options", + "href": "reference-manual/diagnostics.html#configuration-options", + "title": "Diagnostic Mode", + "section": "", + "text": "The general configuration options for diagnostics are the same as those for MCMC. 
Initial values may be specified, or they may be drawn at random. Setting the random number generator will only have an effect if a random initialization is specified.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Diagnostic Mode" + ] + }, + { + "objectID": "reference-manual/diagnostics.html#speed-warning-and-data-trimming", + "href": "reference-manual/diagnostics.html#speed-warning-and-data-trimming", + "title": "Diagnostic Mode", + "section": "", + "text": "Due to the application of finite differences, the computation time grows linearly with the number of parameters. This can require a very long time, especially in models with latent parameters that grow with the data size. It can be helpful to diagnose a model with smaller data sizes in such cases.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Diagnostic Mode" + ] + }, + { + "objectID": "reference-manual/execution.html", + "href": "reference-manual/execution.html", + "title": "Program Execution", + "section": "", + "text": "This chapter provides a sketch of how a compiled Stan model is executed using sampling. Optimization shares the same data reading and initialization steps, but then does optimization rather than sampling.\nThis sketch is elaborated in the following chapters of this part, which cover variable declarations, expressions, statements, and blocks in more detail.\n\n\nThe reading and transforming data steps are the same for sampling, optimization and diagnostics.\n\n\nThe first step of execution is to read data into memory. Data may be read in through a file (in CmdStan) or through memory (RStan and PyStan); see their respective manuals for details.1\nAll of the variables declared in the data block will be read. If a variable cannot be read, the program will halt with a message indicating which data variable is missing.\nAfter each variable is read, if it has a declared constraint, the constraint is validated. 
For example, if a variable N is declared as int<lower=0>, after N is read, it will be tested to make sure it is greater than or equal to zero. If a variable violates its declared constraint, the program will halt with a warning message indicating which variable contains an illegal value, the value that was read, and the constraint that was declared.\n\n\n\nAfter data is read into the model, the transformed data variable statements are executed in order to define the transformed data variables. As the statements execute, declared constraints on variables are not enforced.\nTransformed data variables are initialized with real values set to NaN and integer values set to the smallest integer (large absolute value negative number).\nAfter the statements are executed, all declared constraints on transformed data variables are validated. If the validation fails, execution halts and the variable’s name, value and constraints are displayed.\n\n\n\n\nInitialization is the same for all of Stan’s algorithms.\n\n\nIf there are user-supplied initial values for parameters, these are read using the same input mechanism and same file format as data reads. Any constraints declared on the parameters are validated for the initial values. If a variable’s value violates its declared constraint, the program halts and a diagnostic message is printed.\nAfter being read, initial values are transformed to unconstrained values that will be used to initialize the sampler.\n\n\nBecause of the way Stan defines its transforms from the constrained to the unconstrained space, initializing parameters on the boundaries of their constraints is usually problematic. For instance, with a constraint\nparameters {\n real<lower=0, upper=1> theta;\n // ...\n}\nan initial value of 0 for theta leads to an unconstrained value of \\(-\\infty\\), whereas a value of 1 leads to an unconstrained value of \\(+\\infty\\). 
While this will be inverse transformed back correctly given the behavior of floating point arithmetic, the Jacobian will be infinite and the log probability function will fail and raise an exception.\n\n\n\n\nIf there are no user-supplied initial values, the default initialization strategy is to initialize the unconstrained parameters directly with values drawn uniformly from the interval \\((-2,2)\\). The bounds of this initialization can be changed but it is always symmetric around 0. The value of 0 is special in that it represents the median of the initialization. An unconstrained value of 0 corresponds to different parameter values depending on the constraints declared on the parameters.\nAn unconstrained real does not involve any transform, so an initial value of 0 for the unconstrained parameters is also a value of 0 for the constrained parameters.\nFor parameters that are bounded below at 0, the initial value of 0 on the unconstrained scale corresponds to \\(\\exp(0) = 1\\) on the constrained scale. A value of -2 corresponds to \\(\\exp(-2) = .13\\) and a value of 2 corresponds to \\(\\exp(2) = 7.4\\).\nFor parameters bounded above and below, the initial value of 0 on the unconstrained scale corresponds to a value at the midpoint of the constraint interval. For probability parameters, bounded below by 0 and above by 1, the transform is the inverse logit, so that an initial unconstrained value of 0 corresponds to a constrained value of 0.5, -2 corresponds to 0.12 and 2 to 0.88. 
Bounds other than 0 and 1 are just scaled and translated.\nSimplexes with initial values of 0 on the unconstrained basis correspond to symmetric values on the constrained values (i.e., each value is \\(1/K\\) in a \\(K\\)-simplex).\nCholesky factors for positive-definite matrices are initialized to 1 on the diagonal and 0 elsewhere; this is because the diagonal is log transformed and the below-diagonal values are unconstrained.\nThe initial values for other parameters can be determined from the transform that is applied. The transforms are all described in full detail in the chapter on variable transforms.\n\n\n\nThe initial values may all be set to 0 on the unconstrained scale. This can be helpful for diagnosis, and may also be a good starting point for sampling. Once a model is running, multiple chains with more diffuse starting points can help diagnose problems with convergence; see the user’s guide for more information on convergence monitoring.\n\n\n\n\nSampling is based on simulating the Hamiltonian of a particle with a starting position equal to the current parameter values and an initial momentum (kinetic energy) generated randomly. The potential energy at work on the particle is taken to be the negative log (unnormalized) total probability function defined by the model. In the usual approach to implementing HMC, the Hamiltonian dynamics of the particle is simulated using the leapfrog integrator, which discretizes the smooth path of the particle into a number of small time steps called leapfrog steps.\n\n\nFor each leapfrog step, the negative log probability function and its gradient need to be evaluated at the position corresponding to the current parameter values (a more detailed sketch is provided in the next section). These are used to update the momentum based on the gradient and the position based on the momentum.\nFor simple models, only a few leapfrog steps with large step sizes are needed. 
For models with complex posterior geometries, many small leapfrog steps may be needed to accurately model the path of the parameters.\nIf the user specifies the number of leapfrog steps (i.e., chooses to use standard HMC), that number of leapfrog steps is simulated. If the user has not specified the number of leapfrog steps, the No-U-Turn sampler (NUTS) will determine the number of leapfrog steps adaptively (Hoffman and Gelman 2014).\n\n\n\nDuring each leapfrog step, the log probability function and its gradient must be calculated. This is where most of the time in the Stan algorithm is spent. This log probability function, which is used by the sampling algorithm, is defined over the unconstrained parameters.\nThe first step of the calculation requires the inverse transform of the unconstrained parameter values back to the constrained parameters in terms of which the model is defined. There is no error checking required because the inverse transform is a total function whose range contains only values that satisfy the constraints.\nBecause the probability statements in the model are defined in terms of constrained parameters, the log Jacobian of the inverse transform must be added to the accumulated log probability.\nNext, the transformed parameter statements are executed. After they complete, any constraints declared for the transformed parameters are checked. If the constraints are violated, the model will halt with a diagnostic error message.\nThe final step in the log probability function calculation is to execute the statements defined in the model block.\nAs the log probability function executes, it accumulates an in-memory representation of the expression tree used to calculate the log probability. This includes all of the transformed parameter operations and all of the Jacobian adjustments. This tree is then used to evaluate the gradients by propagating partial derivatives backward along the expression graph. 
The gradient calculations account for the majority of the cycles consumed by a Stan program.\n\n\n\nA standard Metropolis accept/reject step is required to retain detailed balance and ensure draws are marginally distributed according to the probability function defined by the model. This Metropolis adjustment is based on comparing log probabilities, here defined by the Hamiltonian, which is the sum of the potential (negative log probability) and kinetic (squared momentum) energies. In theory, the Hamiltonian is invariant over the path of the particle and rejection should never occur. In practice, the probability of rejection is determined by the accuracy of the leapfrog approximation to the true trajectory of the parameters.\nIf step sizes are small, very few updates will be rejected, but many steps will be required to move the same distance. If step sizes are large, more updates will be rejected, but fewer steps will be required to move the same distance. Thus a balance between effort and rejection rate is required. If the user has not specified a step size, Stan will tune the step size during warmup sampling to achieve a desired rejection rate (thus balancing rejection versus number of steps).\nIf the proposal is accepted, the parameters are updated to their new values. Otherwise, the sample is the current set of parameter values.\n\n\n\n\nOptimization runs very much like sampling in that it starts by reading the data and then initializing parameters. Unlike sampling, it produces a deterministic output which requires no further analysis other than to verify that the optimizer itself converged to a posterior mode. The output for optimization is also similar to that for sampling.\n\n\n\nVariational inference also runs similar to sampling. It begins by reading the data and initializing the algorithm. The initial variational approximation is a random draw from the standard normal distribution in the unconstrained (real-coordinate) space. 
Again, similar to sampling, it outputs draws from the approximate posterior once the algorithm has decided that it has converged. Thus, the tools we use for analyzing the result of Stan’s sampling routines can also be used for variational inference.\n\n\n\nModel diagnostics are like sampling and optimization in that they depend on a model’s data being read and its parameters being initialized. The user’s guides for the interfaces (RStan, PyStan, CmdStan) provide more details on the diagnostics available; as of Stan 2.0, that’s just gradients on the unconstrained scale and log probabilities.\n\n\n\nFor each final draw (not counting draws during warmup or draws that are thinned), there is an output stage of writing the draw.\n\n\nBefore generating any output, the statements in the generated quantities block are executed. This can be used for any forward simulation based on parameters of the model. Or it may be used to transform parameters to an appropriate form for output.\nAfter the generated quantities statements execute, the constraints declared on generated quantities variables are validated. If these constraints are violated, the program will terminate with a diagnostic message.\n\n\n\nThe final step is to write the actual values. The values of all variables declared as parameters, transformed parameters, or generated quantities are written. Local variables are not written, nor is the data or transformed data. 
All values are written in their constrained forms, that is, the form that is used in the model definitions.\nIn the executable form of a Stan model, parameters, transformed parameters, and generated quantities are written to a file in comma-separated value (CSV) notation with a header defining the names of the parameters (including indices for multivariate parameters).2", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/execution.html#reading-and-transforming-data", + "href": "reference-manual/execution.html#reading-and-transforming-data", + "title": "Program Execution", + "section": "", + "text": "The reading and transforming data steps are the same for sampling, optimization and diagnostics.\n\n\nThe first step of execution is to read data into memory. Data may be read in through file (in CmdStan) or through memory (RStan and PyStan); see their respective manuals for details.1\nAll of the variables declared in the data block will be read. If a variable cannot be read, the program will halt with a message indicating which data variable is missing.\nAfter each variable is read, if it has a declared constraint, the constraint is validated. For example, if a variable N is declared as int<lower=0>, after N is read, it will be tested to make sure it is greater than or equal to zero. If a variable violates its declared constraint, the program will halt with a warning message indicating which variable contains an illegal value, the value that was read, and the constraint that was declared.\n\n\n\nAfter data is read into the model, the transformed data variable statements are executed in order to define the transformed data variables. 
As the statements execute, declared constraints on variables are not enforced.\nTransformed data variables are initialized with real values set to NaN and integer values set to the smallest integer (large absolute value negative number).\nAfter the statements are executed, all declared constraints on transformed data variables are validated. If the validation fails, execution halts and the variable’s name, value and constraints are displayed.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/execution.html#initialization", + "href": "reference-manual/execution.html#initialization", + "title": "Program Execution", + "section": "", + "text": "Initialization is the same for all of Stan’s algorithms.\n\n\nIf there are user-supplied initial values for parameters, these are read using the same input mechanism and same file format as data reads. Any constraints declared on the parameters are validated for the initial values. If a variable’s value violates its declared constraint, the program halts and a diagnostic message is printed.\nAfter being read, initial values are transformed to unconstrained values that will be used to initialize the sampler.\n\n\nBecause of the way Stan defines its transforms from the constrained to the unconstrained space, initializing parameters on the boundaries of their constraints is usually problematic. For instance, with a constraint\nparameters {\n real<lower=0, upper=1> theta;\n // ...\n}\nan initial value of 0 for theta leads to an unconstrained value of \\(-\\infty\\), whereas a value of 1 leads to an unconstrained value of \\(+\\infty\\). 
While this will be inverse transformed back correctly given the behavior of floating point arithmetic, the Jacobian will be infinite and the log probability function will fail and raise an exception.\n\n\n\n\nIf there are no user-supplied initial values, the default initialization strategy is to initialize the unconstrained parameters directly with values drawn uniformly from the interval \\((-2,2)\\). The bounds of this initialization can be changed but it is always symmetric around 0. The value of 0 is special in that it represents the median of the initialization. An unconstrained value of 0 corresponds to different parameter values depending on the constraints declared on the parameters.\nAn unconstrained real does not involve any transform, so an initial value of 0 for the unconstrained parameters is also a value of 0 for the constrained parameters.\nFor parameters that are bounded below at 0, the initial value of 0 on the unconstrained scale corresponds to \\(\\exp(0) = 1\\) on the constrained scale. A value of -2 corresponds to \\(\\exp(-2) = .13\\) and a value of 2 corresponds to \\(\\exp(2) = 7.4\\).\nFor parameters bounded above and below, the initial value of 0 on the unconstrained scale corresponds to a value at the midpoint of the constraint interval. For probability parameters, bounded below by 0 and above by 1, the transform is the inverse logit, so that an initial unconstrained value of 0 corresponds to a constrained value of 0.5, -2 corresponds to 0.12 and 2 to 0.88. 
Bounds other than 0 and 1 are just scaled and translated.\nSimplexes with initial values of 0 on the unconstrained basis correspond to symmetric values on the constrained values (i.e., each value is \\(1/K\\) in a \\(K\\)-simplex).\nCholesky factors for positive-definite matrices are initialized to 1 on the diagonal and 0 elsewhere; this is because the diagonal is log transformed and the below-diagonal values are unconstrained.\nThe initial values for other parameters can be determined from the transform that is applied. The transforms are all described in full detail in the chapter on variable transforms.\n\n\n\nThe initial values may all be set to 0 on the unconstrained scale. This can be helpful for diagnosis, and may also be a good starting point for sampling. Once a model is running, multiple chains with more diffuse starting points can help diagnose problems with convergence; see the user’s guide for more information on convergence monitoring.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/execution.html#sampling", + "href": "reference-manual/execution.html#sampling", + "title": "Program Execution", + "section": "", + "text": "Sampling is based on simulating the Hamiltonian of a particle with a starting position equal to the current parameter values and an initial momentum (kinetic energy) generated randomly. The potential energy at work on the particle is taken to be the negative log (unnormalized) total probability function defined by the model. 
In the usual approach to implementing HMC, the Hamiltonian dynamics of the particle is simulated using the leapfrog integrator, which discretizes the smooth path of the particle into a number of small time steps called leapfrog steps.\n\n\nFor each leapfrog step, the negative log probability function and its gradient need to be evaluated at the position corresponding to the current parameter values (a more detailed sketch is provided in the next section). These are used to update the momentum based on the gradient and the position based on the momentum.\nFor simple models, only a few leapfrog steps with large step sizes are needed. For models with complex posterior geometries, many small leapfrog steps may be needed to accurately model the path of the parameters.\nIf the user specifies the number of leapfrog steps (i.e., chooses to use standard HMC), that number of leapfrog steps is simulated. If the user has not specified the number of leapfrog steps, the No-U-Turn sampler (NUTS) will determine the number of leapfrog steps adaptively (Hoffman and Gelman 2014).\n\n\n\nDuring each leapfrog step, the log probability function and its gradient must be calculated. This is where most of the time in the Stan algorithm is spent. This log probability function, which is used by the sampling algorithm, is defined over the unconstrained parameters.\nThe first step of the calculation requires the inverse transform of the unconstrained parameter values back to the constrained parameters in terms of which the model is defined. There is no error checking required because the inverse transform is a total function whose range contains only values that satisfy the constraints.\nBecause the probability statements in the model are defined in terms of constrained parameters, the log Jacobian of the inverse transform must be added to the accumulated log probability.\nNext, the transformed parameter statements are executed. 
After they complete, any constraints declared for the transformed parameters are checked. If the constraints are violated, the model will halt with a diagnostic error message.\nThe final step in the log probability function calculation is to execute the statements defined in the model block.\nAs the log probability function executes, it accumulates an in-memory representation of the expression tree used to calculate the log probability. This includes all of the transformed parameter operations and all of the Jacobian adjustments. This tree is then used to evaluate the gradients by propagating partial derivatives backward along the expression graph. The gradient calculations account for the majority of the cycles consumed by a Stan program.\n\n\n\nA standard Metropolis accept/reject step is required to retain detailed balance and ensure draws are marginally distributed according to the probability function defined by the model. This Metropolis adjustment is based on comparing log probabilities, here defined by the Hamiltonian, which is the sum of the potential (negative log probability) and kinetic (squared momentum) energies. In theory, the Hamiltonian is invariant over the path of the particle and rejection should never occur. In practice, the probability of rejection is determined by the accuracy of the leapfrog approximation to the true trajectory of the parameters.\nIf step sizes are small, very few updates will be rejected, but many steps will be required to move the same distance. If step sizes are large, more updates will be rejected, but fewer steps will be required to move the same distance. Thus a balance between effort and rejection rate is required. If the user has not specified a step size, Stan will tune the step size during warmup sampling to achieve a desired rejection rate (thus balancing rejection versus number of steps).\nIf the proposal is accepted, the parameters are updated to their new values. 
Otherwise, the sample is the current set of parameter values.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/execution.html#optimization", + "href": "reference-manual/execution.html#optimization", + "title": "Program Execution", + "section": "", + "text": "Optimization runs very much like sampling in that it starts by reading the data and then initializing parameters. Unlike sampling, it produces a deterministic output which requires no further analysis other than to verify that the optimizer itself converged to a posterior mode. The output for optimization is also similar to that for sampling.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/execution.html#variational-inference", + "href": "reference-manual/execution.html#variational-inference", + "title": "Program Execution", + "section": "", + "text": "Variational inference also runs similar to sampling. It begins by reading the data and initializing the algorithm. The initial variational approximation is a random draw from the standard normal distribution in the unconstrained (real-coordinate) space. Again, similar to sampling, it outputs draws from the approximate posterior once the algorithm has decided that it has converged. Thus, the tools we use for analyzing the result of Stan’s sampling routines can also be used for variational inference.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/execution.html#model-diagnostics", + "href": "reference-manual/execution.html#model-diagnostics", + "title": "Program Execution", + "section": "", + "text": "Model diagnostics are like sampling and optimization in that they depend on a model’s data being read and its parameters being initialized. 
The user’s guides for the interfaces (RStan, PyStan, CmdStan) provide more details on the diagnostics available; as of Stan 2.0, that’s just gradients on the unconstrained scale and log probabilities.", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/execution.html#output", + "href": "reference-manual/execution.html#output", + "title": "Program Execution", + "section": "", + "text": "For each final draw (not counting draws during warmup or draws that are thinned), there is an output stage of writing the draw.\n\n\nBefore generating any output, the statements in the generated quantities block are executed. This can be used for any forward simulation based on parameters of the model. Or it may be used to transform parameters to an appropriate form for output.\nAfter the generated quantities statements execute, the constraints declared on generated quantities variables are validated. If these constraints are violated, the program will terminate with a diagnostic message.\n\n\n\nThe final step is to write the actual values. The values of all variables declared as parameters, transformed parameters, or generated quantities are written. Local variables are not written, nor is the data or transformed data. 
All values are written in their constrained forms, that is, the form that is used in the model definitions.\nIn the executable form of a Stan model, parameters, transformed parameters, and generated quantities are written to a file in comma-separated value (CSV) notation with a header defining the names of the parameters (including indices for multivariate parameters).2", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/execution.html#footnotes", + "href": "reference-manual/execution.html#footnotes", + "title": "Program Execution", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe C++ code underlying Stan is flexible enough to allow data to be read from memory or file. Calls from R, for instance, can be configured to read data from file or directly from R’s memory.↩︎\nIn the R version of Stan, the values may either be written to a CSV file or directly back to R’s memory.↩︎", + "crumbs": [ + "Reference Manual", + "Language", + "Program Execution" + ] + }, + { + "objectID": "reference-manual/includes.html", + "href": "reference-manual/includes.html", + "title": "Includes", + "section": "", + "text": "Stan allows one file to be included within another file using a syntax similar to that from C++. 
For example, suppose the file my-std-normal.stan defines the standard normal log probability density function (up to an additive constant).\nfunctions {\n real my_std_normal_lpdf(vector y) {\n return -0.5 * y' * y;\n }\n}\nSuppose we also have a file containing a Stan program with an include statement.\n#include my-std-normal.stan\nparameters {\n real y;\n}\nmodel {\n y ~ my_std_normal();\n}\nThis Stan program behaves as if the contents of the file my-std-normal.stan replace the line with the #include statement, behaving as if a single Stan program were provided.\nfunctions {\n real my_std_normal_lpdf(vector y) {\n return -0.5 * y' * y;\n }\n}\nparameters {\n real y;\n}\nmodel {\n y ~ my_std_normal();\n}\nThere are no restrictions on where include statements may be placed within a file or what the contents are of the replaced file.\n\n\nIt is possible to use includes on a line non-initially. For example, the previous example could’ve included space before the # in the include line:\n #include my-std-normal.stan\nparameters {\n// ...\nIf there is initial space before an include, it will be discarded.\n\n\n\nIt is also possible to include line-based comments after the include. For example, the previous example can be coded as:\n#include my-std-normal.stan // definition of standard normal\nparameters {\n// ...\nLine comments are discarded when the entire line is replaced with the contents of the included file.\n\n\n\nRecursive includes will lead to a compiler error. 
For example, suppose a.stan contains\n#include b.stan\nand b.stan contains\n#include a.stan\nThis will result in an error explaining the circular dependency:\nSyntax error in './b.stan', line 1, column 0, included from\n'./a.stan', line 1, column 0, included from\n'./b.stan', line 1, column 0, included from\n'a.stan', line 1, column 0, include error:\n -------------------------------------------------\n 1: #include a.stan\n ^\n -------------------------------------------------\n\nFile a.stan recursively included itself.\n\n\n\nThe Stan interfaces may provide a mechanism for specifying a sequence of system paths in which to search for include files. The file included is the first one that is found in the sequence.\n\n\nIf there is not a final / or \\ in the path, a / will be appended between the path and the included file name.", + "crumbs": [ + "Reference Manual", + "Language", + "Includes" + ] + }, + { + "objectID": "reference-manual/includes.html#recursive-includes", + "href": "reference-manual/includes.html#recursive-includes", + "title": "Includes", + "section": "", + "text": "Recursive includes will lead to a compiler error. 
For example, suppose a.stan contains\n#include b.stan\nand b.stan contains\n#include a.stan\nThis will result in an error explaining the circular dependency:\nSyntax error in './b.stan', line 1, column 0, included from\n'./a.stan', line 1, column 0, included from\n'./b.stan', line 1, column 0, included from\n'a.stan', line 1, column 0, include error:\n -------------------------------------------------\n 1: #include a.stan\n ^\n -------------------------------------------------\n\nFile a.stan recursively included itself.", + "crumbs": [ + "Reference Manual", + "Language", + "Includes" + ] + }, + { + "objectID": "reference-manual/includes.html#include-paths", + "href": "reference-manual/includes.html#include-paths", + "title": "Includes", + "section": "", + "text": "The Stan interfaces may provide a mechanism for specifying a sequence of system paths in which to search for include files. The file included is the first one that is found in the sequence.\n\n\nIf there is not a final / or \\ in the path, a / will be appended between the path and the included file name.", + "crumbs": [ + "Reference Manual", + "Language", + "Includes" + ] + }, + { + "objectID": "reference-manual/laplace.html", + "href": "reference-manual/laplace.html", + "title": "Laplace Approximation", + "section": "", + "text": "Laplace Approximation\nStan provides a Laplace approximation algorithm which can be used to obtain draws from an approximated posterior. The Laplace approximation works in the unconstrained space, so that if there are constrained parameters, the normal approximation is centered at the mode in the unconstrained space and then the implemented method transforms the normal approximation sample to the constrained space before outputting them.\nGiven the estimate of the mode \\(\\widehat{\\theta}\\), the Hessian \\(H(\\widehat{\\theta})\\) is computed using central finite differences of the model functor. 
Next the algorithm computes the Cholesky factor of the negative inverse Hessian:\n\\(R^{-1} = \\textrm{chol}(-H(\\widehat{\\theta})) \\backslash \\mathbf{1}\\).\nEach draw is generated on the unconstrained scale by sampling\n\\(\\theta^{\\textrm{std}(m)} \\sim \\textrm{normal}(0, \\textrm{I})\\)\nand defining draw \\(m\\) to be\n\\(\\theta^{(m)} = \\widehat{\\theta} + R^{-1} \\cdot \\theta^{\\textrm{std}(m)}\\)\nFinally, each \\(\\theta^{(m)}\\) is transformed back to the constrained scale.\nThe one-time computation of the Cholesky factor incurs a high constant overhead of \\(\\mathcal{O}(N^3)\\) in \\(N\\) dimensions. It also requires \\(2N\\) gradient calculations to use as the basis, which scales at best as \\(\\mathcal{O}(N^2)\\) and is worse for models whose gradient calculation is super-linear in dimension. The algorithm also has a high per-draw overhead, requiring \\(N\\) standard normal pseudorandom numbers and \\(\\mathcal{O}(N^2)\\) operations per draw (to multiply by the Cholesky factor). For \\(M\\) draws, the total cost is proportional to \\(\\mathcal{O}(N^3 + M \\cdot N^2)\\).\n\n\n\n\n Back to top", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Laplace Approximation" + ] + }, + { + "objectID": "reference-manual/licenses.html", + "href": "reference-manual/licenses.html", + "title": "Licenses and Dependencies", + "section": "", + "text": "Stan and its dependent libraries are distributed under generous, freedom-respecting licenses approved by the Open Source Initiative.\nIn particular, the licenses for Stan and its dependent libraries have no “copyleft” provisions requiring applications of Stan to be open source if they are redistributed.\nThis chapter specifies the licenses for the libraries on which Stan’s math library, language, and algorithms depend. 
The last tool mentioned, Google Test, is only used for testing and is not needed to run Stan.\n\n\nStan is distributed under\n\nBSD 3-clause license (BSD New)\n\nThe copyright holder of each contribution is the developer or his or her assignee.1\n\n\n\nStan uses the Boost library for template metaprograms, traits programs, the parser, and various numerical libraries for special functions, probability functions, and random number generators. Boost is distributed under the\n\nBoost Software License version 1.0\n\nThe copyright for each Boost package is held by its developers or their assignees.\n\n\n\nStan uses the Eigen library for matrix arithmetic and linear algebra. Eigen is distributed under the\n\nMozilla Public License, version 2\n\nThe copyright of Eigen is owned jointly by its developers or their assignees.\n\n\n\nStan uses the SUNDIALS package for solving differential equations. SUNDIALS is distributed under the\n\nBSD 3-clause license (BSD New)\n\nThe copyright of SUNDIALS is owned by Lawrence Livermore National Security Lab.\n\n\n\nStan uses the Threaded Building Blocks (TBB) library for parallel computations. TBB is distributed under the\n\nApache License, version 2\n\nThe copyright of TBB is owned by Intel Corporation.\n\n\n\nStan uses Google Test for unit testing; it is not required to compile or execute models. 
Google Test is distributed under the\n\nBSD 3-clause license (BSD New)\n\nThe copyright of Google Test is owned by Google, Inc.", + "crumbs": [ + "Reference Manual", + "Usage", + "Licenses and Dependencies" + ] + }, + { + "objectID": "reference-manual/licenses.html#stan-license", + "href": "reference-manual/licenses.html#stan-license", + "title": "Licenses and Dependencies", + "section": "", + "text": "Stan is distributed under\n\nBSD 3-clause license (BSD New)\n\nThe copyright holder of each contribution is the developer or his or her assignee.1", + "crumbs": [ + "Reference Manual", + "Usage", + "Licenses and Dependencies" + ] + }, + { + "objectID": "reference-manual/licenses.html#boost-license", + "href": "reference-manual/licenses.html#boost-license", + "title": "Licenses and Dependencies", + "section": "", + "text": "Stan uses the Boost library for template metaprograms, traits programs, the parser, and various numerical libraries for special functions, probability functions, and random number generators. Boost is distributed under the\n\nBoost Software License version 1.0\n\nThe copyright for each Boost package is held by its developers or their assignees.", + "crumbs": [ + "Reference Manual", + "Usage", + "Licenses and Dependencies" + ] + }, + { + "objectID": "reference-manual/licenses.html#eigen-license", + "href": "reference-manual/licenses.html#eigen-license", + "title": "Licenses and Dependencies", + "section": "", + "text": "Stan uses the Eigen library for matrix arithmetic and linear algebra. 
Eigen is distributed under the\n\nMozilla Public License, version 2\n\nThe copyright of Eigen is owned jointly by its developers or their assignees.", + "crumbs": [ + "Reference Manual", + "Usage", + "Licenses and Dependencies" + ] + }, + { + "objectID": "reference-manual/licenses.html#sundials-license", + "href": "reference-manual/licenses.html#sundials-license", + "title": "Licenses and Dependencies", + "section": "", + "text": "Stan uses the SUNDIALS package for solving differential equations. SUNDIALS is distributed under the\n\nBSD 3-clause license (BSD New)\n\nThe copyright of SUNDIALS is owned by Lawrence Livermore National Security Lab.", + "crumbs": [ + "Reference Manual", + "Usage", + "Licenses and Dependencies" + ] + }, + { + "objectID": "reference-manual/licenses.html#threaded-building-blocks-tbb-license", + "href": "reference-manual/licenses.html#threaded-building-blocks-tbb-license", + "title": "Licenses and Dependencies", + "section": "", + "text": "Stan uses the Threaded Building Blocks (TBB) library for parallel computations. TBB is distributed under the\n\nApache License, version 2\n\nThe copyright of TBB is owned by Intel Corporation.", + "crumbs": [ + "Reference Manual", + "Usage", + "Licenses and Dependencies" + ] + }, + { + "objectID": "reference-manual/licenses.html#google-test-license", + "href": "reference-manual/licenses.html#google-test-license", + "title": "Licenses and Dependencies", + "section": "", + "text": "Stan uses Google Test for unit testing; it is not required to compile or execute models. 
Google Test is distributed under the\n\nBSD 3-clause license (BSD New)\n\nThe copyright of Google Test is owned by Google, Inc.", + "crumbs": [ + "Reference Manual", + "Usage", + "Licenses and Dependencies" + ] + }, + { + "objectID": "reference-manual/licenses.html#footnotes", + "href": "reference-manual/licenses.html#footnotes", + "title": "Licenses and Dependencies", + "section": "Footnotes", + "text": "Footnotes\n\n\nUniversities or companies often own the copyright of computer programs developed by their employees.↩︎", + "crumbs": [ + "Reference Manual", + "Usage", + "Licenses and Dependencies" + ] + }, + { + "objectID": "reference-manual/optimization.html", + "href": "reference-manual/optimization.html", + "title": "Optimization", + "section": "", + "text": "Stan provides optimization algorithms which find modes of the density specified by a Stan program. Such modes may be used as parameter estimates or as the basis of approximations to a Bayesian posterior.\nStan provides three different optimizers, a Newton optimizer, and two related quasi-Newton algorithms, BFGS and L-BFGS; see Nocedal and Wright (2006) for thorough description and analysis of all of these algorithms. The L-BFGS algorithm is the default optimizer. Newton’s method is the least efficient of the three, but has the advantage of setting its own stepsize.\n\n\nAll of the optimizers have the option of including the log absolute Jacobian determinant of inverse parameter transforms in the log probability computation. If the Jacobian adjustment is not included (the default), the optimization returns parameter values that correspond to a mode of the target in the constrained space (if such mode exists). Thus this option is useful for any optimization where we want to find the mode in the original constrained parameter space. If the Jacobian adjustment is included, the optimization returns parameter values that correspond to a mode in the unconstrained space. 
This is useful, for example, if we want to make a distributional approximation of the posterior at the mode (see Laplace sampling), as then the Jacobian adjustment needs to be included for correct results.\nAll of the optimizers are iterative and allow the maximum number of iterations to be specified; the default maximum number of iterations is 2000.\nAll of the optimizers are able to stream intermediate output reporting on their progress. Whether or not to save the intermediate iterations and stream progress is configurable.\n\n\n\n\n\nConvergence monitoring in (L-)BFGS is controlled by a number of tolerance values, any one of which being satisfied causes the algorithm to terminate with a solution. Any of the convergence tests can be disabled by setting its corresponding tolerance parameter to zero. The tests for convergence are as follows.\n\n\nThe parameters \\(\\theta_i\\) in iteration \\(i\\) are considered to have converged with respect to tolerance tol_param if\n\\[\n|| \\theta_{i} - \\theta_{i-1} || < \\mathtt{tol\\_param}.\n\\]\n\n\n\nThe (unnormalized) log density \\(\\log p(\\theta_{i}|y)\\) for the parameters \\(\\theta_i\\) in iteration \\(i\\) given data \\(y\\) is considered to have converged with respect to tolerance tol_obj if\n\\[\n\\left| \\log p(\\theta_{i}|y) - \\log p(\\theta_{i-1}|y) \\right| <\n\\mathtt{tol\\_obj}.\n\\]\nThe log density is considered to have converged to within relative tolerance tol_rel_obj if\n\\[\n\\frac{\\left| \\log p(\\theta_{i}|y) - \\log p(\\theta_{i-1}|y) \\right|}{\\\n \\max\\left(\\left| \\log p(\\theta_{i}|y)\\right|,\\left| \\log\n p(\\theta_{i-1}|y)\\right|,1.0\\right)}\n< \\mathtt{tol\\_rel\\_obj} * \\epsilon.\n\\]\n\n\n\nThe gradient is considered to have converged to 0 relative to a specified tolerance tol_grad if\n\\[\n|| g_{i} || < \\mathtt{tol\\_grad},\n\\] where \\(\\nabla_{\\theta}\\) is the gradient operator with respect to \\(\\theta\\) and \\(g_{i} = \\nabla_{\\theta} \\log p(\\theta | y)\\) is the 
gradient at iteration \\(i\\) evaluated at \\(\\theta^{(i)}\\), the value on the \\(i\\)-th posterior iteration.\nThe gradient is considered to have converged to 0 relative to a specified relative tolerance tol_rel_grad if\n\\[\n\\frac{g_{i}^T \\hat{H}_{i}^{-1} g_{i} }{ \\max\\left(\\left|\\log\np(\\theta_{i}|y)\\right|,1.0\\right) }\n\\ < \\\n\\mathtt{tol\\_rel\\_grad} * \\epsilon,\n\\]\nwhere \\(\\hat{H}_{i}\\) is the estimate of the Hessian at iteration \\(i\\), \\(|u|\\) is the absolute value (L1 norm) of \\(u\\), \\(||u||\\) is the vector length (L2 norm) of \\(u\\), and \\(\\epsilon \\approx 2e-16\\) is machine precision.\n\n\n\n\nThe initial step size parameter \\(\\alpha\\) for BFGS-style optimizers may be specified. If the first iteration takes a long time (and requires a lot of function evaluations), initialize \\(\\alpha\\) to be roughly equal to the \\(\\alpha\\) used in that first iteration. The default value is intentionally small, 0.001, which is reasonable for many problems but might be too large or too small depending on the objective function and initialization. Being too big or too small just means that the first iteration will take longer (i.e., require more gradient evaluations) before the line search finds a good step length. It’s not a critical parameter, but for optimizing the same model multiple times (as you tweak things or with different data), being able to tune \\(\\alpha\\) can save some real time.\n\n\n\nL-BFGS has a command-line argument which controls the size of the history it uses to approximate the Hessian. The value should be less than the dimensionality of the parameter space and, in general, relatively small values (5–10) are sufficient; the default value is 5.\nIf L-BFGS performs poorly but BFGS performs well, consider increasing the history size. 
Increasing history size will increase the memory usage, although this is unlikely to be an issue for typical Stan models.\n\n\n\n\n\n\nFor constrained optimization problems, for instance, with a standard deviation parameter \\(\\sigma\\) constrained so that \\(\\sigma > 0\\), it can be much more efficient to declare a parameter sigma with no constraints. This allows the optimizer to easily get close to 0 without having to tend toward \\(-\\infty\\) on the \\(\\log \\sigma\\) scale.\nWith unconstrained parameterizations of parameters with constrained support, it is important to provide a custom initialization that is within the support. For example, declaring a vector\nvector[M] sigma;\nand using the default random initialization which is \\(\\mathsf{Uniform}(-2, 2)\\) on the unconstrained scale means that there is only a \\(2^{-M}\\) chance that the initialization will be within support.\nFor any given optimization problem, it is probably worthwhile trying the program both ways, with and without the constraint, to see which one is more efficient.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Optimization" + ] + }, + { + "objectID": "reference-manual/optimization.html#general-configuration", + "href": "reference-manual/optimization.html#general-configuration", + "title": "Optimization", + "section": "", + "text": "All of the optimizers have the option of including the log absolute Jacobian determinant of inverse parameter transforms in the log probability computation. If the Jacobian adjustment is not included (the default), the optimization returns parameter values that correspond to a mode of the target in the constrained space (if such mode exists). Thus this option is useful for any optimization where we want to find the mode in the original constrained parameter space. If the Jacobian adjustment is included, the optimization returns parameter values that correspond to a mode in the unconstrained space. 
This is useful, for example, if we want to make a distributional approximation of the posterior at the mode (see Laplace sampling), as then the Jacobian adjustment needs to be included for correct results.\nAll of the optimizers are iterative and allow the maximum number of iterations to be specified; the default maximum number of iterations is 2000.\nAll of the optimizers are able to stream intermediate output reporting on their progress. Whether or not to save the intermediate iterations and stream progress is configurable.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Optimization" + ] + }, + { + "objectID": "reference-manual/optimization.html#bfgs-and-l-bfgs-configuration", + "href": "reference-manual/optimization.html#bfgs-and-l-bfgs-configuration", + "title": "Optimization", + "section": "", + "text": "Convergence monitoring in (L-)BFGS is controlled by a number of tolerance values, any one of which being satisfied causes the algorithm to terminate with a solution. Any of the convergence tests can be disabled by setting its corresponding tolerance parameter to zero. 
The tests for convergence are as follows.\n\n\nThe parameters \\(\\theta_i\\) in iteration \\(i\\) are considered to have converged with respect to tolerance tol_param if\n\\[\n|| \\theta_{i} - \\theta_{i-1} || < \\mathtt{tol\\_param}.\n\\]\n\n\n\nThe (unnormalized) log density \\(\\log p(\\theta_{i}|y)\\) for the parameters \\(\\theta_i\\) in iteration \\(i\\) given data \\(y\\) is considered to have converged with respect to tolerance tol_obj if\n\\[\n\\left| \\log p(\\theta_{i}|y) - \\log p(\\theta_{i-1}|y) \\right| <\n\\mathtt{tol\\_obj}.\n\\]\nThe log density is considered to have converged to within relative tolerance tol_rel_obj if\n\\[\n\\frac{\\left| \\log p(\\theta_{i}|y) - \\log p(\\theta_{i-1}|y) \\right|}{\\\n \\max\\left(\\left| \\log p(\\theta_{i}|y)\\right|,\\left| \\log\n p(\\theta_{i-1}|y)\\right|,1.0\\right)}\n< \\mathtt{tol\\_rel\\_obj} * \\epsilon.\n\\]\n\n\n\nThe gradient is considered to have converged to 0 relative to a specified tolerance tol_grad if\n\\[\n|| g_{i} || < \\mathtt{tol\\_grad},\n\\] where \\(\\nabla_{\\theta}\\) is the gradient operator with respect to \\(\\theta\\) and \\(g_{i} = \\nabla_{\\theta} \\log p(\\theta | y)\\) is the gradient at iteration \\(i\\) evaluated at \\(\\theta^{(i)}\\), the value on the \\(i\\)-th posterior iteration.\nThe gradient is considered to have converged to 0 relative to a specified relative tolerance tol_rel_grad if\n\\[\n\\frac{g_{i}^T \\hat{H}_{i}^{-1} g_{i} }{ \\max\\left(\\left|\\log\np(\\theta_{i}|y)\\right|,1.0\\right) }\n\\ < \\\n\\mathtt{tol\\_rel\\_grad} * \\epsilon,\n\\]\nwhere \\(\\hat{H}_{i}\\) is the estimate of the Hessian at iteration \\(i\\), \\(|u|\\) is the absolute value (L1 norm) of \\(u\\), \\(||u||\\) is the vector length (L2 norm) of \\(u\\), and \\(\\epsilon \\approx 2e-16\\) is machine precision.\n\n\n\n\nThe initial step size parameter \\(\\alpha\\) for BFGS-style optimizers may be specified. 
If the first iteration takes a long time (and requires a lot of function evaluations), initialize \\(\\alpha\\) to be roughly equal to the \\(\\alpha\\) used in that first iteration. The default value is intentionally small, 0.001, which is reasonable for many problems but might be too large or too small depending on the objective function and initialization. Being too big or too small just means that the first iteration will take longer (i.e., require more gradient evaluations) before the line search finds a good step length. It’s not a critical parameter, but for optimizing the same model multiple times (as you tweak things or with different data), being able to tune \\(\\alpha\\) can save some real time.\n\n\n\nL-BFGS has a command-line argument which controls the size of the history it uses to approximate the Hessian. The value should be less than the dimensionality of the parameter space and, in general, relatively small values (5–10) are sufficient; the default value is 5.\nIf L-BFGS performs poorly but BFGS performs well, consider increasing the history size. Increasing history size will increase the memory usage, although this is unlikely to be an issue for typical Stan models.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Optimization" + ] + }, + { + "objectID": "reference-manual/optimization.html#writing-models-for-optimization", + "href": "reference-manual/optimization.html#writing-models-for-optimization", + "title": "Optimization", + "section": "", + "text": "For constrained optimization problems, for instance, with a standard deviation parameter \\(\\sigma\\) constrained so that \\(\\sigma > 0\\), it can be much more efficient to declare a parameter sigma with no constraints. 
This allows the optimizer to easily get close to 0 without having to tend toward \\(-\\infty\\) on the \\(\\log \\sigma\\) scale.\nWith unconstrained parameterizations of parameters with constrained support, it is important to provide a custom initialization that is within the support. For example, declaring a vector\nvector[M] sigma;\nand using the default random initialization which is \\(\\mathsf{Uniform}(-2, 2)\\) on the unconstrained scale means that there is only a \\(2^{-M}\\) chance that the initialization will be within support.\nFor any given optimization problem, it is probably worthwhile trying the program both ways, with and without the constraint, to see which one is more efficient.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Optimization" + ] + }, + { + "objectID": "reference-manual/references.html", + "href": "reference-manual/references.html", + "title": "References", + "section": "", + "text": "References\n\n\n\n\n Back to top" + }, + { + "objectID": "reference-manual/reproducibility.html", + "href": "reference-manual/reproducibility.html", + "title": "Reproducibility", + "section": "", + "text": "Floating point operations on modern computers are notoriously difficult to replicate because the fundamental arithmetic operations, right down to the IEEE 754 encoding level, are not fully specified. The primary problem is that the precision of operations varies across different hardware platforms and software implementations.\nStan is designed to allow full reproducibility. 
However, this is only possible up to the external constraints imposed by floating point arithmetic.\nStan results will only be exactly reproducible if all of the following components are identical:\n\nStan version\nStan interface (RStan, PyStan, CmdStan) and version, plus version of interface language (R, Python, shell)\nversions of included libraries (Boost and Eigen)\noperating system version\ncomputer hardware including CPU, motherboard and memory\nC++ compiler, including version, compiler flags, and linked libraries\nsame configuration of call to Stan, including random seed, chain ID, initialization and data\n\nIt doesn’t matter if you use a stable release version of Stan or the version with a particular Git hash tag. The same goes for all of the interfaces, compilers, and so on. The point is that if any of these moving parts changes in some way, floating point results may change.\nConcretely, if you compile a single Stan program using the same CmdStan code base, but changed the optimization flag (-O3 vs. -O2 or -O0), the two programs may not return the identical stream of results. Thus it is very hard to guarantee reproducibility on externally managed hardware, like in a cluster or even a desktop managed by an IT department or with automatic updates turned on.\nIf, however, you compiled a Stan program today using one set of flags, took the computer away from the internet and didn’t allow it to update anything, then came back in a decade and recompiled the Stan program in the same way, you should get the same results.\nThe data needs to be the same down to the bit level. For example, if you are running in RStan, Rcpp handles the conversion between R’s floating point numbers and C++ doubles. If Rcpp changes the conversion process or use different types, the results are not guaranteed to be the same down to the bit level.\nThe compiler and compiler settings can also be an issue. 
There is a nice discussion of the issues and how to control reproducibility in Intel’s proprietary compiler by Corden and Kreitzer (2014).\n\n\nAs noted above, there is no guarantee that the same results will be reproducible between two different versions of Stan, even if the same settings and environment are used.\nHowever, there are occasionally notable changes which would affect many if not all users, and these are noted here. The absence of a version from this list still does not guarantee exact reproducibility between it and other versions.\n\nStan 2.28 changed the default chain ID for MCMC from 0 to 1. Users who had set a seed but not a chain ID would observe completely different outputs.\nStan 2.35 changed the default pseudo-random number generator used by the Stan algorithms. There is no relationship between seeds in versions pre-2.35 and version 2.35 and on.", + "crumbs": [ + "Reference Manual", + "Usage", + "Reproducibility" + ] + }, + { + "objectID": "reference-manual/reproducibility.html#notable-changes-across-versions", + "href": "reference-manual/reproducibility.html#notable-changes-across-versions", + "title": "Reproducibility", + "section": "", + "text": "As noted above, there is no guarantee that the same results will be reproducible between two different versions of Stan, even if the same settings and environment are used.\nHowever, there are occasionally notable changes which would affect many if not all users, and these are noted here. The absence of a version from this list still does not guarantee exact reproducibility between it and other versions.\n\nStan 2.28 changed the default chain ID for MCMC from 0 to 1. Users who had set a seed but not a chain ID would observe completely different outputs.\nStan 2.35 changed the default pseudo-random number generator used by the Stan algorithms. 
There is no relationship between seeds in versions pre-2.35 and version 2.35 and on.", + "crumbs": [ + "Reference Manual", + "Usage", + "Reproducibility" + ] + }, + { + "objectID": "reference-manual/syntax.html", + "href": "reference-manual/syntax.html", + "title": "Language Syntax", + "section": "", + "text": "This chapter defines the basic syntax of the Stan modeling language using a Backus-Naur form (BNF) grammar plus extra-grammatical constraints on function typing and operator precedence and associativity.\n\n\n\n\nIn the following BNF grammars, tokens are represented in ALLCAPS. Grammar non-terminals are surrounded by < and >. Square brackets ([A]) indicate optionality of A. A postfixed Kleene star (A*) indicates zero or more occurrences of A. Parentheses can be used to group symbols together in productions.\nFinally, this grammar uses the concept of “parameterized nonterminals” as used in the parsing library Menhir. A rule like <list(x)> ::= x (COMMA x)* declares a generic list rule, which can later be applied to others by the symbol <list(<expression>)>.\nThe following representation is constructed directly from the OCaml reference parser using a tool called Obelisk. 
The raw output is available here.\n\n\n\n\n<program> ::= [<function_block>] [<data_block>] [<transformed_data_block>]\n [<parameters_block>] [<transformed_parameters_block>]\n [<model_block>] [<generated_quantities_block>] EOF\n\n<functions_only> ::= <function_def>* EOF\n\n<function_block> ::= FUNCTIONBLOCK LBRACE <function_def>* RBRACE\n\n<data_block> ::= DATABLOCK LBRACE <top_var_decl_no_assign>* RBRACE\n\n<transformed_data_block> ::= TRANSFORMEDDATABLOCK LBRACE\n <top_vardecl_or_statement>* RBRACE\n\n<parameters_block> ::= PARAMETERSBLOCK LBRACE <top_var_decl_no_assign>*\n RBRACE\n\n<transformed_parameters_block> ::= TRANSFORMEDPARAMETERSBLOCK LBRACE\n <top_vardecl_or_statement>* RBRACE\n\n<model_block> ::= MODELBLOCK LBRACE <vardecl_or_statement>* RBRACE\n\n<generated_quantities_block> ::= GENERATEDQUANTITIESBLOCK LBRACE\n <top_vardecl_or_statement>* RBRACE\n\n\n\n<function_def> ::= <return_type> <decl_identifier> LPAREN [<arg_decl> (COMMA\n <arg_decl>)*] RPAREN <statement>\n\n<return_type> ::= VOID\n | <unsized_type>\n\n<arg_decl> ::= [DATABLOCK] <unsized_type> <decl_identifier>\n\n<unsized_type> ::= ARRAY <unsized_dims> <basic_type>\n | ARRAY <unsized_dims> <unsized_tuple_type>\n | <basic_type>\n | <unsized_tuple_type>\n\n<unsized_tuple_type> ::= TUPLE LPAREN <unsized_type> COMMA [<unsized_type>\n (COMMA <unsized_type>)*] RPAREN\n\n<basic_type> ::= INT\n | REAL\n | COMPLEX\n | VECTOR\n | ROWVECTOR\n | MATRIX\n | COMPLEXVECTOR\n | COMPLEXROWVECTOR\n | COMPLEXMATRIX\n\n<unsized_dims> ::= LBRACK COMMA* RBRACK\n\n\n\n\n<identifier> ::= IDENTIFIER\n | TRUNCATE\n\n<decl_identifier> ::= <identifier>\n\n<no_assign> ::= UNREACHABLE\n\n<optional_assignment(rhs)> ::= [ASSIGN rhs]\n\n<id_and_optional_assignment(rhs)> ::= <decl_identifier>\n <optional_assignment(rhs)>\n\n<decl(type_rule, rhs)> ::= type_rule <decl_identifier> <dims>\n <optional_assignment(rhs)> SEMICOLON\n | <higher_type(type_rule)>\n <id_and_optional_assignment(rhs)> (COMMA\n 
<id_and_optional_assignment(rhs)>)* SEMICOLON\n\n<higher_type(type_rule)> ::= <array_type(type_rule)>\n | <tuple_type(type_rule)>\n | type_rule\n\n<array_type(type_rule)> ::= <arr_dims> type_rule\n | <arr_dims> <tuple_type(type_rule)>\n\n<tuple_type(type_rule)> ::= TUPLE LPAREN <higher_type(type_rule)> COMMA\n [<higher_type(type_rule)> (COMMA\n <higher_type(type_rule)>)*] RPAREN\n\n<var_decl> ::= <decl(<sized_basic_type>, <expression>)>\n\n<top_var_decl> ::= <decl(<top_var_type>, <expression>)>\n\n<top_var_decl_no_assign> ::= <decl(<top_var_type>, <no_assign>)>\n | SEMICOLON\n\n<sized_basic_type> ::= INT\n | REAL\n | COMPLEX\n | VECTOR LBRACK <expression> RBRACK\n | ROWVECTOR LBRACK <expression> RBRACK\n | MATRIX LBRACK <expression> COMMA <expression> RBRACK\n | COMPLEXVECTOR LBRACK <expression> RBRACK\n | COMPLEXROWVECTOR LBRACK <expression> RBRACK\n | COMPLEXMATRIX LBRACK <expression> COMMA <expression>\n RBRACK\n\n<top_var_type> ::= INT [LABRACK <range> RABRACK]\n | REAL <type_constraint>\n | COMPLEX <type_constraint>\n | VECTOR <type_constraint> LBRACK <expression> RBRACK\n | ROWVECTOR <type_constraint> LBRACK <expression> RBRACK\n | MATRIX <type_constraint> LBRACK <expression> COMMA\n <expression> RBRACK\n | COMPLEXVECTOR <type_constraint> LBRACK <expression> RBRACK\n | COMPLEXROWVECTOR <type_constraint> LBRACK <expression>\n RBRACK\n | COMPLEXMATRIX <type_constraint> LBRACK <expression> COMMA\n <expression> RBRACK\n | ORDERED LBRACK <expression> RBRACK\n | POSITIVEORDERED LBRACK <expression> RBRACK\n | SIMPLEX LBRACK <expression> RBRACK\n | UNITVECTOR LBRACK <expression> RBRACK\n | SUMTOZEROVEC LBRACK <expression> RBRACK\n | CHOLESKYFACTORCORR LBRACK <expression> RBRACK\n | CHOLESKYFACTORCOV LBRACK <expression> [COMMA <expression>]\n RBRACK\n | CORRMATRIX LBRACK <expression> RBRACK\n | COVMATRIX LBRACK <expression> RBRACK\n | SUMTOZEROMATRIX LBRACK <expression> COMMA <expression> RBRACK\n | STOCHASTICCOLUMNMATRIX LBRACK <expression> COMMA\n <expression> 
RBRACK\n | STOCHASTICROWMATRIX LBRACK <expression> COMMA <expression>\n RBRACK\n\n<type_constraint> ::= [LABRACK <range> RABRACK]\n | LABRACK <offset_mult> RABRACK\n\n<range> ::= LOWER ASSIGN <constr_expression> COMMA UPPER ASSIGN\n <constr_expression>\n | UPPER ASSIGN <constr_expression> COMMA LOWER ASSIGN\n <constr_expression>\n | LOWER ASSIGN <constr_expression>\n | UPPER ASSIGN <constr_expression>\n\n<offset_mult> ::= OFFSET ASSIGN <constr_expression> COMMA MULTIPLIER ASSIGN\n <constr_expression>\n | MULTIPLIER ASSIGN <constr_expression> COMMA OFFSET ASSIGN\n <constr_expression>\n | OFFSET ASSIGN <constr_expression>\n | MULTIPLIER ASSIGN <constr_expression>\n\n<arr_dims> ::= ARRAY LBRACK <expression> (COMMA <expression>)* RBRACK\n\n\n\n\n<expression> ::= <expression> QMARK <expression> COLON <expression>\n | <expression> <infixOp> <expression>\n | <prefixOp> <expression>\n | <expression> <postfixOp>\n | <common_expression>\n\n<constr_expression> ::= <constr_expression> <arithmeticBinOp>\n <constr_expression>\n | <prefixOp> <constr_expression>\n | <constr_expression> <postfixOp>\n | <common_expression>\n\n<common_expression> ::= <identifier>\n | INTNUMERAL\n | REALNUMERAL\n | DOTNUMERAL\n | IMAGNUMERAL\n | LBRACE <expression> (COMMA <expression>)* RBRACE\n | LBRACK [<expression> (COMMA <expression>)*] RBRACK\n | <identifier> LPAREN [<expression> (COMMA\n <expression>)*] RPAREN\n | TARGET LPAREN RPAREN\n | <identifier> LPAREN <expression> BAR [<expression>\n (COMMA <expression>)*] RPAREN\n | LPAREN <expression> COMMA [<expression> (COMMA\n <expression>)*] RPAREN\n | <common_expression> DOTNUMERAL\n | <common_expression> LBRACK <indexes> RBRACK\n | LPAREN <expression> RPAREN\n\n<prefixOp> ::= BANG\n | MINUS\n | PLUS\n\n<postfixOp> ::= TRANSPOSE\n\n<infixOp> ::= <arithmeticBinOp>\n | <logicalBinOp>\n\n<arithmeticBinOp> ::= PLUS\n | MINUS\n | TIMES\n | DIVIDE\n | IDIVIDE\n | MODULO\n | LDIVIDE\n | ELTTIMES\n | ELTDIVIDE\n | HAT\n | ELTPOW\n\n<logicalBinOp> ::= OR\n 
| AND\n | EQUALS\n | NEQUALS\n | LABRACK\n | LEQ\n | RABRACK\n | GEQ\n\n<indexes> ::= epsilon\n | COLON\n | <expression>\n | <expression> COLON\n | COLON <expression>\n | <expression> COLON <expression>\n | <indexes> COMMA <indexes>\n\n<printables> ::= <expression>\n | <string_literal>\n | <printables> COMMA <printables>\n\n\n\n<statement> ::= <atomic_statement>\n | <nested_statement>\n\n<atomic_statement> ::= <common_expression> <assignment_op> <expression>\n SEMICOLON\n | <identifier> LPAREN [<expression> (COMMA\n <expression>)*] RPAREN SEMICOLON\n | <expression> TILDE <identifier> LPAREN [<expression>\n (COMMA <expression>)*] RPAREN [<truncation>] SEMICOLON\n | TARGET PLUSASSIGN <expression> SEMICOLON\n | JACOBIAN PLUSASSIGN <expression> SEMICOLON\n | BREAK SEMICOLON\n | CONTINUE SEMICOLON\n | PRINT LPAREN <printables> RPAREN SEMICOLON\n | REJECT LPAREN <printables> RPAREN SEMICOLON\n | FATAL_ERROR LPAREN <printables> RPAREN SEMICOLON\n | RETURN <expression> SEMICOLON\n | RETURN SEMICOLON\n | SEMICOLON\n\n<assignment_op> ::= ASSIGN\n | PLUSASSIGN\n | MINUSASSIGN\n | TIMESASSIGN\n | DIVIDEASSIGN\n | ELTTIMESASSIGN\n | ELTDIVIDEASSIGN\n\n<string_literal> ::= STRINGLITERAL\n\n<truncation> ::= TRUNCATE LBRACK [<expression>] COMMA [<expression>] RBRACK\n\n<nested_statement> ::= IF LPAREN <expression> RPAREN <vardecl_or_statement>\n ELSE <vardecl_or_statement>\n | IF LPAREN <expression> RPAREN <vardecl_or_statement>\n | WHILE LPAREN <expression> RPAREN\n <vardecl_or_statement>\n | FOR LPAREN <identifier> IN <expression> COLON\n <expression> RPAREN <vardecl_or_statement>\n | FOR LPAREN <identifier> IN <expression> RPAREN\n <vardecl_or_statement>\n | PROFILE LPAREN <string_literal> RPAREN LBRACE\n <vardecl_or_statement>* RBRACE\n | LBRACE <vardecl_or_statement>* RBRACE\n\n<vardecl_or_statement> ::= <statement>\n | <var_decl>\n\n<top_vardecl_or_statement> ::= <statement>\n | <top_var_decl>\n\n\n\n\nMany of the tokens used in the BNF grammars follow obviously from their 
names: DATABLOCK is the literal string ‘data’, COMMA is a single ‘,’ character, etc. The literal representation of each operator is additionally provided in the operator precedence table.\nA few tokens are not so obvious, and are defined here in regular expressions:\nIDENTIFIER = [a-zA-Z] [a-zA-Z0-9_]*\n\nSTRINGLITERAL = \".*\"\n\nINTNUMERAL = [0-9]+ (_ [0-9]+)*\n\nEXPLITERAL = [eE] [+-]? INTNUMERAL\n\nREALNUMERAL = INTNUMERAL \\. INTNUMERAL? EXPLITERAL?\n | \\. INTNUMERAL EXPLITERAL\n | INTNUMERAL EXPLITERAL\n\nIMAGNUMERAL = (REALNUMERAL | INTNUMERAL) i\n\nDOTNUMERAL = \\. INTNUMERAL\n\n\n\n\n\nA well-formed Stan program must satisfy the type constraints imposed by functions and distributions. For example, the binomial distribution requires an integer total count parameter and integer variate and when truncated would require integer truncation points. If these constraints are violated, the program will be rejected during compilation with an error message indicating the location of the problem.\n\n\n\nIn the Stan grammar provided in this chapter, the expression 1 + 2 * 3 has two parses. As described in the operator precedence table, Stan disambiguates between the meaning \\(1\n+ (2 \\times 3)\\) and the meaning \\((1 + 2) \\times 3\\) based on operator precedences and associativities.\n\n\n\nIn a compound variable declaration and definition, the type of the right-hand side expression must be assignable to the variable being declared. 
The assignability constraint restricts compound declarations and definitions to local variables and variables declared in the transformed data, transformed parameters, and generated quantities blocks.\n\n\n\nThe types of expressions used for elements in array expressions ('{' expressions '}') must all be of the same type or a mixture of scalar (int, real and complex) types (in which case the result is promoted to be of the highest type on the int -> real -> complex hierarchy).\n\n\n\nInteger literals longer than one digit may not start with 0 and real literals cannot consist of only a period or only an exponent.\n\n\n\nBoth the conditional if-then-else statement and while-loop statement require the expression denoting the condition to be a primitive type, integer or real.\n\n\n\nThe for loop statement requires that we specify in addition to the loop identifier, either a range consisting of two expressions denoting an integer, separated by ‘:’, or a single expression denoting a container. The loop variable will be of type integer in the former case and of the contained type in the latter case. 
Furthermore, the loop variable must not be in scope (i.e., there is no masking of variables).\n\n\n\nThe arguments to a print statement cannot be void.\n\n\n\nThe break and continue statements may only be used within the body of a for-loop or while-loop.\n\n\n\nSome constructs in the Stan language are only allowed in certain blocks or in certain kinds of user-defined functions.\n\n\nFunctions ending in _rng may only be called in the transformed data and generated quantities block, and within the bodies of user-defined functions with names ending in _rng.\n\n\n\nUnnormalized distributions (with suffixes _lupmf or _lupdf) may only be called in the model block, user-defined probability functions, or within the bodies of user defined functions which end in _lp.\n\n\n\ntarget += statements can only be used inside of the model block or user-defined functions which end in _lp.\nUser defined functions which end in _lp and the target() function can only be used in the model block, transformed parameters block, and in the bodies of other user defined functions which end in _lp.\nSampling statements (using ~) can only be used in the model block or in the bodies of user-defined functions which end in _lp.\njacobian += statements can only be used inside of the transformed parameters block or in functions that end with _jacobian.\n\n\n\n\nA probability function literal must have one of the following suffixes: _lpdf, _lpmf, _lcdf, or _lccdf.\n\n\n\nStandalone expressions used as indexes must denote either an integer (int) or an integer array (array[] int). Expressions participating in range indexes (e.g., a and b in a : b) must denote integers (int).\nA second condition is that there not be more indexes provided than dimensions of the underlying expression (in general) or variable (on the left side of assignments) being indexed. A vector or row vector adds 1 to the array dimension and a matrix adds 2. 
That is, the type array[ , , ] matrix, a three-dimensional array of matrices, has five index positions: three for the array, one for the row of the matrix and one for the column.", + "crumbs": [ + "Reference Manual", + "Language", + "Language Syntax" + ] + }, + { + "objectID": "reference-manual/syntax.html#bnf-grammars", + "href": "reference-manual/syntax.html#bnf-grammars", + "title": "Language Syntax", + "section": "", + "text": "In the following BNF grammars, tokens are represented in ALLCAPS. Grammar non-terminals are surrounded by < and >. Square brackets ([A]) indicate optionality of A. A postfixed Kleene star (A*) indicates zero or more occurrences of A. Parentheses can be used to group symbols together in productions.\nFinally, this grammar uses the concept of “parameterized nonterminals” as used in the parsing library Menhir. A rule like <list(x)> ::= x (COMMA x)* declares a generic list rule, which can later be applied to others by the symbol <list(<expression>)>.\nThe following representation is constructed directly from the OCaml reference parser using a tool called Obelisk. 
The raw output is available here.\n\n\n\n\n<program> ::= [<function_block>] [<data_block>] [<transformed_data_block>]\n [<parameters_block>] [<transformed_parameters_block>]\n [<model_block>] [<generated_quantities_block>] EOF\n\n<functions_only> ::= <function_def>* EOF\n\n<function_block> ::= FUNCTIONBLOCK LBRACE <function_def>* RBRACE\n\n<data_block> ::= DATABLOCK LBRACE <top_var_decl_no_assign>* RBRACE\n\n<transformed_data_block> ::= TRANSFORMEDDATABLOCK LBRACE\n <top_vardecl_or_statement>* RBRACE\n\n<parameters_block> ::= PARAMETERSBLOCK LBRACE <top_var_decl_no_assign>*\n RBRACE\n\n<transformed_parameters_block> ::= TRANSFORMEDPARAMETERSBLOCK LBRACE\n <top_vardecl_or_statement>* RBRACE\n\n<model_block> ::= MODELBLOCK LBRACE <vardecl_or_statement>* RBRACE\n\n<generated_quantities_block> ::= GENERATEDQUANTITIESBLOCK LBRACE\n <top_vardecl_or_statement>* RBRACE\n\n\n\n<function_def> ::= <return_type> <decl_identifier> LPAREN [<arg_decl> (COMMA\n <arg_decl>)*] RPAREN <statement>\n\n<return_type> ::= VOID\n | <unsized_type>\n\n<arg_decl> ::= [DATABLOCK] <unsized_type> <decl_identifier>\n\n<unsized_type> ::= ARRAY <unsized_dims> <basic_type>\n | ARRAY <unsized_dims> <unsized_tuple_type>\n | <basic_type>\n | <unsized_tuple_type>\n\n<unsized_tuple_type> ::= TUPLE LPAREN <unsized_type> COMMA [<unsized_type>\n (COMMA <unsized_type>)*] RPAREN\n\n<basic_type> ::= INT\n | REAL\n | COMPLEX\n | VECTOR\n | ROWVECTOR\n | MATRIX\n | COMPLEXVECTOR\n | COMPLEXROWVECTOR\n | COMPLEXMATRIX\n\n<unsized_dims> ::= LBRACK COMMA* RBRACK\n\n\n\n\n<identifier> ::= IDENTIFIER\n | TRUNCATE\n\n<decl_identifier> ::= <identifier>\n\n<no_assign> ::= UNREACHABLE\n\n<optional_assignment(rhs)> ::= [ASSIGN rhs]\n\n<id_and_optional_assignment(rhs)> ::= <decl_identifier>\n <optional_assignment(rhs)>\n\n<decl(type_rule, rhs)> ::= type_rule <decl_identifier> <dims>\n <optional_assignment(rhs)> SEMICOLON\n | <higher_type(type_rule)>\n <id_and_optional_assignment(rhs)> (COMMA\n 
<id_and_optional_assignment(rhs)>)* SEMICOLON\n\n<higher_type(type_rule)> ::= <array_type(type_rule)>\n | <tuple_type(type_rule)>\n | type_rule\n\n<array_type(type_rule)> ::= <arr_dims> type_rule\n | <arr_dims> <tuple_type(type_rule)>\n\n<tuple_type(type_rule)> ::= TUPLE LPAREN <higher_type(type_rule)> COMMA\n [<higher_type(type_rule)> (COMMA\n <higher_type(type_rule)>)*] RPAREN\n\n<var_decl> ::= <decl(<sized_basic_type>, <expression>)>\n\n<top_var_decl> ::= <decl(<top_var_type>, <expression>)>\n\n<top_var_decl_no_assign> ::= <decl(<top_var_type>, <no_assign>)>\n | SEMICOLON\n\n<sized_basic_type> ::= INT\n | REAL\n | COMPLEX\n | VECTOR LBRACK <expression> RBRACK\n | ROWVECTOR LBRACK <expression> RBRACK\n | MATRIX LBRACK <expression> COMMA <expression> RBRACK\n | COMPLEXVECTOR LBRACK <expression> RBRACK\n | COMPLEXROWVECTOR LBRACK <expression> RBRACK\n | COMPLEXMATRIX LBRACK <expression> COMMA <expression>\n RBRACK\n\n<top_var_type> ::= INT [LABRACK <range> RABRACK]\n | REAL <type_constraint>\n | COMPLEX <type_constraint>\n | VECTOR <type_constraint> LBRACK <expression> RBRACK\n | ROWVECTOR <type_constraint> LBRACK <expression> RBRACK\n | MATRIX <type_constraint> LBRACK <expression> COMMA\n <expression> RBRACK\n | COMPLEXVECTOR <type_constraint> LBRACK <expression> RBRACK\n | COMPLEXROWVECTOR <type_constraint> LBRACK <expression>\n RBRACK\n | COMPLEXMATRIX <type_constraint> LBRACK <expression> COMMA\n <expression> RBRACK\n | ORDERED LBRACK <expression> RBRACK\n | POSITIVEORDERED LBRACK <expression> RBRACK\n | SIMPLEX LBRACK <expression> RBRACK\n | UNITVECTOR LBRACK <expression> RBRACK\n | SUMTOZEROVEC LBRACK <expression> RBRACK\n | CHOLESKYFACTORCORR LBRACK <expression> RBRACK\n | CHOLESKYFACTORCOV LBRACK <expression> [COMMA <expression>]\n RBRACK\n | CORRMATRIX LBRACK <expression> RBRACK\n | COVMATRIX LBRACK <expression> RBRACK\n | SUMTOZEROMATRIX LBRACK <expression> COMMA <expression> RBRACK\n | STOCHASTICCOLUMNMATRIX LBRACK <expression> COMMA\n <expression> 
RBRACK\n | STOCHASTICROWMATRIX LBRACK <expression> COMMA <expression>\n RBRACK\n\n<type_constraint> ::= [LABRACK <range> RABRACK]\n | LABRACK <offset_mult> RABRACK\n\n<range> ::= LOWER ASSIGN <constr_expression> COMMA UPPER ASSIGN\n <constr_expression>\n | UPPER ASSIGN <constr_expression> COMMA LOWER ASSIGN\n <constr_expression>\n | LOWER ASSIGN <constr_expression>\n | UPPER ASSIGN <constr_expression>\n\n<offset_mult> ::= OFFSET ASSIGN <constr_expression> COMMA MULTIPLIER ASSIGN\n <constr_expression>\n | MULTIPLIER ASSIGN <constr_expression> COMMA OFFSET ASSIGN\n <constr_expression>\n | OFFSET ASSIGN <constr_expression>\n | MULTIPLIER ASSIGN <constr_expression>\n\n<arr_dims> ::= ARRAY LBRACK <expression> (COMMA <expression>)* RBRACK\n\n\n\n\n<expression> ::= <expression> QMARK <expression> COLON <expression>\n | <expression> <infixOp> <expression>\n | <prefixOp> <expression>\n | <expression> <postfixOp>\n | <common_expression>\n\n<constr_expression> ::= <constr_expression> <arithmeticBinOp>\n <constr_expression>\n | <prefixOp> <constr_expression>\n | <constr_expression> <postfixOp>\n | <common_expression>\n\n<common_expression> ::= <identifier>\n | INTNUMERAL\n | REALNUMERAL\n | DOTNUMERAL\n | IMAGNUMERAL\n | LBRACE <expression> (COMMA <expression>)* RBRACE\n | LBRACK [<expression> (COMMA <expression>)*] RBRACK\n | <identifier> LPAREN [<expression> (COMMA\n <expression>)*] RPAREN\n | TARGET LPAREN RPAREN\n | <identifier> LPAREN <expression> BAR [<expression>\n (COMMA <expression>)*] RPAREN\n | LPAREN <expression> COMMA [<expression> (COMMA\n <expression>)*] RPAREN\n | <common_expression> DOTNUMERAL\n | <common_expression> LBRACK <indexes> RBRACK\n | LPAREN <expression> RPAREN\n\n<prefixOp> ::= BANG\n | MINUS\n | PLUS\n\n<postfixOp> ::= TRANSPOSE\n\n<infixOp> ::= <arithmeticBinOp>\n | <logicalBinOp>\n\n<arithmeticBinOp> ::= PLUS\n | MINUS\n | TIMES\n | DIVIDE\n | IDIVIDE\n | MODULO\n | LDIVIDE\n | ELTTIMES\n | ELTDIVIDE\n | HAT\n | ELTPOW\n\n<logicalBinOp> ::= OR\n 
| AND\n | EQUALS\n | NEQUALS\n | LABRACK\n | LEQ\n | RABRACK\n | GEQ\n\n<indexes> ::= epsilon\n | COLON\n | <expression>\n | <expression> COLON\n | COLON <expression>\n | <expression> COLON <expression>\n | <indexes> COMMA <indexes>\n\n<printables> ::= <expression>\n | <string_literal>\n | <printables> COMMA <printables>\n\n\n\n<statement> ::= <atomic_statement>\n | <nested_statement>\n\n<atomic_statement> ::= <common_expression> <assignment_op> <expression>\n SEMICOLON\n | <identifier> LPAREN [<expression> (COMMA\n <expression>)*] RPAREN SEMICOLON\n | <expression> TILDE <identifier> LPAREN [<expression>\n (COMMA <expression>)*] RPAREN [<truncation>] SEMICOLON\n | TARGET PLUSASSIGN <expression> SEMICOLON\n | JACOBIAN PLUSASSIGN <expression> SEMICOLON\n | BREAK SEMICOLON\n | CONTINUE SEMICOLON\n | PRINT LPAREN <printables> RPAREN SEMICOLON\n | REJECT LPAREN <printables> RPAREN SEMICOLON\n | FATAL_ERROR LPAREN <printables> RPAREN SEMICOLON\n | RETURN <expression> SEMICOLON\n | RETURN SEMICOLON\n | SEMICOLON\n\n<assignment_op> ::= ASSIGN\n | PLUSASSIGN\n | MINUSASSIGN\n | TIMESASSIGN\n | DIVIDEASSIGN\n | ELTTIMESASSIGN\n | ELTDIVIDEASSIGN\n\n<string_literal> ::= STRINGLITERAL\n\n<truncation> ::= TRUNCATE LBRACK [<expression>] COMMA [<expression>] RBRACK\n\n<nested_statement> ::= IF LPAREN <expression> RPAREN <vardecl_or_statement>\n ELSE <vardecl_or_statement>\n | IF LPAREN <expression> RPAREN <vardecl_or_statement>\n | WHILE LPAREN <expression> RPAREN\n <vardecl_or_statement>\n | FOR LPAREN <identifier> IN <expression> COLON\n <expression> RPAREN <vardecl_or_statement>\n | FOR LPAREN <identifier> IN <expression> RPAREN\n <vardecl_or_statement>\n | PROFILE LPAREN <string_literal> RPAREN LBRACE\n <vardecl_or_statement>* RBRACE\n | LBRACE <vardecl_or_statement>* RBRACE\n\n<vardecl_or_statement> ::= <statement>\n | <var_decl>\n\n<top_vardecl_or_statement> ::= <statement>\n | <top_var_decl>", + "crumbs": [ + "Reference Manual", + "Language", + "Language Syntax" + ] + }, + 
{ + "objectID": "reference-manual/syntax.html#tokenizing-rules", + "href": "reference-manual/syntax.html#tokenizing-rules", + "title": "Language Syntax", + "section": "", + "text": "Many of the tokens used in the BNF grammars follow obviously from their names: DATABLOCK is the literal string ‘data’, COMMA is a single ‘,’ character, etc. The literal representation of each operator is additionally provided in the operator precedence table.\nA few tokens are not so obvious, and are defined here in regular expressions:\nIDENTIFIER = [a-zA-Z] [a-zA-Z0-9_]*\n\nSTRINGLITERAL = \".*\"\n\nINTNUMERAL = [0-9]+ (_ [0-9]+)*\n\nEXPLITERAL = [eE] [+-]? INTNUMERAL\n\nREALNUMERAL = INTNUMERAL \\. INTNUMERAL? EXPLITERAL?\n | \\. INTNUMERAL EXPLITERAL\n | INTNUMERAL EXPLITERAL\n\nIMAGNUMERAL = (REALNUMERAL | INTNUMERAL) i\n\nDOTNUMERAL = \\. INTNUMERAL", + "crumbs": [ + "Reference Manual", + "Language", + "Language Syntax" + ] + }, + { + "objectID": "reference-manual/syntax.html#extra-grammatical-constraints", + "href": "reference-manual/syntax.html#extra-grammatical-constraints", + "title": "Language Syntax", + "section": "", + "text": "A well-formed Stan program must satisfy the type constraints imposed by functions and distributions. For example, the binomial distribution requires an integer total count parameter and integer variate and when truncated would require integer truncation points. If these constraints are violated, the program will be rejected during compilation with an error message indicating the location of the problem.\n\n\n\nIn the Stan grammar provided in this chapter, the expression 1 + 2 * 3 has two parses. As described in the operator precedence table, Stan disambiguates between the meaning \\(1\n+ (2 \\times 3)\\) and the meaning \\((1 + 2) \\times 3\\) based on operator precedences and associativities.\n\n\n\nIn a compound variable declaration and definition, the type of the right-hand side expression must be assignable to the variable being declared. 
The assignability constraint restricts compound declarations and definitions to local variables and variables declared in the transformed data, transformed parameters, and generated quantities blocks.\n\n\n\nThe types of expressions used for elements in array expressions ('{' expressions '}') must all be of the same type or a mixture of scalar (int, real and complex) types (in which case the result is promoted to be of the highest type on the int -> real -> complex hierarchy).\n\n\n\nInteger literals longer than one digit may not start with 0 and real literals cannot consist of only a period or only an exponent.\n\n\n\nBoth the conditional if-then-else statement and while-loop statement require the expression denoting the condition to be a primitive type, integer or real.\n\n\n\nThe for loop statement requires that we specify in addition to the loop identifier, either a range consisting of two expressions denoting an integer, separated by ‘:’, or a single expression denoting a container. The loop variable will be of type integer in the former case and of the contained type in the latter case. 
Furthermore, the loop variable must not be in scope (i.e., there is no masking of variables).\n\n\n\nThe arguments to a print statement cannot be void.\n\n\n\nThe break and continue statements may only be used within the body of a for-loop or while-loop.\n\n\n\nSome constructs in the Stan language are only allowed in certain blocks or in certain kinds of user-defined functions.\n\n\nFunctions ending in _rng may only be called in the transformed data and generated quantities block, and within the bodies of user-defined functions with names ending in _rng.\n\n\n\nUnnormalized distributions (with suffixes _lupmf or _lupdf) may only be called in the model block, user-defined probability functions, or within the bodies of user defined functions which end in _lp.\n\n\n\ntarget += statements can only be used inside of the model block or user-defined functions which end in _lp.\nUser defined functions which end in _lp and the target() function can only be used in the model block, transformed parameters block, and in the bodies of other user defined functions which end in _lp.\nSampling statements (using ~) can only be used in the model block or in the bodies of user-defined functions which end in _lp.\njacobian += statements can only be used inside of the transformed parameters block or in functions that end with _jacobian.\n\n\n\n\nA probability function literal must have one of the following suffixes: _lpdf, _lpmf, _lcdf, or _lccdf.\n\n\n\nStandalone expressions used as indexes must denote either an integer (int) or an integer array (array[] int). Expressions participating in range indexes (e.g., a and b in a : b) must denote integers (int).\nA second condition is that there not be more indexes provided than dimensions of the underlying expression (in general) or variable (on the left side of assignments) being indexed. A vector or row vector adds 1 to the array dimension and a matrix adds 2. 
That is, the type array[ , , ] matrix, a three-dimensional array of matrices, has five index positions: three for the array, one for the row of the matrix and one for the column.", + "crumbs": [ + "Reference Manual", + "Language", + "Language Syntax" + ] + }, + { + "objectID": "reference-manual/types.html", + "href": "reference-manual/types.html", + "title": "Data Types and Declarations", + "section": "", + "text": "This chapter covers the data types for expressions in Stan. Every variable used in a Stan program must have a declared data type. Only values of that type will be assignable to the variable (except for temporary states of transformed data and transformed parameter values). This follows the convention of programming languages like C++, not the conventions of scripting languages like Python or statistical languages such as R or BUGS.\nThe motivation for strong, static typing is threefold.\n\nStrong typing forces the programmer’s intent to be declared with the variable, making programs easier to comprehend and hence easier to debug and maintain.\nStrong typing allows programming errors relative to the declared intent to be caught sooner (at compile time) rather than later (at run time). 
The Stan compiler (called through an interface such as CmdStan, RStan, or PyStan) will flag any type errors and indicate the offending expressions quickly when the program is compiled.\nConstrained types will catch runtime data, initialization, and intermediate value errors as soon as they occur rather than allowing them to propagate and potentially pollute final results.\n\nStrong typing disallows assigning the same variable to objects of different types at different points in the program or in different invocations of the program.\n\n\nArguments for built-in and user-defined functions and local variables are required to be basic data types, meaning an unconstrained scalar, vector, or matrix type, or an array of such.\nPassing arguments to functions in Stan works just like assignment to basic types. Stan functions are only specified for the basic data types of their arguments, including array dimensionality, but not for sizes or constraints. Of course, functions often check constraints as part of their behavior.\n\n\nStan provides two primitive data types, real for continuous values and int for integer values. These are both considered scalar types.\n\n\n\nStan provides a complex number data type complex, where a complex number contains both a real and an imaginary component, both of which are of type real. Complex types are considered scalar types.\n\n\n\nStan provides three real-valued matrix data types, vector for column vectors, row_vector for row vectors, and matrix for matrices.\nStan also provides three complex-valued matrix data types, complex_vector for column vectors, complex_row_vector for row vectors, and complex_matrix for matrices.\n\n\n\nAny type (including the constrained types discussed in the next section) can be made into an array type by declaring array arguments. 
For example,\narray[10] real x;\narray[6, 7] matrix[3, 3] m;\narray[12, 8, 15] complex z;\ndeclares x to be a one-dimensional array of size 10 containing real values, declares m to be a two-dimensional array of size \\(6 \\times 7\\) containing values that are \\(3 \\times 3\\) matrices, and declares z to be a \\(12 \\times 8 \\times 15\\) array of complex numbers.\nPrior to 2.26 Stan models used a different syntax which has since been removed. See the Removed Features chapter for more details.\n\n\n\nFor any sequence of types, Stan provides a tuple data type. For example,\ntuple(real, array[5] int) xi;\ndeclares xi to be a tuple holding two values, the first of which is of type real and the second of which is a one-dimensional array of size 5 of type int.\n\n\n\nDeclarations of variables other than local variables may be provided with constraints. These constraints are not part of the underlying data type for a variable, but determine error checking in the transformed data, transformed parameter, and generated quantities block, and the transform from unconstrained to constrained space in the parameters block.\nAll of the basic data types other than complex may be given lower and upper bounds using syntax such as\nint<lower=1> N;\nreal<upper=0> log_p;\nvector<lower=-1, upper=1>[3] rho;\nThere are also special data types for structured vectors and matrices. There are five constrained vector data types, simplex for unit simplexes, unit_vector for unit-length vectors, sum_to_zero_vector for vectors that sum to zero, ordered for ordered vectors of scalars, and positive_ordered for vectors of positive ordered scalars. There are specialized matrix data types corr_matrix and cov_matrix for correlation matrices (symmetric, positive definite, unit diagonal) and covariance matrices (symmetric, positive definite). The type cholesky_factor_cov is for Cholesky factors of covariance matrices (lower triangular, positive diagonal, product with own transpose is a covariance matrix). 
The type cholesky_factor_corr is for Cholesky factors of correlation matrices (lower triangular, positive diagonal, unit-length rows). The type sum_to_zero_matrix is for matrices that sum to zero across rows and columns.\nConstraints provide error checking for variables defined in the data, transformed data, transformed parameters, and generated quantities blocks. Constraints are critical for variables declared in the parameters block, where they determine the transformation from constrained variables (those satisfying the declared constraint) to unconstrained variables (those ranging over all of \\(\\mathbb{R}^n\\)).\nIt is worth calling out the most important aspect of constrained data types:\n\nThe model must have support (non-zero density, equivalently finite log density) at parameter values that satisfy the declared constraints.\n\nIf this condition is violated with parameter values that satisfy declared constraints but do not have finite log density, then the samplers and optimizers may have any of a number of pathologies including just getting stuck, failure to initialize, excessive Metropolis rejection, or biased draws due to inability to explore the tails of the distribution.\n\n\n\n\nUnfortunately, the lovely mathematical abstraction of integers and real numbers is only partially supported by finite-precision computer arithmetic.\n\n\nStan uses 32-bit (4-byte) integers for all of its integer representations. The maximum value that can be represented as an integer is \\(2^{31}-1\\); the minimum value is \\(-(2^{31})\\).\nWhen integers overflow, their value is determined by the underlying architecture. On most, their values wrap, but this cannot be guaranteed. Thus it is up to the Stan programmer to make sure the integer values in their programs stay in range. 
In particular, every intermediate expression must have an integer value that is in range.\nInteger arithmetic works in the expected way for addition, subtraction, and multiplication, but truncates the result of division (see the Stan Functions Reference integer-valued arithmetic operators section for more information).\n\n\n\nStan uses 64-bit (8-byte) floating point representations of real numbers. Stan roughly1 follows the IEEE 754 standard for floating-point computation. The range of a 64-bit number is roughly \\(\\pm 2^{1022}\\), which is slightly larger than \\(\\pm 10^{307}\\). It is a good idea to stay well away from such extreme values in Stan models as they are prone to cause overflow.\n64-bit floating point representations have roughly 15 decimal digits of accuracy. But when they are combined, the result often has less accuracy. In some cases, the difference in accuracy between two operands and their result is large.\nThere are three special real values used to represent (1) not-a-number value for error conditions, (2) positive infinity for overflow, and (3) negative infinity for overflow. The behavior of these special numbers follows standard IEEE 754 behavior.\n\n\nThe not-a-number value propagates. If an argument to a real-valued function is not-a-number, it either rejects (an exception in the underlying C++) or returns not-a-number itself. For boolean-valued comparison operators, if one of the arguments is not-a-number, the return value is always zero (i.e., false).\n\n\n\nPositive infinity is greater than all numbers other than itself and not-a-number; negative infinity is similarly smaller. Adding an infinite value to a finite value returns the infinite value. Dividing a finite number by an infinite value returns zero; dividing an infinite number by a finite number returns the infinite number of appropriate sign. Dividing a finite number by zero returns positive infinity. 
Dividing two infinite numbers produces a not-a-number value as does subtracting two infinite numbers. Some functions are sensitive to infinite values; for example, the exponential function returns zero if given negative infinity and positive infinity if given positive infinity. Often the gradients will break down when values are infinite, making these boundary conditions less useful than they may appear at first.\n\n\n\n\nStan automatically promotes integer values to real values if necessary, but does not automatically demote real values to integers. For very large integers, this will cause a rounding error to fewer significant digits in the floating point representation than in the integer representation.\nUnlike in C++, real values are never demoted to integers. Therefore, real values may only be assigned to real variables. Integer values may be assigned to either integer variables or real variables. Internally, the integer representation is cast to a floating-point representation. This operation is not without overhead and should thus be avoided where possible.\n\n\n\n\nThe complex data type is a scalar, but unlike real and int types, it contains two components, a real and imaginary component, both of which are of type real. That is, the real and imaginary components of a complex number are 64-bit, IEEE 754-compliant floating point numbers.\n\n\nImaginary literals are written in mathematical notation using a numeral followed by the suffix i. For example, the following example constructs a complex number \\(2 - 1.3i\\) and assigns it to the variable z.\ncomplex z = 2 - 1.3i;\nreal re = get_real(z); // re has value 2.0\nreal im = get_imag(z); // im has value -1.3\nThe getter functions then extract the real and imaginary components of z and assign them to re and im respectively.\nThe function to_complex constructs a complex number from its real and imaginary components. 
The functional form needs to be used whenever the components are not literal numerals, as in the following example.\nvector[K] re;\nvector[K] im;\n// ...\nfor (k in 1:K) {\n complex z = to_complex(re[k], im[k]);\n // ...\n}\n\n\n\nExpressions of type real may be assigned to variables of type complex. For example, the following is a valid sequence of Stan statements.\nreal x = 5.0;\ncomplex z = x; // get_real(z) == 5.0, get_imag(z) == 0\nThe real number assigned to a complex number determines the complex number’s real component, with the imaginary component set to zero.\nAssignability is transitive, so that expressions of type int may also be assigned to variables of type complex, as in the following example.\nint n = 2;\ncomplex z = n;\nFunction arguments also support promotion of integer or real typed expressions to type complex.\n\n\n\n\nAll variables used in a Stan program must have an explicitly declared data type. The form of a declaration includes the type and the name of a variable. This section covers scalar types, namely integer, real, and complex. The next section covers vector and matrix types, and the following section array types.\n\n\nUnconstrained integers are declared using the int keyword. For example, the variable N is declared to be an integer as follows.\nint N;\n\n\n\nInteger data types may be constrained to allow values only in a specified interval by providing a lower bound, an upper bound, or both. For instance, to declare N to be a positive integer, use the following.\nint<lower=1> N;\nThis illustrates that the bounds are inclusive for integers.\nTo declare an integer variable cond to take only binary values, that is zero or one, a lower and upper bound must be provided, as in the following example.\nint<lower=0, upper=1> cond;\n\n\n\nUnconstrained real variables are declared using the keyword real. 
The following example declares theta to be an unconstrained continuous value.\nreal theta;\n\n\n\nUnconstrained complex numbers are declared using the keyword complex. The following example declares z to be an unconstrained complex variable.\ncomplex z;\n\n\n\nReal variables may be bounded using the same syntax as integers. In theory (that is, with arbitrary-precision arithmetic), the bounds on real values would be exclusive. Unfortunately, finite-precision arithmetic rounding errors will often lead to values on the boundaries, so they are allowed in Stan.\nThe variable sigma may be declared to be non-negative as follows.\nreal<lower=0> sigma;\nThe following declares the variable x to be less than or equal to \\(-1\\).\nreal<upper=-1> x;\nTo ensure rho takes on values between \\(-1\\) and \\(1\\), use the following declaration.\nreal<lower=-1, upper=1> rho;\n\n\nLower bounds that are negative infinity or upper bounds that are positive infinity are ignored. Stan provides constants positive_infinity() and negative_infinity() which may be used for this purpose, or they may be supplied as data.\n\n\n\n\nReal variables may be declared on a space that has been transformed using an affine transformation \\(x\\mapsto \\mu + \\sigma * x\\) with offset \\(\\mu\\) and (positive) multiplier \\(\\sigma\\), using a syntax similar to that for bounds. While these transforms do not change the asymptotic sampling behaviour of the resulting Stan program (in a sense, the model the program implements), they can be useful for making the sampling process more efficient by transforming the geometry of the problem to a more natural multiplier and to a more natural offset for the sampling process, for instance by facilitating a non-centered parameterisation. 
While these affine transformation declarations do not impose a hard constraint on variables, they behave like the bounds constraints in many ways and could perhaps be viewed as acting as a sort of soft constraint.\nThe variable x may be declared to have offset \\(1\\) as follows.\nreal<offset=1> x;\nSimilarly, it can be declared to have multiplier \\(2\\) as follows.\nreal<multiplier=2> x;\nFinally, we can combine both declarations to declare a variable with offset \\(1\\) and multiplier \\(2\\).\nreal<offset=1, multiplier=2> x;\nAs an example, we can give x a normal distribution with non-centered parameterization as follows.\nparameters {\n real<offset=mu, multiplier=sigma> x;\n}\nmodel {\n x ~ normal(mu, sigma);\n}\nRecall that the centered parameterization is achieved with the code\nparameters {\n real x;\n}\nmodel {\n x ~ normal(mu, sigma);\n}\nor equivalently\nparameters {\n real<offset=0, multiplier=1> x;\n}\nmodel {\n x ~ normal(mu, sigma);\n}\n\n\n\nBounds (and offset and multiplier) for integer or real variables may be arbitrary expressions. The only requirement is that they only include variables that have been declared (though not necessarily defined) before the declaration. If the bounds themselves are parameters, the behind-the-scenes variable transform accounts for them in the log Jacobian.\nFor example, it is acceptable to have the following declarations.\ndata {\n real lb;\n}\nparameters {\n real<lower=lb> phi;\n}\nThis declares a real-valued parameter phi to take values greater than the value of the real-valued data variable lb. Constraints may be arbitrary expressions, but must be of type int for integer variables and of type real for real variables (including constraints on vectors, row vectors, and matrices). Variables used in constraints can be any variable that has been defined at the point the constraint is used. 
For instance,\ndata {\n int<lower=1> N;\n array[N] real y;\n}\nparameters {\n real<lower=min(y), upper=max(y)> phi;\n}\nThis declares a positive integer data variable N, an array y of real-valued data of length N, and then a parameter ranging between the minimum and maximum value of y. As shown in the example code, the functions min() and max() may be applied to containers such as arrays.\nA more subtle case involves declarations of parameters or transformed parameters based on parameters declared previously. For example, the following program will work as intended.\nparameters {\n real a;\n real<lower=a> b; // enforces a < b\n}\ntransformed parameters {\n real c;\n real<lower=c> d;\n c = a;\n d = b;\n}\nThe parameters instance works because all parameters are defined externally before the block is executed. The transformed parameters case works even though c isn’t defined at the point it is used, because constraints on transformed parameters are only validated at the end of the block. Data variables work like parameter variables, whereas transformed data and generated quantity variables work like transformed parameter variables.\n\n\n\nA variable may be declared with a size that depends on a boolean constant. For example, consider the definition of alpha in the following program fragment.\ndata {\n int<lower=0, upper=1> include_alpha;\n // ...\n}\nparameters {\n vector[include_alpha ? N : 0] alpha;\n // ...\n}\nIf include_alpha is true, the model will include the vector alpha; if the flag is false, the model will not include alpha (technically, it will include alpha of size 0, which means it won’t contain any values and won’t be included in any output).\nThis technique is not just useful for containers. 
If the value of N is set to 1, then the vector alpha will contain a single element and thus alpha[1] behaves like an optional scalar, the existence of which is controlled by include_alpha.\nThis coding pattern allows a single Stan program to define different models based on the data provided as input. This strategy is used extensively in the implementation of the RStanArm package.\n\n\n\n\nStan provides three types of container objects: arrays, vectors, and matrices. Vectors and matrices are more limited kinds of data structures than arrays. Vectors are intrinsically one-dimensional collections of real or complex values, whereas matrices are intrinsically two dimensional. Vectors, matrices, and arrays are not assignable to one another, even if their dimensions are identical. A \\(3 \\times 4\\) matrix is a different kind of object in Stan than a \\(3\n\\times 4\\) array.\nThe intention of using matrix types is to call out their usage in the code. There are three situations in Stan where only vectors and matrices may be used,\n\nmatrix arithmetic operations (e.g., matrix multiplication)\nlinear algebra functions (e.g., eigenvalues and determinants), and\nmultivariate function parameters and outcomes (e.g., multivariate normal distribution arguments).\n\nVectors and matrices cannot be typed to return integer values. They are restricted to real and complex values.\nFor constructing vectors and matrices in Stan, see Vector, Matrix, and Array Expressions.\n\n\nVectors and matrices, as well as arrays, are indexed starting from one (1) in Stan. This follows the convention in statistics and linear algebra as well as their implementations in the statistical software packages R, MATLAB, BUGS, and JAGS. General computer programming languages, on the other hand, such as C++ and Python, index arrays starting from zero.\n\n\n\nVectors in Stan are column vectors; see below for information on row vectors. Vectors are declared with a size (i.e., a dimensionality). 
For example, a 3-dimensional real vector is declared with the keyword vector, as follows.\nvector[3] u;\nVectors may also be declared with constraints, as in the following declaration of a 3-vector of non-negative values.\nvector<lower=0>[3] u;\nSimilarly, they may be declared with an offset and/or multiplier, as in the following example.\nvector<offset=42, multiplier=3>[3] u;\n\n\n\nLike real vectors, complex vectors are column vectors and are declared with a size. For example, a 3-dimensional complex vector is declared with the keyword complex_vector, as follows.\ncomplex_vector[3] v;\nComplex vector declarations do not support any constraints.\n\n\n\nA unit simplex is a vector with non-negative values whose entries sum to 1. For instance, \\([0.2,0.3,0.4,0.1]^{\\top}\\) is a unit 4-simplex. Unit simplexes are most often used as parameters in categorical or multinomial distributions, and they are also the sampled variate in a Dirichlet distribution. Simplexes are declared with their full dimensionality. For instance, theta is declared to be a unit \\(5\\)-simplex by\nsimplex[5] theta;\nUnit simplexes are implemented as vectors and may be assigned to other vectors and vice-versa. 
Simplex variables, like other constrained variables, are validated to ensure they contain simplex values; for simplexes, this is only done up to a statically specified accuracy threshold \\(\\epsilon\\) to account for errors arising from floating-point imprecision.\nIn high dimensional problems, simplexes may require smaller step sizes in the inference algorithms in order to remain stable; this can be achieved through higher target acceptance rates for samplers and longer warmup periods, tighter tolerances for optimization with more iterations, and in either case, with less dispersed parameter initialization or custom initialization if there are informative priors for some parameters.\n\n\n\nA stochastic matrix is a matrix where each column or row is a unit simplex, meaning that each column (row) vector has non-negative values that sum to 1. The following example is a \\(3 \\times 4\\) column-stochastic matrix.\n\\[\n\\begin{bmatrix}\n0.2 & 0.5 & 0.1 & 0.3 \\\\\n0.3 & 0.3 & 0.6 & 0.4 \\\\\n0.5 & 0.2 & 0.3 & 0.3\n\\end{bmatrix}\n\\]\nAn example of a \\(3 \\times 4\\) row-stochastic matrix is the following.\n\\[\n\\begin{bmatrix}\n0.2 & 0.5 & 0.1 & 0.2 \\\\\n0.2 & 0.1 & 0.6 & 0.1 \\\\\n0.5 & 0.2 & 0.2 & 0.1\n\\end{bmatrix}\n\\]\nIn the examples above, each column (or row) sums to 1, making the matrices valid column_stochastic_matrix and row_stochastic_matrix types.\nColumn-stochastic matrices are often used in models where each column represents a probability distribution across a set of categories such as in multiple multinomial distributions, factor models, transition matrices in Markov models, or compositional data analysis. They can also be used in situations where you need multiple simplexes of the same dimensionality.\nThe column_stochastic_matrix and row_stochastic_matrix types are declared with row and column sizes. 
For instance, a matrix theta with 3 rows and 4 columns, where each column is a 3-simplex, is declared like a matrix with 3 rows and 4 columns.\ncolumn_stochastic_matrix[3, 4] theta;\nA matrix theta with 3 rows and 4 columns, where each row is a 4-simplex, is similarly declared as a matrix with 3 rows and 4 columns.\nrow_stochastic_matrix[3, 4] theta;\nAs with simplexes, column_stochastic_matrix and row_stochastic_matrix variables are subject to validation, ensuring that each column (row) satisfies the simplex constraints. This validation accounts for floating-point imprecision, with checks performed up to a statically specified accuracy threshold \\(\\epsilon\\).\n\n\nIn high-dimensional settings, column_stochastic_matrix and row_stochastic_matrix types may require careful tuning of the inference algorithms. To ensure stability:\n\nSmaller Step Sizes: In samplers like Hamiltonian Monte Carlo (HMC), smaller step sizes can help maintain stability, especially in high dimensions.\nHigher Target Acceptance Rates: Setting higher target acceptance rates can improve the robustness of the sampling process.\nLonger Warmup Periods: Increasing the warmup period allows the sampler to better explore the parameter space before the actual sampling begins.\nTighter Optimization Tolerances: For optimization-based inference, tighter tolerances with more iterations can yield more accurate results.\nCustom Initialization: If prior information about the parameters is available, custom initialization or less dispersed initialization can lead to more efficient inference.\n\n\n\n\n\nA unit vector is a vector with a norm of one. For instance, \\([0.5,\n0.5, 0.5, 0.5]^{\\top}\\) is a unit 4-vector. Unit vectors are sometimes used in directional statistics. Unit vectors are declared with their full dimensionality. For instance, theta is declared to be a unit \\(5\\)-vector by\nunit_vector[5] theta;\nUnit vectors are implemented as vectors and may be assigned to other vectors and vice-versa. 
Unit vector variables, like other constrained variables, are validated to ensure that they are indeed of unit length; for unit vectors, this is only done up to a statically specified accuracy threshold \\(\\epsilon\\) to account for errors arising from floating-point imprecision.\n\n\n\nA zero-sum vector is constrained such that the sum of its elements is always \\(0\\). These are sometimes useful for resolving identifiability issues in regression models. While the underlying vector has only \\(N - 1\\) degrees of freedom, zero sum vectors are declared with their full dimensionality. For instance, beta is declared to be a zero-sum \\(5\\)-vector (4 DoF) by\nsum_to_zero_vector[5] beta;\nZero sum vectors are implemented as vectors and may be assigned to other vectors and vice-versa. Zero sum vector variables, like other constrained variables, are validated to ensure that they do indeed sum to zero; for zero sum vectors, this is only done up to a statically specified accuracy threshold \\(\\epsilon\\) to account for errors arising from floating-point imprecision.\n\n\n\nAn ordered vector type in Stan represents a vector whose entries are sorted in ascending order. For instance, \\((-1.3,2.7,2.71)^{\\top}\\) is an ordered 3-vector. Ordered vectors are most often employed as cut points in ordered logistic regression models (see section).\nThe variable c is declared as an ordered 5-vector by\nordered[5] c;\nAfter their declaration, ordered vectors, like unit simplexes, may be assigned to other vectors and other vectors may be assigned to them. Constraints will be checked after executing the block in which the variables were declared.\n\n\n\nThere is also a positive, ordered vector type which operates similarly to ordered vectors, but all entries are constrained to be positive. 
For instance, \\((2,3.7,4,12.9)\\) is a positive, ordered 4-vector.\nThe variable d is declared as a positive, ordered 5-vector by\npositive_ordered[5] d;\nLike ordered vectors, after their declaration, positive ordered vectors may be assigned to other vectors and other vectors may be assigned to them. Constraints will be checked after executing the block in which the variables were declared.\n\n\n\nRow vectors are declared with the keyword row_vector. Like (column) vectors, they are declared with a size. For example, a 1093-dimensional row vector u would be declared as\nrow_vector[1093] u;\nConstraints are declared as for vectors, as in the following example of a 10-vector with values between -1 and 1.\nrow_vector<lower=-1, upper=1>[10] u;\nOffset and multiplier are also similar as for the following 3-row-vector with offset -42 and multiplier 3.\nrow_vector<offset=-42, multiplier=3>[3] u;\nRow vectors may not be assigned to column vectors, nor may column vectors be assigned to row vectors. If assignments are required, they may be accommodated through the transposition operator.\n\n\n\nComplex row vectors are declared with the keyword complex_row_vector and given a size in basic declarations. For example, a 12-dimensional complex row vector v would be declared as\ncomplex_row_vector[12] v;\nComplex row vectors do not allow constraints.\n\n\n\nMatrices are declared with the keyword matrix along with a number of rows and number of columns. For example,\nmatrix[3, 3] A;\nmatrix[M, N] B;\ndeclares A to be a \\(3 \\times 3\\) matrix and B to be a \\(M\n\\times N\\) matrix. 
For the second declaration to be well formed, the variables M and N must be declared as integers in either the data or transformed data block and before the matrix declaration.\nMatrices may also be declared with constraints, as in this (\\(3 \\times 4\\)) matrix of non-positive values.\nmatrix<upper=0>[3, 4] B;\nSimilarly, matrices can be declared to have a set offset and/or multiplier, as in this matrix with multiplier 5.\nmatrix<multiplier=5>[3, 4] B;\n\n\nRows of a matrix can be assigned by indexing the left-hand side of an assignment statement. For example, this is possible.\nmatrix[M, N] a;\nrow_vector[N] b;\n// ...\na[1] = b;\nThis copies the values from row vector b to a[1], which is the first row of the matrix a. If the number of columns in a is not the same as the size of b, a run-time error is raised; the number of columns of a is N, which is also the number of columns of b.\nAssignment works by copying values in Stan. That means any subsequent assignment to a[1] does not affect b, nor does an assignment to b affect a.\n\n\n\n\nComplex matrices are declared with the keyword complex_matrix and a number of rows and columns. For example,\ncomplex_matrix[3, 3] C;\nComplex matrices do not allow constraints.\n\n\n\nMatrix variables may be constrained to represent covariance matrices. A matrix is a covariance matrix if it is symmetric and positive definite. Like correlation matrices, covariance matrices only need a single dimension in their declaration. For instance,\ncov_matrix[K] Sigma;\ndeclares Sigma to be a \\(K \\times K\\) covariance matrix, where \\(K\\) is the value of the data variable K.\n\n\n\nMatrix variables may be constrained to represent correlation matrices. A matrix is a correlation matrix if it is symmetric and positive definite, has entries between \\(-1\\) and \\(1\\), and has a unit diagonal. Because correlation matrices are square, only one dimension needs to be declared. 
For example,\ncorr_matrix[3] Omega;\ndeclares Omega to be a \\(3 \\times 3\\) correlation matrix.\nCorrelation matrices may be assigned to other matrices, including unconstrained matrices, if their dimensions match, and vice-versa.\n\n\n\nMatrix variables may be constrained to represent the Cholesky factors of a covariance matrix. This is often more convenient or more efficient than representing covariance matrices directly.\nA Cholesky factor \\(L\\) is an \\(M \\times N\\) lower-triangular matrix (if \\(m < n\\) then \\(L[m, n] =0\\)) with a strictly positive diagonal (\\(L[k, k]\n> 0\\)) and \\(M \\geq N\\). If \\(L\\) is a Cholesky factor, then \\(\\Sigma = L\n\\, L^{\\top}\\) is a covariance matrix (i.e., it is positive definite). The mapping between positive definite matrices and their Cholesky factors is bijective—every covariance matrix has a unique Cholesky factorization.\nThe typical case of a square Cholesky factor may be declared with a single dimension,\ncholesky_factor_cov[4] L;\n\n\nIn general, two dimensions may be declared, with the above being equal to cholesky_factor_cov[4, 4]. The type cholesky_factor_cov[M, N] may be used for the general \\(M \\times N\\) case to produce positive semi-definite matrices of rank \\(M\\).\n\n\n\n\nMatrix variables may be constrained to represent the Cholesky factors of a correlation matrix.\nA Cholesky factor for a correlation matrix \\(L\\) is a \\(K \\times K\\) lower-triangular matrix with positive diagonal entries and rows that are of length 1 (i.e., \\(\\sum_{n=1}^K L_{m,n}^2 = 1\\)). If \\(L\\) is a Cholesky factor for a correlation matrix, then \\(L\\,L^{\\top}\\) is a correlation matrix (i.e., symmetric positive definite with a unit diagonal).\nTo declare the variable L to be a K by K Cholesky factor of a correlation matrix, the following code may be used.\ncholesky_factor_corr[K] L;\n\n\n\nA sum-to-zero matrix is constrained such that the sum of rows and the sum of the columns is always \\(0\\). 
These are sometimes useful for resolving identifiability issues in regression models. While the underlying matrix has only \\((N - 1) \\times (M - 1)\\) degrees of freedom, zero sum matrices are declared with their full dimensionality. For instance, beta is declared to be a sum-to-zero \\(5 \\times 4\\)-matrix (12 degrees of freedom) by\nsum_to_zero_matrix[5, 4] beta;\nSum-to-zero matrices are implemented as matrices and may be assigned to other matrices and vice-versa. Zero sum matrix variables, like other constrained variables, are validated to ensure that they do indeed sum to zero; for zero sum matrices, this is only done up to an internally specified accuracy threshold \\(\\epsilon\\) to account for errors arising from floating-point imprecision.\n\n\n\nConstrained variables of all types may be assigned to other variables of the same unconstrained type and vice-versa. Matching is interpreted strictly as having the same basic type and number of array dimensions. Constraints are not considered, but basic data types are. For instance, a variable declared to be real<lower=0, upper=1> could be assigned to a variable declared as real and vice-versa. Similarly, a variable declared as matrix[3, 3] may be assigned to a variable declared as cov_matrix[3] or cholesky_factor_cov[3], and vice-versa.\nChecks are carried out at the end of each relevant block of statements to ensure constraints are enforced. This includes run-time size checks. The Stan compiler isn’t able to catch the fact that an attempt may be made to assign a matrix of one dimensionality to a matrix of mismatching dimensionality.\n\n\n\nReal-valued vectors, row vectors and matrices may be assigned to complex-valued vectors, row vectors and matrices, respectively. For example, the following is legal.\nvector[N] v = ...;\ncomplex_vector[N] u = 2 * v;\nRow vectors and matrices work the same way.\n\n\n\nVariables may be declared with sizes given by expressions. 
Such expressions are constrained to only contain data or transformed data variables. This ensures that all sizes are determined once the data is read in and transformed data variables defined by their statements. For example, the following is legal.\ndata {\n int<lower=0> N_observed, N_missing;\n // ...\ntransformed parameters {\n vector[N_observed + N_missing] y;\n // ...\n\n\n\nIf v is a column vector or row vector, then v[2] is the second element in the vector. If m is a matrix, then m[2, 3] is the value in the second row and third column.\nProviding a matrix with a single index returns the specified row. For instance, if m is a matrix, then m[2] is the second row. This allows Stan blocks such as\nmatrix[M, N] m;\nrow_vector[N] v;\nreal x;\n// ...\nv = m[2];\nx = v[3]; // x == m[2][3] == m[2, 3]\nThe type of m[2] is row_vector because it is the second row of m. Thus it is possible to write m[2][3] instead of m[2, 3] to access the third element in the second row. When given a choice, the form m[2, 3] is preferred.\nComplex versions work the same way,\ncomplex_matrix[M, N] m = ...;\ncomplex_row_vector[N] u = m[3];\ncomplex_vector[M] v = m[ , 2];\n\n\nThe form m[2, 3] is more efficient because it does not require the creation and use of an intermediate expression template for m[2]. In later versions, explicit calls to m[2][3] may be optimized to be as efficient as m[2, 3] by the Stan compiler.\n\n\n\n\nAn integer expression is used to pick out the sizes of vectors, matrices, and arrays. For instance, we can declare a vector of size M + N using\nvector[M + N] y;\nAny integer-denoting expression may be used for the size declaration, providing all variables involved are either data, transformed data, or local variables. That is, expressions used for size declarations may not include parameters or transformed parameters or generated quantities.\n\n\n\n\nStan supports arrays of arbitrary dimension. 
The values in an array can be any type, so that arrays may contain values that are simple reals or integers, vectors, matrices, or other arrays. Arrays are the only way to store sequences of integers, and some functions in Stan, such as discrete distributions, require integer arguments.\nA two-dimensional array is just an array of arrays, both conceptually and in terms of current implementation. When an index is supplied to an array, it returns the value at that index. When more than one index is supplied, this indexing operation is chained. For example, if a is a two-dimensional array, then a[m, n] is just a convenient shorthand for a[m][n].\nVectors, matrices, and arrays are not assignable to one another, even if their dimensions are identical.\nFor constructing arrays in Stan, see Vector, Matrix, and Array Expressions.\n\n\nArrays are declared with the keyword array followed by the dimensions enclosed in square brackets, the element type, and the name of the variable.\nThe variable n is declared as an array of five integers as follows.\narray[5] int n;\nA two-dimensional array of complex values with three rows and four columns is declared as follows.\narray[3, 4] complex a;\nA three-dimensional array z of positive reals with five rows, four columns, and two shelves can be declared as follows.\narray[5, 4, 2] real<lower=0> z;\nArrays may also be declared to contain vectors. For example,\narray[3] vector[7] mu;\ndeclares mu to be an array of size 3 containing vectors with 7 elements. Arrays may also contain matrices. The example\narray[15, 12] complex_matrix[7, 2] mu;\ndeclares a 15 by 12 array of \\(7 \\times 2\\) complex matrices. 
Any of the constrained types may also be used in arrays, as in the declaration\narray[2, 3, 4] cholesky_factor_cov[5, 6] mu;\nof a \\(2 \\times 3 \\times 4\\) array of \\(5 \\times 6\\) Cholesky factors of covariance matrices.\n\n\n\nIf x is a 1-dimensional array of length 5, then x[1] is the first element in the array and x[5] is the last. For a \\(3\n\\times 4\\) array y of two dimensions, y[1, 1] is the first element and y[3, 4] the last element. For a three-dimensional array z, the first element is z[1, 1, 1], and so on.\nSubarrays of arrays may be accessed by providing fewer than the full number of indexes. For example, suppose y is a two-dimensional array with three rows and four columns. Then y[3] is a one-dimensional array of length four. This means that y[3][1] may be used instead of y[3, 1] to access the value of the first column of the third row of y. The form y[3, 1] is the preferred form (see note in this chapter).\n\n\n\nSubarrays may be manipulated and assigned just like any other variables. Similar to the behavior of matrices, Stan allows blocks such as\narray[9, 10, 11] real w;\narray[10, 11] real x;\narray[11] real y;\nreal z;\n// ...\nx = w[5];\ny = x[4]; // y == w[5][4] == w[5, 4]\nz = y[3]; // z == w[5][4][3] == w[5, 4, 3]\nComplex-valued arrays work the same way.\n\n\n\nArrays of vectors and matrices are accessed in the same way as arrays of doubles. 
Consider the following vector and scalar declarations.\narray[3, 4] vector[5] a;\narray[4] vector[5] b;\nvector[5] c;\nreal x;\nWith these declarations, the following assignments are legal.\nb = a[1]; // result is array of vectors\nc = a[1, 3]; // result is vector\nc = b[3]; // same result as above\nx = a[1, 3, 5]; // result is scalar\nx = b[3, 5]; // same result as above\nx = c[5]; // same result as above\nRow vectors and other derived vector types (simplex and ordered) behave the same way in terms of indexing.\nConsider the following matrix, vector and scalar declarations.\narray[3, 4] matrix[6, 5] d;\narray[4] matrix[6, 5] e;\nmatrix[6, 5] f;\nrow_vector[5] g;\nreal x;\nWith these declarations, the following definitions are legal.\ne = d[1]; // result is array of matrices\nf = d[1, 3]; // result is matrix\nf = e[3]; // same result as above\ng = d[1, 3, 2]; // result is row vector\ng = e[3, 2]; // same result as above\ng = f[2]; // same result as above\nx = d[1, 3, 5, 2]; // result is scalar\nx = e[3, 5, 2]; // same result as above\nx = f[5, 2]; // same result as above\nx = g[2]; // same result as above\nAs shown, the result f[2] of supplying a single index to a matrix is the indexed row, here row 2 of matrix f.\n\n\n\nSubarrays of arrays may be assigned by indexing on the left-hand side of an assignment statement. For example, the following is legal.\narray[I, J, K] real x;\narray[J, K] real y;\narray[K] real z;\n// ...\nx[1] = y;\nx[1, 1] = z;\nThe sizes must match. Here, x[1] is a J by K array, as is y.\nPartial array assignment also works for arrays of matrices, vectors, and row vectors.\n\n\n\nArrays, row vectors, column vectors and matrices are not interchangeable in Stan. 
Thus a variable of any one of these fundamental types is not assignable to any of the others, nor may it be used as an argument where the other is required (use as arguments follows the assignment rules).\n\n\nFor example, vectors cannot be assigned to arrays or vice-versa.\narray[4] real a;\nvector[4] b;\nrow_vector[4] c;\n// ...\na = b; // illegal assignment of vector to array\nb = a; // illegal assignment of array to vector\na = c; // illegal assignment of row vector to array\nc = a; // illegal assignment of array to row vector\n\n\n\nIt is not even legal to assign row vectors to column vectors or vice versa.\nvector[4] b;\nrow_vector[4] c;\n// ...\nb = c; // illegal assignment of row vector to column vector\nc = b; // illegal assignment of column vector to row vector\n\n\n\nThe same holds for matrices, where 2-dimensional arrays may not be assigned to matrices or vice-versa.\narray[3, 4] real a;\nmatrix[3, 4] b;\n// ...\na = b; // illegal assignment of matrix to array\nb = a; // illegal assignment of array to matrix\n\n\n\nA \\(1 \\times N\\) matrix cannot be assigned a row vector or vice versa.\nmatrix[1, 4] a;\nrow_vector[4] b;\n// ...\na = b; // illegal assignment of row vector to matrix\nb = a; // illegal assignment of matrix to row vector\nSimilarly, an \\(M \\times 1\\) matrix may not be assigned to a column vector.\nmatrix[4, 1] a;\nvector[4] b;\n// ...\na = b; // illegal assignment of column vector to matrix\nb = a; // illegal assignment of matrix to column vector\n\n\n\n\nAn integer expression is used to pick out the sizes of arrays. The same restrictions as for vector and matrix sizes apply, namely that the size is declared with an integer-denoting expression that does not contain any parameters, transformed parameters, or generated quantities.\n\n\n\nIf any of an array’s dimensions is size zero, the entire array will be of size zero. 
That is, if we declare\narray[3, 0] real a;\nthen the resulting size of a is zero and querying any of its dimensions at run time will result in the value zero. Declared as above, a[1] will be a size-zero one-dimensional array. For comparison, declaring\narray[0, 3] real b;\nalso produces an array with an overall size of zero, but in this case, there is no way to index legally into b, because b[0] is undefined. The array will behave at run time as if it’s a \\(0 \\times\n0\\) array. For example, the result of to_matrix(b) will be a \\(0 \\times 0\\) matrix, not a \\(0 \\times 3\\) matrix.\n\n\n\n\nStan supports tuples of arbitrary size. The values in a tuple can be of arbitrary type, but the component types must be declared along with the declaration of the tuple. Tuples can be manipulated as a whole, or their elements may be accessed and set individually.\n\n\nTuples are declared with the keyword tuple followed by a parenthesized sequence of types, which determine the types of the respective tuple entries. For example, a tuple with three elements may be declared as\ntuple(int, vector[3], complex) abc;\nTuples must have at least one entry, so the following declaration is illegal.\ntuple() nil; // ILLEGAL\nTuples of length one must use a trailing comma, to align with the expression syntax.\ntuple(int,) m; // CORRECT\ntuple(int) n; // ILLEGAL\nTuples can be assigned as a whole if their elements can be assigned individually. For example, a can be assigned to b in the following example because int can be promoted to complex.\ntuple(int, real) a;\n...\ntuple(complex, real) b = a;\nTuple types may have elements which are declared as tuples, such as the following example.\ntuple(int, tuple(real, complex)) x;\nIn this case, it would probably be simpler to use a 3-tuple type, tuple(int, real, complex).\nTuples can be declared with constraints anywhere that ordinary variables can (i.e., as top-level block variables). 
That means any context in which it is legal to have a declaration\nreal<lower=0> sigma;\nreal<lower=0, upper=1> theta;\nit is legal to have a tuple with constraints such as\ntuple(real<lower=0>, real<lower=0, upper=1>) sigma_theta;\n\n\n\nTuple elements may be accessed directly. For example, with our declaration of abc from the last section, Stan uses abc.1 for the first element, abc.2 for the second, and abc.3 for the third. These numbers must be integer literals (i.e., they cannot be variables), and must not exceed the number of elements in the tuple. The types of elements are as declared, so that abc.1 is of type int, abc.2 of type vector[3] and abc.3 of type complex.\n\n\n\nTuple elements can be assigned individually, allowing, e.g.,\ntuple(int, real) ab;\nab.1 = 123;\nab.2 = 12.9;\nAs with other assignments, promotions will happen if necessary (of int to real and of real to complex, along with the corresponding container type promotions).\n\n\n\nFor convenience of using values stored in tuples, Stan supports “unpacking” (or “destructuring”) of tuples in an assignment statement.\nGiven a tuple t of type tuple(T1, ..., Tn) and a sequence of assignable expressions v1, …, vn, where each vi has a type which is assignable from type Ti, individual elements of the tuple may be assigned to the corresponding variables in the sequence by the statement\n(v1, /*...*/, vn) = t;\nNote that the above parentheses are required, unlike in some other languages with similar features (e.g., Python).\nThese unpacking assignments can be nested if the tuple on the right hand side contains nested tuples.\nFor example, if T is a tuple of type tuple(int, tuple(real, real), complex), then the program\nint i;\nreal x, y;\ncomplex z;\n\n(i, (x, y), z) = T;\nassigns the result of T.1 to i, the result of T.2.1 to x, the result of T.2.2 to y, and the result of T.3 to z.\nThe left hand side must match in size the tuple on the right. 
Additionally, the same variable may not appear more than once in the left hand side of an unpacking assignment.\n\n\n\n\nThe type information associated with a variable only contains the underlying type and dimensionality of the variable.\n\n\nThe size associated with a given variable is not part of its data type. For example, declaring a variable using\narray[3] real a;\ndeclares the variable a to be an array. The fact that it was declared to have size 3 is part of its declaration, but not part of its underlying type.\n\n\nSizes are determined dynamically (at run time) and thus cannot be type-checked statically when the program is compiled. As a result, any conformance error on size will raise a run-time error. For example, trying to assign an array of size 5 to an array of size 6 will cause a run-time error. Similarly, multiplying an \\(N \\times M\\) by a \\(J \\times K\\) matrix will raise a run-time error if \\(M \\neq J\\).\n\n\n\n\nLike sizes, constraints are not treated as part of a variable’s type in Stan when it comes to the compile-time check of operations it may participate in. Anywhere Stan accepts a matrix as an argument, it will syntactically accept a correlation matrix or covariance matrix or Cholesky factor. Thus a covariance matrix may be assigned to a matrix and vice-versa.\nSimilarly, a bounded real may be assigned to an unconstrained real and vice-versa.\n\n\nFor arguments to functions, constraints are sometimes, but not always checked when the function is called. Exclusions include C++ standard library functions. All probability functions and cumulative distribution functions check that their arguments are appropriate at run time as the function is called.\n\n\n\nFor data variables, constraints are checked after the variable is read from a data file or other source. For transformed data variables, the check is done after the statements in the transformed data block have executed. 
Thus it is legal for intermediate values of variables to not satisfy declared constraints.\nFor parameters, constraints are enforced by the transform applied and do not need to be checked. For transformed parameters, the check is done after the statements in the transformed parameter block have executed.\nFor all blocks defining variables (transformed data, transformed parameters, generated quantities), real values are initialized to NaN and integer values are initialized to the smallest legal integer (i.e., a large absolute value negative number).\nFor generated quantities, constraints are enforced after the statements in the generated quantities block have executed.\n\n\n\n\nIn order to refer to data types, it is convenient to have a way to refer to them. The type naming notation outlined in this section is not part of the Stan programming language, but rather a convention adopted in this document to enable a concise description of a type.\nBecause size information is not part of a data type, data types will be written without size information. For instance, array[] real is the type of one-dimensional array of reals and matrix is the type of matrices. The three-dimensional integer array type is written as array[,,] int, indicating the number slots available for indexing. Similarly, array[,] vector is the type of a two-dimensional array of vectors.\n\n\n\n\nVariables in Stan are declared by giving a type and a name. 
For example\nint N;\nvector[N] y;\narray[5] matrix[3, 4] A;\ndeclares a variable N that is an integer, a variable y that is a vector of length N (the previously declared variable), and a variable A, which is a length-5 array where each element is a 3 by 4 matrix.\nThe size of top-level variables in the parameters, transformed parameters, and generated quantities must remain constant across all iterations, therefore only data variables can be used in top-level size declarations.\n// illegal and will be flagged by the compiler:\ngenerated quantities {\n int N = 10;\n array[N] int foo;\n}\nDepending on where the variable is declared in the Stan program, it either must or cannot have size information, and constraints are either optional or not allowed.\n// valid block variables, but not locals or function parameters\nvector<lower=0>[N] u;\n\n// valid as a block or local variable, but not a function parameter\narray[3] int is;\n\n// function parameters exclude sizes and cannot be constrained\nvoid pretty_print_tri_lower(matrix x) { ... }\nTop-level variables can have constraints and must include sizes for their types, as in the above examples. Local variables, like those defined inside loops or local blocks cannot be constrained, but still include sizes. Finally, variables declared as function parameters are not constrained types and exclude sizes.\nIn the following table, the leftmost column is a list of the unconstrained and undimensioned basic types; these are used as function return types and argument types. The middle column is of unconstrained types with dimensions; these are used as local variable types. The variables M and N indicate number of rows and columns, respectively. The variable K is used for square matrices, i.e., K denotes both the number of rows and columns. The rightmost column lists the corresponding constrained types. An expression of any right-hand column type may be assigned to its corresponding left-hand column basic type. 
At runtime, dimensions are checked for consistency for all variables; containers of any sizes may be assigned to function arguments. The constrained matrix types cov_matrix[K], corr_matrix[K], cholesky_factor_cov[K], and cholesky_factor_corr[K] are only assignable to matrices of dimensions matrix[K, K] types.\n\n\n\n\n\n\n\n\nFunction Argument (unsized)\n Local\n(unconstrained)\n Block\n (constrained)\n\n\n\n\nint\nint\nint\n\n\n\n\nint<lower=L>\n\n\n\n\nint<upper=U>\n\n\n\n\nint<lower=L, upper=U>\n\n\n\n\nint<offset=O>\n\n\n\n\nint<multiplier=M>\n\n\n\n\nint<offset=O, multiplier=M>\n\n\nreal\nreal\nreal\n\n\n\n\nreal<lower=L>\n\n\n\n\nreal<upper=U>\n\n\n\n\nreal<lower=L, upper=U>\n\n\n\n\nreal<offset=O>\n\n\n\n\nreal<multiplier=M>\n\n\n\n\nreal<offset=O, multiplier=M>\n\n\ncomplex\ncomplex\ncomplex\n\n\nvector\nvector[N]\nvector[N]\n\n\n\n\nvector[N]<lower=L>\n\n\n\n\nvector[N]<upper=U>\n\n\n\n\nvector[N]<lower=L, upper=U>\n\n\n\n\nvector[N]<offset=O>\n\n\n\n\nvector[N]<multiplier=M>\n\n\n\n\nvector[N]<offset=O, multiplier=M>\n\n\n\n\nordered[N]\n\n\n\n\npositive_ordered[N]\n\n\n\n\nsimplex[N]\n\n\n\n\nunit_vector[N]\n\n\n\n\nsum_to_zero_vector[N]\n\n\nrow_vector\nrow_vector[N]\nrow_vector[N]\n\n\n\n\nrow_vector[N]<lower=L>\n\n\n\n\nrow_vector[N]<upper=U>\n\n\n\n\nrow_vector[N]<lower=L, upper=U>\n\n\n\n\nrow_vector[N]<offset=O>\n\n\n\n\nrow_vector[N]<multiplier=M>\n\n\n\n\nrow_vector[N]<offset=O, multiplier=M>\n\n\nmatrix\nmatrix[M, N]\nmatrix[M, N]\n\n\n\n\nmatrix[M, N]<lower=L>\n\n\n\n\nmatrix[M, N]<upper=U>\n\n\n\n\nmatrix[M, N]<lower=L, upper=U> |\n\n\n\n\nmatrix[M, N]<offset=O>\n\n\n\n\nmatrix[M, N]<multiplier=M>\n\n\n\n\nmatrix[M, N]<offset=O, multiplier=M>\n\n\n\n\ncolumn_stochastic_matrix[M, N]\n\n\n\n\nrow_stochastic_matrix[M, N]\n\n\n\n\nsum_to_zero_matrix[M, N]\n\n\n\nmatrix[K, K]\ncorr_matrix[K]\n\n\n\nmatrix[K, K]\ncov_matrix[K]\n\n\n\nmatrix[K, K]\ncholesky_factor_corr[K]\n\n\n\nmatrix[K, 
K]\ncholesky_factor_cov[K]\n\n\ncomplex_vector\ncomplex_vector[M]\ncomplex_vector[M]\n\n\ncomplex_row_vector\ncomplex_row_vector[N]\ncomplex_row_vector[N]\n\n\ncomplex_matrix\ncomplex_matrix[M, N]\ncomplex_matrix[M,N]\n\n\narray[] vector\narray[M] vector[N]\narray[M] vector[N]\n\n\n\n\narray[M] vector[N]<lower=L>\n\n\n\n\narray[M] vector[N]<upper=U>\n\n\n\n\narray[M] vector[N]<lower=L, upper=U>\n\n\n\n\narray[M] vector[N]<offset=O>\n\n\n\n\narray[M] vector[N]<multiplier=M>\n\n\n\n\narray[M] vector[N]<offset=O, multiplier=M>\n\n\n\n\narray[M] ordered[N]\n\n\n\n\narray[M] positive_ordered[N]\n\n\n\n\narray[M] simplex[N]\n\n\n\n\narray[M] unit_vector[N]\n\n\n\n\narray[M] sum_to_zero_vector[N]\n\n\n\n\nAdditional array types follow the same basic template as the final example in the table and can contain any of the previous types. The unsized version of arrays with more than one dimension is specified by using commas, e.g. array[ , ] is a 2-D array.\nFor more on how function arguments and return types are declared, consult the User’s Guide chapter on functions.\n\n\n\nStan allows assignable variables to be declared and defined in a single statement. Assignable variables are\n\nlocal variables, and\nvariables declared in the transformed data, transformed parameters, or generated quantities blocks.\n\nFor example, the statement\nint N = 5;\ndeclares the variable N to be an integer scalar type and at the same time defines it to be the value of the expression 5.\n\n\nThe type of the expression on the right-hand side of the assignment must be assignable to the type of the variable being declared. For example, it is legal to have\nreal sum = 0;\neven though 0 is of type int and sum is of type real, because integer-typed scalar expressions can be assigned to real-valued scalar variables. 
In all other cases, the type of the expression on the right-hand side of the assignment must be identical to the type of the variable being declared.\nVariables of any type may have values assigned to them. For example,\nmatrix[3, 2] a = b;\ndeclares a \\(3 \\times 2\\) matrix variable a and assigns a copy of the value of b to the variable a. The variable b must be of type matrix for the statement to be well formed. For the code to execute successfully, b must be the same shape as a, but this cannot be validated until run time. Because a copy is assigned, subsequent changes to a do not affect b and subsequent changes to b do not affect a.\n\n\n\nThe right-hand side may be any expression which has a type which is assignable to the variable being declared. For example,\nmatrix[3, 2] a = 0.5 * (b + c);\nassigns the matrix variable a to half of the sum of b and c. The only requirement on b and c is that the expression b + c be of type matrix. For example, b could be of type matrix and c of type real, because adding a matrix to a scalar produces a matrix, and then multiplying by a scalar produces another matrix.\nSimilarly,\ncomplex z = 2 + 3i;\nassigns the complex number \\(2 + 3i\\) to the complex scalar z. The right-hand side expression can be a call to a user-defined function, allowing general algorithms to be applied that might not be otherwise expressible as simple expressions (e.g., iterative or recursive algorithms).\n\n\n\nAny variable that is in scope and any function that is available in the block in which the compound declaration and definition appears may be used in the expression on the right-hand side of the compound declaration and definition statement.\n\n\n\n\nStan will interpret multiple comma-separated variable names following a single type as declaring multiple new variables. 
This is available for all variable declarations in all blocks.\n\n\nThe code:\nreal x, y;\nis equivalent to\nreal x;\nreal y;\nAs a result, all declarations on the same line must be of the same type.\n\n\n\nThe ability to declare multiple variables can be combined with assignments whenever a declare-define is valid, as documented in the section introducing compound declarations and definitions :\nreal x = 3, y = 5.6;\nConstrained data types can also be declared together, so long as the constraint for each variable is the same:\nreal<lower=0> x, y;", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#overview-of-data-types", + "href": "reference-manual/types.html#overview-of-data-types", + "title": "Data Types and Declarations", + "section": "", + "text": "Arguments for built-in and user-defined functions and local variables are required to be basic data types, meaning an unconstrained scalar, vector, or matrix type, or an array of such.\nPassing arguments to functions in Stan works just like assignment to basic types. Stan functions are only specified for the basic data types of their arguments, including array dimensionality, but not for sizes or constraints. Of course, functions often check constraints as part of their behavior.\n\n\nStan provides two primitive data types, real for continuous values and int for integer values. These are both considered scalar types.\n\n\n\nStan provides a complex number data type complex, where a complex number contains both a real and an imaginary component, both of which are of type real. 
Complex types are considered scalar types.\n\n\n\nStan provides three real-valued matrix data types, vector for column vectors, row_vector for row vectors, and matrix for matrices.\nStan also provides three complex-valued matrix data types, complex_vector for column vectors, complex_row_vector for row vectors, and complex_matrix for matrices.\n\n\n\nAny type (including the constrained types discussed in the next section) can be made into an array type by declaring array arguments. For example,\narray[10] real x;\narray[6, 7] matrix[3, 3] m;\narray[12, 8, 15] complex z;\ndeclares x to be a one-dimensional array of size 10 containing real values, declares m to be a two-dimensional array of size \\(6 \\times 7\\) containing values that are \\(3 \\times 3\\) matrices, and declares z to be a \\(12 \\times 8 \\times 15\\) array of complex numbers.\nPrior to 2.26 Stan models used a different syntax which has since been removed. See the Removed Features chapter for more details.\n\n\n\nFor any sequence of types, Stan provides a tuple data type. For example,\ntuple(real, array[5] int) xi;\ndeclares xi to be a tuple holding two values, the first of which is of type real and the second of which is a one-dimensional array of size 5 containing values of type int.\n\n\n\nDeclarations of variables other than local variables may be provided with constraints. These constraints are not part of the underlying data type for a variable, but determine error checking in the transformed data, transformed parameter, and generated quantities block, and the transform from unconstrained to constrained space in the parameters block.\nAll of the basic data types other than complex may be given lower and upper bounds using syntax such as\nint<lower=1> N;\nreal<upper=0> log_p;\nvector<lower=-1, upper=1>[3] rho;\nThere are also special data types for structured vectors and matrices. 
There are five constrained vector data types, simplex for unit simplexes, unit_vector for unit-length vectors, sum_to_zero_vector for vectors that sum to zero, ordered for ordered vectors of scalars, and positive_ordered for vectors of positive ordered scalars. There are specialized matrix data types corr_matrix and cov_matrix for correlation matrices (symmetric, positive definite, unit diagonal) and covariance matrices (symmetric, positive definite). The type cholesky_factor_cov is for Cholesky factors of covariance matrices (lower triangular, positive diagonal, product with own transpose is a covariance matrix). The type cholesky_factor_corr is for Cholesky factors of correlation matrices (lower triangular, positive diagonal, unit-length rows). The type sum_to_zero_matrix is for matrices that sum to zero across rows and columns.\nConstraints provide error checking for variables defined in the data, transformed data, transformed parameters, and generated quantities blocks. Constraints are critical for variables declared in the parameters block, where they determine the transformation from constrained variables (those satisfying the declared constraint) to unconstrained variables (those ranging over all of \\(\\mathbb{R}^n\\)).\nIt is worth calling out the most important aspect of constrained data types:\n\nThe model must have support (non-zero density, equivalently finite log density) at parameter values that satisfy the declared constraints.\n\nIf this condition is violated with parameter values that satisfy declared constraints but do not have finite log density, then the samplers and optimizers may have any of a number of pathologies including just getting stuck, failure to initialize, excessive Metropolis rejection, or biased draws due to inability to explore the tails of the distribution.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#numerical-data-types", + "href": 
"reference-manual/types.html#numerical-data-types", + "title": "Data Types and Declarations", + "section": "", + "text": "Unfortunately, the lovely mathematical abstraction of integers and real numbers is only partially supported by finite-precision computer arithmetic.\n\n\nStan uses 32-bit (4-byte) integers for all of its integer representations. The maximum value that can be represented as an integer is \\(2^{31}-1\\); the minimum value is \\(-(2^{31})\\).\nWhen integers overflow, their value is determined by the underlying architecture. On most, their values wrap, but this cannot be guaranteed. Thus it is up to the Stan programmer to make sure the integer values in their programs stay in range. In particular, every intermediate expression must have an integer value that is in range.\nInteger arithmetic works in the expected way for addition, subtraction, and multiplication, but truncates the result of division (see the Stan Functions Reference integer-valued arithmetic operators section for more information).\n\n\n\nStan uses 64-bit (8-byte) floating point representations of real numbers. Stan roughly1 follows the IEEE 754 standard for floating-point computation. The range of a 64-bit number is roughly \\(\\pm 2^{1022}\\), which is slightly larger than \\(\\pm 10^{307}\\). It is a good idea to stay well away from such extreme values in Stan models as they are prone to cause overflow.\n64-bit floating point representations have roughly 15 decimal digits of accuracy. But when they are combined, the result often has less accuracy. In some cases, the difference in accuracy between two operands and their result is large.\nThere are three special real values used to represent (1) not-a-number value for error conditions, (2) positive infinity for overflow, and (3) negative infinity for overflow. The behavior of these special numbers follows standard IEEE 754 behavior.\n\n\nThe not-a-number value propagates. 
If an argument to a real-valued function is not-a-number, it either rejects (an exception in the underlying C++) or returns not-a-number itself. For boolean-valued comparison operators, if one of the arguments is not-a-number, the return value is always zero (i.e., false).\n\n\n\nPositive infinity is greater than all numbers other than itself and not-a-number; negative infinity is similarly smaller. Adding an infinite value to a finite value returns the infinite value. Dividing a finite number by an infinite value returns zero; dividing an infinite number by a finite number returns the infinite number of appropriate sign. Dividing a finite number by zero returns positive infinity. Dividing two infinite numbers produces a not-a-number value as does subtracting two infinite numbers. Some functions are sensitive to infinite values; for example, the exponential function returns zero if given negative infinity and positive infinity if given positive infinity. Often the gradients will break down when values are infinite, making these boundary conditions less useful than they may appear at first.\n\n\n\n\nStan automatically promotes integer values to real values if necessary, but does not automatically demote real values to integers. For very large integers, this will cause a rounding error to fewer significant digits in the floating point representation than in the integer representation.\nUnlike in C++, real values are never demoted to integers. Therefore, real values may only be assigned to real variables. Integer values may be assigned to either integer variables or real variables. Internally, the integer representation is cast to a floating-point representation. 
This operation is not without overhead and should thus be avoided where possible.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#complex-numerical-data-type", + "href": "reference-manual/types.html#complex-numerical-data-type", + "title": "Data Types and Declarations", + "section": "", + "text": "The complex data type is a scalar, but unlike real and int types, it contains two components, a real and imaginary component, both of which are of type real. That is, the real and imaginary components of a complex number are 64-bit, IEEE 754-compliant floating point numbers.\n\n\nImaginary literals are written in mathematical notation using a numeral followed by the suffix i. For example, the following example constructs a complex number \\(2 - 1.3i\\) and assigns it to the variable z.\ncomplex z = 2 - 1.3i;\nreal re = get_real(z); // re has value 2.0\nreal im = get_imag(z); // im has value -1.3\nThe getter functions then extract the real and imaginary components of z and assign them to re and im respectively.\nThe function to_complex constructs a complex number from its real and imaginary components. The functional form needs to be used whenever the components are not literal numerals, as in the following example.\nvector[K] re;\nvector[K] im;\n// ...\nfor (k in 1:K) {\n complex z = to_complex(re[k], im[k]);\n // ...\n}\n\n\n\nExpressions of type real may be assigned to variables of type complex. 
For example, the following is a valid sequence of Stan statements.\nreal x = 5.0;\ncomplex z = x; // get_real(z) == 5.0, get_imag(z) == 0\nThe real number assigned to a complex number determines the complex number’s real component, with the imaginary component set to zero.\nAssignability is transitive, so that expressions of type int may also be assigned to variables of type complex, as in the following example.\nint n = 2;\ncomplex z = n;\nFunction arguments also support promotion of integer or real typed expressions to type complex.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#scalar-data-types-and-variable-declarations", + "href": "reference-manual/types.html#scalar-data-types-and-variable-declarations", + "title": "Data Types and Declarations", + "section": "", + "text": "All variables used in a Stan program must have an explicitly declared data type. The form of a declaration includes the type and the name of a variable. This section covers scalar types, namely integer, real, and complex. The next section covers vector and matrix types, and the following section array types.\n\n\nUnconstrained integers are declared using the int keyword. For example, the variable N is declared to be an integer as follows.\nint N;\n\n\n\nInteger data types may be constrained to allow values only in a specified interval by providing a lower bound, an upper bound, or both. For instance, to declare N to be a positive integer, use the following.\nint<lower=1> N;\nThis illustrates that the bounds are inclusive for integers.\nTo declare an integer variable cond to take only binary values, that is zero or one, a lower and upper bound must be provided, as in the following example.\nint<lower=0, upper=1> cond;\n\n\n\nUnconstrained real variables are declared using the keyword real. 
The following example declares theta to be an unconstrained continuous value.\nreal theta;\n\n\n\nUnconstrained complex numbers are declared using the keyword complex. The following example declares z to be an unconstrained complex variable.\ncomplex z;\n\n\n\nReal variables may be bounded using the same syntax as integers. In theory (that is, with arbitrary-precision arithmetic), the bounds on real values would be exclusive. Unfortunately, finite-precision arithmetic rounding errors will often lead to values on the boundaries, so they are allowed in Stan.\nThe variable sigma may be declared to be non-negative as follows.\nreal<lower=0> sigma;\nThe following declares the variable x to be less than or equal to \\(-1\\).\nreal<upper=-1> x;\nTo ensure rho takes on values between \\(-1\\) and \\(1\\), use the following declaration.\nreal<lower=-1, upper=1> rho;\n\n\nLower bounds that are negative infinity or upper bounds that are positive infinity are ignored. Stan provides constants positive_infinity() and negative_infinity() which may be used for this purpose, or they may be supplied as data.\n\n\n\n\nReal variables may be declared on a space that has been transformed using an affine transformation \\(x\\mapsto \\mu + \\sigma * x\\) with offset \\(\\mu\\) and (positive) multiplier \\(\\sigma\\), using a syntax similar to that for bounds. While these transforms do not change the asymptotic sampling behaviour of the resulting Stan program (in a sense, the model the program implements), they can be useful for making the sampling process more efficient by transforming the geometry of the problem to a more natural multiplier and to a more natural offset for the sampling process, for instance by facilitating a non-centered parameterisation. 
While these affine transformation declarations do not impose a hard constraint on variables, they behave like the bounds constraints in many ways and could perhaps be viewed as acting as a sort of soft constraint.\nThe variable x may be declared to have offset \\(1\\) as follows.\nreal<offset=1> x;\nSimilarly, it can be declared to have multiplier \\(2\\) as follows.\nreal<multiplier=2> x;\nFinally, we can combine both declarations to declare a variable with offset \\(1\\) and multiplier \\(2\\).\nreal<offset=1, multiplier=2> x;\nAs an example, we can give x a normal distribution with non-centered parameterization as follows.\nparameters {\n real<offset=mu, multiplier=sigma> x;\n}\nmodel {\n x ~ normal(mu, sigma);\n}\nRecall that the centered parameterization is achieved with the code\nparameters {\n real x;\n}\nmodel {\n x ~ normal(mu, sigma);\n}\nor equivalently\nparameters {\n real<offset=0, multiplier=1> x;\n}\nmodel {\n x ~ normal(mu, sigma);\n}\n\n\n\nBounds (and offset and multiplier) for integer or real variables may be arbitrary expressions. The only requirement is that they only include variables that have been declared (though not necessarily defined) before the declaration. If the bounds themselves are parameters, the behind-the-scenes variable transform accounts for them in the log Jacobian.\nFor example, it is acceptable to have the following declarations.\ndata {\n real lb;\n}\nparameters {\n real<lower=lb> phi;\n}\nThis declares a real-valued parameter phi to take values greater than the value of the real-valued data variable lb. Constraints may be arbitrary expressions, but must be of type int for integer variables and of type real for real variables (including constraints on vectors, row vectors, and matrices). Variables used in constraints can be any variable that has been defined at the point the constraint is used. 
For instance,\ndata {\n int<lower=1> N;\n array[N] real y;\n}\nparameters {\n real<lower=min(y), upper=max(y)> phi;\n}\nThis declares a positive integer data variable N, an array y of real-valued data of length N, and then a parameter ranging between the minimum and maximum value of y. As shown in the example code, the functions min() and max() may be applied to containers such as arrays.\nA more subtle case involves declarations of parameters or transformed parameters based on parameters declared previously. For example, the following program will work as intended.\nparameters {\n real a;\n real<lower=a> b; // enforces a < b\n}\ntransformed parameters {\n real c;\n real<lower=c> d;\n c = a;\n d = b;\n}\nThe parameters instance works because all parameters are defined externally before the block is executed. The transformed parameters case works even though c isn’t defined at the point it is used, because constraints on transformed parameters are only validated at the end of the block. Data variables work like parameter variables, whereas transformed data and generated quantity variables work like transformed parameter variables.\n\n\n\nA variable may be declared with a size that depends on a boolean constant. For example, consider the definition of alpha in the following program fragment.\ndata {\n int<lower=0, upper=1> include_alpha;\n // ...\n}\nparameters {\n vector[include_alpha ? N : 0] alpha;\n // ...\n}\nIf include_alpha is true, the model will include the vector alpha; if the flag is false, the model will not include alpha (technically, it will include alpha of size 0, which means it won’t contain any values and won’t be included in any output).\nThis technique is not just useful for containers. 
If the value of N is set to 1, then the vector alpha will contain a single element and thus alpha[1] behaves like an optional scalar, the existence of which is controlled by include_alpha.\nThis coding pattern allows a single Stan program to define different models based on the data provided as input. This strategy is used extensively in the implementation of the RStanArm package.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#vector-and-matrix-data-types", + "href": "reference-manual/types.html#vector-and-matrix-data-types", + "title": "Data Types and Declarations", + "section": "", + "text": "Stan provides three types of container objects: arrays, vectors, and matrices. Vectors and matrices are more limited kinds of data structures than arrays. Vectors are intrinsically one-dimensional collections of real or complex values, whereas matrices are intrinsically two dimensional. Vectors, matrices, and arrays are not assignable to one another, even if their dimensions are identical. A \\(3 \\times 4\\) matrix is a different kind of object in Stan than a \\(3\n\\times 4\\) array.\nThe intention of using matrix types is to call out their usage in the code. There are three situations in Stan where only vectors and matrices may be used,\n\nmatrix arithmetic operations (e.g., matrix multiplication)\nlinear algebra functions (e.g., eigenvalues and determinants), and\nmultivariate function parameters and outcomes (e.g., multivariate normal distribution arguments).\n\nVectors and matrices cannot be typed to return integer values. They are restricted to real and complex values.\nFor constructing vectors and matrices in Stan, see Vector, Matrix, and Array Expressions.\n\n\nVectors and matrices, as well as arrays, are indexed starting from one (1) in Stan. 
This follows the convention in statistics and linear algebra as well as their implementations in the statistical software packages R, MATLAB, BUGS, and JAGS. General computer programming languages, on the other hand, such as C++ and Python, index arrays starting from zero.\n\n\n\nVectors in Stan are column vectors; see below for information on row vectors. Vectors are declared with a size (i.e., a dimensionality). For example, a 3-dimensional real vector is declared with the keyword vector, as follows.\nvector[3] u;\nVectors may also be declared with constraints, as in the following declaration of a 3-vector of non-negative values.\nvector<lower=0>[3] u;\nSimilarly, they may be declared with an offset and/or multiplier, as in the following example.\nvector<offset=42, multiplier=3>[3] u;\n\n\n\nLike real vectors, complex vectors are column vectors and are declared with a size. For example, a 3-dimensional complex vector is declared with the keyword complex_vector, as follows.\ncomplex_vector[3] v;\nComplex vector declarations do not support any constraints.\n\n\n\nA unit simplex is a vector with non-negative values whose entries sum to 1. For instance, \\([0.2,0.3,0.4,0.1]^{\\top}\\) is a unit 4-simplex. Unit simplexes are most often used as parameters in categorical or multinomial distributions, and they are also the sampled variate in a Dirichlet distribution. Simplexes are declared with their full dimensionality. For instance, theta is declared to be a unit \\(5\\)-simplex by\nsimplex[5] theta;\nUnit simplexes are implemented as vectors and may be assigned to other vectors and vice-versa. 
Simplex variables, like other constrained variables, are validated to ensure they contain simplex values; for simplexes, this is only done up to a statically specified accuracy threshold \\(\\epsilon\\) to account for errors arising from floating-point imprecision.\nIn high dimensional problems, simplexes may require smaller step sizes in the inference algorithms in order to remain stable; this can be achieved through higher target acceptance rates for samplers and longer warmup periods, tighter tolerances for optimization with more iterations, and in either case, with less dispersed parameter initialization or custom initialization if there are informative priors for some parameters.\n\n\n\nA stochastic matrix is a matrix where each column or row is a unit simplex, meaning that each column (row) vector has non-negative values that sum to 1. The following example is a \\(3 \\times 4\\) column-stochastic matrix.\n\\[\n\\begin{bmatrix}\n0.2 & 0.5 & 0.1 & 0.3 \\\\\n0.3 & 0.3 & 0.6 & 0.4 \\\\\n0.5 & 0.2 & 0.3 & 0.3\n\\end{bmatrix}\n\\]\nAn example of a \\(3 \\times 4\\) row-stochastic matrix is the following.\n\\[\n\\begin{bmatrix}\n0.2 & 0.5 & 0.1 & 0.2 \\\\\n0.2 & 0.1 & 0.6 & 0.1 \\\\\n0.5 & 0.2 & 0.2 & 0.1\n\\end{bmatrix}\n\\]\nIn the examples above, each column (or row) sums to 1, making the matrices valid column_stochastic_matrix and row_stochastic_matrix types.\nColumn-stochastic matrices are often used in models where each column represents a probability distribution across a set of categories such as in multiple multinomial distributions, factor models, transition matrices in Markov models, or compositional data analysis. They can also be used in situations where you need multiple simplexes of the same dimensionality.\nThe column_stochastic_matrix and row_stochastic_matrix types are declared with row and column sizes. 
For instance, a matrix theta with 3 rows and 4 columns, where each column is a 3-simplex, is declared like a matrix with 3 rows and 4 columns.\ncolumn_stochastic_matrix[3, 4] theta;\nA matrix theta with 3 rows and 4 columns, where each row is a 4-simplex, is similarly declared as a matrix with 3 rows and 4 columns.\nrow_stochastic_matrix[3, 4] theta;\nAs with simplexes, column_stochastic_matrix and row_stochastic_matrix variables are subject to validation, ensuring that each column (row) satisfies the simplex constraints. This validation accounts for floating-point imprecision, with checks performed up to a statically specified accuracy threshold \\(\\epsilon\\).\n\n\nIn high-dimensional settings, column_stochastic_matrix and row_stochastic_matrix types may require careful tuning of the inference algorithms. To ensure stability:\n\nSmaller Step Sizes: In samplers like Hamiltonian Monte Carlo (HMC), smaller step sizes can help maintain stability, especially in high dimensions.\nHigher Target Acceptance Rates: Setting higher target acceptance rates can improve the robustness of the sampling process.\nLonger Warmup Periods: Increasing the warmup period allows the sampler to better explore the parameter space before the actual sampling begins.\nTighter Optimization Tolerances: For optimization-based inference, tighter tolerances with more iterations can yield more accurate results.\nCustom Initialization: If prior information about the parameters is available, custom initialization or less dispersed initialization can lead to more efficient inference.\n\n\n\n\n\nA unit vector is a vector with a norm of one. For instance, \\([0.5,\n0.5, 0.5, 0.5]^{\\top}\\) is a unit 4-vector. Unit vectors are sometimes used in directional statistics. Unit vectors are declared with their full dimensionality. For instance, theta is declared to be a unit \\(5\\)-vector by\nunit_vector[5] theta;\nUnit vectors are implemented as vectors and may be assigned to other vectors and vice-versa. 
Unit vector variables, like other constrained variables, are validated to ensure that they are indeed unit length; for unit vectors, this is only done up to a statically specified accuracy threshold \\(\\epsilon\\) to account for errors arising from floating-point imprecision.\n\n\n\nA zero-sum vector is constrained such that the sum of its elements is always \\(0\\). These are sometimes useful for resolving identifiability issues in regression models. While the underlying vector has only \\(N - 1\\) degrees of freedom, zero sum vectors are declared with their full dimensionality. For instance, beta is declared to be a zero-sum \\(5\\)-vector (4 degrees of freedom) by\nsum_to_zero_vector[5] beta;\nZero sum vectors are implemented as vectors and may be assigned to other vectors and vice-versa. Zero sum vector variables, like other constrained variables, are validated to ensure that they do indeed sum to zero; for zero sum vectors, this is only done up to a statically specified accuracy threshold \\(\\epsilon\\) to account for errors arising from floating-point imprecision.\n\n\n\nAn ordered vector type in Stan represents a vector whose entries are sorted in ascending order. For instance, \\((-1.3,2.7,2.71)^{\\top}\\) is an ordered 3-vector. Ordered vectors are most often employed as cut points in ordered logistic regression models (see section).\nThe variable c is declared as an ordered 5-vector by\nordered[5] c;\nAfter their declaration, ordered vectors, like unit simplexes, may be assigned to other vectors and other vectors may be assigned to them. Constraints will be checked after executing the block in which the variables were declared.\n\n\n\nThere is also a positive, ordered vector type which operates similarly to ordered vectors, but all entries are constrained to be positive. 
For instance, \\((2,3.7,4,12.9)\\) is a positive, ordered 4-vector.\nThe variable d is declared as a positive, ordered 5-vector by\npositive_ordered[5] d;\nLike ordered vectors, after their declaration, positive ordered vectors may be assigned to other vectors and other vectors may be assigned to them. Constraints will be checked after executing the block in which the variables were declared.\n\n\n\nRow vectors are declared with the keyword row_vector. Like (column) vectors, they are declared with a size. For example, a 1093-dimensional row vector u would be declared as\nrow_vector[1093] u;\nConstraints are declared as for vectors, as in the following example of a 10-vector with values between -1 and 1.\nrow_vector<lower=-1, upper=1>[10] u;\nOffset and multiplier are also similar as for the following 3-row-vector with offset -42 and multiplier 3.\nrow_vector<offset=-42, multiplier=3>[3] u;\nRow vectors may not be assigned to column vectors, nor may column vectors be assigned to row vectors. If assignments are required, they may be accommodated through the transposition operator.\n\n\n\nComplex row vectors are declared with the keyword complex_row_vector and given a size in basic declarations. For example, a 12-dimensional complex row vector v would be declared as\ncomplex_row_vector[12] v;\nComplex row vectors do not allow constraints.\n\n\n\nMatrices are declared with the keyword matrix along with a number of rows and number of columns. For example,\nmatrix[3, 3] A;\nmatrix[M, N] B;\ndeclares A to be a \\(3 \\times 3\\) matrix and B to be a \\(M\n\\times N\\) matrix. 
For the second declaration to be well formed, the variables M and N must be declared as integers in either the data or transformed data block and before the matrix declaration.\nMatrices may also be declared with constraints, as in this (\\(3 \\times 4\\)) matrix of non-positive values.\nmatrix<upper=0>[3, 4] B;\nSimilarly, matrices can be declared to have a set offset and/or multiplier, as in this matrix with multiplier 5.\nmatrix<multiplier=5>[3, 4] B;\n\n\nRows of a matrix can be assigned by indexing the left-hand side of an assignment statement. For example, this is possible.\nmatrix[M, N] a;\nrow_vector[N] b;\n// ...\na[1] = b;\nThis copies the values from row vector b to a[1], which is the first row of the matrix a. If the number of columns in a is not the same as the size of b, a run-time error is raised; the number of columns of a is N, which is also the number of columns of b.\nAssignment works by copying values in Stan. That means any subsequent assignment to a[1] does not affect b, nor does an assignment to b affect a.\n\n\n\n\nComplex matrices are declared with the keyword complex_matrix and a number of rows and columns. For example,\ncomplex_matrix[3, 3] C;\nComplex matrices do not allow constraints.\n\n\n\nMatrix variables may be constrained to represent covariance matrices. A matrix is a covariance matrix if it is symmetric and positive definite. Like correlation matrices, covariance matrices only need a single dimension in their declaration. For instance,\ncov_matrix[K] Sigma;\ndeclares Sigma to be a \\(K \\times K\\) covariance matrix, where \\(K\\) is the value of the data variable K.\n\n\n\nMatrix variables may be constrained to represent correlation matrices. A matrix is a correlation matrix if it is symmetric and positive definite, has entries between \\(-1\\) and \\(1\\), and has a unit diagonal. Because correlation matrices are square, only one dimension needs to be declared. 
For example,\ncorr_matrix[3] Omega;\ndeclares Omega to be a \\(3 \\times 3\\) correlation matrix.\nCorrelation matrices may be assigned to other matrices, including unconstrained matrices, if their dimensions match, and vice-versa.\n\n\n\nMatrix variables may be constrained to represent the Cholesky factors of a covariance matrix. This is often more convenient or more efficient than representing covariance matrices directly.\nA Cholesky factor \\(L\\) is an \\(M \\times N\\) lower-triangular matrix (if \\(m < n\\) then \\(L[m, n] =0\\)) with a strictly positive diagonal (\\(L[k, k]\n> 0\\)) and \\(M \\geq N\\). If \\(L\\) is a Cholesky factor, then \\(\\Sigma = L\n\\, L^{\\top}\\) is a covariance matrix (i.e., it is positive definite). The mapping between positive definite matrices and their Cholesky factors is bijective—every covariance matrix has a unique Cholesky factorization.\nThe typical case of a square Cholesky factor may be declared with a single dimension,\ncholesky_factor_cov[4] L;\n\n\nIn general, two dimensions may be declared, with the above being equal to cholesky_factor_cov[4, 4]. The type cholesky_factor_cov[M, N] may be used for the general \\(M \\times N\\) case to produce positive semi-definite matrices of rank \\(M\\).\n\n\n\n\nMatrix variables may be constrained to represent the Cholesky factors of a correlation matrix.\nA Cholesky factor for a correlation matrix \\(L\\) is a \\(K \\times K\\) lower-triangular matrix with positive diagonal entries and rows that are of length 1 (i.e., \\(\\sum_{n=1}^K L_{m,n}^2 = 1\\)). If \\(L\\) is a Cholesky factor for a correlation matrix, then \\(L\\,L^{\\top}\\) is a correlation matrix (i.e., symmetric positive definite with a unit diagonal).\nTo declare the variable L to be a K by K Cholesky factor of a correlation matrix, the following code may be used.\ncholesky_factor_corr[K] L;\n\n\n\nA sum-to-zero matrix is constrained such that the sum of rows and the sum of the columns is always \\(0\\). 
These are sometimes useful for resolving identifiability issues in regression models. While the underlying matrix has only \\((N - 1) \\times (M - 1)\\) degrees of freedom, zero sum matrices are declared with their full dimensionality. For instance, beta is declared to be a sum-to-zero \\(5 \\times 4\\)-matrix (12 degrees of freedom) by\nsum_to_zero_matrix[5, 4] beta;\nSum-to-zero matrices are implemented as matrices and may be assigned to other matrices and vice-versa. Zero sum matrix variables, like other constrained variables, are validated to ensure that they do indeed sum to zero; for zero sum matrices, this is only done up to an internally specified accuracy threshold \\(\\epsilon\\) to account for errors arising from floating-point imprecision.\n\n\n\nConstrained variables of all types may be assigned to other variables of the same unconstrained type and vice-versa. Matching is interpreted strictly as having the same basic type and number of array dimensions. Constraints are not considered, but basic data types are. For instance, a variable declared to be real<lower=0, upper=1> could be assigned to a variable declared as real and vice-versa. Similarly, a variable declared as matrix[3, 3] may be assigned to a variable declared as cov_matrix[3] or cholesky_factor_cov[3], and vice-versa.\nChecks are carried out at the end of each relevant block of statements to ensure constraints are enforced. This includes run-time size checks. The Stan compiler isn’t able to catch the fact that an attempt may be made to assign a matrix of one dimensionality to a matrix of mismatching dimensionality.\n\n\n\nReal-valued vectors, row vectors and matrices may be assigned to complex-valued vectors, row vectors and matrices, respectively. For example, the following is legal.\nvector[N] v = ...;\ncomplex_vector[N] u = 2 * v;\nRow vectors and matrices work the same way.\n\n\n\nVariables may be declared with sizes given by expressions. 
Such expressions are constrained to only contain data or transformed data variables. This ensures that all sizes are determined once the data is read in and transformed data variables defined by their statements. For example, the following is legal.\ndata {\n int<lower=0> N_observed, N_missing;\n // ...\ntransformed parameters {\n vector[N_observed + N_missing] y;\n // ...\n\n\n\nIf v is a column vector or row vector, then v[2] is the second element in the vector. If m is a matrix, then m[2, 3] is the value in the second row and third column.\nProviding a matrix with a single index returns the specified row. For instance, if m is a matrix, then m[2] is the second row. This allows Stan blocks such as\nmatrix[M, N] m;\nrow_vector[N] v;\nreal x;\n// ...\nv = m[2];\nx = v[3]; // x == m[2][3] == m[2, 3]\nThe type of m[2] is row_vector because it is the second row of m. Thus it is possible to write m[2][3] instead of m[2, 3] to access the third element in the second row. When given a choice, the form m[2, 3] is preferred.\nComplex versions work the same way,\ncomplex_matrix[M, N] m = ...;\ncomplex_row_vector[N] u = m[3];\ncomplex_vector[M] v = m[ , 2];\n\n\nThe form m[2, 3] is more efficient because it does not require the creation and use of an intermediate expression template for m[2]. In later versions, explicit calls to m[2][3] may be optimized to be as efficient as m[2, 3] by the Stan compiler.\n\n\n\n\nAn integer expression is used to pick out the sizes of vectors, matrices, and arrays. For instance, we can declare a vector of size M + N using\nvector[M + N] y;\nAny integer-denoting expression may be used for the size declaration, providing all variables involved are either data, transformed data, or local variables. 
That is, expressions used for size declarations may not include parameters or transformed parameters or generated quantities.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#array-data-types.section", + "href": "reference-manual/types.html#array-data-types.section", + "title": "Data Types and Declarations", + "section": "", + "text": "Stan supports arrays of arbitrary dimension. The values in an array can be any type, so that arrays may contain values that are simple reals or integers, vectors, matrices, or other arrays. Arrays are the only way to store sequences of integers, and some functions in Stan, such as discrete distributions, require integer arguments.\nA two-dimensional array is just an array of arrays, both conceptually and in terms of current implementation. When an index is supplied to an array, it returns the value at that index. When more than one index is supplied, this indexing operation is chained. For example, if a is a two-dimensional array, then a[m, n] is just a convenient shorthand for a[m][n].\nVectors, matrices, and arrays are not assignable to one another, even if their dimensions are identical.\nFor constructing arrays in Stan, see Vector, Matrix, and Array Expressions.\n\n\nArrays are declared with the keyword array followed by the dimensions enclosed in square brackets, the element type, and the name of the variable.\nThe variable n is declared as an array of five integers as follows.\narray[5] int n;\nA two-dimensional array of complex values with three rows and four columns is declared as follows.\narray[3, 4] complex a;\nA three-dimensional array z of positive reals with five rows, four columns, and two shelves can be declared as follows.\narray[5, 4, 2] real<lower=0> z;\nArrays may also be declared to contain vectors. For example,\narray[3] vector[7] mu;\ndeclares mu to be an array of size 3 containing vectors with 7 elements. 
Arrays may also contain matrices. The example\narray[15, 12] complex_matrix[7, 2] mu;\ndeclares a 15 by 12 array of \\(7 \\times 2\\) complex matrices. Any of the constrained types may also be used in arrays, as in the declaration\narray[2, 3, 4] cholesky_factor_cov[5, 6] mu;\nof a \\(2 \\times 3 \\times 4\\) array of \\(5 \\times 6\\) Cholesky factors of covariance matrices.\n\n\n\nIf x is a 1-dimensional array of length 5, then x[1] is the first element in the array and x[5] is the last. For a \\(3\n\\times 4\\) array y of two dimensions, y[1, 1] is the first element and y[3, 4] the last element. For a three-dimensional array z, the first element is z[1, 1, 1], and so on.\nSubarrays of arrays may be accessed by providing fewer than the full number of indexes. For example, suppose y is a two-dimensional array with three rows and four columns. Then y[3] is one-dimensional array of length four. This means that y[3][1] may be used instead of y[3, 1] to access the value of the first column of the third row of y. The form y[3, 1] is the preferred form (see note in this chapter).\n\n\n\nSubarrays may be manipulated and assigned just like any other variables. Similar to the behavior of matrices, Stan allows blocks such as\narray[9, 10, 11] real w;\narray[10, 11] real x;\narray[11] real y;\nreal z;\n// ...\nx = w[5];\ny = x[4]; // y == w[5][4] == w[5, 4]\nz = y[3]; // z == w[5][4][3] == w[5, 4, 3]\nComplex-valued arrays work the same way.\n\n\n\nArrays of vectors and matrices are accessed in the same way as arrays of doubles. 
Consider the following vector and scalar declarations.\narray[3, 4] vector[5] a;\narray[4] vector[5] b;\nvector[5] c;\nreal x;\nWith these declarations, the following assignments are legal.\nb = a[1]; // result is array of vectors\nc = a[1, 3]; // result is vector\nc = b[3]; // same result as above\nx = a[1, 3, 5]; // result is scalar\nx = b[3, 5]; // same result as above\nx = c[5]; // same result as above\nRow vectors and other derived vector types (simplex and ordered) behave the same way in terms of indexing.\nConsider the following matrix, vector and scalar declarations.\narray[3, 4] matrix[6, 5] d;\narray[4] matrix[6, 5] e;\nmatrix[6, 5] f;\nrow_vector[5] g;\nreal x;\nWith these declarations, the following definitions are legal.\ne = d[1]; // result is array of matrices\nf = d[1, 3]; // result is matrix\nf = e[3]; // same result as above\ng = d[1, 3, 2]; // result is row vector\ng = e[3, 2]; // same result as above\ng = f[2]; // same result as above\nx = d[1, 3, 5, 2]; // result is scalar\nx = e[3, 5, 2]; // same result as above\nx = f[5, 2]; // same result as above\nx = g[2]; // same result as above\nAs shown, the result f[2] of supplying a single index to a matrix is the indexed row, here row 2 of matrix f.\n\n\n\nSubarrays of arrays may be assigned by indexing on the left-hand side of an assignment statement. For example, the following is legal.\narray[I, J, K] real x;\narray[J, K] real y;\narray[K] real z;\n// ...\nx[1] = y;\nx[1, 1] = z;\nThe sizes must match. Here, x[1] is a J by K array, as is y.\nPartial array assignment also works for arrays of matrices, vectors, and row vectors.\n\n\n\nArrays, row vectors, column vectors and matrices are not interchangeable in Stan. 
Thus a variable of any one of these fundamental types is not assignable to any of the others, nor may it be used as an argument where the other is required (use as arguments follows the assignment rules).\n\n\nFor example, vectors cannot be assigned to arrays or vice-versa.\narray[4] real a;\nvector[4] b;\nrow_vector[4] c;\n// ...\na = b; // illegal assignment of vector to array\nb = a; // illegal assignment of array to vector\na = c; // illegal assignment of row vector to array\nc = a; // illegal assignment of array to row vector\n\n\n\nIt is not even legal to assign row vectors to column vectors or vice versa.\nvector[4] b;\nrow_vector[4] c;\n// ...\nb = c; // illegal assignment of row vector to column vector\nc = b; // illegal assignment of column vector to row vector\n\n\n\nThe same holds for matrices, where 2-dimensional arrays may not be assigned to matrices or vice-versa.\narray[3, 4] real a;\nmatrix[3, 4] b;\n// ...\na = b; // illegal assignment of matrix to array\nb = a; // illegal assignment of array to matrix\n\n\n\nA \\(1 \\times N\\) matrix cannot be assigned a row vector or vice versa.\nmatrix[1, 4] a;\nrow_vector[4] b;\n// ...\na = b; // illegal assignment of row vector to matrix\nb = a; // illegal assignment of matrix to row vector\nSimilarly, an \\(M \\times 1\\) matrix may not be assigned to a column vector.\nmatrix[4, 1] a;\nvector[4] b;\n// ...\na = b; // illegal assignment of column vector to matrix\nb = a; // illegal assignment of matrix to column vector\n\n\n\n\nAn integer expression is used to pick out the sizes of arrays. The same restrictions as for vector and matrix sizes apply, namely that the size is declared with an integer-denoting expression that does not contain any parameters, transformed parameters, or generated quantities.\n\n\n\nIf any of an array’s dimensions is size zero, the entire array will be of size zero. 
That is, if we declare\narray[3, 0] real a;\nthen the resulting size of a is zero and querying any of its dimensions at run time will result in the value zero. Declared as above, a[1] will be a size-zero one-dimensional array. For comparison, declaring\narray[0, 3] real b;\nalso produces an array with an overall size of zero, but in this case, there is no way to index legally into b, because b[0] is undefined. The array will behave at run time as if it’s a \\(0 \\times\n0\\) array. For example, the result of to_matrix(b) will be a \\(0 \\times 0\\) matrix, not a \\(0 \\times 3\\) matrix.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#tuple-data-type", + "href": "reference-manual/types.html#tuple-data-type", + "title": "Data Types and Declarations", + "section": "", + "text": "Stan supports tuples of arbitrary size. The values in a tuple can be of arbitrary type, but the component types must be declared along with the declaration of the tuple. Tuples can be manipulated as a whole, or their elements may be accessed and set individually.\n\n\nTuples are declared with the keyword tuple followed by a parenthesized sequence of types, which determine the types of the respective tuple entries. For example, a tuple with three elements may be declared as\ntuple(int, vector[3], complex) abc;\nTuples must have at least one entry, so the following declaration is illegal.\ntuple() nil; // ILLEGAL\nTuples of length one must use a trailing comma, to align with the expression syntax.\ntuple(int,) m; // CORRECT\ntuple(int) n; // ILLEGAL\nTuples can be assigned as a whole if their elements can be assigned individually. 
For example, a can be assigned to b in the following example because int can be promoted to complex.\ntuple(int, real) a;\n...\ntuple(complex, real) b = a;\nTuple types may have elements which are declared as tuples, such as the following example.\ntuple(int, tuple(real, complex)) x;\nIn this case, it would probably be simpler to use a 3-tuple type, tuple(int, real, complex).\nTuples can be declared with constraints anywhere that ordinary variables can (i.e., as top-level block variables). That means any context in which it is legal to have a declaration\nreal<lower=0> sigma;\nreal<lower=0, upper=1> theta;\nit is legal to have a tuple with constraints such as\ntuple(real<lower=0>, real<lower=0, upper=1>) sigma_theta;\n\n\n\nTuple elements may be accessed directly. For example, with our declaration of abc from the last section, Stan uses abc.1 for the first element, abc.2 for the second, and abc.3 for the third. These numbers must be integer literals (i.e., they cannot be variables), and must be within the size of the number of elements of tuples. 
The types of elements are as declared, so that abc.1 is of type int, abc.2 of type vector[3] and abc.3 of type complex.\n\n\n\nTuple elements can be assigned individually, allowing, e.g.,\ntuple(int, real) ab;\nab.1 = 123;\nab.2 = 12.9;\nAs with other assignments, promotions will happen if necessary (of int to real and of real to complex, along with the corresponding container type promotions).\n\n\n\nFor convenience of using values stored in tuples, Stan supports “unpacking” (or “destructuring”) of tuples in an assignment statement.\nGiven a tuple t of type tuple(T1, ..., Tn) and a sequence of assignable expressions v1, …, vn, where each vi has a type which is assignable from type Ti, individual elements of the tuple may be assigned to the corresponding variables in the sequence by the statement\n(v1, /*...*/, vn) = t;\nNote that the above parentheses are required, unlike in some other languages with similar features (e.g., Python).\nThese unpacking assignments can be nested if the tuple on the right hand side contains nested tuples.\nFor example, if T is a tuple of type tuple(int, tuple(real, real), complex), then the program\nint i;\nreal x, y;\ncomplex z;\n\n(i, (x, y), z) = T;\nassigns the result of T.1 to i, the result of T.2.1 to x, the result of T.2.2 to y, and the result of T.3 to z.\nThe left hand side must match in size the tuple on the right. 
Additionally, the same variable may not appear more than once in the left hand side of an unpacking assignment.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#variable-types-vs.-constraints-and-sizes", + "href": "reference-manual/types.html#variable-types-vs.-constraints-and-sizes", + "title": "Data Types and Declarations", + "section": "", + "text": "The type information associated with a variable only contains the underlying type and dimensionality of the variable.\n\n\nThe size associated with a given variable is not part of its data type. For example, declaring a variable using\narray[3] real a;\ndeclares the variable a to be an array. The fact that it was declared to have size 3 is part of its declaration, but not part of its underlying type.\n\n\nSizes are determined dynamically (at run time) and thus cannot be type-checked statically when the program is compiled. As a result, any conformance error on size will raise a run-time error. For example, trying to assign an array of size 5 to an array of size 6 will cause a run-time error. Similarly, multiplying an \\(N \\times M\\) by a \\(J \\times K\\) matrix will raise a run-time error if \\(M \\neq J\\).\n\n\n\n\nLike sizes, constraints are not treated as part of a variable’s type in Stan when it comes to the compile-time check of operations it may participate in. Anywhere Stan accepts a matrix as an argument, it will syntactically accept a correlation matrix or covariance matrix or Cholesky factor. Thus a covariance matrix may be assigned to a matrix and vice-versa.\nSimilarly, a bounded real may be assigned to an unconstrained real and vice-versa.\n\n\nFor arguments to functions, constraints are sometimes, but not always checked when the function is called. Exclusions include C++ standard library functions. 
All probability functions and cumulative distribution functions check that their arguments are appropriate at run time as the function is called.\n\n\n\nFor data variables, constraints are checked after the variable is read from a data file or other source. For transformed data variables, the check is done after the statements in the transformed data block have executed. Thus it is legal for intermediate values of variables to not satisfy declared constraints.\nFor parameters, constraints are enforced by the transform applied and do not need to be checked. For transformed parameters, the check is done after the statements in the transformed parameters block have executed.\nFor all blocks defining variables (transformed data, transformed parameters, generated quantities), real values are initialized to NaN and integer values are initialized to the smallest legal integer (i.e., a large absolute value negative number).\nFor generated quantities, constraints are enforced after the statements in the generated quantities block have executed.\n\n\n\n\nTo discuss data types, it is convenient to have a notation for naming them. The type naming notation outlined in this section is not part of the Stan programming language, but rather a convention adopted in this document to enable a concise description of a type.\nBecause size information is not part of a data type, data types will be written without size information. For instance, array[] real is the type of a one-dimensional array of reals and matrix is the type of matrices. The three-dimensional integer array type is written as array[,,] int, indicating the number of slots available for indexing. 
Similarly, array[,] vector is the type of a two-dimensional array of vectors.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#variable-declaration.section", + "href": "reference-manual/types.html#variable-declaration.section", + "title": "Data Types and Declarations", + "section": "", + "text": "Variables in Stan are declared by giving a type and a name. For example\nint N;\nvector[N] y;\narray[5] matrix[3, 4] A;\ndeclares a variable N that is an integer, a variable y that is a vector of length N (the previously declared variable), and a variable A, which is a length-5 array where each element is a 3 by 4 matrix.\nThe size of top-level variables in the parameters, transformed parameters, and generated quantities must remain constant across all iterations, therefore only data variables can be used in top-level size declarations.\n// illegal and will be flagged by the compiler:\ngenerated quantities {\n int N = 10;\n array[N] int foo;\nDepending on where the variable is declared in the Stan program, it either must or cannot have size information, and constraints are either optional or not allowed.\n// valid block variables, but not locals or function parameters\nvector<lower=0>[N] u;\n\n// valid as a block or local variable, but not a function parameter\narray[3] int is;\n\n// function parameters exclude sizes and cannot be constrained\nvoid pretty_print_tri_lower(matrix x) { ... }\nTop-level variables can have constraints and must include sizes for their types, as in the above examples. Local variables, like those defined inside loops or local blocks cannot be constrained, but still include sizes. Finally, variables declared as function parameters are not constrained types and exclude sizes.\nIn the following table, the leftmost column is a list of the unconstrained and undimensioned basic types; these are used as function return types and argument types. 
The middle column is of unconstrained types with dimensions; these are used as local variable types. The variables M and N indicate number of rows and columns, respectively. The variable K is used for square matrices, i.e., K denotes both the number of rows and columns. The rightmost column lists the corresponding constrained types. An expression of any right-hand column type may be assigned to its corresponding left-hand column basic type. At runtime, dimensions are checked for consistency for all variables; containers of any sizes may be assigned to function arguments. The constrained matrix types cov_matrix[K], corr_matrix[K], cholesky_factor_cov[K], and cholesky_factor_corr[K] are only assignable to matrices of dimensions matrix[K, K] types.\n\n\n\n\n\n\n\n\nFunction Argument (unsized)\n Local\n(unconstrained)\n Block\n (constrained)\n\n\n\n\nint\nint\nint\n\n\n\n\nint<lower=L>\n\n\n\n\nint<upper=U>\n\n\n\n\nint<lower=L, upper=U>\n\n\n\n\nint<offset=O>\n\n\n\n\nint<multiplier=M>\n\n\n\n\nint<offset=O, multiplier=M>\n\n\nreal\nreal\nreal\n\n\n\n\nreal<lower=L>\n\n\n\n\nreal<upper=U>\n\n\n\n\nreal<lower=L, upper=U>\n\n\n\n\nreal<offset=O>\n\n\n\n\nreal<multiplier=M>\n\n\n\n\nreal<offset=O, multiplier=M>\n\n\ncomplex\ncomplex\ncomplex\n\n\nvector\nvector[N]\nvector[N]\n\n\n\n\nvector[N]<lower=L>\n\n\n\n\nvector[N]<upper=U>\n\n\n\n\nvector[N]<lower=L, upper=U>\n\n\n\n\nvector[N]<offset=O>\n\n\n\n\nvector[N]<multiplier=M>\n\n\n\n\nvector[N]<offset=O, multiplier=M>\n\n\n\n\nordered[N]\n\n\n\n\npositive_ordered[N]\n\n\n\n\nsimplex[N]\n\n\n\n\nunit_vector[N]\n\n\n\n\nsum_to_zero_vector[N]\n\n\nrow_vector\nrow_vector[N]\nrow_vector[N]\n\n\n\n\nrow_vector[N]<lower=L>\n\n\n\n\nrow_vector[N]<upper=U>\n\n\n\n\nrow_vector[N]<lower=L, upper=U>\n\n\n\n\nrow_vector[N]<offset=O>\n\n\n\n\nrow_vector[N]<multiplier=M>\n\n\n\n\nrow_vector[N]<offset=O, multiplier=M>\n\n\nmatrix\nmatrix[M, N]\nmatrix[M, N]\n\n\n\n\nmatrix[M, N]<lower=L>\n\n\n\n\nmatrix[M, 
N]<upper=U>\n\n\n\n\nmatrix[M, N]<lower=L, upper=U> |\n\n\n\n\nmatrix[M, N]<offset=O>\n\n\n\n\nmatrix[M, N]<multiplier=M>\n\n\n\n\nmatrix[M, N]<offset=O, multiplier=M>\n\n\n\n\ncolumn_stochastic_matrix[M, N]\n\n\n\n\nrow_stochastic_matrix[M, N]\n\n\n\n\nsum_to_zero_matrix[M, N]\n\n\n\nmatrix[K, K]\ncorr_matrix[K]\n\n\n\nmatrix[K, K]\ncov_matrix[K]\n\n\n\nmatrix[K, K]\ncholesky_factor_corr[K]\n\n\n\nmatrix[K, K]\ncholesky_factor_cov[K]\n\n\ncomplex_vector\ncomplex_vector[M]\ncomplex_vector[M]\n\n\ncomplex_row_vector\ncomplex_row_vector[N]\ncomplex_row_vector[N]\n\n\ncomplex_matrix\ncomplex_matrix[M, N]\ncomplex_matrix[M,N]\n\n\narray[] vector\narray[M] vector[N]\narray[M] vector[N]\n\n\n\n\narray[M] vector[N]<lower=L>\n\n\n\n\narray[M] vector[N]<upper=U>\n\n\n\n\narray[M] vector[N]<lower=L, upper=U>\n\n\n\n\narray[M] vector[N]<offset=O>\n\n\n\n\narray[M] vector[N]<multiplier=M>\n\n\n\n\narray[M] vector[N]<offset=O, multiplier=M>\n\n\n\n\narray[M] ordered[N]\n\n\n\n\narray[M] positive_ordered[N]\n\n\n\n\narray[M] simplex[N]\n\n\n\n\narray[M] unit_vector[N]\n\n\n\n\narray[M] sum_to_zero_vector[N]\n\n\n\n\nAdditional array types follow the same basic template as the final example in the table and can contain any of the previous types. The unsized version of arrays with more than one dimension is specified by using commas, e.g. array[ , ] is a 2-D array.\nFor more on how function arguments and return types are declared, consult the User’s Guide chapter on functions.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#compound-variable-declaration-and-definition", + "href": "reference-manual/types.html#compound-variable-declaration-and-definition", + "title": "Data Types and Declarations", + "section": "", + "text": "Stan allows assignable variables to be declared and defined in a single statement. 
Assignable variables are\n\nlocal variables, and\nvariables declared in the transformed data, transformed parameters, or generated quantities blocks.\n\nFor example, the statement\nint N = 5;\ndeclares the variable N to be an integer scalar type and at the same time defines it to be the value of the expression 5.\n\n\nThe type of the expression on the right-hand side of the assignment must be assignable to the type of the variable being declared. For example, it is legal to have\nreal sum = 0;\neven though 0 is of type int and sum is of type real, because integer-typed scalar expressions can be assigned to real-valued scalar variables. In all other cases, the type of the expression on the right-hand side of the assignment must be identical to the type of the variable being declared.\nVariables of any type may have values assigned to them. For example,\nmatrix[3, 2] a = b;\ndeclares a \\(3 \\times 2\\) matrix variable a and assigns a copy of the value of b to the variable a. The variable b must be of type matrix for the statement to be well formed. For the code to execute successfully, b must be the same shape as a, but this cannot be validated until run time. Because a copy is assigned, subsequent changes to a do not affect b and subsequent changes to b do not affect a.\n\n\n\nThe right-hand side may be any expression which has a type which is assignable to the variable being declared. For example,\nmatrix[3, 2] a = 0.5 * (b + c);\nassigns the matrix variable a to half of the sum of b and c. The only requirement on b and c is that the expression b + c be of type matrix. For example, b could be of type matrix and c of type real, because adding a matrix to a scalar produces a matrix, and then multiplying by a scalar produces another matrix.\nSimilarly,\ncomplex z = 2 + 3i;\nassigns the complex number \\(2 + 3i\\) to the complex scalar z. 
The right-hand side expression can be a call to a user defined function, allowing general algorithms to be applied that might not be otherwise expressible as simple expressions (e.g., iterative or recursive algorithms).\n\n\n\nAny variable that is in scope and any function that is available in the block in which the compound declaration and definition appears may be used in the expression on the right-hand side of the compound declaration and definition statement.", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#declaring-multiple-variables-at-once", + "href": "reference-manual/types.html#declaring-multiple-variables-at-once", + "title": "Data Types and Declarations", + "section": "", + "text": "Stan will interpret multiple comma-separated variable names following a single type as declaring multiple new variables. This is available for all variable declarations in all blocks.\n\n\nThe code:\nreal x, y;\nis equivalent to\nreal x;\nreal y;\nAs a result, all declarations on the same line must be of the same type.\n\n\n\nThe ability to declare multiple variables can be combined with assignments whenever a declare-define is valid, as documented in the section introducing compound declarations and definitions :\nreal x = 3, y = 5.6;\nConstrained data types can also be declared together, so long as the constraint for each variable is the same:\nreal<lower=0> x, y;", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/types.html#footnotes", + "href": "reference-manual/types.html#footnotes", + "title": "Data Types and Declarations", + "section": "Footnotes", + "text": "Footnotes\n\n\nStan compiles integers to int and reals to double types in C++. 
Precise details of rounding will depend on the compiler and hardware architecture on which the code is run.↩︎", + "crumbs": [ + "Reference Manual", + "Language", + "Data Types and Declarations" + ] + }, + { + "objectID": "reference-manual/variational.html", + "href": "reference-manual/variational.html", + "title": "Variational Inference", + "section": "", + "text": "Stan implements an automatic variational inference algorithm, called Automatic Differentiation Variational Inference (ADVI) Kucukelbir et al. (2017). In this chapter, we describe the specifics of how ADVI maximizes the variational objective.\n\n\nADVI optimizes the ELBO in the real-coordinate space using stochastic gradient ascent. We obtain noisy (yet unbiased) gradients of the variational objective using automatic differentiation and Monte Carlo integration. The algorithm ascends these gradients using an adaptive stepsize sequence. We evaluate the ELBO also using Monte Carlo integration and measure convergence similar to the relative tolerance scheme in Stan’s optimization feature.\n\n\nADVI uses Monte Carlo integration to approximate the variational objective function, the ELBO. The number of draws used to approximate the ELBO is denoted by elbo_samples. We recommend a default value of \\(100\\), as we only evaluate the ELBO every eval_elbo iterations, which also defaults to \\(100\\).\n\n\n\nADVI uses Monte Carlo integration to approximate the gradients of the ELBO. The number of draws used to approximate the gradients is denoted by grad_samples. We recommend a default value of \\(1\\), as this is the most efficient. It is also a very noisy estimate of the gradient, but stochastic gradient ascent is capable of following such gradients.\n\n\n\nADVI uses a finite-memory version of adaGrad Duchi, Hazan, and Singer (2011). This has a single parameter that we expose, denoted eta. We now have a warmup adaptation phase that selects a good value for eta. 
The procedure does a heuristic search over eta values that span 5 orders of magnitude.\n\n\n\nADVI tracks the progression of the ELBO through the stochastic optimization. Specifically, ADVI heuristically determines a rolling window over which it computes the average and the median change of the ELBO. Should either number fall below a threshold, denoted by tol_rel_obj, we consider the algorithm to have converged. The change in ELBO is calculated the same way as in Stan’s optimization module.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Variational Inference" + ] + }, + { + "objectID": "reference-manual/variational.html#stochastic-gradient-ascent", + "href": "reference-manual/variational.html#stochastic-gradient-ascent", + "title": "Variational Inference", + "section": "", + "text": "ADVI optimizes the ELBO in the real-coordinate space using stochastic gradient ascent. We obtain noisy (yet unbiased) gradients of the variational objective using automatic differentiation and Monte Carlo integration. The algorithm ascends these gradients using an adaptive stepsize sequence. We evaluate the ELBO also using Monte Carlo integration and measure convergence similar to the relative tolerance scheme in Stan’s optimization feature.\n\n\nADVI uses Monte Carlo integration to approximate the variational objective function, the ELBO. The number of draws used to approximate the ELBO is denoted by elbo_samples. We recommend a default value of \\(100\\), as we only evaluate the ELBO every eval_elbo iterations, which also defaults to \\(100\\).\n\n\n\nADVI uses Monte Carlo integration to approximate the gradients of the ELBO. The number of draws used to approximate the gradients is denoted by grad_samples. We recommend a default value of \\(1\\), as this is the most efficient. 
It is also a very noisy estimate of the gradient, but stochastic gradient ascent is capable of following such gradients.\n\n\n\nADVI uses a finite-memory version of adaGrad Duchi, Hazan, and Singer (2011). This has a single parameter that we expose, denoted eta. We now have a warmup adaptation phase that selects a good value for eta. The procedure does a heuristic search over eta values that span 5 orders of magnitude.\n\n\n\nADVI tracks the progression of the ELBO through the stochastic optimization. Specifically, ADVI heuristically determines a rolling window over which it computes the average and the median change of the ELBO. Should either number fall below a threshold, denoted by tol_rel_obj, we consider the algorithm to have converged. The change in ELBO is calculated the same way as in Stan’s optimization module.", + "crumbs": [ + "Reference Manual", + "Algorithms", + "Variational Inference" + ] + }, + { + "objectID": "stan-users-guide/algebraic-equations.html", + "href": "stan-users-guide/algebraic-equations.html", + "title": "Solving Algebraic Equations", + "section": "", + "text": "Stan provides a built-in mechanism for specifying systems of algebraic equations. These systems can be solved either with the Newton method, as implemented in the Kinsol package (Hindmarsh et al. 2005), or with the Powell hybrid method (Powell 1970). The function signatures for Stan’s algebraic solvers are fully described in the algebraic solver section of the reference manual.\nSolving any system of algebraic equations can be translated into a root-finding problem, that is, given a function \\(f\\), we wish to find \\(y\\) such that \\(f(y) = 0\\).\n\n\nFor systems of linear algebraic equations, we recommend solving the system using matrix division. 
The algebraic solver becomes handy when we want to solve nonlinear equations.\nAs an illustrative example, we consider the following nonlinear system of two equations with two unknowns: \\[\\begin{align*}\nz_1 &= y_1 - \\theta_1 \\\\\nz_2 &= y_1 y_2 + \\theta_2\n\\end{align*}\\]\nOur goal is to simultaneously solve all equations for \\(y_1\\) and \\(y_2\\), such that the vector \\(z\\) goes to 0.\n\n\n\nA system of algebraic equations is coded directly in Stan as a function with a strictly specified signature. For example, the nonlinear system given above can be coded using the following function in Stan (see the user-defined functions section for more information on coding user-defined functions).\nvector system(vector y, // unknowns\n vector theta, // parameters\n data array[] real x_r, // data (real)\n array[] int x_i) { // data (integer)\n vector[2] z;\n z[1] = y[1] - theta[1];\n z[2] = y[1] * y[2] + theta[2];\n return z;\n}\nThe function takes the unknowns we wish to solve for in y (a vector), the system parameters in theta (a vector), the real data in x_r (a real array) and the integer data in x_i (an integer array). The system function returns the value of the function (a vector), for which we want to compute the roots. Our example does not use real or integer data. Nevertheless, these unused arguments must be included in the system function with exactly the signature above.\nThe body of the system function here could also be coded using a row vector constructor and transposition,\nreturn [ y[1] - theta[1],\n y[1] * y[2] + theta[2] ]';\nAs systems get more complicated, naming the intermediate expressions goes a long way toward readability.\n\n\nThe function defining the system must have exactly these argument types and return type. This may require passing in zero-length arrays for data or a zero-length vector for parameters if the system does not involve data or parameters.\n\n\n\n\nLet’s suppose \\(\\theta = (3, 6)\\). 
To call the algebraic solver, we need to provide an initial guess. This varies on a case-by-case basis, but in general a good guess will speed up the solver and, in pathological cases, even determine whether the solver converges or not. If the solver does not converge, the Metropolis proposal gets rejected and a warning message, stating no acceptable solution was found, is issued.\nThe solver has three tuning parameters to determine convergence: the relative tolerance, the function tolerance, and the maximum number of steps. Their behavior is explained in the section about algebraic solvers with control parameters.\nThe following code returns the solution to our nonlinear algebraic system:\ntransformed data {\n vector[2] y_guess = [1, 1]';\n array[0] real x_r;\n array[0] int x_i;\n}\n\ntransformed parameters {\n vector[2] theta = [3, 6]';\n vector[2] y;\n\n y = solve_newton(system, y_guess, theta, x_r, x_i);\n}\nwhich returns \\(y = (3, -2)\\).\n\n\nThe arguments for the real data x_r and the integer data x_i must be expressions that only involve data or transformed data variables. theta, on the other hand, must only involve parameters. Note there are no restrictions on the initial guess, y_guess, which may be a data or a parameter vector.\n\n\n\nThe Jacobian of the solution with respect to the parameters is computed using the implicit function theorem, which imposes certain restrictions. In particular, the Jacobian of the algebraic function \\(f\\) with respect to the unknowns \\(x\\) must be invertible. This requires the Jacobian to be square, meaning \\(f(y)\\) and \\(y\\) have the same length or, in other words the number of equations in the system is the same as the number of unknowns.\n\n\n\nCertain systems may be degenerate, meaning they have multiple solutions. The algebraic solver will not report these cases, as the algorithm stops once it has found an acceptable solution. The initial guess will often determine which solution gets found first. 
The degeneracy may be broken by putting additional constraints on the solution. For instance, it might make “physical sense” for a solution to be positive or negative.\nOn the other hand, a system may not have a solution (for a given point in the parameter space). In that case, the solver will not converge to a solution. When the solver fails to do so, the current Metropolis proposal gets rejected.\n\n\n\n\nThe call to the algebraic solver shown previously uses the default control settings. The _tol variant of the solver function allows three additional parameters, all of which must be supplied before the variadic arguments.\ny = solve_newton_tol(system, y_guess, scaling_step, f_tol, max_steps,\n theta, x_r, x_i);\nFor the Newton solver the three control arguments are scaling step, function tolerance, and maximum number of steps. For Powell’s hybrid method the three control arguments are relative tolerance, function tolerance, and maximum number of steps. If a Newton step is smaller than the scaling step tolerance, the code breaks, assuming the solver is no longer making significant progress. If set to 0, this constraint is ignored. For Powell’s hybrid method the relative tolerance is the estimated relative error of the solver and serves to test if a satisfactory solution has been found. After convergence of either solver, the proposed solution is plugged into the algebraic system and its norm is compared to the function tolerance. If the norm is below the function tolerance, the solution is deemed acceptable. If the solver reaches the maximum number of steps, it stops and returns an error message. 
If one of the criteria is not met, the Metropolis proposal gets rejected with a warning message explaining which criterion was not satisfied.\nThe default values for the control arguments are respectively scaling_step = 1e-3 (\\(10^{-3}\\)), rel_tol = 1e-10 (\\(10^{-10}\\)), f_tol = 1e-6 (\\(10^{-6}\\)), and max_steps = 200 (\\(200\\)).\n\n\nThe relative and function tolerances control the accuracy of the solution generated by the solver. Relative tolerances are relative to the solution value. The function tolerance is the norm of the algebraic function, once we plug in the proposed solution. This norm should go to 0 (equivalently, all elements of the vector function are 0). It helps to think about this geometrically. Ideally the output of the algebraic function is at the origin; the norm measures deviations from this ideal. As the length of the return vector increases, a certain function tolerance becomes an increasingly difficult criterion to meet, given each individual element of the vector contribute to the norm.\nSmaller relative tolerances produce more accurate solutions but require more computational time.\n\n\nThe tolerances should be set low enough that setting them lower does not change the statistical properties of posterior sample generated by the Stan program. The sensitivity can be analysed using importance sampling without need to re-run MCMC with different tolerances as shown by Timonen et al. (2023).\n\n\n\n\nThe maximum number of steps can be used to stop a runaway simulation. This can arise in MCMC when a bad jump is taken, particularly during warmup. If the limit is hit, the current Metropolis proposal gets rejected. 
Users will see a warning message stating the maximum number of steps has been exceeded.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Solving Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/algebraic-equations.html#example-system-of-nonlinear-algebraic-equations", + "href": "stan-users-guide/algebraic-equations.html#example-system-of-nonlinear-algebraic-equations", + "title": "Solving Algebraic Equations", + "section": "", + "text": "For systems of linear algebraic equations, we recommend solving the system using matrix division. The algebraic solver becomes handy when we want to solve nonlinear equations.\nAs an illustrative example, we consider the following nonlinear system of two equations with two unknowns: \\[\\begin{align*}\nz_1 &= y_1 - \\theta_1 \\\\\nz_2 &= y_1 y_2 + \\theta_2\n\\end{align*}\\]\nOur goal is to simultaneously solve all equations for \\(y_1\\) and \\(y_2\\), such that the vector \\(z\\) goes to 0.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Solving Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/algebraic-equations.html#coding-an-algebraic-system", + "href": "stan-users-guide/algebraic-equations.html#coding-an-algebraic-system", + "title": "Solving Algebraic Equations", + "section": "", + "text": "A system of algebraic equations is coded directly in Stan as a function with a strictly specified signature. 
For example, the nonlinear system given above can be coded using the following function in Stan (see the user-defined functions section for more information on coding user-defined functions).\nvector system(vector y, // unknowns\n vector theta, // parameters\n data array[] real x_r, // data (real)\n array[] int x_i) { // data (integer)\n vector[2] z;\n z[1] = y[1] - theta[1];\n z[2] = y[1] * y[2] + theta[2];\n return z;\n}\nThe function takes the unknowns we wish to solve for in y (a vector), the system parameters in theta (a vector), the real data in x_r (a real array) and the integer data in x_i (an integer array). The system function returns the value of the function (a vector), for which we want to compute the roots. Our example does not use real or integer data. Nevertheless, these unused arguments must be included in the system function with exactly the signature above.\nThe body of the system function here could also be coded using a row vector constructor and transposition,\nreturn [ y[1] - theta[1],\n y[1] * y[2] + theta[2] ]';\nAs systems get more complicated, naming the intermediate expressions goes a long way toward readability.\n\n\nThe function defining the system must have exactly these argument types and return type. This may require passing in zero-length arrays for data or a zero-length vector for parameters if the system does not involve data or parameters.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Solving Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/algebraic-equations.html#calling-the-algebraic-solver", + "href": "stan-users-guide/algebraic-equations.html#calling-the-algebraic-solver", + "title": "Solving Algebraic Equations", + "section": "", + "text": "Let’s suppose \\(\\theta = (3, 6)\\). To call the algebraic solver, we need to provide an initial guess. 
This varies on a case-by-case basis, but in general a good guess will speed up the solver and, in pathological cases, even determine whether the solver converges or not. If the solver does not converge, the Metropolis proposal gets rejected and a warning message, stating no acceptable solution was found, is issued.\nThe solver has three tuning parameters to determine convergence: the relative tolerance, the function tolerance, and the maximum number of steps. Their behavior is explained in the section about algebraic solvers with control parameters.\nThe following code returns the solution to our nonlinear algebraic system:\ntransformed data {\n vector[2] y_guess = [1, 1]';\n array[0] real x_r;\n array[0] int x_i;\n}\n\ntransformed parameters {\n vector[2] theta = [3, 6]';\n vector[2] y;\n\n y = solve_newton(system, y_guess, theta, x_r, x_i);\n}\nwhich returns \\(y = (3, -2)\\).\n\n\nThe arguments for the real data x_r and the integer data x_i must be expressions that only involve data or transformed data variables. theta, on the other hand, must only involve parameters. Note there are no restrictions on the initial guess, y_guess, which may be a data or a parameter vector.\n\n\n\nThe Jacobian of the solution with respect to the parameters is computed using the implicit function theorem, which imposes certain restrictions. In particular, the Jacobian of the algebraic function \\(f\\) with respect to the unknowns \\(x\\) must be invertible. This requires the Jacobian to be square, meaning \\(f(y)\\) and \\(y\\) have the same length or, in other words the number of equations in the system is the same as the number of unknowns.\n\n\n\nCertain systems may be degenerate, meaning they have multiple solutions. The algebraic solver will not report these cases, as the algorithm stops once it has found an acceptable solution. The initial guess will often determine which solution gets found first. The degeneracy may be broken by putting additional constraints on the solution. 
For instance, it might make “physical sense” for a solution to be positive or negative.\nOn the other hand, a system may not have a solution (for a given point in the parameter space). In that case, the solver will not converge to a solution. When the solver fails to do so, the current Metropolis proposal gets rejected.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Solving Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/algebraic-equations.html#algebra-control.section", + "href": "stan-users-guide/algebraic-equations.html#algebra-control.section", + "title": "Solving Algebraic Equations", + "section": "", + "text": "The call to the algebraic solver shown previously uses the default control settings. The _tol variant of the solver function allows three additional parameters, all of which must be supplied before the variadic arguments.\ny = solve_newton_tol(system, y_guess, scaling_step, f_tol, max_steps,\n theta, x_r, x_i);\nFor the Newton solver the three control arguments are scaling step, function tolerance, and maximum number of steps. For Powell’s hybrid method the three control arguments are relative tolerance, function tolerance, and maximum number of steps. If a Newton step is smaller than the scaling step tolerance, the code breaks, assuming the solver is no longer making significant progress. If set to 0, this constraint is ignored. For Powell’s hybrid method the relative tolerance is the estimated relative error of the solver and serves to test if a satisfactory solution has been found. After convergence of either solver, the proposed solution is plugged into the algebraic system and its norm is compared to the function tolerance. If the norm is below the function tolerance, the solution is deemed acceptable. If the solver reaches the maximum number of steps, it stops and returns an error message. 
If one of the criteria is not met, the Metropolis proposal gets rejected with a warning message explaining which criterion was not satisfied.\nThe default values for the control arguments are respectively scaling_step = 1e-3 (\\(10^{-3}\\)), rel_tol = 1e-10 (\\(10^{-10}\\)), f_tol = 1e-6 (\\(10^{-6}\\)), and max_steps = 200 (\\(200\\)).\n\n\nThe relative and function tolerances control the accuracy of the solution generated by the solver. Relative tolerances are relative to the solution value. The function tolerance is the norm of the algebraic function, once we plug in the proposed solution. This norm should go to 0 (equivalently, all elements of the vector function are 0). It helps to think about this geometrically. Ideally the output of the algebraic function is at the origin; the norm measures deviations from this ideal. As the length of the return vector increases, a certain function tolerance becomes an increasingly difficult criterion to meet, given each individual element of the vector contribute to the norm.\nSmaller relative tolerances produce more accurate solutions but require more computational time.\n\n\nThe tolerances should be set low enough that setting them lower does not change the statistical properties of posterior sample generated by the Stan program. The sensitivity can be analysed using importance sampling without need to re-run MCMC with different tolerances as shown by Timonen et al. (2023).\n\n\n\n\nThe maximum number of steps can be used to stop a runaway simulation. This can arise in MCMC when a bad jump is taken, particularly during warmup. If the limit is hit, the current Metropolis proposal gets rejected. 
Users will see a warning message stating the maximum number of steps has been exceeded.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Solving Algebraic Equations" + ] + }, + { + "objectID": "stan-users-guide/clustering.html", + "href": "stan-users-guide/clustering.html", + "title": "Clustering Models", + "section": "", + "text": "Unsupervised methods for organizing data into groups are collectively referred to as clustering. This chapter describes the implementation in Stan of two widely used statistical clustering models, soft \\(K\\)-means and latent Dirichlet allocation (LDA). In addition, this chapter includes naive Bayesian classification, which can be viewed as a form of clustering which may be supervised. These models are typically expressed using discrete parameters for cluster assignments. Nevertheless, they can be implemented in Stan like any other mixture model by marginalizing out the discrete parameters (see the mixture modeling chapter).\n\n\nAs mentioned in the clustering section, clustering models and finite mixture models are really just two sides of the same coin. The “soft” \\(K\\)-means model described in the next section is a normal mixture model (with varying assumptions about covariance in higher dimensions leading to variants of \\(K\\)-means). Latent Dirichlet allocation is a mixed-membership multinomial mixture.\n\n\n\n\\(K\\)-means clustering is a method of clustering data represented as \\(D\\)-dimensional vectors. Specifically, there will be \\(N\\) items to be clustered, each represented as a vector \\(y_n \\in \\mathbb{R}^D\\). 
In the “soft” version of \\(K\\)-means, the assignments to clusters will be probabilistic.\n\n\n\\(K\\)-means clustering is typically described geometrically in terms of the following algorithm, which assumes the number of clusters \\(K\\) and data vectors \\(y\\) as input.\n\nFor each \\(n\\) in \\(\\{1,\\dotsc,N\\}\\), randomly assign vector \\(y_n\\) to a cluster in \\(\\{1,\\dotsc,K\\}\\);\nRepeat\n\nFor each cluster \\(k\\) in \\(\\{1,\\dotsc,K\\}\\), compute the cluster centroid \\(\\mu_k\\) by averaging the vectors assigned to that cluster;\nFor each \\(n\\) in \\(\\{1,\\dotsc,N\\}\\), reassign \\(y_n\\) to the cluster \\(k\\) for which the (Euclidean) distance from \\(y_n\\) to \\(\\mu_k\\) is smallest;\nIf no vectors changed cluster, return the cluster assignments.\n\n\nThis algorithm is guaranteed to terminate.\n\n\n\nSoft \\(K\\)-means clustering treats the cluster assignments as probability distributions over the clusters. Because of the connection between Euclidean distance and multivariate normal models with a fixed covariance, soft \\(K\\)-means can be expressed (and coded in Stan) as a multivariate normal mixture model.\nIn the full generative model, each data point \\(n\\) in \\(\\{1,\\dotsc,N\\}\\) is assigned a cluster \\(z_n \\in \\{1,\\dotsc,K\\}\\) with symmetric uniform probability, \\[\nz_n \\sim \\textsf{categorical}(1/K),\n\\] where \\(1\\) is the unit vector of \\(K\\) dimensions, so that \\(1/K\\) is the symmetric \\(K\\)-simplex. Thus the model assumes that each data point is drawn from a hard decision about cluster membership. 
The softness arises only from the uncertainty about which cluster generated a data point.\nThe data points themselves are generated from a multivariate normal distribution whose parameters are determined by the cluster assignment \\(z_n\\), \\[\ny_n \\sim \\textsf{normal}(\\mu_{z[n]},\\Sigma_{z[n]})\n\\]\nThe sample implementation in this section assumes a fixed unit covariance matrix shared by all clusters \\(k\\), \\[\n\\Sigma_k = \\mathrm{diag\\_matrix}({\\bf 1}),\n\\] so that the log multivariate normal can be implemented directly up to a proportion by \\[\n\\mathrm{normal}\\left( y_n \\mid \\mu_k, \\mathrm{diag\\_matrix}({\\bf 1}) \\right)\n\\propto \\exp \\left (- \\frac{1}{2} \\sum_{d=1}^D \\left( \\mu_{k,d} - y_{n,d}\n \\right)^2 \\right).\n\\] The spatial perspective on \\(K\\)-means arises by noting that the inner term is just half the negative Euclidean distance from the cluster mean \\(\\mu_k\\) to the data point \\(y_n\\).\n\n\n\nConsider the following Stan program for implementing \\(K\\)-means clustering.\ndata {\n int<lower=0> N; // number of data points\n int<lower=1> D; // number of dimensions\n int<lower=1> K; // number of clusters\n array[N] vector[D] y; // observations\n}\ntransformed data {\n real<upper=0> neg_log_K;\n neg_log_K = -log(K);\n}\nparameters {\n array[K] vector[D] mu; // cluster means\n}\ntransformed parameters {\n array[N, K] real<upper=0> soft_z; // log unnormalized clusters\n for (n in 1:N) {\n for (k in 1:K) {\n soft_z[n, k] = neg_log_K\n - 0.5 * dot_self(mu[k] - y[n]);\n }\n }\n}\nmodel {\n // prior\n for (k in 1:K) {\n mu[k] ~ std_normal();\n }\n\n // likelihood\n for (n in 1:N) {\n target += log_sum_exp(soft_z[n]);\n }\n}\nThere is an independent standard normal prior on the centroid parameters; this prior could be swapped with other priors, or even a hierarchical model to fit an overall problem scale and location.\nThe only parameter is mu, where mu[k] is the centroid for cluster \\(k\\). 
The transformed parameters soft_z[n] contain the log of the unnormalized cluster assignment probabilities. The vector soft_z[n] can be converted back to a normalized simplex using the softmax function (see the functions reference manual), either externally or within the model’s generated quantities block.\n\n\n\nThe multivariate normal distribution with unit covariance matrix produces a log probability density proportional to Euclidean distance (i.e., \\(L_2\\) distance). Other distributions relate to other geometries. For instance, replacing the normal distribution with the double exponential (Laplace) distribution produces a clustering model based on \\(L_1\\) distance (i.e., Manhattan or taxicab distance).\nWithin the multivariate normal version of \\(K\\)-means, replacing the unit covariance matrix with a shared covariance matrix amounts to working with distances defined in a space transformed by the inverse covariance matrix.\nAlthough there is no global spatial analog, it is common to see soft \\(K\\)-means specified with a per-cluster covariance matrix. In this situation, a hierarchical prior may be used for the covariance matrices.\n\n\n\n\nTwo problems make it pretty much impossible to perform full Bayesian inference for clustering models, the lack of parameter identifiability and the extreme multimodality of the posteriors. There is additional discussion related to the non-identifiability due to label switching in the label switching section.\n\n\nCluster assignments are not identified—permuting the cluster mean vectors mu leads to a model with identical likelihoods. For instance, permuting the first two indexes in mu and the first two indexes in each soft_z[n] leads to an identical likelihood (and prior).\nThe lack of identifiability means that the cluster parameters cannot be compared across multiple Markov chains. In fact, the only parameter in soft \\(K\\)-means is not identified, leading to problems in monitoring convergence. 
Clusters can even fail to be identified within a single chain, with indices swapping if the chain is long enough or the data are not cleanly separated.\n\n\n\nThe other problem with clustering models is that their posteriors are highly multimodal. One form of multimodality is the non-identifiability leading to index swapping. But even without the index problems the posteriors are highly multimodal.\nBayesian inference fails in cases of high multimodality because there is no way to visit all of the modes in the posterior in appropriate proportions and thus no way to evaluate integrals involved in posterior predictive inference.\nIn light of these two problems, the advice often given in fitting clustering models is to try many different initializations and select the sample with the highest overall probability. It is also popular to use optimization-based point estimators such as expectation maximization or variational Bayes, which can be much more efficient than sampling-based approaches.\n\n\n\n\nNaive Bayes is a kind of mixture model that can be used for classification or for clustering (or a mix of both), depending on which labels for items are observed.1\nMultinomial mixture models are referred to as “naive Bayes” because they are often applied to classification problems where the multinomial independence assumptions are clearly false.\nNaive Bayes classification and clustering can be applied to any data with multinomial structure. A typical example of this is natural language text classification and clustering, which is used as an example in what follows.\nThe observed data consists of a sequence of \\(M\\) documents made up of bags of words drawn from a vocabulary of \\(V\\) distinct words. A document \\(m\\) has \\(N_m\\) words, which are indexed as \\(w_{m,1}, \\dotsc,\nw_{m,N[m]} \\in \\{1,\\dotsc,V\\}\\). Despite the ordered indexing of words in a document, this order is not part of the model, which is clearly defective for natural human language data. 
A number of topics (or categories) \\(K\\) is fixed.\nThe multinomial mixture model generates a single category \\(z_m \\in\n\\{1,\\dotsc,K\\}\\) for each document \\(m \\in \\{1,\\dotsc,M\\}\\) according to a categorical distribution, \\[\nz_m \\sim \\textsf{categorical}(\\theta).\n\\] The \\(K\\)-simplex parameter \\(\\theta\\) represents the prevalence of each category in the data.\nNext, the words in each document are generated conditionally independently of each other and the words in other documents based on the category of the document, with word \\(n\\) of document \\(m\\) being generated as \\[\nw_{m,n} \\sim \\textsf{categorical}(\\phi_{z[m]}).\n\\] The parameter \\(\\phi_{z[m]}\\) is a \\(V\\)-simplex representing the probability of each word in the vocabulary in documents of category \\(z_m\\).\nThe parameters \\(\\theta\\) and \\(\\phi\\) are typically given symmetric Dirichlet priors. The prevalence \\(\\theta\\) is sometimes fixed to produce equal probabilities for each category \\(k \\in \\{1,\\dotsc,K\\}\\).\n\n\nThe specification for naive Bayes in the previous sections has used a ragged array notation for the words \\(w\\). Because Stan does not support ragged arrays, the models are coded using an alternative strategy that provides an index for each word in a global list of words. 
The data is organized as follows, with the word arrays laid out in a column and each assigned to its document in a second column.\n\\[\n\\begin{array}{lll}\n\\hline\n\\mathrm{n} \\qquad\\qquad\\qquad\\qquad & \\mathrm{w[n]} \\qquad & \\mathrm{doc[n]} \\\\\n\\hline\n1 & w_{1,1} & 1 \\\\\n2 & w_{1,2} & 1 \\\\\n\\vdots & \\vdots & \\vdots \\\\\nN_1 & w_{1,N[1]} & 1 \\\\\nN_1 + 1 & w_{2,1} & 2 \\\\\nN_1 + 2 & w_{2,2} & 2 \\\\\n\\vdots & \\vdots & \\vdots \\\\\nN_1 + N_2 & w_{2,N[2]} & 2 \\\\\nN_1 + N_2 + 1 & w_{3,1} & 3 \\\\\n\\vdots & \\vdots & \\vdots \\\\\nN = \\sum_{m=1}^M N_m & w_{M,N[M]} & M \\\\\n\\hline\n\\end{array}\n\\]\nThe relevant variables for the program are N, the total number of words in all the documents, the word array w, and the document identity array doc.\n\n\n\nA naive Bayes model for estimating the simplex parameters given training data with documents of known categories can be coded in Stan as follows\ndata {\n // training data\n int<lower=1> K; // num topics\n int<lower=1> V; // num words\n int<lower=0> M; // num docs\n int<lower=0> N; // total word instances\n array[M] int<lower=1, upper=K> z; // topic for doc m\n array[N] int<lower=1, upper=V> w; // word n\n array[N] int<lower=1, upper=M> doc; // doc ID for word n\n // hyperparameters\n vector<lower=0>[K] alpha; // topic prior\n vector<lower=0>[V] beta; // word prior\n}\nparameters {\n simplex[K] theta; // topic prevalence\n array[K] simplex[V] phi; // word dist for topic k\n}\nmodel {\n theta ~ dirichlet(alpha);\n for (k in 1:K) {\n phi[k] ~ dirichlet(beta);\n }\n for (m in 1:M) {\n z[m] ~ categorical(theta);\n }\n for (n in 1:N) {\n w[n] ~ categorical(phi[z[doc[n]]]);\n }\n}\nThe topic identifiers \\(z_m\\) are declared as data and the latent category assignments are included as part of the likelihood function.\n\n\n\nNaive Bayes models can be used in an unsupervised fashion to cluster multinomial-structured data into a fixed number \\(K\\) of categories. 
The data declaration includes the same variables as the model in the previous section excluding the topic labels z. Because z is discrete, it needs to be summed out of the model calculation. This is done for naive Bayes as for other mixture models. The parameters are the same up to the priors, but the likelihood is now computed as the marginal document probability\n\\[\\begin{align*}\n\\log\\, &p(w_{m,1},\\dotsc,w_{m,N_m} \\mid \\theta,\\phi) \\\\\n&= \\log \\sum_{k=1}^K\n \\left( \\textsf{categorical}(k \\mid \\theta)\n \\times \\prod_{n=1}^{N_m} \\textsf{categorical}(w_{m,n} \\mid \\phi_k)\n \\right) \\\\\n&= \\log \\sum_{k=1}^K \\exp \\left(\n \\log \\textsf{categorical}(k \\mid \\theta)\n + \\sum_{n=1}^{N_m} \\log \\textsf{categorical}(w_{m,n} \\mid \\phi_k)\n \\right).\n\\end{align*}\\]\nThe last step shows how the log_sum_exp function can be used to stabilize the numerical calculation and return a result on the log scale.\nmodel {\n array[M, K] real gamma;\n theta ~ dirichlet(alpha);\n for (k in 1:K) {\n phi[k] ~ dirichlet(beta);\n }\n for (m in 1:M) {\n for (k in 1:K) {\n gamma[m, k] = categorical_lpmf(k | theta);\n }\n }\n for (n in 1:N) {\n for (k in 1:K) {\n gamma[doc[n], k] = gamma[doc[n], k]\n + categorical_lpmf(w[n] | phi[k]);\n }\n }\n for (m in 1:M) {\n target += log_sum_exp(gamma[m]);\n }\n}\nThe local variable gamma[m, k] represents the value \\[\n\\gamma_{m,k} = \\log \\textsf{categorical}(k \\mid \\theta)\n+ \\sum_{n=1}^{N_m} \\log \\textsf{categorical}(w_{m,n} \\mid \\phi_k).\n\\]\nGiven \\(\\gamma\\), the posterior probability that document \\(m\\) is assigned category \\(k\\) is \\[\n\\Pr[z_m = k \\mid w,\\alpha,\\beta]\n=\n\\exp \\left(\n\\gamma_{m,k}\n- \\log \\sum_{k=1}^K \\exp \\left( \\gamma_{m,k} \\right)\n\\right).\n\\]\nIf the variable gamma were declared and defined in the transformed parameter block, its sampled values would be saved by Stan. 
The normalized posterior probabilities could also be defined as generated quantities.\n\n\n\nFull Bayesian posterior predictive inference for the naive Bayes model can be implemented in Stan by combining the models for labeled and unlabeled data. The estimands include both the model parameters and the posterior distribution over categories for the unlabeled data. The model is essentially a missing data model assuming the unknown category labels are missing completely at random; see Gelman et al. (2013) and Gelman and Hill (2007) for more information on missing data imputation. The model is also an instance of semisupervised learning because the unlabeled data contributes to the parameter estimations.\nTo specify a Stan model for performing full Bayesian inference, the model for labeled data is combined with the model for unlabeled data. A second document collection is declared as data, but without the category labels, leading to new variables M2, N2, w2, and doc2. The number of categories and number of words, as well as the hyperparameters are shared and only declared once. Similarly, there is only one set of parameters. Then the model contains a single set of statements for the prior, a set of statements for the labeled data, and a set of statements for the unlabeled data.\n\n\n\nAn alternative to full Bayesian inference involves estimating a model using labeled data, then applying it to unlabeled data without updating the parameter estimates based on the unlabeled data. This behavior can be implemented by moving the definition of gamma for the unlabeled documents to the generated quantities block. Because the variables no longer contribute to the log probability, they no longer jointly contribute to the estimation of the model parameters.\n\n\n\n\nLatent Dirichlet allocation (LDA) is a mixed-membership multinomial clustering model (Blei, Ng, and Jordan 2003) that generalizes naive Bayes. 
Using the topic and document terminology common in discussions of LDA, each document is modeled as having a mixture of topics, with each word drawn from a topic based on the mixing proportions.\n\n\nThe basic model assumes each document is generated independently based on fixed hyperparameters. For document \\(m\\), the first step is to draw a topic distribution simplex \\(\\theta_m\\) over the \\(K\\) topics, \\[\n\\theta_m \\sim \\textsf{Dirichlet}(\\alpha).\n\\]\nThe prior hyperparameter \\(\\alpha\\) is fixed to a \\(K\\)-vector of positive values. Each word in the document is generated independently conditional on the distribution \\(\\theta_m\\). First, a topic \\(z_{m,n} \\in \\{1,\\dotsc,K\\}\\) is drawn for the word based on the document-specific topic-distribution, \\[\nz_{m,n} \\sim \\textsf{categorical}(\\theta_m).\n\\]\nFinally, the word \\(w_{m,n}\\) is drawn according to the word distribution for topic \\(z_{m,n}\\), \\[\nw_{m,n} \\sim \\textsf{categorical}(\\phi_{z[m,n]}).\n\\] The distributions \\(\\phi_k\\) over words for topic \\(k\\) are also given a Dirichlet prior, \\[\n\\phi_k \\sim \\textsf{Dirichlet}(\\beta)\n\\]\nwhere \\(\\beta\\) is a fixed \\(V\\)-vector of positive values.\n\n\n\nAlthough Stan does not (yet) support discrete sampling, it is possible to calculate the marginal distribution over the continuous parameters by summing out the discrete parameters as in other mixture models. 
The marginal posterior of the topic and word variables is \\[\\begin{align*}\np(\\theta,\\phi \\mid w,\\alpha,\\beta)\n&\\propto p(\\theta \\mid \\alpha) \\, p(\\phi \\mid \\beta) \\, p(w \\mid \\theta,\\phi) \\\\\n&= \\prod_{m=1}^M p(\\theta_m \\mid \\alpha)\n \\times \\prod_{k=1}^K p(\\phi_k \\mid \\beta)\n \\times \\prod_{m=1}^M \\prod_{n=1}^{N[m]} p(w_{m,n} \\mid \\theta_m,\\phi).\n\\end{align*}\\]\nThe inner word-probability term is defined by summing out the topic assignments, \\[\\begin{align*}\np(w_{m,n} \\mid \\theta_m,\\phi)\n&= \\sum_{z=1}^K p(z,w_{m,n} \\mid \\theta_m,\\phi) \\\\\n&= \\sum_{z=1}^K p(z \\mid \\theta_m) \\, p(w_{m,n} \\mid \\phi_z).\n\\end{align*}\\]\nPlugging the distributions in and converting to the log scale provides a formula that can be implemented directly in Stan, \\[\\begin{align*}\n\\log\\, &p(\\theta,\\phi \\mid w,\\alpha,\\beta) \\\\\n&= \\sum_{m=1}^M \\log \\textsf{Dirichlet}(\\theta_m \\mid \\alpha)\n + \\sum_{k=1}^K \\log \\textsf{Dirichlet}(\\phi_k \\mid \\beta) \\\\\n&\\qquad + \\sum_{m=1}^M \\sum_{n=1}^{N[m]} \\log \\left(\n \\sum_{z=1}^K \\textsf{categorical}(z \\mid \\theta_m)\n \\times \\textsf{categorical}(w_{m,n} \\mid \\phi_z)\n \\right)\n\\end{align*}\\]\n\n\n\nApplying the marginal derived in the last section to the data structure described in this section leads to the following Stan program for LDA.\ndata {\n int<lower=2> K; // num topics\n int<lower=2> V; // num words\n int<lower=1> M; // num docs\n int<lower=1> N; // total word instances\n array[N] int<lower=1, upper=V> w; // word n\n array[N] int<lower=1, upper=M> doc; // doc ID for word n\n vector<lower=0>[K] alpha; // topic prior\n vector<lower=0>[V] beta; // word prior\n}\nparameters {\n array[M] simplex[K] theta; // topic dist for doc m\n array[K] simplex[V] phi; // word dist for topic k\n}\nmodel {\n for (m in 1:M) {\n theta[m] ~ dirichlet(alpha); // prior\n }\n for (k in 1:K) {\n phi[k] ~ dirichlet(beta); // prior\n }\n for (n in 1:N) {\n array[K] real 
gamma;\n for (k in 1:K) {\n gamma[k] = log(theta[doc[n], k]) + log(phi[k, w[n]]);\n }\n target += log_sum_exp(gamma); // likelihood;\n }\n}\nAs in the other mixture models, the log-sum-of-exponents function is used to stabilize the numerical arithmetic.\n\n\n\nTo account for correlations in the distribution of topics for documents, Blei and Lafferty (2007) introduced a variant of LDA in which the Dirichlet prior on the per-document topic distribution is replaced with a multivariate logistic normal distribution.\nThe authors treat the prior as a fixed hyperparameter. They use an \\(L_1\\)-regularized estimate of covariance, which is equivalent to the maximum a posteriori estimate given a double-exponential prior. Here the mean and covariance of the multivariate logistic normal are specified as data.\n\n\nThe Stan model in the previous section can be modified to implement the correlated topic model by replacing the Dirichlet topic prior alpha in the data declaration with the mean and covariance of the multivariate logistic normal prior.\ndata {\n // ... data as before without alpha ...\n vector[K] mu; // topic mean\n cov_matrix[K] Sigma; // topic covariance\n}\nRather than drawing the simplex parameter theta from a Dirichlet, a parameter eta is drawn from a multivariate normal distribution and then transformed using softmax into a simplex.\nparameters {\n array[K] simplex[V] phi; // word dist for topic k\n array[M] vector[K] eta; // topic dist for doc m\n}\ntransformed parameters {\n array[M] simplex[K] theta;\n for (m in 1:M) {\n theta[m] = softmax(eta[m]);\n }\n}\nmodel {\n for (m in 1:M) {\n eta[m] ~ multi_normal(mu, Sigma);\n }\n // ... model as before w/o prior for theta ...\n}\n\n\n\nBy adding a prior for the mean and covariance, Stan supports full Bayesian inference for the correlated topic model. This requires moving the declarations of topic mean mu and covariance Sigma from the data block to the parameters block and providing them with priors in the model. 
A relatively efficient and interpretable prior for the covariance matrix Sigma may be encoded as follows.\n// ... data block as before, but without alpha ...\nparameters {\n vector[K] mu; // topic mean\n corr_matrix[K] Omega; // correlation matrix\n vector<lower=0>[K] sigma; // scales\n array[M] vector[K] eta; // logit topic dist for doc m\n array[K] simplex[V] phi; // word dist for topic k\n}\ntransformed parameters {\n // ... eta as above ...\n cov_matrix[K] Sigma; // covariance matrix\n for (m in 1:K) {\n Sigma[m, m] = sigma[m] * sigma[m] * Omega[m, m];\n }\n for (m in 1:(K-1)) {\n for (n in (m+1):K) {\n Sigma[m, n] = sigma[m] * sigma[n] * Omega[m, n];\n Sigma[n, m] = Sigma[m, n];\n }\n }\n}\nmodel {\n mu ~ normal(0, 5); // vectorized, diffuse\n Omega ~ lkj_corr(2.0); // regularize to unit correlation\n sigma ~ cauchy(0, 5); // half-Cauchy due to constraint\n // ... words sampled as above ...\n}\nThe \\(\\textsf{LKJCorr}\\) distribution with shape \\(\\alpha > 0\\) has support on correlation matrices (i.e., symmetric positive definite with unit diagonal). Its density is defined by \\[\n\\mathsf{LkjCorr}(\\Omega\\mid\\alpha) \\propto \\mathrm{det}(\\Omega)^{\\alpha - 1}\n\\] With a scale of \\(\\alpha = 2\\), the weakly informative prior favors a unit correlation matrix. Thus the compound effect of this prior on the covariance matrix \\(\\Sigma\\) for the multivariate logistic normal is a slight concentration around diagonal covariance matrices with scales determined by the prior on sigma.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Clustering Models" + ] + }, + { + "objectID": "stan-users-guide/clustering.html#relation-to-finite-mixture-models", + "href": "stan-users-guide/clustering.html#relation-to-finite-mixture-models", + "title": "Clustering Models", + "section": "", + "text": "As mentioned in the clustering section, clustering models and finite mixture models are really just two sides of the same coin. 
The “soft” \\(K\\)-means model described in the next section is a normal mixture model (with varying assumptions about covariance in higher dimensions leading to variants of \\(K\\)-means). Latent Dirichlet allocation is a mixed-membership multinomial mixture.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Clustering Models" + ] + }, + { + "objectID": "stan-users-guide/clustering.html#soft-k-means", + "href": "stan-users-guide/clustering.html#soft-k-means", + "title": "Clustering Models", + "section": "", + "text": "\\(K\\)-means clustering is a method of clustering data represented as \\(D\\)-dimensional vectors. Specifically, there will be \\(N\\) items to be clustered, each represented as a vector \\(y_n \\in \\mathbb{R}^D\\). In the “soft” version of \\(K\\)-means, the assignments to clusters will be probabilistic.\n\n\n\\(K\\)-means clustering is typically described geometrically in terms of the following algorithm, which assumes the number of clusters \\(K\\) and data vectors \\(y\\) as input.\n\nFor each \\(n\\) in \\(\\{1,\\dotsc,N\\}\\), randomly assign vector \\(y_n\\) to a cluster in \\(\\{1,\\dotsc,K\\}\\);\nRepeat\n\nFor each cluster \\(k\\) in \\(\\{1,\\dotsc,K\\}\\), compute the cluster centroid \\(\\mu_k\\) by averaging the vectors assigned to that cluster;\nFor each \\(n\\) in \\(\\{1,\\dotsc,N\\}\\), reassign \\(y_n\\) to the cluster \\(k\\) for which the (Euclidean) distance from \\(y_n\\) to \\(\\mu_k\\) is smallest;\nIf no vectors changed cluster, return the cluster assignments.\n\n\nThis algorithm is guaranteed to terminate.\n\n\n\nSoft \\(K\\)-means clustering treats the cluster assignments as probability distributions over the clusters. 
Because of the connection between Euclidean distance and multivariate normal models with a fixed covariance, soft \\(K\\)-means can be expressed (and coded in Stan) as a multivariate normal mixture model.\nIn the full generative model, each data point \\(n\\) in \\(\\{1,\\dotsc,N\\}\\) is assigned a cluster \\(z_n \\in \\{1,\\dotsc,K\\}\\) with symmetric uniform probability, \\[\nz_n \\sim \\textsf{categorical}(1/K),\n\\] where \\(1\\) is the unit vector of \\(K\\) dimensions, so that \\(1/K\\) is the symmetric \\(K\\)-simplex. Thus the model assumes that each data point is drawn from a hard decision about cluster membership. The softness arises only from the uncertainty about which cluster generated a data point.\nThe data points themselves are generated from a multivariate normal distribution whose parameters are determined by the cluster assignment \\(z_n\\), \\[\ny_n \\sim \\textsf{normal}(\\mu_{z[n]},\\Sigma_{z[n]})\n\\]\nThe sample implementation in this section assumes a fixed unit covariance matrix shared by all clusters \\(k\\), \\[\n\\Sigma_k = \\mathrm{diag\\_matrix}({\\bf 1}),\n\\] so that the log multivariate normal can be implemented directly up to a proportion by \\[\n\\mathrm{normal}\\left( y_n \\mid \\mu_k, \\mathrm{diag\\_matrix}({\\bf 1}) \\right)\n\\propto \\exp \\left (- \\frac{1}{2} \\sum_{d=1}^D \\left( \\mu_{k,d} - y_{n,d}\n \\right)^2 \\right).\n\\] The spatial perspective on \\(K\\)-means arises by noting that the inner term is just half the negative Euclidean distance from the cluster mean \\(\\mu_k\\) to the data point \\(y_n\\).\n\n\n\nConsider the following Stan program for implementing \\(K\\)-means clustering.\ndata {\n int<lower=0> N; // number of data points\n int<lower=1> D; // number of dimensions\n int<lower=1> K; // number of clusters\n array[N] vector[D] y; // observations\n}\ntransformed data {\n real<upper=0> neg_log_K;\n neg_log_K = -log(K);\n}\nparameters {\n array[K] vector[D] mu; // cluster means\n}\ntransformed 
parameters {\n array[N, K] real<upper=0> soft_z; // log unnormalized clusters\n for (n in 1:N) {\n for (k in 1:K) {\n soft_z[n, k] = neg_log_K\n - 0.5 * dot_self(mu[k] - y[n]);\n }\n }\n}\nmodel {\n // prior\n for (k in 1:K) {\n mu[k] ~ std_normal();\n }\n\n // likelihood\n for (n in 1:N) {\n target += log_sum_exp(soft_z[n]);\n }\n}\nThere is an independent standard normal prior on the centroid parameters; this prior could be swapped with other priors, or even a hierarchical model to fit an overall problem scale and location.\nThe only parameter is mu, where mu[k] is the centroid for cluster \\(k\\). The transformed parameters soft_z[n] contain the log of the unnormalized cluster assignment probabilities. The vector soft_z[n] can be converted back to a normalized simplex using the softmax function (see the functions reference manual), either externally or within the model’s generated quantities block.\n\n\n\nThe multivariate normal distribution with unit covariance matrix produces a log probability density proportional to Euclidean distance (i.e., \\(L_2\\) distance). Other distributions relate to other geometries. For instance, replacing the normal distribution with the double exponential (Laplace) distribution produces a clustering model based on \\(L_1\\) distance (i.e., Manhattan or taxicab distance).\nWithin the multivariate normal version of \\(K\\)-means, replacing the unit covariance matrix with a shared covariance matrix amounts to working with distances defined in a space transformed by the inverse covariance matrix.\nAlthough there is no global spatial analog, it is common to see soft \\(K\\)-means specified with a per-cluster covariance matrix. 
In this situation, a hierarchical prior may be used for the covariance matrices.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Clustering Models" + ] + }, + { + "objectID": "stan-users-guide/clustering.html#the-difficulty-of-bayesian-inference-for-clustering", + "href": "stan-users-guide/clustering.html#the-difficulty-of-bayesian-inference-for-clustering", + "title": "Clustering Models", + "section": "", + "text": "Two problems make it pretty much impossible to perform full Bayesian inference for clustering models, the lack of parameter identifiability and the extreme multimodality of the posteriors. There is additional discussion related to the non-identifiability due to label switching in the label switching section.\n\n\nCluster assignments are not identified—permuting the cluster mean vectors mu leads to a model with identical likelihoods. For instance, permuting the first two indexes in mu and the first two indexes in each soft_z[n] leads to an identical likelihood (and prior).\nThe lack of identifiability means that the cluster parameters cannot be compared across multiple Markov chains. In fact, the only parameter in soft \\(K\\)-means is not identified, leading to problems in monitoring convergence. Clusters can even fail to be identified within a single chain, with indices swapping if the chain is long enough or the data are not cleanly separated.\n\n\n\nThe other problem with clustering models is that their posteriors are highly multimodal. One form of multimodality is the non-identifiability leading to index swapping. 
But even without the index problems the posteriors are highly multimodal.\nBayesian inference fails in cases of high multimodality because there is no way to visit all of the modes in the posterior in appropriate proportions and thus no way to evaluate integrals involved in posterior predictive inference.\nIn light of these two problems, the advice often given in fitting clustering models is to try many different initializations and select the sample with the highest overall probability. It is also popular to use optimization-based point estimators such as expectation maximization or variational Bayes, which can be much more efficient than sampling-based approaches.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Clustering Models" + ] + }, + { + "objectID": "stan-users-guide/clustering.html#naive-bayes-classification-and-clustering", + "href": "stan-users-guide/clustering.html#naive-bayes-classification-and-clustering", + "title": "Clustering Models", + "section": "", + "text": "Naive Bayes is a kind of mixture model that can be used for classification or for clustering (or a mix of both), depending on which labels for items are observed.1\nMultinomial mixture models are referred to as “naive Bayes” because they are often applied to classification problems where the multinomial independence assumptions are clearly false.\nNaive Bayes classification and clustering can be applied to any data with multinomial structure. A typical example of this is natural language text classification and clustering, which is used as an example in what follows.\nThe observed data consists of a sequence of \\(M\\) documents made up of bags of words drawn from a vocabulary of \\(V\\) distinct words. A document \\(m\\) has \\(N_m\\) words, which are indexed as \\(w_{m,1}, \\dotsc,\nw_{m,N[m]} \\in \\{1,\\dotsc,V\\}\\). Despite the ordered indexing of words in a document, this order is not part of the model, which is clearly defective for natural human language data. 
A number of topics (or categories) \\(K\\) is fixed.\nThe multinomial mixture model generates a single category \\(z_m \\in\n\\{1,\\dotsc,K\\}\\) for each document \\(m \\in \\{1,\\dotsc,M\\}\\) according to a categorical distribution, \\[\nz_m \\sim \\textsf{categorical}(\\theta).\n\\] The \\(K\\)-simplex parameter \\(\\theta\\) represents the prevalence of each category in the data.\nNext, the words in each document are generated conditionally independently of each other and the words in other documents based on the category of the document, with word \\(n\\) of document \\(m\\) being generated as \\[\nw_{m,n} \\sim \\textsf{categorical}(\\phi_{z[m]}).\n\\] The parameter \\(\\phi_{z[m]}\\) is a \\(V\\)-simplex representing the probability of each word in the vocabulary in documents of category \\(z_m\\).\nThe parameters \\(\\theta\\) and \\(\\phi\\) are typically given symmetric Dirichlet priors. The prevalence \\(\\theta\\) is sometimes fixed to produce equal probabilities for each category \\(k \\in \\{1,\\dotsc,K\\}\\).\n\n\nThe specification for naive Bayes in the previous sections has used a ragged array notation for the words \\(w\\). Because Stan does not support ragged arrays, the models are coded using an alternative strategy that provides an index for each word in a global list of words. 
The data is organized as follows, with the word arrays laid out in a column and each assigned to its document in a second column.\n\\[\n\\begin{array}{lll}\n\\hline\n\\mathrm{n} \\qquad\\qquad\\qquad\\qquad & \\mathrm{w[n]} \\qquad & \\mathrm{doc[n]} \\\\\n\\hline\n1 & w_{1,1} & 1 \\\\\n2 & w_{1,2} & 1 \\\\\n\\vdots & \\vdots & \\vdots \\\\\nN_1 & w_{1,N[1]} & 1 \\\\\nN_1 + 1 & w_{2,1} & 2 \\\\\nN_1 + 2 & w_{2,2} & 2 \\\\\n\\vdots & \\vdots & \\vdots \\\\\nN_1 + N_2 & w_{2,N[2]} & 2 \\\\\nN_1 + N_2 + 1 & w_{3,1} & 3 \\\\\n\\vdots & \\vdots & \\vdots \\\\\nN = \\sum_{m=1}^M N_m & w_{M,N[M]} & M \\\\\n\\hline\n\\end{array}\n\\]\nThe relevant variables for the program are N, the total number of words in all the documents, the word array w, and the document identity array doc.\n\n\n\nA naive Bayes model for estimating the simplex parameters given training data with documents of known categories can be coded in Stan as follows\ndata {\n // training data\n int<lower=1> K; // num topics\n int<lower=1> V; // num words\n int<lower=0> M; // num docs\n int<lower=0> N; // total word instances\n array[M] int<lower=1, upper=K> z; // topic for doc m\n array[N] int<lower=1, upper=V> w; // word n\n array[N] int<lower=1, upper=M> doc; // doc ID for word n\n // hyperparameters\n vector<lower=0>[K] alpha; // topic prior\n vector<lower=0>[V] beta; // word prior\n}\nparameters {\n simplex[K] theta; // topic prevalence\n array[K] simplex[V] phi; // word dist for topic k\n}\nmodel {\n theta ~ dirichlet(alpha);\n for (k in 1:K) {\n phi[k] ~ dirichlet(beta);\n }\n for (m in 1:M) {\n z[m] ~ categorical(theta);\n }\n for (n in 1:N) {\n w[n] ~ categorical(phi[z[doc[n]]]);\n }\n}\nThe topic identifiers \\(z_m\\) are declared as data and the latent category assignments are included as part of the likelihood function.\n\n\n\nNaive Bayes models can be used in an unsupervised fashion to cluster multinomial-structured data into a fixed number \\(K\\) of categories. 
The data declaration includes the same variables as the model in the previous section excluding the topic labels z. Because z is discrete, it needs to be summed out of the model calculation. This is done for naive Bayes as for other mixture models. The parameters are the same up to the priors, but the likelihood is now computed as the marginal document probability\n\\[\\begin{align*}\n\\log\\, &p(w_{m,1},\\dotsc,w_{m,N_m} \\mid \\theta,\\phi) \\\\\n&= \\log \\sum_{k=1}^K\n \\left( \\textsf{categorical}(k \\mid \\theta)\n \\times \\prod_{n=1}^{N_m} \\textsf{categorical}(w_{m,n} \\mid \\phi_k)\n \\right) \\\\\n&= \\log \\sum_{k=1}^K \\exp \\left(\n \\log \\textsf{categorical}(k \\mid \\theta)\n + \\sum_{n=1}^{N_m} \\log \\textsf{categorical}(w_{m,n} \\mid \\phi_k)\n \\right).\n\\end{align*}\\]\nThe last step shows how the log_sum_exp function can be used to stabilize the numerical calculation and return a result on the log scale.\nmodel {\n array[M, K] real gamma;\n theta ~ dirichlet(alpha);\n for (k in 1:K) {\n phi[k] ~ dirichlet(beta);\n }\n for (m in 1:M) {\n for (k in 1:K) {\n gamma[m, k] = categorical_lpmf(k | theta);\n }\n }\n for (n in 1:N) {\n for (k in 1:K) {\n gamma[doc[n], k] = gamma[doc[n], k]\n + categorical_lpmf(w[n] | phi[k]);\n }\n }\n for (m in 1:M) {\n target += log_sum_exp(gamma[m]);\n }\n}\nThe local variable gamma[m, k] represents the value \\[\n\\gamma_{m,k} = \\log \\textsf{categorical}(k \\mid \\theta)\n+ \\sum_{n=1}^{N_m} \\log \\textsf{categorical}(w_{m,n} \\mid \\phi_k).\n\\]\nGiven \\(\\gamma\\), the posterior probability that document \\(m\\) is assigned category \\(k\\) is \\[\n\\Pr[z_m = k \\mid w,\\alpha,\\beta]\n=\n\\exp \\left(\n\\gamma_{m,k}\n- \\log \\sum_{k=1}^K \\exp \\left( \\gamma_{m,k} \\right)\n\\right).\n\\]\nIf the variable gamma were declared and defined in the transformed parameter block, its sampled values would be saved by Stan. 
The normalized posterior probabilities could also be defined as generated quantities.\n\n\n\nFull Bayesian posterior predictive inference for the naive Bayes model can be implemented in Stan by combining the models for labeled and unlabeled data. The estimands include both the model parameters and the posterior distribution over categories for the unlabeled data. The model is essentially a missing data model assuming the unknown category labels are missing completely at random; see Gelman et al. (2013) and Gelman and Hill (2007) for more information on missing data imputation. The model is also an instance of semisupervised learning because the unlabeled data contributes to the parameter estimations.\nTo specify a Stan model for performing full Bayesian inference, the model for labeled data is combined with the model for unlabeled data. A second document collection is declared as data, but without the category labels, leading to new variables M2, N2, w2, and doc2. The number of categories and number of words, as well as the hyperparameters are shared and only declared once. Similarly, there is only one set of parameters. Then the model contains a single set of statements for the prior, a set of statements for the labeled data, and a set of statements for the unlabeled data.\n\n\n\nAn alternative to full Bayesian inference involves estimating a model using labeled data, then applying it to unlabeled data without updating the parameter estimates based on the unlabeled data. This behavior can be implemented by moving the definition of gamma for the unlabeled documents to the generated quantities block. 
Because the variables no longer contribute to the log probability, they no longer jointly contribute to the estimation of the model parameters.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Clustering Models" + ] + }, + { + "objectID": "stan-users-guide/clustering.html#latent-dirichlet-allocation", + "href": "stan-users-guide/clustering.html#latent-dirichlet-allocation", + "title": "Clustering Models", + "section": "", + "text": "Latent Dirichlet allocation (LDA) is a mixed-membership multinomial clustering model (Blei, Ng, and Jordan 2003) that generalizes naive Bayes. Using the topic and document terminology common in discussions of LDA, each document is modeled as having a mixture of topics, with each word drawn from a topic based on the mixing proportions.\n\n\nThe basic model assumes each document is generated independently based on fixed hyperparameters. For document \\(m\\), the first step is to draw a topic distribution simplex \\(\\theta_m\\) over the \\(K\\) topics, \\[\n\\theta_m \\sim \\textsf{Dirichlet}(\\alpha).\n\\]\nThe prior hyperparameter \\(\\alpha\\) is fixed to a \\(K\\)-vector of positive values. Each word in the document is generated independently conditional on the distribution \\(\\theta_m\\). 
First, a topic \\(z_{m,n} \\in \\{1,\\dotsc,K\\}\\) is drawn for the word based on the document-specific topic-distribution, \\[\nz_{m,n} \\sim \\textsf{categorical}(\\theta_m).\n\\]\nFinally, the word \\(w_{m,n}\\) is drawn according to the word distribution for topic \\(z_{m,n}\\), \\[\nw_{m,n} \\sim \\textsf{categorical}(\\phi_{z[m,n]}).\n\\] The distributions \\(\\phi_k\\) over words for topic \\(k\\) are also given a Dirichlet prior, \\[\n\\phi_k \\sim \\textsf{Dirichlet}(\\beta)\n\\]\nwhere \\(\\beta\\) is a fixed \\(V\\)-vector of positive values.\n\n\n\nAlthough Stan does not (yet) support discrete sampling, it is possible to calculate the marginal distribution over the continuous parameters by summing out the discrete parameters as in other mixture models. The marginal posterior of the topic and word variables is \\[\\begin{align*}\np(\\theta,\\phi \\mid w,\\alpha,\\beta)\n&\\propto p(\\theta \\mid \\alpha) \\, p(\\phi \\mid \\beta) \\, p(w \\mid \\theta,\\phi) \\\\\n&= \\prod_{m=1}^M p(\\theta_m \\mid \\alpha)\n \\times \\prod_{k=1}^K p(\\phi_k \\mid \\beta)\n \\times \\prod_{m=1}^M \\prod_{n=1}^{N[m]} p(w_{m,n} \\mid \\theta_m,\\phi).\n\\end{align*}\\]\nThe inner word-probability term is defined by summing out the topic assignments, \\[\\begin{align*}\np(w_{m,n} \\mid \\theta_m,\\phi)\n&= \\sum_{z=1}^K p(z,w_{m,n} \\mid \\theta_m,\\phi) \\\\\n&= \\sum_{z=1}^K p(z \\mid \\theta_m) \\, p(w_{m,n} \\mid \\phi_z).\n\\end{align*}\\]\nPlugging the distributions in and converting to the log scale provides a formula that can be implemented directly in Stan, \\[\\begin{align*}\n\\log\\, &p(\\theta,\\phi \\mid w,\\alpha,\\beta) \\\\\n&= \\sum_{m=1}^M \\log \\textsf{Dirichlet}(\\theta_m \\mid \\alpha)\n + \\sum_{k=1}^K \\log \\textsf{Dirichlet}(\\phi_k \\mid \\beta) \\\\\n&\\qquad + \\sum_{m=1}^M \\sum_{n=1}^{N[m]} \\log \\left(\n \\sum_{z=1}^K \\textsf{categorical}(z \\mid \\theta_m)\n \\times \\textsf{categorical}(w_{m,n} \\mid \\phi_z)\n 
\\right)\n\\end{align*}\\]\n\n\n\nApplying the marginal derived in the last section to the data structure described in this section leads to the following Stan program for LDA.\ndata {\n int<lower=2> K; // num topics\n int<lower=2> V; // num words\n int<lower=1> M; // num docs\n int<lower=1> N; // total word instances\n array[N] int<lower=1, upper=V> w; // word n\n array[N] int<lower=1, upper=M> doc; // doc ID for word n\n vector<lower=0>[K] alpha; // topic prior\n vector<lower=0>[V] beta; // word prior\n}\nparameters {\n array[M] simplex[K] theta; // topic dist for doc m\n array[K] simplex[V] phi; // word dist for topic k\n}\nmodel {\n for (m in 1:M) {\n theta[m] ~ dirichlet(alpha); // prior\n }\n for (k in 1:K) {\n phi[k] ~ dirichlet(beta); // prior\n }\n for (n in 1:N) {\n array[K] real gamma;\n for (k in 1:K) {\n gamma[k] = log(theta[doc[n], k]) + log(phi[k, w[n]]);\n }\n target += log_sum_exp(gamma); // likelihood;\n }\n}\nAs in the other mixture models, the log-sum-of-exponents function is used to stabilize the numerical arithmetic.\n\n\n\nTo account for correlations in the distribution of topics for documents, Blei and Lafferty (2007) introduced a variant of LDA in which the Dirichlet prior on the per-document topic distribution is replaced with a multivariate logistic normal distribution.\nThe authors treat the prior as a fixed hyperparameter. They use an \\(L_1\\)-regularized estimate of covariance, which is equivalent to the maximum a posteriori estimate given a double-exponential prior. Here the mean and covariance of the multivariate logistic normal are specified as data.\n\n\nThe Stan model in the previous section can be modified to implement the correlated topic model by replacing the Dirichlet topic prior alpha in the data declaration with the mean and covariance of the multivariate logistic normal prior.\ndata {\n // ... 
data as before without alpha ...\n vector[K] mu; // topic mean\n cov_matrix[K] Sigma; // topic covariance\n}\nRather than drawing the simplex parameter theta from a Dirichlet, a parameter eta is drawn from a multivariate normal distribution and then transformed using softmax into a simplex.\nparameters {\n array[K] simplex[V] phi; // word dist for topic k\n array[M] vector[K] eta; // topic dist for doc m\n}\ntransformed parameters {\n array[M] simplex[K] theta;\n for (m in 1:M) {\n theta[m] = softmax(eta[m]);\n }\n}\nmodel {\n for (m in 1:M) {\n eta[m] ~ multi_normal(mu, Sigma);\n }\n // ... model as before w/o prior for theta ...\n}\n\n\n\nBy adding a prior for the mean and covariance, Stan supports full Bayesian inference for the correlated topic model. This requires moving the declarations of topic mean mu and covariance Sigma from the data block to the parameters block and providing them with priors in the model. A relatively efficient and interpretable prior for the covariance matrix Sigma may be encoded as follows.\n// ... data block as before, but without alpha ...\nparameters {\n vector[K] mu; // topic mean\n corr_matrix[K] Omega; // correlation matrix\n vector<lower=0>[K] sigma; // scales\n array[M] vector[K] eta; // logit topic dist for doc m\n array[K] simplex[V] phi; // word dist for topic k\n}\ntransformed parameters {\n // ... eta as above ...\n cov_matrix[K] Sigma; // covariance matrix\n for (m in 1:K) {\n Sigma[m, m] = sigma[m] * sigma[m] * Omega[m, m];\n }\n for (m in 1:(K-1)) {\n for (n in (m+1):K) {\n Sigma[m, n] = sigma[m] * sigma[n] * Omega[m, n];\n Sigma[n, m] = Sigma[m, n];\n }\n }\n}\nmodel {\n mu ~ normal(0, 5); // vectorized, diffuse\n Omega ~ lkj_corr(2.0); // regularize to unit correlation\n sigma ~ cauchy(0, 5); // half-Cauchy due to constraint\n // ... 
words sampled as above ...\n}\nThe \\(\\textsf{LkjCorr}\\) distribution with shape \\(\\alpha > 0\\) has support on correlation matrices (i.e., symmetric positive definite with unit diagonal). Its density is defined by \\[\n\\textsf{LkjCorr}(\\Omega\\mid\\alpha) \\propto \\mathrm{det}(\\Omega)^{\\alpha - 1}\n\\] With a shape of \\(\\alpha = 2\\), the weakly informative prior favors a unit correlation matrix. Thus the compound effect of this prior on the covariance matrix \\(\\Sigma\\) for the multivariate logistic normal is a slight concentration around diagonal covariance matrices with scales determined by the prior on sigma.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Clustering Models" + ] + }, + { + "objectID": "stan-users-guide/clustering.html#footnotes", + "href": "stan-users-guide/clustering.html#footnotes", + "title": "Clustering Models", + "section": "Footnotes", + "text": "Footnotes\n\n\nFor clustering, the non-identifiability problems for all mixture models present a problem, whereas there is no such problem for classification. Despite the difficulties with full Bayesian inference for clustering, researchers continue to use it, often in an exploratory data analysis setting rather than for predictive modeling.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Clustering Models" + ] + }, + { + "objectID": "stan-users-guide/copulas.html", + "href": "stan-users-guide/copulas.html", + "title": "Copulas", + "section": "", + "text": "Copulas provide a flexible way to model multivariate distributions by separating the marginal cumulative distribution functions from the dependence structure. This chapter introduces copulas in Stan, focusing on implementation techniques and practical examples. 
This chapter was derived from Brynjólfur Gauti Guðrúnar Jónsson’s A gentle introduction: the Gaussian copula.\n\n\nAccording to Sklar’s theorem (Sklar 1959), any multivariate distribution can be expressed in terms of its marginals and a copula that captures the dependence structure. Copulas are functions that join univariate marginal cumulative distribution functions to form multivariate distributions.\nFor a multivariate random variable \\(\\mathbf{X} = [X_1 \\cdots X_D]^\\top\\) with marginal cumulative distribution functions \\(F_i\\), the joint cumulative distribution function can be written as:\n\\[\nF_{\\mathbf{X}}(\\mathbf{x}) = C(F_1(x_1), \\ldots, F_D(x_D)) = \\Pr[X_1 \\leq x_1, \\ldots, X_D \\leq x_D]\n\\]\nwhere \\(C\\) is the copula function, \\(F_{\\mathbf{X}}\\) is the joint cumulative distribution function, and \\(F_i\\) are the marginal cumulative distribution functions. The copula function \\(C\\) must be a joint cumulative distribution function over the unit hypercube \\([0, 1]^D\\).\n\n\n\nThis section describes the general structure of copula models in Stan. 
The next sections will provide specific examples of copula implementations, but first, let’s understand the general pattern that separates the marginal distributions from the dependence structure.\nThe log density of a multivariate distribution using a copula can be written as:\n\\[\n\\log h(\\mathbf{x}) = \\log c\\left(u_1, \\dots, u_D \\vert \\boldsymbol{\\alpha}\\right) + \\sum_{i=1}^D \\log f_i(x_i \\vert \\boldsymbol{\\beta}_i)\n\\]\nwhere:\n\n\\(u_i = F_i(x_i \\vert \\boldsymbol{\\beta}_i)\\) are the probability integral transforms of the data\n\\(\\log c\\left(u_1, \\dots, u_D \\vert \\boldsymbol{\\alpha}\\right)\\) is the log density of the copula\n\\(\\sum_{i=1}^D \\log f_i(x_i \\vert \\boldsymbol{\\beta}_i)\\) is the sum of the log densities of the marginals\n\\(\\boldsymbol{\\alpha}\\) represents the parameters describing the parametric form of the copula\n\\(\\boldsymbol{\\beta}_i\\) represents the parameters describing the parametric form of the \\(i\\)-th marginal distribution\n\nThe implementation of copulas in Stan has two key requirements:\n\nBoth the probability density functions and cumulative distribution functions of the marginal distributions must be available\nA function that computes the log density of the copula for the transformed data must be implemented\n\nMost copula implementations in Stan follow a three-step process:\n\nAccumulate marginal log likelihoods: Calculate and add the log density of each marginal distribution to the target log density\nTransform to uniform variables: Apply the marginal CDFs to transform the data to uniform variables on the unit interval\nCalculate copula density: Compute the log density of the copula based on these uniform variables and add it to the target log density\n\nThis process is reflected in the general form of the log density shown above, where the first term represents the copula density and the second term represents the sum of marginal log densities.\nIn a way, we are always modeling with 
copulas, as the independence assumption can be viewed as a special case using the independence copula, where \\(\\log c(\\mathbf{u}) = 0\\), resulting in the familiar sum of marginal log densities. This perspective highlights that traditional independent modeling is just a specific case within the broader copula framework.\nMost parametric copula families include independence as a special case, either as a subset of their parameter space (e.g., when correlation parameters are zero) or as a limit when parameters approach specific values (e.g., when the dependence parameter approaches zero in Archimedean copulas).\n\n\n\nThe Gaussian copula is constructed using the multivariate normal distribution. For a \\(D\\)-dimensional random vector \\(\\mathbf{X}\\) with marginals \\(F_i\\), the log Gaussian copula density is given by:\n\\[\n\\begin{aligned}\n\\log c(\\mathbf{u}) &=\n-\\frac{1}{2} \\log |\\boldsymbol{\\Omega}| -\\frac{1}{2} \\mathbf{z}^\\top (\\boldsymbol{\\Omega}^{-1} - \\mathbf{I}) \\mathbf{z} \\\\\n& =\n-\\frac{1}{2} \\log |\\boldsymbol{\\Omega}| -\\frac{1}{2} \\mathbf{z}^\\top \\boldsymbol{\\Omega}^{-1} \\mathbf{z} + \\frac{1}{2} \\mathbf{z}^\\top \\mathbf{z} \\\\\n&= \\log \\mathcal{N}(\\mathbf{z} \\mid \\mathbf{0}, \\boldsymbol{\\Omega}) - \\log \\mathcal{N}(\\mathbf{z} \\mid \\mathbf{0}, \\mathbf{I})\n\\end{aligned}\n\\]\nwhere \\(\\mathbf{z} = [\\Phi^{-1}(u_1), \\ldots, \\Phi^{-1}(u_D)]^\\top\\) are the inverse normal CDF transforms of the uniform marginals, \\(\\boldsymbol{\\Omega}\\) is the correlation matrix, and \\(\\mathbf{I}\\) is the identity matrix. 
The joint log density is then:\n\\[\n\\log h(\\mathbf{x}) = \\log c(F_1(x_1), \\ldots, F_D(x_D)) + \\sum_{i=1}^D \\log f_i(x_i)\n\\]\nFollowing the three-step process for implementing copulas in Stan:\n\nAccumulate marginal log likelihoods: The exponential log densities are added to the target in the line target += exponential_lpdf(y[n] | lambda)\nTransform to uniform variables: The exponential CDF transforms the data to uniform variables: exponential_cdf(y[n, d] | lambda[d])\nCalculate copula density: The transformed variables are converted to normal scale using inv_Phi and the multivariate normal log density is computed: z ~ multi_normal_cholesky(zeros, L_Omega)\n\nThe following example demonstrates a Gaussian copula with exponential marginal distributions. Note that while the copula is Gaussian, the marginals are exponential.\ndata {\n int<lower=0> N; // number of observations\n int<lower=0> D; // number of dimensions\n vector<lower=0>[D] y[N]; // data\n}\n\ntransformed data {\n vector[D] zeros = rep_vector(0, D);\n}\n\nparameters {\n // Parameters for exponential marginal distributions\n vector<lower=0>[D] lambda; // rate parameters\n \n // Correlation matrix for Gaussian copula\n cholesky_factor_corr[D] L_Omega;\n}\n\nmodel {\n // Priors\n lambda ~ gamma(2, 1); // prior for rate parameters\n L_Omega ~ lkj_corr_cholesky(2);\n \n // Likelihood using Gaussian copula with exponential marginals\n for (n in 1:N) {\n // Add exponential log density to target\n target += exponential_lpdf(y[n] | lambda);\n \n vector[D] z;\n for (d in 1:D) {\n // Transform to uniform using exponential CDF\n real u_d = exponential_cdf(y[n, d] | lambda[d]);\n \n // Transform to standard normal\n z[d] = inv_Phi(u_d);\n }\n // Multivariate normal log density with correlation matrix\n z ~ multi_normal_cholesky(zeros, L_Omega);\n }\n}\n\ngenerated quantities {\n // Optional: Recover correlation matrix from Cholesky factor\n matrix[D, D] Omega = 
multiply_lower_tri_self_transpose(L_Omega);\n}\n\n\n\nCopulas offer several advantages in statistical modeling:\n\nFlexibility: They allow combining any marginal distributions with various dependence structures. For example:\n\nModeling financial returns with heavy-tailed marginals and complex dependence structures\nCombining different types of distributions (e.g., normal and gamma) in a single model\nCapturing asymmetric dependencies between variables, such as in financial markets where joint negative returns are more common than joint positive returns due to macro-events affecting multiple stocks simultaneously, while positive returns tend to be more idiosyncratic\nModeling different types of tail dependence in different parts of the distribution\n\nFactorability: The marginal distributions and dependence structure can be modeled separately, allowing for different prior knowledge about each component. This is similar to the common practice of factoring scale and correlation in multivariate normal priors.\nFor example, when modeling the survival times of two components in a system, we can separately specify exponential or gamma marginal distributions based on historical failure data for each component, and a Gaussian copula (or asymmetrical Archimedean copula) capturing how the failure of one component affects the other, making it easier to incorporate prior knowledge about each aspect independently.\nTail dependence: Different copulas can capture different types of tail dependence, which is crucial in applications like risk management and extreme value analysis where joint extreme scenarios need to be quantified.\nUniversal Framework: In a way, we are always modeling with copulas, as the independence assumption can be viewed as a special case using the independence copula. 
This perspective highlights that traditional independent modeling is just a specific case within the broader copula framework.\n\n\n\n\nWhen implementing copulas in Stan, several considerations should be kept in mind:\n\nComputational efficiency: The probability integral transform and inverse transform steps can be computationally intensive, especially for complex marginal distributions.\nParameter identifiability: Care must be taken to ensure that the parameters of the marginal distributions and the copula are identifiable.\nModel selection: The choice of copula family should be guided by the specific dependence structure of the data. For example:\n\nThe Gaussian copula may underestimate the probability of joint extreme events in financial data\nThe Student-t copula, while offering tail dependence, maintains symmetric tail behavior that may not match all applications\nArchimedean copulas can model asymmetric tail dependence but may be less flexible and harder to estimate in high dimensions\n\nNumerical stability: The transformations between different scales (original, uniform, and normal/Student-t/calculations using Archimedean copulas) require careful implementation to maintain numerical stability.\nSymmetry considerations: Many copula families exhibit strong symmetries that may not match the data:\n\nRadial symmetry: Some copulas (like Gaussian and Student-t) treat positive and negative extremes equally, which may not match financial data where joint negative returns are more common than joint positive returns\nExchangeability: Some copulas are invariant under permutations of their arguments, which can lead to unintuitive results when combined with inhomogeneous marginals. For example, when modeling time-to-event scenarios with different marginal distributions (e.g., exponential distributions with different parameters), perfect dependence in the copula does not imply simultaneous events. 
Instead, one event triggers the other at a later time corresponding to the same quantile, which can lead to incorrect modeling of joint events.\n\nTail dependence: Understanding and choosing appropriate tail dependence is crucial:\n\nThe upper (lower) tail dependence coefficient \\(\\lambda_U (\\lambda_L)\\) is the probability that one variable is extremely large (small) given that another is extremely large (small).\nDifferent copula families exhibit different tail dependence properties:\n\nSome copulas (like Gaussian) have zero tail dependence\nOthers can model symmetric tail dependence (\\(\\lambda_U = \\lambda_L\\))\nSome can capture asymmetric tail dependence (\\(\\lambda_U \\neq \\lambda_L\\))\nCertain copulas allow for tail dependence even with zero correlation\n\nThe choice of copula should be guided by the expected tail behavior in the application:\n\nFinancial data often requires modeling joint lower extreme events\nRisk management applications may need asymmetric tail dependence\nSome applications may require different tail behavior in different parts of the distribution\n\n\nHigh-dimensional modeling: As dimensionality increases:\n\nThe number of dependence parameters grows\nSome copula families become less flexible\nVine copulas or factor copulas may be more appropriate\n\n\n\n\n\nSeveral copula families are available for modeling different dependence structures in the correlation component:\n\nGaussian copula: Based on the multivariate normal distribution, offering symmetric dependence\nStudent-t copula: Based on the multivariate Student-t distribution, providing more flexibility in tail dependence than the Gaussian copula\nArchimedean copulas: A class of copulas defined through generator functions, including:\n\nClayton copula: Stronger lower tail dependence\nGumbel copula: Stronger upper tail dependence\nFrank copula: Symmetric dependence\n\nVine copulas: A flexible approach for modeling high-dimensional dependencies by decomposing the joint 
distribution into a series of bivariate copulas\n\n\n\n\n\nJónsson’s three part blog series, Copulas in Stan:\n\nPart I: If it bleeds, we can kill it\nPart II: A gentle introduction: the Gaussian copula\nPart III: It was the best of tails, it was the worst of tails: The T-Copula\n\nBrynjólfur Gauti Guðrúnar Jónsson’s StanCon 2024 presentation, Copulas in Stan: Modeling Spatial Dependence\nSean Pinkney’s Helpful Stan functions: copula functions", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Copulas" + ] + }, + { + "objectID": "stan-users-guide/copulas.html#what-are-copulas", + "href": "stan-users-guide/copulas.html#what-are-copulas", + "title": "Copulas", + "section": "", + "text": "According to Sklar’s theorem (Sklar 1959), any multivariate distribution can be expressed in terms of its marginals and a copula that captures the dependence structure. Copulas are functions that join univariate marginal cumulative distribution functions to form multivariate distributions.\nFor a multivariate random variable \\(\\mathbf{X} = [X_1 \\cdots X_D]^\\top\\) with marginal cumulative distribution functions \\(F_i\\), the joint cumulative distribution function can be written as:\n\\[\nF_{\\mathbf{X}}(\\mathbf{x}) = C(F_1(x_1), \\ldots, F_D(x_D)) = \\Pr[X_1 \\leq x_1, \\ldots, X_D \\leq x_D]\n\\]\nwhere \\(C\\) is the copula function, \\(F_{\\mathbf{X}}\\) is the joint cumulative distribution function, and \\(F_i\\) are the marginal cumulative distribution functions. The copula function \\(C\\) must be a joint cumulative distribution function over the unit hypercube \\([0, 1]^D\\).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Copulas" + ] + }, + { + "objectID": "stan-users-guide/copulas.html#general-structure-of-copula-models-in-stan", + "href": "stan-users-guide/copulas.html#general-structure-of-copula-models-in-stan", + "title": "Copulas", + "section": "", + "text": "This section describes the general structure of copula models in Stan. 
The next sections will provide specific examples of copula implementations, but first, let’s understand the general pattern that separates the marginal distributions from the dependence structure.\nThe log density of a multivariate distribution using a copula can be written as:\n\\[\n\\log h(\\mathbf{x}) = \\log c\\left(u_1, \\dots, u_D \\vert \\boldsymbol{\\alpha}\\right) + \\sum_{i=1}^D \\log f_i(x_i \\vert \\boldsymbol{\\beta}_i)\n\\]\nwhere:\n\n\\(u_i = F_i(x_i \\vert \\boldsymbol{\\beta}_i)\\) are the probability integral transforms of the data\n\\(\\log c\\left(u_1, \\dots, u_D \\vert \\boldsymbol{\\alpha}\\right)\\) is the log density of the copula\n\\(\\sum_{i=1}^D \\log f_i(x_i \\vert \\boldsymbol{\\beta}_i)\\) is the sum of the log densities of the marginals\n\\(\\boldsymbol{\\alpha}\\) represents the parameters describing the parametric form of the copula\n\\(\\boldsymbol{\\beta}_i\\) represents the parameters describing the parametric form of the \\(i\\)-th marginal distribution\n\nThe implementation of copulas in Stan has two key requirements:\n\nBoth the probability density functions and cumulative distribution functions of the marginal distributions must be available\nA function that computes the log density of the copula for the transformed data must be implemented\n\nMost copula implementations in Stan follow a three-step process:\n\nAccumulate marginal log likelihoods: Calculate and add the log density of each marginal distribution to the target log density\nTransform to uniform variables: Apply the marginal CDFs to transform the data to uniform variables on the unit interval\nCalculate copula density: Compute the log density of the copula based on these uniform variables and add it to the target log density\n\nThis process is reflected in the general form of the log density shown above, where the first term represents the copula density and the second term represents the sum of marginal log densities.\nIn a way, we are always modeling with 
copulas, as the independence assumption can be viewed as a special case using the independence copula, where \\(\\log c(\\mathbf{u}) = 0\\), resulting in the familiar sum of marginal log densities. This perspective highlights that traditional independent modeling is just a specific case within the broader copula framework.\nMost parametric copula families include independence as a special case, either as a subset of their parameter space (e.g., when correlation parameters are zero) or as a limit when parameters approach specific values (e.g., when the dependence parameter approaches zero in Archimedean copulas).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Copulas" + ] + }, + { + "objectID": "stan-users-guide/copulas.html#gaussian-copula-example", + "href": "stan-users-guide/copulas.html#gaussian-copula-example", + "title": "Copulas", + "section": "", + "text": "The Gaussian copula is constructed using the multivariate normal distribution. For a \\(D\\)-dimensional random vector \\(\\mathbf{X}\\) with marginals \\(F_i\\), the log Gaussian copula density is given by:\n\\[\n\\begin{aligned}\n\\log c(\\mathbf{u}) &=\n-\\frac{1}{2} \\log |\\boldsymbol{\\Omega}| -\\frac{1}{2} \\mathbf{z}^\\top (\\boldsymbol{\\Omega}^{-1} - \\mathbf{I}) \\mathbf{z} \\\\\n& =\n-\\frac{1}{2} \\log |\\boldsymbol{\\Omega}| -\\frac{1}{2} \\mathbf{z}^\\top \\boldsymbol{\\Omega}^{-1} \\mathbf{z} + \\frac{1}{2} \\mathbf{z}^\\top \\mathbf{z} \\\\\n&= \\log \\mathcal{N}(\\mathbf{z} \\mid \\mathbf{0}, \\boldsymbol{\\Omega}) - \\log \\mathcal{N}(\\mathbf{z} \\mid \\mathbf{0}, \\mathbf{I})\n\\end{aligned}\n\\]\nwhere \\(\\mathbf{z} = [\\Phi^{-1}(u_1), \\ldots, \\Phi^{-1}(u_D)]^\\top\\) are the inverse normal CDF transforms of the uniform marginals, \\(\\boldsymbol{\\Omega}\\) is the correlation matrix, and \\(\\mathbf{I}\\) is the identity matrix. 
The joint log density is then:\n\\[\n\\log h(\\mathbf{x}) = \\log c(F_1(x_1), \\ldots, F_D(x_D)) + \\sum_{i=1}^D \\log f_i(x_i)\n\\]\nFollowing the three-step process for implementing copulas in Stan:\n\nAccumulate marginal log likelihoods: The exponential log densities are added to the target in the line target += exponential_lpdf(y[n] | lambda)\nTransform to uniform variables: The exponential CDF transforms the data to uniform variables: exponential_cdf(y[n, d] | lambda[d])\nCalculate copula density: The transformed variables are converted to normal scale using inv_Phi and the multivariate normal log density is computed: z ~ multi_normal_cholesky(zeros, L_Omega)\n\nThe following example demonstrates a Gaussian copula with exponential marginal distributions. Note that while the copula is Gaussian, the marginals are exponential.\ndata {\n int<lower=0> N; // number of observations\n int<lower=0> D; // number of dimensions\n vector<lower=0>[D] y[N]; // data\n}\n\ntransformed data {\n vector[D] zeros = rep_vector(0, D);\n}\n\nparameters {\n // Parameters for exponential marginal distributions\n vector<lower=0>[D] lambda; // rate parameters\n \n // Correlation matrix for Gaussian copula\n cholesky_factor_corr[D] L_Omega;\n}\n\nmodel {\n // Priors\n lambda ~ gamma(2, 1); // prior for rate parameters\n L_Omega ~ lkj_corr_cholesky(2);\n \n // Likelihood using Gaussian copula with exponential marginals\n for (n in 1:N) {\n // Add exponential log density to target\n target += exponential_lpdf(y[n] | lambda);\n \n vector[D] z;\n for (d in 1:D) {\n // Transform to uniform using exponential CDF\n real u_d = exponential_cdf(y[n, d] | lambda[d]);\n \n // Transform to standard normal\n z[d] = inv_Phi(u_d);\n }\n // Multivariate normal log density with correlation matrix\n z ~ multi_normal_cholesky(zeros, L_Omega);\n }\n}\n\ngenerated quantities {\n // Optional: Recover correlation matrix from Cholesky factor\n matrix[D, D] Omega = multiply_lower_tri_self_transpose(L_Omega);\n}", 
+ "crumbs": [ + "Stan Users Guide", + "Example Models", + "Copulas" + ] + }, + { + "objectID": "stan-users-guide/copulas.html#advantages-of-copulas", + "href": "stan-users-guide/copulas.html#advantages-of-copulas", + "title": "Copulas", + "section": "", + "text": "Copulas offer several advantages in statistical modeling:\n\nFlexibility: They allow combining any marginal distributions with various dependence structures. For example:\n\nModeling financial returns with heavy-tailed marginals and complex dependence structures\nCombining different types of distributions (e.g., normal and gamma) in a single model\nCapturing asymmetric dependencies between variables, such as in financial markets where joint negative returns are more common than joint positive returns due to macro-events affecting multiple stocks simultaneously, while positive returns tend to be more idiosyncratic\nModeling different types of tail dependence in different parts of the distribution\n\nFactorability: The marginal distributions and dependence structure can be modeled separately, allowing for different prior knowledge about each component. 
This is similar to the common practice of factoring scale and correlation in multivariate normal priors.\nFor example, when modeling the survival times of two components in a system, we can separately specify exponential or gamma marginal distributions based on historical failure data for each component, and a Gaussian copula (or asymmetrical Archimedean copula) capturing how the failure of one component affects the other, making it easier to incorporate prior knowledge about each aspect independently.\nTail dependence: Different copulas can capture different types of tail dependence, which is crucial in applications like risk management and extreme value analysis where joint extreme scenarios need to be quantified.\nUniversal Framework: In a way, we are always modeling with copulas, as the independence assumption can be viewed as a special case using the independence copula. This perspective highlights that traditional independent modeling is just a specific case within the broader copula framework.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Copulas" + ] + }, + { + "objectID": "stan-users-guide/copulas.html#common-pitfalls-and-considerations", + "href": "stan-users-guide/copulas.html#common-pitfalls-and-considerations", + "title": "Copulas", + "section": "", + "text": "When implementing copulas in Stan, several considerations should be kept in mind:\n\nComputational efficiency: The probability integral transform and inverse transform steps can be computationally intensive, especially for complex marginal distributions.\nParameter identifiability: Care must be taken to ensure that the parameters of the marginal distributions and the copula are identifiable.\nModel selection: The choice of copula family should be guided by the specific dependence structure of the data. 
For example:\n\nThe Gaussian copula may underestimate the probability of joint extreme events in financial data\nThe Student-t copula, while offering tail dependence, maintains symmetric tail behavior that may not match all applications\nArchimedean copulas can model asymmetric tail dependence but may be less flexible and harder to estimate in high dimensions\n\nNumerical stability: The transformations between different scales (original, uniform, and normal/Student-t/calculations using Archimedean copulas) require careful implementation to maintain numerical stability.\nSymmetry considerations: Many copula families exhibit strong symmetries that may not match the data:\n\nRadial symmetry: Some copulas (like Gaussian and Student-t) treat positive and negative extremes equally, which may not match financial data where joint negative returns are more common than joint positive returns\nExchangeability: Some copulas are invariant under permutations of their arguments, which can lead to unintuitive results when combined with inhomogeneous marginals. For example, when modeling time-to-event scenarios with different marginal distributions (e.g., exponential distributions with different parameters), perfect dependence in the copula does not imply simultaneous events. 
Instead, one event triggers the other at a later time corresponding to the same quantile, which can lead to incorrect modeling of joint events.\n\nTail dependence: Understanding and choosing appropriate tail dependence is crucial:\n\nThe upper (lower) tail dependence coefficient \\(\\lambda_U (\\lambda_L)\\) is the probability that one variable is extremely large (small) given that another is extremely large (small).\nDifferent copula families exhibit different tail dependence properties:\n\nSome copulas (like Gaussian) have zero tail dependence\nOthers can model symmetric tail dependence (\\(\\lambda_U = \\lambda_L\\))\nSome can capture asymmetric tail dependence (\\(\\lambda_U \\neq \\lambda_L\\))\nCertain copulas allow for tail dependence even with zero correlation\n\nThe choice of copula should be guided by the expected tail behavior in the application:\n\nFinancial data often requires modeling joint lower extreme events\nRisk management applications may need asymmetric tail dependence\nSome applications may require different tail behavior in different parts of the distribution\n\n\nHigh-dimensional modeling: As dimensionality increases:\n\nThe number of dependence parameters grows\nSome copula families become less flexible\nVine copulas or factor copulas may be more appropriate", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Copulas" + ] + }, + { + "objectID": "stan-users-guide/copulas.html#common-copula-families", + "href": "stan-users-guide/copulas.html#common-copula-families", + "title": "Copulas", + "section": "", + "text": "Several copula families are available for modeling different dependence structures in the correlation component:\n\nGaussian copula: Based on the multivariate normal distribution, offering symmetric dependence\nStudent-t copula: Based on the multivariate Student-t distribution, providing more flexibility in tail dependence than the Gaussian copula\nArchimedean copulas: A class of copulas defined through generator 
functions, including:\n\nClayton copula: Stronger lower tail dependence\nGumbel copula: Stronger upper tail dependence\nFrank copula: Symmetric dependence\n\nVine copulas: A flexible approach for modeling high-dimensional dependencies by decomposing the joint distribution into a series of bivariate copulas", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Copulas" + ] + }, + { + "objectID": "stan-users-guide/copulas.html#further-readingviewing", + "href": "stan-users-guide/copulas.html#further-readingviewing", + "title": "Copulas", + "section": "", + "text": "Jónsson’s three part blog series, Copulas in Stan:\n\nPart I: If it bleeds, we can kill it\nPart II: A gentle introduction: the Gaussian copula\nPart III: It was the best of tails, it was the worst of tails: The T-Copula\n\nBrynjólfur Gauti Guðrúnar Jónsson’s StanCon 2024 presentation, Copulas in Stan: Modeling Spatial Dependence\nSean Pinkney’s Helpful Stan functions: copula functions", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Copulas" + ] + }, + { + "objectID": "stan-users-guide/custom-probability.html", + "href": "stan-users-guide/custom-probability.html", + "title": "Custom Probability Functions", + "section": "", + "text": "Custom distributions may also be implemented directly within Stan’s programming language. The only thing that is needed is to increment the total log probability. The rest of the chapter provides examples.\n\n\n\n\nA simple example is the symmetric triangle distribution, whose density is shaped like an isosceles triangle with corners at specified bounds and height determined by the constraint that a density integrate to 1. 
If \\(\\alpha \\in \\mathbb{R}\\) and \\(\\beta \\in \\mathbb{R}\\) are the bounds, with \\(\\alpha < \\beta\\), then \\(y \\in (\\alpha,\\beta)\\) has a density defined as \\[\n\\textsf{triangle}(y \\mid \\alpha,\\beta)\n=\n\\frac{1}{(\\beta - \\alpha)^2}\n\\cdot\n\\textrm{min}(y - \\alpha, \\beta - y).\n\\]\nThe general form of triangle can be coded in Stan as follows.\ndata {\n real alpha;\n real<lower=alpha> beta;\n}\nparameters {\n real<lower=alpha, upper=beta> y;\n}\nmodel {\n target += -2 * log(beta - alpha)\n + log(fmin(y - alpha, beta - y));\n}\nBecause the bounds are specified as data here, the term -2 * log(beta - alpha) could be dropped from the log density. If either of the bounds depends on a parameter, then this term must be included.\nIf \\(\\alpha = -1\\), \\(\\beta = 1\\), and \\(y \\in (-1,1)\\), then fmin(y - alpha, beta - y) is fmin(y + 1, 1 - y), which reduces to 1 - abs(y). Therefore, the density, dropping constants, reduces to \\[\n\\textsf{triangle}(y \\mid -1, 1) \\propto 1 - |y|.\n\\] Consider the following Stan implementation of \\(\\textsf{triangle}(-1,1)\\) for sampling.\nparameters {\n real<lower=-1, upper=1> y;\n}\nmodel {\n target += log1m(abs(y));\n}\nThe single scalar parameter y is declared as lying in the interval (-1, 1). The total log probability is incremented with the joint log probability of all parameters, i.e., \\(\\log \\mathsf{Triangle}(y \\mid -1, 1)\\). This value is coded in Stan as log1m(abs(y)). The function log1m is defined so that log1m(x) has the same value as \\(\\log(1 - x)\\), but the computation is faster, more accurate, and more stable.\nThe constrained type real<lower=-1, upper=1> declared for y is critical for correct sampling behavior. 
If the constraint on y is removed from the program, say by declaring y as having the unconstrained scalar type real, the program would compile, but it would produce arithmetic exceptions at run time when the sampler explored values of y outside of \\((-1,1)\\).\nNow suppose the log probability function were extended to all of \\(\\mathbb{R}\\) as follows by defining the probability to be log(0.0), i.e., \\(-\\infty\\), for values outside of \\((-1, 1)\\).\ntarget += log(fmax(0.0,1 - abs(y)));\nWith the constraint on y in place, this is just a less efficient, slower, and less arithmetically stable version of the original program. But if the constraint on y is removed, the model will compile and run without arithmetic errors, but will not sample properly.1\n\n\n\nIf Stan didn’t happen to include the exponential distribution, it could be coded directly using the following assignment statement, where lambda is the inverse scale and y the sampled variate.\ntarget += log(lambda) - y * lambda;\nThis encoding will work for any lambda and y; they can be parameters, data, or one of each, or even local variables.\nThe assignment statement in the previous paragraph generates C++ code that is similar to that generated by the following distribution statement.\ny ~ exponential(lambda);\nThere are two notable differences. First, the distribution statement will check the inputs to make sure both lambda is positive and y is non-negative (which includes checking that neither is the special not-a-number value).\nThe second difference is that if lambda is not a parameter, transformed parameter, or local model variable, the distribution statement is clever enough to drop the log(lambda) term. This results in the same posterior because Stan only needs the log probability up to an additive constant. 
If lambda and y are both constants, the distribution statement will drop both terms (but still check for out-of-domain errors on the inputs).\n\n\n\nFor another example of user-defined functions, consider the following definition of the bivariate normal cumulative distribution function (CDF) with location zero, unit variance, and correlation rho. That is, it computes \\[\n\\texttt{binormal}\\mathtt{\\_}\\texttt{cdf}(z_1, z_2, \\rho) = \\Pr[Z_1 \\leq z_1 \\text{ and } Z_2 \\leq z_2]\n\\] where the random 2-vector \\(Z\\) has the distribution \\[\nZ \\sim \\textsf{multivariate normal}\\left(\n\\begin{bmatrix}\n0 \\\\\n0\n\\end{bmatrix}, \\\n\\begin{bmatrix}\n1 & \\rho\n\\\\\n\\rho & 1\n\\end{bmatrix}\n\\right).\n\\]\nThe following Stan program implements this function,\nreal binormal_cdf(tuple(real, real) z, real rho) {\n real z1 = z.1;\n real z2 = z.2;\n if (z1 == 0 && z2 == 0) {\n return 0.25 + asin(rho) / (2 * pi());\n }\n real denom = sqrt((1 + rho) * (1 - rho));\n real term1 = z1 == 0\n ? (z2 > 0 ? 0.25 : -0.25)\n : owens_t(z1, (z2 / z1 - rho) / denom);\n real term2 = z2 == 0\n ? (z1 > 0 ? 0.25 : -0.25)\n : owens_t(z2, (z1 / z2 - rho) / denom);\n real z1z2 = z1 * z2;\n real delta = z1z2 < 0 || (z1z2 == 0 && (z1 + z2) < 0);\n return 0.5 * (Phi(z1) + Phi(z2) - delta) - term1 - term2;\n}\nIt is written using a tuple argument so that it may be called as binormal_cdf((z1, z2) | rho). The two ternary operators defining term1 and term2 are derived by taking the limit of the owens_t function when the second argument goes to infinity.\nOne way to test a user-defined function is to have it operate on transformed data. 
That way, when it’s run, the output of the functions is printed before sampling begins.\ntransformed data {\n for (zzr in {[0, 0, 0.5],\n [0, 1, 0.5],\n [1, 0, -0.2],\n [1, 3, 0.9]}) {\n real z1 = zzr[1];\n real z2 = zzr[2];\n real rho = zzr[3];\n print(\"binomial_cdf((\", z1, \", \", z2, \") | \", rho, \")\",\n \"=\", binormal_cdf((z1, z2) | rho));\n }\n}\nIn this case, we verified that the results match those of the pbivnorm package in R.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Custom Probability Functions" + ] + }, + { + "objectID": "stan-users-guide/custom-probability.html#examples", + "href": "stan-users-guide/custom-probability.html#examples", + "title": "Custom Probability Functions", + "section": "", + "text": "A simple example is the symmetric triangle distribution, whose density is shaped like an isosceles triangle with corners at specified bounds and height determined by the constraint that a density integrate to 1. If \\(\\alpha \\in \\mathbb{R}\\) and \\(\\beta \\in \\mathbb{R}\\) are the bounds, with \\(\\alpha < \\beta\\), then \\(y \\in (\\alpha,\\beta)\\) has a density defined as \\[\n\\textsf{triangle}(y \\mid \\alpha,\\beta)\n=\n\\frac{1}{(\\beta - \\alpha)^2}\n\\cdot\n\\textrm{min}(y - \\alpha, \\beta - y).\n\\]\nThe general form of triangle can be coded in Stan as follows.\ndata {\n real alpha;\n real<lower=alpha> beta;\n}\nparameters {\n real<lower=alpha, upper=beta> y;\n}\nmodel {\n target += -2 * log(beta - alpha)\n + log(fmin(y - alpha, beta - y));\n}\nBecause the bounds are specified as data here, the term -2 * log(beta - alpha) could be dropped from the log density. If either of the bounds depends on a parameter, then this term must be included.\nIf \\(\\alpha = -1\\), \\(\\beta = 1\\), and \\(y \\in (-1,1)\\), then fmin(y - alpha, beta - y) is fmin(y + 1, 1 - y), which reduces to 1 - abs(y). 
Therefore, the density, dropping constants, reduces to \\[\n\\textsf{triangle}(y \\mid -1, 1) \\propto 1 - |y|.\n\\] Consider the following Stan implementation of \\(\\textsf{triangle}(-1,1)\\) for sampling.\nparameters {\n real<lower=-1, upper=1> y;\n}\nmodel {\n target += log1m(abs(y));\n}\nThe single scalar parameter y is declared as lying in the interval (-1, 1). The total log probability is incremented with the joint log probability of all parameters, i.e., \\(\\log \\mathsf{Triangle}(y \\mid -1, 1)\\). This value is coded in Stan as log1m(abs(y)). The function log1m is defined so that log1m(x) has the same value as \\(\\log(1 - x)\\), but the computation is faster, more accurate, and more stable.\nThe constrained type real<lower=-1, upper=1> declared for y is critical for correct sampling behavior. If the constraint on y is removed from the program, say by declaring y as having the unconstrained scalar type real, the program would compile, but it would produce arithmetic exceptions at run time when the sampler explored values of y outside of \\((-1,1)\\).\nNow suppose the log probability function were extended to all of \\(\\mathbb{R}\\) as follows by defining the probability to be log(0.0), i.e., \\(-\\infty\\), for values outside of \\((-1, 1)\\).\ntarget += log(fmax(0.0,1 - abs(y)));\nWith the constraint on y in place, this is just a less efficient, slower, and less arithmetically stable version of the original program. 
But if the constraint on y is removed, the model will compile and run without arithmetic errors, but will not sample properly.1\n\n\n\nIf Stan didn’t happen to include the exponential distribution, it could be coded directly using the following assignment statement, where lambda is the inverse scale and y the sampled variate.\ntarget += log(lambda) - y * lambda;\nThis encoding will work for any lambda and y; they can be parameters, data, or one of each, or even local variables.\nThe assignment statement in the previous paragraph generates C++ code that is similar to that generated by the following distribution statement.\ny ~ exponential(lambda);\nThere are two notable differences. First, the distribution statement will check the inputs to make sure both lambda is positive and y is non-negative (which includes checking that neither is the special not-a-number value).\nThe second difference is that if lambda is not a parameter, transformed parameter, or local model variable, the distribution statement is clever enough to drop the log(lambda) term. This results in the same posterior because Stan only needs the log probability up to an additive constant. If lambda and y are both constants, the distribution statement will drop both terms (but still check for out-of-domain errors on the inputs).\n\n\n\nFor another example of user-defined functions, consider the following definition of the bivariate normal cumulative distribution function (CDF) with location zero, unit variance, and correlation rho. 
That is, it computes \\[\n\\texttt{binormal}\\mathtt{\\_}\\texttt{cdf}(z_1, z_2, \\rho) = \\Pr[Z_1 \\leq z_1 \\text{ and } Z_2 \\leq z_2]\n\\] where the random 2-vector \\(Z\\) has the distribution \\[\nZ \\sim \\textsf{multivariate normal}\\left(\n\\begin{bmatrix}\n0 \\\\\n0\n\\end{bmatrix}, \\\n\\begin{bmatrix}\n1 & \\rho\n\\\\\n\\rho & 1\n\\end{bmatrix}\n\\right).\n\\]\nThe following Stan program implements this function,\nreal binormal_cdf(tuple(real, real) z, real rho) {\n real z1 = z.1;\n real z2 = z.2;\n if (z1 == 0 && z2 == 0) {\n return 0.25 + asin(rho) / (2 * pi());\n }\n real denom = sqrt((1 + rho) * (1 - rho));\n real term1 = z1 == 0\n ? (z2 > 0 ? 0.25 : -0.25)\n : owens_t(z1, (z2 / z1 - rho) / denom);\n real term2 = z2 == 0\n ? (z1 > 0 ? 0.25 : -0.25)\n : owens_t(z2, (z1 / z2 - rho) / denom);\n real z1z2 = z1 * z2;\n real delta = z1z2 < 0 || (z1z2 == 0 && (z1 + z2) < 0);\n return 0.5 * (Phi(z1) + Phi(z2) - delta) - term1 - term2;\n}\nIt is written using a tuple argument so that it may be called as binormal_cdf((z1, z2) | rho). The two ternary operators defining term1 and term2 are derived by taking the limit of the owens_t function when the second argument goes to infinity.\nOne way to test a user-defined function is to have it operate on transformed data. 
That way, when it’s run, the output of the functions is printed before sampling begins.\ntransformed data {\n for (zzr in {[0, 0, 0.5],\n [0, 1, 0.5],\n [1, 0, -0.2],\n [1, 3, 0.9]}) {\n real z1 = zzr[1];\n real z2 = zzr[2];\n real rho = zzr[3];\n print(\"binomial_cdf((\", z1, \", \", z2, \") | \", rho, \")\",\n \"=\", binormal_cdf((z1, z2) | rho));\n }\n}\nIn this case, we verified that the results match those of the pbivnorm package in R.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Custom Probability Functions" + ] + }, + { + "objectID": "stan-users-guide/custom-probability.html#footnotes", + "href": "stan-users-guide/custom-probability.html#footnotes", + "title": "Custom Probability Functions", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe problem is the (extremely!) light tails of the triangle distribution. The standard HMC and NUTS samplers can’t get into the corners of the triangle properly. Because the Stan code declares y to be of type real<lower=-1, upper=1>, the inverse logit transform is applied to the unconstrained variable and its log absolute derivative added to the log probability. The resulting distribution on the logit-transformed y is well behaved.↩︎", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Custom Probability Functions" + ] + }, + { + "objectID": "stan-users-guide/decision-analysis.html", + "href": "stan-users-guide/decision-analysis.html", + "title": "Decision Analysis", + "section": "", + "text": "Statistical decision analysis is about making decisions under uncertainty. In order to make decisions, outcomes must have some notion of “utility” associated with them. The so-called “Bayes optimal” decision is the one that maximizes expected utility (or equivalently, minimizes expected loss). This chapter shows how Stan can be used to simultaneously estimate the distribution of outcomes based on decisions and compute the required expected utilities.\n\n\nFollowing Gelman et al. 
(2013), Bayesian decision analysis can be factored into the following four steps.\n\nDefine a set \\(X\\) of possible outcomes and a set \\(D\\) of possible decisions.\nDefine a probability distribution of outcomes conditional on decisions through a conditional density function \\(p(x \\mid d)\\) for \\(x \\in X\\) and \\(d \\in D.\\)\nDefine a utility function \\(U : X \\rightarrow \\mathbb{R}\\) mapping outcomes to their utility.\nChoose action \\(d^* \\in D\\) with highest expected utility, \\[\nd^* = \\textrm{arg max}_d \\ \\mathbb{E}[U(x) \\mid d].\n\\]\n\nThe outcomes should represent as much information as possible that is relevant to utility. In Bayesian decision analysis, the distribution of outcomes will typically be a posterior predictive distribution conditioned on observed data. There is a large literature in psychology and economics related to defining utility functions. For example, the utility of money is usually assumed to be strictly concave rather than linear (i.e., the marginal utility of getting another unit of money decreases the more money one has).\n\n\n\nThis section outlines a very simple decision analysis for a commuter deciding among modes of transportation to get to work: walk, bike share, public transportation, or cab. Suppose the commuter has been taking various modes of transportation for the previous year and the transportation conditions and costs have not changed during that time. Over the year, such a commuter might accumulate two hundred observations of the time it takes to get to work given a choice of commute mode.\n\n\nA decision consists of the choice of commute mode and the outcome is a time and cost. 
More formally,\n\nthe set of decisions is \\(D = 1:4\\), corresponding to the commute types walking, bicycling, public transportation, and cab, respectively, and\nthe set of outcomes \\(X = \\mathbb{R} \\times \\mathbb{R}_+\\) contains pairs of numbers \\(x = (c, t)\\) consisting of a cost \\(c\\) and time \\(t \\geq 0\\).\n\n\n\n\nThe density required is \\(p(x \\mid d),\\) where \\(d \\in D\\) is a decision and \\(x = (c, t) \\in X\\) is an outcome. Being a statistical decision problem, this density will be a posterior predictive distribution conditioned on previously observed outcome and decision pairs, based on a parameter model with parameters \\(\\theta,\\) \\[\np(x \\mid d, x^{\\textrm{obs}}, d^{\\textrm{obs}})\n=\n\\int\n p(x \\mid d, \\theta)\n \\cdot p(\\theta \\mid x^{\\textrm{obs}}, d^{\\textrm{obs}})\n \\, \\textrm{d}\\theta.\n\\] The observed data for a year of commutes consists of the chosen commute mode \\(d^{\\textrm{obs}}_n\\) and observed costs and times \\(x^{\\textrm{obs}}_n = (c^{\\textrm{obs}}_n, t^{\\textrm{obs}}_n)\\) for \\(n\n\\in 1:200.\\)\nFor simplicity, commute time \\(t_n\\) for trip \\(n\\) will be modeled as lognormal for a given choice of transportation \\(d_n \\in 1:4,\\) \\[\nt_n \\sim \\textrm{lognormal}(\\mu_{d[n]}, \\sigma_{d[n]}).\n\\] To understand the notation, \\(d_n\\), also written \\(d[n]\\), is the mode of transportation used for trip \\(n\\). 
For example if trip \\(n\\) was by bicycle, then \\(t_n \\sim \\textrm{lognormal}(\\mu_2, \\sigma_2),\\) where \\(\\mu_2\\) and \\(\\sigma_2\\) are the lognormal parameters for bicycling.\nSimple fixed priors are used for each mode of transportation \\(k \\in 1:4,\\) \\[\\begin{eqnarray*}\n\\mu_k & \\sim & \\textrm{normal}(0, 5)\n\\\\[2pt]\n\\sigma_k & \\sim & \\textrm{lognormal}(0, 1).\n\\end{eqnarray*}\\] These priors are consistent with a broad range of commute times; in a more realistic model each commute mode would have its own prior based on knowledge of the city and the time of day would be used as a covariate; here the commutes are taken to be exchangeable.\nCost is usually a constant function for public transportation, walking, and bicycling. Nevertheless, for simplicity, all costs will be modeled as lognormal, \\[\nc_n \\sim \\textrm{lognormal}(\\nu_{d[n]}, \\tau_{d[n]}).\n\\] Again, the priors are fixed for the modes of transportation, \\[\\begin{eqnarray*}\n\\nu_k & \\sim & \\textrm{normal}(0, 5)\n\\\\[2pt]\n\\tau_k & \\sim & \\textrm{lognormal}(0, 1).\n\\end{eqnarray*}\\] A more realistic approach would model cost conditional on time, because the cost of a cab depends on route chosen and the time it takes.\nThe full set of parameters that are marginalized in the posterior predictive distribution is \\[\n\\theta = (\\mu_{1:4}, \\sigma_{1:4}, \\nu_{1:4}, \\tau_{1:4}).\n\\]\n\n\n\nFor the sake of concreteness, the utility function will be assumed to be a simple function of cost and time. Further suppose the commuter values their commute time at $25 per hour and has a utility function that is linear in the commute cost and time. Then the utility function may be defined as\n\\[\nU(c, t) = -(c + 25 \\cdot t)\n\\]\nThe sign is negative because high cost is undesirable. 
A better utility function might have a step function or increasing costs for being late, different costs for different modes of transportation because of their comfort and environmental impact, and non-linearity of utility in cost.\n\n\n\nAt this point, all that is left is to calculate expected utility for each decision and choose the optimum. If the decisions consist of a small set of discrete choices, expected utility can be easily coded in Stan. The utility function is coded as a function, the observed data is coded as data, the model parameters coded as parameters, and the model block itself coded to follow the sampling distributions of each parameter.\nfunctions {\n real U(real c, real t) {\n return -(c + 25 * t);\n }\n}\ndata {\n int<lower=0> N;\n array[N] int<lower=1, upper=4> d;\n array[N] real c;\n array[N] real<lower=0> t;\n}\nparameters {\n vector[4] mu;\n vector<lower=0>[4] sigma;\n array[4] real nu;\n array[4] real<lower=0> tau;\n}\nmodel {\n mu ~ normal(0, 1);\n sigma ~ lognormal(0, 0.25);\n nu ~ normal(0, 20);\n tau ~ lognormal(0, 0.25);\n t ~ lognormal(mu[d], sigma[d]);\n c ~ lognormal(nu[d], tau[d]);\n}\ngenerated quantities {\n array[4] real util;\n for (k in 1:4) {\n util[k] = U(lognormal_rng(nu[k], tau[k]),\n lognormal_rng(mu[k], sigma[k]));\n }\n}\nThe generated quantities block defines an array variable util where util[k], which will hold the utility derived from a random commute for choice k generated according to the model parameters for that choice. This randomness is required to appropriately characterize the posterior predictive distribution of utility.\nFor simplicity in this initial formulation, all four commute options have their costs estimated, even though cost is fixed for three of the options. 
To deal with the fact that some costs are fixed, the costs would have to be hardcoded or read in as data, nu and tau would be declared as univariate, and the RNG for cost would only be employed when k == 4.\nDefining the utility function for pairs of vectors would allow the random number generation in the generated quantities block to be vectorized.\nAll that is left is to run Stan. The posterior mean for util[k] is the expected utility, which written out with full conditioning, is \\[\\begin{eqnarray*}\n\\mathbb{E}\\!\\left[U(x) \\mid d = k, d^{\\textrm{obs}}, x^{\\textrm{obs}}\\right]\n& = &\n\\int\n U(x)\n \\cdot p(x \\mid d = k, \\theta)\n \\cdot p(\\theta \\mid d^{\\textrm{obs}}, x^{\\textrm{obs}})\n \\, \\textrm{d}\\theta\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M U(x^{(m)} ),\n\\end{eqnarray*}\\] where \\[\nx^{(m)} \\sim p(x \\mid d = k, \\theta^{(m)} )\n\\] and \\[\n\\theta^{(m)}\n\\sim p(\\theta \\mid d^{\\textrm{obs}}, x^{\\textrm{obs}}).\n\\]\nIn terms of Stan’s execution, the random generation of \\(x^{(m)}\\) is carried out with the lognormal_rng operations after \\(\\theta^{(m)}\\) is drawn from the model posterior. The average is then calculated after multiple chains are run and combined.\nIt only remains to make the decision k with highest expected utility, which will correspond to the choice with the highest posterior mean for util[k]. This can be read off of the mean column of the Stan’s summary statistics or accessed programmatically through Stan’s interfaces.\n\n\n\n\nMany choices, such as how much to invest for retirement or how long to spend at the gym are not discrete, but continuous. In these cases, the continuous choice can be coded as data in the Stan program. Then the expected utilities may be calculated. In other words, Stan can be used as a function from a choice to expected utilities. Then an external optimizer can call that function. This optimization can be difficult without gradient information. 
Gradients could be supplied by automatic differentiation, but Stan is not currently instrumented to calculate those derivatives.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Decision Analysis" + ] + }, + { + "objectID": "stan-users-guide/decision-analysis.html#outline-of-decision-analysis", + "href": "stan-users-guide/decision-analysis.html#outline-of-decision-analysis", + "title": "Decision Analysis", + "section": "", + "text": "Following Gelman et al. (2013), Bayesian decision analysis can be factored into the following four steps.\n\nDefine a set \\(X\\) of possible outcomes and a set \\(D\\) of possible decisions.\nDefine a probability distribution of outcomes conditional on decisions through a conditional density function \\(p(x \\mid d)\\) for \\(x \\in X\\) and \\(d \\in D.\\)\nDefine a utility function \\(U : X \\rightarrow \\mathbb{R}\\) mapping outcomes to their utility.\nChoose action \\(d^* \\in D\\) with highest expected utility, \\[\nd^* = \\textrm{arg max}_d \\ \\mathbb{E}[U(x) \\mid d].\n\\]\n\nThe outcomes should represent as much information as possible that is relevant to utility. In Bayesian decision analysis, the distribution of outcomes will typically be a posterior predictive distribution conditioned on observed data. There is a large literature in psychology and economics related to defining utility functions. 
For example, the utility of money is usually assumed to be strictly concave rather than linear (i.e., the marginal utility of getting another unit of money decreases the more money one has).", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Decision Analysis" + ] + }, + { + "objectID": "stan-users-guide/decision-analysis.html#example-decision-analysis", + "href": "stan-users-guide/decision-analysis.html#example-decision-analysis", + "title": "Decision Analysis", + "section": "", + "text": "This section outlines a very simple decision analysis for a commuter deciding among modes of transportation to get to work: walk, bike share, public transportation, or cab. Suppose the commuter has been taking various modes of transportation for the previous year and the transportation conditions and costs have not changed during that time. Over the year, such a commuter might accumulate two hundred observations of the time it takes to get to work given a choice of commute mode.\n\n\nA decision consists of the choice of commute mode and the outcome is a time and cost. More formally,\n\nthe set of decisions is \\(D = 1:4\\), corresponding to the commute types walking, bicycling, public transportation, and cab, respectively, and\nthe set of outcomes \\(X = \\mathbb{R} \\times \\mathbb{R}_+\\) contains pairs of numbers \\(x = (c, t)\\) consisting of a cost \\(c\\) and time \\(t \\geq 0\\).\n\n\n\n\nThe density required is \\(p(x \\mid d),\\) where \\(d \\in D\\) is a decision and \\(x = (c, t) \\in X\\) is an outcome. 
Being a statistical decision problem, this density will be a posterior predictive distribution conditioned on previously observed outcome and decision pairs, based on a parameter model with parameters \\(\\theta,\\) \\[\np(x \\mid d, x^{\\textrm{obs}}, d^{\\textrm{obs}})\n=\n\\int\n p(x \\mid d, \\theta)\n \\cdot p(\\theta \\mid x^{\\textrm{obs}}, d^{\\textrm{obs}})\n \\, \\textrm{d}\\theta.\n\\] The observed data for a year of commutes consists of the chosen commute mode \\(d^{\\textrm{obs}}_n\\) and observed costs and times \\(x^{\\textrm{obs}}_n = (c^{\\textrm{obs}}_n, t^{\\textrm{obs}}_n)\\) for \\(n\n\\in 1:200.\\)\nFor simplicity, commute time \\(t_n\\) for trip \\(n\\) will be modeled as lognormal for a given choice of transportation \\(d_n \\in 1:4,\\) \\[\nt_n \\sim \\textrm{lognormal}(\\mu_{d[n]}, \\sigma_{d[n]}).\n\\] To understand the notation, \\(d_n\\), also written \\(d[n]\\), is the mode of transportation used for trip \\(n\\). For example if trip \\(n\\) was by bicycle, then \\(t_n \\sim \\textrm{lognormal}(\\mu_2, \\sigma_2),\\) where \\(\\mu_2\\) and \\(\\sigma_2\\) are the lognormal parameters for bicycling.\nSimple fixed priors are used for each mode of transportation \\(k \\in 1:4,\\) \\[\\begin{eqnarray*}\n\\mu_k & \\sim & \\textrm{normal}(0, 5)\n\\\\[2pt]\n\\sigma_k & \\sim & \\textrm{lognormal}(0, 1).\n\\end{eqnarray*}\\] These priors are consistent with a broad range of commute times; in a more realistic model each commute mode would have its own prior based on knowledge of the city and the time of day would be used as a covariate; here the commutes are taken to be exchangeable.\nCost is usually a constant function for public transportation, walking, and bicycling. 
Nevertheless, for simplicity, all costs will be modeled as lognormal, \\[\nc_n \\sim \\textrm{lognormal}(\\nu_{d[n]}, \\tau_{d[n]}).\n\\] Again, the priors are fixed for the modes of transportation, \\[\\begin{eqnarray*}\n\\nu_k & \\sim & \\textrm{normal}(0, 5)\n\\\\[2pt]\n\\tau_k & \\sim & \\textrm{lognormal}(0, 1).\n\\end{eqnarray*}\\] A more realistic approach would model cost conditional on time, because the cost of a cab depends on route chosen and the time it takes.\nThe full set of parameters that are marginalized in the posterior predictive distribution is \\[\n\\theta = (\\mu_{1:4}, \\sigma_{1:4}, \\nu_{1:4}, \\tau_{1:4}).\n\\]\n\n\n\nFor the sake of concreteness, the utility function will be assumed to be a simple function of cost and time. Further suppose the commuter values their commute time at $25 per hour and has a utility function that is linear in the commute cost and time. Then the utility function may be defined as\n\\[\nU(c, t) = -(c + 25 \\cdot t)\n\\]\nThe sign is negative because high cost is undesirable. A better utility function might have a step function or increasing costs for being late, different costs for different modes of transportation because of their comfort and environmental impact, and non-linearity of utility in cost.\n\n\n\nAt this point, all that is left is to calculate expected utility for each decision and choose the optimum. If the decisions consist of a small set of discrete choices, expected utility can be easily coded in Stan. 
The utility function is coded as a function, the observed data is coded as data, the model parameters coded as parameters, and the model block itself coded to follow the sampling distributions of each parameter.\nfunctions {\n real U(real c, real t) {\n return -(c + 25 * t);\n }\n}\ndata {\n int<lower=0> N;\n array[N] int<lower=1, upper=4> d;\n array[N] real c;\n array[N] real<lower=0> t;\n}\nparameters {\n vector[4] mu;\n vector<lower=0>[4] sigma;\n array[4] real nu;\n array[4] real<lower=0> tau;\n}\nmodel {\n mu ~ normal(0, 1);\n sigma ~ lognormal(0, 0.25);\n nu ~ normal(0, 20);\n tau ~ lognormal(0, 0.25);\n t ~ lognormal(mu[d], sigma[d]);\n c ~ lognormal(nu[d], tau[d]);\n}\ngenerated quantities {\n array[4] real util;\n for (k in 1:4) {\n util[k] = U(lognormal_rng(nu[k], tau[k]),\n lognormal_rng(mu[k], sigma[k]));\n }\n}\nThe generated quantities block defines an array variable util where util[k], which will hold the utility derived from a random commute for choice k generated according to the model parameters for that choice. This randomness is required to appropriately characterize the posterior predictive distribution of utility.\nFor simplicity in this initial formulation, all four commute options have their costs estimated, even though cost is fixed for three of the options. To deal with the fact that some costs are fixed, the costs would have to be hardcoded or read in as data, nu and tau would be declared as univariate, and the RNG for cost would only be employed when k == 4.\nDefining the utility function for pairs of vectors would allow the random number generation in the generated quantities block to be vectorized.\nAll that is left is to run Stan. 
The posterior mean for util[k] is the expected utility, which written out with full conditioning, is \\[\\begin{eqnarray*}\n\\mathbb{E}\\!\\left[U(x) \\mid d = k, d^{\\textrm{obs}}, x^{\\textrm{obs}}\\right]\n& = &\n\\int\n U(x)\n \\cdot p(x \\mid d = k, \\theta)\n \\cdot p(\\theta \\mid d^{\\textrm{obs}}, x^{\\textrm{obs}})\n \\, \\textrm{d}\\theta\n\\\\[4pt]\n& \\approx &\n\\frac{1}{M} \\sum_{m = 1}^M U(x^{(m)} ),\n\\end{eqnarray*}\\] where \\[\nx^{(m)} \\sim p(x \\mid d = k, \\theta^{(m)} )\n\\] and \\[\n\\theta^{(m)}\n\\sim p(\\theta \\mid d^{\\textrm{obs}}, x^{\\textrm{obs}}).\n\\]\nIn terms of Stan’s execution, the random generation of \\(x^{(m)}\\) is carried out with the lognormal_rng operations after \\(\\theta^{(m)}\\) is drawn from the model posterior. The average is then calculated after multiple chains are run and combined.\nIt only remains to make the decision k with highest expected utility, which will correspond to the choice with the highest posterior mean for util[k]. This can be read off of the mean column of the Stan’s summary statistics or accessed programmatically through Stan’s interfaces.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Decision Analysis" + ] + }, + { + "objectID": "stan-users-guide/decision-analysis.html#continuous-choices", + "href": "stan-users-guide/decision-analysis.html#continuous-choices", + "title": "Decision Analysis", + "section": "", + "text": "Many choices, such as how much to invest for retirement or how long to spend at the gym are not discrete, but continuous. In these cases, the continuous choice can be coded as data in the Stan program. Then the expected utilities may be calculated. In other words, Stan can be used as a function from a choice to expected utilities. Then an external optimizer can call that function. This optimization can be difficult without gradient information. 
Gradients could be supplied by automatic differentiation, but Stan is not currently instrumented to calculate those derivatives.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Decision Analysis" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html", + "href": "stan-users-guide/finite-mixtures.html", + "title": "Finite Mixtures", + "section": "", + "text": "Finite mixture models of an outcome assume that the outcome is drawn from one of several distributions, the identity of which is controlled by a categorical mixing distribution. Mixture models typically have multimodal densities with modes near the modes of the mixture components. Mixture models may be parameterized in several ways, as described in the following sections. Mixture models may be used directly for modeling data with multimodal distributions, or they may be used as priors for other parameters.\n\n\nClustering models, as discussed in the clustering chapter, are just a particular class of mixture models that have been widely applied to clustering in the engineering and machine-learning literature. The normal mixture model discussed in this chapter reappears in multivariate form as the statistical basis for the \\(K\\)-means algorithm; the latent Dirichlet allocation model, usually applied to clustering problems, can be viewed as a mixed-membership multinomial mixture model.\n\n\n\nOne way to parameterize a mixture model is with a latent categorical variable indicating which mixture component was responsible for the outcome. For example, consider \\(K\\) normal distributions with locations \\(\\mu_k \\in \\mathbb{R}\\) and scales \\(\\sigma_k \\in (0,\\infty)\\). Now consider mixing them in proportion \\(\\lambda\\), where \\(\\lambda_k \\geq 0\\) and \\(\\sum_{k=1}^K \\lambda_k = 1\\) (i.e., \\(\\lambda\\) lies in the unit \\(K\\)-simplex). 
For each outcome \\(y_n\\) there is a latent variable \\(z_n\\) in \\(\\{ 1,\\dotsc,K \\}\\) with a categorical distribution parameterized by \\(\\lambda\\), \\[\nz_n \\sim \\textsf{categorical}(\\lambda).\n\\]\nThe variable \\(y_n\\) is distributed according to the parameters of the mixture component \\(z_n\\), \\[\ny_n \\sim \\textsf{normal}(\\mu_{z[n]},\\sigma_{z[n]}).\n\\]\nThis model is not directly supported by Stan because it involves discrete parameters \\(z_n\\), but Stan can sample \\(\\mu\\) and \\(\\sigma\\) by summing out the \\(z\\) parameter as described in the next section.\n\n\n\nTo implement the normal mixture model outlined in the previous section in Stan, the discrete parameters can be summed out of the model. If \\(Y\\) is a mixture of \\(K\\) normal distributions with locations \\(\\mu_k\\) and scales \\(\\sigma_k\\) with mixing proportions \\(\\lambda\\) in the unit \\(K\\)-simplex, then \\[\np_Y\\left(y \\mid \\lambda, \\mu, \\sigma \\right)\n=\n\\sum_{k=1}^K \\lambda_k \\, \\textsf{normal}\\left(y \\mid \\mu_k, \\sigma_k\\right).\n\\]\n\n\nThe log sum of exponentials function is used to define mixtures on the log scale. It is defined for two inputs by \\[\n\\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}(a, b) = \\log \\left(\\exp(a) + \\exp(b)\\right).\n\\]\nIf \\(a\\) and \\(b\\) are probabilities on the log scale, then \\(\\exp(a) +\n\\exp(b)\\) is their sum on the linear scale, and the outer log converts the result back to the log scale; to summarize, log_sum_exp does linear addition on the log scale. The reason to use Stan’s built-in log_sum_exp function is that it can prevent underflow and overflow in the exponentiation, by calculating the result as \\[\n\\log \\left( \\exp(a) + \\exp(b)\\right)\n= c + \\log \\left( \\exp(a - c) + \\exp(b - c) \\right),\n\\] where \\(c = \\max(a, b)\\). 
In this evaluation, one of the terms, \\(a - c\\) or \\(b - c\\), is zero and the other is negative, thus eliminating the possibility of overflow or underflow in the leading term while extracting the most arithmetic precision possible by pulling the \\(\\max(a, b)\\) out of the log-exp round trip.\nFor example, the mixture of \\(\\textsf{normal}(-1, 2)\\) with \\(\\textsf{normal}(3, 1)\\), with mixing proportion \\(\\lambda =\n[0.3,0.7]^{\\top}\\), can be implemented in Stan as follows.\nparameters {\n real y;\n}\nmodel {\n target += log_sum_exp(log(0.3) + normal_lpdf(y | -1, 2),\n log(0.7) + normal_lpdf(y | 3, 1));\n}\nThe log probability term is derived by taking \\[\\begin{align*}\n\\log\\, &p\\left(y \\mid \\lambda,\\mu,\\sigma \\right) \\\\\n&= \\log\\big( 0.3 \\times \\textsf{normal}\\left(y \\mid -1,2 \\right)\n + 0.7 \\times \\textsf{normal}\\left(y \\mid 3,1 \\right) \\big) \\\\\n&= \\log\\bigg( \\exp\\Big(\\log\\big(0.3 \\times \\textsf{normal}\\left(y \\mid -1,2 \\right)\\big)\\Big)\n + \\exp\\Big(\\log\\big(0.7 \\times \\textsf{normal}\\left(y \\mid 3,1 \\right)\\big)\\Big) \\bigg) \\\\\n&= \\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}\\big(\n \\log(0.3) + \\log \\textsf{normal}\\left(y \\mid -1,2 \\right),\n \\log(0.7) + \\log \\textsf{normal}\\left(y \\mid 3,1 \\right) \\big).\n\\end{align*}\\]\n\n\n\nIf a two-component mixture has a mixing ratio of 0.5, then the mixing ratios can be dropped, because\nlog_half = log(0.5);\nfor (n in 1:N) {\n target +=\n log_sum_exp(log_half + normal_lpdf(y[n] | mu[1], sigma[1]),\n log_half + normal_lpdf(y[n] | mu[2], sigma[2]));\n}\nthen the \\(\\log 0.5\\) term isn’t contributing to the proportional density, and the above can be replaced with the more efficient version\nfor (n in 1:N) {\n target += log_sum_exp(normal_lpdf(y[n] | mu[1], sigma[1]),\n normal_lpdf(y[n] | mu[2], sigma[2]));\n}\nThe same result holds if there are \\(K\\) components and the mixing simplex \\(\\lambda\\) is symmetric, 
i.e., \\[\n\\lambda = \\left( \\frac{1}{K}, \\dotsc, \\frac{1}{K} \\right).\n\\]\nThe result follows from the identity \\[\n\\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}(c + a, c + b)\n=\nc + \\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}(a, b)\n\\] and the fact that adding a constant \\(c\\) to the log density accumulator has no effect because the log density is only specified up to an additive constant in the first place. There is nothing specific to the normal distribution here; constants may always be dropped from the target.\n\n\n\nThe posterior \\(p(z_n \\mid y_n, \\mu, \\sigma)\\) over the mixture indicator \\(z_n\n\\in 1:K\\) is often of interest as \\(p(z_n = k \\mid y, \\mu, \\sigma)\\) is the posterior probability that observation \\(y_n\\) was generated by mixture component \\(k\\). The posterior can be computed via Bayes’s rule, \\[\\begin{align*}\n\\Pr\\!\\left[z_n = k \\mid y_n, \\mu, \\sigma, \\lambda \\right]\n &\\propto p\\left(y_n \\mid z_n = k, \\mu, \\sigma\\right)\\, p\\left(z_n = k \\mid \\lambda\\right) \\\\\n &= \\textsf{normal}\\left(y_n \\mid \\mu_k, \\sigma_k\\right) \\cdot \\lambda_k.\n\\end{align*}\\]\nThe normalization can be done via summation, because \\(z_n \\in 1{:}K\\) only takes on finitely many values. 
In detail, \\[\np\\left(z_n = k \\mid y_n, \\mu, \\sigma, \\lambda \\right) =\n\\frac{p\\left(y_n \\mid z_n = k, \\mu, \\sigma \\right) \\cdot p\\left(z_n = k \\mid \\lambda \\right)}\n {\\sum_{k' = 1}^K p\\left(y_n \\mid z_n = k', \\mu, \\sigma \\right)\n \\cdot p\\left(z_n = k' \\mid \\lambda \\right)}.\n\\]\nOn the log scale, the normalized probability is computed as \\[\\begin{align*}\n\\log\\,&\\Pr\\!\\left[z_n = k \\mid y_n, \\mu, \\sigma, \\lambda\\right] \\\\\n&= \\log p\\left(y_n \\mid z_n = k, \\mu, \\sigma\\right) + \\log \\Pr\\!\\left[z_n = k \\mid \\lambda\\right] \\\\\n&\\quad - \\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}_{k' = 1}^K \\big(\\log p\\left(y_n \\mid z_n = k', \\mu, \\sigma\\right)\n + \\log p\\left(z_n = k' \\mid \\lambda\\right)\\big).\n\\end{align*}\\] This can be coded up directly in Stan; the change-point model in the change point section provides an example.\n\n\n\nGiven the scheme for representing mixtures, it may be moved to an estimation setting, where the locations, scales, and mixture components are unknown. Further generalizing to a number of mixture components specified as data yields the following model.\ndata {\n int<lower=1> K; // number of mixture components\n int<lower=1> N; // number of data points\n array[N] real y; // observations\n}\nparameters {\n simplex[K] theta; // mixing proportions\n ordered[K] mu; // locations of mixture components\n vector<lower=0>[K] sigma; // scales of mixture components\n}\nmodel {\n vector[K] log_theta = log(theta); // cache log calculation\n sigma ~ lognormal(0, 2);\n mu ~ normal(0, 10);\n for (n in 1:N) {\n vector[K] lps = log_theta;\n for (k in 1:K) {\n lps[k] += normal_lpdf(y[n] | mu[k], sigma[k]);\n }\n target += log_sum_exp(lps);\n }\n}\nThe model involves K mixture components and N data points. 
The mixing proportion parameter theta is declared to be a unit \\(K\\)-simplex, whereas the component location parameter mu and scale parameter sigma are both defined to be K-vectors.\nThe location parameter mu is declared to be an ordered vector in order to identify the model. This will not affect inferences that do not depend on the ordering of the components as long as the prior for the components mu[k] is symmetric, as it is here (each component has an independent \\(\\textsf{normal}(0, 10)\\) prior). It would even be possible to include a hierarchical prior for the components.\nThe values in the scale array sigma are constrained to be non-negative, and have a weakly informative prior given in the model chosen to avoid zero values and thus collapsing components.\nThe model declares a local array variable lps to be size K and uses it to accumulate the log contributions from the mixture components. The main action is in the loop over data points n. For each such point, the log of \\(\\theta_k \\times\n\\textsf{normal}\\left(y_n \\mid \\mu_k,\\sigma_k\\right)\\) is calculated and added to the array lps. Then the log probability is incremented with the log sum of exponentials of those values.\n\n\n\n\nThere is (currently) no way to vectorize mixture models at the observation level in Stan. This section is to warn users away from attempting to vectorize naively, as it results in a different model. 
A proper mixture at the observation level is defined as follows, where we assume that lambda, y[n], mu[1], mu[2], and sigma[1], sigma[2] are all scalars and lambda is between 0 and 1.\nfor (n in 1:N) {\n target += log_sum_exp(log(lambda)\n + normal_lpdf(y[n] | mu[1], sigma[1]),\n log1m(lambda)\n + normal_lpdf(y[n] | mu[2], sigma[2]));\n}\nor equivalently\nfor (n in 1:N) {\n target += log_mix(lambda,\n normal_lpdf(y[n] | mu[1], sigma[1]),\n normal_lpdf(y[n] | mu[2], sigma[2]));\n}\nThis definition assumes that each observation \\(y_n\\) may have arisen from either of the mixture components. The density is \\[\np\\left(y \\mid \\lambda, \\mu, \\sigma\\right)\n= \\prod_{n=1}^N \\big(\\lambda \\times \\textsf{normal}\\left(y_n \\mid \\mu_1, \\sigma_1 \\right)\n + (1 - \\lambda) \\times \\textsf{normal}\\left(y_n \\mid \\mu_2, \\sigma_2 \\right)\\big).\n\\]\nContrast the previous model with the following (erroneous) attempt to vectorize the model.\ntarget += log_sum_exp(log(lambda)\n + normal_lpdf(y | mu[1], sigma[1]),\n log1m(lambda)\n + normal_lpdf(y | mu[2], sigma[2]));\nor equivalently,\ntarget += log_mix(lambda,\n normal_lpdf(y | mu[1], sigma[1]),\n normal_lpdf(y | mu[2], sigma[2]));\nThis second definition implies that the entire sequence \\(y_1, \\dotsc, y_N\\) of observations comes from one component or the other, defining a different density, \\[\np\\left(y \\mid \\lambda, \\mu, \\sigma \\right)\n= \\lambda \\times \\prod_{n=1}^N \\textsf{normal}\\left(y_n \\mid \\mu_1, \\sigma_1\\right)\n+ (1 - \\lambda) \\times \\prod_{n=1}^N \\textsf{normal}\\left(y_n \\mid \\mu_2, \\sigma_2\\right).\n\\]\n\n\n\nIn many mixture models, the mixture components are underlyingly exchangeable in the model and thus not identifiable. 
This arises if the parameters of the mixture components have exchangeable priors and the mixture ratio gets a uniform prior so that the parameters of the mixture components are also exchangeable in the likelihood.\nWe have finessed this basic problem by ordering the parameters. This will allow us in some cases to pick out mixture components either ahead of time or after fitting (e.g., male vs. female, or Democrat vs. Republican).\nIn other cases, we do not care about the actual identities of the mixture components and want to consider inferences that are independent of indexes. For example, we might only be interested in posterior predictions for new observations.\n\n\nAs an example, consider the normal mixture from the previous section, which provides an exchangeable prior on the pairs of parameters \\((\\mu_1, \\sigma_1)\\) and \\((\\mu_2, \\sigma_2)\\), \\[\\begin{align*}\n\\mu_1, \\mu_2 &\\sim \\textsf{normal}(0, 10) \\\\\n\\sigma_1, \\sigma_2 &\\sim \\textsf{halfnormal}(0, 10) \\\\\n\\end{align*}\\]\nThe prior on the mixture ratio is uniform, \\[\n\\lambda \\sim \\textsf{uniform}(0, 1),\n\\] so that with the likelihood \\[\np\\left(y_n \\mid \\mu, \\sigma\\right)\n= \\lambda \\times \\textsf{normal}\\left(y_n \\mid \\mu_1, \\sigma_1\\right)\n+ (1 - \\lambda) \\times \\textsf{normal}\\left(y_n \\mid \\mu_2, \\sigma_2\\right),\n\\] the joint distribution \\(p(y, \\mu, \\sigma, \\lambda)\\) is exchangeable in the parameters \\((\\mu_1, \\sigma_1)\\) and \\((\\mu_2, \\sigma_2)\\) with \\(\\lambda\\) flipping to \\(1 - \\lambda\\).1\n\n\n\nIn cases where the mixture components are not identifiable, it can be difficult to diagnose convergence of sampling or optimization algorithms because the labels will switch, or be permuted, in different MCMC chains or different optimization runs. Luckily, posterior inferences which do not refer to specific component labels are invariant under label switching and may be used directly. 
This subsection considers a pair of examples.\n\n\nThe posterior predictive distribution for a new observation \\(\\tilde{y}\\) given the complete parameter vector \\(\\theta\\) will be \\[\np(\\tilde{y} \\mid y)\n=\n\\int_{\\theta}\np(\\tilde{y} \\mid \\theta)\n\\, p(\\theta \\mid y)\n\\, \\textsf{d}\\theta.\n\\]\nThe normal mixture example from the previous section, with \\(\\theta =\n(\\mu, \\sigma, \\lambda)\\), shows that the model returns the same density under label switching and thus the predictive inference is sound. In Stan, that predictive inference can be done either by computing \\(p(\\tilde{y} \\mid y)\\), which is more efficient statistically in terms of effective sample size, or simulating draws of \\(\\tilde{y}\\), which is easier to plug into other inferences. Both approaches can be coded directly in the generated quantities block of the program. Here’s an example of the direct (non-sampling) approach.\ndata {\n int<lower=0> N_tilde;\n vector[N_tilde] y_tilde;\n // ...\n}\ngenerated quantities {\n vector[N_tilde] log_p_y_tilde;\n for (n in 1:N_tilde) {\n log_p_y_tilde[n]\n = log_mix(lambda,\n normal_lpdf(y_tilde[n] | mu[1], sigma[1]),\n normal_lpdf(y_tilde[n] | mu[2], sigma[2]));\n }\n}\nIt is a bit of a bother afterwards, because the logarithm function isn’t linear and hence doesn’t distribute through averages (Jensen’s inequality shows which way the inequality goes). The right thing to do is to apply log_sum_exp to the posterior draws of log_p_y_tilde. The average log predictive density is then given by subtracting log(N_new), where N_new is the number of posterior draws.\n\n\n\nOften a mixture model will be applied to a clustering problem and there might be two data items \\(y_i\\) and \\(y_j\\) for which there is a question of whether they arose from the same mixture component. 
If we take \\(z_i\\) and \\(z_j\\) to be the component responsibility discrete variables, then the quantity of interest is \\(z_i = z_j\\), which can be summarized as an event probability \\[\n\\Pr[z_i = z_j \\mid y]\n=\n\\int_{\\theta}\n\\frac{\\sum_{k=0}^1 p(z_i=k, z_j = k, y_i, y_j \\mid \\theta)}\n {\\sum_{k=0}^1 \\sum_{m=0}^1 p(z_i = k, z_j = m, y_i, y_j \\mid \\theta)}\n\\,\np(\\theta \\mid y)\n\\,\n\\textsf{d}\\theta.\n\\]\nAs with other event probabilities, this can be calculated in the generated quantities block either by sampling \\(z_i\\) and \\(z_j\\) and using the indicator function on their equality, or by computing the term inside the integral as a generated quantity. As with the posterior predictive distribution, working in expectation is more statistically efficient than sampling.\n\n\n\n\n\nZero-inflated and hurdle models both provide mixtures of a Poisson and Bernoulli probability mass function to allow more flexibility in modeling the probability of a zero outcome. Zero-inflated models, as defined by Lambert (1992), add additional probability mass to the outcome of zero. Hurdle models, on the other hand, are formulated as pure mixtures of zero and non-zero outcomes.\nZero inflation and hurdle models can be formulated for discrete distributions other than the Poisson. Zero inflation does not work for continuous distributions in Stan because of issues with derivatives; in particular, there is no way to add a point mass to a continuous distribution, such as zero-inflating a normal as a regression coefficient prior. Hurdle models can be formulated as a combination of a point mass at zero and a continuous distribution for positive values.\n\n\nConsider the following example for zero-inflated Poisson distributions. 
There is a probability \\(\\theta\\) of observing a zero, and a probability \\(1 - \\theta\\) of observing a count with a \\(\\textsf{Poisson}(\\lambda)\\) distribution (now \\(\\theta\\) is being used for mixing proportions because \\(\\lambda\\) is the traditional notation for a Poisson mean parameter). Given the probability \\(\\theta\\) and the intensity \\(\\lambda\\), the distribution for \\(y_n\\) can be written as \\[\\begin{align*}\ny_n & = 0 & \\quad\\text{with probability } \\theta, \\text{ and}\\\\\ny_n & \\sim \\textsf{Poisson}(y_n \\mid \\lambda) & \\quad\\text{with probability } 1-\\theta.\n\\end{align*}\\]\nStan does not support conditional distribution statements (with ~) conditional on some parameter, and we need to consider the corresponding likelihood \\[\np(y_n \\mid \\theta,\\lambda)\n=\n\\begin{cases}\n\\theta + (1 - \\theta) \\times \\textsf{Poisson}(0 \\mid \\lambda) & \\quad\\text{if } y_n = 0, \\text{ and}\\\\\n(1-\\theta) \\times \\textsf{Poisson}(y_n \\mid \\lambda) &\\quad\\text{if } y_n > 0.\n\\end{cases}\n\\] The log likelihood can be coded directly in Stan (with target +=) as follows.\ndata {\n int<lower=0> N;\n array[N] int<lower=0> y;\n}\nparameters {\n real<lower=0, upper=1> theta;\n real<lower=0> lambda;\n}\nmodel {\n for (n in 1:N) {\n if (y[n] == 0) {\n target += log_sum_exp(log(theta),\n log1m(theta)\n + poisson_lpmf(y[n] | lambda));\n } else {\n target += log1m(theta)\n + poisson_lpmf(y[n] | lambda);\n }\n }\n}\nThe log1m(theta) computes log(1-theta), but is more computationally stable. The log_sum_exp(lp1,lp2) function adds the log probabilities on the linear scale; it is defined to be equal to log(exp(lp1) + exp(lp2)), but is more computationally stable and faster.\n\n\nThe code given above to compute the zero-inflated Poisson redundantly calculates all of the Bernoulli terms and also poisson_lpmf(0 | lambda) every time the first condition body executes. 
The use of the redundant terms is conditioned on y, which is known when the data are read in. This allows the transformed data block to be used to compute some more convenient terms for expressing the log density each iteration.\nThe number of zero cases is computed and handled separately. Then the nonzero cases are collected into their own array for vectorization. The number of zeros is required to declare y_nonzero, so it must be computed in a function.\nfunctions {\n int num_zeros(array[] int y) {\n int sum = 0;\n for (n in 1:size(y)) {\n sum += (y[n] == 0);\n }\n return sum;\n }\n}\n// ...\ntransformed data {\n int<lower=0> N_zero = num_zeros(y);\n array[N - N_zero] int<lower=1> y_nonzero;\n int N_nonzero = 0;\n for (n in 1:N) {\n if (y[n] == 0) continue;\n N_nonzero += 1;\n y_nonzero[N_nonzero] = y[n];\n }\n}\n// ...\nmodel {\n // ...\n target\n += N_zero\n * log_sum_exp(log(theta),\n log1m(theta)\n + poisson_lpmf(0 | lambda));\n target += N_nonzero * log1m(theta);\n target += poisson_lpmf(y_nonzero | lambda);\n // ...\n}\nThe boundary conditions of all zeros and no zero outcomes are handled appropriately; in the vectorized case, if y_nonzero is empty, N_nonzero will be zero, and the last two target increment terms will add zeros.\n\n\n\n\nThe hurdle model is similar to the zero-inflated model, but more flexible in that the zero outcomes can be deflated as well as inflated. 
Given the probability \\(\\theta\\) and the intensity \\(\\lambda\\), the distribution for \\(y_n\\) can be written as \\[\\begin{align*}\ny_n & = 0 \\quad\\text{with probability } \\theta, \\text{ and}\\\\\ny_n & \\sim \\textsf{Poisson}_{x\\neq 0}(y_n \\mid \\lambda) \\quad\\text{with probability } 1-\\theta,\n\\end{align*}\\] where \\(\\textsf{Poisson}_{x\\neq 0}\\) is a truncated Poisson distribution, truncated at \\(0\\).\nThe corresponding likelihood function for the hurdle model is defined by \\[\np(y\\mid\\theta,\\lambda)\n=\n\\begin{cases}\n\\theta &\\quad\\text{if } y = 0, \\text{ and}\\\\\n(1 - \\theta)\n \\frac{\\displaystyle \\textsf{Poisson}(y \\mid \\lambda)}\n {\\displaystyle 1 - \\textsf{PoissonCDF}(0 \\mid \\lambda)}\n&\\quad\\text{if } y > 0,\n\\end{cases}\n\\] where \\(\\textsf{PoissonCDF}\\) is the cumulative distribution function for the Poisson distribution and \\(1 - \\textsf{PoissonCDF}(0 \\mid \\lambda)\\) is the relative normalization term for the truncated Poisson (truncated at \\(0\\)).\nThe hurdle model is even more straightforward to program in Stan, as it does not require an explicit mixture.\nif (y[n] == 0) {\n target += log(theta);\n} else {\n target += log1m(theta) + poisson_lpmf(y[n] | lambda)\n - poisson_lccdf(0 | lambda);\n}\nJulian King pointed out that because \\[\\begin{align*}\n\\log \\left( 1 - \\textsf{PoissonCDF}(0 \\mid \\lambda) \\right)\n &= \\log \\left( 1 - \\textsf{Poisson}(0 \\mid \\lambda) \\right) \\\\\n &= \\log(1 - \\exp(-\\lambda))\n\\end{align*}\\] the CCDF in the else clause can be replaced with a simpler expression.\ntarget += log1m(theta) + poisson_lpmf(y[n] | lambda)\n - log1m_exp(-lambda);\nThe resulting code is about 15% faster than the code with the CCDF.\nThis is an example where collecting counts ahead of time can also greatly speed up execution without changing the density. 
For data size \\(N=200\\) and parameters \\(\\theta=0.3\\) and \\(\\lambda = 8\\), the speedup is a factor of 10; it will be lower for smaller \\(N\\) and greater for larger \\(N\\); it will also be greater for larger \\(\\theta\\).\nTo achieve this speedup, it helps to have a function to count the number of zero entries in an array of integers,\nfunctions {\n int num_zero(array[] int y) {\n int nz = 0;\n for (n in 1:size(y)) {\n if (y[n] == 0) {\n nz += 1;\n }\n }\n return nz;\n }\n}\nThen a transformed data block can be used to store the sufficient statistics,\ntransformed data {\n int<lower=0, upper=N> N0 = num_zero(y);\n int<lower=0, upper=N> Ngt0 = N - N0;\n array[N - num_zero(y)] int<lower=1> y_nz;\n {\n int pos = 1;\n for (n in 1:N) {\n if (y[n] != 0) {\n y_nz[pos] = y[n];\n pos += 1;\n }\n }\n }\n}\nThe model block is then reduced to three statements.\nmodel {\n N0 ~ binomial(N, theta);\n y_nz ~ poisson(lambda);\n target += -Ngt0 * log1m_exp(-lambda);\n}\nThe first statement accounts for the Bernoulli contribution to both the zero and non-zero counts. The second line is the Poisson contribution from the non-zero counts, which is now vectorized. Finally, the normalization for the truncation is a single line, so that the expression for the log CCDF at 0 isn’t repeated. Also note that the negation is applied to the constant Ngt0; whenever possible, leave subexpressions constant because then gradients need not be propagated until a non-constant term is encountered.\n\n\n\n\nSuppose we have a two-component mixture model with mixing rate \\(\\lambda \\in (0, 1)\\). Because the likelihood for the mixture components is proportionally weighted by the mixture weights, the effective data size used to estimate each of the mixture components will also be weighted as a fraction of the overall data size. 
Thus although there are \\(N\\) observations, the mixture components will be estimated with effective data sizes of \\(\\theta \\, N\\) and \\((1 - \\theta)\n\\, N\\) for the two components for some \\(\\theta \\in (0, 1)\\). The effective weighting size is determined by posterior responsibility, not simply by the mixing rate \\(\\lambda\\).\n\n\nIn contrast to mixture models, which create mixtures at the observation level, model averaging creates mixtures over the posteriors of models separately fit with the entire data set. In this situation, the priors work as expected when fitting the models independently, with the posteriors being based on the complete observed data \\(y\\).\nIf different models are expected to account for different observations, we recommend building mixture models directly. If the models being mixed are similar, often a single expanded model will capture the features of both and may be used on its own for inferential purposes (estimation, decision making, prediction, etc.). For example, rather than fitting an intercept-only regression and a slope-only regression and averaging their predictions, even as a mixture model, we would recommend building a single regression with both a slope and an intercept. Model complexity, such as having more predictors than data points, can be tamed using appropriately regularizing priors. If computation becomes a bottleneck, the only recourse can be model averaging, which can be calculated after fitting each model independently (see Hoeting et al. (1999) and Gelman et al. 
(2013) for theoretical and computational details).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html#clustering-mixture.section", + "href": "stan-users-guide/finite-mixtures.html#clustering-mixture.section", + "title": "Finite Mixtures", + "section": "", + "text": "Clustering models, as discussed in the clustering chapter, are just a particular class of mixture models that have been widely applied to clustering in the engineering and machine-learning literature. The normal mixture model discussed in this chapter reappears in multivariate form as the statistical basis for the \\(K\\)-means algorithm; the latent Dirichlet allocation model, usually applied to clustering problems, can be viewed as a mixed-membership multinomial mixture model.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html#latent-discrete-parameterization", + "href": "stan-users-guide/finite-mixtures.html#latent-discrete-parameterization", + "title": "Finite Mixtures", + "section": "", + "text": "One way to parameterize a mixture model is with a latent categorical variable indicating which mixture component was responsible for the outcome. For example, consider \\(K\\) normal distributions with locations \\(\\mu_k \\in \\mathbb{R}\\) and scales \\(\\sigma_k \\in (0,\\infty)\\). Now consider mixing them in proportion \\(\\lambda\\), where \\(\\lambda_k \\geq 0\\) and \\(\\sum_{k=1}^K \\lambda_k = 1\\) (i.e., \\(\\lambda\\) lies in the unit \\(K\\)-simplex). 
For each outcome \\(y_n\\) there is a latent variable \\(z_n\\) in \\(\\{ 1,\\dotsc,K \\}\\) with a categorical distribution parameterized by \\(\\lambda\\), \\[\nz_n \\sim \\textsf{categorical}(\\lambda).\n\\]\nThe variable \\(y_n\\) is distributed according to the parameters of the mixture component \\(z_n\\), \\[\ny_n \\sim \\textsf{normal}(\\mu_{z[n]},\\sigma_{z[n]}).\n\\]\nThis model is not directly supported by Stan because it involves discrete parameters \\(z_n\\), but Stan can sample \\(\\mu\\) and \\(\\sigma\\) by summing out the \\(z\\) parameter as described in the next section.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html#summing-out-the-responsibility-parameter", + "href": "stan-users-guide/finite-mixtures.html#summing-out-the-responsibility-parameter", + "title": "Finite Mixtures", + "section": "", + "text": "To implement the normal mixture model outlined in the previous section in Stan, the discrete parameters can be summed out of the model. If \\(Y\\) is a mixture of \\(K\\) normal distributions with locations \\(\\mu_k\\) and scales \\(\\sigma_k\\) with mixing proportions \\(\\lambda\\) in the unit \\(K\\)-simplex, then \\[\np_Y\\left(y \\mid \\lambda, \\mu, \\sigma \\right)\n=\n\\sum_{k=1}^K \\lambda_k \\, \\textsf{normal}\\left(y \\mid \\mu_k, \\sigma_k\\right).\n\\]\n\n\nThe log sum of exponentials function is used to define mixtures on the log scale. It is defined for two inputs by \\[\n\\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}(a, b) = \\log \\left(\\exp(a) + \\exp(b)\\right).\n\\]\nIf \\(a\\) and \\(b\\) are probabilities on the log scale, then \\(\\exp(a) +\n\\exp(b)\\) is their sum on the linear scale, and the outer log converts the result back to the log scale; to summarize, log_sum_exp does linear addition on the log scale. 
The reason to use Stan’s built-in log_sum_exp function is that it can prevent underflow and overflow in the exponentiation, by calculating the result as \\[\n\\log \\left( \\exp(a) + \\exp(b)\\right)\n= c + \\log \\left( \\exp(a - c) + \\exp(b - c) \\right),\n\\] where \\(c = \\max(a, b)\\). In this evaluation, one of the terms, \\(a - c\\) or \\(b - c\\), is zero and the other is negative, thus eliminating the possibility of overflow or underflow in the leading term while extracting the most arithmetic precision possible by pulling the \\(\\max(a, b)\\) out of the log-exp round trip.\nFor example, the mixture of \\(\\textsf{normal}(-1, 2)\\) with \\(\\textsf{normal}(3, 1)\\), with mixing proportion \\(\\lambda =\n[0.3,0.7]^{\\top}\\), can be implemented in Stan as follows.\nparameters {\n real y;\n}\nmodel {\n target += log_sum_exp(log(0.3) + normal_lpdf(y | -1, 2),\n log(0.7) + normal_lpdf(y | 3, 1));\n}\nThe log probability term is derived by taking \\[\\begin{align*}\n\\log\\, &p\\left(y \\mid \\lambda,\\mu,\\sigma \\right) \\\\\n&= \\log\\big( 0.3 \\times \\textsf{normal}\\left(y \\mid -1,2 \\right)\n + 0.7 \\times \\textsf{normal}\\left(y \\mid 3,1 \\right) \\big) \\\\\n&= \\log\\bigg( \\exp\\Big(\\log\\big(0.3 \\times \\textsf{normal}\\left(y \\mid -1,2 \\right)\\big)\\Big)\n + \\exp\\Big(\\log\\big(0.7 \\times \\textsf{normal}\\left(y \\mid 3,1 \\right)\\big)\\Big) \\bigg) \\\\\n&= \\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}\\big(\n \\log(0.3) + \\log \\textsf{normal}\\left(y \\mid -1,2 \\right),\n \\log(0.7) + \\log \\textsf{normal}\\left(y \\mid 3,1 \\right) \\big).\n\\end{align*}\\]\n\n\n\nIf a two-component mixture has a mixing ratio of 0.5, then the mixing ratios can be dropped, because\nlog_half = log(0.5);\nfor (n in 1:N) {\n target +=\n log_sum_exp(log_half + normal_lpdf(y[n] | mu[1], sigma[1]),\n log_half + normal_lpdf(y[n] | mu[2], sigma[2]));\n}\nthen the \\(\\log 0.5\\) term isn’t contributing to the proportional density, 
and the above can be replaced with the more efficient version\nfor (n in 1:N) {\n target += log_sum_exp(normal_lpdf(y[n] | mu[1], sigma[1]),\n normal_lpdf(y[n] | mu[2], sigma[2]));\n}\nThe same result holds if there are \\(K\\) components and the mixing simplex \\(\\lambda\\) is symmetric, i.e., \\[\n\\lambda = \\left( \\frac{1}{K}, \\dotsc, \\frac{1}{K} \\right).\n\\]\nThe result follows from the identity \\[\n\\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}(c + a, c + b)\n=\nc + \\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}(a, b)\n\\] and the fact that adding a constant \\(c\\) to the log density accumulator has no effect because the log density is only specified up to an additive constant in the first place. There is nothing specific to the normal distribution here; constants may always be dropped from the target.\n\n\n\nThe posterior \\(p(z_n \\mid y_n, \\mu, \\sigma)\\) over the mixture indicator \\(z_n\n\\in 1:K\\) is often of interest as \\(p(z_n = k \\mid y, \\mu, \\sigma)\\) is the posterior probability that that observation \\(y_n\\) was generated by mixture component \\(k\\). The posterior can be computed via Bayes’s rule, \\[\\begin{align*}\n\\Pr\\!\\left[z_n = k \\mid y_n, \\mu, \\sigma, \\lambda \\right]\n &\\propto p\\left(y_n \\mid z_n = k, \\mu, \\sigma\\right)\\, p\\left(z_n = k \\mid \\lambda\\right) \\\\\n &= \\textsf{normal}\\left(y_n \\mid \\mu_k, \\sigma_k\\right) \\cdot \\lambda_k.\n\\end{align*}\\]\nThe normalization can be done via summation, because \\(z_n \\in 1{:}K\\) only takes on finitely many values. 
In detail, \\[\np\\left(z_n = k \\mid y_n, \\mu, \\sigma, \\lambda \\right) =\n\\frac{p\\left(y_n \\mid z_n = k, \\mu, \\sigma \\right) \\cdot p\\left(z_n = k \\mid \\lambda \\right)}\n {\\sum_{k' = 1}^K p\\left(y_n \\mid z_n = k', \\mu, \\sigma \\right)\n \\cdot p\\left(z_n = k' \\mid \\lambda \\right)}.\n\\]\nOn the log scale, the normalized probability is computed as \\[\\begin{align*}\n\\log\\,&\\Pr\\!\\left[z_n = k \\mid y_n, \\mu, \\sigma, \\lambda\\right] \\\\\n&= \\log p\\left(y_n \\mid z_n = k, \\mu, \\sigma\\right) + \\log \\Pr\\!\\left[z_n = k \\mid \\lambda\\right] \\\\\n&\\quad - \\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}_{k' = 1}^K \\big(\\log p\\left(y_n \\mid z_n = k', \\mu, \\sigma\\right)\n + \\log p\\left(z_n = k' \\mid \\lambda\\right)\\big).\n\\end{align*}\\] This can be coded up directly in Stan; the change-point model in the change point section provides an example.\n\n\n\nGiven the scheme for representing mixtures, it may be moved to an estimation setting, where the locations, scales, and mixture components are unknown. Further generalizing to a number of mixture components specified as data yields the following model.\ndata {\n int<lower=1> K; // number of mixture components\n int<lower=1> N; // number of data points\n array[N] real y; // observations\n}\nparameters {\n simplex[K] theta; // mixing proportions\n ordered[K] mu; // locations of mixture components\n vector<lower=0>[K] sigma; // scales of mixture components\n}\nmodel {\n vector[K] log_theta = log(theta); // cache log calculation\n sigma ~ lognormal(0, 2);\n mu ~ normal(0, 10);\n for (n in 1:N) {\n vector[K] lps = log_theta;\n for (k in 1:K) {\n lps[k] += normal_lpdf(y[n] | mu[k], sigma[k]);\n }\n target += log_sum_exp(lps);\n }\n}\nThe model involves K mixture components and N data points. 
The mixing proportion parameter theta is declared to be a unit \\(K\\)-simplex, whereas the component location parameter mu and scale parameter sigma are both defined to be K-vectors.\nThe location parameter mu is declared to be an ordered vector in order to identify the model. This will not affect inferences that do not depend on the ordering of the components as long as the prior for the components mu[k] is symmetric, as it is here (each component has an independent \\(\\textsf{normal}(0, 10)\\) prior). It would even be possible to include a hierarchical prior for the components.\nThe values in the scale array sigma are constrained to be non-negative, and have a weakly informative prior given in the model chosen to avoid zero values and thus collapsing components.\nThe model declares a local array variable lps to be size K and uses it to accumulate the log contributions from the mixture components. The main action is in the loop over data points n. For each such point, the log of \\(\\theta_k \\times\n\\textsf{normal}\\left(y_n \\mid \\mu_k,\\sigma_k\\right)\\) is calculated and added to the array lps. Then the log probability is incremented with the log sum of exponentials of those values.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html#vectorizing-mixtures", + "href": "stan-users-guide/finite-mixtures.html#vectorizing-mixtures", + "title": "Finite Mixtures", + "section": "", + "text": "There is (currently) no way to vectorize mixture models at the observation level in Stan. This section is to warn users away from attempting to vectorize naively, as it results in a different model. 
A proper mixture at the observation level is defined as follows, where we assume that lambda, y[n], mu[1], mu[2], and sigma[1], sigma[2] are all scalars and lambda is between 0 and 1.\nfor (n in 1:N) {\n target += log_sum_exp(log(lambda)\n + normal_lpdf(y[n] | mu[1], sigma[1]),\n log1m(lambda)\n + normal_lpdf(y[n] | mu[2], sigma[2]));\nor equivalently\nfor (n in 1:N) {\n target += log_mix(lambda,\n normal_lpdf(y[n] | mu[1], sigma[1]),\n normal_lpdf(y[n] | mu[2], sigma[2]))\n };\nThis definition assumes that each observation \\(y_n\\) may have arisen from either of the mixture components. The density is \\[\np\\left(y \\mid \\lambda, \\mu, \\sigma\\right)\n= \\prod_{n=1}^N \\big(\\lambda \\times \\textsf{normal}\\left(y_n \\mid \\mu_1, \\sigma_1 \\right)\n + (1 - \\lambda) \\times \\textsf{normal}\\left(y_n \\mid \\mu_2, \\sigma_2 \\right)\\big).\n\\]\nContrast the previous model with the following (erroneous) attempt to vectorize the model.\ntarget += log_sum_exp(log(lambda)\n + normal_lpdf(y | mu[1], sigma[1]),\n log1m(lambda)\n + normal_lpdf(y | mu[2], sigma[2]));\nor equivalently,\ntarget += log_mix(lambda,\n normal_lpdf(y | mu[1], sigma[1]),\n normal_lpdf(y | mu[2], sigma[2]));\nThis second definition implies that the entire sequence \\(y_1, \\dotsc, y_N\\) of observations comes from one component or the other, defining a different density, \\[\np\\left(y \\mid \\lambda, \\mu, \\sigma \\right)\n= \\lambda \\times \\prod_{n=1}^N \\textsf{normal}\\left(y_n \\mid \\mu_1, \\sigma_1\\right)\n+ (1 - \\lambda) \\times \\prod_{n=1}^N \\textsf{normal}\\left(y_n \\mid \\mu_2, \\sigma_2\\right).\n\\]", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html#mixture-inference.section", + "href": "stan-users-guide/finite-mixtures.html#mixture-inference.section", + "title": "Finite Mixtures", + "section": "", + "text": "In many mixture models, the mixture components are underlyingly 
exchangeable in the model and thus not identifiable. This arises if the parameters of the mixture components have exchangeable priors and the mixture ratio gets a uniform prior so that the parameters of the mixture components are also exchangeable in the likelihood.\nWe have finessed this basic problem by ordering the parameters. This will allow us in some cases to pick out mixture components either ahead of time or after fitting (e.g., male vs. female, or Democrat vs. Republican).\nIn other cases, we do not care about the actual identities of the mixture components and want to consider inferences that are independent of indexes. For example, we might only be interested in posterior predictions for new observations.\n\n\nAs an example, consider the normal mixture from the previous section, which provides an exchangeable prior on the pairs of parameters \\((\\mu_1, \\sigma_1)\\) and \\((\\mu_2, \\sigma_2)\\), \\[\\begin{align*}\n\\mu_1, \\mu_2 &\\sim \\textsf{normal}(0, 10) \\\\\n\\sigma_1, \\sigma_2 &\\sim \\textsf{halfnormal}(0, 10) \\\\\n\\end{align*}\\]\nThe prior on the mixture ratio is uniform, \\[\n\\lambda \\sim \\textsf{uniform}(0, 1),\n\\] so that with the likelihood \\[\np\\left(y_n \\mid \\mu, \\sigma\\right)\n= \\lambda \\times \\textsf{normal}\\left(y_n \\mid \\mu_1, \\sigma_1\\right)\n+ (1 - \\lambda) \\times \\textsf{normal}\\left(y_n \\mid \\mu_2, \\sigma_2\\right),\n\\] the joint distribution \\(p(y, \\mu, \\sigma, \\lambda)\\) is exchangeable in the parameters \\((\\mu_1, \\sigma_1)\\) and \\((\\mu_2, \\sigma_2)\\) with \\(\\lambda\\) flipping to \\(1 - \\lambda\\).1\n\n\n\nIn cases where the mixture components are not identifiable, it can be difficult to diagnose convergence of sampling or optimization algorithms because the labels will switch, or be permuted, in different MCMC chains or different optimization runs. 
Luckily, posterior inferences which do not refer to specific component labels are invariant under label switching and may be used directly. This subsection considers a pair of examples.\n\n\nPosterior predictive distribution for a new observation \\(\\tilde{y}\\) given the complete parameter vector \\(\\theta\\) will be \\[\np(\\tilde{y} \\mid y)\n=\n\\int_{\\theta}\np(\\tilde{y} \\mid \\theta)\n\\, p(\\theta \\mid y)\n\\, \\textsf{d}\\theta.\n\\]\nThe normal mixture example from the previous section, with \\(\\theta =\n(\\mu, \\sigma, \\lambda)\\), shows that the model returns the same density under label switching and thus the predictive inference is sound. In Stan, that predictive inference can be done either by computing \\(p(\\tilde{y} \\mid y)\\), which is more efficient statistically in terms of effective sample size, or simulating draws of \\(\\tilde{y}\\), which is easier to plug into other inferences. Both approaches can be coded directly in the generated quantities block of the program. Here’s an example of the direct (non-sampling) approach.\ndata {\n int<lower=0> N_tilde;\n vector[N_tilde] y_tilde;\n // ...\n}\ngenerated quantities {\n vector[N_tilde] log_p_y_tilde;\n for (n in 1:N_tilde) {\n log_p_y_tilde[n]\n = log_mix(lambda,\n normal_lpdf(y_tilde[n] | mu[1], sigma[1])\n normal_lpdf(y_tilde[n] | mu[2], sigma[2]));\n }\n}\nIt is a bit of a bother afterwards, because the logarithm function isn’t linear and hence doesn’t distribute through averages (Jensen’s inequality shows which way the inequality goes). The right thing to do is to apply log_sum_exp of the posterior draws of log_p_y_tilde. The average log predictive density is then given by subtracting log(N_new).\n\n\n\nOften a mixture model will be applied to a clustering problem and there might be two data items \\(y_i\\) and \\(y_j\\) for which there is a question of whether they arose from the same mixture component. 
If we take \\(z_i\\) and \\(z_j\\) to be the component responsibility discrete variables, then the quantity of interest is \\(z_i = z_j\\), which can be summarized as an event probability \\[\n\\Pr[z_i = z_j \\mid y]\n=\n\\int_{\\theta}\n\\frac{\\sum_{k=0}^1 p(z_i=k, z_j = k, y_i, y_j \\mid \\theta)}\n {\\sum_{k=0}^1 \\sum_{m=0}^1 p(z_i = k, z_j = m, y_i, y_j \\mid \\theta)}\n\\,\np(\\theta \\mid y)\n\\,\n\\textsf{d}\\theta.\n\\]\nAs with other event probabilities, this can be calculated in the generated quantities block either by sampling \\(z_i\\) and \\(z_j\\) and using the indicator function on their equality, or by computing the term inside the integral as a generated quantity. As with the posterior predictive distribution, working in expectation is more statistically efficient than sampling.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html#zero-inflated.section", + "href": "stan-users-guide/finite-mixtures.html#zero-inflated.section", + "title": "Finite Mixtures", + "section": "", + "text": "Zero-inflated and hurdle models both provide mixtures of a Poisson and Bernoulli probability mass function to allow more flexibility in modeling the probability of a zero outcome. Zero-inflated models, as defined by Lambert (1992), add additional probability mass to the outcome of zero. Hurdle models, on the other hand, are formulated as pure mixtures of zero and non-zero outcomes.\nZero inflation and hurdle models can be formulated for discrete distributions other than the Poisson. Zero inflation does not work for continuous distributions in Stan because of issues with derivatives; in particular, there is no way to add a point mass to a continuous distribution, such as zero-inflating a normal as a regression coefficient prior. 
Hurdle models can be formulated as combination of point mass at zero and continuous distribution for positive values.\n\n\nConsider the following example for zero-inflated Poisson distributions. There is a probability \\(\\theta\\) of observing a zero, and a probability \\(1 - \\theta\\) of observing a count with a \\(\\textsf{Poisson}(\\lambda)\\) distribution (now \\(\\theta\\) is being used for mixing proportions because \\(\\lambda\\) is the traditional notation for a Poisson mean parameter). Given the probability \\(\\theta\\) and the intensity \\(\\lambda\\), the distribution for \\(y_n\\) can be written as \\[\\begin{align*}\ny_n & = 0 & \\quad\\text{with probability } \\theta, \\text{ and}\\\\\ny_n & \\sim \\textsf{Poisson}(y_n \\mid \\lambda) & \\quad\\text{with probability } 1-\\theta.\n\\end{align*}\\]\nStan does not support conditional distribution statements (with ~) conditional on some parameter, and we need to consider the corresponding likelihood \\[\np(y_n \\mid \\theta,\\lambda)\n=\n\\begin{cases}\n\\theta + (1 - \\theta) \\times \\textsf{Poisson}(0 \\mid \\lambda) & \\quad\\text{if } y_n = 0, \\text{ and}\\\\\n(1-\\theta) \\times \\textsf{Poisson}(y_n \\mid \\lambda) &\\quad\\text{if } y_n > 0.\n\\end{cases}\n\\] The log likelihood can be coded directly in Stan (with target +=) as follows.\ndata {\n int<lower=0> N;\n array[N] int<lower=0> y;\n}\nparameters {\n real<lower=0, upper=1> theta;\n real<lower=0> lambda;\n}\nmodel {\n for (n in 1:N) {\n if (y[n] == 0) {\n target += log_sum_exp(log(theta),\n log1m(theta)\n + poisson_lpmf(y[n] | lambda));\n } else {\n target += log1m(theta)\n + poisson_lpmf(y[n] | lambda);\n }\n }\n}\nThe log1m(theta) computes log(1-theta), but is more computationally stable. 
The log_sum_exp(lp1,lp2) function adds the log probabilities on the linear scale; it is defined to be equal to log(exp(lp1) + exp(lp2)), but is more computationally stable and faster.\n\n\nThe code given above to compute the zero-inflated Poisson redundantly calculates all of the Bernoulli terms and also poisson_lpmf(0 | lambda) every time the first condition body executes. The use of the redundant terms is conditioned on y, which is known when the data are read in. This allows the transformed data block to be used to compute some more convenient terms for expressing the log density each iteration.\nThe number of zero cases is computed and handled separately. Then the nonzero cases are collected into their own array for vectorization. The number of zeros is required to declare y_nonzero, so it must be computed in a function.\nfunctions {\n int num_zeros(array[] int y) {\n int sum = 0;\n for (n in 1:size(y)) {\n sum += (y[n] == 0);\n }\n return sum;\n }\n}\n// ...\ntransformed data {\n int<lower=0> N_zero = num_zeros(y);\n array[N - N_zero] int<lower=1> y_nonzero;\n int N_nonzero = 0;\n for (n in 1:N) {\n if (y[n] == 0) continue;\n N_nonzero += 1;\n y_nonzero[N_nonzero] = y[n];\n }\n}\n// ...\nmodel {\n // ...\n target\n += N_zero\n * log_sum_exp(log(theta),\n log1m(theta)\n + poisson_lpmf(0 | lambda));\n target += N_nonzero * log1m(theta);\n target += poisson_lpmf(y_nonzero | lambda);\n // ...\n}\nThe boundary conditions of all zeros and no zero outcomes is handled appropriately; in the vectorized case, if y_nonzero is empty, N_nonzero will be zero, and the last two target increment terms will add zeros.\n\n\n\n\nThe hurdle model is similar to the zero-inflated model, but more flexible in that the zero outcomes can be deflated as well as inflated. 
Given the probability \\(\\theta\\) and the intensity \\(\\lambda\\), the distribution for \\(y_n\\) can be written as \\[\\begin{align*}\ny_n & = 0 \\quad\\text{with probability } \\theta, \\text{ and}\\\\\ny_n & \\sim \\textsf{Poisson}_{x\\neq 0}(y_n \\mid \\lambda) \\quad\\text{with probability } 1-\\theta,\n\\end{align*}\\] where \\(\\textsf{Poisson}_{x\\neq 0}\\) is a truncated Poisson distribution, truncated at \\(0\\).\nThe corresponding likelihood function for the hurdle model is defined by \\[\np(y\\mid\\theta,\\lambda)\n=\n\\begin{cases}\n\\theta &\\quad\\text{if } y = 0, \\text{ and}\\\\\n(1 - \\theta)\n \\frac{\\displaystyle \\textsf{Poisson}(y \\mid \\lambda)}\n {\\displaystyle 1 - \\textsf{PoissonCDF}(0 \\mid \\lambda)}\n&\\quad\\text{if } y > 0,\n\\end{cases}\n\\] where \\(\\textsf{PoissonCDF}\\) is the cumulative distribution function for the Poisson distribution and \\(1 - \\textsf{PoissonCDF}(0 \\mid \\lambda)\\) is the relative normalization term for the truncated Poisson (truncated at \\(0\\)).\nThe hurdle model is even more straightforward to program in Stan, as it does not require an explicit mixture.\nif (y[n] == 0) {\n target += log(theta);\n} else {\n target += log1m(theta) + poisson_lpmf(y[n] | lambda)\n - poisson_lccdf(0 | lambda));\n}\nJulian King pointed out that because \\[\\begin{align*}\n\\log \\left( 1 - \\textsf{PoissonCDF}(0 \\mid \\lambda) \\right)\n &= \\log \\left( 1 - \\textsf{Poisson}(0 \\mid \\lambda) \\right) \\\\\n &= \\log(1 - \\exp(-\\lambda))\n\\end{align*}\\] the CCDF in the else clause can be replaced with a simpler expression.\ntarget += log1m(theta) + poisson_lpmf(y[n] | lambda)\n - log1m_exp(-lambda));\nThe resulting code is about 15% faster than the code with the CCDF.\nThis is an example where collecting counts ahead of time can also greatly speed up the execution speed without changing the density. 
For data size \\(N=200\\) and parameters \\(\\theta=0.3\\) and \\(\\lambda = 8\\), the speedup is a factor of 10; it will be lower for smaller \\(N\\) and greater for larger \\(N\\); it will also be greater for larger \\(\\theta\\).\nTo achieve this speedup, it helps to have a function to count the number of zero entries in an array of integers,\nfunctions {\n int num_zero(array[] int y) {\n int nz = 0;\n for (n in 1:size(y)) {\n if (y[n] == 0) {\n nz += 1;\n }\n }\n return nz;\n }\n}\nThen a transformed data block can be used to store the sufficient statistics,\ntransformed data {\n int<lower=0, upper=N> N0 = num_zero(y);\n int<lower=0, upper=N> Ngt0 = N - N0;\n array[N - num_zero(y)] int<lower=1> y_nz;\n {\n int pos = 1;\n for (n in 1:N) {\n if (y[n] != 0) {\n y_nz[pos] = y[n];\n pos += 1;\n }\n }\n }\n}\nThe model block is then reduced to three statements.\nmodel {\n N0 ~ binomial(N, theta);\n y_nz ~ poisson(lambda);\n target += -Ngt0 * log1m_exp(-lambda);\n}\nThe first statement accounts for the Bernoulli contribution to both the zero and non-zero counts. The second line is the Poisson contribution from the non-zero counts, which is now vectorized. Finally, the normalization for the truncation is a single line, so that the expression for the log CCDF at 0 isn’t repeated. Also note that the negation is applied to the constant Ngt0; whenever possible, leave subexpressions constant because then gradients need not be propagated until a non-constant term is encountered.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html#priors-and-effective-data-size-in-mixture-models", + "href": "stan-users-guide/finite-mixtures.html#priors-and-effective-data-size-in-mixture-models", + "title": "Finite Mixtures", + "section": "", + "text": "Suppose we have a two-component mixture model with mixing rate \\(\\lambda \\in (0, 1)\\). 
Because the likelihood for the mixture components is proportionally weighted by the mixture weights, the effective data size used to estimate each of the mixture components will also be weighted as a fraction of the overall data size. Thus although there are \\(N\\) observations, the mixture components will be estimated with effective data sizes of \\(\\theta \\, N\\) and \\((1 - \\theta)\n\\, N\\) for the two components for some \\(\\theta \\in (0, 1)\\). The effective weighting size is determined by posterior responsibility, not simply by the mixing rate \\(\\lambda\\).\n\n\nIn contrast to mixture models, which create mixtures at the observation level, model averaging creates mixtures over the posteriors of models separately fit with the entire data set. In this situation, the priors work as expected when fitting the models independently, with the posteriors being based on the complete observed data \\(y\\).\nIf different models are expected to account for different observations, we recommend building mixture models directly. If the models being mixed are similar, often a single expanded model will capture the features of both and may be used on its own for inferential purposes (estimation, decision making, prediction, etc.). For example, rather than fitting an intercept-only regression and a slope-only regression and averaging their predictions, even as a mixture model, we would recommend building a single regression with both a slope and an intercept. Model complexity, such as having more predictors than data points, can be tamed using appropriately regularizing priors. If computation becomes a bottleneck, the only recourse can be model averaging, which can be calculated after fitting each model independently (see Hoeting et al. (1999) and Gelman et al. 
(2013) for theoretical and computational details).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/finite-mixtures.html#footnotes", + "href": "stan-users-guide/finite-mixtures.html#footnotes", + "title": "Finite Mixtures", + "section": "Footnotes", + "text": "Footnotes\n\n\nImposing a constraint such as \\(\\theta < 0.5\\) will resolve the symmetry, but fundamentally changes the model and its posterior inferences.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Finite Mixtures" + ] + }, + { + "objectID": "stan-users-guide/for-bugs-users.html", + "href": "stan-users-guide/for-bugs-users.html", + "title": "Transitioning from BUGS", + "section": "", + "text": "From the outside, Stan and BUGS1 are similar—they use statistically-themed modeling languages (which are similar but with some differences; see below), they can be called from R, running some specified number of chains to some specified length, producing posterior simulations that can be assessed using standard convergence diagnostics. This is not a coincidence: in designing Stan, we wanted to keep many of the useful features of BUGS.\n\n\n\n\nStan is compiled in two steps, first a model is translated to templated C++ and then to a platform-specific executable. Stan, unlike BUGS, allows the user to directly program in C++, but we do not describe how to do this in this Stan manual (see the getting started with C++ section of https://mc-stan.org for more information on using Stan directly from C++).\n\n\n\nBUGS performs MCMC updating one scalar parameter at a time, (with some exceptions such as JAGS’s implementation of regression and generalized linear models and some conjugate multivariate parameters), using conditional distributions (Gibbs sampling) where possible and otherwise using adaptive rejection sampling, slice sampling, and Metropolis jumping. 
BUGS figures out the dependence structure of the joint distribution as specified in its modeling language and uses this information to compute only what it needs at each step. Stan moves in the entire space of all the parameters using Hamiltonian Monte Carlo (more precisely, the no-U-turn sampler), thus avoiding some difficulties that occur with one-dimension-at-a-time sampling in high dimensions but at the cost of requiring the computation of the entire log density at each step.\n\n\n\nBUGS tunes its adaptive jumping (if necessary) during its warmup phase (traditionally referred to as “burn-in”). Stan uses its warmup phase to tune the no-U-turn sampler (NUTS).\n\n\n\nThe BUGS modeling language is not directly executable. Rather, BUGS parses its model to determine the posterior density and then decides on a sampling scheme. In contrast, the statements in a Stan model are directly executable: they translate exactly into C++ code that is used to compute the log posterior density (which in turn is used to compute the gradient).\n\n\n\nIn BUGS, statements are executed according to the directed graphical model so that variables are always defined when needed. A side effect of the direct execution of Stan’s modeling language is that statements execute in the order in which they are written. For instance, the following Stan program, which sets mu before using it to sample y:\nmu = a + b * x;\ny ~ normal(mu, sigma);\ntranslates to the following C++ code:\nmu = a + b * x;\ntarget += normal_lpdf(y | mu, sigma);\nContrast this with the following Stan program:\ny ~ normal(mu, sigma);\nmu = a + b * x;\nThis program is well formed, but is almost certainly a coding error, because it attempts to use mu before it is set. The direct translation to C++ code highlights the potential error of using mu in the first statement:\ntarget += normal_lpdf(y | mu, sigma);\nmu = a + b * x;\nTo trap these kinds of errors, variables are initialized to the special not-a-number (NaN) value. 
If NaN is passed to a log probability function, it will raise a domain exception, which will in turn be reported by the sampler. The sampler will reject the sample out of hand as if it had zero probability.\n\n\n\nStan uses its own C++ algorithmic differentiation packages to compute the gradient of the log density (up to a proportion). Gradients are required during the Hamiltonian dynamics simulations within the leapfrog algorithm of the Hamiltonian Monte Carlo and NUTS samplers.\n\n\n\nBoth BUGS and Stan are semi-automatic in that they run by themselves with no outside tuning required. Nevertheless, the user needs to pick the number of chains and number of iterations per chain. We usually pick 4 chains and start with 10 iterations per chain (to make sure there are no major bugs and to approximately check the timing), then go to 100, 1000, or more iterations as necessary. Compared to Gibbs or Metropolis, Hamiltonian Monte Carlo can take longer per iteration (as it typically takes many “leapfrog steps” within each iteration), but the iterations typically have lower autocorrelation. So Stan might work fine with 1000 iterations in an example where BUGS would require 100,000 for good mixing. We recommend monitoring potential scale reduction statistics (\\(\\hat{R}\\)) and the effective sample size to judge when to stop (stopping when \\(\\hat{R}\\) values do not counter-indicate convergence and when high enough effective sample size have been obtained).\n\n\n\nWinBUGS is closed source. OpenBUGS and JAGS are both licensed under the Gnu Public License (GPL), otherwise known as copyleft due to the restrictions it places on derivative works. 
Stan is licensed under the much more liberal new BSD license.\n\n\n\nLike WinBUGS, OpenBUGS and JAGS, Stan can be run directly from the command line or through common analytics platforms like R, Python, Julia, MATLAB, and Mathematica.\n\n\n\nLike OpenBUGS and JAGS, Stan can be run on Linux, Mac, and Windows platforms.\n\n\n\n\nThe BUGS modeling language follows an R-like syntax in which line breaks are meaningful. Stan follows the rules of C, in which line breaks are equivalent to spaces, and each statement ends in a semicolon. For example:\ny ~ normal(mu, sigma);\nand\nfor (i in 1:n) y[i] ~ normal(mu, sigma);\nOr, equivalently (recall that a line break is just another form of whitespace),\nfor (i in 1:n)\n y[i] ~ normal(mu, sigma);\nand also equivalently,\nfor (i in 1:n) {\n y[i] ~ normal(mu, sigma);\n}\nThere’s a semicolon after the model statement but not after the brackets indicating the body of the for loop.\nIn Stan, variables can have names constructed using letters, numbers, and the underscore (_) symbol, but nothing else (and a variable name cannot begin with a number). BUGS variables can also include the dot, or period (.) symbol.\nIn Stan, the second argument to the “normal” function is the standard deviation (i.e., the scale), not the variance (as in Bayesian Data Analysis) and not the inverse-variance (i.e., precision) (as in BUGS). Thus a normal with mean 1 and standard deviation 2 is normal(1,2), not normal(1,4) or normal(1,0.25).\nSimilarly, the second argument to the “multivariate normal” function is the covariance matrix and not the inverse covariance matrix (i.e., the precision matrix) (as in BUGS). The same is true for the “multivariate student” distribution.\nThe distributions have slightly different names:\n\n\n\nBUGS\nStan\n\n\n\n\ndnorm\nnormal\n\n\ndbinom\nbinomial\n\n\ndpois\npoisson\n\n\n…\n…\n\n\n\nStan, unlike BUGS, allows intermediate quantities, in the form of local variables, to be reassigned. 
For example, the following is legal and meaningful (if possibly inefficient) Stan code.\n{\n total = 0;\n for (i in 1:n) {\n theta[i] ~ normal(total, sigma);\n total = total + theta[i];\n }\n}\nIn BUGS, the above model would not be legal because the variable total is defined more than once. But in Stan, the loop is executed in order, so total is overwritten in each step.\nStan uses explicit declarations. Variables are declared with base type integer or real, and vectors, matrices, and arrays have specified dimensions. When variables are bounded, we give that information also. For data and transformed parameters, the bounds are used for error checking. For parameters, the constraints are critical to sampling as they determine the geometry over which the Hamiltonian is simulated.\nIn Stan, variables can be declared as data, transformed data, parameters, transformed parameters, or generated quantities. They can also be declared as local variables within blocks. For more information, see the part of this manual devoted to the Stan programming language and examine the example models.\nStan allows all sorts of tricks with vector and matrix operations which can make Stan models more compact. For example, arguments to probability functions may be vectorized,2 allowing\nfor (i in 1:n) {\n y[i] ~ normal(mu[i], sigma[i]);\n}\nto be expressed more compactly as\ny ~ normal(mu, sigma);\nThe vectorized form is also more efficient because Stan can unfold the computation of the chain rule during algorithmic differentiation.\nStan also allows for arrays of vectors and matrices. 
For example, a hierarchical model might have a vector of K parameters for each of J groups; this can be declared using\narray[J] vector[K] theta;\nThen theta[j] is an expression denoting a K-vector and may be used in the code just like any other vector variable.\nAn alternative encoding would be with a two-dimensional array, as in\narray[J, K] real theta;\nThe vector version can have some advantages, both in convenience and in computational speed for some operations.\nA third encoding would use a matrix:\nmatrix[J, K] theta;\nbut in this case, theta[j] is a row vector, not a vector, and accessing it as a vector is less efficient than with an array of vectors. The transposition operator, as in theta[j]', may be used to convert the row vector theta[j] to a (column) vector. Column vector and row vector types are not interchangeable everywhere in Stan; see the function signature declarations in the programming language section of this manual.\nStan supports general conditional statements using a standard if-else syntax. For example, a zero-inflated (or -deflated) Poisson mixture model is defined using the if-else syntax as described in the zero inflation section.\nStan supports general while loops using a standard syntax. While loops give Stan full Turing equivalent computational power. They are useful for defining iterative functions with complex termination conditions. As an illustration of their syntax, the for-loop\nmodel {\n // ...\n for (n in 1:N) {\n // ... do something with n ....\n }\n}\nmay be recoded using the following while loop.\nmodel {\n int n;\n // ...\n n = 1;\n while (n <= N) {\n // ... do something with n ...\n n = n + 1;\n }\n}\n\n\n\nStan does not yet support declaration of discrete parameters. Discrete data variables are supported. 
Inference is supported for discrete parameters as described in the mixture and latent discrete parameters chapters of the manual.\nStan has some distributions on covariance matrices that do not exist in BUGS, including a uniform distribution over correlation matrices which may be rescaled, and the priors based on C-vines defined in Lewandowski, Kurowicka, and Joe (2009). In particular, the Lewandowski et al. prior allows the correlation matrix to be shrunk toward the unit matrix while the scales are given independent priors.\nIn BUGS you need to define all variables. In Stan, if you declare but don’t define a parameter it implicitly has a flat prior (on the scale in which the parameter is defined). For example, if you have a parameter p declared as\nreal<lower=0, upper=1> p;\nand then have no distribution statement for p in the model block, then you are implicitly assigning a uniform \\([0,1]\\) prior on p.\nOn the other hand, if you have a parameter theta declared with\nreal theta;\nand have no distribution statement for theta in the model block, then you are implicitly assigning an improper uniform prior on \\((-\\infty,\\infty)\\) to theta.\nBUGS models are always proper (being constructed as a product of proper marginal and conditional densities). Stan models can be improper. Here is the simplest improper Stan model:\nparameters {\n real theta;\n}\nmodel { }\nAlthough parameters in Stan models may have improper priors, we do not want improper posterior distributions, as we are trying to use these distributions for Bayesian inference. There is no general way to check if a posterior distribution is improper. But if all the priors are proper, the posterior will be proper also.\nEach statement in a Stan model is directly translated into the C++ code for computing the log posterior. 
Thus, for example, the following pair of statements is legal in a Stan model:\ny ~ normal(0,1);\ny ~ normal(2,3);\nThe second line here does not simply overwrite the first; rather, both statements contribute to the density function that is evaluated. The above two lines have the effect of including the product, \\(\\textsf{normal}(y \\mid 0,1) * \\textsf{normal}(y \\mid 2,3)\\), into the density function.\nFor a perhaps more confusing example, consider the following two lines in a Stan model:\nx ~ normal(0.8 * y, sigma);\ny ~ normal(0.8 * x, sigma);\nAt first, this might look like a joint normal distribution with a correlation of 0.8. But it is not. The above are not interpreted as conditional entities; rather, they are factors in the joint density. Multiplying them gives, \\(\\textsf{normal}(x \\mid 0.8y,\\sigma)\n\\times \\textsf{normal}(y \\mid 0.8x,\\sigma)\\), which is what it is (you can work out the algebra) but it is not the joint distribution where the conditionals have regressions with slope 0.8.\nWith censoring and truncation, Stan uses the censored-data or truncated-data likelihood—this is not always done in BUGS. All of the approaches to censoring and truncation discussed in Gelman et al. (2013) and Gelman and Hill (2007) may be implemented in Stan directly as written.\nStan, like BUGS, can benefit from human intervention in the form of reparameterization.\n\n\n\nStan can be set up from within R using two lines of code. Follow the instructions for running Stan from R on the Stan web site. You don’t need to separately download Stan and RStan. Installing RStan will automatically set up Stan.\nIn practice we typically run the same Stan model repeatedly. If you pass RStan the result of a previously fitted model the model will not need be recompiled. 
An example is given on the running Stan from R pages available from the Stan web site.\nWhen you run Stan, it saves various conditions including starting values, some control variables for the tuning and running of the no-U-turn sampler, and the initial random seed. You can specify these values in the Stan call and thus achieve exact replication if desired. (This can be useful for debugging.)\nWhen running BUGS from R, you need to send exactly the data that the model needs. When running RStan, you can include extra data, which can be helpful when playing around with models. For example, if you remove a variable x from the model, you can keep it in the data sent from R, thus allowing you to quickly alter the Stan model without having to also change the calling information in your R script.\nAs in R2WinBUGS and R2jags, after running the Stan model, you can quickly summarize using plot() and print(). You can access the simulations themselves using various extractor functions, as described in the RStan documentation.\nVarious information about the sampler, such as number of leapfrog steps, log probability, and step size, is available through extractor functions. These can be useful for understanding what is going wrong when the algorithm is slow to converge.\n\n\n\nStan, like WinBUGS, OpenBUGS, and JAGS, has an active community, which you can access via the user’s mailing list and the developer’s mailing list; see the Stan web site for information on subscribing and posting and to look at archives.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Transitioning from BUGS" + ] + }, + { + "objectID": "stan-users-guide/for-bugs-users.html#some-differences-in-how-bugs-and-stan-work", + "href": "stan-users-guide/for-bugs-users.html#some-differences-in-how-bugs-and-stan-work", + "title": "Transitioning from BUGS", + "section": "", + "text": "Stan is compiled in two steps, first a model is translated to templated C++ and then to a platform-specific executable. 
Stan, unlike BUGS, allows the user to directly program in C++, but we do not describe how to do this in this Stan manual (see the getting started with C++ section of https://mc-stan.org for more information on using Stan directly from C++).\n\n\n\nBUGS performs MCMC updating one scalar parameter at a time, (with some exceptions such as JAGS’s implementation of regression and generalized linear models and some conjugate multivariate parameters), using conditional distributions (Gibbs sampling) where possible and otherwise using adaptive rejection sampling, slice sampling, and Metropolis jumping. BUGS figures out the dependence structure of the joint distribution as specified in its modeling language and uses this information to compute only what it needs at each step. Stan moves in the entire space of all the parameters using Hamiltonian Monte Carlo (more precisely, the no-U-turn sampler), thus avoiding some difficulties that occur with one-dimension-at-a-time sampling in high dimensions but at the cost of requiring the computation of the entire log density at each step.\n\n\n\nBUGS tunes its adaptive jumping (if necessary) during its warmup phase (traditionally referred to as “burn-in”). Stan uses its warmup phase to tune the no-U-turn sampler (NUTS).\n\n\n\nThe BUGS modeling language is not directly executable. Rather, BUGS parses its model to determine the posterior density and then decides on a sampling scheme. In contrast, the statements in a Stan model are directly executable: they translate exactly into C++ code that is used to compute the log posterior density (which in turn is used to compute the gradient).\n\n\n\nIn BUGS, statements are executed according to the directed graphical model so that variables are always defined when needed. A side effect of the direct execution of Stan’s modeling language is that statements execute in the order in which they are written. 
For instance, the following Stan program, which sets mu before using it to sample y:\nmu = a + b * x;\ny ~ normal(mu, sigma);\ntranslates to the following C++ code:\nmu = a + b * x;\ntarget += normal_lpdf(y | mu, sigma);\nContrast this with the following Stan program:\ny ~ normal(mu, sigma);\nmu = a + b * x;\nThis program is well formed, but is almost certainly a coding error, because it attempts to use mu before it is set. The direct translation to C++ code highlights the potential error of using mu in the first statement:\ntarget += normal_lpdf(y | mu, sigma);\nmu = a + b * x;\nTo trap these kinds of errors, variables are initialized to the special not-a-number (NaN) value. If NaN is passed to a log probability function, it will raise a domain exception, which will in turn be reported by the sampler. The sampler will reject the sample out of hand as if it had zero probability.\n\n\n\nStan uses its own C++ algorithmic differentiation packages to compute the gradient of the log density (up to a proportion). Gradients are required during the Hamiltonian dynamics simulations within the leapfrog algorithm of the Hamiltonian Monte Carlo and NUTS samplers.\n\n\n\nBoth BUGS and Stan are semi-automatic in that they run by themselves with no outside tuning required. Nevertheless, the user needs to pick the number of chains and number of iterations per chain. We usually pick 4 chains and start with 10 iterations per chain (to make sure there are no major bugs and to approximately check the timing), then go to 100, 1000, or more iterations as necessary. Compared to Gibbs or Metropolis, Hamiltonian Monte Carlo can take longer per iteration (as it typically takes many “leapfrog steps” within each iteration), but the iterations typically have lower autocorrelation. So Stan might work fine with 1000 iterations in an example where BUGS would require 100,000 for good mixing. 
We recommend monitoring potential scale reduction statistics (\\(\\hat{R}\\)) and the effective sample size to judge when to stop (stopping when \\(\\hat{R}\\) values do not counter-indicate convergence and when a high enough effective sample size has been obtained).\n\n\n\nWinBUGS is closed source. OpenBUGS and JAGS are both licensed under the GNU General Public License (GPL), otherwise known as copyleft due to the restrictions it places on derivative works. Stan is licensed under the much more liberal new BSD license.\n\n\n\nLike WinBUGS, OpenBUGS and JAGS, Stan can be run directly from the command line or through common analytics platforms like R, Python, Julia, MATLAB, and Mathematica.\n\n\n\nLike OpenBUGS and JAGS, Stan can be run on Linux, Mac, and Windows platforms.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Transitioning from BUGS" + ] + }, + { + "objectID": "stan-users-guide/for-bugs-users.html#some-differences-in-the-modeling-languages", + "href": "stan-users-guide/for-bugs-users.html#some-differences-in-the-modeling-languages", + "title": "Transitioning from BUGS", + "section": "", + "text": "The BUGS modeling language follows an R-like syntax in which line breaks are meaningful. Stan follows the rules of C, in which line breaks are equivalent to spaces, and each statement ends in a semicolon. For example:\ny ~ normal(mu, sigma);\nand\nfor (i in 1:n) y[i] ~ normal(mu, sigma);\nOr, equivalently (recall that a line break is just another form of whitespace),\nfor (i in 1:n)\n y[i] ~ normal(mu, sigma);\nand also equivalently,\nfor (i in 1:n) {\n y[i] ~ normal(mu, sigma);\n}\nThere’s a semicolon after the model statement but not after the brackets indicating the body of the for loop.\nIn Stan, variables can have names constructed using letters, numbers, and the underscore (_) symbol, but nothing else (and a variable name cannot begin with a number). BUGS variables can also include the dot, or period (.) 
symbol.\nIn Stan, the second argument to the “normal” function is the standard deviation (i.e., the scale), not the variance (as in Bayesian Data Analysis) and not the inverse-variance (i.e., precision) (as in BUGS). Thus a normal with mean 1 and standard deviation 2 is normal(1,2), not normal(1,4) or normal(1,0.25).\nSimilarly, the second argument to the “multivariate normal” function is the covariance matrix and not the inverse covariance matrix (i.e., the precision matrix) (as in BUGS). The same is true for the “multivariate student” distribution.\nThe distributions have slightly different names:\n\n\n\nBUGS\nStan\n\n\n\n\ndnorm\nnormal\n\n\ndbinom\nbinomial\n\n\ndpois\npoisson\n\n\n…\n…\n\n\n\nStan, unlike BUGS, allows intermediate quantities, in the form of local variables, to be reassigned. For example, the following is legal and meaningful (if possibly inefficient) Stan code.\n{\n total = 0;\n for (i in 1:n) {\n theta[i] ~ normal(total, sigma);\n total = total + theta[i];\n }\n}\nIn BUGS, the above model would not be legal because the variable total is defined more than once. But in Stan, the loop is executed in order, so total is overwritten in each step.\nStan uses explicit declarations. Variables are declared with base type integer or real, and vectors, matrices, and arrays have specified dimensions. When variables are bounded, we give that information also. For data and transformed parameters, the bounds are used for error checking. For parameters, the constraints are critical to sampling as they determine the geometry over which the Hamiltonian is simulated.\nIn Stan, variables can be declared as data, transformed data, parameters, transformed parameters, or generated quantities. They can also be declared as local variables within blocks. 
For more information, see the part of this manual devoted to the Stan programming language and examine the example models.\nStan allows all sorts of tricks with vector and matrix operations which can make Stan models more compact. For example, arguments to probability functions may be vectorized,2 allowing\nfor (i in 1:n) {\n y[i] ~ normal(mu[i], sigma[i]);\n}\nto be expressed more compactly as\ny ~ normal(mu, sigma);\nThe vectorized form is also more efficient because Stan can unfold the computation of the chain rule during algorithmic differentiation.\nStan also allows for arrays of vectors and matrices. For example, a hierarchical model might have a vector of K parameters for each of J groups; this can be declared using\narray[J] vector[K] theta;\nThen theta[j] is an expression denoting a K-vector and may be used in the code just like any other vector variable.\nAn alternative encoding would be with a two-dimensional array, as in\narray[J, K] real theta;\nThe vector version can have some advantages, both in convenience and in computational speed for some operations.\nA third encoding would use a matrix:\nmatrix[J, K] theta;\nbut in this case, theta[j] is a row vector, not a vector, and accessing it as a vector is less efficient than with an array of vectors. The transposition operator, as in theta[j]', may be used to convert the row vector theta[j] to a (column) vector. Column vector and row vector types are not interchangeable everywhere in Stan; see the function signature declarations in the programming language section of this manual.\nStan supports general conditional statements using a standard if-else syntax. For example, a zero-inflated (or -deflated) Poisson mixture model is defined using the if-else syntax as described in the zero inflation section.\nStan supports general while loops using a standard syntax. While loops give Stan full Turing equivalent computational power. 
They are useful for defining iterative functions with complex termination conditions. As an illustration of their syntax, the for-loop\nmodel {\n // ...\n for (n in 1:N) {\n // ... do something with n ....\n }\n}\nmay be recoded using the following while loop.\nmodel {\n int n;\n // ...\n n = 1;\n while (n <= N) {\n // ... do something with n ...\n n = n + 1;\n }\n}", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Transitioning from BUGS" + ] + }, + { + "objectID": "stan-users-guide/for-bugs-users.html#some-differences-in-the-statistical-models-that-are-allowed", + "href": "stan-users-guide/for-bugs-users.html#some-differences-in-the-statistical-models-that-are-allowed", + "title": "Transitioning from BUGS", + "section": "", + "text": "Stan does not yet support declaration of discrete parameters. Discrete data variables are supported. Inference is supported for discrete parameters as described in the mixture and latent discrete parameters chapters of the manual.\nStan has some distributions on covariance matrices that do not exist in BUGS, including a uniform distribution over correlation matrices which may be rescaled, and the priors based on C-vines defined in Lewandowski, Kurowicka, and Joe (2009). In particular, the Lewandowski et al. prior allows the correlation matrix to be shrunk toward the unit matrix while the scales are given independent priors.\nIn BUGS you need to define all variables. In Stan, if you declare but don’t define a parameter it implicitly has a flat prior (on the scale in which the parameter is defined). 
For example, if you have a parameter p declared as\nreal<lower=0, upper=1> p;\nand then have no distribution statement for p in the model block, then you are implicitly assigning a uniform \\([0,1]\\) prior on p.\nOn the other hand, if you have a parameter theta declared with\nreal theta;\nand have no distribution statement for theta in the model block, then you are implicitly assigning an improper uniform prior on \\((-\\infty,\\infty)\\) to theta.\nBUGS models are always proper (being constructed as a product of proper marginal and conditional densities). Stan models can be improper. Here is the simplest improper Stan model:\nparameters {\n real theta;\n}\nmodel { }\nAlthough parameters in Stan models may have improper priors, we do not want improper posterior distributions, as we are trying to use these distributions for Bayesian inference. There is no general way to check if a posterior distribution is improper. But if all the priors are proper, the posterior will be proper also.\nEach statement in a Stan model is directly translated into the C++ code for computing the log posterior. Thus, for example, the following pair of statements is legal in a Stan model:\ny ~ normal(0,1);\ny ~ normal(2,3);\nThe second line here does not simply overwrite the first; rather, both statements contribute to the density function that is evaluated. The above two lines have the effect of including the product, \\(\\textsf{normal}(y \\mid 0,1) * \\textsf{normal}(y \\mid 2,3)\\), into the density function.\nFor a perhaps more confusing example, consider the following two lines in a Stan model:\nx ~ normal(0.8 * y, sigma);\ny ~ normal(0.8 * x, sigma);\nAt first, this might look like a joint normal distribution with a correlation of 0.8. But it is not. The above are not interpreted as conditional entities; rather, they are factors in the joint density. 
Multiplying them gives, \\(\\textsf{normal}(x \\mid 0.8y,\\sigma)\n\\times \\textsf{normal}(y \\mid 0.8x,\\sigma)\\), which is what it is (you can work out the algebra) but it is not the joint distribution where the conditionals have regressions with slope 0.8.\nWith censoring and truncation, Stan uses the censored-data or truncated-data likelihood—this is not always done in BUGS. All of the approaches to censoring and truncation discussed in Gelman et al. (2013) and Gelman and Hill (2007) may be implemented in Stan directly as written.\nStan, like BUGS, can benefit from human intervention in the form of reparameterization.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Transitioning from BUGS" + ] + }, + { + "objectID": "stan-users-guide/for-bugs-users.html#some-differences-when-running-from-r", + "href": "stan-users-guide/for-bugs-users.html#some-differences-when-running-from-r", + "title": "Transitioning from BUGS", + "section": "", + "text": "Stan can be set up from within R using two lines of code. Follow the instructions for running Stan from R on the Stan web site. You don’t need to separately download Stan and RStan. Installing RStan will automatically set up Stan.\nIn practice we typically run the same Stan model repeatedly. If you pass RStan the result of a previously fitted model the model will not need be recompiled. An example is given on the running Stan from R pages available from the Stan web site.\nWhen you run Stan, it saves various conditions including starting values, some control variables for the tuning and running of the no-U-turn sampler, and the initial random seed. You can specify these values in the Stan call and thus achieve exact replication if desired. (This can be useful for debugging.)\nWhen running BUGS from R, you need to send exactly the data that the model needs. When running RStan, you can include extra data, which can be helpful when playing around with models. 
For example, if you remove a variable x from the model, you can keep it in the data sent from R, thus allowing you to quickly alter the Stan model without having to also change the calling information in your R script.\nAs in R2WinBUGS and R2jags, after running the Stan model, you can quickly summarize using plot() and print(). You can access the simulations themselves using various extractor functions, as described in the RStan documentation.\nVarious information about the sampler, such as number of leapfrog steps, log probability, and step size, is available through extractor functions. These can be useful for understanding what is going wrong when the algorithm is slow to converge.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Transitioning from BUGS" + ] + }, + { + "objectID": "stan-users-guide/for-bugs-users.html#the-stan-community", + "href": "stan-users-guide/for-bugs-users.html#the-stan-community", + "title": "Transitioning from BUGS", + "section": "", + "text": "Stan, like WinBUGS, OpenBUGS, and JAGS, has an active community, which you can access via the user’s mailing list and the developer’s mailing list; see the Stan web site for information on subscribing and posting and to look at archives.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Transitioning from BUGS" + ] + }, + { + "objectID": "stan-users-guide/for-bugs-users.html#footnotes", + "href": "stan-users-guide/for-bugs-users.html#footnotes", + "title": "Transitioning from BUGS", + "section": "Footnotes", + "text": "Footnotes\n\n\nExcept where otherwise noted, we use “BUGS” to refer to WinBUGS, OpenBUGS, and JAGS, indiscriminately.↩︎\nMost distributions have been vectorized, but currently the truncated versions may not exist.↩︎", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Transitioning from BUGS" + ] + }, + { + "objectID": "stan-users-guide/hyperspherical-models.html", + "href": "stan-users-guide/hyperspherical-models.html", + "title": "Directions, Rotations, and 
Hyperspheres", + "section": "", + "text": "Directional statistics involve data and/or parameters that are constrained to be directions. The set of directions forms a sphere, the geometry of which is not smoothly mappable to that of a Euclidean space because you can move around a sphere and come back to where you started. This is why it is impossible to make a map of the globe on a flat piece of paper where all points that are close to each other on the globe are close to each other on the flat map. The fundamental problem is easy to visualize in two dimensions, because as you move around a circle, you wind up back where you started. In other words, 0 degrees and 360 degrees (equivalently, 0 and \\(2 \\pi\\) radians) pick out the same point, and the distance between 359 degrees and 2 degrees is the same as the distance between 137 and 140 degrees.\nStan supports directional statistics by providing a unit-vector data type, the values of which determine points on a hypersphere (circle in two dimensions, sphere in three dimensions).\n\n\nThe length of a vector \\(x \\in \\mathbb{R}^K\\) is given by \\[\n\\Vert x \\Vert\n= \\sqrt{x^{\\top}\\,x}\n= \\sqrt{x_1^2 + x_2^2 + \\cdots + x_K^2}.\n\\] Unit vectors are defined to be vectors of unit length (i.e., length one).\nWith a variable declaration such as\nunit_vector[K] x;\nthe value of x will be constrained to be a vector of size K with unit length; the reference manual chapter on constrained parameter transforms provides precise definitions.\nWarning: An extra term gets added to the log density to ensure the distribution on unit vectors is proper. This is not a problem in practice, but it may lead to misunderstandings of the target log density output (lp__ in some interfaces). The underlying source of the problem is that a unit vector of size \\(K\\) has only \\(K - 1\\) degrees of freedom. 
But there is no way to map those \\(K - 1\\) degrees of freedom continuously to \\(\\mathbb{R}^{K-1}\\)—for example, the circle can’t be mapped continuously to a line so the limits work out, nor can a sphere be mapped to a plane. A workaround is needed instead. Stan’s unit vector transform uses \\(K\\) unconstrained variables, then projects down to the unit hypersphere. Even though the hypersphere is compact, the result would be an improper distribution. To ensure the unit vector distribution is proper, each unconstrained variable is given a “Jacobian” adjustment equal to an independent standard normal distribution. Effectively, each dimension is drawn standard normal, then they are together projected down to the hypersphere to produce a unit vector. The result is a proper uniform distribution over the hypersphere.\n\n\n\nAn \\(n\\)-sphere, written \\(S^{n}\\), is defined as the set of \\((n +\n1)\\)-dimensional unit vectors, \\[\nS^{n} = \\left\\{ x \\in \\mathbb{R}^{n+1} \\: : \\: \\Vert x \\Vert = 1 \\right\\}.\n\\]\nEven though \\(S^n\\) is made up of points in \\((n+1)\\) dimensions, it is only an \\(n\\)-dimensional manifold. For example, \\(S^2\\) is defined as a set of points in \\(\\mathbb{R}^3\\), but each such point may be described uniquely by a latitude and longitude. Geometrically, the surface defined by \\(S^2\\) in \\(\\mathbb{R}^3\\) behaves locally like a plane, i.e., \\(\\mathbb{R}^2\\). However, the overall shape of \\(S^2\\) is not like a plane in that it is compact (i.e., there is a maximum distance between points). If you set off around the globe in a “straight line” (i.e., a geodesic), you wind up back where you started eventually; that is why the geodesics on the sphere (\\(S^2\\)) are called “great circles,” and why we need to use some clever representations to do circular or spherical statistics.\nEven though \\(S^{n-1}\\) behaves locally like \\(\\mathbb{R}^{n-1}\\), there is no way to smoothly map between them. 
For example, because latitude and longitude work on a modular basis (wrapping at \\(2\\pi\\) radians in natural units), they do not produce a smooth map.\nLike a bounded interval \\((a, b)\\), in geometric terms, a sphere is compact in that the distance between any two points is bounded.\n\n\n\nStan (inverse) transforms arbitrary points in \\(\\mathbb{R}^{K+1}\\) to points in \\(S^K\\) using the auxiliary variable approach of Muller (1959). A point \\(y \\in \\mathbb{R}^K\\) is transformed to a point \\(x \\in S^{K-1}\\) by \\[\nx = \\frac{y}{\\sqrt{y^{\\top} y}}.\n\\]\nThe problem with this mapping is that it’s many to one; any point lying on a vector out of the origin is projected to the same point on the surface of the sphere. Muller (1959) introduced an auxiliary variable interpretation of this mapping that provides the desired properties of uniformity; the reference manual contains the precise definitions used in the chapter on constrained parameter transforms.\n\n\nThe above mapping from \\(\\mathbb{R}^n\\) to \\(S^{n-1}\\) is not defined at zero. While this point outcome has measure zero during sampling, and may thus be ignored, it is the default initialization point and thus unit vector parameters cannot be initialized at zero. A simple workaround is to initialize from a small interval around zero, which is an option built into all of the Stan interfaces.\n\n\n\n\nUnit vectors correspond directly to angles and thus to rotations. This is easy to see in two dimensions, where a point on a circle determines a compass direction, or equivalently, an angle \\(\\theta\\). Given an angle \\(\\theta\\), a matrix can be defined, the pre-multiplication by which rotates a point by an angle of \\(\\theta\\). 
For angle \\(\\theta\\) (in two dimensions), the \\(2 \\times 2\\) rotation matrix is defined by \\[\nR_{\\theta}\n=\n\\begin{bmatrix}\n\\cos \\theta & -\\sin \\theta \\\\\n\\sin \\theta & \\cos \\theta\n\\end{bmatrix}.\n\\] Given a two-dimensional vector \\(x\\), \\(R_{\\theta} \\, x\\) is the rotation of \\(x\\) (around the origin) by \\(\\theta\\) radians.\n\n\nAngles can be calculated from unit vectors. For example, a random variable theta representing an angle in \\((-\\pi, \\pi)\\) radians can be declared as a two-dimensional unit vector then transformed to an angle.\nparameters {\n unit_vector[2] xy;\n}\ntransformed parameters {\n real<lower=-pi(), upper=pi()> theta = atan2(xy[2], xy[1]);\n}\nIf the distribution of \\((x, y)\\) is uniform over a circle, then the distribution of \\(\\operatorname{atan2}(y, x)\\) is uniform over \\((-\\pi, \\pi)\\).\nIt might be tempting to try to just declare theta directly as a parameter with the lower and upper bound constraint as given above. The drawback to this approach is that the values \\(-\\pi\\) and \\(\\pi\\) are at \\(-\\infty\\) and \\(\\infty\\) on the unconstrained scale, which can produce multimodal posterior distributions when the true distribution on the circle is unimodal.\nWith a little additional work on the trigonometric front, the same conversion back to angles may be accomplished in more dimensions.\n\n\n\n\nA 24-hour clock naturally represents the progression of time through the day, moving from midnight to noon and back again in one rotation. A point on a circle divided into 24 hours is thus a natural representation for the time of day. Similarly, years cycle through the seasons and return to the season from which they started.\nIn human affairs, temporal effects often arise by convention. 
These can be modeled directly with ad-hoc predictors for holidays and weekends, or with data normalization back to natural scales for daylight savings time.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Directions, Rotations, and Hyperspheres" + ] + }, + { + "objectID": "stan-users-guide/hyperspherical-models.html#unit-vectors", + "href": "stan-users-guide/hyperspherical-models.html#unit-vectors", + "title": "Directions, Rotations, and Hyperspheres", + "section": "", + "text": "The length of a vector \\(x \\in \\mathbb{R}^K\\) is given by \\[\n\\Vert x \\Vert\n= \\sqrt{x^{\\top}\\,x}\n= \\sqrt{x_1^2 + x_2^2 + \\cdots + x_K^2}.\n\\] Unit vectors are defined to be vectors of unit length (i.e., length one).\nWith a variable declaration such as\nunit_vector[K] x;\nthe value of x will be constrained to be a vector of size K with unit length; the reference manual chapter on constrained parameter transforms provides precise definitions.\nWarning: An extra term gets added to the log density to ensure the distribution on unit vectors is proper. This is not a problem in practice, but it may lead to misunderstandings of the target log density output (lp__ in some interfaces). The underlying source of the problem is that a unit vector of size \\(K\\) has only \\(K - 1\\) degrees of freedom. But there is no way to map those \\(K - 1\\) degrees of freedom continuously to \\(\\mathbb{R}^{K-1}\\)—for example, the circle can’t be mapped continuously to a line so the limits work out, nor can a sphere be mapped to a plane. A workaround is needed instead. Stan’s unit vector transform uses \\(K\\) unconstrained variables, then projects down to the unit hypersphere. Even though the hypersphere is compact, the result would be an improper distribution. To ensure the unit vector distribution is proper, each unconstrained variable is given a “Jacobian” adjustment equal to an independent standard normal distribution. 
Effectively, each dimension is drawn standard normal, then they are together projected down to the hypersphere to produce a unit vector. The result is a proper uniform distribution over the hypersphere.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Directions, Rotations, and Hyperspheres" + ] + }, + { + "objectID": "stan-users-guide/hyperspherical-models.html#circles-spheres-and-hyperspheres", + "href": "stan-users-guide/hyperspherical-models.html#circles-spheres-and-hyperspheres", + "title": "Directions, Rotations, and Hyperspheres", + "section": "", + "text": "An \\(n\\)-sphere, written \\(S^{n}\\), is defined as the set of \\((n +\n1)\\)-dimensional unit vectors, \\[\nS^{n} = \\left\\{ x \\in \\mathbb{R}^{n+1} \\: : \\: \\Vert x \\Vert = 1 \\right\\}.\n\\]\nEven though \\(S^n\\) is made up of points in \\((n+1)\\) dimensions, it is only an \\(n\\)-dimensional manifold. For example, \\(S^2\\) is defined as a set of points in \\(\\mathbb{R}^3\\), but each such point may be described uniquely by a latitude and longitude. Geometrically, the surface defined by \\(S^2\\) in \\(\\mathbb{R}^3\\) behaves locally like a plane, i.e., \\(\\mathbb{R}^2\\). However, the overall shape of \\(S^2\\) is not like a plane in that it is compact (i.e., there is a maximum distance between points). If you set off around the globe in a “straight line” (i.e., a geodesic), you wind up back where you started eventually; that is why the geodesics on the sphere (\\(S^2\\)) are called “great circles,” and why we need to use some clever representations to do circular or spherical statistics.\nEven though \\(S^{n-1}\\) behaves locally like \\(\\mathbb{R}^{n-1}\\), there is no way to smoothly map between them. 
For example, because latitude and longitude work on a modular basis (wrapping at \\(2\\pi\\) radians in natural units), they do not produce a smooth map.\nLike a bounded interval \\((a, b)\\), in geometric terms, a sphere is compact in that the distance between any two points is bounded.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Directions, Rotations, and Hyperspheres" + ] + }, + { + "objectID": "stan-users-guide/hyperspherical-models.html#transforming-to-unconstrained-parameters", + "href": "stan-users-guide/hyperspherical-models.html#transforming-to-unconstrained-parameters", + "title": "Directions, Rotations, and Hyperspheres", + "section": "", + "text": "Stan (inverse) transforms arbitrary points in \\(\\mathbb{R}^{K+1}\\) to points in \\(S^K\\) using the auxiliary variable approach of Muller (1959). A point \\(y \\in \\mathbb{R}^K\\) is transformed to a point \\(x \\in S^{K-1}\\) by \\[\nx = \\frac{y}{\\sqrt{y^{\\top} y}}.\n\\]\nThe problem with this mapping is that it’s many to one; any point lying on a vector out of the origin is projected to the same point on the surface of the sphere. Muller (1959) introduced an auxiliary variable interpretation of this mapping that provides the desired properties of uniformity; the reference manual contains the precise definitions used in the chapter on constrained parameter transforms.\n\n\nThe above mapping from \\(\\mathbb{R}^n\\) to \\(S^n\\) is not defined at zero. While this point outcome has measure zero during sampling, and may thus be ignored, it is the default initialization point and thus unit vector parameters cannot be initialized at zero. 
A simple workaround is to initialize from a small interval around zero, which is an option built into all of the Stan interfaces.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Directions, Rotations, and Hyperspheres" + ] + }, + { + "objectID": "stan-users-guide/hyperspherical-models.html#unit-vectors-and-rotations", + "href": "stan-users-guide/hyperspherical-models.html#unit-vectors-and-rotations", + "title": "Directions, Rotations, and Hyperspheres", + "section": "", + "text": "Unit vectors correspond directly to angles and thus to rotations. This is easy to see in two dimensions, where a point on a circle determines a compass direction, or equivalently, an angle \\(\\theta\\). Given an angle \\(\\theta\\), a matrix can be defined, the pre-multiplication by which rotates a point by an angle of \\(\\theta\\). For angle \\(\\theta\\) (in two dimensions), the \\(2 \\times 2\\) rotation matrix is defined by \\[\nR_{\\theta}\n=\n\\begin{bmatrix}\n\\cos \\theta & -\\sin \\theta \\\\\n\\sin \\theta & \\cos \\theta\n\\end{bmatrix}.\n\\] Given a two-dimensional vector \\(x\\), \\(R_{\\theta} \\, x\\) is the rotation of \\(x\\) (around the origin) by \\(\\theta\\) degrees.\n\n\nAngles can be calculated from unit vectors. For example, a random variable theta representing an angle in \\((-\\pi, \\pi)\\) radians can be declared as a two-dimensional unit vector then transformed to an angle.\nparameters {\n unit_vector[2] xy;\n}\ntransformed parameters {\n real<lower=-pi(), upper=pi()> theta = atan2(xy[2], xy[1]);\n}\nIf the distribution of \\((x, y)\\) is uniform over a circle, then the distribution of \\(\\arctan \\frac{y}{x}\\) is uniform over \\((-\\pi, \\pi)\\).\nIt might be tempting to try to just declare theta directly as a parameter with the lower and upper bound constraint as given above. 
The drawback to this approach is that the values \\(-\\pi\\) and \\(\\pi\\) are at \\(-\\infty\\) and \\(\\infty\\) on the unconstrained scale, which can produce multimodal posterior distributions when the true distribution on the circle is unimodal.\nWith a little additional work on the trigonometric front, the same conversion back to angles may be accomplished in more dimensions.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Directions, Rotations, and Hyperspheres" + ] + }, + { + "objectID": "stan-users-guide/hyperspherical-models.html#circular-representations-of-days-and-years", + "href": "stan-users-guide/hyperspherical-models.html#circular-representations-of-days-and-years", + "title": "Directions, Rotations, and Hyperspheres", + "section": "", + "text": "A 24-hour clock naturally represents the progression of time through the day, moving from midnight to noon and back again in one rotation. A point on a circle divided into 24 hours is thus a natural representation for the time of day. Similarly, years cycle through the seasons and return to the season from which they started.\nIn human affairs, temporal effects often arise by convention. These can be modeled directly with ad-hoc predictors for holidays and weekends, or with data normalization back to natural scales for daylight savings time.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Directions, Rotations, and Hyperspheres" + ] + }, + { + "objectID": "stan-users-guide/latent-discrete.html", + "href": "stan-users-guide/latent-discrete.html", + "title": "Latent Discrete Parameters", + "section": "", + "text": "Stan does not support sampling discrete parameters. So it is not possible to directly translate BUGS or JAGS models with discrete parameters (i.e., discrete stochastic nodes). 
Nevertheless, it is possible to code many models that involve bounded discrete parameters by marginalizing out the discrete parameters.1\nThis chapter shows how to code several widely-used models involving latent discrete parameters. The next chapter, the clustering chapter, on clustering models, considers further models involving latent discrete parameters.\n\n\nAlthough it requires some algebra on the joint probability function, a pleasant byproduct of the required calculations is the posterior expectation of the marginalized variable, which is often the quantity of interest for a model. This allows far greater exploration of the tails of the distribution as well as more efficient sampling on an iteration-by-iteration basis because the expectation at all possible values is being used rather than itself being estimated through sampling a discrete parameter.\nStandard optimization algorithms, including expectation maximization (EM), are often provided in applied statistics papers to describe maximum likelihood estimation algorithms. Such derivations provide exactly the marginalization needed for coding the model in Stan.\n\n\n\nThe first example is a model of coal mining disasters in the U.K. for the years 1851–1962.2\n\n\nFonnesbeck et al. (2013, sec. 3.1) provides a Poisson model of disaster \\(D_t\\) in year \\(t\\) with two rate parameters, an early rate (\\(e\\)) and late rate (\\(l\\)), that change at a given point in time \\(s\\). The full model expressed using a latent discrete parameter \\(s\\) is \\[\\begin{align*}\ne &\\sim \\textsf{exponential}(r_e) \\\\\nl &\\sim \\textsf{exponential}(r_l) \\\\\ns &\\sim \\textsf{uniform}(1, T) \\\\\nD_t &\\sim \\textsf{Poisson}(t < s \\; ? \\; e \\: : \\: l)\n\\end{align*}\\]\nThe last line uses the conditional operator (also known as the ternary operator), which is borrowed from C and related languages. 
The conditional operator has the same behavior as its counterpart in C++.3\nIt uses a compact notation involving separating its three arguments by a question mark (?) and a colon (:). The conditional operator is defined by \\[\nc \\; ? \\; x_1 \\: : \\: x_2\n=\n\\begin{cases}\n\\ x_1 & \\quad\\text{if } c \\text{ is true (i.e., non-zero), and} \\\\\n\\ x_2 & \\quad\\text{if } c \\text{ is false (i.e., zero).}\n\\end{cases}\n\\]\n\n\n\nTo code this model in Stan, the discrete parameter \\(s\\) must be marginalized out to produce a model defining the log of the probability function \\(p(e,l,D_t)\\). The full joint probability factors as \\[\\begin{align*}\np(e,l,s,D) &= p(e) \\, p(l) \\, p(s) \\, p(D \\mid s, e, l) \\\\\n&= \\textsf{exponential}(e \\mid r_e) \\ \\textsf{exponential}(l \\mid r_l) \\,\n \\textsf{uniform}(s \\mid 1, T) \\\\\n& \\qquad \\prod_{t=1}^T \\textsf{Poisson}(D_t \\mid t < s \\; ? \\; e \\: : \\: l).\n\\end{align*}\\]\nTo marginalize, an alternative factorization into prior and likelihood is used, \\[\np(e,l,D) = p(e,l) \\, p(D \\mid e,l),\n\\]\nwhere the likelihood is defined by marginalizing \\(s\\) as \\[\\begin{align*}\np(D \\mid e,l) &= \\sum_{s=1}^T p(s, D \\mid e,l) \\\\\n&= \\sum_{s=1}^T p(s) \\, p(D \\mid s,e,l) \\\\\n&= \\sum_{s=1}^T \\textsf{uniform}(s \\mid 1,T) \\,\n \\prod_{t=1}^T \\textsf{Poisson}(D_t \\mid t < s \\; ? \\; e \\: : \\: l).\n\\end{align*}\\]\nStan operates on the log scale and thus requires the log likelihood, \\[\\begin{align*}\n\\log p(D \\mid e,l)\n&= \\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}_{s=1}^T\n \\left( \\log \\textsf{uniform}(s \\mid 1, T) \\vphantom{\\sum_{t=1}^T}\\right. \\\\\n&\\qquad \\left.\n + \\sum_{t=1}^T \\log \\textsf{Poisson}(D_t \\mid t < s \\; ? 
\\; e \\: : \\: l)\n\\right),\n\\end{align*}\\] where the log sum of exponents function is defined by \\[\n\\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}_{n=1}^N \\, \\alpha_n =\n\\log \\sum_{n=1}^N \\exp(\\alpha_n).\n\\]\nThe log sum of exponents function allows the model to be coded directly in Stan using the built-in function log_sum_exp, which provides both arithmetic stability and efficiency for mixture model calculations.\n\n\n\nThe Stan program for the change point model is shown in the figure below. The transformed parameter lp[s] stores the quantity \\(\\log p(s, D \\mid e, l)\\).\ndata {\n real<lower=0> r_e;\n real<lower=0> r_l;\n\n int<lower=1> T;\n array[T] int<lower=0> D;\n}\ntransformed data {\n real log_unif;\n log_unif = -log(T);\n}\nparameters {\n real<lower=0> e;\n real<lower=0> l;\n}\ntransformed parameters {\n vector[T] lp;\n lp = rep_vector(log_unif, T);\n for (s in 1:T) {\n for (t in 1:T) {\n lp[s] = lp[s] + poisson_lpmf(D[t] | t < s ? e : l);\n }\n }\n}\nmodel {\n e ~ exponential(r_e);\n l ~ exponential(r_l);\n target += log_sum_exp(lp);\n}\nA change point model in which disaster rates D[t] have one rate, e, before the change point and a different rate, l, after the change point. The change point itself, s, is marginalized out as described in the text.\nAlthough the change-point model is coded directly, the doubly nested loop used for s and t is quadratic in T. 
Luke Wiklendt pointed out that a linear alternative can be achieved by the use of dynamic programming similar to the forward-backward algorithm for Hidden Markov models; he submitted a slight variant of the following code to replace the transformed parameters block of the above Stan program.\ntransformed parameters {\n vector[T] lp;\n {\n vector[T + 1] lp_e;\n vector[T + 1] lp_l;\n lp_e[1] = 0;\n lp_l[1] = 0;\n for (t in 1:T) {\n lp_e[t + 1] = lp_e[t] + poisson_lpmf(D[t] | e);\n lp_l[t + 1] = lp_l[t] + poisson_lpmf(D[t] | l);\n }\n lp = rep_vector(log_unif + lp_l[T + 1], T)\n + head(lp_e, T) - head(lp_l, T);\n }\n }\nAs should be obvious from looking at it, it has linear complexity in T rather than quadratic. The result for the mining-disaster data is about 20 times faster; the improvement will be greater for larger T.\nThe key to understanding Wiklendt’s dynamic programming version is to see that head(lp_e) holds the forward values, whereas lp_l[T + 1] - head(lp_l, T) holds the backward values; the clever use of subtraction allows lp_l to be accumulated naturally in the forward direction.\n\n\n\nThis model is easy to fit using MCMC with NUTS in its default configuration. Convergence is fast and sampling produces roughly one effective sample every two iterations. Because it is a relatively small model (the inner double loop over time is roughly 20,000 steps), it is fast.\nThe value of lp for each iteration for each change point is available because it is declared as a transformed parameter. If the value of lp were not of interest, it could be coded as a local variable in the model block and thus avoid the I/O overhead of saving values every iteration.\n\n\n\nThe value of lp[s] in a given iteration is given by \\(\\log\np(s,D \\mid e,l)\\) for the values of the early and late rates, \\(e\\) and \\(l\\), in the iteration. 
In each iteration after convergence, the early and late disaster rates, \\(e\\) and \\(l\\), are drawn from the posterior \\(p(e,l \\mid D)\\) by MCMC sampling and the associated lp calculated. The value of lp may be normalized to calculate \\(p(s \\mid e,l,D)\\) in each iteration, based on the current values of \\(e\\) and \\(l\\). Averaging over iterations provides an unnormalized probability estimate of the change point being \\(s\\) (see below for the normalizing constant), \\[\\begin{align*}\np(s \\mid D) &\\propto q(s \\mid D) \\\\\n&= \\frac{1}{M} \\sum_{m=1}^{M} \\exp(\\texttt{lp}[m,s]).\n\\end{align*}\\] where \\(\\texttt{lp}[m,s]\\) represents the value of lp in posterior draw \\(m\\) for change point \\(s\\). By averaging over draws, \\(e\\) and \\(l\\) are themselves marginalized out, and the result has no dependence on a given iteration’s value for \\(e\\) and \\(l\\). A final normalization then produces the quantity of interest, the posterior probability of the change point being \\(s\\) conditioned on the data \\(D\\), \\[\np(s \\mid D) = \\frac{q(s \\mid D)}{\\sum_{s'=1}^T q(s' \\mid D)}.\n\\]\nA plot of the values of \\(\\log p(s \\mid D)\\) computed using Stan 2.4’s default MCMC implementation is shown in the posterior plot.\nLog probability of change point being in year, calculated analytically.\n\n\n\nAnalytical change-point posterior\n\n\nThe frequency of change points generated by sampling the discrete change points.\n\n\n\nSampled change-point posterior\n\n\nIn order for their range of estimates to be visible, the first plot is on the log scale and the second plot on the linear scale; note the narrower range of years in the second plot resulting from sampling. The posterior mean of \\(s\\) is roughly 1891.\n\n\n\nThe generated quantities block may be used to draw discrete parameter values using the built-in pseudo-random number generators. 
For example, with lp defined as above, the following program draws a random value for s at every iteration.\ngenerated quantities {\n int<lower=1, upper=T> s;\n s = categorical_logit_rng(lp);\n}\nA posterior histogram of draws for \\(s\\) is shown on the second change point posterior figure above.\nCompared to working in terms of expectations, discrete sampling is highly inefficient, especially for tails of distributions, so this approach should only be used if draws from a distribution are explicitly required. Otherwise, expectations should be computed in the generated quantities block based on the posterior distribution for s given by softmax(lp).\n\n\n\nThe discrete sample generated for \\(s\\) can be used to calculate covariance with other parameters. Although the sampling approach is straightforward, it is more statistically efficient (in the sense of requiring far fewer iterations for the same degree of accuracy) to calculate these covariances in expectation using lp.\n\n\n\nThere is no obstacle in principle to allowing multiple change points. The only issue is that computation increases from linear to quadratic in marginalizing out two change points, cubic for three change points, and so on. There are three parameters, e, m, and l, and two loops for the change point and then one over time, with log densities being stored in a matrix.\nmatrix[T, T] lp;\nlp = rep_matrix(log_unif, T);\nfor (s1 in 1:T) {\n for (s2 in 1:T) {\n for (t in 1:T) {\n lp[s1,s2] = lp[s1,s2]\n + poisson_lpmf(D[t] | t < s1 ? e : (t < s2 ? m : l));\n }\n }\n}\nThe matrix can then be converted back to a vector using to_vector before being passed to log_sum_exp.\n\n\n\n\nA widely applied field method in ecology is to capture (or sight) animals, mark them (e.g., by tagging), then release them. This process is then repeated one or more times, and is often done for populations on an ongoing basis. 
The resulting data may be used to estimate population size.\nThe first subsection describes a simple mark-recapture model that does not involve any latent discrete parameters. The following subsections describe the Cormack-Jolly-Seber model, which involves latent discrete parameters for animal death.\n\n\nIn the simplest case, a one-stage mark-recapture study produces the following data\n\n\\(M\\) : number of animals marked in first capture,\n\\(C\\) : number of animals in second capture, and\n\\(R\\) : number of marked animals in second capture.\n\nThe estimand of interest is\n\n\\(N\\) : number of animals in the population.\n\nDespite the notation, the model will take \\(N\\) to be a continuous parameter; just because the population must be finite doesn’t mean the parameter representing it must be. The parameter will be used to produce a real-valued estimate of the population size.\nThe Lincoln-Petersen (Lincoln 1930; Petersen 1896) method for estimating population size is \\[\n\\hat{N} = \\frac{M C}{R}.\n\\]\nThis population estimate would arise from a probabilistic model in which the number of recaptured animals is distributed binomially, \\[\nR \\sim \\textsf{binomial}(C, M / N)\n\\] given the total number of animals captured in the second round (\\(C\\)) with a recapture probability of \\(M/N\\), the fraction of the total population \\(N\\) marked in the first round.\ndata {\n int<lower=0> M;\n int<lower=0> C;\n int<lower=0, upper=min(M, C)> R;\n}\nparameters {\n real<lower=(C - R + M)> N;\n}\nmodel {\n R ~ binomial(C, M / N);\n}\nA probabilistic formulation of the Lincoln-Petersen estimator for population size based on data from a one-step mark-recapture study. The lower bound on \\(N\\) is necessary to efficiently eliminate impossible values.\nThe probabilistic variant of the Lincoln-Petersen estimator can be directly coded in Stan as shown in the Lincoln-Petersen model figure. 
The Lincoln-Petersen estimate is the maximum likelihood estimate (MLE) for this model.\nTo ensure the MLE is the Lincoln-Petersen estimate, an improper uniform prior for \\(N\\) is used; this could (and should) be replaced with a more informative prior if possible, based on knowledge of the population under study.\nThe one tricky part of the model is the lower bound \\(C - R + M\\) placed on the population size \\(N\\). Values below this bound are impossible because it is otherwise not possible to draw \\(R\\) samples out of the \\(C\\) animals recaptured. Implementing this lower bound is necessary to ensure sampling and optimization can be carried out in an unconstrained manner with unbounded support for parameters on the transformed (unconstrained) space. The lower bound in the declaration for \\(N\\) implies a variable transform \\(f : (C-R+M,\\infty) \\rightarrow (-\\infty,+\\infty)\\) defined by \\(f(N) = \\log(N - (C - R + M))\\); the reference manual contains full details of all constrained parameter transforms.\n\n\n\nThe Cormack-Jolly-Seber (CJS) model (Cormack 1964; Jolly 1965; Seber 1965) is an open-population model in which the population may change over time due to death; the presentation here draws heavily on Schofield (2007).\nThe basic data are\n\n\\(I\\): number of individuals,\n\\(T\\): number of capture periods, and\n\\(y_{i,t}\\): Boolean indicating if individual \\(i\\) was captured at time \\(t\\).\n\nEach individual is assumed to have been captured at least once because an individual only contributes information conditionally after they have been captured the first time.\nThere are two Bernoulli parameters in the model,\n\n\\(\\phi_t\\) : probability that animal alive at time \\(t\\) survives until \\(t + 1\\) and\n\\(p_t\\) : probability that animal alive at time \\(t\\) is captured at time \\(t\\).\n\nThese parameters will both be given uniform priors, but information should be used to tighten these priors in practice.\nThe CJS model also 
employs a latent discrete parameter \\(z_{i,t}\\) indicating for each individual \\(i\\) whether it is alive at time \\(t\\), distributed as \\[\nz_{i,t} \\sim \\mathsf{Bernoulli}(z_{i,t-1} \\; ? \\; \\phi_{t-1} \\: : \\: 0).\n\\]\nThe conditional prevents the model positing zombies; once an animal is dead, it stays dead. The data distribution is then simple to express conditional on \\(z\\) as \\[\ny_{i,t} \\sim \\mathsf{Bernoulli}(z_{i,t} \\; ? \\; p_t \\: : \\: 0).\n\\]\nThe conditional enforces the constraint that dead animals cannot be captured.\n\n\n\nThis subsection presents an implementation of the model in terms of counts for different history profiles for individuals over three capture times. It assumes exchangeability of the animals in that each is assigned the same capture and survival probabilities.\nIn order to ease the marginalization of the latent discrete parameter \\(z_{i,t}\\), the Stan models rely on a derived quantity \\(\\chi_t\\) for the probability that an individual is never captured again if it is alive at time \\(t\\) (if it is dead, the recapture probability is zero). This quantity is defined recursively by \\[\n\\chi_t\n=\n\\begin{cases}\n1 & \\quad\\text{if } t = T \\\\\n(1 - \\phi_t) + \\phi_t (1 - p_{t+1}) \\chi_{t+1}\n & \\quad\\text{if } t < T\n\\end{cases}\n\\]\nThe base case arises because if an animal was captured in the last time period, the probability it is never captured again is 1 because there are no more capture periods. 
The recursive case defining \\(\\chi_{t}\\) in terms of \\(\\chi_{t+1}\\) involves two possibilities: (1) not surviving to the next time period, with probability \\((1 - \\phi_t)\\), or (2) surviving to the next time period with probability \\(\\phi_t\\), not being captured in the next time period with probability \\((1 - p_{t+1})\\), and not being captured again after being alive in period \\(t+1\\) with probability \\(\\chi_{t+1}\\).\nWith three capture times, there are eight captured/not-captured profiles an individual may have. These may be naturally coded as binary numbers as follows.\n\\[\n\\begin{array}{crclc}\n\\hline\n& \\qquad\\qquad & captures & \\qquad\\qquad & \\\\\n\\mathrm{profile} & 1 & 2 & 3 & \\mathrm{probability} \\\\\n\\hline\n0 & - & - & - & n/a \\\\\n1 & - & - & + & n/a \\\\\n2 & - & + & - & \\chi_2 \\\\\n3 & - & + & + & \\phi_2 \\, p_3 \\\\\n4 & + & - & - & \\chi_1 \\\\\n5 & + & - & + & \\phi_1 \\, (1 - p_2) \\, \\phi_2 \\, p_3 \\\\\n6 & + & + & - & \\phi_1 \\, p_2 \\, \\chi_2 \\\\\n7 & + & + & + & \\phi_1 \\, p_2 \\, \\phi_2 \\, p_3 \\\\\n\\hline\n\\end{array}\n\\]\nHistory 0, for animals that are never captured, is unobservable because only animals that are captured are observed. History 1, for animals that are only captured in the last round, provides no information for the CJS model, because capture/non-capture status is only informative when conditioned on earlier captures. For the remaining cases, the contribution to the likelihood is provided in the final column.\nBy defining these probabilities in terms of \\(\\chi\\) directly, there is no need for a latent binary parameter indicating whether an animal is alive at time \\(t\\) or not. The definition of \\(\\chi\\) is typically used to define the likelihood (i.e., marginalize out the latent discrete parameter) for the CJS model (Schofield 2007).\nThe Stan model defines \\(\\chi\\) as a transformed parameter based on parameters \\(\\phi\\) and \\(p\\). 
In the model block, the log probability is incremented for each history based on its count. This second step is similar to collecting Bernoulli observations into a binomial or categorical observations into a multinomial, only it is coded directly in the Stan program using target += rather than being part of a built-in probability function.\nThe following is the Stan program for the Cormack-Jolly-Seber mark-recapture model that considers counts of individuals with observation histories of being observed or not in three capture periods\ndata {\n array[7] int<lower=0> history;\n}\nparameters {\n array[2] real<lower=0, upper=1> phi;\n array[3] real<lower=0, upper=1> p;\n}\ntransformed parameters {\n array[2] real<lower=0, upper=1> chi;\n chi[2] = (1 - phi[2]) + phi[2] * (1 - p[3]);\n chi[1] = (1 - phi[1]) + phi[1] * (1 - p[2]) * chi[2];\n}\nmodel {\n target += history[2] * log(chi[2]);\n target += history[3] * (log(phi[2]) + log(p[3]));\n target += history[4] * (log(chi[1]));\n target += history[5] * (log(phi[1]) + log1m(p[2])\n + log(phi[2]) + log(p[3]));\n target += history[6] * (log(phi[1]) + log(p[2])\n + log(chi[2]));\n target += history[7] * (log(phi[1]) + log(p[2])\n + log(phi[2]) + log(p[3]));\n}\ngenerated quantities {\n real<lower=0, upper=1> beta3;\n beta3 = phi[2] * p[3];\n}\n\n\nThe parameters \\(\\phi_2\\) and \\(p_3\\), the probability of death at time 2 and probability of capture at time 3 are not identifiable, because both may be used to account for lack of capture at time 3. Their product, \\(\\beta_3 = \\phi_2 \\, p_3\\), is identified. The Stan model defines beta3 as a generated quantity. Unidentified parameters pose a problem for Stan’s samplers’ adaptation. Although the problem posed for adaptation is mild here because the parameters are bounded and thus have proper uniform priors, it would be better to formulate an identified parameterization. 
One way to do this would be to formulate a hierarchical model for the \\(p\\) and \\(\\phi\\) parameters.\n\n\n\n\nThis section presents a version of the Cormack-Jolly-Seber (CJS) model cast at the individual level rather than collectively as in the previous subsection. It also extends the model to allow an arbitrary number of time periods. The data will consist of the number \\(T\\) of capture events, the number \\(I\\) of individuals, and a boolean flag \\(y_{i,t}\\) indicating if individual \\(i\\) was observed at time \\(t\\). In Stan,\ndata {\n int<lower=2> T;\n int<lower=0> I;\n array[I, T] int<lower=0, upper=1> y;\n}\nThe advantages to the individual-level model is that it becomes possible to add individual “random effects” that affect survival or capture probability, as well as to avoid the combinatorics involved in unfolding \\(2^T\\) observation histories for \\(T\\) capture times.\n\n\nThe individual CJS model is written involves several function definitions. The first two are used in the transformed data block to compute the first and last time period in which an animal was captured.4\nfunctions {\n int first_capture(array[] int y_i) {\n for (k in 1:size(y_i)) {\n if (y_i[k]) {\n return k;\n }\n }\n return 0;\n }\n int last_capture(array[] int y_i) {\n for (k_rev in 0:(size(y_i) - 1)) {\n int k;\n k = size(y_i) - k_rev;\n if (y_i[k]) {\n return k;\n }\n }\n return 0;\n }\n // ...\n}\nThese two functions are used to define the first and last capture time for each individual in the transformed data block.5\ntransformed data {\n array[I] int<lower=0, upper=T> first;\n array[I] int<lower=0, upper=T> last;\n vector<lower=0, upper=I>[T] n_captured;\n for (i in 1:I) {\n first[i] = first_capture(y[i]);\n }\n for (i in 1:I) {\n last[i] = last_capture(y[i]);\n }\n n_captured = rep_vector(0, T);\n for (t in 1:T) {\n for (i in 1:I) {\n if (y[i, t]) {\n n_captured[t] = n_captured[t] + 1;\n }\n }\n }\n}\nThe transformed data block also defines n_captured[t], which is 
the total number of captures at time t. The variable n_captured is defined as a vector instead of an integer array so that it can be used in an elementwise vector operation in the generated quantities block to model the population estimates at each time point.\nThe parameters and transformed parameters are as before, but now there is a function definition for computing the entire vector chi, the probability that if an individual is alive at t that it will never be captured again.\nparameters {\n vector<lower=0, upper=1>[T - 1] phi;\n vector<lower=0, upper=1>[T] p;\n}\ntransformed parameters {\n vector<lower=0, upper=1>[T] chi;\n chi = prob_uncaptured(T, p, phi);\n}\nThe definition of prob_uncaptured, from the functions block, is\nfunctions {\n // ...\n vector prob_uncaptured(int T, vector p, vector phi) {\n vector[T] chi;\n chi[T] = 1.0;\n for (t in 1:(T - 1)) {\n int t_curr;\n int t_next;\n t_curr = T - t;\n t_next = t_curr + 1;\n chi[t_curr] = (1 - phi[t_curr])\n + phi[t_curr]\n * (1 - p[t_next])\n * chi[t_next];\n }\n return chi;\n }\n}\nThe function definition directly follows the mathematical definition of \\(\\chi_t\\), unrolling the recursion into an iteration and defining the elements of chi from T down to 1.\n\n\n\nGiven the precomputed quantities, the model block directly encodes the CJS model’s log likelihood function. All parameters are left with their default uniform priors and the model simply encodes the log probability of the observations y given the parameters p and phi as well as the transformed parameter chi defined in terms of p and phi.\nmodel {\n for (i in 1:I) {\n if (first[i] > 0) {\n for (t in (first[i]+1):last[i]) {\n 1 ~ bernoulli(phi[t - 1]);\n y[i, t] ~ bernoulli(p[t]);\n }\n 1 ~ bernoulli(chi[last[i]]);\n }\n }\n}\nThe outer loop is over individuals, conditionally skipping individuals i which are never captured. 
The never-captured check depends on the convention of the first-capture and last-capture functions returning 0 for first if an individual is never captured.\nThe inner loop for individual i first increments the log probability based on the survival of the individual with probability phi[t - 1]. The outcome of 1 is fixed because the individual must survive between the first and last capture (i.e., no zombies). The loop starts after the first capture, because all information in the CJS model is conditional on the first capture.\nIn the inner loop, the observed capture status y[i, t] for individual i at time t has a Bernoulli distribution based on the capture probability p[t] at time t.\nAfter the inner loop, the probability of an animal never being seen again after being observed at time last[i] is included, because last[i] was defined to be the last time period in which animal i was observed.\n\n\n\nAs with the collective model described in the previous subsection, this model does not identify phi[T - 1] and p[T], but does identify their product, beta. Thus beta is defined as a generated quantity to monitor convergence and report.\ngenerated quantities {\n real beta;\n // ...\n\n beta = phi[T - 1] * p[T];\n // ...\n}\nThe parameter p[1] is also not modeled and will just be uniform between 0 and 1. A more finely articulated model might have a hierarchical or time-series component, in which case p[1] would be an unknown initial condition and both phi[T - 1] and p[T] could be identified.\n\n\n\nThe generated quantities also calculates an estimate of the population mean at each time t in the same way as in the simple mark-recapture model as the number of individuals captured at time t divided by the probability of capture at time t. 
This is done with the elementwise division operation for vectors (./) in the generated quantities block.\ngenerated quantities {\n // ...\n vector<lower=0>[T] pop;\n // ...\n pop = n_captured ./ p;\n pop[1] = -1;\n}\n\n\n\nAll individuals are modeled as having the same capture probability, but this model could be easily generalized to use a logistic regression here based on individual-level inputs to be used as predictors.\n\n\n\n\n\nAlthough seemingly disparate tasks, the rating/coding/annotation of items with categories and diagnostic testing for disease or other conditions, share several characteristics which allow their statistical properties to be modeled similarly.\n\n\nSuppose you have diagnostic tests for a condition of varying sensitivity and specificity. Sensitivity is the probability a test returns positive when the patient has the condition and specificity is the probability that a test returns negative when the patient does not have the condition. For example, mammograms and puncture biopsy tests both test for the presence of breast cancer. Mammograms have high sensitivity and low specificity, meaning lots of false positives, whereas puncture biopsies are the opposite, with low sensitivity and high specificity, meaning lots of false negatives.\nThere are several estimands of interest in such studies. An epidemiological study may be interested in the prevalence of a kind of infection, such as malaria, in a population. A test development study might be interested in the diagnostic accuracy of a new test. A health care worker performing tests might be interested in the disease status of a particular patient.\n\n\n\nHumans are often given the task of coding (equivalently rating or annotating) data. 
For example, journal or grant reviewers rate submissions, a political study may code campaign commercials as to whether they are attack ads or not, a natural language processing study might annotate Tweets as to whether they are positive or negative in overall sentiment, or a dentist looking at an X-ray classifies a patient as having a cavity or not. In all of these cases, the data coders play the role of the diagnostic tests and all of the same estimands are in play — data coder accuracy and bias, true categories of items being coded, or the prevalence of various categories of items in the data.\n\n\n\nIn this section, only categorical ratings are considered, and the challenge in the modeling for Stan is to marginalize out the discrete parameters.\nDawid and Skene (1979) introduce a noisy-measurement model for coding and apply it in the epidemiological setting of coding what doctors say about patient histories; the same model can be used for diagnostic procedures.\n\n\nThe data for the model consists of \\(J\\) raters (diagnostic tests), \\(I\\) items (patients), and \\(K\\) categories (condition statuses) to annotate, with \\(y_{i, j} \\in \\{1, \\dotsc, K\\}\\) being the rating provided by rater \\(j\\) for item \\(i\\). 
In a diagnostic test setting for a particular condition, the raters are diagnostic procedures and often \\(K=2\\), with values signaling the presence or absence of the condition.6\nIt is relatively straightforward to extend Dawid and Skene’s model to deal with the situation where not every rater rates each item exactly once.\n\n\n\n\nThe model is based on three parameters, the first of which is discrete:\n\n\\(z_i\\) : a value in \\(\\{1, \\dotsc, K\\}\\) indicating the true category of item \\(i\\),\n\\(\\pi\\) : a \\(K\\)-simplex for the prevalence of the \\(K\\) categories in the population, and\n\\(\\theta_{j,k}\\) : a \\(K\\)-simplex for the response of annotator \\(j\\) to an item of true category \\(k\\).\n\n\n\n\nThe true category of an item is assumed to be generated by a simple categorical distribution based on item prevalence, \\[\nz_i \\sim \\textsf{categorical}(\\pi).\n\\]\nThe rating \\(y_{i, j}\\) provided for item \\(i\\) by rater \\(j\\) is modeled as a categorical response of rater \\(j\\) to an item of category \\(z_i\\),7 \\[\ny_{i, j} \\sim \\textsf{categorical}(\\theta_{j, z[i]}).\n\\]\n\n\nDawid and Skene provided maximum likelihood estimates for \\(\\theta\\) and \\(\\pi\\), which allows them to generate probability estimates for each \\(z_i\\).\nTo mimic Dawid and Skene’s maximum likelihood model, the parameters \\(\\theta_{j,k}\\) and \\(\\pi\\) can be given uniform priors over \\(K\\)-simplexes. It is straightforward to generalize to Dirichlet priors, \\[\n\\pi \\sim \\textsf{Dirichlet}(\\alpha)\n\\] and \\[\n\\theta_{j,k} \\sim \\textsf{Dirichlet}(\\beta_k)\n\\] with fixed hyperparameters \\(\\alpha\\) (a vector) and \\(\\beta\\) (a matrix or array of vectors). 
The prior for \\(\\theta_{j,k}\\) must be allowed to vary in \\(k\\), so that, for instance, \\(\\beta_{k,k}\\) is large enough to allow the prior to favor better-than-chance annotators over random or adversarial ones.\nBecause there are \\(J\\) coders, it would be natural to extend the model to include a hierarchical prior for \\(\\beta\\) and to partially pool the estimates of coder accuracy and bias.\n\n\n\nBecause the true category parameter \\(z\\) is discrete, it must be marginalized out of the joint posterior in order to carry out sampling or maximum likelihood estimation in Stan. The joint posterior factors as \\[\np(y, \\theta, \\pi) = p(y \\mid \\theta,\\pi) \\, p(\\pi) \\, p(\\theta),\n\\] where \\(p(y \\mid \\theta,\\pi)\\) is derived by marginalizing \\(z\\) out of \\[\np(z, y \\mid \\theta, \\pi) =\n\\prod_{i=1}^I \\left( \\textsf{categorical}(z_i \\mid \\pi)\n \\prod_{j=1}^J\n \\textsf{categorical}(y_{i, j} \\mid \\theta_{j, z[i]})\n \\right).\n\\]\nThis can be done item by item, with \\[\np(y \\mid \\theta, \\pi) =\n\\prod_{i=1}^I \\sum_{k=1}^K\n \\left( \\textsf{categorical}(k \\mid \\pi)\n \\prod_{j=1}^J\n \\textsf{categorical}(y_{i, j} \\mid \\theta_{j, k})\n \\right).\n\\]\nIn the missing data model, only the observed labels would be used in the inner product.\nDawid and Skene (1979) derive exactly the same equation in their Equation (2.7), required for the E-step in their expectation maximization (EM) algorithm. Stan requires the marginalized probability function on the log scale, \\[\\begin{align*}\n\\log p(y \\mid \\theta, \\pi)\n&= \\sum_{i=1}^I \\log \\left( \\sum_{k=1}^K \\exp\n \\left(\\log \\textsf{categorical}(k \\mid \\pi) \\vphantom{\\sum_{j=1}^J}\\right.\\right.\n \\left.\\left. 
+ \\ \\sum_{j=1}^J\n \\log \\textsf{categorical}(y_{i, j} \\mid \\theta_{j, k})\n \\right) \\right),\n\\end{align*}\\] which can be directly coded using Stan’s built-in log_sum_exp function.\n\n\n\n\nThe Stan program for the Dawid and Skene model is provided below (Dawid and Skene 1979).\ndata {\n int<lower=2> K;\n int<lower=1> I;\n int<lower=1> J;\n\n array[I, J] int<lower=1, upper=K> y;\n\n vector<lower=0>[K] alpha;\n vector<lower=0>[K] beta[K];\n}\nparameters {\n simplex[K] pi;\n array[J, K] simplex[K] theta;\n}\ntransformed parameters {\n array[I] vector[K] log_q_z;\n for (i in 1:I) {\n log_q_z[i] = log(pi);\n for (j in 1:J) {\n for (k in 1:K) {\n log_q_z[i, k] = log_q_z[i, k]\n + log(theta[j, k, y[i, j]]);\n }\n }\n }\n}\nmodel {\n pi ~ dirichlet(alpha);\n for (j in 1:J) {\n for (k in 1:K) {\n theta[j, k] ~ dirichlet(beta[k]);\n }\n }\n\n for (i in 1:I) {\n target += log_sum_exp(log_q_z[i]);\n }\n}\nThe model marginalizes out the discrete parameter \\(z\\), storing the unnormalized conditional probability \\(\\log q(z_i=k|\\theta,\\pi)\\) in log_q_z[i, k].\nThe Stan model converges quickly and mixes well using NUTS starting at diffuse initial points, unlike the equivalent model implemented with Gibbs sampling over the discrete parameter. Reasonable weakly informative priors are \\(\\alpha_k = 3\\) and \\(\\beta_{k,k} = 2.5 K\\) and \\(\\beta_{k,k'} = 1\\) if \\(k \\neq k'\\). Taking \\(\\alpha\\) and \\(\\beta_k\\) to be unit vectors and applying optimization will produce the same answer as the expectation maximization (EM) algorithm of Dawid and Skene (1979).\n\n\nThe quantity log_q_z[i] is defined as a transformed parameter. It encodes the (unnormalized) log of \\(p(z_i \\mid \\theta,\n\\pi)\\). Each iteration provides a value conditioned on that iteration’s values for \\(\\theta\\) and \\(\\pi\\). Applying the softmax function to log_q_z[i] provides a simplex corresponding to the probability mass function of \\(z_i\\) in the posterior. 
These may be averaged across the iterations to provide the posterior probability distribution over each \\(z_i\\).\n\n\n\n\n\n\n\nThis section describes in more detail the mathematics of statistical inference using the output of marginalized Stan models, such as those presented in the last three sections. It provides a mathematical explanation of why and how certain manipulations of Stan’s output produce valid summaries of the posterior distribution when discrete parameters have been marginalized out of a statistical model. Ultimately, however, fully understanding the mathematics in this section is not necessary to fit models with discrete parameters using Stan.\nThroughout, the model under consideration consists of both continuous parameters, \\(\\Theta\\), and discrete parameters, \\(Z\\). It is also assumed that \\(Z\\) can only take finitely many values, as is the case for all the models described in this chapter of the User’s Guide. To simplify notation, any conditioning on data is suppressed in this section, except where specified. As with all Bayesian analyses, however, all inferences using models with marginalized parameters are made conditional on the observed data.\n\n\n\nWhen performing Bayesian inference, interest often centers on estimating some (constant) low-dimensional summary statistics of the posterior distribution. Mathematically, we are interested in estimating \\(\\mu\\), say, where \\(\\mu = \\mathbb{E}[g(\\Theta, Z)]\\) and \\(g(\\cdot)\\) is an arbitrary function. An example of such a quantity is \\(\\mathbb{E}[\\Theta]\\), the posterior mean of the continuous parameters, where we would take \\(g(\\theta, z) = \\theta\\). To estimate \\(\\mu\\) the most common approach is to sample a series of values, at least approximately, from the posterior distribution of the parameters of interest. The numerical values of these draws can then be used to calculate the quantities of interest. 
Often, this process of calculation is trivial, but more care is required when working with marginalized posteriors as we describe in this section.\nIf both \\(\\Theta\\) and \\(Z\\) were continuous, Stan could be used to sample \\(M\\) draws from the joint posterior \\(p_{\\Theta, Z}(\\theta, z)\\) and then estimate \\(\\mu\\) with \\[\n\\hat{\\mu} = \\frac{1}{M} \\sum_{i = 1}^M {g(\\theta^{(i)}, z^{(i)})}.\n\\] Given \\(Z\\) is discrete, however, Stan cannot be used to sample from the joint posterior (or even to do optimization). Instead, as outlined in the previous sections describing specific models, the user can first marginalize out \\(Z\\) from the joint posterior to give the marginalized posterior \\(p_\\Theta(\\theta)\\). This marginalized posterior can then be implemented in Stan as usual, and Stan will give draws \\(\\{\\theta^{(i)}\\}_{i = 1}^M\\) from the marginalized posterior.\nUsing only these draws, how can we estimate \\(\\mathbb{E}[g(\\Theta, Z)]\\)? We can use a conditional estimator. We explain in more detail below, but at a high level the idea is that, for each function \\(g\\) of interest, we compute \\[\nh(\\Theta) = \\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]\n\\] and then estimate \\(\\mathbb{E}[g(\\Theta, Z)]\\) with \\[\n\\hat{\\mu} = \\frac{1}{M} \\sum_{i = 1}^M h(\\theta^{(i)}).\n\\] This estimator is justified by the law of iterated expectation, the fact that \\[\n\\mathbb{E}[h(\\Theta)] = \\mathbb{E}[\\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]] = \\mathbb{E}[g(\\Theta, Z)] = \\mu.\n\\] Using this marginalized estimator provides a way to estimate the expectation of any function \\(g(\\cdot)\\) for all combinations of discrete or continuous parameters in the model. However, it presents a possible new challenge: evaluating the conditional expectation \\(\\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]\\).\n\n\n\nFortunately, the discrete nature of \\(Z\\) makes evaluating \\(\\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]\\) easy. 
The function \\(h(\\Theta)\\) can be written as: \\[\nh(\\Theta)\n= \\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]\n= \\sum_{k} g(\\Theta, k) \\Pr[Z = k \\mid \\Theta],\n\\] where we sum over the possible values of the latent discrete parameters. An essential part of this formula is the probability of the discrete parameters conditional on the continuous parameters, \\(\\Pr[Z = k \\mid \\Theta]\\). More detail on how this quantity can be calculated is included below. Note that if \\(Z\\) takes infinitely many values then computing the infinite sums will involve, potentially computationally expensive, approximation.\nWhen \\(g(\\theta, z)\\) is a function of either \\(\\theta\\) or \\(z\\) only, the above formula simplifies further.\nIn the first case, where \\(g(\\theta, z) = g(\\theta)\\), we have: \\[\\begin{align*}\nh(\\Theta)\n&= \\sum_{k} g(\\Theta) \\Pr[Z = k \\mid \\Theta] \\\\\n&= g(\\Theta) \\sum_{k} \\Pr[Z = k \\mid \\Theta] \\\\\n&= g(\\Theta).\n\\end{align*}\\] This means that we can estimate \\(\\mathbb{E}[g(\\Theta)]\\) with the standard, seemingly unconditional, estimator: \\[\n\\frac{1}{M} \\sum_{i = 1}^M g(\\theta^{(i)}).\n\\] Even after marginalization, computing expectations of functions of the continuous parameters can be performed as if no marginalization had taken place.\nIn the second case, where \\(g(\\theta, z) = g(z)\\), the conditional expectation instead simplifies as follows: \\[\nh(\\Theta) = \\sum_{k} g(k) \\Pr[Z = k \\mid \\Theta].\n\\] An important special case of this result is when \\(g(\\theta, z) = \\textrm{I}(z = k)\\), where \\(\\textrm{I}\\) is the indicator function. This choice allows us to recover the probability mass function of the discrete random variable \\(Z\\), since \\(\\mathbb{E}[\\textrm{I}(Z = k)] = \\Pr[Z = k]\\). 
In this case, \\[\nh(\\Theta)\n= \\sum_{k'} \\textrm{I}(k' = k) \\Pr[Z = k' \\mid \\Theta]\n= \\Pr[Z = k \\mid \\Theta].\n\\] The quantity \\(\\Pr[Z = k]\\) can therefore be estimated with: \\[\n\\frac{1}{M} \\sum_{i = 1}^M \\Pr[Z = k \\mid \\Theta = \\theta^{(i)}].\n\\] When calculating this conditional probability it is important to remember that we are also conditioning on the observed data, \\(Y\\). That is, we are really estimating \\(\\Pr[Z = k \\mid Y]\\) with \\[\n\\frac{1}{M} \\sum_{i = 1}^M \\Pr[Z = k \\mid \\Theta = \\theta^{(i)}, Y].\n\\] This point is important as it suggests one of the main ways of calculating the required conditional probability. Using Bayes’s theorem gives us \\[\n\\Pr[Z = k \\mid \\Theta = \\theta^{(i)}, Y]\n= \\frac{\\Pr[Y \\mid Z = k, \\Theta = \\theta^{(i)}]\n\\Pr[Z = k \\mid \\Theta = \\theta^{(i)}]}\n{\\sum_{k' = 1}^K \\Pr[Y \\mid Z = k', \\Theta = \\theta^{(i)}]\n\\Pr[Z = k' \\mid \\Theta = \\theta^{(i)}]}.\n\\] Here, \\(\\Pr[Y \\mid \\Theta = \\theta^{(i)}, Z = k]\\) is the likelihood conditional on a particular value of the latent variables. 
Crucially, all elements of the expression can be calculated using the draws from the posterior of the continuous parameters and knowledge of the model structure.\nOther than the use of Bayes’s theorem, \\(\\Pr[Z = k \\mid \\theta = \\theta^{(i)}, Y]\\) can also be extracted by coding the Stan model to include the conditional probability explicitly (as is done for the Dawid–Skene model).\nFor a longer introduction to the mathematics of marginalization in Stan, which also covers the connections between Rao–Blackwellization and marginalization, see Pullin, Gurrin, and Vukcevic (2021).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Latent Discrete Parameters" + ] + }, + { + "objectID": "stan-users-guide/latent-discrete.html#rao-blackwell.section", + "href": "stan-users-guide/latent-discrete.html#rao-blackwell.section", + "title": "Latent Discrete Parameters", + "section": "", + "text": "Although it requires some algebra on the joint probability function, a pleasant byproduct of the required calculations is the posterior expectation of the marginalized variable, which is often the quantity of interest for a model. This allows far greater exploration of the tails of the distribution as well as more efficient sampling on an iteration-by-iteration basis because the expectation at all possible values is being used rather than itself being estimated through sampling a discrete parameter.\nStandard optimization algorithms, including expectation maximization (EM), are often provided in applied statistics papers to describe maximum likelihood estimation algorithms. 
Such derivations provide exactly the marginalization needed for coding the model in Stan.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Latent Discrete Parameters" + ] + }, + { + "objectID": "stan-users-guide/latent-discrete.html#change-point.section", + "href": "stan-users-guide/latent-discrete.html#change-point.section", + "title": "Latent Discrete Parameters", + "section": "", + "text": "The first example is a model of coal mining disasters in the U.K. for the years 1851–1962.2\n\n\nFonnesbeck et al. (2013, sec. 3.1) provides a Poisson model of disaster \\(D_t\\) in year \\(t\\) with two rate parameters, an early rate (\\(e\\)) and late rate (\\(l\\)), that change at a given point in time \\(s\\). The full model expressed using a latent discrete parameter \\(s\\) is \\[\\begin{align*}\ne &\\sim \\textsf{exponential}(r_e) \\\\\nl &\\sim \\textsf{exponential}(r_l) \\\\\ns &\\sim \\textsf{uniform}(1, T) \\\\\nD_t &\\sim \\textsf{Poisson}(t < s \\; ? \\; e \\: : \\: l)\n\\end{align*}\\]\nThe last line uses the conditional operator (also known as the ternary operator), which is borrowed from C and related languages. The conditional operator has the same behavior as its counterpart in C++.3\nIt uses a compact notation involving separating its three arguments by a question mark (?) and a colon (:). The conditional operator is defined by \\[\nc \\; ? \\; x_1 \\: : \\: x_2\n=\n\\begin{cases}\n\\ x_1 & \\quad\\text{if } c \\text{ is true (i.e., non-zero), and} \\\\\n\\ x_2 & \\quad\\text{if } c \\text{ is false (i.e., zero).}\n\\end{cases}\n\\]\n\n\n\nTo code this model in Stan, the discrete parameter \\(s\\) must be marginalized out to produce a model defining the log of the probability function \\(p(e,l,D_t)\\). 
The full joint probability factors as \\[\\begin{align*}\np(e,l,s,D) &= p(e) \\, p(l) \\, p(s) \\, p(D \\mid s, e, l) \\\\\n&= \\textsf{exponential}(e \\mid r_e) \\ \\textsf{exponential}(l \\mid r_l) \\,\n \\textsf{uniform}(s \\mid 1, T) \\\\\n& \\qquad \\prod_{t=1}^T \\textsf{Poisson}(D_t \\mid t < s \\; ? \\; e \\: : \\: l).\n\\end{align*}\\]\nTo marginalize, an alternative factorization into prior and likelihood is used, \\[\np(e,l,D) = p(e,l) \\, p(D \\mid e,l),\n\\]\nwhere the likelihood is defined by marginalizing \\(s\\) as \\[\\begin{align*}\np(D \\mid e,l) &= \\sum_{s=1}^T p(s, D \\mid e,l) \\\\\n&= \\sum_{s=1}^T p(s) \\, p(D \\mid s,e,l) \\\\\n&= \\sum_{s=1}^T \\textsf{uniform}(s \\mid 1,T) \\,\n \\prod_{t=1}^T \\textsf{Poisson}(D_t \\mid t < s \\; ? \\; e \\: : \\: l).\n\\end{align*}\\]\nStan operates on the log scale and thus requires the log likelihood, \\[\\begin{align*}\n\\log p(D \\mid e,l)\n&= \\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}_{s=1}^T\n \\left( \\log \\textsf{uniform}(s \\mid 1, T) \\vphantom{\\sum_{t=1}^T}\\right. \\\\\n&\\qquad \\left.\n + \\sum_{t=1}^T \\log \\textsf{Poisson}(D_t \\mid t < s \\; ? \\; e \\: : \\: l)\n\\right),\n\\end{align*}\\] where the log sum of exponents function is defined by \\[\n\\texttt{log}\\mathtt{\\_}\\texttt{sum}\\mathtt{\\_}\\texttt{exp}_{n=1}^N \\, \\alpha_n =\n\\log \\sum_{n=1}^N \\exp(\\alpha_n).\n\\]\nThe log sum of exponents function allows the model to be coded directly in Stan using the built-in function log_sum_exp, which provides both arithmetic stability and efficiency for mixture model calculations.\n\n\n\nThe Stan program for the change point model is shown in the figure below. 
The transformed parameter lp[s] stores the quantity \\(\\log p(s, D \\mid e, l)\\).\ndata {\n real<lower=0> r_e;\n real<lower=0> r_l;\n\n int<lower=1> T;\n array[T] int<lower=0> D;\n}\ntransformed data {\n real log_unif;\n log_unif = -log(T);\n}\nparameters {\n real<lower=0> e;\n real<lower=0> l;\n}\ntransformed parameters {\n vector[T] lp;\n lp = rep_vector(log_unif, T);\n for (s in 1:T) {\n for (t in 1:T) {\n lp[s] = lp[s] + poisson_lpmf(D[t] | t < s ? e : l);\n }\n }\n}\nmodel {\n e ~ exponential(r_e);\n l ~ exponential(r_l);\n target += log_sum_exp(lp);\n}\nA change point model in which disaster rates D[t] have one rate, e, before the change point and a different rate, l, after the change point. The change point itself, s, is marginalized out as described in the text.\nAlthough the change-point model is coded directly, the doubly nested loop used for s and t is quadratic in T. Luke Wiklendt pointed out that a linear alternative can be achieved by the use of dynamic programming similar to the forward-backward algorithm for Hidden Markov models; he submitted a slight variant of the following code to replace the transformed parameters block of the above Stan program.\ntransformed parameters {\n vector[T] lp;\n {\n vector[T + 1] lp_e;\n vector[T + 1] lp_l;\n lp_e[1] = 0;\n lp_l[1] = 0;\n for (t in 1:T) {\n lp_e[t + 1] = lp_e[t] + poisson_lpmf(D[t] | e);\n lp_l[t + 1] = lp_l[t] + poisson_lpmf(D[t] | l);\n }\n lp = rep_vector(log_unif + lp_l[T + 1], T)\n + head(lp_e, T) - head(lp_l, T);\n }\n }\nAs should be obvious from looking at it, it has linear complexity in T rather than quadratic. 
The result for the mining-disaster data is about 20 times faster; the improvement will be greater for larger T.\nThe key to understanding Wiklendt’s dynamic programming version is to see that head(lp_e, T) holds the forward values, whereas lp_l[T + 1] - head(lp_l, T) holds the backward values; the clever use of subtraction allows lp_l to be accumulated naturally in the forward direction.\n\n\n\nThis model is easy to fit using MCMC with NUTS in its default configuration. Convergence is fast and sampling produces roughly one effective sample every two iterations. Because it is a relatively small model (the inner double loop over time is roughly 20,000 steps), it is fast.\nThe value of lp for each iteration for each change point is available because it is declared as a transformed parameter. If the value of lp were not of interest, it could be coded as a local variable in the model block and thus avoid the I/O overhead of saving values every iteration.\n\n\n\nThe value of lp[s] in a given iteration is given by \\(\\log\np(s,D \\mid e,l)\\) for the values of the early and late rates, \\(e\\) and \\(l\\), in the iteration. In each iteration after convergence, the early and late disaster rates, \\(e\\) and \\(l\\), are drawn from the posterior \\(p(e,l \\mid D)\\) by MCMC sampling and the associated lp calculated. The value of lp may be normalized to calculate \\(p(s \\mid e,l,D)\\) in each iteration, based on the current values of \\(e\\) and \\(l\\). Averaging over iterations provides an unnormalized probability estimate of the change point being \\(s\\) (see below for the normalizing constant), \\[\\begin{align*}\np(s \\mid D) &\\propto q(s \\mid D) \\\\\n&= \\frac{1}{M} \\sum_{m=1}^{M} \\exp(\\texttt{lp}[m,s]).\n\\end{align*}\\] where \\(\\texttt{lp}[m,s]\\) represents the value of lp in posterior draw \\(m\\) for change point \\(s\\). 
By averaging over draws, \\(e\\) and \\(l\\) are themselves marginalized out, and the result has no dependence on a given iteration’s value for \\(e\\) and \\(l\\). A final normalization then produces the quantity of interest, the posterior probability of the change point being \\(s\\) conditioned on the data \\(D\\), \\[\np(s \\mid D) = \\frac{q(s \\mid D)}{\\sum_{s'=1}^T q(s' \\mid D)}.\n\\]\nA plot of the values of \\(\\log p(s \\mid D)\\) computed using Stan 2.4’s default MCMC implementation is shown in the posterior plot.\nLog probability of change point being in year, calculated analytically.\n\n\n\nAnalytical change-point posterior\n\n\nThe frequency of change points generated by sampling the discrete change points.\n\n\n\nSampled change-point posterior\n\n\nIn order for their range of estimates to be visible, the first plot is on the log scale and the second plot on the linear scale; note the narrower range of years in the second plot resulting from sampling. The posterior mean of \\(s\\) is roughly 1891.\n\n\n\nThe generated quantities block may be used to draw discrete parameter values using the built-in pseudo-random number generators. For example, with lp defined as above, the following program draws a random value for s at every iteration.\ngenerated quantities {\n int<lower=1, upper=T> s;\n s = categorical_logit_rng(lp);\n}\nA posterior histogram of draws for \\(s\\) is shown on the second change point posterior figure above.\nCompared to working in terms of expectations, discrete sampling is highly inefficient, especially for tails of distributions, so this approach should only be used if draws from a distribution are explicitly required. Otherwise, expectations should be computed in the generated quantities block based on the posterior distribution for s given by softmax(lp).\n\n\n\nThe discrete sample generated for \\(s\\) can be used to calculate covariance with other parameters. 
Although the sampling approach is straightforward, it is more statistically efficient (in the sense of requiring far fewer iterations for the same degree of accuracy) to calculate these covariances in expectation using lp.\n\n\n\nThere is no obstacle in principle to allowing multiple change points. The only issue is that computation increases from linear to quadratic in marginalizing out two change points, cubic for three change points, and so on. There are three parameters, e, m, and l, and two loops for the change point and then one over time, with log densities being stored in a matrix.\nmatrix[T, T] lp;\nlp = rep_matrix(log_unif, T);\nfor (s1 in 1:T) {\n for (s2 in 1:T) {\n for (t in 1:T) {\n lp[s1,s2] = lp[s1,s2]\n + poisson_lpmf(D[t] | t < s1 ? e : (t < s2 ? m : l));\n }\n }\n}\nThe matrix can then be converted back to a vector using to_vector before being passed to log_sum_exp.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Latent Discrete Parameters" + ] + }, + { + "objectID": "stan-users-guide/latent-discrete.html#mark-recapture-models", + "href": "stan-users-guide/latent-discrete.html#mark-recapture-models", + "title": "Latent Discrete Parameters", + "section": "", + "text": "A widely applied field method in ecology is to capture (or sight) animals, mark them (e.g., by tagging), then release them. This process is then repeated one or more times, and is often done for populations on an ongoing basis. The resulting data may be used to estimate population size.\nThe first subsection describes a simple mark-recapture model that does not involve any latent discrete parameters. 
The following subsections describe the Cormack-Jolly-Seber model, which involves latent discrete parameters for animal death.\n\n\nIn the simplest case, a one-stage mark-recapture study produces the following data\n\n\\(M\\) : number of animals marked in first capture,\n\\(C\\) : number of animals in second capture, and\n\\(R\\) : number of marked animals in second capture.\n\nThe estimand of interest is\n\n\\(N\\) : number of animals in the population.\n\nDespite the notation, the model will take \\(N\\) to be a continuous parameter; just because the population must be finite doesn’t mean the parameter representing it must be. The parameter will be used to produce a real-valued estimate of the population size.\nThe Lincoln-Petersen (Lincoln 1930; Petersen 1896) method for estimating population size is \\[\n\\hat{N} = \\frac{M C}{R}.\n\\]\nThis population estimate would arise from a probabilistic model in which the number of recaptured animals is distributed binomially, \\[\nR \\sim \\textsf{binomial}(C, M / N)\n\\] given the total number of animals captured in the second round (\\(C\\)) with a recapture probability of \\(M/N\\), the fraction of the total population \\(N\\) marked in the first round.\ndata {\n int<lower=0> M;\n int<lower=0> C;\n int<lower=0, upper=min(M, C)> R;\n}\nparameters {\n real<lower=(C - R + M)> N;\n}\nmodel {\n R ~ binomial(C, M / N);\n}\nA probabilistic formulation of the Lincoln-Petersen estimator for population size based on data from a one-step mark-recapture study. The lower bound on \\(N\\) is necessary to efficiently eliminate impossible values.\nThe probabilistic variant of the Lincoln-Petersen estimator can be directly coded in Stan as shown in the Lincoln-Petersen model figure. 
The Lincoln-Petersen estimate is the maximum likelihood estimate (MLE) for this model.\nTo ensure the MLE is the Lincoln-Petersen estimate, an improper uniform prior for \\(N\\) is used; this could (and should) be replaced with a more informative prior if possible, based on knowledge of the population under study.\nThe one tricky part of the model is the lower bound \\(C - R + M\\) placed on the population size \\(N\\). Values below this bound are impossible because it is otherwise not possible to draw \\(R\\) samples out of the \\(C\\) animals recaptured. Implementing this lower bound is necessary to ensure sampling and optimization can be carried out in an unconstrained manner with unbounded support for parameters on the transformed (unconstrained) space. The lower bound in the declaration for \\(N\\) implies a variable transform \\(f : (C-R+M,\\infty) \\rightarrow (-\\infty,+\\infty)\\) defined by \\(f(N) = \\log(N - (C - R + M))\\); the reference manual contains full details of all constrained parameter transforms.\n\n\n\nThe Cormack-Jolly-Seber (CJS) model (Cormack 1964; Jolly 1965; Seber 1965) is an open-population model in which the population may change over time due to death; the presentation here draws heavily on Schofield (2007).\nThe basic data are\n\n\\(I\\): number of individuals,\n\\(T\\): number of capture periods, and\n\\(y_{i,t}\\): Boolean indicating if individual \\(i\\) was captured at time \\(t\\).\n\nEach individual is assumed to have been captured at least once because an individual only contributes information conditionally after they have been captured the first time.\nThere are two Bernoulli parameters in the model,\n\n\\(\\phi_t\\) : probability that animal alive at time \\(t\\) survives until \\(t + 1\\) and\n\\(p_t\\) : probability that animal alive at time \\(t\\) is captured at time \\(t\\).\n\nThese parameters will both be given uniform priors, but information should be used to tighten these priors in practice.\nThe CJS model also 
employs a latent discrete parameter \\(z_{i,t}\\) indicating for each individual \\(i\\) whether it is alive at time \\(t\\), distributed as \\[\nz_{i,t} \\sim \\mathsf{Bernoulli}(z_{i,t-1} \\; ? \\; \\phi_{t-1} \\: : \\: 0).\n\\]\nThe conditional prevents the model positing zombies; once an animal is dead, it stays dead. The data distribution is then simple to express conditional on \\(z\\) as \\[\ny_{i,t} \\sim \\mathsf{Bernoulli}(z_{i,t} \\; ? \\; p_t \\: : \\: 0).\n\\]\nThe conditional enforces the constraint that dead animals cannot be captured.\n\n\n\nThis subsection presents an implementation of the model in terms of counts for different history profiles for individuals over three capture times. It assumes exchangeability of the animals in that each is assigned the same capture and survival probabilities.\nIn order to ease the marginalization of the latent discrete parameter \\(z_{i,t}\\), the Stan models rely on a derived quantity \\(\\chi_t\\) for the probability that an individual is never captured again if it is alive at time \\(t\\) (if it is dead, the recapture probability is zero). This quantity is defined recursively by \\[\n\\chi_t\n=\n\\begin{cases}\n1 & \\quad\\text{if } t = T \\\\\n(1 - \\phi_t) + \\phi_t (1 - p_{t+1}) \\chi_{t+1}\n & \\quad\\text{if } t < T\n\\end{cases}\n\\]\nThe base case arises because if an animal was captured in the last time period, the probability it is never captured again is 1 because there are no more capture periods. 
The recursive case defining \\(\\chi_{t}\\) in terms of \\(\\chi_{t+1}\\) involves two possibilities: (1) not surviving to the next time period, with probability \\((1 - \\phi_t)\\), or (2) surviving to the next time period with probability \\(\\phi_t\\), not being captured in the next time period with probability \\((1 - p_{t+1})\\), and not being captured again after being alive in period \\(t+1\\) with probability \\(\\chi_{t+1}\\).\nWith three capture times, there are eight captured/not-captured profiles an individual may have. These may be naturally coded as binary numbers as follows.\n\\[\n\\begin{array}{crclc}\n\\hline\n& \\qquad\\qquad & captures & \\qquad\\qquad & \\\\\n\\mathrm{profile} & 1 & 2 & 3 & \\mathrm{probability} \\\\\n\\hline\n0 & - & - & - & n/a \\\\\n1 & - & - & + & n/a \\\\\n2 & - & + & - & \\chi_2 \\\\\n3 & - & + & + & \\phi_2 \\, p_3 \\\\\n4 & + & - & - & \\chi_1 \\\\\n5 & + & - & + & \\phi_1 \\, (1 - p_2) \\, \\phi_2 \\, p_3 \\\\\n6 & + & + & - & \\phi_1 \\, p_2 \\, \\chi_2 \\\\\n7 & + & + & + & \\phi_1 \\, p_2 \\, \\phi_2 \\, p_3 \\\\\n\\hline\n\\end{array}\n\\]\nHistory 0, for animals that are never captured, is unobservable because only animals that are captured are observed. History 1, for animals that are only captured in the last round, provides no information for the CJS model, because capture/non-capture status is only informative when conditioned on earlier captures. For the remaining cases, the contribution to the likelihood is provided in the final column.\nBy defining these probabilities in terms of \\(\\chi\\) directly, there is no need for a latent binary parameter indicating whether an animal is alive at time \\(t\\) or not. The definition of \\(\\chi\\) is typically used to define the likelihood (i.e., marginalize out the latent discrete parameter) for the CJS model (Schofield 2007).\nThe Stan model defines \\(\\chi\\) as a transformed parameter based on parameters \\(\\phi\\) and \\(p\\). 
In the model block, the log probability is incremented for each history based on its count. This second step is similar to collecting Bernoulli observations into a binomial or categorical observations into a multinomial, only it is coded directly in the Stan program using target += rather than being part of a built-in probability function.\nThe following is the Stan program for the Cormack-Jolly-Seber mark-recapture model that considers counts of individuals with observation histories of being observed or not in three capture periods\ndata {\n array[7] int<lower=0> history;\n}\nparameters {\n array[2] real<lower=0, upper=1> phi;\n array[3] real<lower=0, upper=1> p;\n}\ntransformed parameters {\n array[2] real<lower=0, upper=1> chi;\n chi[2] = (1 - phi[2]) + phi[2] * (1 - p[3]);\n chi[1] = (1 - phi[1]) + phi[1] * (1 - p[2]) * chi[2];\n}\nmodel {\n target += history[2] * log(chi[2]);\n target += history[3] * (log(phi[2]) + log(p[3]));\n target += history[4] * (log(chi[1]));\n target += history[5] * (log(phi[1]) + log1m(p[2])\n + log(phi[2]) + log(p[3]));\n target += history[6] * (log(phi[1]) + log(p[2])\n + log(chi[2]));\n target += history[7] * (log(phi[1]) + log(p[2])\n + log(phi[2]) + log(p[3]));\n}\ngenerated quantities {\n real<lower=0, upper=1> beta3;\n beta3 = phi[2] * p[3];\n}\n\n\nThe parameters \\(\\phi_2\\) and \\(p_3\\), the probability of death at time 2 and probability of capture at time 3 are not identifiable, because both may be used to account for lack of capture at time 3. Their product, \\(\\beta_3 = \\phi_2 \\, p_3\\), is identified. The Stan model defines beta3 as a generated quantity. Unidentified parameters pose a problem for Stan’s samplers’ adaptation. Although the problem posed for adaptation is mild here because the parameters are bounded and thus have proper uniform priors, it would be better to formulate an identified parameterization. 
One way to do this would be to formulate a hierarchical model for the \\(p\\) and \\(\\phi\\) parameters.\n\n\n\n\nThis section presents a version of the Cormack-Jolly-Seber (CJS) model cast at the individual level rather than collectively as in the previous subsection. It also extends the model to allow an arbitrary number of time periods. The data will consist of the number \\(T\\) of capture events, the number \\(I\\) of individuals, and a boolean flag \\(y_{i,t}\\) indicating if individual \\(i\\) was observed at time \\(t\\). In Stan,\ndata {\n int<lower=2> T;\n int<lower=0> I;\n array[I, T] int<lower=0, upper=1> y;\n}\nThe advantage of the individual-level model is that it becomes possible to add individual “random effects” that affect survival or capture probability, as well as to avoid the combinatorics involved in unfolding \\(2^T\\) observation histories for \\(T\\) capture times.\n\n\nThe individual CJS model involves several function definitions. The first two are used in the transformed data block to compute the first and last time period in which an animal was captured.4\nfunctions {\n int first_capture(array[] int y_i) {\n for (k in 1:size(y_i)) {\n if (y_i[k]) {\n return k;\n }\n }\n return 0;\n }\n int last_capture(array[] int y_i) {\n for (k_rev in 0:(size(y_i) - 1)) {\n int k;\n k = size(y_i) - k_rev;\n if (y_i[k]) {\n return k;\n }\n }\n return 0;\n }\n // ...\n}\nThese two functions are used to define the first and last capture time for each individual in the transformed data block.5\ntransformed data {\n array[I] int<lower=0, upper=T> first;\n array[I] int<lower=0, upper=T> last;\n vector<lower=0, upper=I>[T] n_captured;\n for (i in 1:I) {\n first[i] = first_capture(y[i]);\n }\n for (i in 1:I) {\n last[i] = last_capture(y[i]);\n }\n n_captured = rep_vector(0, T);\n for (t in 1:T) {\n for (i in 1:I) {\n if (y[i, t]) {\n n_captured[t] = n_captured[t] + 1;\n }\n }\n }\n}\nThe transformed data block also defines n_captured[t], which is 
the total number of captures at time t. The variable n_captured is defined as a vector instead of an integer array so that it can be used in an elementwise vector operation in the generated quantities block to model the population estimates at each time point.\nThe parameters and transformed parameters are as before, but now there is a function definition for computing the entire vector chi, the probability that if an individual is alive at t that it will never be captured again.\nparameters {\n vector<lower=0, upper=1>[T - 1] phi;\n vector<lower=0, upper=1>[T] p;\n}\ntransformed parameters {\n vector<lower=0, upper=1>[T] chi;\n chi = prob_uncaptured(T, p, phi);\n}\nThe definition of prob_uncaptured, from the functions block, is\nfunctions {\n // ...\n vector prob_uncaptured(int T, vector p, vector phi) {\n vector[T] chi;\n chi[T] = 1.0;\n for (t in 1:(T - 1)) {\n int t_curr;\n int t_next;\n t_curr = T - t;\n t_next = t_curr + 1;\n chi[t_curr] = (1 - phi[t_curr])\n + phi[t_curr]\n * (1 - p[t_next])\n * chi[t_next];\n }\n return chi;\n }\n}\nThe function definition directly follows the mathematical definition of \\(\\chi_t\\), unrolling the recursion into an iteration and defining the elements of chi from T down to 1.\n\n\n\nGiven the precomputed quantities, the model block directly encodes the CJS model’s log likelihood function. All parameters are left with their default uniform priors and the model simply encodes the log probability of the observations y given the parameters p and phi as well as the transformed parameter chi defined in terms of p and phi.\nmodel {\n for (i in 1:I) {\n if (first[i] > 0) {\n for (t in (first[i]+1):last[i]) {\n 1 ~ bernoulli(phi[t - 1]);\n y[i, t] ~ bernoulli(p[t]);\n }\n 1 ~ bernoulli(chi[last[i]]);\n }\n }\n}\nThe outer loop is over individuals, conditionally skipping individuals i that are never captured. 
The never-captured check depends on the convention of the first-capture and last-capture functions returning 0 for first if an individual is never captured.\nThe inner loop for individual i first increments the log probability based on the survival of the individual with probability phi[t - 1]. The outcome of 1 is fixed because the individual must survive between the first and last capture (i.e., no zombies). The loop starts after the first capture, because all information in the CJS model is conditional on the first capture.\nIn the inner loop, the observed capture status y[i, t] for individual i at time t has a Bernoulli distribution based on the capture probability p[t] at time t.\nAfter the inner loop, the probability of an animal never being seen again after being observed at time last[i] is included, because last[i] was defined to be the last time period in which animal i was observed.\n\n\n\nAs with the collective model described in the previous subsection, this model does not identify phi[T - 1] and p[T], but does identify their product, beta. Thus beta is defined as a generated quantity to monitor convergence and report.\ngenerated quantities {\n real beta;\n // ...\n\n beta = phi[T - 1] * p[T];\n // ...\n}\nThe parameter p[1] is also not modeled and will just be uniform between 0 and 1. A more finely articulated model might have a hierarchical or time-series component, in which case p[1] would be an unknown initial condition and both phi[T - 1] and p[T] could be identified.\n\n\n\nThe generated quantities also calculates an estimate of the population mean at each time t in the same way as in the simple mark-recapture model as the number of individuals captured at time t divided by the probability of capture at time t. 
This is done with the elementwise division operation for vectors (./) in the generated quantities block.\ngenerated quantities {\n // ...\n vector<lower=0>[T] pop;\n // ...\n pop = n_captured ./ p;\n pop[1] = -1;\n}\n\n\n\nAll individuals are modeled as having the same capture probability, but this model could be easily generalized to use a logistic regression here based on individual-level inputs to be used as predictors.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Latent Discrete Parameters" + ] + }, + { + "objectID": "stan-users-guide/latent-discrete.html#data-coding-models.section", + "href": "stan-users-guide/latent-discrete.html#data-coding-models.section", + "title": "Latent Discrete Parameters", + "section": "", + "text": "Although seemingly disparate tasks, the rating/coding/annotation of items with categories and diagnostic testing for disease or other conditions, share several characteristics which allow their statistical properties to be modeled similarly.\n\n\nSuppose you have diagnostic tests for a condition of varying sensitivity and specificity. Sensitivity is the probability a test returns positive when the patient has the condition and specificity is the probability that a test returns negative when the patient does not have the condition. For example, mammograms and puncture biopsy tests both test for the presence of breast cancer. Mammograms have high sensitivity and low specificity, meaning lots of false positives, whereas puncture biopsies are the opposite, with low sensitivity and high specificity, meaning lots of false negatives.\nThere are several estimands of interest in such studies. An epidemiological study may be interested in the prevalence of a kind of infection, such as malaria, in a population. A test development study might be interested in the diagnostic accuracy of a new test. 
A health care worker performing tests might be interested in the disease status of a particular patient.\n\n\n\nHumans are often given the task of coding (equivalently rating or annotating) data. For example, journal or grant reviewers rate submissions, a political study may code campaign commercials as to whether they are attack ads or not, a natural language processing study might annotate Tweets as to whether they are positive or negative in overall sentiment, or a dentist looking at an X-ray classifies a patient as having a cavity or not. In all of these cases, the data coders play the role of the diagnostic tests and all of the same estimands are in play — data coder accuracy and bias, true categories of items being coded, or the prevalence of various categories of items in the data.\n\n\n\nIn this section, only categorical ratings are considered, and the challenge in the modeling for Stan is to marginalize out the discrete parameters.\nDawid and Skene (1979) introduce a noisy-measurement model for coding and apply it in the epidemiological setting of coding what doctors say about patient histories; the same model can be used for diagnostic procedures.\n\n\nThe data for the model consists of \\(J\\) raters (diagnostic tests), \\(I\\) items (patients), and \\(K\\) categories (condition statuses) to annotate, with \\(y_{i, j} \\in \\{1, \\dotsc, K\\}\\) being the rating provided by rater \\(j\\) for item \\(i\\). 
In a diagnostic test setting for a particular condition, the raters are diagnostic procedures and often \\(K=2\\), with values signaling the presence or absence of the condition.6\nIt is relatively straightforward to extend Dawid and Skene’s model to deal with the situation where not every rater rates each item exactly once.\n\n\n\n\nThe model is based on three parameters, the first of which is discrete:\n\n\\(z_i\\) : a value in \\(\\{1, \\dotsc, K\\}\\) indicating the true category of item \\(i\\),\n\\(\\pi\\) : a \\(K\\)-simplex for the prevalence of the \\(K\\) categories in the population, and\n\\(\\theta_{j,k}\\) : a \\(K\\)-simplex for the response of annotator \\(j\\) to an item of true category \\(k\\).\n\n\n\n\nThe true category of an item is assumed to be generated by a simple categorical distribution based on item prevalence, \\[\nz_i \\sim \\textsf{categorical}(\\pi).\n\\]\nThe rating \\(y_{i, j}\\) provided for item \\(i\\) by rater \\(j\\) is modeled as a categorical response of rater \\(j\\) to an item of category \\(z_i\\),7 \\[\ny_{i, j} \\sim \\textsf{categorical}(\\theta_{j, z[i]}).\n\\]\n\n\nDawid and Skene provided maximum likelihood estimates for \\(\\theta\\) and \\(\\pi\\), which allows them to generate probability estimates for each \\(z_i\\).\nTo mimic Dawid and Skene’s maximum likelihood model, the parameters \\(\\theta_{j,k}\\) and \\(\\pi\\) can be given uniform priors over \\(K\\)-simplexes. It is straightforward to generalize to Dirichlet priors, \\[\n\\pi \\sim \\textsf{Dirichlet}(\\alpha)\n\\] and \\[\n\\theta_{j,k} \\sim \\textsf{Dirichlet}(\\beta_k)\n\\] with fixed hyperparameters \\(\\alpha\\) (a vector) and \\(\\beta\\) (a matrix or array of vectors). 
The prior for \\(\\theta_{j,k}\\) must be allowed to vary in \\(k\\), so that, for instance, \\(\\beta_{k,k}\\) is large enough to allow the prior to favor better-than-chance annotators over random or adversarial ones.\nBecause there are \\(J\\) coders, it would be natural to extend the model to include a hierarchical prior for \\(\\beta\\) and to partially pool the estimates of coder accuracy and bias.\n\n\n\nBecause the true category parameter \\(z\\) is discrete, it must be marginalized out of the joint posterior in order to carry out sampling or maximum likelihood estimation in Stan. The joint posterior factors as \\[\np(y, \\theta, \\pi) = p(y \\mid \\theta,\\pi) \\, p(\\pi) \\, p(\\theta),\n\\] where \\(p(y \\mid \\theta,\\pi)\\) is derived by marginalizing \\(z\\) out of \\[\np(z, y \\mid \\theta, \\pi) =\n\\prod_{i=1}^I \\left( \\textsf{categorical}(z_i \\mid \\pi)\n \\prod_{j=1}^J\n \\textsf{categorical}(y_{i, j} \\mid \\theta_{j, z[i]})\n \\right).\n\\]\nThis can be done item by item, with \\[\np(y \\mid \\theta, \\pi) =\n\\prod_{i=1}^I \\sum_{k=1}^K\n \\left( \\textsf{categorical}(k \\mid \\pi)\n \\prod_{j=1}^J\n \\textsf{categorical}(y_{i, j} \\mid \\theta_{j, k})\n \\right).\n\\]\nIn the missing data model, only the observed labels would be used in the inner product.\nDawid and Skene (1979) derive exactly the same equation in their Equation (2.7), required for the E-step in their expectation maximization (EM) algorithm. Stan requires the marginalized probability function on the log scale, \\[\\begin{align*}\n\\log p(y \\mid \\theta, \\pi)\n&= \\sum_{i=1}^I \\log \\left( \\sum_{k=1}^K \\exp\n \\left(\\log \\textsf{categorical}(k \\mid \\pi) \\vphantom{\\sum_{j=1}^J}\\right.\\right.\n \\left.\\left. 
+ \\ \\sum_{j=1}^J\n \\log \\textsf{categorical}(y_{i, j} \\mid \\theta_{j, k})\n \\right) \\right),\n\\end{align*}\\] which can be directly coded using Stan’s built-in log_sum_exp function.\n\n\n\n\nThe Stan program for the Dawid and Skene model is provided below (Dawid and Skene 1979).\ndata {\n int<lower=2> K;\n int<lower=1> I;\n int<lower=1> J;\n\n array[I, J] int<lower=1, upper=K> y;\n\n vector<lower=0>[K] alpha;\n vector<lower=0>[K] beta[K];\n}\nparameters {\n simplex[K] pi;\n array[J, K] simplex[K] theta;\n}\ntransformed parameters {\n array[I] vector[K] log_q_z;\n for (i in 1:I) {\n log_q_z[i] = log(pi);\n for (j in 1:J) {\n for (k in 1:K) {\n log_q_z[i, k] = log_q_z[i, k]\n + log(theta[j, k, y[i, j]]);\n }\n }\n }\n}\nmodel {\n pi ~ dirichlet(alpha);\n for (j in 1:J) {\n for (k in 1:K) {\n theta[j, k] ~ dirichlet(beta[k]);\n }\n }\n\n for (i in 1:I) {\n target += log_sum_exp(log_q_z[i]);\n }\n}\nThe model marginalizes out the discrete parameter \\(z\\), storing the unnormalized conditional probability \\(\\log q(z_i=k|\\theta,\\pi)\\) in log_q_z[i, k].\nThe Stan model converges quickly and mixes well using NUTS starting at diffuse initial points, unlike the equivalent model implemented with Gibbs sampling over the discrete parameter. Reasonable weakly informative priors are \\(\\alpha_k = 3\\) and \\(\\beta_{k,k} = 2.5 K\\) and \\(\\beta_{k,k'} = 1\\) if \\(k \\neq k'\\). Taking \\(\\alpha\\) and \\(\\beta_k\\) to be unit vectors and applying optimization will produce the same answer as the expectation maximization (EM) algorithm of Dawid and Skene (1979).\n\n\nThe quantity log_q_z[i] is defined as a transformed parameter. It encodes the (unnormalized) log of \\(p(z_i \\mid \\theta,\n\\pi)\\). Each iteration provides a value conditioned on that iteration’s values for \\(\\theta\\) and \\(\\pi\\). Applying the softmax function to log_q_z[i] provides a simplex corresponding to the probability mass function of \\(z_i\\) in the posterior. 
These may be averaged across the iterations to provide the posterior probability distribution over each \\(z_i\\).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Latent Discrete Parameters" + ] + }, + { + "objectID": "stan-users-guide/latent-discrete.html#marginalization-mathematics.section", + "href": "stan-users-guide/latent-discrete.html#marginalization-mathematics.section", + "title": "Latent Discrete Parameters", + "section": "", + "text": "This section describes in more detail the mathematics of statistical inference using the output of marginalized Stan models, such as those presented in the last three sections. It provides a mathematical explanation of why and how certain manipulations of Stan’s output produce valid summaries of the posterior distribution when discrete parameters have been marginalized out of a statistical model. Ultimately, however, fully understanding the mathematics in this section is not necessary to fit models with discrete parameters using Stan.\nThroughout, the model under consideration consists of both continuous parameters, \\(\\Theta\\), and discrete parameters, \\(Z\\). It is also assumed that \\(Z\\) can only take finitely many values, as is the case for all the models described in this chapter of the User’s Guide. To simplify notation, any conditioning on data is suppressed in this section, except where specified. As with all Bayesian analyses, however, all inferences using models with marginalized parameters are made conditional on the observed data.\n\n\n\nWhen performing Bayesian inference, interest often centers on estimating some (constant) low-dimensional summary statistics of the posterior distribution. Mathematically, we are interested in estimating \\(\\mu\\), say, where \\(\\mu = \\mathbb{E}[g(\\Theta, Z)]\\) and \\(g(\\cdot)\\) is an arbitrary function. 
An example of such a quantity is \\(\\mathbb{E}[\\Theta]\\), the posterior mean of the continuous parameters, where we would take \\(g(\\theta, z) = \\theta\\). To estimate \\(\\mu\\) the most common approach is to sample a series of values, at least approximately, from the posterior distribution of the parameters of interest. The numerical values of these draws can then be used to calculate the quantities of interest. Often, this process of calculation is trivial, but more care is required when working with marginalized posteriors as we describe in this section.\nIf both \\(\\Theta\\) and \\(Z\\) were continuous, Stan could be used to sample \\(M\\) draws from the joint posterior \\(p_{\\Theta, Z}(\\theta, z)\\) and then estimate \\(\\mu\\) with \\[\n\\hat{\\mu} = \\frac{1}{M} \\sum_{i = 1}^M {g(\\theta^{(i)}, z^{(i)})}.\n\\] Given \\(Z\\) is discrete, however, Stan cannot be used to sample from the joint posterior (or even to do optimization). Instead, as outlined in the previous sections describing specific models, the user can first marginalize out \\(Z\\) from the joint posterior to give the marginalized posterior \\(p_\\Theta(\\theta)\\). This marginalized posterior can then be implemented in Stan as usual, and Stan will give draws \\(\\{\\theta^{(i)}\\}_{i = 1}^M\\) from the marginalized posterior.\nUsing only these draws, how can we estimate \\(\\mathbb{E}[g(\\Theta, Z)]\\)? We can use a conditional estimator. 
We explain in more detail below, but at a high level the idea is that, for each function \\(g\\) of interest, we compute \\[\nh(\\Theta) = \\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]\n\\] and then estimate \\(\\mathbb{E}[g(\\Theta, Z)]\\) with \\[\n\\hat{\\mu} = \\frac{1}{M} \\sum_{i = 1}^M h(\\theta^{(i)}).\n\\] This estimator is justified by the law of iterated expectation, the fact that \\[\n\\mathbb{E}[h(\\Theta)] = \\mathbb{E}[\\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]] = \\mathbb{E}[g(\\Theta, Z)] = \\mu.\n\\] Using this marginalized estimator provides a way to estimate the expectation of any function \\(g(\\cdot)\\) for all combinations of discrete or continuous parameters in the model. However, it presents a possible new challenge: evaluating the conditional expectation \\(\\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]\\).\n\n\n\nFortunately, the discrete nature of \\(Z\\) makes evaluating \\(\\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]\\) easy. The function \\(h(\\Theta)\\) can be written as: \\[\nh(\\Theta)\n= \\mathbb{E}[g(\\Theta, Z) \\mid \\Theta]\n= \\sum_{k} g(\\Theta, k) \\Pr[Z = k \\mid \\Theta],\n\\] where we sum over the possible values of the latent discrete parameters. An essential part of this formula is the probability of the discrete parameters conditional on the continuous parameters, \\(\\Pr[Z = k \\mid \\Theta]\\). More detail on how this quantity can be calculated is included below. 
Note that if \\(Z\\) takes infinitely many values then computing the infinite sums will involve, potentially computationally expensive, approximation.\nWhen \\(g(\\theta, z)\\) is a function of either \\(\\theta\\) or \\(z\\) only, the above formula simplifies further.\nIn the first case, where \\(g(\\theta, z) = g(\\theta)\\), we have: \\[\\begin{align*}\nh(\\Theta)\n&= \\sum_{k} g(\\Theta) \\Pr[Z = k \\mid \\Theta] \\\\\n&= g(\\Theta) \\sum_{k} \\Pr[Z = k \\mid \\Theta] \\\\\n&= g(\\Theta).\n\\end{align*}\\] This means that we can estimate \\(\\mathbb{E}[g(\\Theta)]\\) with the standard, seemingly unconditional, estimator: \\[\n\\frac{1}{M} \\sum_{i = 1}^M g(\\theta^{(i)}).\n\\] Even after marginalization, computing expectations of functions of the continuous parameters can be performed as if no marginalization had taken place.\nIn the second case, where \\(g(\\theta, z) = g(z)\\), the conditional expectation instead simplifies as follows: \\[\nh(\\Theta) = \\sum_{k} g(k) \\Pr[Z = k \\mid \\Theta].\n\\] An important special case of this result is when \\(g(\\theta, z) = \\textrm{I}(z = k)\\), where \\(\\textrm{I}\\) is the indicator function. This choice allows us to recover the probability mass function of the discrete random variable \\(Z\\), since \\(\\mathbb{E}[\\textrm{I}(Z = k)] = \\Pr[Z = k]\\). In this case, \\[\nh(\\Theta)\n= \\sum_{k} \\textrm{I}(z = k) \\Pr[Z = k \\mid \\Theta]\n= \\Pr[Z = k \\mid \\Theta].\n\\] The quantity \\(\\Pr[Z = k]\\) can therefore be estimated with: \\[\n\\frac{1}{M} \\sum_{i = 1}^M \\Pr[Z = k \\mid \\Theta = \\theta^{(i)}].\n\\] When calculating this conditional probability it is important to remember that we are also conditioning on the observed data, \\(Y\\). That is, we are really estimating \\(\\Pr[Z = k \\mid Y]\\) with \\[\n\\frac{1}{M} \\sum_{i = 1}^M \\Pr[Z = k \\mid \\Theta = \\theta^{(i)}, Y].\n\\] This point is important as it suggests one of the main ways of calculating the required conditional probability. 
Using Bayes’s theorem gives us \\[\n\\Pr[Z = k \\mid \\Theta = \\theta^{(i)}, Y]\n= \\frac{\\Pr[Y \\mid Z = k, \\Theta = \\theta^{(i)}]\n\\Pr[Z = k \\mid \\Theta = \\theta^{(i)}]}\n{\\sum_{k = 1}^K \\Pr[Y \\mid Z = k, \\Theta = \\theta^{(i)}]\n\\Pr[Z = k \\mid \\Theta = \\theta^{(i)}]}.\n\\] Here, \\(\\Pr[Y \\mid \\Theta = \\theta^{(i)}, Z = k]\\) is the likelihood conditional on a particular value of the latent variables. Crucially, all elements of the expression can be calculated using the draws from the posterior of the continuous parameters and knowledge of the model structure.\nOther than the use of Bayes’s theorem, \\(\\Pr[Z = k \\mid \\theta = \\theta^{(i)}, Y]\\) can also be extracted by coding the Stan model to include the conditional probability explicitly (as is done for the Dawid–Skene model).\nFor a longer introduction to the mathematics of marginalization in Stan, which also covers the connections between Rao–Blackwellization and marginalization, see Pullin, Gurrin, and Vukcevic (2021).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Latent Discrete Parameters" + ] + }, + { + "objectID": "stan-users-guide/latent-discrete.html#footnotes", + "href": "stan-users-guide/latent-discrete.html#footnotes", + "title": "Latent Discrete Parameters", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe computations are similar to those involved in expectation maximization (EM) algorithms (Dempster, Laird, and Rubin 1977).↩︎\nThe source of the data is (Jarrett 1979), which itself is a note correcting an earlier data collection.↩︎\nThe R counterpart, ifelse, is slightly different in that it is typically used in a vectorized situation. The conditional operator is not (yet) vectorized in Stan.↩︎\nAn alternative would be to compute this on the outside and feed it into the Stan model as preprocessed data. 
Yet another alternative encoding would be a sparse one recording only the capture events along with their time and identifying the individual captured.↩︎\nBoth functions return 0 if the individual represented by the input array was never captured. Individuals with no captures are not relevant for estimating the model because all probability statements are conditional on earlier captures. Typically they would be removed from the data, but the program allows them to be included even though they make no contribution to the log probability function.↩︎\nDiagnostic procedures are often ordinal, as in stages of cancer in oncological diagnosis or the severity of a cavity in dental diagnosis. Dawid and Skene’s model may be used as is or naturally generalized for ordinal ratings using a latent continuous rating and cutpoints as in ordinal logistic regression.↩︎\nIn the subscript, \\(z_i\\) is written as \\(z[i]\\) to improve legibility.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Latent Discrete Parameters" + ] + }, + { + "objectID": "stan-users-guide/measurement-error.html", + "href": "stan-users-guide/measurement-error.html", + "title": "Measurement Error and Meta-Analysis", + "section": "", + "text": "Most quantities used in statistical models arise from measurements. Most of these measurements are taken with some error. When the measurement error is small relative to the quantity being measured, its effect on a model is usually small. When measurement error is large relative to the quantity being measured, or when precise relations can be estimated between measured quantities, it is useful to introduce an explicit model of measurement error. One kind of measurement error is rounding.\nMeta-analysis plays out statistically much like measurement error models, where the inferences drawn from multiple data sets are combined to do inference over all of them. 
Inferences for each data set are treated as providing a kind of measurement error with respect to true parameter values.\n\n\nA Bayesian approach to measurement error can be formulated directly by treating the true quantities being measured as missing data (Clayton 1992; Richardson and Gilks 1993). This requires a model of how the measurements are derived from the true values.\n\n\nBefore considering regression with measurement error, first consider a linear regression model where the observed data for \\(N\\) cases includes a predictor \\(x_n\\) and outcome \\(y_n\\). In Stan, a linear regression for \\(y\\) based on \\(x\\) with a slope and intercept is modeled as follows.\ndata {\n int<lower=0> N; // number of cases\n vector[N] x; // predictor (covariate)\n vector[N] y; // outcome (variate)\n}\nparameters {\n real alpha; // intercept\n real beta; // slope\n real<lower=0> sigma; // outcome noise\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n alpha ~ normal(0, 10);\n beta ~ normal(0, 10);\n sigma ~ cauchy(0, 5);\n}\nNow suppose that the true values of the predictors \\(x_n\\) are not known, but for each \\(n\\), a measurement \\(x^{\\textrm{meas}}_n\\) of \\(x_n\\) is available. If the error in measurement can be modeled, the measured value \\(x^{\\textrm{meas}}_n\\) can be modeled in terms of the true value \\(x_n\\) plus measurement noise. The true value \\(x_n\\) is treated as missing data and estimated along with other quantities in the model. A simple approach is to assume the measurement error is normal with known deviation \\(\\tau\\). 
This leads to the following regression model with constant measurement error.\ndata {\n // ...\n array[N] real x_meas; // measurement of x\n real<lower=0> tau; // measurement noise\n}\nparameters {\n array[N] real x; // unknown true value\n real mu_x; // prior location\n real sigma_x; // prior scale\n // ...\n}\nmodel {\n x ~ normal(mu_x, sigma_x); // prior\n x_meas ~ normal(x, tau); // measurement model\n y ~ normal(alpha + beta * x, sigma);\n // ...\n}\nThe regression coefficients alpha and beta and regression noise scale sigma are the same as before, but now x is declared as a parameter rather than as data. The data are now x_meas, which is a measurement of the true x value with noise scale tau. The model then specifies that the measurement error for x_meas[n] given true value x[n] is normal with deviation tau. Furthermore, the true values x are given a hierarchical prior here.\nIn cases where the measurement errors are not normal, richer measurement error models may be specified. The prior on the true values may also be enriched. For instance, Clayton (1992) introduces an exposure model for the unknown (but noisily measured) risk factors \\(x\\) in terms of known (without measurement error) risk factors \\(c\\). A simple model would regress \\(x_n\\) on the covariates \\(c_n\\) with noise term \\(\\upsilon\\), \\[\nx_n \\sim \\textsf{normal}(\\gamma^{\\top}c, \\upsilon).\n\\] This can be coded in Stan just like any other regression. And, of course, other exposure models can be provided.\n\n\n\nA common form of measurement error arises from rounding measurements. Rounding may be done in many ways, such as rounding weights to the nearest milligram, or to the nearest pound; rounding may even be done by rounding down to the nearest integer.\nExercise 3.5(b) by Gelman et al. (2013) provides an example.\n\n3.5. Suppose we weigh an object five times and measure weights, rounded to the nearest pound, of 10, 10, 12, 11, 9. 
Assume the unrounded measurements are normally distributed with a noninformative prior distribution on \\(\\mu\\) and \\(\\sigma^2\\).\n\nGive the correct posterior distribution for \\((\\mu, \\sigma^2)\\), treating the measurements as rounded.\n\n\nLetting \\(z_n\\) be the unrounded measurement for \\(y_n\\), the problem as stated assumes \\[\nz_n \\sim \\textsf{normal}(\\mu, \\sigma).\n\\]\nThe rounding process entails that \\(z_n \\in (y_n - 0.5, y_n + 0.5)\\)1. The probability mass function for the discrete observation \\(y\\) is then given by marginalizing out the unrounded measurement, producing the likelihood \\[\\begin{align*}\np(y_n \\mid \\mu, \\sigma)\n&= \\int_{y_n - 0.5}^{y_n + 0.5} \\textsf{normal}(z_n \\mid \\mu, \\sigma) \\,\\textsf{d}z_n \\\\\n&= \\Phi\\!\\left(\\frac{y_n + 0.5 - \\mu}{\\sigma}\\right)\n -\\Phi\\!\\left(\\frac{y_n - 0.5 - \\mu}{\\sigma}\\right).\n\\end{align*}\\] Gelman’s answer for this problem took the noninformative prior to be uniform in the variance \\(\\sigma^2\\) on the log scale, but we replace it with more recently recommended half-normal prior on \\(\\sigma\\) \\[\n\\sigma \\sim \\textsf{normal}^+(0, 1).\n\\] The posterior after observing \\(y = (10, 10, 12, 11, 9)\\) can be calculated by Bayes’s rule as \\[\\begin{align*}\np(\\mu, \\sigma \\mid y)\n&\\propto p(\\mu, \\sigma) \\ p(y \\mid \\mu, \\sigma) \\\\\n&\\propto \\textsf{normal}^+(\\sigma \\mid 0, 1)\\prod_{n=1}^5\n \\left( \\Phi\\!\\left(\\frac{y_n + 0.5 - \\mu}{\\sigma}\\right)\n -\\Phi\\!\\left(\\frac{y_n - 0.5 - \\mu}{\\sigma}\\right)\n \\right).\n\\end{align*}\\]\nThe Stan code simply follows the mathematical definition, providing an example of the direct definition of a probability function up to a proportion.\ndata {\n int<lower=0> N;\n vector[N] y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n sigma ~ normal(0, 1);\n for (n in 1:N) {\n target += log_diff_exp(normal_lcdf(y[n] + 0.5 | mu, sigma),\n normal_lcdf(y[n] - 0.5 | mu, sigma));\n 
}\n}\nwhere normal_lcdf(y[n]+0.5 | mu, sigma) is equal to log(Phi((y[n] + 0.5 - mu) / sigma)), and log_diff_exp(a, b) computes log(exp(a) - exp(b)) in a numerically more stable way.\nAlternatively, the model may be defined with latent parameters for the unrounded measurements \\(z_n\\). The Stan code in this case uses a distribution statement for \\(z_n\\) directly while respecting the constraint \\(z_n \\in (y_n - 0.5, y_n + 0.5)\\).\ndata {\n int<lower=0> N;\n vector[N] y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n vector<lower=y-0.5, upper=y+0.5>[N] z;\n}\nmodel {\n sigma ~ normal(0, 1);\n z ~ normal(mu, sigma);\n}\nThis explicit model for the unrounded measurements \\(z\\) produces the same posterior for \\(\\mu\\) and \\(\\sigma\\) as the previous model that marginalizes \\(z\\) out. Both approaches mix well, but the latent parameter version is about twice as efficient in terms of effective sample size per iteration, as well as providing a posterior for the unrounded parameters.\n\n\n\n\nMeta-analysis aims to pool the data from several studies, such as the application of a tutoring program in several schools or treatment using a drug in several clinical trials.\nThe Bayesian framework is particularly convenient for meta-analysis, because each previous study can be treated as providing a noisy measurement of some underlying quantity of interest. The model then follows directly from two components, a prior on the underlying quantities of interest and a measurement-error style model for each of the studies being analyzed.\n\n\nSuppose the data in question arise from a total of \\(J\\) studies providing paired binomial data for a treatment and control group. For instance, the data might be post-surgical pain reduction under a treatment of ibuprofen (Warn, Thompson, and Spiegelhalter 2002) or mortality after myocardial infarction under a treatment of beta blockers (Gelman et al. 2013, sec. 
5.6).\n\n\nThe clinical data consists of \\(J\\) trials, each with \\(n^t\\) treatment cases, \\(n^c\\) control cases, \\(r^t\\) successful outcomes among those treated and \\(r^c\\) successful outcomes among those in the control group. This data can be declared in Stan as follows.2\ndata {\n int<lower=0> J;\n array[J] int<lower=0> n_t; // num cases, treatment\n array[J] int<lower=0> r_t; // num successes, treatment\n array[J] int<lower=0> n_c; // num cases, control\n array[J] int<lower=0> r_c; // num successes, control\n}\n\n\n\nAlthough the clinical trial data are binomial in their raw format, they may be transformed to an unbounded scale by considering the log odds ratio \\[\\begin{align*}\ny_j &= \\log \\left( \\frac{r^t_j / (n^t_j - r^t_j)}\n {r^c_j / (n^c_j - r^c_j)}\n \\right) \\\\\n&= \\log \\left( \\frac{r^t_j}{n^t_j - r^t_j} \\right)\n -\\log \\left( \\frac{r^c_j}{n^c_j - r^c_j} \\right)\n\\end{align*}\\] and corresponding standard errors \\[\n\\sigma_j = \\sqrt{\n \\frac{1}{r^t_j}\n+ \\frac{1}{n^t_j - r^t_j}\n+ \\frac{1}{r^c_j}\n+ \\frac{1}{n^c_j - r^c_j}\n}.\n\\]\nThe log odds and standard errors can be defined in a transformed data block, though care must be taken not to use integer division.3\ntransformed data {\n array[J] real y;\n array[J] real<lower=0> sigma;\n for (j in 1:J) {\n y[j] = log(r_t[j]) - log(n_t[j] - r_t[j])\n - (log(r_c[j]) - log(n_c[j] - r_c[j]));\n }\n for (j in 1:J) {\n sigma[j] = sqrt(1.0 / r_t[j] + 1.0 / (n_t[j] - r_t[j])\n + 1.0 / r_c[j] + 1.0 / (n_c[j] - r_c[j]));\n }\n}\nThis definition will be problematic if any of the success counts is zero or equal to the number of trials. If that arises, a direct binomial model will be required or other transforms must be used than the unregularized sample log odds.\n\n\n\nWith the transformed data in hand, two standard forms of meta-analysis can be applied. The first is a so-called “fixed effects” model, which assumes a single parameter for the global odds ratio. 
This model is coded in Stan as follows.\nparameters {\n real theta; // global treatment effect, log odds\n}\nmodel {\n y ~ normal(theta, sigma);\n}\nThe distribution statement for y is vectorized; it has the same effect as the following.\n for (j in 1:J) {\n y[j] ~ normal(theta, sigma[j]);\n }\nIt is common to include a prior for theta in this model, but it is not strictly necessary for the model to be proper because y is fixed and \\(\\textsf{normal}(y \\mid \\mu,\\sigma) =\n\\textsf{normal}(\\mu \\mid y,\\sigma)\\).\n\n\n\nTo model so-called “random effects,” where the treatment effect may vary by clinical trial, a hierarchical model can be used. The parameters include per-trial treatment effects and the hierarchical prior parameters, which will be estimated along with other unknown quantities.\nparameters {\n array[J] real theta; // per-trial treatment effect\n real mu; // mean treatment effect\n real<lower=0> tau; // deviation of treatment effects\n}\nmodel {\n y ~ normal(theta, sigma);\n theta ~ normal(mu, tau);\n mu ~ normal(0, 10);\n tau ~ cauchy(0, 5);\n}\nAlthough the vectorized distribution statement for y appears unchanged, the parameter theta is now a vector. The distribution statement for theta is also vectorized, with the hyperparameters mu and tau themselves being given wide priors compared to the scale of the data.\nRubin (1981) provided a hierarchical Bayesian meta-analysis of the treatment effect of Scholastic Aptitude Test (SAT) coaching in eight schools based on the sample treatment effect and standard error in each school.\n\n\n\nSmith, Spiegelhalter, and Thomas (1995) and Gelman et al. (2013, sec. 19.4) provide meta-analyses based directly on binomial data. 
Warn, Thompson, and Spiegelhalter (2002) consider the modeling implications of using alternatives to the log-odds ratio in transforming the binomial data.\nIf trial-specific predictors are available, these can be included directly in a regression model for the per-trial treatment effects \\(\\theta_j\\).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Measurement Error and Meta-Analysis" + ] + }, + { + "objectID": "stan-users-guide/measurement-error.html#bayesian-measurement-error-model", + "href": "stan-users-guide/measurement-error.html#bayesian-measurement-error-model", + "title": "Measurement Error and Meta-Analysis", + "section": "", + "text": "A Bayesian approach to measurement error can be formulated directly by treating the true quantities being measured as missing data (Clayton 1992; Richardson and Gilks 1993). This requires a model of how the measurements are derived from the true values.\n\n\nBefore considering regression with measurement error, first consider a linear regression model where the observed data for \\(N\\) cases includes a predictor \\(x_n\\) and outcome \\(y_n\\). In Stan, a linear regression for \\(y\\) based on \\(x\\) with a slope and intercept is modeled as follows.\ndata {\n int<lower=0> N; // number of cases\n vector[N] x; // predictor (covariate)\n vector[N] y; // outcome (variate)\n}\nparameters {\n real alpha; // intercept\n real beta; // slope\n real<lower=0> sigma; // outcome noise\n}\nmodel {\n y ~ normal(alpha + beta * x, sigma);\n alpha ~ normal(0, 10);\n beta ~ normal(0, 10);\n sigma ~ cauchy(0, 5);\n}\nNow suppose that the true values of the predictors \\(x_n\\) are not known, but for each \\(n\\), a measurement \\(x^{\\textrm{meas}}_n\\) of \\(x_n\\) is available. If the error in measurement can be modeled, the measured value \\(x^{\\textrm{meas}}_n\\) can be modeled in terms of the true value \\(x_n\\) plus measurement noise. 
The true value \\(x_n\\) is treated as missing data and estimated along with other quantities in the model. A simple approach is to assume the measurement error is normal with known deviation \\(\\tau\\). This leads to the following regression model with constant measurement error.\ndata {\n // ...\n array[N] real x_meas; // measurement of x\n real<lower=0> tau; // measurement noise\n}\nparameters {\n array[N] real x; // unknown true value\n real mu_x; // prior location\n real sigma_x; // prior scale\n // ...\n}\nmodel {\n x ~ normal(mu_x, sigma_x); // prior\n x_meas ~ normal(x, tau); // measurement model\n y ~ normal(alpha + beta * x, sigma);\n // ...\n}\nThe regression coefficients alpha and beta and regression noise scale sigma are the same as before, but now x is declared as a parameter rather than as data. The data are now x_meas, which is a measurement of the true x value with noise scale tau. The model then specifies that the measurement error for x_meas[n] given true value x[n] is normal with deviation tau. Furthermore, the true values x are given a hierarchical prior here.\nIn cases where the measurement errors are not normal, richer measurement error models may be specified. The prior on the true values may also be enriched. For instance, Clayton (1992) introduces an exposure model for the unknown (but noisily measured) risk factors \\(x\\) in terms of known (without measurement error) risk factors \\(c\\). A simple model would regress \\(x_n\\) on the covariates \\(c_n\\) with noise term \\(\\upsilon\\), \\[\nx_n \\sim \\textsf{normal}(\\gamma^{\\top}c, \\upsilon).\n\\] This can be coded in Stan just like any other regression. And, of course, other exposure models can be provided.\n\n\n\nA common form of measurement error arises from rounding measurements. 
Rounding may be done in many ways, such as rounding weights to the nearest milligram, or to the nearest pound; rounding may even be done by rounding down to the nearest integer.\nExercise 3.5(b) by Gelman et al. (2013) provides an example.\n\n3.5. Suppose we weigh an object five times and measure weights, rounded to the nearest pound, of 10, 10, 12, 11, 9. Assume the unrounded measurements are normally distributed with a noninformative prior distribution on \\(\\mu\\) and \\(\\sigma^2\\).\n\nGive the correct posterior distribution for \\((\\mu, \\sigma^2)\\), treating the measurements as rounded.\n\n\nLetting \\(z_n\\) be the unrounded measurement for \\(y_n\\), the problem as stated assumes \\[\nz_n \\sim \\textsf{normal}(\\mu, \\sigma).\n\\]\nThe rounding process entails that \\(z_n \\in (y_n - 0.5, y_n + 0.5)\\)1. The probability mass function for the discrete observation \\(y\\) is then given by marginalizing out the unrounded measurement, producing the likelihood \\[\\begin{align*}\np(y_n \\mid \\mu, \\sigma)\n&= \\int_{y_n - 0.5}^{y_n + 0.5} \\textsf{normal}(z_n \\mid \\mu, \\sigma) \\,\\textsf{d}z_n \\\\\n&= \\Phi\\!\\left(\\frac{y_n + 0.5 - \\mu}{\\sigma}\\right)\n -\\Phi\\!\\left(\\frac{y_n - 0.5 - \\mu}{\\sigma}\\right).\n\\end{align*}\\] Gelman’s answer for this problem took the noninformative prior to be uniform in the variance \\(\\sigma^2\\) on the log scale, but we replace it with more recently recommended half-normal prior on \\(\\sigma\\) \\[\n\\sigma \\sim \\textsf{normal}^+(0, 1).\n\\] The posterior after observing \\(y = (10, 10, 12, 11, 9)\\) can be calculated by Bayes’s rule as \\[\\begin{align*}\np(\\mu, \\sigma \\mid y)\n&\\propto p(\\mu, \\sigma) \\ p(y \\mid \\mu, \\sigma) \\\\\n&\\propto \\textsf{normal}^+(\\sigma \\mid 0, 1)\\prod_{n=1}^5\n \\left( \\Phi\\!\\left(\\frac{y_n + 0.5 - \\mu}{\\sigma}\\right)\n -\\Phi\\!\\left(\\frac{y_n - 0.5 - \\mu}{\\sigma}\\right)\n \\right).\n\\end{align*}\\]\nThe Stan code simply follows the 
mathematical definition, providing an example of the direct definition of a probability function up to a proportion.\ndata {\n int<lower=0> N;\n vector[N] y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n sigma ~ normal(0, 1);\n for (n in 1:N) {\n target += log_diff_exp(normal_lcdf(y[n] + 0.5 | mu, sigma),\n normal_lcdf(y[n] - 0.5 | mu, sigma));\n }\n}\nwhere normal_lcdf(y[n]+0.5 | mu, sigma) is equal to log(Phi((y[n] + 0.5 - mu) / sigma)), and log_diff_exp(a, b) computes log(exp(a) - exp(b)) in a numerically more stable way.\nAlternatively, the model may be defined with latent parameters for the unrounded measurements \\(z_n\\). The Stan code in this case uses a distribution statement for \\(z_n\\) directly while respecting the constraint \\(z_n \\in (y_n - 0.5, y_n + 0.5)\\).\ndata {\n int<lower=0> N;\n vector[N] y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n vector<lower=y-0.5, upper=y+0.5>[N] z;\n}\nmodel {\n sigma ~ normal(0, 1);\n z ~ normal(mu, sigma);\n}\nThis explicit model for the unrounded measurements \\(z\\) produces the same posterior for \\(\\mu\\) and \\(\\sigma\\) as the previous model that marginalizes \\(z\\) out. 
Both approaches mix well, but the latent parameter version is about twice as efficient in terms of effective sample size per iteration, as well as providing a posterior for the unrounded parameters.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Measurement Error and Meta-Analysis" + ] + }, + { + "objectID": "stan-users-guide/measurement-error.html#meta-analysis", + "href": "stan-users-guide/measurement-error.html#meta-analysis", + "title": "Measurement Error and Meta-Analysis", + "section": "", + "text": "Meta-analysis aims to pool the data from several studies, such as the application of a tutoring program in several schools or treatment using a drug in several clinical trials.\nThe Bayesian framework is particularly convenient for meta-analysis, because each previous study can be treated as providing a noisy measurement of some underlying quantity of interest. The model then follows directly from two components, a prior on the underlying quantities of interest and a measurement-error style model for each of the studies being analyzed.\n\n\nSuppose the data in question arise from a total of \\(J\\) studies providing paired binomial data for a treatment and control group. For instance, the data might be post-surgical pain reduction under a treatment of ibuprofen (Warn, Thompson, and Spiegelhalter 2002) or mortality after myocardial infarction under a treatment of beta blockers (Gelman et al. 2013, sec. 5.6).\n\n\nThe clinical data consists of \\(J\\) trials, each with \\(n^t\\) treatment cases, \\(n^c\\) control cases, \\(r^t\\) successful outcomes among those treated and \\(r^c\\) successful outcomes among those in the control group. 
This data can be declared in Stan as follows.2\ndata {\n int<lower=0> J;\n array[J] int<lower=0> n_t; // num cases, treatment\n array[J] int<lower=0> r_t; // num successes, treatment\n array[J] int<lower=0> n_c; // num cases, control\n array[J] int<lower=0> r_c; // num successes, control\n}\n\n\n\nAlthough the clinical trial data are binomial in their raw format, they may be transformed to an unbounded scale by considering the log odds ratio \\[\\begin{align*}\ny_j &= \\log \\left( \\frac{r^t_j / (n^t_j - r^t_j)}\n {r^c_j / (n^c_j - r^c_j)}\n \\right) \\\\\n&= \\log \\left( \\frac{r^t_j}{n^t_j - r^t_j} \\right)\n -\\log \\left( \\frac{r^c_j}{n^c_j - r^c_j} \\right)\n\\end{align*}\\] and corresponding standard errors \\[\n\\sigma_j = \\sqrt{\n \\frac{1}{r^t_j}\n+ \\frac{1}{n^t_j - r^t_j}\n+ \\frac{1}{r^c_j}\n+ \\frac{1}{n^c_j - r^c_j}\n}.\n\\]\nThe log odds and standard errors can be defined in a transformed data block, though care must be taken not to use integer division.3\ntransformed data {\n array[J] real y;\n array[J] real<lower=0> sigma;\n for (j in 1:J) {\n y[j] = log(r_t[j]) - log(n_t[j] - r_t[j])\n - (log(r_c[j]) - log(n_c[j] - r_c[j]));\n }\n for (j in 1:J) {\n sigma[j] = sqrt(1.0 / r_t[j] + 1.0 / (n_t[j] - r_t[j])\n + 1.0 / r_c[j] + 1.0 / (n_c[j] - r_c[j]));\n }\n}\nThis definition will be problematic if any of the success counts is zero or equal to the number of trials. If that arises, a direct binomial model will be required or other transforms must be used than the unregularized sample log odds.\n\n\n\nWith the transformed data in hand, two standard forms of meta-analysis can be applied. The first is a so-called “fixed effects” model, which assumes a single parameter for the global odds ratio. 
This model is coded in Stan as follows.\nparameters {\n real theta; // global treatment effect, log odds\n}\nmodel {\n y ~ normal(theta, sigma);\n}\nThe distribution statement for y is vectorized; it has the same effect as the following.\n for (j in 1:J) {\n y[j] ~ normal(theta, sigma[j]);\n }\nIt is common to include a prior for theta in this model, but it is not strictly necessary for the model to be proper because y is fixed and \\(\\textsf{normal}(y \\mid \\mu,\\sigma) =\n\\textsf{normal}(\\mu \\mid y,\\sigma)\\).\n\n\n\nTo model so-called “random effects,” where the treatment effect may vary by clinical trial, a hierarchical model can be used. The parameters include per-trial treatment effects and the hierarchical prior parameters, which will be estimated along with other unknown quantities.\nparameters {\n array[J] real theta; // per-trial treatment effect\n real mu; // mean treatment effect\n real<lower=0> tau; // deviation of treatment effects\n}\nmodel {\n y ~ normal(theta, sigma);\n theta ~ normal(mu, tau);\n mu ~ normal(0, 10);\n tau ~ cauchy(0, 5);\n}\nAlthough the vectorized distribution statement for y appears unchanged, the parameter theta is now a vector. The distribution statement for theta is also vectorized, with the hyperparameters mu and tau themselves being given wide priors compared to the scale of the data.\nRubin (1981) provided a hierarchical Bayesian meta-analysis of the treatment effect of Scholastic Aptitude Test (SAT) coaching in eight schools based on the sample treatment effect and standard error in each school.\n\n\n\nSmith, Spiegelhalter, and Thomas (1995) and Gelman et al. (2013, sec. 19.4) provide meta-analyses based directly on binomial data. 
Warn, Thompson, and Spiegelhalter (2002) consider the modeling implications of using alternatives to the log-odds ratio in transforming the binomial data.\nIf trial-specific predictors are available, these can be included directly in a regression model for the per-trial treatment effects \\(\\theta_j\\).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Measurement Error and Meta-Analysis" + ] + }, + { + "objectID": "stan-users-guide/measurement-error.html#footnotes", + "href": "stan-users-guide/measurement-error.html#footnotes", + "title": "Measurement Error and Meta-Analysis", + "section": "Footnotes", + "text": "Footnotes\n\n\nThere are several different rounding rules (see, e.g., Wikipedia: Rounding), which affect which interval ends are open and which are closed, but these do not matter here as for continuous \\(z_n\\) \\(p(z_n=y_n-0.5)=p(z_n=y_n+0.5)=0\\).↩︎\nStan’s integer constraints are not powerful enough to express the constraint that \\(\\texttt{r}\\mathtt{\\_}\\texttt{t[j]} \\leq \\texttt{n}\\mathtt{\\_}\\texttt{t[j]}\\), but this constraint could be checked in the transformed data block.↩︎\nWhen dividing two integers, the result type is an integer and rounding will ensue if the result is not exact. See the discussion of primitive arithmetic types in the reference manual for more information.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Measurement Error and Meta-Analysis" + ] + }, + { + "objectID": "stan-users-guide/multi-indexing.html", + "href": "stan-users-guide/multi-indexing.html", + "title": "Multiple Indexing and Range Indexing", + "section": "", + "text": "Stan allows multiple indexes to be provided for containers (i.e., arrays, vectors, and matrices) in a single position, using either an array of integer indexes or range bounds. In many cases, there are functions that provide similar behavior.\nAllowing multiple indexes supports inline vectorization of models. 
For instance, consider the data model for a varying-slope, varying-intercept hierarchical linear regression, which could be coded as\nfor (n in 1:N) {\n y[n] ~ normal(alpha[ii[n]] + beta[ii[n]] * x[n], sigma);\n}\nWith multiple indexing, this can be coded in one line, leading to more efficient vectorized code.\ny ~ normal(alpha[ii] + rows_dot_product(beta[ii], x), sigma);\nThis latter version is faster than the loop version; it is equivalent in speed to the clunky assignment to a local variable.\n{\n vector[N] mu;\n for (n in 1:N) {\n mu[n] = alpha[ii[n]] + beta[ii[n]] * x[n];\n }\n y ~ normal(mu, sigma);\n}\nThe boost in speed compared to the original version is because the single call to the normal log density in the distribution statement will be much more memory efficient than the original version.\n\n\nThe following is the simplest concrete example of multiple indexing with an array of integers; the ellipses stand for code defining the variables as indicated in the comments.\narray[3] int c;\n// ... define: c == (5, 9, 7)\narray[4] int idxs;\n// ... define: idxs == (3, 3, 1, 2)\narray[4] int d;\nd = c[idxs]; // result: d == (7, 7, 5, 9)\nIn general, the multiple indexed expression c[idxs] is defined as follows, assuming idxs is of size K.\nc[idxs] = ( c[idxs[1]], c[idxs[2]], ..., c[idxs[K]] )\nThus c[idxs] is of the same size as idxs, which is K in this example.\nMultiple indexing can also be used with multi-dimensional arrays. For example, consider the following.\narray[2, 3] int c;\n// ... define: c = ((1, 3, 5), (7, 11, 13))\narray[4] int idxs;\n// ... define: idxs = (2, 2, 1, 2)\narray[4, 3] int d\nd = c[idxs]; // result: d = ((7, 11, 13), (7, 11, 13),\n // (1, 3, 5), (7, 11, 13))\nThat is, putting an index in the first position acts exactly the same way as defined above. 
The fact that the values are themselves arrays makes no difference—the result is still defined by c[idxs][j] == c[idxs[j]].\nMultiple indexing may also be used in the second position of a multi-dimensional array. Continuing the above example, consider a single index in the first position and a multiple index in the second.\narray[4] int e;\ne = c[2, idxs]; // result: c[2] = (7, 11, 13)\n // result: e = (11, 11, 7, 11)\nThe single index is applied, the one-dimensional result is determined, then the multiple index is applied to the result. That is, c[2,idxs] evaluates to the same value as c[2][idxs].\nMultiple indexing can apply to more than one position of a multi-dimensional array. For instance, consider the following\narray[2, 3] int c;\n// ... define: c = ((1, 3, 5), (7, 11, 13))\narray[3] int idxs1;\n// ... define: idxs1 = (2, 2, 1)\narray[2] int idxs2;\n// ... define: idxs2 = (1, 3)\narray[3, 2] int d;\nd = c[idxs1, idxs2]; // result: d = ((7, 13), (7, 13), (1, 5))\nWith multiple indexes, we no longer have c[idxs1, idxs2] being the same as c[idxs1][idxs2]. Rather, the entry d[i, j] after executing the above is given by\nd[i, j] == c[idxs1, idxs2][i, j] = c[idxs1[i], idxs2[j]]\nThis example illustrates the operation of multiple indexing in the general case: a multiple index like idxs1 converts an index i used on the result (here, c[idxs1, idxs2]) to index idxs1[i] in the variable being indexed (here, c). In contrast, a single index just returns the value at that index, thus reducing dimensionality by one in the result.\n\n\n\nSlicing returns a contiguous slice of a one-dimensional array, a contiguous sub-block of a two-dimensional array, and so on. Semantically, it is just a special form of multiple indexing.\n\n\nFor instance, consider supplying an upper and lower bound for an index.\narray[7] int c;\n// ...\narray[4] int d;\nd = c[3:6]; // result: d == (c[3], c[4], c[5], c[6])\nThe range index 3:6 behaves semantically just like the multiple index (3, 4, 5, 6). 
In terms of implementation, the sliced upper and/or lower bounded indices are faster and use less memory because they do not explicitly create a multiple index, but rather use a direct loop. They are also easier to read, so should be preferred over multiple indexes where applicable.\n\n\n\nIt is also possible to supply just a lower bound, or just an upper bound. Writing c[3:] is just shorthand for c[3:size(c)]. Writing c[:5] is just shorthand for c[1:5].\n\n\n\nFinally, it is possible to write a range index that covers the entire range of an array, either by including just the range symbol (:) as the index or leaving the index position empty. In both cases, c[] and c[:] are equal to c[1:size(c)], which in turn is just equal to c.\n\n\n\nStan provides head and tail functions that pull out prefixes or suffixes of vectors, row vectors, and one-dimensional arrays. In each case, the return type is the same as the argument type. For example,\nvector[M] a = ...;\nvector[N] b = head(a, N);\nassigns b to be a vector equivalent to the first N elements of the vector a. The function tail works the same way for suffixes, with\narray[M] a = ...;\narray[N] b = tail(a, N);\nFinally, there is a segment function, which specifies a first element and number of elements. For example,\narray[15] a = ...;\narray[3] b = segment(a, 5, 3);\nwill set b to be equal to { a[5], a[6], a[7] }, so that it starts at element 5 of a and includes a total of 3 elements.\n\n\n\n\nMultiple expressions may be used on the left-hand side of an assignment statement, where they work exactly the same way as on the right-hand side in terms of picking out entries of a container. For example, consider the following.\narray[3] int a;\narray[2] int c;\narray[2] int idxs;\n// ... 
define: a == (1, 2, 3); c == (5, 9)\n // idxs = (3,2)\na[idxs] = c; // result: a == (1, 9, 5)\nThe result above can be worked out by noting that the assignment sets a[idxs[1]] (a[3]) to c[1] (5) and a[idxs[2]] (a[2]) to c[2] (9).\nThe same principle applies when there are many multiple indexes, as in the following example.\narray[5, 7] int a;\narray[2, 2] int c;\n// ...\na[2:3, 5:6] = c; // result: a[2, 5] == c[1, 1]; a[2, 6] == c[1, 2]\n // a[3, 5] == c[2, 1]; a[3, 6] == c[2, 2]\nAs in the one-dimensional case, the right-hand side is written into the slice, block, or general chunk picked out by the left-hand side.\nUsage on the left-hand side allows the full generality of multiple indexing, with single indexes reducing dimensionality and multiple indexes maintaining dimensionality while rearranging, slicing, or blocking. For example, it is valid to assign to a segment of a row of an array as follows.\narray[10, 13] int a;\narray[2] int c;\n// ...\na[4, 2:3] = c; // result: a[4, 2] == c[1]; a[4, 3] == c[2]\n\n\nAliasing issues arise when there are references to the same data structure on the right-hand and left-hand side of an assignment. For example, consider the array a in the following code fragment.\narray[3] int a;\n// ... define: a == (5, 6, 7)\na[2:3] = a[1:2];\n// ... result: a == (5, 5, 6)\nThe reason the value of a after the assignment is \\((5,5,6)\\) rather than \\((5,5,5)\\) is that Stan behaves as if the right-hand side expression is evaluated to a fresh copy. As another example, consider the following.\narray[3] int a;\narray[3] int idxs;\n// ... define idxs = (2, 1, 3)\na[idxs] = a;\nIn this case, it is evident why the right-hand side needs to be copied before the assignment.\nIt is tempting (but wrong) to think of the assignment a[2:3] = a[1:2] as executing the following assignments.\n// ... 
define: a = (5, 6, 7)\na[2] = a[1]; // result: a = (5, 5, 7)\na[3] = a[2]; // result: a = (5, 5, 5)!\nThis produces a different result than executing the assignment because a[2]’s value changes before it is used.\n\n\n\n\nMultiple indexes can be supplied to vectors and matrices as well as arrays of vectors and matrices.\n\n\nVectors and row vectors behave exactly the same way as arrays with multiple indexes. If v is a vector, then v[3] is a scalar real value, whereas v[2:4] is a vector of size 3 containing the elements v[2], v[3], and v[4].\nThe only subtlety with vectors is in inferring the return type when there are multiple indexes. For example, consider the following minimal example.\narray[3] vector[5] v;\narray[7] int idxs;\n// ...\nvector[7] u;\nu = v[2, idxs];\n\narray[7] real w;\nw = v[idxs, 2];\nThe key is understanding that a single index always reduces dimensionality, whereas a multiple index never does. The dimensions with multiple indexes (and unindexed dimensions) determine the indexed expression’s type. In the example above, because v is an array of vectors, v[2, idxs] reduces the array dimension but doesn’t reduce the vector dimension, so the result is a vector. In contrast, v[idxs, 2] does not reduce the array dimension, but does reduce the vector dimension (to a scalar), so the result type for w is an array of reals. In both cases, the size of the multiple index (here, 7) determines the size of the result.\n\n\n\nMatrices are a bit trickier because they have two dimensions, but the underlying principle of type inference is the same—multiple indexes leave dimensions in place, whereas single indexes reduce them. 
The following code shows how this works for multiple indexing of matrices.\nmatrix[5, 7] m;\n// ...\nrow_vector[3] rv;\nrv = m[4, 3:5]; // result is 1 x 3\n// ...\nvector[4] v;\nv = m[2:5, 3]; // result is 4 x 1\n// ...\nmatrix[3, 4] m2;\nm2 = m[1:3, 2:5]; // result is 3 x 4\nThe key is realizing that any position with a multiple index or bounded index remains in play in the result, whereas any dimension with a single index is replaced with 1 in the resulting dimensions. Then the type of the result can be read off of the resulting dimensionality as indicated in the comments above.\n\n\n\nIf matrices receive a single multiple index, the result is a matrix. So if m is a matrix, so is m[2:4]. In contrast, supplying a single index, m[3], produces a row vector result. That is, m[3] produces the same result as m[3, ] or m[3, 1:cols(m)].\n\n\n\nWith arrays of matrices, vectors, and row vectors, the basic access rules remain exactly the same: single indexes reduce dimensionality and multiple indexes redirect indexes. For example, consider the following example.\narray[5, 7] matrix[3, 4] m;\n// ...\narray[2] matrix[3, 4] a;\na = m[1, 2:3]; // knock off first array dimension\na = m[3:4, 5]; // knock off second array dimension\nIn both assignments, the single index knocks off an array dimension, but it’s a different dimension in each case. In the first case, a[i] == m[1, i + 1], whereas in the second case, a[i] == m[i + 2, 5].\nContinuing the previous example, consider the following.\n// ...\nvector[2] b;\nb = m[1, 3, 2:3, 2];\nHere, the two array dimensions are reduced as is the column dimension of the matrix, leaving only a row dimension index, hence the result is a vector. 
In this case, b[j] == m[1, 3, 1 + j, 2].\nThis last example illustrates an important point: if there is a lower-bounded index, such as 2:3, with lower bound 2, then the lower bound minus one is added to the index, as seen in the 1 + j expression above.\nContinuing further, consider continuing with the following.\n// ...\narray[2] row_vector[3] c;\nc = m[4:5, 3, 1, 2: ];\nHere, the second array dimension is reduced, leaving a single array dimension, and the row index of the matrix is reduced, leaving a row vector. For indexing, the values are given by c[i, j] == m[i + 3, 3, 1, j + 1]\n\n\n\nMatrix slicing can also be performed using the block function. For example,\nmatrix[20, 20] a = ...;\nmatrix[3, 2] b = block(a, 5, 9, 3, 2);\nwill set b equal to the submatrix of a starting at index [5, 9] and extending 3 rows and 2 columns. Thus block(a, 5, 9, 3, 2) is equivalent to a[5:7, 9:10].\nThe sub_col function extracts a slice of a column of a matrix as a vector. For example,\nmatrix[10, 10] a = ...;\nvector b = sub_col(a, 2, 3, 5);\nwill set b equal to the vector a[2:6, 3], taking the element starting at [2, 3], then extending for a total of 5 rows. The function sub_row works the same way for extracting a slice of a row as a row vector. For example, sub_row(a, 2, 3, 5) is equal to the row vector a[2, 3:7], which also starts at position [2, 3] then extends for a total of 5 columns.\n\n\n\n\nSuppose you have a \\(3 \\times 3\\) matrix and know that two entries are zero but the others are parameters. Such a situation arises in missing data situations and in problems with fixed structural parameters.\nSuppose a \\(3 \\times 3\\) matrix is known to be zero at indexes \\([1,2]\\) and \\([1,3]\\). 
The indexes for parameters are included in a “melted” data-frame or database format.\ntransformed data {\n array[7, 2] int<lower=1, upper=3> idxs\n = { {1, 1},\n {2, 1}, {2, 2}, {2, 3},\n {3, 1}, {3, 2}, {3, 3} };\n // ...\nThe seven remaining parameters are declared as a vector.\nparameters {\n vector[7] A_raw;\n // ...\n}\nThen the full matrix A is constructed in the model block as a local variable.\nmodel {\n matrix[3, 3] A;\n for (i in 1:7) {\n A[idxs[i, 1], idxs[i, 2]] = A_raw[i];\n }\n A[1, 2] = 0;\n A[1, 3] = 0;\n // ...\n}\nThis may seem like overkill in this setting, but in more general settings, the matrix size, vector size, and the idxs array will be too large to code directly. Similar techniques can be used to build up matrices with ad-hoc constraints, such as a handful of entries known to be positive.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Multiple Indexing and Range Indexing" + ] + }, + { + "objectID": "stan-users-guide/multi-indexing.html#multiple-indexing", + "href": "stan-users-guide/multi-indexing.html#multiple-indexing", + "title": "Multiple Indexing and Range Indexing", + "section": "", + "text": "The following is the simplest concrete example of multiple indexing with an array of integers; the ellipses stand for code defining the variables as indicated in the comments.\narray[3] int c;\n// ... define: c == (5, 9, 7)\narray[4] int idxs;\n// ... define: idxs == (3, 3, 1, 2)\narray[4] int d;\nd = c[idxs]; // result: d == (7, 7, 5, 9)\nIn general, the multiple indexed expression c[idxs] is defined as follows, assuming idxs is of size K.\nc[idxs] = ( c[idxs[1]], c[idxs[2]], ..., c[idxs[K]] )\nThus c[idxs] is of the same size as idxs, which is K in this example.\nMultiple indexing can also be used with multi-dimensional arrays. For example, consider the following.\narray[2, 3] int c;\n// ... define: c = ((1, 3, 5), (7, 11, 13))\narray[4] int idxs;\n// ... 
define: idxs = (2, 2, 1, 2)\narray[4, 3] int d\nd = c[idxs]; // result: d = ((7, 11, 13), (7, 11, 13),\n // (1, 3, 5), (7, 11, 13))\nThat is, putting an index in the first position acts exactly the same way as defined above. The fact that the values are themselves arrays makes no difference—the result is still defined by c[idxs][j] == c[idxs[j]].\nMultiple indexing may also be used in the second position of a multi-dimensional array. Continuing the above example, consider a single index in the first position and a multiple index in the second.\narray[4] int e;\ne = c[2, idxs]; // result: c[2] = (7, 11, 13)\n // result: e = (11, 11, 7, 11)\nThe single index is applied, the one-dimensional result is determined, then the multiple index is applied to the result. That is, c[2,idxs] evaluates to the same value as c[2][idxs].\nMultiple indexing can apply to more than one position of a multi-dimensional array. For instance, consider the following\narray[2, 3] int c;\n// ... define: c = ((1, 3, 5), (7, 11, 13))\narray[3] int idxs1;\n// ... define: idxs1 = (2, 2, 1)\narray[2] int idxs2;\n// ... define: idxs2 = (1, 3)\narray[3, 2] int d;\nd = c[idxs1, idxs2]; // result: d = ((7, 13), (7, 13), (1, 5))\nWith multiple indexes, we no longer have c[idxs1, idxs2] being the same as c[idxs1][idxs2]. Rather, the entry d[i, j] after executing the above is given by\nd[i, j] == c[idxs1, idxs2][i, j] = c[idxs1[i], idxs2[j]]\nThis example illustrates the operation of multiple indexing in the general case: a multiple index like idxs1 converts an index i used on the result (here, c[idxs1, idxs2]) to index idxs1[i] in the variable being indexed (here, c). 
In contrast, a single index just returns the value at that index, thus reducing dimensionality by one in the result.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Multiple Indexing and Range Indexing" + ] + }, + { + "objectID": "stan-users-guide/multi-indexing.html#slicing-with-range-indexes", + "href": "stan-users-guide/multi-indexing.html#slicing-with-range-indexes", + "title": "Multiple Indexing and Range Indexing", + "section": "", + "text": "Slicing returns a contiguous slice of a one-dimensional array, a contiguous sub-block of a two-dimensional array, and so on. Semantically, it is just a special form of multiple indexing.\n\n\nFor instance, consider supplying an upper and lower bound for an index.\narray[7] int c;\n// ...\narray[4] int d;\nd = c[3:6]; // result: d == (c[3], c[4], c[5], c[6])\nThe range index 3:6 behaves semantically just like the multiple index (3, 4, 5, 6). In terms of implementation, the sliced upper and/or lower bounded indices are faster and use less memory because they do not explicitly create a multiple index, but rather use a direct loop. They are also easier to read, so should be preferred over multiple indexes where applicable.\n\n\n\nIt is also possible to supply just a lower bound, or just an upper bound. Writing c[3:] is just shorthand for c[3:size(c)]. Writing c[:5] is just shorthand for c[1:5].\n\n\n\nFinally, it is possible to write a range index that covers the entire range of an array, either by including just the range symbol (:) as the index or leaving the index position empty. In both cases, c[] and c[:] are equal to c[1:size(c)], which in turn is just equal to c.\n\n\n\nStan provides head and tail functions that pull out prefixes or suffixes of vectors, row vectors, and one-dimensional arrays. In each case, the return type is the same as the argument type. For example,\nvector[M] a = ...;\nvector[N] b = head(a, N);\nassigns b to be a vector equivalent to the first N elements of the vector a. 
The function tail works the same way for suffixes, with\narray[M] a = ...;\narray[N] b = tail(a, N);\nFinally, there is a segment function, which specifies a first element and number of elements. For example,\narray[15] a = ...;\narray[3] b = segment(a, 5, 3);\nwill set b to be equal to { a[5], a[6], a[7] }, so that it starts at element 5 of a and includes a total of 3 elements.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Multiple Indexing and Range Indexing" + ] + }, + { + "objectID": "stan-users-guide/multi-indexing.html#multiple-indexing-on-the-left-of-assignments", + "href": "stan-users-guide/multi-indexing.html#multiple-indexing-on-the-left-of-assignments", + "title": "Multiple Indexing and Range Indexing", + "section": "", + "text": "Multiple expressions may be used on the left-hand side of an assignment statement, where they work exactly the same way as on the right-hand side in terms of picking out entries of a container. For example, consider the following.\narray[3] int a;\narray[2] int c;\narray[2] int idxs;\n// ... define: a == (1, 2, 3); c == (5, 9)\n // idxs = (3,2)\na[idxs] = c; // result: a == (1, 9, 5)\nThe result above can be worked out by noting that the assignment sets a[idxs[1]] (a[3]) to c[1] (5) and a[idxs[2]] (a[2]) to c[2] (9).\nThe same principle applies when there are many multiple indexes, as in the following example.\narray[5, 7] int a;\narray[2, 2] int c;\n// ...\na[2:3, 5:6] = c; // result: a[2, 5] == c[1, 1]; a[2, 6] == c[1, 2]\n // a[3, 5] == c[2, 1]; a[3, 6] == c[2, 2]\nAs in the one-dimensional case, the right-hand side is written into the slice, block, or general chunk picked out by the left-hand side.\nUsage on the left-hand side allows the full generality of multiple indexing, with single indexes reducing dimensionality and multiple indexes maintaining dimensionality while rearranging, slicing, or blocking. 
For example, it is valid to assign to a segment of a row of an array as follows.\narray[10, 13] int a;\narray[2] int c;\n// ...\na[4, 2:3] = c; // result: a[4, 2] == c[1]; a[4, 3] == c[2]\n\n\nAliasing issues arise when there are references to the same data structure on the right-hand and left-hand side of an assignment. For example, consider the array a in the following code fragment.\narray[3] int a;\n// ... define: a == (5, 6, 7)\na[2:3] = a[1:2];\n// ... result: a == (5, 5, 6)\nThe reason the value of a after the assignment is \\((5,5,6)\\) rather than \\((5,5,5)\\) is that Stan behaves as if the right-hand side expression is evaluated to a fresh copy. As another example, consider the following.\narray[3] int a;\narray[3] int idxs;\n// ... define idxs = (2, 1, 3)\na[idxs] = a;\nIn this case, it is evident why the right-hand side needs to be copied before the assignment.\nIt is tempting (but wrong) to think of the assignment a[2:3] = a[1:2] as executing the following assignments.\n// ... define: a = (5, 6, 7)\na[2] = a[1]; // result: a = (5, 5, 7)\na[3] = a[2]; // result: a = (5, 5, 5)!\nThis produces a different result than executing the assignment because a[2]’s value changes before it is used.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Multiple Indexing and Range Indexing" + ] + }, + { + "objectID": "stan-users-guide/multi-indexing.html#multiple-indexes-with-vectors-and-matrices", + "href": "stan-users-guide/multi-indexing.html#multiple-indexes-with-vectors-and-matrices", + "title": "Multiple Indexing and Range Indexing", + "section": "", + "text": "Multiple indexes can be supplied to vectors and matrices as well as arrays of vectors and matrices.\n\n\nVectors and row vectors behave exactly the same way as arrays with multiple indexes. 
If v is a vector, then v[3] is a scalar real value, whereas v[2:4] is a vector of size 3 containing the elements v[2], v[3], and v[4].\nThe only subtlety with vectors is in inferring the return type when there are multiple indexes. For example, consider the following minimal example.\narray[3] vector[5] v;\narray[7] int idxs;\n// ...\nvector[7] u;\nu = v[2, idxs];\n\narray[7] real w;\nw = v[idxs, 2];\nThe key is understanding that a single index always reduces dimensionality, whereas a multiple index never does. The dimensions with multiple indexes (and unindexed dimensions) determine the indexed expression’s type. In the example above, because v is an array of vectors, v[2, idxs] reduces the array dimension but doesn’t reduce the vector dimension, so the result is a vector. In contrast, v[idxs, 2] does not reduce the array dimension, but does reduce the vector dimension (to a scalar), so the result type for w is an array of reals. In both cases, the size of the multiple index (here, 7) determines the size of the result.\n\n\n\nMatrices are a bit trickier because they have two dimensions, but the underlying principle of type inference is the same—multiple indexes leave dimensions in place, whereas single indexes reduce them. The following code shows how this works for multiple indexing of matrices.\nmatrix[5, 7] m;\n// ...\nrow_vector[3] rv;\nrv = m[4, 3:5]; // result is 1 x 3\n// ...\nvector[4] v;\nv = m[2:5, 3]; // result is 4 x 1\n// ...\nmatrix[3, 4] m2;\nm2 = m[1:3, 2:5]; // result is 3 x 4\nThe key is realizing that any position with a multiple index or bounded index remains in play in the result, whereas any dimension with a single index is replaced with 1 in the resulting dimensions. Then the type of the result can be read off of the resulting dimensionality as indicated in the comments above.\n\n\n\nIf matrices receive a single multiple index, the result is a matrix. So if m is a matrix, so is m[2:4]. 
In contrast, supplying a single index, m[3], produces a row vector result. That is, m[3] produces the same result as m[3, ] or m[3, 1:cols(m)].\n\n\n\nWith arrays of matrices, vectors, and row vectors, the basic access rules remain exactly the same: single indexes reduce dimensionality and multiple indexes redirect indexes. For example, consider the following example.\narray[5, 7] matrix[3, 4] m;\n// ...\narray[2] matrix[3, 4] a;\na = m[1, 2:3]; // knock off first array dimension\na = m[3:4, 5]; // knock off second array dimension\nIn both assignments, the single index knocks off an array dimension, but it’s a different dimension in each case. In the first case, a[i] == m[1, i + 1], whereas in the second case, a[i] == m[i + 2, 5].\nContinuing the previous example, consider the following.\n// ...\nvector[2] b;\nb = a[1, 3, 2:3, 2];\nHere, the two array dimensions are reduced as is the column dimension of the matrix, leaving only a row dimension index, hence the result is a vector. In this case, b[j] == a[1, 3, 1 + j, 2].\nThis last example illustrates an important point: if there is a lower-bounded index, such as 2:3, with lower bound 2, then the lower bound minus one is added to the index, as seen in the 1 + j expression above.\nContinuing further, consider the following.\n// ...\narray[2] row_vector[3] c;\nc = a[4:5, 3, 1, 2: ];\nHere, the second array dimension is reduced, leaving a single array dimension, and the row index of the matrix is reduced, leaving a row vector. For indexing, the values are given by c[i, j] == a[i + 3, 3, 1, j + 1].\n\n\n\nMatrix slicing can also be performed using the block function. For example,\nmatrix[20, 20] a = ...;\nmatrix[3, 2] b = block(a, 5, 9, 3, 2);\nwill set b equal to the submatrix of a starting at index [5, 9] and extending 3 rows and 2 columns. Thus block(a, 5, 9, 3, 2) is equivalent to a[5:7, 9:10].\nThe sub_col function extracts a slice of a column of a matrix as a vector. 
For example,\nmatrix[10, 10] a = ...;\nvector b = sub_col(a, 2, 3, 5);\nwill set b equal to the vector a[2:6, 3], taking the element starting at [2, 3], then extending for a total of 5 rows. The function sub_row works the same way for extracting a slice of a row as a row vector. For example, sub_row(a, 2, 3, 5) is equal to the row vector a[2, 3:7], which also starts at position [2, 3] then extends for a total of 5 columns.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Multiple Indexing and Range Indexing" + ] + }, + { + "objectID": "stan-users-guide/multi-indexing.html#matrices-with-parameters-and-constants", + "href": "stan-users-guide/multi-indexing.html#matrices-with-parameters-and-constants", + "title": "Multiple Indexing and Range Indexing", + "section": "", + "text": "Suppose you have a \\(3 \\times 3\\) matrix and know that two entries are zero but the others are parameters. Such a situation arises in missing data situations and in problems with fixed structural parameters.\nSuppose a \\(3 \\times 3\\) matrix is known to be zero at indexes \\([1,2]\\) and \\([1,3]\\). The indexes for parameters are included in a “melted” data-frame or database format.\ntransformed data {\n array[7, 2] int<lower=1, upper=3> idxs\n = { {1, 1},\n {2, 1}, {2, 2}, {2, 3},\n {3, 1}, {3, 2}, {3, 3} };\n // ...\nThe seven remaining parameters are declared as a vector.\nparameters {\n vector[7] A_raw;\n // ...\n}\nThen the full matrix A is constructed in the model block as a local variable.\nmodel {\n matrix[3, 3] A;\n for (i in 1:7) {\n A[idxs[i, 1], idxs[i, 2]] = A_raw[i];\n }\n A[1, 2] = 0;\n A[1, 3] = 0;\n // ...\n}\nThis may seem like overkill in this setting, but in more general settings, the matrix size, vector size, and the idxs array will be too large to code directly. 
Similar techniques can be used to build up matrices with ad-hoc constraints, such as a handful of entries known to be positive.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Multiple Indexing and Range Indexing" + ] + }, + { + "objectID": "stan-users-guide/odes.html", + "href": "stan-users-guide/odes.html", + "title": "Ordinary Differential Equations", + "section": "", + "text": "Stan provides a number of different methods for solving systems of ordinary differential equations (ODEs). All of these methods adaptively refine their solutions in order to satisfy given tolerances, but internally they handle calculations quite a bit differently.\nBecause Stan’s algorithms require gradients of the log density, the ODE solvers must not only provide the solution to the ODE itself, but also the gradient of the ODE solution with respect to parameters (the sensitivities). Two fundamentally different approaches are available in Stan to solve this problem, each having very different computational cost depending on the number of ODE states \\(N\\) and the number of parameters \\(M\\) being used:\n\nA forward sensitivity solver expands the base ODE system with additional ODE equations for the gradients of the solution. For each parameter, an additional full set of \\(N\\) sensitivity states is added, meaning that the full ODE solved has \\(N \\, + N \\cdot M\\) states.\nAn adjoint sensitivity solver starts by solving the base ODE system forward in time to get the ODE solution and then solves another ODE system (the adjoint) backward in time to get the gradients. The forward and reverse solves both have \\(N\\) states each. There is additionally one quadrature problem solved for every parameter.\n\nThe adjoint sensitivity approach scales much better than the forward sensitivity approach. 
Whereas the computational cost of the forward approach scales multiplicatively in the number of ODE states \\(N\\) and parameters \\(M\\), the adjoint sensitivity approach scales linearly in states \\(N\\) and parameters \\(M\\). However, the adjoint problem is harder to configure and the overhead for small problems actually makes it slower than solving the full forward sensitivity system. With that in mind, the rest of this introduction focuses on the forward sensitivity interfaces. For information on the adjoint sensitivity interface see the Adjoint ODE solver section.\nTwo interfaces are provided for each forward sensitivity solver: one with default tolerances and default max number of steps, and one that allows these controls to be modified. Choosing tolerances is important for making any of the solvers work well – the defaults will not work everywhere. The tolerances should be chosen primarily with consideration to the scales of the solutions, the accuracy needed for the solutions, and how the solutions are used in the model. For instance, if a solution component slowly varies between 3.0 and 5.0 and measurements of the ODE state are noisy, then perhaps the tolerances do not need to be as tight as for a situation where the solutions vary between 3.0 and 3.1 and very high precision measurements of the ODE state are available. It is also often useful to reduce the absolute tolerance when a component of the solution is expected to approach zero. For information on choosing tolerances, see the control parameters section.\nThe advantage of adaptive solvers is that as long as reasonable tolerances are provided and an ODE solver well-suited to the problem is chosen the technical details of solving the ODE can be abstracted away. The catch is that it is not always clear from the outset what reasonable tolerances are or which ODE solver is best suited to a problem. 
In addition, as changes are made to an ODE model, the optimal solver and tolerances may change.\nWith this in mind, the four forward solvers are rk45, bdf, adams, and ckrk. If no other information about the ODE is available, start with the rk45 solver. The list below has information on when each solver is useful.\nIf there is any uncertainty about which solver is the best, it can be useful to measure the performance of all the interesting solvers using profile statements. It is difficult to always know exactly what solver is the best in all situations, but a profile can provide a quick check.\n\nrk45: a fourth and fifth order Runge-Kutta method for non-stiff systems (Dormand and Prince 1980; Ahnert and Mulansky 2011). rk45 is the most generic solver and should be tried first.\nbdf: a variable-step, variable-order, backward-differentiation formula implementation for stiff systems (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005). bdf is often useful for ODEs modeling chemical reactions.\nadams: a variable-step, variable-order, Adams-Moulton formula implementation for non-stiff systems (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005). The method has order up to 12, hence is commonly used when high-accuracy is desired for a very smooth solution, such as in modeling celestial mechanics and orbital dynamics (Montenbruck and Gill 2000).\nckrk: a fourth and fifth order explicit Runge-Kutta method for non-stiff and semi-stiff systems (Cash and Karp 1990; Mazzia, Cash, and Soetaert 2012). The difference between ckrk and rk45 is that ckrk should perform better for systems that exhibit rapidly varying solutions. Often in those situations the derivatives become large or even nearly discontinuous, and ckrk is designed to address such problems.\n\nFor a discussion of stiff ODE systems, see the stiff ODE section. For information on the adjoint sensitivity interface see the Adjoint ODE solver section. 
The function signatures for Stan’s ODE solvers can be found in the function reference manual section on ODE solvers.\n\n\nAn ODE is defined by a set of differential equations, \\(y(t, \\theta)' = f(t, y, \\theta)\\), and initial conditions, \\(y(t_0, \\theta) = y_0\\). The function \\(f(t, y, \\theta)\\) is called the system function. The \\(\\theta\\) dependence is included in the notation for \\(y(t, \\theta)\\) and \\(f(t, y, \\theta)\\) as a reminder that the solution is a function of any parameters used in the computation.\n\n\n\nAs an example of a system of ODEs, consider a harmonic oscillator. In a harmonic oscillator a particle disturbed from equilibrium is pulled back towards its equilibrium position by a force proportional to its displacement from equilibrium. The system here additionally has a friction force proportional to particle speed which points in the opposite direction of the particle velocity. The system state will be a pair \\(y = (y_1, y_2)\\) representing position and speed. The change in the system with respect to time is given by the following differential equations.1\n\\[\\begin{align*}\n&\\frac{d}{dt} y_1 = y_2 \\\\\n&\\frac{d}{dt} y_2 = -y_1 - \\theta y_2\n\\end{align*}\\]\nThe state equations implicitly define the state at future times as a function of an initial state and the system parameters.\n\n\n\nThe first step in coding an ODE system in Stan is defining the ODE system function. The system functions require a specific signature so that the solvers know how to use them properly.\nThe first argument to the system function is time, passed as a real; the second argument to the system function is the system state, passed as a vector; and the return value from the system function is the vector of current time derivatives of the state. Additional arguments can be included in the system function to pass other information into the solve (these will be passed through the function that starts the ODE integration). 
These arguments can be parameters (in this case, the friction coefficient), data, or any quantities that are needed to define the differential equation.\nThe simple harmonic oscillator can be coded using the following function in Stan (see the user-defined functions chapter for more information on coding user-defined functions).\nvector sho(real t, // time\n vector y, // state\n real theta) { // friction parameter\n vector[2] dydt;\n dydt[1] = y[2];\n dydt[2] = -y[1] - theta * y[2];\n return dydt;\n}\nThe function takes in a time t (a real), the system state y (a vector), and the parameter theta (a real). The function returns a vector of time derivatives of the system state at time t, state y, and parameter theta. The simple harmonic oscillator coded here does not have time-sensitive equations; that is, t does not show up in the definition of dydt, although it is still required as an argument.\n\n\nThe types in the ODE system function are strict. The first argument is the time passed as a real, the second argument is the state passed as a vector, and the return type is a vector. A model that does not have this signature will fail to compile. 
The third argument onwards can be any type, granted all the argument types match the types of the respective arguments in the solver call.\nAll of these are possible ODE signatures:\nvector myode1(real t, vector y, real a0);\nvector myode2(real t, vector y, array[] int a0, vector a1);\nvector myode3(real t, vector y, matrix a0, array[] real a1, row_vector a2);\nbut these are not allowed:\nvector myode1(real t, array[] real y, real a0);\n// Second argument is not a vector\narray[] real myode2(real t, vector y, real a0);\n// Return type is not a vector\nvector myode3(vector y, real a0);\n// First argument is not a real and second is not a vector\n\n\n\n\nNoisy observations of the ODE state can be used to estimate the parameters and/or the initial state of the system.\n\n\nAs an example, suppose the simple harmonic oscillator has a parameter value of \\(\\theta = 0.15\\) and an initial state \\(y(t = 0, \\theta = 0.15) = (1, 0)\\). Assume the system is measured at 10 time points, \\(t = 1, 2, \\cdots, 10\\), where each measurement of \\(y(t, \\theta)\\) has independent \\(\\textsf{normal}(0, 0.1)\\) error in both dimensions (\\(y_1(t, \\theta)\\) and \\(y_2(t, \\theta)\\)).\nThe following model can be used to generate data like this:\nfunctions {\n vector sho(real t,\n vector y,\n real theta) {\n vector[2] dydt;\n dydt[1] = y[2];\n dydt[2] = -y[1] - theta * y[2];\n return dydt;\n }\n}\ndata {\n int<lower=1> T;\n vector[2] y0;\n real t0;\n array[T] real ts;\n real theta;\n}\nmodel {\n}\ngenerated quantities {\n array[T] vector[2] y_sim = ode_rk45(sho, y0, t0, ts, theta);\n // add measurement error\n for (t in 1:T) {\n y_sim[t, 1] += normal_rng(0, 0.1);\n y_sim[t, 2] += normal_rng(0, 0.1);\n }\n}\nThe system parameters theta and initial state y0 are read in as data along with the initial time t0 and observation times ts. The ODE is solved for the specified times, and then random measurement errors are added to produce simulated observations y_sim. 
Because the system is not stiff, the ode_rk45 solver is used.\nThis program illustrates the way in which the ODE solver is called in a Stan program,\narray[T] vector[2] y_sim = ode_rk45(sho, y0, t0, ts, theta);\nthis returns the solution of the ODE initial value problem defined by system function sho, initial state y0, initial time t0, and parameter theta at the times ts. The call explicitly specifies the non-stiff RK45 solver.\nThe parameter theta is passed unmodified to the ODE system function. If there were additional arguments that must be passed, they could be appended to the end of the ode call here. For instance, if the system function took two parameters, \\(\\theta\\) and \\(\\beta\\), the system function definition would look like:\nvector sho(real t, vector y, real theta, real beta) { ... }\nand the appropriate ODE solver call would be:\node_rk45(sho, y0, t0, ts, theta, beta);\nAny number of additional arguments can be added. They can be any Stan type (as long as the types match between the ODE system function and the solver call).\nBecause none of the input arguments are a function of parameters, the ODE solver is called in the generated quantities block. The random measurement noise is added to each of the T outputs with normal_rng.\n\n\n\nTypical realization of harmonic oscillator trajectory.\n\n\n\n\n\nThese ten noisy observations of the state can be used to estimate the friction parameter, \\(\\theta\\), the initial conditions, \\(y(t_0, \\theta)\\), and the scale of the noise in the problem. 
The full Stan model is:\nfunctions {\n vector sho(real t,\n vector y,\n real theta) {\n vector[2] dydt;\n dydt[1] = y[2];\n dydt[2] = -y[1] - theta * y[2];\n return dydt;\n }\n}\ndata {\n int<lower=1> T;\n array[T] vector[2] y;\n real t0;\n array[T] real ts;\n}\nparameters {\n vector[2] y0;\n vector<lower=0>[2] sigma;\n real theta;\n}\nmodel {\n array[T] vector[2] mu = ode_rk45(sho, y0, t0, ts, theta);\n sigma ~ normal(0, 2.5);\n theta ~ std_normal();\n y0 ~ std_normal();\n for (t in 1:T) {\n y[t] ~ normal(mu[t], sigma);\n }\n}\nBecause the solves are now a function of model parameters, the ode_rk45 call is now made in the model block. There are half-normal priors on the measurement error scales sigma, and standard normal priors on theta and the initial state vector y0. The solutions to the ODE are assigned to mu, which is used as the location for the normal observation model.\nAs with other regression models, it’s easy to change the noise model to something with heavier tails (e.g., Student-t distributed), correlation in the state variables (e.g., with a multivariate normal distribution), or both heavy tails and correlation in the state variables (e.g., with a multivariate Student-t distribution).\n\n\n\n\nStiffness is a numerical phenomenon that causes some differential equation solvers difficulty, notably the Runge-Kutta RK45 solver used in the examples earlier. The phenomenon is common in chemical reaction systems, which are often characterized by having multiple vastly different time-scales. The stiffness of a system can also vary between different parts of parameter space, and so a typically non-stiff system may exhibit stiffness occasionally. These sorts of difficulties can occur more frequently with loose priors or during warmup.\nStan provides a specialized solver for stiff ODEs (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005). An ODE system is specified exactly the same way with a function of exactly the same signature. 
The only difference is that in the call to the solver the rk45 suffix is replaced with bdf, as in\node_bdf(sho, y0, t0, ts, theta);\nUsing the stiff (bdf) solver on a system that is not stiff may be much slower than using the non-stiff (rk45) solver because each step of the stiff solver takes more time to compute. On the other hand, attempting to use the non-stiff solver for a stiff system will cause the timestep to become very small, leading to the non-stiff solver taking more time overall even if each step is easier to compute than for the stiff solver.\nIf it is not known for sure that an ODE system is stiff, run the model with both the rk45 and bdf solvers and see which is faster. If the rk45 solver is faster, then the problem is probably non-stiff, and then it makes sense to try the adams solver as well. The adams solver uses higher order methods which can take larger timesteps than the rk45 solver, though similar to the bdf solver each of these steps is more expensive to compute.\n\n\n\nFor additional control of the solves, both the stiff and non-stiff forward ODE solvers have function signatures that make it possible to specify the relative_tolerance, absolute_tolerance, and max_num_steps parameters. These are the same as the regular function names but with _tol appended to the end. All three control arguments must be supplied with this signature (there are no defaults).\narray[T] vector[2] y_sim = ode_bdf_tol(sho, y0, t0, ts,\n relative_tolerance,\n absolute_tolerance,\n max_num_steps,\n theta);\nrelative_tolerance and absolute_tolerance control the accuracy the solver tries to achieve, and max_num_steps specifies the maximum number of steps the solver will take between output time points before throwing an error.\nThe control parameters must be data variables – they cannot be parameters or expressions that depend on parameters, including local variables in any block other than transformed data and generated quantities. 
User-defined function arguments may be qualified as only allowing data arguments using the data qualifier.\nFor the RK45 and Cash-Karp solvers, the default values for relative and absolute tolerance are both \\(10^{-6}\\) and the maximum number of steps between outputs is one million. For the BDF and Adams solvers, the relative and absolute tolerances are \\(10^{-10}\\) and the maximum number of steps between outputs is one hundred million.\n\n\nIf there are discontinuities in the ODE system function, it is best to integrate the ODE between the discontinuities, stopping the solver at each one, and restarting it on the other side.\nNonetheless, the ODE solvers will attempt to integrate over discontinuities they encounter in the state function. The accuracy of the solution near the discontinuity may be problematic (requiring many small steps). An example of such a discontinuity is a lag in a pharmacokinetic model, where a concentration is zero for times \\(0 < t < t'\\) and then positive for \\(t \\geq t'\\). In this example, we would use code in the system such as\nif (t < t_lag) {\n return [0, 0]';\n} else {\n // ... return non-zero vector...\n}\nIn general it is better to integrate up to t_lag in one solve and then integrate from t_lag onwards in another. Mathematically, the discontinuity can make the problem ill-defined and the numerical integrator may behave erratically around it.\nIf the location of the discontinuity cannot be controlled precisely, or there is some other rapid change in ODE behavior, it can be useful to tell the ODE solver to produce output in the neighborhood. This can help the ODE solver avoid indiscriminately stepping over an important feature of the solution.\n\n\n\nThe relative tolerance RTOL and absolute tolerance ATOL control the accuracy of the numerical solution. 
Specifically, when solving an ODE with unknowns \\(y=(y_1,\\dots,y_n)^T\\), at every step the solver controls estimated local error \\(e=(e_1,\\dots,e_n)^T\\) through its weighted root-mean-square norm (Serban and Hindmarsh (2005), Hairer, Nørsett, and Wanner (1993))\n\\[\\begin{equation*}\n\\sqrt{\\sum_{i=1}^n{\\frac{1}{n}\\frac{e_i^2}{(\\text{RTOL}\\times |y_i| + \\text{ATOL})^2}}} < 1\n\\end{equation*}\\] by reducing the stepsize when the inequality is not satisfied.\nTo understand the roles of the two tolerances it helps to assume \\(y\\) at opposite scales in the above expression: on one hand the absolute tolerance has little effect when \\(y_i \\gg 1\\), on the other the relative tolerance can not affect the norm when \\(y_i = 0\\). Users are strongly encouraged to carefully choose tolerance values according to the ODE and its application. One can follow Brenan, Campbell, and Petzold (1995) for a rule of thumb: let \\(m\\) be the number of significant digits required for \\(y\\), set \\(\\text{RTOL}=10^{-(m+1)}\\), and set ATOL to the value at which \\(y\\) becomes insignificant. Note that the same weighted root-mean-square norm is used to control nonlinear solver convergence in bdf and adams solvers, and the same tolerances are used to control forward sensitivity calculation. See Serban and Hindmarsh (2005) for details.\n\n\n\nThe maximum number of steps can be used to stop a runaway simulation. This can arise when MCMC moves to a part of parameter space very far from where a differential equation would typically be solved. In particular this can happen during warmup. With the non-stiff solver, this may happen when the sampler moves to stiff regions of parameter space, which will require small step sizes.\n\n\n\n\nThe adjoint ODE solver method differs mathematically from the forward ODE solvers in the way gradients of the ODE solution are obtained. 
The forward ODE approach augments the original ODE system with \\(N\\) additional states for each parameter for which gradients are needed. If there are \\(M\\) parameters for which sensitivities are required, then the augmented ODE system has a total of \\(N \\cdot (M +\n1)\\) states. This can result in very large ODE systems through the multiplicative scaling of the computational effort needed.\nIn contrast, the adjoint ODE solver integrates forward in time a system of \\(N\\) equations to compute the ODE solution and then integrates backwards in time another system of \\(N\\) equations to get the sensitivities. Additionally, for \\(M\\) parameters there are \\(M\\) additional equations to integrate during the backwards solve. Because of this the adjoint sensitivity problem scales better in parameters than the forward sensitivity problem. The adjoint solver in Stan uses CVODES (the same as the bdf and adams forward sensitivity interfaces).\nThe solution computed in the forward integration is required during the backward integration. CVODES uses a checkpointing scheme that saves the forward solver state regularly. The number of steps between saving checkpoints is configurable in the interface. These checkpoints are then interpolated during the backward solve using one of two interpolation schemes.\nThe solver type (either bdf or adams) can be individually set for both the forward and backward solves.\nThe tolerances for each phase of the solve must be specified in the interface. Note that the absolute tolerance for the forward and backward ODE integration phase need to be set for each ODE state separately. 
The harmonic oscillator example call from above becomes:\narray[T] vector[2] y_sim\n = ode_adjoint_tol_ctl(sho, y0, t0, ts,\n relative_tolerance/9.0, // forward tolerance\n rep_vector(absolute_tolerance/9.0, 2), // forward tolerance\n relative_tolerance/3.0, // backward tolerance\n rep_vector(absolute_tolerance/3.0, 2), // backward tolerance\n relative_tolerance, // quadrature tolerance\n absolute_tolerance, // quadrature tolerance\n max_num_steps,\n 150, // number of steps between checkpoints\n 1, // interpolation polynomial: 1=Hermite, 2=polynomial\n 2, // solver for forward phase: 1=Adams, 2=BDF\n 2, // solver for backward phase: 1=Adams, 2=BDF\n theta);\nFor detailed information on each argument please see the Stan function reference manual.\n\n\n\nLinear systems of ODEs can be solved using a matrix exponential. This can be considerably faster than using one of the ODE solvers.\nThe solution to \\(\\frac{d}{dt} y = ay\\) is \\(y = y_0e^{at}\\), where the constant \\(y_0\\) is determined by boundary conditions. We can extend this solution to the vector case: \\[\n\\frac{d}{dt}y = A \\, y\n\\] where \\(y\\) is now a vector of length \\(n\\) and \\(A\\) is an \\(n\\) by \\(n\\) matrix. The solution is then given by: \\[\ny = e^{tA} \\, y_0\n\\] where the matrix exponential is formally defined by the convergent power series: \\[\ne^{tA} = \\sum_{n=0}^{\\infty} \\dfrac{(tA)^n}{n!} = I + tA + \\frac{t^2A^2}{2!} + \\dotsb\n\\] \nWe can apply this technique to the simple harmonic oscillator example, by setting \\[\ny = \\begin{bmatrix} y_1 \\\\ y_2 \\end{bmatrix} \\qquad\nA = \\begin{bmatrix} 0 & 1 \\\\ -1 & -\\theta \\end{bmatrix}\n\\] \nThe Stan model to simulate noisy observations using a matrix exponential function is given below.\nIn general, computing a matrix exponential will be more efficient than using a numerical solver. 
We can however only apply this technique to systems of linear ODEs.\ndata {\n int<lower=1> T;\n vector[2] y0;\n array[T] real ts;\n array[1] real theta;\n}\nmodel {\n}\ngenerated quantities {\n array[T] vector[2] y_sim;\n matrix[2, 2] A = [[ 0, 1],\n [-1, -theta[1]]];\n for (t in 1:T) {\n y_sim[t] = matrix_exp((t - 1) * A) * y0;\n }\n // add measurement error\n for (t in 1:T) {\n y_sim[t, 1] += normal_rng(0, 0.1);\n y_sim[t, 2] += normal_rng(0, 0.1);\n }\n}\nThis Stan program simulates noisy measurements from a simple harmonic oscillator. The system of linear differential equations is coded as a matrix. The system parameters theta and initial state y0 are read in as data along with observation times ts. The generated quantities block is used to solve the ODE for the specified times and then add random measurement error, producing observations y_sim. Because the ODEs are linear, we can use the matrix_exp function to solve the system.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#notation", + "href": "stan-users-guide/odes.html#notation", + "title": "Ordinary Differential Equations", + "section": "", + "text": "An ODE is defined by a set of differential equations, \\(y(t, \\theta)' = f(t, y, \\theta)\\), and initial conditions, \\(y(t_0, \\theta) = y_0\\). The function \\(f(t, y, \\theta)\\) is called the system function. 
The \\(\\theta\\) dependence is included in the notation for \\(y(t, \\theta)\\) and \\(f(t, y, \\theta)\\) as a reminder that the solution is a function of any parameters used in the computation.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#example-simple-harmonic-oscillator", + "href": "stan-users-guide/odes.html#example-simple-harmonic-oscillator", + "title": "Ordinary Differential Equations", + "section": "", + "text": "As an example of a system of ODEs, consider a harmonic oscillator. In a harmonic oscillator a particle disturbed from equilibrium is pulled back towards its equilibrium position by a force proportional to its displacement from equilibrium. The system here additionally has a friction force proportional to particle speed which points in the opposite direction of the particle velocity. The system state will be a pair \\(y = (y_1, y_2)\\) representing position and speed. The change in the system with respect to time is given by the following differential equations.1\n\\[\\begin{align*}\n&\\frac{d}{dt} y_1 = y_2 \\\\\n&\\frac{d}{dt} y_2 = -y_1 - \\theta y_2\n\\end{align*}\\]\nThe state equations implicitly defines the state at future times as a function of an initial state and the system parameters.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#coding-the-ode-system-function", + "href": "stan-users-guide/odes.html#coding-the-ode-system-function", + "title": "Ordinary Differential Equations", + "section": "", + "text": "The first step in coding an ODE system in Stan is defining the ODE system function. 
The system functions require a specific signature so that the solvers know how to use them properly.\nThe first argument to the system function is time, passed as a real; the second argument to the system function is the system state, passed as a vector, and the return value from the system function are the current time derivatives of the state defined as a vector. Additional arguments can be included in the system function to pass other information into the solve (these will be passed through the function that starts the ODE integration). These argument can be parameters (in this case, the friction coefficient), data, or any quantities that are needed to define the differential equation.\nThe simple harmonic oscillator can be coded using the following function in Stan (see the user-defined functions chapter for more information on coding user-defined functions).\nvector sho(real t, // time\n vector y, // state\n real theta) { // friction parameter\n vector[2] dydt;\n dydt[1] = y[2];\n dydt[2] = -y[1] - theta * y[2];\n return dydt;\n}\nThe function takes in a time t (a real), the system state y (a vector), and the parameter theta (a real). The function returns a vector of time derivatives of the system state at time t, state y, and parameter theta. The simple harmonic oscillator coded here does not have time-sensitive equations; that is, t does not show up in the definition of dydt, however it is still required.\n\n\nThe types in the ODE system function are strict. The first argument is the time passed as a real, the second argument is the state passed as a vector, and the return type is a vector. A model that does not have this signature will fail to compile. 
The third argument onwards can be any type, granted all the argument types match the types of the respective arguments in the solver call.\nAll of these are possible ODE signatures:\nvector myode1(real t, vector y, real a0);\nvector myode2(real t, vector y, array[] int a0, vector a1);\nvector myode3(real t, vector y, matrix a0, array[] real a1, row_vector a2);\nbut these are not allowed:\nvector myode1(real t, array[] real y, real a0);\n// Second argument is not a vector\narray[] real myode2(real t, vector y, real a0);\n// Return type is not a vector\nvector myode3(vector y, real a0);\n// First argument is not a real and second is not a vector", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#measurement-error-models", + "href": "stan-users-guide/odes.html#measurement-error-models", + "title": "Ordinary Differential Equations", + "section": "", + "text": "Noisy observations of the ODE state can be used to estimate the parameters and/or the initial state of the system.\n\n\nAs an example, suppose the simple harmonic oscillator has a parameter value of \\(\\theta = 0.15\\) and an initial state \\(y(t = 0, \\theta = 0.15) = (1, 0)\\). 
Assume the system is measured at 10 time points, \\(t = 1, 2, \\cdots, 10\\), where each measurement of \\(y(t, \\theta)\\) has independent \\(\\textsf{normal}(0, 0.1)\\) error in both dimensions (\\(y_1(t, \\theta)\\) and \\(y_2(t, \\theta)\\)).\nThe following model can be used to generate data like this:\nfunctions {\n vector sho(real t,\n vector y,\n real theta) {\n vector[2] dydt;\n dydt[1] = y[2];\n dydt[2] = -y[1] - theta * y[2];\n return dydt;\n }\n}\ndata {\n int<lower=1> T;\n vector[2] y0;\n real t0;\n array[T] real ts;\n real theta;\n}\nmodel {\n}\ngenerated quantities {\n array[T] vector[2] y_sim = ode_rk45(sho, y0, t0, ts, theta);\n // add measurement error\n for (t in 1:T) {\n y_sim[t, 1] += normal_rng(0, 0.1);\n y_sim[t, 2] += normal_rng(0, 0.1);\n }\n}\nThe system parameters theta and initial state y0 are read in as data along with the initial time t0 and observation times ts. The ODE is solved for the specified times, and then random measurement errors are added to produce simulated observations y_sim. Because the system is not stiff, the ode_rk45 solver is used.\nThis program illustrates the way in which the ODE solver is called in a Stan program,\narray[T] vector[2] y_sim = ode_rk45(sho, y0, t0, ts, theta);\nthis returns the solution of the ODE initial value problem defined by system function sho, initial state y0, initial time t0, and parameter theta at the times ts. The call explicitly specifies the non-stiff RK45 solver.\nThe parameter theta is passed unmodified to the ODE system function. If there were additional arguments that must be passed, they could be appended to the end of the ode call here. For instance, if the system function took two parameters, \\(\\theta\\) and \\(\\beta\\), the system function definition would look like:\nvector sho(real t, vector y, real theta, real beta) { ... }\nand the appropriate ODE solver call would be:\node_rk45(sho, y0, t0, ts, theta, beta);\nAny number of additional arguments can be added. 
They can be any Stan type (as long as the types match between the ODE system function and the solver call).\nBecause none of the input arguments are a function of parameters, the ODE solver is called in the generated quantities block. The random measurement noise is added to each of the T outputs with normal_rng.\n\n\n\nTypical realization of harmonic oscillator trajectory.\n\n\n\n\n\nThese ten noisy observations of the state can be used to estimate the friction parameter, \\(\\theta\\), the initial conditions, \\(y(t_0, \\theta)\\), and the scale of the noise in the problem. The full Stan model is:\nfunctions {\n vector sho(real t,\n vector y,\n real theta) {\n vector[2] dydt;\n dydt[1] = y[2];\n dydt[2] = -y[1] - theta * y[2];\n return dydt;\n }\n}\ndata {\n int<lower=1> T;\n array[T] vector[2] y;\n real t0;\n array[T] real ts;\n}\nparameters {\n vector[2] y0;\n vector<lower=0>[2] sigma;\n real theta;\n}\nmodel {\n array[T] vector[2] mu = ode_rk45(sho, y0, t0, ts, theta);\n sigma ~ normal(0, 2.5);\n theta ~ std_normal();\n y0 ~ std_normal();\n for (t in 1:T) {\n y[t] ~ normal(mu[t], sigma);\n }\n}\nBecause the solves are now a function of model parameters, the ode_rk45 call is now made in the model block. There are half-normal priors on the measurement error scales sigma, and standard normal priors on theta and the initial state vector y0. 
The solutions to the ODE are assigned to mu, which is used as the location for the normal observation model.\nAs with other regression models, it’s easy to change the noise model to something with heavier tails (e.g., Student-t distributed), correlation in the state variables (e.g., with a multivariate normal distribution), or both heavy tails and correlation in the state variables (e.g., with a multivariate Student-t distribution).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#stiff-ode.section", + "href": "stan-users-guide/odes.html#stiff-ode.section", + "title": "Ordinary Differential Equations", + "section": "", + "text": "Stiffness is a numerical phenomenon that causes some differential equation solvers difficulty, notably the Runge-Kutta RK45 solver used in the examples earlier. The phenomenon is common in chemical reaction systems, which are often characterized by having multiple vastly different time-scales. The stiffness of a system can also vary between different parts of parameter space, and so a typically non-stiff system may exhibit stiffness occasionally. These sorts of difficulties can occur more frequently with loose priors or during warmup.\nStan provides a specialized solver for stiff ODEs (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005). An ODE system is specified exactly the same way with a function of exactly the same signature. The only difference is in the call to the solver the rk45 suffix is replaced with bdf, as in\node_bdf(sho, y0, t0, ts, theta);\nUsing the stiff (bdf) solver on a system that is not stiff may be much slower than using the non-stiff (rk45) solver because each step of the stiff solver takes more time to compute. 
On the other hand, attempting to use the non-stiff solver for a stiff system will cause the timestep to become very small, leading the non-stiff solver taking more time overall even if each step is easier to compute than for the stiff solver.\nIf it is not known for sure that an ODE system is stiff, run the model with both the rk45 and bdf solvers and see which is faster. If the rk45 solver is faster, then the problem is probably non-stiff, and then it makes sense to try the adams solver as well. The adams solver uses higher order methods which can take larger timesteps than the rk45 solver, though similar to the bdf solver each of these steps is more expensive to compute.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#control-ode.section", + "href": "stan-users-guide/odes.html#control-ode.section", + "title": "Ordinary Differential Equations", + "section": "", + "text": "For additional control of the solves, both the stiff and non-stiff forward ODE solvers have function signatures that makes it possible to specify the relative_tolerance, absolute_tolerance, and max_num_steps parameters. These are the same as the regular function names but with _tol appended to the end. All three control arguments must be supplied with this signature (there are no defaults).\narray[T] vector[2] y_sim = ode_bdf_tol(sho, y0, t0, ts,\n relative_tolerance,\n absolute_tolerance,\n max_num_steps,\n theta);\nrelative_tolerance and absolute_tolerance control accuracy the solver tries to achieve, and max_num_steps specifies the maximum number of steps the solver will take between output time points before throwing an error.\nThe control parameters must be data variables – they cannot be parameters or expressions that depend on parameters, including local variables in any block other than transformed data and generated quantities. 
User-defined function arguments may be qualified as only allowing data arguments using the data qualifier.\nFor the RK45 and Cash-Karp solvers, the default values for relative and absolute tolerance are both \\(10^{-6}\\) and the maximum number of steps between outputs is one million. For the BDF and Adams solvers, the relative and absolute tolerances are \\(10^{-10}\\) and the maximum number of steps between outputs is one hundred million.\n\n\nIf there are discontinuities in the ODE system function, it is best to integrate the ODE between the discontinuities, stopping the solver at each one, and restarting it on the other side.\nNonetheless, the ODE solvers will attempt to integrate over discontinuities they encounter in the state function. The accuracy of the solution near the discontinuity may be problematic (requiring many small steps). An example of such a discontinuity is a lag in a pharmacokinetic model, where a concentration is zero for times \\(0 < t < t'\\) and then positive for \\(t \\geq t'\\). In this example, we would use code in the system such as\nif (t < t_lag) {\n return [0, 0]';\n} else {\n // ... return non-zero vector...\n}\nIn general it is better to integrate up to t_lag in one solve and then integrate from t_lag onwards in another. Mathematically, the discontinuity can make the problem ill-defined and the numerical integrator may behave erratically around it.\nIf the location of the discontinuity cannot be controlled precisely, or there is some other rapid change in ODE behavior, it can be useful to tell the ODE solver to produce output in the neighborhood. This can help the ODE solver avoid indiscriminately stepping over an important feature of the solution.\n\n\n\nThe relative tolerance RTOL and absolute tolerance ATOL control the accuracy of the numerical solution. 
Specifically, when solving an ODE with unknowns \\(y=(y_1,\\dots,y_n)^T\\), at every step the solver controls estimated local error \\(e=(e_1,\\dots,e_n)^T\\) through its weighted root-mean-square norm (Serban and Hindmarsh (2005), Hairer, Nørsett, and Wanner (1993))\n\\[\\begin{equation*}\n\\sqrt{\\sum_{i=1}^n{\\frac{1}{n}\\frac{e_i^2}{(\\text{RTOL}\\times |y_i| + \\text{ATOL})^2}}} < 1\n\\end{equation*}\\] by reducing the stepsize when the inequality is not satisfied.\nTo understand the roles of the two tolerances it helps to assume \\(y\\) at opposite scales in the above expression: on one hand the absolute tolerance has little effect when \\(y_i \\gg 1\\), on the other the relative tolerance can not affect the norm when \\(y_i = 0\\). Users are strongly encouraged to carefully choose tolerance values according to the ODE and its application. One can follow Brenan, Campbell, and Petzold (1995) for a rule of thumb: let \\(m\\) be the number of significant digits required for \\(y\\), set \\(\\text{RTOL}=10^{-(m+1)}\\), and set ATOL to the value at which \\(y\\) becomes insignificant. Note that the same weighted root-mean-square norm is used to control nonlinear solver convergence in bdf and adams solvers, and the same tolerances are used to control forward sensitivity calculation. See Serban and Hindmarsh (2005) for details.\n\n\n\nThe maximum number of steps can be used to stop a runaway simulation. This can arise when MCMC moves to a part of parameter space very far from where a differential equation would typically be solved. In particular this can happen during warmup. 
With the non-stiff solver, this may happen when the sampler moves to stiff regions of parameter space, which will require small step sizes.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#adjoint-ode.section", + "href": "stan-users-guide/odes.html#adjoint-ode.section", + "title": "Ordinary Differential Equations", + "section": "", + "text": "The adjoint ODE solver method differs mathematically from the forward ODE solvers in the way gradients of the ODE solution are obtained. The forward ODE approach augments the original ODE system with \\(N\\) additional states for each parameter for which gradients are needed. If there are \\(M\\) parameters for which sensitivities are required, then the augmented ODE system has a total of \\(N \\cdot (M +\n1)\\) states. This can result in very large ODE systems through the multiplicative scaling of the computational effort needed.\nIn contrast, the adjoint ODE solver integrates forward in time a system of \\(N\\) equations to compute the ODE solution and then integrates backwards in time another system of \\(N\\) equations to get the sensitivities. Additionally, for \\(M\\) parameters there are \\(M\\) additional equations to integrate during the backwards solve. Because of this the adjoint sensitivity problem scales better in parameters than the forward sensitivity problem. The adjoint solver in Stan uses CVODES (the same as the bdf and adams forward sensitivity interfaces).\nThe solution computed in the forward integration is required during the backward integration. CVODES uses a checkpointing scheme that saves the forward solver state regularly. The number of steps between saving checkpoints is configurable in the interface. 
These checkpoints are then interpolated during the backward solve using one of two interpolation schemes.\nThe solver type (either bdf or adams) can be individually set for both the forward and backward solves.\nThe tolerances for each phase of the solve must be specified in the interface. Note that the absolute tolerance for the forward and backward ODE integration phase need to be set for each ODE state separately. The harmonic oscillator example call from above becomes:\narray[T] vector[2] y_sim\n = ode_adjoint_tol_ctl(sho, y0, t0, ts,\n relative_tolerance/9.0, // forward tolerance\n rep_vector(absolute_tolerance/9.0, 2), // forward tolerance\n relative_tolerance/3.0, // backward tolerance\n rep_vector(absolute_tolerance/3.0, 2), // backward tolerance\n relative_tolerance, // quadrature tolerance\n absolute_tolerance, // quadrature tolerance\n max_num_steps,\n 150, // number of steps between checkpoints\n 1, // interpolation polynomial: 1=Hermite, 2=polynomial\n 2, // solver for forward phase: 1=Adams, 2=BDF\n 2, // solver for backward phase: 1=Adams, 2=BDF\n theta);\nFor detailed information on each argument please see the Stan function reference manual.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#solving-a-system-of-linear-odes-using-a-matrix-exponential", + "href": "stan-users-guide/odes.html#solving-a-system-of-linear-odes-using-a-matrix-exponential", + "title": "Ordinary Differential Equations", + "section": "", + "text": "Linear systems of ODEs can be solved using a matrix exponential. This can be considerably faster than using one of the ODE solvers.\nThe solution to \\(\\frac{d}{dt} y = ay\\) is \\(y = y_0e^{at}\\), where the constant \\(y_0\\) is determined by boundary conditions. 
We can extend this solution to the vector case: \\[\n\\frac{d}{dt}y = A \\, y\n\\] where \\(y\\) is now a vector of length \\(n\\) and \\(A\\) is an \\(n\\) by \\(n\\) matrix. The solution is then given by: \\[\ny = e^{tA} \\, y_0\n\\] where the matrix exponential is formally defined by the convergent power series: \\[\ne^{tA} = \\sum_{n=0}^{\\infty} \\dfrac{(tA)^n}{n!} = I + tA + \\frac{t^2A^2}{2!} + \\dotsb\n\\] \nWe can apply this technique to the simple harmonic oscillator example, by setting \\[\ny = \\begin{bmatrix} y_1 \\\\ y_2 \\end{bmatrix} \\qquad\nA = \\begin{bmatrix} 0 & 1 \\\\ -1 & -\\theta \\end{bmatrix}\n\\] \nThe Stan model to simulate noisy observations using a matrix exponential function is given below.\nIn general, computing a matrix exponential will be more efficient than using a numerical solver. We can however only apply this technique to systems of linear ODEs.\ndata {\n int<lower=1> T;\n vector[2] y0;\n array[T] real ts;\n array[1] real theta;\n}\nmodel {\n}\ngenerated quantities {\n array[T] vector[2] y_sim;\n matrix[2, 2] A = [[ 0, 1],\n [-1, -theta[1]]];\n for (t in 1:T) {\n y_sim[t] = matrix_exp((t - 1) * A) * y0;\n }\n // add measurement error\n for (t in 1:T) {\n y_sim[t, 1] += normal_rng(0, 0.1);\n y_sim[t, 2] += normal_rng(0, 0.1);\n }\n}\nThis Stan program simulates noisy measurements from a simple harmonic oscillator. The system of linear differential equations is coded as a matrix. The system parameters theta and initial state y0 are read in as data along with observation times ts. The generated quantities block is used to solve the ODE for the specified times and then add random measurement error, producing observations y_sim. 
Because the ODEs are linear, we can use the matrix_exp function to solve the system.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/odes.html#footnotes", + "href": "stan-users-guide/odes.html#footnotes", + "title": "Ordinary Differential Equations", + "section": "Footnotes", + "text": "Footnotes\n\n\nThis example is drawn from the documentation for the Boost Numeric Odeint library (Ahnert and Mulansky 2011), which Stan uses to implement the rk45 and ckrk solver.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Ordinary Differential Equations" + ] + }, + { + "objectID": "stan-users-guide/parallelization.html", + "href": "stan-users-guide/parallelization.html", + "title": "Parallelization", + "section": "", + "text": "Stan has support for different types of parallelization: multi-threading with Intel Threading Building Blocks (TBB), multi-processing with Message Passing Interface (MPI) and manycore processing with OpenCL.\nMulti-threading in Stan can be used with two mechanisms: reduce with summation and rectangular map. The latter can also be used with multi-processing.\nThe advantages of reduce with summation are:\n\nMore flexible argument interface, avoiding the packing and unpacking that is necessary with rectangular map.\nPartitions data for parallelization automatically (this is done manually in rectangular map).\nIs easier to use.\n\nThe advantages of rectangular map are:\n\nReturns a list of vectors, while the reduce summation returns only a scalar.\nCan be parallelized across multiple cores and multiple computers, while reduce summation can only parallelized across multiple cores on a single machine.\n\nThe actual speedup gained from using these functions will depend on many details. It is strongly recommended to only parallelize the computationally most expensive operations in a Stan program. 
Oftentimes this is the evaluation of the log likelihood for the observed data. When it is not clear which parts of the model are the most computationally expensive, we recommend using profiling, which is available in Stan 2.26 and newer.\nSince only portions of a Stan program will run in parallel, the maximal speedup one can achieve is capped, a phenomenon described by Amdahl’s law.\n\n\nIt is often necessary in probabilistic modeling to compute the sum of a number of independent function evaluations. This occurs, for instance, when evaluating a number of conditionally independent terms in a log-likelihood. If g: U -> real is the function and { x1, x2, ... } is an array of inputs, then that sum looks like:\ng(x1) + g(x2) + ...\nreduce_sum and reduce_sum_static are tools for parallelizing these calculations.\nFor efficiency reasons the reduce function doesn’t work with the element-wise evaluated function g, but instead the partial sum function f: U[] -> real, where f computes the partial sum corresponding to a slice of the sequence x passed in. Due to the associativity of the sum reduction it holds that:\ng(x1) + g(x2) + g(x3) = f({ x1, x2, x3 })\n = f({ x1, x2 }) + f({ x3 })\n = f({ x1 }) + f({ x2, x3 })\n = f({ x1 }) + f({ x2 }) + f({ x3 })\nWith the partial sum function f: U[] -> real reduction of a large number of terms can be evaluated in parallel automatically, since the overall sum can be partitioned into arbitrary smaller partial sums. The exact partitioning into the partial sums is not under the control of the user. 
However, since the exact numerical result will depend on the order of summation, Stan provides two versions of the reduce summation facility:\n\nreduce_sum: Automatically choose partial sums partitioning based on a dynamic scheduling algorithm.\nreduce_sum_static: Compute the same sum as reduce_sum, but partition the input in the same way for given data set (in reduce_sum this partitioning might change depending on computer load).\n\ngrainsize is the one tuning parameter. For reduce_sum, grainsize is a suggested partial sum size. A grainsize of 1 leaves the partitioning entirely up to the scheduler. This should be the default way of using reduce_sum unless time is spent carefully picking grainsize. For picking a grainsize, see details below.\nFor reduce_sum_static, grainsize specifies the maximal partial sum size. With reduce_sum_static it is more important to choose grainsize carefully since it entirely determines the partitioning of work. See details below.\nFor efficiency and convenience additional shared arguments can be passed to every term in the sum. So for the array { x1, x2, ... } and the shared arguments s1, s2, ... the effective sum (with individual terms) looks like:\ng(x1, s1, s2, ...) + g(x2, s1, s2, ...) + g(x3, s1, s2, ...) + ...\nwhich can be written equivalently with partial sums to look like:\nf({ x1, x2 }, s1, s2, ...) + f({ x3 }, s1, s2, ...)\nwhere the particular slicing of the x array can change.\nGiven this, the signatures are:\nreal reduce_sum(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)\nreal reduce_sum_static(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)\n\nf - User defined function that computes partial sums\nx - Array to slice, each element corresponds to a term in the summation\ngrainsize - Target for size of slices\ns1, s2, ... 
- Arguments shared in every term\n\nThe user-defined partial sum functions have the signature:\nreal f(array[] T x_slice, int start, int end, T1 s1, T2 s2, ...)\nand take the arguments:\n\nx_slice - The subset of x (from reduce_sum / reduce_sum_static) for which this partial sum is responsible (x_slice = x[start:end])\nstart - An integer specifying the first term in the partial sum\nend - An integer specifying the last term in the partial sum (inclusive)\ns1, s2, ... - Arguments shared in every term (passed on without modification from the reduce_sum / reduce_sum_static call)\n\nThe user-provided function f is expected to compute the partial sum with the terms start through end of the overall sum. The user function is passed the subset x[start:end] as x_slice. start and end are passed so that f can index any of the trailing sM arguments as necessary. The trailing sM arguments are passed without modification to every call of f.\nA reduce_sum (or reduce_sum_static) call:\nreal sum = reduce_sum(f, x, grainsize, s1, s2, ...);\ncan be replaced by either:\nreal sum = f(x, 1, size(x), s1, s2, ...);\nor the code:\nreal sum = 0.0;\nfor(i in 1:size(x)) {\n sum += f({ x[i] }, i, i, s1, s2, ...);\n}\n\n\nLogistic regression is a useful example to clarify both the syntax and semantics of reduce summation and how it can be used to speed up a typical model. A basic logistic regression can be coded in Stan as:\ndata {\n int N;\n array[N] int y;\n vector[N] x;\n}\nparameters {\n vector[2] beta;\n}\nmodel {\n beta ~ std_normal();\n y ~ bernoulli_logit(beta[1] + beta[2] * x);\n}\nIn this model predictions are made about the N outputs y using the covariate x. The intercept and slope of the linear equation are to be estimated. 
The key point to getting this calculation to use reduce summation is recognizing that the statement:\ny ~ bernoulli_logit(beta[1] + beta[2] * x);\ncan be rewritten (up to a proportionality constant) as:\nfor(n in 1:N) {\n target += bernoulli_logit_lpmf(y[n] | beta[1] + beta[2] * x[n]);\n}\nNow it is clear that the calculation is the sum of a number of conditionally independent Bernoulli log probability statements, which is the condition where reduce summation is useful. To use the reduce summation, a function must be written that can be used to compute arbitrary partial sums of the total sum. Using the interface defined in Reduce-Sum, such a function can be written like:\nfunctions {\n real partial_sum(array[] int y_slice,\n int start, int end,\n vector x,\n vector beta) {\n return bernoulli_logit_lpmf(y_slice | beta[1] + beta[2] * x[start:end]);\n }\n}\nThe likelihood statement in the model can now be written:\ntarget += partial_sum(y, 1, N, x, beta); // Sum terms 1 to N of the likelihood\nIn this example, y was chosen to be sliced over because there is one term in the summation per value of y. Technically x would have worked as well. Use whatever conceptually makes the most sense for a given model, e.g. slice over independent terms like conditionally independent observations or groups of observations as in hierarchical models. Because x is a shared argument, it is subset accordingly with start:end. With this function, reduce summation can be used to automatically parallelize the likelihood:\nint grainsize = 1;\ntarget += reduce_sum(partial_sum, y,\n grainsize,\n x, beta);\nThe reduce summation facility automatically breaks the sum into pieces and computes them in parallel. grainsize = 1 specifies that the grainsize should be estimated automatically. 
The final model is:\nfunctions {\n real partial_sum(array[] int y_slice,\n int start, int end,\n vector x,\n vector beta) {\n return bernoulli_logit_lpmf(y_slice | beta[1] + beta[2] * x[start:end]);\n }\n}\ndata {\n int N;\n array[N] int y;\n vector[N] x;\n}\nparameters {\n vector[2] beta;\n}\nmodel {\n int grainsize = 1;\n beta ~ std_normal();\n target += reduce_sum(partial_sum, y,\n grainsize,\n x, beta);\n}\n\n\n\nThe rationale for choosing a sensible grainsize is based on balancing the overhead implied by creating many small tasks versus creating fewer large tasks which limits the potential parallelism.\nIn reduce_sum, grainsize is a recommendation on how to partition the work in the partial sum into smaller pieces. A grainsize of 1 leaves this entirely up to the internal scheduler and should be chosen if no benchmarking of other grainsizes is done. Ideally this will be efficient, but there are no guarantees.\nIn reduce_sum_static, grainsize is an upper limit on the worksize. Work will be split until all partial sums are just smaller than grainsize (and the split will happen the same way every time for the same inputs). For the static version it is more important to select a sensible grainsize.\nIn order to figure out an optimal grainsize, if there are N terms and M cores, run a quick test model with grainsize set roughly to N / M. Record the time, cut the grainsize in half, and run the test again. Repeat this iteratively until the model runtime begins to increase. This is a suitable grainsize for the model, because this ensures the calculations can be carried out with the most parallelism without losing too much efficiency.\nFor instance, in a model with N=10000 and M = 4, start with grainsize = 2500, and sequentially try grainsize = 1250, grainsize = 625, etc.\nIt is important to repeat this process until performance gets worse. It is possible after many halvings nothing happens, but there might still be a smaller grainsize that performs better. 
Even if a sum has many tens of thousands of terms, depending on the internal calculations, a grainsize of thirty or forty or smaller might be the best, and it is difficult to predict this behavior. Without doing these halvings until performance actually gets worse, it is easy to miss this.\n\n\n\n\nMap-reduce allows large calculations (e.g., log likelihoods) to be broken into components which may be calculated modularly (e.g., data blocks) and combined (e.g., by summation and incrementing the target log density).\nA map function is a higher-order function that applies an argument function to every member of some collection, returning a collection of the results. For example, mapping the square function, \\(f(x) = x^2\\), over the vector \\([3, 5, 10]\\) produces the vector \\([9, 25, 100]\\). In other words, map applies the square function elementwise.\nThe output of mapping a sequence is often fed into a reduction. A reduction function takes an arbitrarily long sequence of inputs and returns a single output. Examples of reduction functions are summation (with the return being a single value) or sorting (with the return being a sorted sequence). The combination of mapping and reducing is so common it has its own name, map-reduce.\n\n\nIn order to generalize the form of functions and results that are possible and accommodate both parameters (which need derivatives) and data values (which don’t), Stan’s map function operates on more than just a sequence of inputs.\n\n\n\nStan’s map function has the following signature\nvector map_rect((vector, vector, array[] real, array[] int):vector f,\n vector phi, array[] vector thetas,\n data array[,] real x_rs, data array[,] int x_is);\nThe arrays thetas of parameters, x_rs of real data, and x_is of integer data have the suffix “s” to indicate they are arrays. These arrays must all be the same size, as they will be mapped in parallel by the function f. 
The value of phi is reused in each mapped operation.\nThe _rect suffix in the name arises because the data structures it takes as arguments are rectangular. In order to deal with ragged inputs, ragged inputs must be padded out to rectangular form.\nThe last two arguments are two dimensional arrays of real and integer data values. These argument types are marked with the data qualifier to indicate that they must only contain variables originating in the data or transformed data blocks. This will allow such data to be pinned to a processor on which it is being processed to reduce communication overhead.\nThe notation (vector, vector, array[] real, array[] int):vector indicates that the function argument f must have the following signature.\nvector f(vector phi, vector theta,\n data array[] real x_r, data array[] int x_i);\nAlthough f will often return a vector of size one, the built-in flexibility allows general multivariate functions to be mapped, even raggedly.\n\n\nStan’s map function applies the function f to the shared parameters along with one element each of the job parameters, real data, and integer data arrays. Each of the arguments theta, x_r, and x_i must be arrays of the same size. If the arrays are all size N, the result is defined as follows.\nmap_rect(f, phi, thetas, xs, ns)\n= f(phi, thetas[1], xs[1], ns[1]) . f(phi, thetas[2], xs[2], ns[2])\n . ... . f(phi, thetas[N], xs[N], ns[N])\nThe dot operators in the notation above are meant to indicate concatenation (implemented as append_row in Stan). 
The output of each application of f is a vector, and the sequence of N vectors is concatenated together to return a single vector.\n\n\n\n\nAn example should help to clarify both the syntax and semantics of the mapping operation and how it may be combined with reductions built into Stan to provide a map-reduce implementation.\n\n\nConsider the following simple logistic regression model, which is coded unconventionally to accommodate direct translation to a mapped implementation.\ndata {\n array[12] int y;\n array[12] real x;\n}\nparameters {\n vector[2] beta;\n}\nmodel {\n beta ~ std_normal();\n y ~ bernoulli_logit(beta[1] + beta[2] * to_vector(x));\n}\nThe program is unusual in that it (a) hardcodes the data size, which is not required by the map function but is just used here for simplicity, (b) represents the predictors as a real array even though it needs to be used as a vector, and (c) represents the regression coefficients (intercept and slope) as a vector even though they’re used individually. The bernoulli_logit distribution is used because the argument is on the logit scale—it implicitly applies the inverse logit function to map the argument to a probability.\n\n\n\nThe unmapped logistic regression model described in the previous subsection may be implemented using Stan’s rectangular mapping functionality as follows.\nfunctions {\n vector lr(vector beta, vector theta, array[] real x, array[] int y) {\n real lp = bernoulli_logit_lpmf(y | beta[1]\n + to_vector(x) * beta[2]);\n return [lp]';\n }\n}\ndata {\n array[12] int y;\n array[12] real x;\n}\ntransformed data {\n // K = 3 shards\n array[3, 4] int ys = { y[1:4], y[5:8], y[9:12] };\n array[3, 4] real xs = { x[1:4], x[5:8], x[9:12] };\n array[3] vector[0] theta;\n}\nparameters {\n vector[2] beta;\n}\nmodel {\n beta ~ std_normal();\n target += sum(map_rect(lr, beta, theta, xs, ys));\n}\nThe first piece of the code is the actual function to compute the logistic regression. 
The argument beta will contain the regression coefficients (intercept and slope), as before. The second argument theta of job-specific parameters is not used, but nevertheless must be present. The modeled data y is passed as an array of integers and the predictors x as an array of real values. The function body then computes the log probability mass of y and assigns it to the local variable lp. This variable is then used in [lp]' to construct a row vector and then transpose it to a vector to return.\nThe data are taken in as before. There is an additional transformed data block that breaks the data up into three shards.1\nThe value 3 is also hard coded; a more practical program would allow the number of shards to be controlled. There are three parallel arrays defined here, each of size three, corresponding to the number of shards. The array ys contains the modeled data variables; each element of the array ys is an array of size four. The second array xs is for the predictors, and each element of it is also of size four. These contained arrays are the same size because the predictors x stand in a one-to-one relationship with the modeled data y. The final array theta is also of size three; its elements are empty vectors, because there are no shard-specific parameters.\nThe parameters and the prior are as before. The likelihood is now coded using map-reduce. The function lr to compute the log probability mass is mapped over the data xs and ys, which contain the original predictors and outcomes broken into shards. The parameters beta are in the first argument because they are shared across shards. 
There are no shard-specific parameters, so the array of job-specific parameters theta contains only empty vectors.\n\n\n\n\nConsider a hierarchical model of American presidential voting behavior based on state of residence.2\nEach of the fifty states \\(k \\in \\{1,\\dotsc,50\\}\\) will have its own slope \\(\\beta_k\\) and intercept \\(\\alpha_k\\) to model the log odds of voting for the Republican candidate as a function of income. Suppose there are \\(N\\) voters and with voter \\(n \\in 1{:}N\\) being in state \\(s[n]\\) with income \\(x_n\\). The data model for the vote \\(y_n \\in \\{ 0, 1 \\}\\) is \\[\ny_n \\sim \\textsf{Bernoulli}\n\\Big(\n \\operatorname{logit}^{-1}\\left( \\alpha_{s[n]} + \\beta_{s[n]} \\, x_n \\right)\n\\Big).\n\\]\nThe slopes and intercepts get hierarchical priors, \\[\\begin{align*}\n\\alpha_k &\\sim \\textsf{normal}(\\mu_{\\alpha}, \\sigma_{\\alpha}) \\\\\n\\beta_k &\\sim \\textsf{normal}(\\mu_{\\beta}, \\sigma_{\\beta})\n\\end{align*}\\]\n\n\nThis model can be coded up in Stan directly as follows.\ndata {\n int<lower=0> K;\n int<lower=0> N;\n array[N] int<lower=1, upper=K> kk;\n vector[N] x;\n array[N] int<lower=0, upper=1> y;\n}\nparameters {\n matrix[K, 2] beta;\n vector[2] mu;\n vector<lower=0>[2] sigma;\n}\nmodel {\n mu ~ normal(0, 2);\n sigma ~ normal(0, 2);\n for (i in 1:2) {\n beta[ , i] ~ normal(mu[i], sigma[i]);\n }\n y ~ bernoulli_logit(beta[kk, 1] + beta[kk, 2] .* x);\n}\nFor this model the vector of predictors x is coded as a vector, corresponding to how it is used in the model. The priors for mu and sigma are vectorized. The priors on the two components of beta (intercept and slope, respectively) are stored in a \\(K \\times 2\\) matrix.\nThe distribution statement is also vectorized using multi-indexing with index kk for the states and elementwise multiplication (.*) for the income x. 
The vectorized distribution statement works out to the same thing as the following less efficient looped form.\nfor (n in 1:N) {\n y[n] ~ bernoulli_logit(beta[kk[n], 1] + beta[kk[n], 2] * x[n]);\n}\n\n\n\nThe mapped version of the model will map over the states K. This means the group-level parameters, real data, and integer data must be arrays of the same size.\nThe mapped implementation requires a function to be mapped. In this function we can’t use distribution statements, but need to accumulate the desired log prior and log likelihood terms to the return value. The following function evaluates both the likelihood for the data observed for a group as well as the prior for the group-specific parameters (the name bernoulli_logit_glm derives from the fact that it’s a generalized linear model with a Bernoulli data model and logistic link function).\nfunctions {\n vector bl_glm(vector mu_sigma, vector beta,\n array[] real x, array[] int y) {\n vector[2] mu = mu_sigma[1:2];\n vector[2] sigma = mu_sigma[3:4];\n real lp = normal_lpdf(beta | mu, sigma);\n real ll = bernoulli_logit_lpmf(y | beta[1] + beta[2] * to_vector(x));\n return [lp + ll]';\n }\n}\nThe shared parameter mu_sigma contains the locations (mu_sigma[1:2]) and scales (mu_sigma[3:4]) of the priors, which are extracted in the first two lines of the program. The variable lp is assigned the log density of the prior on beta. The vector beta is of size two, as are the vectors mu and sigma, so everything lines up for the vectorization. Next, the variable ll is assigned to the log likelihood contribution for the group. Here beta[1] is the intercept of the regression and beta[2] the slope. The predictor array x needs to be converted to a vector to allow the multiplication.\nThe data block is identical to that of the previous program, but repeated here for convenience. 
A transformed data block computes the data structures needed for the mapping by organizing the data into arrays indexed by group.\ndata {\n int<lower=0> K;\n int<lower=0> N;\n array[N] int<lower=1, upper=K> kk;\n vector[N] x;\n array[N] int<lower=0, upper=1> y;\n}\ntransformed data {\n int<lower=0> J = N / K;\n array[K, J] real x_r;\n array[K, J] int<lower=0, upper=1> x_i;\n {\n int pos = 1;\n for (k in 1:K) {\n int end = pos + J - 1;\n x_r[k] = to_array_1d(x[pos:end]);\n x_i[k] = to_array_1d(y[pos:end]);\n pos += J;\n }\n }\n}\nThe integer J is set to the number of observations per group.3\nThe real data array x_r holds the predictors and the integer data array x_i holds the outcomes. The grouped data arrays are constructed by slicing the predictor vector x (and converting it to an array) and slicing the outcome array y.\nGiven the transformed data with groupings, the parameters are the same as the previous program. The model has the same priors for the hyperparameters mu and sigma, but moves the prior for beta and the likelihood to the mapped function.\nparameters {\n array[K] vector[2] beta;\n vector[2] mu;\n vector<lower=0>[2] sigma;\n}\nmodel {\n mu ~ normal(0, 2);\n sigma ~ normal(0, 2);\n target += sum(map_rect(bl_glm, append_row(mu, sigma), beta, x_r, x_i));\n \n}\nThe model as written here computes the priors for each group’s parameters along with the likelihood contribution for the group. An alternative mapping would leave the prior in the model block and only map the likelihood computation. In a serial setting this shouldn’t make much of a difference, but with parallelization, there is reduced communication (the prior’s parameters need not be transmitted) and also reduced parallelization with the version that leaves the prior in the model block.\n\n\n\n\nThe previous examples included rectangular data structures and single outputs. 
Despite the name, this is not technically required by map_rect.\n\n\nIf each group has a different number of observations, then the rectangular data structures for predictors and outcomes will need to be padded out to be rectangular. In addition, the size of the ragged structure will need to be passed as integer data. This holds for shards with varying numbers of parameters as well as varying numbers of data points.\n\n\n\nThe output of each mapped function is concatenated in order of inputs to produce the output of map_rect. When every shard returns a singleton (size one) array, the result is the same size as the number of shards and is easy to deal with downstream. If functions return longer arrays, they can still be structured using the to_matrix function if they are rectangular.\nIf the outputs are of varying sizes, then there will have to be some way to convert it back to a usable form based on the input, because there is no way to directly return sizes or a ragged structure.\n\n\n\n\n\nOpenCL (Open Computing Language) is a framework that enables writing programs that execute across heterogeneous platforms. An OpenCL program can be run on CPUs and GPUs. In order to run OpenCL programs, an OpenCL runtime must be installed on the target system.\nStan’s OpenCL backend is currently supported in CmdStan and its wrappers. In order to use it, the model must be compiled with the STAN_OPENCL makefile flag. 
Setting this flag means that the Stan-to-C++ translator (stanc3) will be supplied the --use-opencl flag and that the OpenCL enabled backend (Stan Math functions) will be enabled.\nIn Stan, the following distributions can be automatically run in parallel on both CPUs and GPUs with OpenCL:\n\nbernoulli_lpmf\nbernoulli_logit_lpmf\nbernoulli_logit_glm_lpmf*\nbeta_lpdf\nbeta_proportion_lpdf\nbinomial_lpmf\ncategorical_logit_glm_lpmf*\ncauchy_lpdf\nchi_square_lpdf\ndouble_exponential_lpdf\nexp_mod_normal_lpdf\nexponential_lpdf\nfrechet_lpdf\ngamma_lpdf\ngumbel_lpdf\ninv_chi_square_lpdf\ninv_gamma_lpdf\nlogistic_lpdf\nlognormal_lpdf\nneg_binomial_lpmf\nneg_binomial_2_lpmf\nneg_binomial_2_log_lpmf\nneg_binomial_2_log_glm_lpmf*\nnormal_lpdf\nnormal_id_glm_lpdf*\nordered_logistic_glm_lpmf*\npareto_lpdf\npareto_type_2_lpdf\npoisson_lpmf\npoisson_log_lpmf\npoisson_log_glm_lpmf*\nrayleigh_lpdf\nscaled_inv_chi_square_lpdf\nskew_normal_lpdf\nstd_normal_lpdf\nstudent_t_lpdf\nuniform_lpdf\nweibull_lpdf\n\n* OpenCL is not used when the covariate argument to the GLM functions is a row_vector.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Parallelization" + ] + }, + { + "objectID": "stan-users-guide/parallelization.html#reduce-sum", + "href": "stan-users-guide/parallelization.html#reduce-sum", + "title": "Parallelization", + "section": "", + "text": "It is often necessary in probabilistic modeling to compute the sum of a number of independent function evaluations. This occurs, for instance, when evaluating a number of conditionally independent terms in a log-likelihood. If g: U -> real is the function and { x1, x2, ... 
} is an array of inputs, then that sum looks like:\ng(x1) + g(x2) + ...\nreduce_sum and reduce_sum_static are tools for parallelizing these calculations.\nFor efficiency reasons the reduce function doesn’t work with the element-wise evaluated function g, but instead the partial sum function f: U[] -> real, where f computes the partial sum corresponding to a slice of the sequence x passed in. Due to the associativity of the sum reduction it holds that:\ng(x1) + g(x2) + g(x3) = f({ x1, x2, x3 })\n = f({ x1, x2 }) + f({ x3 })\n = f({ x1 }) + f({ x2, x3 })\n = f({ x1 }) + f({ x2 }) + f({ x3 })\nWith the partial sum function f: U[] -> real reduction of a large number of terms can be evaluated in parallel automatically, since the overall sum can be partitioned into arbitrary smaller partial sums. The exact partitioning into the partial sums is not under the control of the user. However, since the exact numerical result will depend on the order of summation, Stan provides two versions of the reduce summation facility:\n\nreduce_sum: Automatically choose partial sums partitioning based on a dynamic scheduling algorithm.\nreduce_sum_static: Compute the same sum as reduce_sum, but partition the input in the same way for given data set (in reduce_sum this partitioning might change depending on computer load).\n\ngrainsize is the one tuning parameter. For reduce_sum, grainsize is a suggested partial sum size. A grainsize of 1 leaves the partitioning entirely up to the scheduler. This should be the default way of using reduce_sum unless time is spent carefully picking grainsize. For picking a grainsize, see details below.\nFor reduce_sum_static, grainsize specifies the maximal partial sum size. With reduce_sum_static it is more important to choose grainsize carefully since it entirely determines the partitioning of work. See details below.\nFor efficiency and convenience additional shared arguments can be passed to every term in the sum. So for the array { x1, x2, ... 
} and the shared arguments s1, s2, ... the effective sum (with individual terms) looks like:\ng(x1, s1, s2, ...) + g(x2, s1, s2, ...) + g(x3, s1, s2, ...) + ...\nwhich can be written equivalently with partial sums to look like:\nf({ x1, x2 }, s1, s2, ...) + f({ x3 }, s1, s2, ...)\nwhere the particular slicing of the x array can change.\nGiven this, the signatures are:\nreal reduce_sum(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)\nreal reduce_sum_static(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)\n\nf - User defined function that computes partial sums\nx - Array to slice, each element corresponds to a term in the summation\ngrainsize - Target for size of slices\ns1, s2, ... - Arguments shared in every term\n\nThe user-defined partial sum functions have the signature:\nreal f(array[] T x_slice, int start, int end, T1 s1, T2 s2, ...)\nand take the arguments:\n\nx_slice - The subset of x (from reduce_sum / reduce_sum_static) for which this partial sum is responsible (x_slice = x[start:end])\nstart - An integer specifying the first term in the partial sum\nend - An integer specifying the last term in the partial sum (inclusive)\ns1, s2, ... - Arguments shared in every term (passed on without modification from the reduce_sum / reduce_sum_static call)\n\nThe user-provided function f is expected to compute the partial sum with the terms start through end of the overall sum. The user function is passed the subset x[start:end] as x_slice. start and end are passed so that f can index any of the trailing sM arguments as necessary. 
The trailing sM arguments are passed without modification to every call of f.\nA reduce_sum (or reduce_sum_static) call:\nreal sum = reduce_sum(f, x, grainsize, s1, s2, ...);\ncan be replaced by either:\nreal sum = f(x, 1, size(x), s1, s2, ...);\nor the code:\nreal sum = 0.0;\nfor(i in 1:size(x)) {\n sum += f({ x[i] }, i, i, s1, s2, ...);\n}\n\n\nLogistic regression is a useful example to clarify both the syntax and semantics of reduce summation and how it can be used to speed up a typical model. A basic logistic regression can be coded in Stan as:\ndata {\n int N;\n array[N] int y;\n vector[N] x;\n}\nparameters {\n vector[2] beta;\n}\nmodel {\n beta ~ std_normal();\n y ~ bernoulli_logit(beta[1] + beta[2] * x);\n}\nIn this model predictions are made about the N outputs y using the covariate x. The intercept and slope of the linear equation are to be estimated. The key point to getting this calculation to use reduce summation is recognizing that the statement:\ny ~ bernoulli_logit(beta[1] + beta[2] * x);\ncan be rewritten (up to a proportionality constant) as:\nfor(n in 1:N) {\n target += bernoulli_logit_lpmf(y[n] | beta[1] + beta[2] * x[n]);\n}\nNow it is clear that the calculation is the sum of a number of conditionally independent Bernoulli log probability statements, which is the condition where reduce summation is useful. To use the reduce summation, a function must be written that can be used to compute arbitrary partial sums of the total sum. Using the interface defined in Reduce-Sum, such a function can be written like:\nfunctions {\n real partial_sum(array[] int y_slice,\n int start, int end,\n vector x,\n vector beta) {\n return bernoulli_logit_lpmf(y_slice | beta[1] + beta[2] * x[start:end]);\n }\n}\nThe likelihood statement in the model can now be written:\ntarget += partial_sum(y, 1, N, x, beta); // Sum terms 1 to N of the likelihood\nIn this example, y was chosen to be sliced over because there is one term in the summation per value of y. 
Technically x would have worked as well. Use whatever conceptually makes the most sense for a given model, e.g. slice over independent terms like conditionally independent observations or groups of observations as in hierarchical models. Because x is a shared argument, it is subset accordingly with start:end. With this function, reduce summation can be used to automatically parallelize the likelihood:\nint grainsize = 1;\ntarget += reduce_sum(partial_sum, y,\n grainsize,\n x, beta);\nThe reduce summation facility automatically breaks the sum into pieces and computes them in parallel. grainsize = 1 specifies that the grainsize should be estimated automatically. The final model is:\nfunctions {\n real partial_sum(array[] int y_slice,\n int start, int end,\n vector x,\n vector beta) {\n return bernoulli_logit_lpmf(y_slice | beta[1] + beta[2] * x[start:end]);\n }\n}\ndata {\n int N;\n array[N] int y;\n vector[N] x;\n}\nparameters {\n vector[2] beta;\n}\nmodel {\n int grainsize = 1;\n beta ~ std_normal();\n target += reduce_sum(partial_sum, y,\n grainsize,\n x, beta);\n}\n\n\n\nThe rationale for choosing a sensible grainsize is based on balancing the overhead implied by creating many small tasks versus creating fewer large tasks which limits the potential parallelism.\nIn reduce_sum, grainsize is a recommendation on how to partition the work in the partial sum into smaller pieces. A grainsize of 1 leaves this entirely up to the internal scheduler and should be chosen if no benchmarking of other grainsizes is done. Ideally this will be efficient, but there are no guarantees.\nIn reduce_sum_static, grainsize is an upper limit on the worksize. Work will be split until all partial sums are just smaller than grainsize (and the split will happen the same way every time for the same inputs). 
For the static version it is more important to select a sensible grainsize.\nIn order to figure out an optimal grainsize, if there are N terms and M cores, run a quick test model with grainsize set roughly to N / M. Record the time, cut the grainsize in half, and run the test again. Repeat this iteratively until the model runtime begins to increase. This is a suitable grainsize for the model, because this ensures the calculations can be carried out with the most parallelism without losing too much efficiency.\nFor instance, in a model with N=10000 and M = 4, start with grainsize = 2500, and sequentially try grainsize = 1250, grainsize = 625, etc.\nIt is important to repeat this process until performance gets worse. It is possible after many halvings nothing happens, but there might still be a smaller grainsize that performs better. Even if a sum has many tens of thousands of terms, depending on the internal calculations, a grainsize of thirty or forty or smaller might be the best, and it is difficult to predict this behavior. Without doing these halvings until performance actually gets worse, it is easy to miss this.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Parallelization" + ] + }, + { + "objectID": "stan-users-guide/parallelization.html#map-rect", + "href": "stan-users-guide/parallelization.html#map-rect", + "title": "Parallelization", + "section": "", + "text": "Map-reduce allows large calculations (e.g., log likelihoods) to be broken into components which may be calculated modularly (e.g., data blocks) and combined (e.g., by summation and incrementing the target log density).\nA map function is a higher-order function that applies an argument function to every member of some collection, returning a collection of the results. For example, mapping the square function, \\(f(x) = x^2\\), over the vector \\([3, 5, 10]\\) produces the vector \\([9, 25, 100]\\). 
In other words, map applies the square function elementwise.\nThe output of mapping a sequence is often fed into a reduction. A reduction function takes an arbitrarily long sequence of inputs and returns a single output. Examples of reduction functions are summation (with the return being a single value) or sorting (with the return being a sorted sequence). The combination of mapping and reducing is so common it has its own name, map-reduce.\n\n\nIn order to generalize the form of functions and results that are possible and accommodate both parameters (which need derivatives) and data values (which don’t), Stan’s map function operates on more than just a sequence of inputs.\n\n\n\nStan’s map function has the following signature\nvector map_rect((vector, vector, array[] real, array[] int):vector f,\n vector phi, array[] vector thetas,\n data array[,] real x_rs, data array[,] int x_is);\nThe arrays thetas of parameters, x_rs of real data, and x_is of integer data have the suffix “s” to indicate they are arrays. These arrays must all be the same size, as they will be mapped in parallel by the function f. The value of phi is reused in each mapped operation.\nThe _rect suffix in the name arises because the data structures it takes as arguments are rectangular. In order to deal with ragged inputs, ragged inputs must be padded out to rectangular form.\nThe last two arguments are two dimensional arrays of real and integer data values. These argument types are marked with the data qualifier to indicate that they must only contain variables originating in the data or transformed data blocks. 
This will allow such data to be pinned to a processor on which it is being processed to reduce communication overhead.\nThe notation (vector, vector, array[] real, array[] int):vector indicates that the function argument f must have the following signature.\nvector f(vector phi, vector theta,\n data array[] real x_r, data array[] int x_i);\nAlthough f will often return a vector of size one, the built-in flexibility allows general multivariate functions to be mapped, even raggedly.\n\n\nStan’s map function applies the function f to the shared parameters along with one element each of the job parameters, real data, and integer data arrays. Each of the arguments theta, x_r, and x_i must be arrays of the same size. If the arrays are all size N, the result is defined as follows.\nmap_rect(f, phi, thetas, xs, ns)\n= f(phi, thetas[1], xs[1], ns[1]) . f(phi, thetas[2], xs[2], ns[2])\n . ... . f(phi, thetas[N], xs[N], ns[N])\nThe dot operators in the notation above are meant to indicate concatenation (implemented as append_row in Stan). 
The output of each application of f is a vector, and the sequence of N vectors is concatenated together to return a single vector.\n\n\n\n\nAn example should help to clarify both the syntax and semantics of the mapping operation and how it may be combined with reductions built into Stan to provide a map-reduce implementation.\n\n\nConsider the following simple logistic regression model, which is coded unconventionally to accommodate direct translation to a mapped implementation.\ndata {\n array[12] int y;\n array[12] real x;\n}\nparameters {\n vector[2] beta;\n}\nmodel {\n beta ~ std_normal();\n y ~ bernoulli_logit(beta[1] + beta[2] * to_vector(x));\n}\nThe program is unusual in that it (a) hardcodes the data size, which is not required by the map function but is just used here for simplicity, (b) represents the predictors as a real array even though it needs to be used as a vector, and (c) represents the regression coefficients (intercept and slope) as a vector even though they’re used individually. The bernoulli_logit distribution is used because the argument is on the logit scale—it implicitly applies the inverse logit function to map the argument to a probability.\n\n\n\nThe unmapped logistic regression model described in the previous subsection may be implemented using Stan’s rectangular mapping functionality as follows.\nfunctions {\n vector lr(vector beta, vector theta, array[] real x, array[] int y) {\n real lp = bernoulli_logit_lpmf(y | beta[1]\n + to_vector(x) * beta[2]);\n return [lp]';\n }\n}\ndata {\n array[12] int y;\n array[12] real x;\n}\ntransformed data {\n // K = 3 shards\n array[3, 4] int ys = { y[1:4], y[5:8], y[9:12] };\n array[3, 4] real xs = { x[1:4], x[5:8], x[9:12] };\n array[3] vector[0] theta;\n}\nparameters {\n vector[2] beta;\n}\nmodel {\n beta ~ std_normal();\n target += sum(map_rect(lr, beta, theta, xs, ys));\n}\nThe first piece of the code is the actual function to compute the logistic regression. 
The argument beta will contain the regression coefficients (intercept and slope), as before. The second argument theta of job-specific parameters is not used, but nevertheless must be present. The modeled data y is passed as an array of integers and the predictors x as an array of real values. The function body then computes the log probability mass of y and assigns it to the local variable lp. This variable is then used in [lp]' to construct a row vector and then transpose it to a vector to return.\nThe data are taken in as before. There is an additional transformed data block that breaks the data up into three shards.1\nThe value 3 is also hard coded; a more practical program would allow the number of shards to be controlled. There are three parallel arrays defined here, each of size three, corresponding to the number of shards. The array ys contains the modeled data variables; each element of the array ys is an array of size four. The second array xs is for the predictors, and each element of it is also of size four. These contained arrays are the same size because the predictors x stand in a one-to-one relationship with the modeled data y. The final array theta is also of size three; its elements are empty vectors, because there are no shard-specific parameters.\nThe parameters and the prior are as before. The likelihood is now coded using map-reduce. The function lr to compute the log probability mass is mapped over the data xs and ys, which contain the original predictors and outcomes broken into shards. The parameters beta are in the first argument because they are shared across shards. 
There are no shard-specific parameters, so the array of job-specific parameters theta contains only empty vectors.\n\n\n\n\nConsider a hierarchical model of American presidential voting behavior based on state of residence.2\nEach of the fifty states \\(k \\in \\{1,\\dotsc,50\\}\\) will have its own slope \\(\\beta_k\\) and intercept \\(\\alpha_k\\) to model the log odds of voting for the Republican candidate as a function of income. Suppose there are \\(N\\) voters and with voter \\(n \\in 1{:}N\\) being in state \\(s[n]\\) with income \\(x_n\\). The data model for the vote \\(y_n \\in \\{ 0, 1 \\}\\) is \\[\ny_n \\sim \\textsf{Bernoulli}\n\\Big(\n \\operatorname{logit}^{-1}\\left( \\alpha_{s[n]} + \\beta_{s[n]} \\, x_n \\right)\n\\Big).\n\\]\nThe slopes and intercepts get hierarchical priors, \\[\\begin{align*}\n\\alpha_k &\\sim \\textsf{normal}(\\mu_{\\alpha}, \\sigma_{\\alpha}) \\\\\n\\beta_k &\\sim \\textsf{normal}(\\mu_{\\beta}, \\sigma_{\\beta})\n\\end{align*}\\]\n\n\nThis model can be coded up in Stan directly as follows.\ndata {\n int<lower=0> K;\n int<lower=0> N;\n array[N] int<lower=1, upper=K> kk;\n vector[N] x;\n array[N] int<lower=0, upper=1> y;\n}\nparameters {\n matrix[K, 2] beta;\n vector[2] mu;\n vector<lower=0>[2] sigma;\n}\nmodel {\n mu ~ normal(0, 2);\n sigma ~ normal(0, 2);\n for (i in 1:2) {\n beta[ , i] ~ normal(mu[i], sigma[i]);\n }\n y ~ bernoulli_logit(beta[kk, 1] + beta[kk, 2] .* x);\n}\nFor this model the vector of predictors x is coded as a vector, corresponding to how it is used in the model. The priors for mu and sigma are vectorized. The priors on the two components of beta (intercept and slope, respectively) are stored in a \\(K \\times 2\\) matrix.\nThe distribution statement is also vectorized using multi-indexing with index kk for the states and elementwise multiplication (.*) for the income x. 
The vectorized distribution statement works out to the same thing as the following less efficient looped form.\nfor (n in 1:N) {\n y[n] ~ bernoulli_logit(beta[kk[n], 1] + beta[kk[n], 2] * x[n]);\n}\n\n\n\nThe mapped version of the model will map over the states K. This means the group-level parameters, real data, and integer-data must be arrays of the same size.\nThe mapped implementation requires a function to be mapped. In this function we can’t use distribution statements, but need to accumulate the desired log prior and log likelihood terms to the return value. The following function evaluates both the likelihood for the data observed for a group as well as the prior for the group-specific parameters (the name bl_glm abbreviates bernoulli_logit_glm, which derives from the fact that it’s a generalized linear model with a Bernoulli data model and logistic link function).\nfunctions {\n vector bl_glm(vector mu_sigma, vector beta,\n array[] real x, array[] int y) {\n vector[2] mu = mu_sigma[1:2];\n vector[2] sigma = mu_sigma[3:4];\n real lp = normal_lpdf(beta | mu, sigma);\n real ll = bernoulli_logit_lpmf(y | beta[1] + beta[2] * to_vector(x));\n return [lp + ll]';\n }\n}\nThe shared parameter mu_sigma contains the locations (mu_sigma[1:2]) and scales (mu_sigma[3:4]) of the priors, which are extracted in the first two lines of the program. The variable lp is assigned the log density of the prior on beta. The vector beta is of size two, as are the vectors mu and sigma, so everything lines up for the vectorization. Next, the variable ll is assigned to the log likelihood contribution for the group. Here beta[1] is the intercept of the regression and beta[2] the slope. The predictor array x needs to be converted to a vector to allow the multiplication.\nThe data block is identical to that of the previous program, but repeated here for convenience. 
A transformed data block computes the data structures needed for the mapping by organizing the data into arrays indexed by group.\ndata {\n int<lower=0> K;\n int<lower=0> N;\n array[N] int<lower=1, upper=K> kk;\n vector[N] x;\n array[N] int<lower=0, upper=1> y;\n}\ntransformed data {\n int<lower=0> J = N / K;\n array[K, J] real x_r;\n array[K, J] int<lower=0, upper=1> x_i;\n {\n int pos = 1;\n for (k in 1:K) {\n int end = pos + J - 1;\n x_r[k] = to_array_1d(x[pos:end]);\n x_i[k] = to_array_1d(y[pos:end]);\n pos += J;\n }\n }\n}\nThe integer J is set to the number of observations per group.3\nThe real data array x_r holds the predictors and the integer data array x_i holds the outcomes. The grouped data arrays are constructed by slicing the predictor vector x (and converting it to an array) and slicing the outcome array y.\nGiven the transformed data with groupings, the parameters are the same as the previous program. The model has the same priors for the hyperparameters mu and sigma, but moves the prior for beta and the likelihood to the mapped function.\nparameters {\n array[K] vector[2] beta;\n vector[2] mu;\n vector<lower=0>[2] sigma;\n}\nmodel {\n mu ~ normal(0, 2);\n sigma ~ normal(0, 2);\n target += sum(map_rect(bl_glm, append_row(mu, sigma), beta, x_r, x_i));\n \n}\nThe model as written here computes the priors for each group’s parameters along with the likelihood contribution for the group. An alternative mapping would leave the prior in the model block and only map the likelihood computation. In a serial setting this shouldn’t make much of a difference, but with parallelization, there is reduced communication (the prior’s parameters need not be transmitted) and also reduced parallelization with the version that leaves the prior in the model block.\n\n\n\n\nThe previous examples included rectangular data structures and single outputs. 
Despite the name, this is not technically required by map_rect.\n\n\nIf each group has a different number of observations, then the rectangular data structures for predictors and outcomes will need to be padded out to be rectangular. In addition, the size of the ragged structure will need to be passed as integer data. This holds for shards with varying numbers of parameters as well as varying numbers of data points.\n\n\n\nThe output of each mapped function is concatenated in order of inputs to produce the output of map_rect. When every shard returns a singleton (size one) array, the result is the same size as the number of shards and is easy to deal with downstream. If functions return longer arrays, they can still be structured using the to_matrix function if they are rectangular.\nIf the outputs are of varying sizes, then there will have to be some way to convert them back to a usable form based on the input, because there is no way to directly return sizes or a ragged structure.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Parallelization" + ] + }, + { + "objectID": "stan-users-guide/parallelization.html#opencl", + "href": "stan-users-guide/parallelization.html#opencl", + "title": "Parallelization", + "section": "", + "text": "OpenCL (Open Computing Language) is a framework that enables writing programs that execute across heterogeneous platforms. An OpenCL program can be run on CPUs and GPUs. In order to run OpenCL programs, an OpenCL runtime must be installed on the target system.\nStan’s OpenCL backend is currently supported in CmdStan and its wrappers. In order to use it, the model must be compiled with the STAN_OPENCL makefile flag. 
Setting this flag means that the Stan-to-C++ translator (stanc3) will be supplied the --use-opencl flag and that the OpenCL enabled backend (Stan Math functions) will be enabled.\nIn Stan, the following distributions can be automatically run in parallel on both CPUs and GPUs with OpenCL:\n\nbernoulli_lpmf\nbernoulli_logit_lpmf\nbernoulli_logit_glm_lpmf*\nbeta_lpdf\nbeta_proportion_lpdf\nbinomial_lpmf\ncategorical_logit_glm_lpmf*\ncauchy_lpdf\nchi_square_lpdf\ndouble_exponential_lpdf\nexp_mod_normal_lpdf\nexponential_lpdf\nfrechet_lpdf\ngamma_lpdf\ngumbel_lpdf\ninv_chi_square_lpdf\ninv_gamma_lpdf\nlogistic_lpdf\nlognormal_lpdf\nneg_binomial_lpmf\nneg_binomial_2_lpmf\nneg_binomial_2_log_lpmf\nneg_binomial_2_log_glm_lpmf*\nnormal_lpdf\nnormal_id_glm_lpdf*\nordered_logistic_glm_lpmf*\npareto_lpdf\npareto_type_2_lpdf\npoisson_lpmf\npoisson_log_lpmf\npoisson_log_glm_lpmf*\nrayleigh_lpdf\nscaled_inv_chi_square_lpdf\nskew_normal_lpdf\nstd_normal_lpdf\nstudent_t_lpdf\nuniform_lpdf\nweibull_lpdf\n\n* OpenCL is not used when the covariate argument to the GLM functions is a row_vector.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Parallelization" + ] + }, + { + "objectID": "stan-users-guide/parallelization.html#footnotes", + "href": "stan-users-guide/parallelization.html#footnotes", + "title": "Parallelization", + "section": "Footnotes", + "text": "Footnotes\n\n\nThe term “shard” is borrowed from databases, where it refers to a slice of the rows of a database. That is exactly what it is here if we think of rows of a dataframe. Stan’s shards are more general in that they need not correspond to rows of a dataframe.↩︎\nThis example is a simplified form of the model described in (Gelman and Hill 2007, sec. 
14.2)↩︎\nThis makes the strong assumption that each group has the same number of observations!↩︎", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Parallelization" + ] + }, + { + "objectID": "stan-users-guide/posterior-predictive-checks.html", + "href": "stan-users-guide/posterior-predictive-checks.html", + "title": "Posterior and Prior Predictive Checks", + "section": "", + "text": "Posterior predictive checks are a way of measuring whether a model does a good job of capturing relevant aspects of the data, such as means, standard deviations, and quantiles (Rubin 1984; Andrew Gelman, Meng, and Stern 1996). Posterior predictive checking works by simulating new replicated data sets based on the fitted model parameters and then comparing statistics applied to the replicated data set with the same statistic applied to the original data set.\nPrior predictive checks evaluate the prior the same way. Specifically, they evaluate what data sets would be consistent with the prior. They will not be calibrated with actual data, but extreme values help diagnose priors that are either too strong, too weak, poorly shaped, or poorly located.\nPrior and posterior predictive checks are two cases of the general concept of predictive checks, just conditioning on different things (no data and the observed data, respectively). For hierarchical models, there are intermediate versions, as discussed in the section on hierarchical models and mixed replication.\n\n\nThe posterior predictive distribution is the distribution over new observations given previous observations. It’s predictive in the sense that it’s predicting behavior on new data that is not part of the training set. 
It’s posterior in that everything is conditioned on observed data \\(y\\).\nThe posterior predictive distribution for replications \\(y^{\\textrm{rep}}\\) of the original data set \\(y\\) given model parameters \\(\\theta\\) is defined by \\[\np(y^{\\textrm{rep}} \\mid y)\n= \\int p(y^{\\textrm{rep}} \\mid \\theta)\n \\cdot p(\\theta \\mid y) \\, \\textrm{d}\\theta.\n\\]\nAs with other posterior predictive quantities, generating a replicated data set \\(y^{\\textrm{rep}}\\) from the posterior predictive distribution is straightforward using the generated quantities block. Consider a simple regression model with parameters \\(\\theta = (\\alpha, \\beta, \\sigma).\\)\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n alpha ~ normal(0, 2);\n beta ~ normal(0, 1);\n sigma ~ normal(0, 1);\n y ~ normal(alpha + beta * x, sigma);\n}\nTo generate a replicated data set y_rep for this simple model, the following generated quantities block suffices.\ngenerated quantities {\n array[N] real y_rep = normal_rng(alpha + beta * x, sigma);\n}\nThe vectorized form of the normal random number generator is used with the original predictors x and the model parameters alpha, beta, and sigma. The replicated data variable y_rep is declared to be the same size as the original data y, but instead of a vector type, it is declared to be an array of reals to match the return type of the function normal_rng. Because the vector and real array types have the same dimensions and layout, they can be plotted against one another and otherwise compared during downstream processing.\nThe posterior predictive sampling for posterior predictive checks is different from usual posterior predictive sampling discussed in the chapter on posterior predictions in that the original predictors \\(x\\) are used. 
That is, the posterior predictions are for the original data.\n\n\n\nA standard posterior predictive check would plot a histogram of each replicated data set along with the original data set and compare them by eye. For this purpose, only a few replications are needed. These should be taken by thinning a larger set of replications down to the size needed to ensure rough independence of the replications.\nHere’s a complete example where the model is a simple Poisson with a weakly informative exponential prior with a mean of 10 and standard deviation of 10.\ndata {\n int<lower=0> N;\n array[N] int<lower=0> y;\n}\ntransformed data {\n real<lower=0> mean_y = mean(to_vector(y));\n real<lower=0> sd_y = sd(to_vector(y));\n}\nparameters {\n real<lower=0> lambda;\n}\nmodel {\n y ~ poisson(lambda);\n lambda ~ exponential(0.2);\n}\ngenerated quantities {\n array[N] int<lower=0> y_rep = poisson_rng(rep_array(lambda, N));\n real<lower=0> mean_y_rep = mean(to_vector(y_rep));\n real<lower=0> sd_y_rep = sd(to_vector(y_rep));\n int<lower=0, upper=1> mean_gte = (mean_y_rep >= mean_y);\n int<lower=0, upper=1> sd_gte = (sd_y_rep >= sd_y);\n}\nThe generated quantities block creates a variable y_rep for the replicated data, variables mean_y_rep and sd_y_rep for the statistics of the replicated data, and indicator variables mean_gte and sd_gte for whether the replicated statistic is greater than or equal to the statistic applied to the original data.\nNow consider generating data \\(y \\sim \\textrm{Poisson}(5)\\). The resulting small multiples plot shows the original data plotted in the upper left and eight different posterior replications plotted in the remaining boxes.\n\n\n\nPosterior predictive checks for Poisson data generating process and Poisson model.\n\n\nWith a Poisson data-generating process and Poisson model, the posterior replications look similar to the original data. 
If it were easy to pick the original data out of the lineup, there would be a problem.\nNow consider generating over-dispersed data \\(y \\sim \\textrm{negative-binomial2}(5, 1).\\) This has the same mean as \\(\\textrm{Poisson}(5)\\), namely \\(5\\), but a standard deviation of \\(\\sqrt{5 + 5^2 /1} \\approx 5.5.\\) There is no way to fit this data with the Poisson model, because a variable distributed as \\(\\textrm{Poisson}(\\lambda)\\) has mean \\(\\lambda\\) and standard deviation \\(\\sqrt{\\lambda},\\) which is \\(\\sqrt{5}\\) for \\(\\textrm{Poisson}(5).\\) Here’s the resulting small multiples plot, again with original data in the upper left.\n\n\n\nPosterior predictive checks for negative binomial data generating process and Poisson model.\n\n\nThis time, the original data stands out in stark contrast to the replicated data sets, all of which are clearly more symmetric and lower variance than the original data. That is, the model’s not appropriately capturing the variance of the data.\n\n\n\nIf a model captures the data well, summary statistics such as sample mean and standard deviation, should have similar values in the original and replicated data sets. This can be tested by means of a p-value-like statistic, which here is just the probability the test statistic \\(s(\\cdot)\\) in a replicated data set exceeds that in the original data, \\[\n\\Pr\\!\\left[ s(y^{\\textrm{rep}}) \\geq s(y) \\mid y \\right]\n=\n\\int\n\\textrm{I}\\left( s(y^{\\textrm{rep}}) \\geq s(y) \\mid y \\right)\n\\cdot p\\left( y^{\\textrm{rep}} \\mid y \\right)\n\\, \\textrm{d}{y^{\\textrm{rep}}}.\n\\] It is important to note that ‘’p-values’’ is in quotes because these statistics are not classically calibrated, and thus will not in general have a uniform distribution even when the model is well specified (Bayarri and Berger 2000).\nNevertheless, values of this statistic very close to zero or one are cause for concern that the model is not fitting the data well. 
Unlike a visual test, this p-value-like test is easily automated for bulk model fitting.\nTo calculate event probabilities in Stan, it suffices to define indicator variables that take on value 1 if the event occurs and 0 if it does not. The posterior mean is then the event probability. For efficiency, indicator variables are defined in the generated quantities block.\ngenerated quantities {\n int<lower=0, upper=1> mean_gt;\n int<lower=0, upper=1> sd_gt;\n {\n array[N] real y_rep = normal_rng(alpha + beta * x, sigma);\n mean_gt = mean(y_rep) > mean(y);\n sd_gt = sd(y_rep) > sd(y);\n }\n}\nThe indicator variable mean_gt will have value 1 if the mean of the simulated data y_rep is greater than or equal to the mean of the original data y. Because the values of y_rep are not needed for the posterior predictive checks, the program saves output space by using a local variable for y_rep. The statistics mean(y) and sd(y) could also be computed in the transformed data block and saved.\nFor the example in the previous section, where over-dispersed data generated by a negative binomial distribution was fit with a simple Poisson model, the following plot illustrates the posterior p-value calculation for the mean statistic.\n\n\n\nHistogram of means of replicated data sets; vertical red line at mean of original data.\n\n\nThe p-value for the mean is just the percentage of replicated data sets whose statistic is greater than or equal to that of the original data. Using a Poisson model for negative binomial data still fits the mean well, with a posterior \\(p\\)-value of 0.49. In Stan terms, it is extracted as the posterior mean of the indicator variable mean_gt.\nThe standard deviation statistic tells a different story.\n\n\n\nScatterplot of standard deviations of replicated data sets; the vertical red line is at standard deviation of original data.\n\n\nHere, the original data has much higher standard deviation than any of the replicated data sets. 
The resulting \\(p\\)-value estimated by Stan after a large number of iterations is exactly zero (the absolute error bounds are fine, but a lot of iterations are required to get good relative error bounds on small \\(p\\)-values by sampling). In other words, there were no posterior draws in which the replicated data set had a standard deviation greater than or equal to that of the original data set. Clearly, the model is not capturing the dispersion of the original data. The point of this exercise isn’t just to figure out that there’s a problem with a model, but to isolate where it is. Seeing that the data is over-dispersed compared to the Poisson model would be reason to fit a more general model like the negative binomial or a latent varying effects (aka random effects) model that can account for the over-dispersion.\n\n\nAny statistic may be used for the data, but these can be guided by the quantities of interest in the model itself. Popular choices in addition to mean and standard deviation are quantiles, such as the median, 5% or 95% quantiles, or even the maximum or minimum value to test extremes.\nDespite the range of choices, test statistics should ideally be ancillary, in the sense that they should be testing something other than the fit of a parameter. For example, a simple normal model of a data set will typically fit the mean and variance of the data quite well as long as the prior doesn’t dominate the posterior. In contrast, a Poisson model of the same data cannot capture both the mean and the variance of a data set if they are different, so they bear checking in the Poisson case. As we saw with the Poisson case, the posterior mean for the single rate parameter was located near the data mean, not the data variance. 
Other distributions, such as the lognormal and gamma distributions, have means and variances that are functions of two or more parameters.\n\n\n\n\nPrior predictive checks generate data according to the prior in order to assess whether a prior is appropriate (Gabry et al. 2019). A posterior predictive check generates replicated data according to the posterior predictive distribution. In contrast, the prior predictive check generates data according to the prior predictive distribution, \\[\ny^{\\textrm{sim}} \\sim p(y).\n\\] The prior predictive distribution is just like the posterior predictive distribution with no observed data, so that a prior predictive check is nothing more than the limiting case of a posterior predictive check with no data.\nThis is easy to carry out mechanically by simulating parameters \\[\n\\theta^{\\textrm{sim}} \\sim p(\\theta)\n\\] according to the priors, then simulating data \\[\ny^{\\textrm{sim}} \\sim p(y \\mid \\theta^{\\textrm{sim}})\n\\] according to the data model given the simulated parameters. The result is a simulation from the joint distribution, \\[\n(y^{\\textrm{sim}}, \\theta^{\\textrm{sim}}) \\sim p(y, \\theta)\n\\] and thus \\[\ny^{\\textrm{sim}} \\sim p(y)\n\\] is a simulation from the prior predictive distribution.\n\n\nA prior predictive check is coded just like a posterior predictive check. If a posterior predictive check has already been coded and it’s possible to set the data to be empty, then no additional coding is necessary. The disadvantage to coding prior predictive checks as posterior predictive checks with no data is that Markov chain Monte Carlo will be used to sample the parameters, which is less efficient than taking independent draws using random number generation.\nPrior predictive checks can be coded entirely within the generated quantities block using random number generation. The resulting draws will be independent. 
Predictors must be read in from the actual data set—they do not have a generative model from which to be simulated. For a Poisson regression, prior predictive sampling can be encoded as the following complete Stan program.\ndata {\n int<lower=0> N;\n vector[N] x;\n}\ngenerated quantities {\n real alpha = normal_rng(0, 1);\n real beta = normal_rng(0, 1);\n array[N] real y_sim = poisson_log_rng(alpha + beta * x);\n}\nRunning this program using Stan’s fixed-parameter sampler yields draws from the prior. These may be plotted to consider their appropriateness.\n\n\n\n\nSuppose we have a model for a football (aka soccer) league where there are \\(J\\) teams. Each team has a scoring rate \\(\\lambda_j\\) and in each game will be assumed to score \\(\\textrm{poisson}(\\lambda_j)\\) points. Yes, this model completely ignores defense. Suppose the modeler does not want to “put their thumb on the scale” and would rather “let the data speak for themselves” and so uses a prior with very wide tails, because it seems uninformative, such as the widely deployed \\[\n\\lambda_j \\sim \\textrm{gamma}(\\epsilon_1, \\epsilon_2).\n\\] This is not just a manufactured example; The BUGS Book recommends setting \\(\\epsilon = (0.5, 0.00001)\\), which corresponds to a Jeffreys prior for a Poisson rate parameter prior (Lunn et al. 2012, 85).\nSuppose the league plays a round-robin tournament wherein every team plays every other team. The following Stan model generates random team abilities and the results of such a round-robin tournament, which may be used to perform prior predictive checks.\ndata {\n int<lower=0> J;\n array[2] real<lower=0> epsilon;\n}\ngenerated quantities {\n array[J] real<lower=0> lambda;\n array[J, J] int y;\n for (j in 1:J) lambda[j] = gamma_rng(epsilon[1], epsilon[2]);\n for (i in 1:J) {\n for (j in 1:J) {\n y[i, j] = poisson_rng(lambda[i]) - poisson_rng(lambda[j]);\n }\n }\n}\nIn this simulation, teams play each other twice and play themselves once. 
This could be made more realistic by controlling the combinatorics to only generate a single result for each pair of teams, of which there are \\(\\binom{J}{2} = \\frac{J \\cdot (J - 1)}{2}.\\)\nUsing the \\(\\textrm{gamma}(0.5, 0.00001)\\) reference prior on team abilities, the following are the first 20 simulated point differences for the match between the first two teams, \\(y^{(1:20)}_{1, 2}\\).\n2597 -26000 5725 22496 1270 1072 4502 -2809 -302 4987\n7513 7527 -3268 -12374 3828 -158 -29889 2986 -1392 66\nThat’s some pretty highly scoring football games being simulated; all but one has a score differential greater than 100! In other words, this \\(\\textrm{gamma}(0.5, 0.00001)\\) prior is putting around 95% of its weight on score differentials above 100. Given that two teams combined rarely score 10 points, this prior is way out of line with prior knowledge about football matches; it is not only consistent with outcomes that have never occurred in the history of the sport, it puts most of the prior probability mass there.\nThe posterior predictive distribution can be strongly affected by the prior when there is not much observed data and substantial prior mass is concentrated around infeasible values (A. Gelman 2006).\nJust as with posterior predictive distributions, any statistics of the generated data may be evaluated. Here, the focus was on score difference between a single pair of teams, but it could’ve been on maximums, minimums, averages, variances, etc.\nIn this textbook example, the prior is univariate and directly related to the expected number of points scored, and could thus be directly inspected for consistency with prior knowledge about scoring rates in football. There will not be the same kind of direct connection when the prior and data model distributions are multivariate. 
In these more challenging situations, prior predictive checks are an easy way to get a handle on the implications of a prior in terms of what it says the data is going to look like; for a more complex application involving spatially heterogeneous air pollution concentration, see (Gabry et al. 2019).\nPrior predictive checks can also be compared with the data, but one should not expect them to be calibrated in the same way as posterior predictive checks. That would require guessing the posterior and encoding it in the prior. The goal is to make sure the prior is not so wide that it will pull probability mass away from feasible values.\n\n\n\nAndrew Gelman, Meng, and Stern (1996) discuss the case of mixed replication for hierarchical models in which the hyperparameters remain fixed, but varying effects are replicated. This is neither a purely prior nor purely posterior predictive check, but falls somewhere in between.\nFor example, consider a simple varying intercept logistic regression, with intercepts \\(\\alpha_k\\) for \\(k \\in 1:K\\). 
Each data item \\(y_n \\in \\{ 0, 1 \\}\\) is assumed to correspond to group \\(kk_n \\in 1:K.\\) The data model is thus \\[\ny_n \\sim \\textrm{bernoulli}(\\textrm{logit}^{-1}(\\alpha_{kk[n]})).\n\\] The varying intercepts have a hierarchical normal prior, \\[\n\\alpha_k \\sim \\textrm{normal}(\\mu, \\sigma).\n\\] The hyperparameters are themselves given weakly informative priors, \\[\\begin{eqnarray*}\n\\mu & \\sim & \\textrm{normal}(0, 2)\n\\\\[4pt]\n\\sigma & \\sim & \\textrm{lognormal}(0, 1).\n\\end{eqnarray*}\\]\nLike in a posterior predictive check, the hyperparameters \\(\\mu\\) and \\(\\sigma\\) are drawn from the posterior, \\[\n\\mu^{(m)}, \\sigma^{(m)} \\sim p(\\mu, \\sigma \\mid y)\n\\] Like in a prior predictive check, replicated values of \\(\\alpha\\) are drawn from the hyperparameters, \\[\n\\alpha^{\\textrm{rep}(m)}_k \\sim \\textrm{normal}(\\alpha_k \\mid\n\\mu^{(m)}, \\sigma^{(m)}).\n\\] The data items are then each replicated using the replicated intercepts, \\[\ny^{\\textrm{rep}(m)}_n \\sim\n\\textrm{bernoulli}\n (\\textrm{logit}^{-1}(\\alpha^{\\textrm{rep}(m)}_{kk[n]})).\n\\] Thus the \\(y^{\\textrm{rep}(m)}\\) can be seen as a kind of posterior predictive replication of observations from new groups that were not among the original \\(K\\) groups.\nIn Stan, mixed predictive replications \\(y^{\\textrm{rep}(m)}\\) can be programmed directly.\ndata {\n int<lower=0> K;\n int<lower=0> N;\n array[N] int<lower=1, upper=K> kk;\n array[N] int<lower=0, upper=1> y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n vector<offset=mu, multiplier=sigma>[K] alpha;\n}\nmodel {\n mu ~ normal(0, 2); // hyperprior\n sigma ~ lognormal(0, 1);\n alpha ~ normal(mu, sigma); // hierarchical prior\n y ~ bernoulli_logit(alpha[kk]); // data model\n}\ngenerated quantities {\n // alpha replicated; mu and sigma not replicated\n array[K] real alpha_rep\n = normal_rng(rep_vector(mu, K), sigma);\n array[N] int<lower=0, upper=1> y_rep\n = 
bernoulli_logit_rng(alpha_rep[kk]);\n}\n\n\n\nFollowing Andrew Gelman, Meng, and Stern (1996), prior, posterior, and mixed replications may all be defined as posteriors from joint models over parameters and observed and replicated data.\n\n\nFor example, posterior predictive replication may be formulated using distribution notation as follows. \\[\\begin{eqnarray*}\n\\theta & \\sim & p(\\theta)\n\\\\[2pt]\ny & \\sim & p(y \\mid \\theta)\n\\\\[2pt]\ny^{\\textrm{rep}} & \\sim & p(y \\mid \\theta)\n\\end{eqnarray*}\\] The heavily overloaded distribution notation is meant to indicate that both \\(y\\) and \\(y^{\\textrm{rep}}\\) are drawn from the same distribution, or more formally using capital letters to distinguish random variables, that the conditional densities \\(p_{Y^{\\textrm{rep}} \\mid\n\\Theta}\\) and \\(p_{Y \\mid \\Theta}\\) are the same.\nThe joint density is \\[\np(\\theta, y, y^{\\textrm{rep}})\n= p(\\theta) \\cdot p(y \\mid \\theta) \\cdot p(y^{\\textrm{rep}} \\mid \\theta).\n\\] This again is assuming that the two distributions for \\(y\\) and \\(y^{\\textrm{rep}}\\) are identical.\nThe variable \\(y\\) is observed, with the predictive simulation \\(y^{\\textrm{rep}}\\) and parameter vector \\(\\theta\\) not observed. The posterior is \\(p(y^{\\textrm{rep}}, \\theta \\mid y)\\). Given draws from the posterior, the posterior predictive simulations \\(y^{\\textrm{rep}}\\) are retained.\n\n\n\nThe prior predictive model simply drops the data component of the posterior predictive model. \\[\\begin{eqnarray*}\n\\theta & \\sim & p(\\theta)\n\\\\[2pt]\ny^{\\textrm{rep}} & \\sim & p(y \\mid \\theta)\n\\end{eqnarray*}\\] This corresponds to the joint density \\[\np(\\theta, y^{\\textrm{rep}}) = p(\\theta) \\cdot p(y^{\\textrm{rep}} \\mid\n\\theta).\n\\]\nIt is typically straightforward to draw \\(\\theta\\) from the prior and \\(y^{\\textrm{rep}}\\) from the data model given \\(\\theta\\) efficiently. 
In cases where it is not, the model may be coded and executed just as the posterior predictive model, only with no data.\n\n\n\nThe mixed replication corresponds to the model \\[\\begin{eqnarray*}\n\\phi & \\sim & p(\\phi)\n\\\\[2pt]\n\\alpha & \\sim & p(\\alpha \\mid \\phi)\n\\\\[2pt]\ny & \\sim & p(y \\mid \\alpha)\n\\\\[2pt]\n\\alpha^{\\textrm{rep}} & \\sim & p(\\alpha \\mid \\phi)\n\\\\[2pt]\ny^{\\textrm{rep}} & \\sim & p(y \\mid \\alpha^{\\textrm{rep}})\n\\end{eqnarray*}\\] The notation here is meant to indicate that \\(\\alpha\\) and \\(\\alpha^{\\textrm{rep}}\\) have identical distributions, as do \\(y\\) and \\(y^{\\textrm{rep}}\\).\nThis corresponds to a joint model \\[\np(\\phi, \\alpha, \\alpha^{\\textrm{rep}}, y, y^{\\textrm{rep}})\n=\np(\\phi)\n\\cdot p(\\alpha \\mid \\phi)\n\\cdot p(y \\mid \\alpha)\n\\cdot p(\\alpha^{\\textrm{rep}} \\mid \\phi)\n\\cdot p(y^{\\textrm{rep}} \\mid \\alpha^{\\textrm{rep}}),\n\\] where \\(y\\) is the only observed variable, \\(\\alpha\\) contains the lower-level parameters and \\(\\phi\\) the hyperparameters. Note that \\(\\phi\\) is not replicated and instead appears in the distribution for both \\(\\alpha\\) and \\(\\alpha^{\\textrm{rep}}\\).\nThe posterior is \\(p(\\phi, \\alpha, \\alpha^{\\textrm{rep}},\ny^{\\textrm{rep}} \\mid y)\\). From posterior draws, the posterior predictive simulations \\(y^{\\textrm{rep}}\\) are kept.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior and Prior Predictive Checks" + ] + }, + { + "objectID": "stan-users-guide/posterior-predictive-checks.html#simulating-from-the-posterior-predictive-distribution", + "href": "stan-users-guide/posterior-predictive-checks.html#simulating-from-the-posterior-predictive-distribution", + "title": "Posterior and Prior Predictive Checks", + "section": "", + "text": "The posterior predictive distribution is the distribution over new observations given previous observations. 
It’s predictive in the sense that it’s predicting behavior on new data that is not part of the training set. It’s posterior in that everything is conditioned on observed data \\(y\\).\nThe posterior predictive distribution for replications \\(y^{\\textrm{rep}}\\) of the original data set \\(y\\) given model parameters \\(\\theta\\) is defined by \\[\np(y^{\\textrm{rep}} \\mid y)\n= \\int p(y^{\\textrm{rep}} \\mid \\theta)\n \\cdot p(\\theta \\mid y) \\, \\textrm{d}\\theta.\n\\]\nAs with other posterior predictive quantities, generating a replicated data set \\(y^{\\textrm{rep}}\\) from the posterior predictive distribution is straightforward using the generated quantities block. Consider a simple regression model with parameters \\(\\theta = (\\alpha, \\beta, \\sigma).\\)\ndata {\n int<lower=0> N;\n vector[N] x;\n vector[N] y;\n}\nparameters {\n real alpha;\n real beta;\n real<lower=0> sigma;\n}\nmodel {\n alpha ~ normal(0, 2);\n beta ~ normal(0, 1);\n sigma ~ normal(0, 1);\n y ~ normal(alpha + beta * x, sigma);\n}\nTo generate a replicated data set y_rep for this simple model, the following generated quantities block suffices.\ngenerated quantities {\n array[N] real y_rep = normal_rng(alpha + beta * x, sigma);\n}\nThe vectorized form of the normal random number generator is used with the original predictors x and the model parameters alpha, beta, and sigma. The replicated data variable y_rep is declared to be the same size as the original data y, but instead of a vector type, it is declared to be an array of reals to match the return type of the function normal_rng. Because the vector and real array types have the same dimensions and layout, they can be plotted against one another and otherwise compared during downstream processing.\nThe posterior predictive sampling for posterior predictive checks is different from usual posterior predictive sampling discussed in the chapter on posterior predictions in that the original predictors \\(x\\) are used. 
That is, the posterior predictions are for the original data.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior and Prior Predictive Checks" + ] + }, + { + "objectID": "stan-users-guide/posterior-predictive-checks.html#plotting-multiples", + "href": "stan-users-guide/posterior-predictive-checks.html#plotting-multiples", + "title": "Posterior and Prior Predictive Checks", + "section": "", + "text": "A standard posterior predictive check would plot a histogram of each replicated data set along with the original data set and compare them by eye. For this purpose, only a few replications are needed. These should be taken by thinning a larger set of replications down to the size needed to ensure rough independence of the replications.\nHere’s a complete example where the model is a simple Poisson with a weakly informative exponential prior with a mean of 5 and standard deviation of 5.\ndata {\n int<lower=0> N;\n array[N] int<lower=0> y;\n}\ntransformed data {\n real<lower=0> mean_y = mean(to_vector(y));\n real<lower=0> sd_y = sd(to_vector(y));\n}\nparameters {\n real<lower=0> lambda;\n}\nmodel {\n y ~ poisson(lambda);\n lambda ~ exponential(0.2);\n}\ngenerated quantities {\n array[N] int<lower=0> y_rep = poisson_rng(rep_array(lambda, N));\n real<lower=0> mean_y_rep = mean(to_vector(y_rep));\n real<lower=0> sd_y_rep = sd(to_vector(y_rep));\n int<lower=0, upper=1> mean_gte = (mean_y_rep >= mean_y);\n int<lower=0, upper=1> sd_gte = (sd_y_rep >= sd_y);\n}\nThe generated quantities block creates a variable y_rep for the replicated data, variables mean_y_rep and sd_y_rep for the statistics of the replicated data, and indicator variables mean_gte and sd_gte for whether the replicated statistic is greater than or equal to the statistic applied to the original data.\nNow consider generating data \\(y \\sim \\textrm{Poisson}(5)\\). 
The resulting small multiples plot shows the original data plotted in the upper left and eight different posterior replications plotted in the remaining boxes.\n\n\n\nPosterior predictive checks for Poisson data generating process and Poisson model.\n\n\nWith a Poisson data-generating process and Poisson model, the posterior replications look similar to the original data. If it were easy to pick the original data out of the lineup, there would be a problem.\nNow consider generating over-dispersed data \\(y \\sim \\textrm{negative-binomial2}(5, 1).\\) This has the same mean as \\(\\textrm{Poisson}(5)\\), namely \\(5\\), but a standard deviation of \\(\\sqrt{5 + 5^2 /1} \\approx 5.5.\\) There is no way to fit this data with the Poisson model, because a variable distributed as \\(\\textrm{Poisson}(\\lambda)\\) has mean \\(\\lambda\\) and standard deviation \\(\\sqrt{\\lambda},\\) which is \\(\\sqrt{5}\\) for \\(\\textrm{Poisson}(5).\\) Here’s the resulting small multiples plot, again with original data in the upper left.\n\n\n\nPosterior predictive checks for negative binomial data generating process and Poisson model.\n\n\nThis time, the original data stands out in stark contrast to the replicated data sets, all of which are clearly more symmetric and lower variance than the original data. That is, the model’s not appropriately capturing the variance of the data.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior and Prior Predictive Checks" + ] + }, + { + "objectID": "stan-users-guide/posterior-predictive-checks.html#posterior-p-values", + "href": "stan-users-guide/posterior-predictive-checks.html#posterior-p-values", + "title": "Posterior and Prior Predictive Checks", + "section": "", + "text": "If a model captures the data well, summary statistics such as sample mean and standard deviation, should have similar values in the original and replicated data sets. 
This can be tested by means of a p-value-like statistic, which here is just the probability the test statistic \\(s(\\cdot)\\) in a replicated data set exceeds that in the original data, \\[\n\\Pr\\!\\left[ s(y^{\\textrm{rep}}) \\geq s(y) \\mid y \\right]\n=\n\\int\n\\textrm{I}\\left( s(y^{\\textrm{rep}}) \\geq s(y) \\mid y \\right)\n\\cdot p\\left( y^{\\textrm{rep}} \\mid y \\right)\n\\, \\textrm{d}{y^{\\textrm{rep}}}.\n\\] It is important to note that “p-values” is in quotes because these statistics are not classically calibrated, and thus will not in general have a uniform distribution even when the model is well specified (Bayarri and Berger 2000).\nNevertheless, values of this statistic very close to zero or one are cause for concern that the model is not fitting the data well. Unlike a visual test, this p-value-like test is easily automated for bulk model fitting.\nTo calculate event probabilities in Stan, it suffices to define indicator variables that take on value 1 if the event occurs and 0 if it does not. The posterior mean is then the event probability. For efficiency, indicator variables are defined in the generated quantities block.\ngenerated quantities {\n int<lower=0, upper=1> mean_gt;\n int<lower=0, upper=1> sd_gt;\n {\n array[N] real y_rep = normal_rng(alpha + beta * x, sigma);\n mean_gt = mean(y_rep) > mean(y);\n sd_gt = sd(y_rep) > sd(y);\n }\n}\nThe indicator variable mean_gt will have value 1 if the mean of the simulated data y_rep is greater than the mean of the original data y. Because the values of y_rep are not needed for the posterior predictive checks, the program saves output space by using a local variable for y_rep. 
The statistics mean(y) and sd(y) could also be computed in the transformed data block and saved.\nFor the example in the previous section, where over-dispersed data generated by a negative binomial distribution was fit with a simple Poisson model, the following plot illustrates the posterior p-value calculation for the mean statistic.\n\n\n\nHistogram of means of replicated data sets; vertical red line at mean of original data.\n\n\nThe p-value for the mean is just the percentage of replicated data sets whose statistic is greater than or equal to that of the original data. Using a Poisson model for negative binomial data still fits the mean well, with a posterior \\(p\\)-value of 0.49. In Stan terms, it is extracted as the posterior mean of the indicator variable mean_gt.\nThe standard deviation statistic tells a different story.\n\n\n\nScatterplot of standard deviations of replicated data sets; the vertical red line is at standard deviation of original data.\n\n\nHere, the original data has much higher standard deviation than any of the replicated data sets. The resulting \\(p\\)-value estimated by Stan after a large number of iterations is exactly zero (the absolute error bounds are fine, but a lot of iterations are required to get good relative error bounds on small \\(p\\)-values by sampling). In other words, there were no posterior draws in which the replicated data set had a standard deviation greater than or equal to that of the original data set. Clearly, the model is not capturing the dispersion of the original data. The point of this exercise isn’t just to figure out that there’s a problem with a model, but to isolate where it is. 
Seeing that the data is over-dispersed compared to the Poisson model would be reason to fit a more general model like the negative binomial or a latent varying effects (aka random effects) model that can account for the over-dispersion.\n\n\nAny statistic may be used for the data, but these can be guided by the quantities of interest in the model itself. Popular choices in addition to mean and standard deviation are quantiles, such as the median, 5% or 95% quantiles, or even the maximum or minimum value to test extremes.\nDespite the range of choices, test statistics should ideally be ancillary, in the sense that they should be testing something other than the fit of a parameter. For example, a simple normal model of a data set will typically fit the mean and variance of the data quite well as long as the prior doesn’t dominate the posterior. In contrast, a Poisson model of the same data cannot capture both the mean and the variance of a data set if they are different, so they bear checking in the Poisson case. As we saw with the Poisson case, the posterior mean for the single rate parameter was located near the data mean, not the data variance. Other distributions, such as the lognormal and gamma distributions, have means and variances that are functions of two or more parameters.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior and Prior Predictive Checks" + ] + }, + { + "objectID": "stan-users-guide/posterior-predictive-checks.html#prior-predictive-checks", + "href": "stan-users-guide/posterior-predictive-checks.html#prior-predictive-checks", + "title": "Posterior and Prior Predictive Checks", + "section": "", + "text": "Prior predictive checks generate data according to the prior in order to assess whether a prior is appropriate (Gabry et al. 2019). A posterior predictive check generates replicated data according to the posterior predictive distribution. 
In contrast, the prior predictive check generates data according to the prior predictive distribution, \\[\ny^{\\textrm{sim}} \\sim p(y).\n\\] The prior predictive distribution is just like the posterior predictive distribution with no observed data, so that a prior predictive check is nothing more than the limiting case of a posterior predictive check with no data.\nThis is easy to carry out mechanically by simulating parameters \\[\n\\theta^{\\textrm{sim}} \\sim p(\\theta)\n\\] according to the priors, then simulating data \\[\ny^{\\textrm{sim}} \\sim p(y \\mid \\theta^{\\textrm{sim}})\n\\] according to the data model given the simulated parameters. The result is a simulation from the joint distribution, \\[\n(y^{\\textrm{sim}}, \\theta^{\\textrm{sim}}) \\sim p(y, \\theta)\n\\] and thus \\[\ny^{\\textrm{sim}} \\sim p(y)\n\\] is a simulation from the prior predictive distribution.\n\n\nA prior predictive check is coded just like a posterior predictive check. If a posterior predictive check has already been coded and it’s possible to set the data to be empty, then no additional coding is necessary. The disadvantage to coding prior predictive checks as posterior predictive checks with no data is that Markov chain Monte Carlo will be used to sample the parameters, which is less efficient than taking independent draws using random number generation.\nPrior predictive checks can be coded entirely within the generated quantities block using random number generation. The resulting draws will be independent. Predictors must be read in from the actual data set—they do not have a generative model from which to be simulated. 
For a Poisson regression, prior predictive sampling can be encoded as the following complete Stan program.\ndata {\n int<lower=0> N;\n vector[N] x;\n}\ngenerated quantities {\n real alpha = normal_rng(0, 1);\n real beta = normal_rng(0, 1);\n array[N] real y_sim = poisson_log_rng(alpha + beta * x);\n}\nRunning this program using Stan’s fixed-parameter sampler yields draws from the prior. These may be plotted to consider their appropriateness.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior and Prior Predictive Checks" + ] + }, + { + "objectID": "stan-users-guide/posterior-predictive-checks.html#example-of-prior-predictive-checks", + "href": "stan-users-guide/posterior-predictive-checks.html#example-of-prior-predictive-checks", + "title": "Posterior and Prior Predictive Checks", + "section": "", + "text": "Suppose we have a model for a football (aka soccer) league where there are \\(J\\) teams. Each team has a scoring rate \\(\\lambda_j\\) and in each game will be assumed to score \\(\\textrm{poisson}(\\lambda_j)\\) points. Yes, this model completely ignores defense. Suppose the modeler does not want to “put their thumb on the scale” and would rather “let the data speak for themselves” and so uses a prior with very wide tails, because it seems uninformative, such as the widely deployed \\[\n\\lambda_j \\sim \\textrm{gamma}(\\epsilon_1, \\epsilon_2).\n\\] This is not just a manufactured example; The BUGS Book recommends setting \\(\\epsilon = (0.5, 0.00001)\\), which corresponds to a Jeffreys prior for a Poisson rate parameter prior (Lunn et al. 2012, 85).\nSuppose the league plays a round-robin tournament wherein every team plays every other team. 
The following Stan model generates random team abilities and the results of such a round-robin tournament, which may be used to perform prior predictive checks.\ndata {\n int<lower=0> J;\n array[2] real<lower=0> epsilon;\n}\ngenerated quantities {\n array[J] real<lower=0> lambda;\n array[J, J] int y;\n for (j in 1:J) lambda[j] = gamma_rng(epsilon[1], epsilon[2]);\n for (i in 1:J) {\n for (j in 1:J) {\n y[i, j] = poisson_rng(lambda[i]) - poisson_rng(lambda[j]);\n }\n }\n}\nIn this simulation, teams play each other twice and play themselves once. This could be made more realistic by controlling the combinatorics to only generate a single result for each pair of teams, of which there are \\(\\binom{J}{2} = \\frac{J \\cdot (J - 1)}{2}.\\)\nUsing the \\(\\textrm{gamma}(0.5, 0.00001)\\) reference prior on team abilities, the following are the first 20 simulated point differences for the match between the first two teams, \\(y^{(1:20)}_{1, 2}\\).\n2597 -26000 5725 22496 1270 1072 4502 -2809 -302 4987\n7513 7527 -3268 -12374 3828 -158 -29889 2986 -1392 66\nThat’s some pretty highly scoring football games being simulated; all but one has a score differential greater than 100! In other words, this \\(\\textrm{gamma}(0.5, 0.00001)\\) prior is putting around 95% of its weight on score differentials above 100. Given that two teams combined rarely score 10 points, this prior is way out of line with prior knowledge about football matches; it is not only consistent with outcomes that have never occurred in the history of the sport, it puts most of the prior probability mass there.\nThe posterior predictive distribution can be strongly affected by the prior when there is not much observed data and substantial prior mass is concentrated around infeasible values (A. Gelman 2006).\nJust as with posterior predictive distributions, any statistics of the generated data may be evaluated. 
Here, the focus was on score difference between a single pair of teams, but it could’ve been on maximums, minimums, averages, variances, etc.\nIn this textbook example, the prior is univariate and directly related to the expected number of points scored, and could thus be directly inspected for consistency with prior knowledge about scoring rates in football. There will not be the same kind of direct connection when the prior and data model distributions are multivariate. In these more challenging situations, prior predictive checks are an easy way to get a handle on the implications of a prior in terms of what it says the data is going to look like; for a more complex application involving spatially heterogeneous air pollution concentration, see (Gabry et al. 2019).\nPrior predictive checks can also be compared with the data, but one should not expect them to be calibrated in the same way as posterior predictive checks. That would require guessing the posterior and encoding it in the prior. The goal is to make sure the prior is not so wide that it will pull probability mass away from feasible values.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior and Prior Predictive Checks" + ] + }, + { + "objectID": "stan-users-guide/posterior-predictive-checks.html#mixed-replication", + "href": "stan-users-guide/posterior-predictive-checks.html#mixed-replication", + "title": "Posterior and Prior Predictive Checks", + "section": "", + "text": "Andrew Gelman, Meng, and Stern (1996) discuss the case of mixed replication for hierarchical models in which the hyperparameters remain fixed, but varying effects are replicated. This is neither a purely prior nor purely posterior predictive check, but falls somewhere in between.\nFor example, consider a simple varying intercept logistic regression, with intercepts \\(\\alpha_k\\) for \\(k \\in 1:K\\). 
Each data item \\(y_n \\in \\{ 0, 1 \\}\\) is assumed to correspond to group \\(kk_n \\in 1:K.\\) The data model is thus \\[\ny_n \\sim \\textrm{bernoulli}(\\textrm{logit}^{-1}(\\alpha_{kk[n]})).\n\\] The varying intercepts have a hierarchical normal prior, \\[\n\\alpha_k \\sim \\textrm{normal}(\\mu, \\sigma).\n\\] The hyperparameters are themselves given weakly informative priors, \\[\\begin{eqnarray*}\n\\mu & \\sim & \\textrm{normal}(0, 2)\n\\\\[4pt]\n\\sigma & \\sim & \\textrm{lognormal}(0, 1).\n\\end{eqnarray*}\\]\nLike in a posterior predictive check, the hyperparameters \\(\\mu\\) and \\(\\sigma\\) are drawn from the posterior, \\[\n\\mu^{(m)}, \\sigma^{(m)} \\sim p(\\mu, \\sigma \\mid y)\n\\] Like in a prior predictive check, replicated values of \\(\\alpha\\) are drawn from the hyperparameters, \\[\n\\alpha^{\\textrm{rep}(m)}_k \\sim \\textrm{normal}(\\alpha_k \\mid\n\\mu^{(m)}, \\sigma^{(m)}).\n\\] The data items are then each replicated using the replicated intercepts, \\[\ny^{\\textrm{rep}(m)}_n \\sim\n\\textrm{bernoulli}\n (\\textrm{logit}^{-1}(\\alpha^{\\textrm{rep}(m)}_{kk[n]})).\n\\] Thus the \\(y^{\\textrm{rep}(m)}\\) can be seen as a kind of posterior predictive replication of observations from new groups that were not among the original \\(K\\) groups.\nIn Stan, mixed predictive replications \\(y^{\\textrm{rep}(m)}\\) can be programmed directly.\ndata {\n int<lower=0> K;\n int<lower=0> N;\n array[N] int<lower=1, upper=K> kk;\n array[N] int<lower=0, upper=1> y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n vector<offset=mu, multiplier=sigma>[K] alpha;\n}\nmodel {\n mu ~ normal(0, 2); // hyperprior\n sigma ~ lognormal(0, 1);\n alpha ~ normal(mu, sigma); // hierarchical prior\n y ~ bernoulli_logit(alpha[kk]); // data model\n}\ngenerated quantities {\n // alpha replicated; mu and sigma not replicated\n array[K] real alpha_rep\n = normal_rng(rep_vector(mu, K), sigma);\n array[N] int<lower=0, upper=1> y_rep\n = 
bernoulli_logit_rng(alpha_rep[kk]);\n}", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior and Prior Predictive Checks" + ] + }, + { + "objectID": "stan-users-guide/posterior-predictive-checks.html#joint-model-representation", + "href": "stan-users-guide/posterior-predictive-checks.html#joint-model-representation", + "title": "Posterior and Prior Predictive Checks", + "section": "", + "text": "Following Andrew Gelman, Meng, and Stern (1996), prior, posterior, and mixed replications may all be defined as posteriors from joint models over parameters and observed and replicated data.\n\n\nFor example, posterior predictive replication may be formulated using distribution notation as follows. \\[\\begin{eqnarray*}\n\\theta & \\sim & p(\\theta)\n\\\\[2pt]\ny & \\sim & p(y \\mid \\theta)\n\\\\[2pt]\ny^{\\textrm{rep}} & \\sim & p(y \\mid \\theta)\n\\end{eqnarray*}\\] The heavily overloaded distribution notation is meant to indicate that both \\(y\\) and \\(y^{\\textrm{rep}}\\) are drawn from the same distribution, or more formally using capital letters to distinguish random variables, that the conditional densities \\(p_{Y^{\\textrm{rep}} \\mid\n\\Theta}\\) and \\(p_{Y \\mid \\Theta}\\) are the same.\nThe joint density is \\[\np(\\theta, y, y^{\\textrm{rep}})\n= p(\\theta) \\cdot p(y \\mid \\theta) \\cdot p(y^{\\textrm{rep}} \\mid \\theta).\n\\] This again is assuming that the two distributions for \\(y\\) and \\(y^{\\textrm{rep}}\\) are identical.\nThe variable \\(y\\) is observed, with the predictive simulation \\(y^{\\textrm{rep}}\\) and parameter vector \\(\\theta\\) not observed. The posterior is \\(p(y^{\\textrm{rep}}, \\theta \\mid y)\\). Given draws from the posterior, the posterior predictive simulations \\(y^{\\textrm{rep}}\\) are retained.\n\n\n\nThe prior predictive model simply drops the data component of the posterior predictive model. 
\\[\\begin{eqnarray*}\n\\theta & \\sim & p(\\theta)\n\\\\[2pt]\ny^{\\textrm{rep}} & \\sim & p(y \\mid \\theta)\n\\end{eqnarray*}\\] This corresponds to the joint density \\[\np(\\theta, y^{\\textrm{rep}}) = p(\\theta) \\cdot p(y^{\\textrm{rep}} \\mid\n\\theta).\n\\]\nIt is typically straightforward to draw \\(\\theta\\) from the prior and \\(y^{\\textrm{rep}}\\) from the data model given \\(\\theta\\) efficiently. In cases where it is not, the model may be coded and executed just as the posterior predictive model, only with no data.\n\n\n\nThe mixed replication corresponds to the model \\[\\begin{eqnarray*}\n\\phi & \\sim & p(\\phi)\n\\\\[2pt]\n\\alpha & \\sim & p(\\alpha \\mid \\phi)\n\\\\[2pt]\ny & \\sim & p(y \\mid \\alpha)\n\\\\[2pt]\n\\alpha^{\\textrm{rep}} & \\sim & p(\\alpha \\mid \\phi)\n\\\\[2pt]\ny^{\\textrm{rep}} & \\sim & p(y \\mid \\alpha^{\\textrm{rep}})\n\\end{eqnarray*}\\] The notation here is meant to indicate that \\(\\alpha\\) and \\(\\alpha^{\\textrm{rep}}\\) have identical distributions, as do \\(y\\) and \\(y^{\\textrm{rep}}\\).\nThis corresponds to a joint model \\[\np(\\phi, \\alpha, \\alpha^{\\textrm{rep}}, y, y^{\\textrm{rep}})\n=\np(\\phi)\n\\cdot p(\\alpha \\mid \\phi)\n\\cdot p(y \\mid \\alpha)\n\\cdot p(\\alpha^{\\textrm{rep}} \\mid \\phi)\n\\cdot p(y^{\\textrm{rep}} \\mid \\alpha^{\\textrm{rep}}),\n\\] where \\(y\\) is the only observed variable, \\(\\alpha\\) contains the lower-level parameters and \\(\\phi\\) the hyperparameters. Note that \\(\\phi\\) is not replicated and instead appears in the distribution for both \\(\\alpha\\) and \\(\\alpha^{\\textrm{rep}}\\).\nThe posterior is \\(p(\\phi, \\alpha, \\alpha^{\\textrm{rep}},\ny^{\\textrm{rep}} \\mid y)\\). 
From posterior draws, the posterior predictive simulations \\(y^{\\textrm{rep}}\\) are kept.", + "crumbs": [ + "Stan Users Guide", + "Posterior Inference & Model Checking", + "Posterior and Prior Predictive Checks" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html", + "href": "stan-users-guide/problematic-posteriors.html", + "title": "Problematic Posteriors", + "section": "", + "text": "Mathematically speaking, with a proper posterior, one can do Bayesian inference and that’s that. There is not even a need to require a finite variance or even a finite mean—all that’s needed is a finite integral. Nevertheless, modeling is a tricky business and even experienced modelers sometimes code models that lead to improper priors. Furthermore, some posteriors are mathematically sound, but ill-behaved in practice. This chapter discusses issues in models that create problematic posterior inferences, either in general for Bayesian inference or in practice for Stan.\n\n\nThis section discusses problems related to the classical notion of identifiability, which lead to ridges in the posterior density and wreak havoc with both sampling and inference.\n\n\n\n\nThe first example of collinearity is an artificial example involving redundant intercept parameters.1\nSuppose there are observations \\(y_n\\) for \\(n \\in \\{1,\\dotsc,N\\}\\), two intercept parameters \\(\\lambda_1\\) and \\(\\lambda_2\\), a scale parameter \\(\\sigma > 0\\), and the data model \\[\ny_n \\sim \\textsf{normal}(\\lambda_1 + \\lambda_2, \\sigma).\n\\]\nFor any constant \\(q\\), the sampling density for \\(y\\) does not change if we add \\(q\\) to \\(\\lambda_1\\) and subtract it from \\(\\lambda_2\\), i.e., \\[\np(y \\mid \\lambda_1, \\lambda_2,\\sigma)\n=\np(y \\mid \\lambda_1 + q, \\lambda_2 - q, \\sigma).\n\\]\nThe consequence is that an improper uniform prior \\(p(\\mu,\\sigma)\n\\propto 1\\) leads to an improper posterior. 
This impropriety arises because the neighborhoods around \\(\\lambda_1 + q, \\lambda_2 - q\\) have the same mass no matter what \\(q\\) is. Therefore, a sampler would need to spend as much time in the neighborhood of \\(\\lambda_1=1\\,000\\,000\\,000\\) and \\(\\lambda_2=-1\\,000\\,000\\,000\\) as it does in the neighborhood of \\(\\lambda_1=0\\) and \\(\\lambda_2=0\\), and so on for ever more far-ranging values.\nThe marginal posterior \\(p(\\lambda_1,\\lambda_2 \\mid y)\\) for this model is thus improper.2\nThe impropriety shows up visually as a ridge in the posterior density, as illustrated in the left-hand plot. The ridge for this model is along the line where \\(\\lambda_2 = -\\lambda_1 + c\\) for some constant \\(c\\).\nContrast this model with a simple regression with a single intercept parameter \\(\\mu\\) and data model \\[\ny_n \\sim \\textsf{normal}(\\mu,\\sigma).\n\\] Even with an improper prior, the posterior is proper as long as there are at least two data points \\(y_n\\) with distinct values.\n\n\n\nConsider an item-response theory model for students \\(j \\in 1{:}J\\) with abilities \\(\\alpha_j\\) and test items \\(i \\in 1{:}I\\) with difficulties \\(\\beta_i\\). The observed data are an \\(I \\times J\\) array with entries \\(y_{i, j} \\in \\{ 0, 1 \\}\\) coded such that \\(y_{i, j} = 1\\) indicates that student \\(j\\) answered question \\(i\\) correctly. 
The sampling distribution for the data is \\[\ny_{i, j} \\sim \\textsf{Bernoulli}(\\operatorname{logit}^{-1}(\\alpha_j - \\beta_i)).\n\\]\nFor any constant \\(c\\), the probability of \\(y\\) is unchanged by adding a constant \\(c\\) to all the abilities and subtracting it from all the difficulties, i.e., \\[\np(y \\mid \\alpha, \\beta)\n=\np(y \\mid \\alpha + c, \\beta - c).\n\\]\nThis leads to a multivariate version of the ridge displayed by the regression with two intercepts discussed above.\n\n\n\nThe general form of the collinearity problem arises when predictors for a regression are collinear. For example, consider a linear regression data model \\[\ny_n \\sim \\textsf{normal}(x_n \\beta, \\sigma)\n\\] for an \\(N\\)-dimensional observation vector \\(y\\), an \\(N \\times K\\) predictor matrix \\(x\\), and a \\(K\\)-dimensional coefficient vector \\(\\beta\\).\nNow suppose that column \\(k\\) of the predictor matrix is a multiple of column \\(k'\\), i.e., there is some constant \\(c\\) such that \\(x_{n,k} = c\n\\, x_{n,k'}\\) for all \\(n\\). 
In this case, the coefficients \\(\\beta_k\\) and \\(\\beta_{k'}\\) can covary without changing the predictions, so that for any \\(d\\), \\[\np(y \\mid \\ldots, \\beta_k, \\dotsc, \\beta_{k'}, \\dotsc, \\sigma)\n=\np(y \\mid \\ldots, \\beta_k + d, \\dotsc, \\beta_{k'} - c \\, d, \\dotsc,\n\\sigma).\n\\]\nEven if columns of the predictor matrix are not exactly collinear as discussed above, they cause similar problems for inference if they are nearly collinear.\n\n\n\nConsider adding a discrimination parameter \\(\\delta_i\\) for each question in an IRT model, with data model \\[\ny_{i, j} \\sim \\textsf{Bernoulli}(\\operatorname{logit}^{-1}(\\delta_i(\\alpha_j - \\beta_i))).\n\\] For any constant \\(c \\neq 0\\), multiplying \\(\\delta\\) by \\(c\\) and dividing \\(\\alpha\\) and \\(\\beta\\) by \\(c\\) produces the same likelihood, \\[\np(y \\mid \\delta,\\alpha,\\beta)\n= p(y \\mid c \\delta, \\frac{1}{c}\\alpha, \\frac{1}{c}\\beta).\n\\] If \\(c < 0\\), this switches the signs of every component in \\(\\alpha\\), \\(\\beta\\), and \\(\\delta\\) without changing the density.\n\n\n\nIn order to parameterize a \\(K\\)-simplex (i.e., a \\(K\\)-vector with non-negative values that sum to one), only \\(K - 1\\) parameters are necessary because the \\(K\\)th is just one minus the sum of the first \\(K\n- 1\\) parameters, so that if \\(\\theta\\) is a \\(K\\)-simplex, \\[\n\\theta_K = 1 - \\sum_{k=1}^{K-1} \\theta_k.\n\\]\nThe softmax function maps a \\(K\\)-vector \\(\\alpha\\) of linear predictors to a \\(K\\)-simplex \\(\\theta = \\texttt{softmax}(\\alpha)\\) by defining \\[\n\\theta_k = \\frac{\\exp(\\alpha_k)}{\\sum_{k'=1}^K \\exp(\\alpha_{k'})}.\n\\]\nThe softmax function is many-to-one, which leads to a lack of identifiability of the unconstrained parameters \\(\\alpha\\). 
In particular, adding or subtracting a constant from each \\(\\alpha_k\\) produces the same simplex \\(\\theta\\).\n\n\n\n\nAll of the examples discussed in the previous section allow translation or scaling of parameters while leaving the data probability density invariant. These problems can be mitigated in several ways.\n\n\nIn the case of the multiple intercepts, \\(\\lambda_1\\) and \\(\\lambda_2\\), the simplest solution is to remove the redundant intercept, resulting in a model with a single intercept parameter \\(\\mu\\) and sampling distribution \\(y_n \\sim \\textsf{normal}(\\mu, \\sigma)\\). The same solution works for solving the problem with collinearity—just remove one of the columns of the predictor matrix \\(x\\).\n\n\n\nThe IRT model without a discrimination parameter can be fixed by pinning one of its parameters to a fixed value, typically 0. For example, the first student ability \\(\\alpha_1\\) can be fixed to 0. Now all other student ability parameters can be interpreted as being relative to student 1. Similarly, the difficulty parameters are interpretable relative to student 1’s ability to answer them.\nThis solution is not sufficient to deal with the multiplicative invariance introduced by the question discrimination parameters \\(\\delta_i\\). To solve this problem, one of the discrimination parameters, say \\(\\delta_1\\), must also be constrained. Because it’s a multiplicative and not an additive invariance, it must be constrained to a non-zero value, with 1 being a convenient choice. Now all of the discrimination parameters may be interpreted relative to item 1’s discrimination.\nThe many-to-one nature of \\(\\texttt{softmax}(\\alpha)\\) is typically mitigated by pinning a component of \\(\\alpha\\), for instance fixing \\(\\alpha_K = 0\\). The resulting mapping is one-to-one from \\(K-1\\) unconstrained parameters to a \\(K\\)-simplex. 
This is roughly how simplex-constrained parameters are defined in Stan; see the reference manual chapter on constrained parameter transforms for a precise definition. The Stan code for creating a simplex from a \\(K-1\\)-vector can be written as\nvector softmax_id(vector alpha) {\n vector[num_elements(alpha) + 1] alphac1;\n for (k in 1:num_elements(alpha)) {\n alphac1[k] = alpha[k];\n }\n alphac1[num_elements(alphac1)] = 0;\n return softmax(alphac1);\n}\n\n\n\nSo far, the models have been discussed as if the priors on the parameters were improper uniform priors.\nA more general Bayesian solution to these invariance problems is to impose proper priors on the parameters. This approach can be used to solve problems arising from either additive or multiplicative invariance.\nFor example, normal priors on the multiple intercepts, \\[\n\\lambda_1, \\lambda_2 \\sim \\textsf{normal}(0,\\tau),\n\\] with a constant scale \\(\\tau\\), ensure that the posterior mode is located at a point where \\(\\lambda_1 = \\lambda_2\\), because this maximizes \\(\\log \\textsf{normal}(\\lambda_1 \\mid 0,\\tau) + \\log\n\\textsf{normal}(\\lambda_2 \\mid 0,\\tau)\\).3\nThe following plots show the posteriors for two intercept parameterization without prior, two intercept parameterization with standard normal prior, and one intercept reparameterization without prior. 
For all three cases, the posterior is plotted for 100 data points drawn from a standard normal.\nThe two intercept parameterization leads to an improper posterior with a ridge extending infinitely to the northwest and southeast.\n\n\n\nTwo intercepts with improper prior\n\n\nAdding a standard normal prior for the intercepts results in a proper posterior.\n\n\n\nTwo intercepts with proper prior\n\n\nThe single intercept parameterization with no prior also has a proper posterior.\n\n\n\nSingle intercepts with improper prior\n\n\nThe addition of a prior to the two intercepts model is shown in the second plot; the final plot shows the result of reparameterizing to a single intercept.\nAn alternative strategy for identifying a \\(K\\)-simplex parameterization \\(\\theta = \\texttt{softmax}(\\alpha)\\) in terms of an unconstrained \\(K\\)-vector \\(\\alpha\\) is to place a prior on the components of \\(\\alpha\\) with a fixed location (that is, specifically avoid hierarchical priors with varying location). Unlike the approach of pinning \\(\\alpha_K =\n0\\), the prior-based approach models the \\(K\\) outcomes symmetrically rather than modeling \\(K-1\\) outcomes relative to the \\(K\\)-th. The pinned parameterization, on the other hand, is usually more efficient statistically because it does not have the extra degree of (prior constrained) wiggle room.\n\n\n\nCare must be used when adding a prior to resolve invariances. If the prior is taken to be too broad (i.e., too vague), the resolution is in theory only, and samplers will still struggle.\nIdeally, a realistic prior will be formulated based on substantive knowledge of the problem being modeled. Such a prior can be chosen to have the appropriate strength based on prior knowledge. 
A strongly informative prior makes sense if there is strong prior information.\nWhen there is not strong prior information, a weakly informative prior strikes the proper balance between controlling computational inference without dominating the data in the posterior. In most problems, the modeler will have at least some notion of the expected scale of the estimates and be able to choose a prior for identification purposes that does not dominate the data, but provides sufficient computational control on the posterior.\nPriors can also be used in the same way to control the additive invariance of the IRT model. A typical approach is to place a strong prior on student ability parameters \\(\\alpha\\) to control scale simply to control the additive invariance of the basic IRT model and the multiplicative invariance of the model extended with an item discrimination parameter; such a prior does not add any prior knowledge to the problem. Then a prior on item difficulty can be chosen that is either informative or weakly informative based on prior knowledge of the problem.\n\n\n\n\n\nWhere collinearity in regression models can lead to infinitely many posterior maxima, swapping components in a mixture model leads to finitely many posterior maxima.\n\n\nConsider a normal mixture model with two location parameters \\(\\mu_1\\) and \\(\\mu_2\\), a shared scale \\(\\sigma > 0\\), a mixture ratio \\(\\theta \\in\n[0,1]\\), and data model \\[\np(y \\mid \\theta,\\mu_1,\\mu_2,\\sigma)\n= \\prod_{n=1}^N \\big( \\theta \\, \\textsf{normal}(y_n \\mid \\mu_1,\\sigma)\n + (1 - \\theta) \\, \\textsf{normal}(y_n \\mid \\mu_2,\\sigma) \\big).\n\\] The issue here is exchangeability of the mixture components, because \\[\np(\\theta,\\mu_1,\\mu_2,\\sigma \\mid y) = p\\big((1-\\theta),\\mu_2,\\mu_1,\\sigma \\mid y\\big).\n\\] The problem is exacerbated as the number of mixture components \\(K\\) grows, as in clustering models, leading to \\(K!\\) identical posterior maxima.\n\n\n\nThe 
analysis of posterior convergence and effective sample size is also difficult for mixture models. For example, the \\(\\hat{R}\\) convergence statistic reported by Stan and the computation of effective sample size are both compromised by label switching. The problem is that the posterior mean, a key ingredient in these computations, is affected by label switching, resulting in a posterior mean for \\(\\mu_1\\) that is equal to that of \\(\\mu_2\\), and a posterior mean for \\(\\theta\\) that is always 1/2, no matter what the data are.\n\n\n\nIn some sense, the index (or label) of a mixture component is irrelevant. Posterior predictive inferences can still be carried out without identifying mixture components. For example, the log probability of a new observation does not depend on the identities of the mixture components. The only sound Bayesian inferences in such models are those that are invariant to label switching. Posterior means for the parameters are meaningless because they are not invariant to label switching; for example, the posterior mean for \\(\\theta\\) in the two component mixture model will always be 1/2.\n\n\n\nTheoretically, this should not present a problem for inference because all of the integrals involved in posterior predictive inference will be well behaved. The problem in practice is computation.\nBeing able to carry out such invariant inferences in practice is an altogether different matter. It is almost always intractable to find even a single posterior mode, much less balance the exploration of the neighborhoods of multiple local maxima according to the probability masses. In Gibbs sampling, it is unlikely for \\(\\mu_1\\) to move to a new mode when sampled conditioned on the current values of \\(\\mu_2\\) and \\(\\theta\\). 
For HMC and NUTS, the problem is that the sampler gets stuck in one of the two “bowls” around the modes and cannot gather enough energy from random momentum assignment to move from one mode to another.\nEven with a proper posterior, all known sampling and inference techniques are notoriously ineffective when the number of modes grows super-exponentially as it does for mixture models with increasing numbers of components.\n\n\n\nSeveral hacks (i.e., “tricks”) have been suggested and employed to deal with the problems posed by label switching in practice.\n\n\nOne common strategy is to impose a constraint on the parameters that identifies the components. For instance, we might consider constraining \\(\\mu_1 < \\mu_2\\) in the two-component normal mixture model discussed above. A problem that can arise from such an approach is when there is substantial probability mass for the opposite ordering \\(\\mu_1 > \\mu_2\\). In these cases, the posteriors are affected by the constraint and true posterior uncertainty in \\(\\mu_1\\) and \\(\\mu_2\\) is not captured by the model with the constraint. In addition, standard approaches to posterior inference for event probabilities are compromised. For instance, attempting to use \\(M\\) posterior draws to estimate \\(\\Pr[\\mu_1 > \\mu_2]\\) will fail, because the estimator \\[\n\\Pr[\\mu_1 > \\mu_2]\n\\approx\n\\frac{1}{M} \\sum_{m=1}^M \\textrm{I}\\left(\\mu_1^{(m)} > \\mu_2^{(m)}\\right)\n\\] will result in an estimate of 0 because the posterior respects the constraint in the model.\n\n\n\nAnother common approach is to run a single chain or to initialize the parameters near realistic values.4\nThis can work better than the hard constraint approach if reasonable initial values can be found and the labels do not switch within a Markov chain. 
The result is that all chains are glued to a neighborhood of a particular mode in the posterior.\n\n\n\n\n\nIt is possible for two mixture components in a mixture model to collapse to the same values during sampling or optimization. For example, a mixture of \\(K\\) normals might devolve to have \\(\\mu_i =\n\\mu_j\\) and \\(\\sigma_i = \\sigma_j\\) for \\(i \\neq j\\).\nThis will typically happen early in sampling due to initialization in MCMC or optimization or arise from random movement during MCMC. Once the parameters match for a given draw \\((m)\\), it can become hard to escape because there can be a trough of low-density mass between the current parameter values and the ones without collapsed components.\nIt may help to use a smaller step size during warmup or a stronger prior on each mixture component’s membership responsibility. A more extreme measure is to include additional mixture components to deal with the possibility that some of them may collapse.\nIn general, it is difficult to recover exactly the right \\(K\\) mixture components in a mixture model as \\(K\\) increases beyond one (yes, even a two-component mixture can have this problem).\n\n\n\nIn some cases, the posterior density grows without bounds as parameters approach certain poles or boundaries. In such cases, there are no posterior modes and numerical stability issues can arise as sampled parameters approach constraint boundaries.\n\n\nOne such example is a binary mixture model with scales varying by component, \\(\\sigma_1\\) and \\(\\sigma_2\\) for locations \\(\\mu_1\\) and \\(\\mu_2\\). 
In this situation, the density grows without bound as \\(\\sigma_1 \\rightarrow 0\\) and \\(\\mu_1 \\rightarrow y_n\\) for some \\(n\\); that is, one of the mixture components concentrates all of its mass around a single data item \\(y_n\\).\n\n\n\nAnother example of unbounded densities arises with a posterior such as \\(\\textsf{beta}(\\phi \\mid 0.5,0.5)\\), which can arise if seemingly weak beta priors are used for groups that have no data. This density is unbounded as \\(\\phi \\rightarrow 0\\) and \\(\\phi \\rightarrow 1\\). Similarly, a Bernoulli data model coupled with a “weak” beta prior, leads to a posterior \\[\\begin{align*}\np(\\phi \\mid y)\n&\\propto\n \\textsf{beta}(\\phi \\mid 0.5,0.5) \\times \\prod_{n=1}^N \\textsf{Bernoulli}(y_n \\mid \\phi) \\\\\n&=\n \\textsf{beta}\\left(\\phi \\,\\middle|\\, 0.5 + \\sum_{n=1}^N y_n, 0.5 + N - \\sum_{n=1}^N y_n\\right).\n\\end{align*}\\]\nIf \\(N = 9\\) and each \\(y_n = 1\\), the posterior is \\(\\textsf{beta}(\\phi \\mid 9.5,0.5)\\). This posterior is unbounded as \\(\\phi\n\\rightarrow 1\\). Nevertheless, the posterior is proper, and although there is no posterior mode, the posterior mean is well-defined with a value of exactly 0.95.\n\n\nStan does not sample directly on the constrained \\((0,1)\\) space for this problem, so it doesn’t directly deal with unbounded density values. Rather, the probability values \\(\\phi\\) are logit-transformed to \\((-\\infty,\\infty)\\). The boundaries at 0 and 1 are pushed out to \\(-\\infty\\) and \\(\\infty\\) respectively. The Jacobian adjustment that Stan automatically applies ensures the unconstrained density is proper. The adjustment for the particular case of \\((0,1)\\) is, on the log scale, \\(\\log\n\\operatorname{logit}^{-1}(u) + \\log (1 - \\operatorname{logit}^{-1}(u))\\), where \\(u = \\operatorname{logit}(\\phi)\\) is the unconstrained parameter.\nThere are two problems that still arise, though. 
The first is that if the posterior mass for \\(\\phi\\) is near one of the boundaries, the logit-transformed parameter will have to sweep out long paths and thus can dominate the U-turn condition imposed by the no-U-turn sampler (NUTS). The second issue is that the inverse transform from the unconstrained space to the constrained space can underflow to 0 or overflow to 1, even when the unconstrained parameter is not infinite. Similar problems arise for the expectation terms in logistic regression, which is why the logit-scale parameterizations of the Bernoulli and binomial distributions are more stable.\n\n\n\n\n\nIn some cases, the posterior density will not grow without bound, but parameters will grow without bound with gradually increasing density values. Like the models discussed in the previous section that have densities that grow without bound, such models also have no posterior modes.\n\n\nConsider a logistic regression model with \\(N\\) observed outcomes \\(y_n\n\\in \\{ 0, 1 \\}\\), an \\(N \\times K\\) matrix \\(x\\) of predictors, a \\(K\\)-dimensional coefficient vector \\(\\beta\\), and data model \\[\ny_n \\sim \\textsf{Bernoulli}(\\operatorname{logit}^{-1}(x_n \\beta)).\n\\] Now suppose that column \\(k\\) of the predictor matrix is such that \\(x_{n,k} > 0\\) if and only if \\(y_n = 1\\), a condition known as “separability.” In this case, predictive accuracy on the observed data continues to improve as \\(\\beta_k \\rightarrow \\infty\\), because for cases with \\(y_n = 1\\), \\(x_n \\beta \\rightarrow \\infty\\) and hence \\(\\operatorname{logit}^{-1}(x_n \\beta) \\rightarrow 1\\).\nWith separability, there is no maximum to the likelihood and hence no maximum likelihood estimate. From the Bayesian perspective, the posterior is improper and therefore the marginal posterior mean for \\(\\beta_k\\) is also not defined. 
The usual solution to this problem in Bayesian models is to include a proper prior for \\(\\beta\\), which ensures a proper posterior.\n\n\n\n\nSuppose your model includes a parameter \\(\\psi\\) that is defined on \\([0,1]\\) and is given a flat prior \\(\\textsf{uniform}(\\psi \\mid 0,1)\\). Now if the data don’t tell us anything about \\(\\psi\\), the posterior is also \\(\\textsf{uniform}(\\psi \\mid 0,1)\\).\nAlthough there is no maximum likelihood estimate for \\(\\psi\\), the posterior is uniform over a closed interval and hence proper. In the case of a uniform posterior on \\([0,1]\\), the posterior mean for \\(\\psi\\) is well-defined with value \\(1/2\\). Although there is no posterior mode, posterior predictive inference may nevertheless do the right thing by simply integrating (i.e., averaging) over the predictions for \\(\\psi\\) at all points in \\([0,1]\\).\n\n\n\nWith an improper posterior, it is theoretically impossible to properly explore the posterior. However, Gibbs sampling as performed by BUGS and JAGS, although still unable to properly sample from such an improper posterior, behaves differently in practice than the Hamiltonian Monte Carlo sampling performed by Stan when faced with an example such as the two intercept model discussed in the collinearity section and illustrated in the non-identifiable density plot.\n\n\nGibbs sampling, as performed by BUGS and JAGS, may appear to be efficient and well behaved for this unidentified model, but as discussed in the previous subsection, will not actually explore the posterior properly.\nConsider what happens with initial values \\(\\lambda_1^{(0)}, \\lambda_2^{(0)}\\). 
Gibbs sampling proceeds in iteration \\(m\\) by drawing \\[\\begin{align*}\n\\lambda_1^{(m)} &\\sim p(\\lambda_1 \\mid \\lambda_2^{(m-1)}, \\sigma^{(m-1)}, y) \\\\\n\\lambda_2^{(m)} &\\sim p(\\lambda_2 \\mid \\lambda_1^{(m)}, \\sigma^{(m-1)}, y) \\\\\n\\sigma^{(m)} &\\sim p(\\sigma \\mid \\lambda_1^{(m)}, \\lambda_2^{(m)}, y).\n\\end{align*}\\]\nNow consider the draw for \\(\\lambda_1\\) (the draw for \\(\\lambda_2\\) is symmetric), which is conjugate in this model and thus can be done efficiently. In this model, the range from which the next \\(\\lambda_1\\) can be drawn is highly constrained by the current values of \\(\\lambda_2\\) and \\(\\sigma\\). Gibbs will run quickly and provide seemingly reasonable inferences for \\(\\lambda_1 + \\lambda_2\\). But it will not explore the full range of the posterior; it will merely take a slow random walk from the initial values. This random walk behavior is typical of Gibbs sampling when posteriors are highly correlated and the primary reason to prefer Hamiltonian Monte Carlo to Gibbs sampling for models with parameters correlated in the posterior.\n\n\n\nHamiltonian Monte Carlo (HMC), as performed by Stan, is much more efficient at exploring posteriors in models where parameters are correlated in the posterior. In this particular example, the Hamiltonian dynamics (i.e., the motion of a fictitious particle given random momentum in the field defined by the negative log posterior) is going to run up and down along the valley defined by the potential energy (ridges in log posteriors correspond to valleys in potential energy). 
In practice, even with a random momentum for \\(\\lambda_1\\) and \\(\\lambda_2\\), the gradient of the log posterior is going to adjust for the correlation and the simulation will run \\(\\lambda_1\\) and \\(\\lambda_2\\) in opposite directions along the valley corresponding to the ridge in the posterior log density.\n\n\n\nStan’s default no-U-turn sampler (NUTS) is even more efficient at exploring the posterior (see Hoffman and Gelman 2014). Because NUTS simulates the motion of the fictitious particle representing the parameter values until it makes a U-turn, it will be defeated in most cases, as it will just move down the potential energy valley indefinitely without making a U-turn. What happens in practice is that the maximum number of leapfrog steps in the simulation will be hit in many of the iterations, causing a large number of log probability and gradient evaluations (1000 if the max tree depth is set to 10, as in the default). Thus sampling will appear to be slow. This is indicative of an improper posterior, not a bug in the NUTS algorithm or its implementation. It is simply not possible to sample from an improper posterior! 
Thus the behavior of HMC in general and NUTS in particular should be reassuring in that it will clearly fail in cases of improper posteriors, resulting in a clean diagnostic of sweeping out large paths in the posterior.\nHere are results of Stan runs with default parameters fit to \\(N=100\\) data points generated from \\(y_n \\sim \\textsf{normal}(0,1)\\):\nTwo Scale Parameters, Improper Prior\nInference for Stan model: improper_stan\nWarmup took (2.7, 2.6, 2.9, 2.9) seconds, 11 seconds total\nSampling took (3.4, 3.7, 3.6, 3.4) seconds, 14 seconds total\n\n Mean MCSE StdDev 5% 95% N_Eff N_Eff/s R_hat\nlp__ -5.3e+01 7.0e-02 8.5e-01 -5.5e+01 -5.3e+01 150 11 1.0\nn_leapfrog__ 1.4e+03 1.7e+01 9.2e+02 3.0e+00 2.0e+03 2987 212 1.0\nlambda1 1.3e+03 1.9e+03 2.7e+03 -2.3e+03 6.0e+03 2.1 0.15 5.2\nlambda2 -1.3e+03 1.9e+03 2.7e+03 -6.0e+03 2.3e+03 2.1 0.15 5.2\nsigma 1.0e+00 8.5e-03 6.2e-02 9.5e-01 1.2e+00 54 3.9 1.1\nmu 1.6e-01 1.9e-03 1.0e-01 -8.3e-03 3.3e-01 2966 211 1.0\nTwo Scale Parameters, Weak Prior\nWarmup took (0.40, 0.44, 0.40, 0.36) seconds, 1.6 seconds total\nSampling took (0.47, 0.40, 0.47, 0.39) seconds, 1.7 seconds total\n\n Mean MCSE StdDev 5% 95% N_Eff N_Eff/s R_hat\nlp__ -54 4.9e-02 1.3e+00 -5.7e+01 -53 728 421 1.0\nn_leapfrog__ 157 2.8e+00 1.5e+02 3.0e+00 511 3085 1784 1.0\nlambda1 0.31 2.8e-01 7.1e+00 -1.2e+01 12 638 369 1.0\nlambda2 -0.14 2.8e-01 7.1e+00 -1.2e+01 12 638 369 1.0\nsigma 1.0 2.6e-03 8.0e-02 9.2e-01 1.2 939 543 1.0\nmu 0.16 1.8e-03 1.0e-01 -8.1e-03 0.33 3289 1902 1.0\nOne Scale Parameter, Improper Prior\nWarmup took (0.011, 0.012, 0.011, 0.011) seconds, 0.044 seconds total\nSampling took (0.017, 0.020, 0.020, 0.019) seconds, 0.077 seconds total\n\n Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat\nlp__ -54 2.5e-02 0.91 -5.5e+01 -53 -53 1318 17198 1.0\nn_leapfrog__ 3.2 2.7e-01 1.7 1.0e+00 3.0 7.0 39 507 1.0\nmu 0.17 2.1e-03 0.10 -3.8e-03 0.17 0.33 2408 31417 1.0\nsigma 1.0 1.6e-03 0.071 9.3e-01 1.0 1.2 2094 27321 1.0\nOn the top is the 
non-identified model with improper uniform priors and data model \\(y_n \\sim \\textsf{normal}(\\lambda_1 + \\lambda_2,\n\\sigma)\\).\nIn the middle is the same data model as in top plus priors \\(\\lambda_k \\sim \\textsf{normal}(0,10)\\).\nOn the bottom is an identified model with an improper prior, with data model \\(y_n \\sim \\textsf{normal}(\\mu,\\sigma)\\). All models estimate \\(\\mu\\) at roughly 0.16 with low Monte Carlo standard error, but a high posterior standard deviation of 0.1; the true value \\(\\mu=0\\) is within the 90% posterior intervals in all three models.\n\n\n\nTo illustrate the issues with sampling from non-identified and only weakly identified models, we fit three models with increasing degrees of identification of their parameters. The posteriors for these models are illustrated in the non-identifiable density plot. The first model is the unidentified model with two location parameters and no priors discussed in the collinearity section.\ndata {\n int N;\n array[N] real y;\n}\nparameters {\n real lambda1;\n real lambda2;\n real<lower=0> sigma;\n}\ntransformed parameters {\n real mu;\n mu = lambda1 + lambda2;\n}\nmodel {\n y ~ normal(mu, sigma);\n}\nThe second adds priors to the model block for lambda1 and lambda2 to the previous model.\nlambda1 ~ normal(0, 10);\nlambda2 ~ normal(0, 10);\nThe third involves a single location parameter, but no priors.\ndata {\n int N;\n array[N] real y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(mu, sigma);\n}\nAll three of the example models were fit in Stan 2.1.0 with default parameters (1000 warmup iterations, 1000 sampling iterations, NUTS sampler with max tree depth of 10). The results are shown in the non-identified fits figure. 
The key statistics from these outputs are the following.\n\nAs indicated by the R_hat column, all parameters have converged other than \\(\\lambda_1\\) and \\(\\lambda_2\\) in the non-identified model.\nThe average number of leapfrog steps is roughly 3 in the identified model, 150 in the model identified by a weak prior, and 1400 in the non-identified model.\nThe effective sample size per second for \\(\\mu\\) is roughly 31,000 in the identified model, 1,900 in the model identified with weakly informative priors, and 200 in the non-identified model; the results are similar for \\(\\sigma\\).\nIn the non-identified model, the 90% interval for \\(\\lambda_1\\) is (-2300,6000), whereas it is only (-12,12) in the model identified with weakly informative priors.\nIn all three models, the simulated values of \\(\\mu=0\\) and \\(\\sigma=1\\) are well within the posterior 90% intervals.\n\nThe first two points, lack of convergence and hitting the maximum number of leapfrog steps (equivalently maximum tree depth) are indicative of improper posteriors. 
Thus rather than covering up the problem with poor sampling as may be done with Gibbs samplers, Hamiltonian Monte Carlo tries to explore the posterior and its failure is a clear indication that something is amiss in the model.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html#collinearity.section", + "href": "stan-users-guide/problematic-posteriors.html#collinearity.section", + "title": "Problematic Posteriors", + "section": "", + "text": "This section discusses problems related to the classical notion of identifiability, which lead to ridges in the posterior density and wreak havoc with both sampling and inference.\n\n\n\n\nThe first example of collinearity is an artificial example involving redundant intercept parameters.1\nSuppose there are observations \\(y_n\\) for \\(n \\in \\{1,\\dotsc,N\\}\\), two intercept parameters \\(\\lambda_1\\) and \\(\\lambda_2\\), a scale parameter \\(\\sigma > 0\\), and the data model \\[\ny_n \\sim \\textsf{normal}(\\lambda_1 + \\lambda_2, \\sigma).\n\\]\nFor any constant \\(q\\), the sampling density for \\(y\\) does not change if we add \\(q\\) to \\(\\lambda_1\\) and subtract it from \\(\\lambda_2\\), i.e., \\[\np(y \\mid \\lambda_1, \\lambda_2,\\sigma)\n=\np(y \\mid \\lambda_1 + q, \\lambda_2 - q, \\sigma).\n\\]\nThe consequence is that an improper uniform prior \\(p(\\mu,\\sigma)\n\\propto 1\\) leads to an improper posterior. This impropriety arises because the neighborhoods around \\(\\lambda_1 + q, \\lambda_2 - q\\) have the same mass no matter what \\(q\\) is. 
Therefore, a sampler would need to spend as much time in the neighborhood of \\(\\lambda_1=1\\,000\\,000\\,000\\) and \\(\\lambda_2=-1\\,000\\,000\\,000\\) as it does in the neighborhood of \\(\\lambda_1=0\\) and \\(\\lambda_2=0\\), and so on for ever more far-ranging values.\nThe marginal posterior \\(p(\\lambda_1,\\lambda_2 \\mid y)\\) for this model is thus improper.2\nThe impropriety shows up visually as a ridge in the posterior density, as illustrated in the left-hand plot. The ridge for this model is along the line where \\(\\lambda_2 = -\\lambda_1 + c\\) for some constant \\(c\\).\nContrast this model with a simple regression with a single intercept parameter \\(\\mu\\) and data model \\[\ny_n \\sim \\textsf{normal}(\\mu,\\sigma).\n\\] Even with an improper prior, the posterior is proper as long as there are at least two data points \\(y_n\\) with distinct values.\n\n\n\nConsider an item-response theory model for students \\(j \\in 1{:}J\\) with abilities \\(\\alpha_j\\) and test items \\(i \\in 1{:}I\\) with difficulties \\(\\beta_i\\). The observed data are an \\(I \\times J\\) array with entries \\(y_{i, j} \\in \\{ 0, 1 \\}\\) coded such that \\(y_{i, j} = 1\\) indicates that student \\(j\\) answered question \\(i\\) correctly. The sampling distribution for the data is \\[\ny_{i, j} \\sim \\textsf{Bernoulli}(\\operatorname{logit}^{-1}(\\alpha_j - \\beta_i)).\n\\]\nFor any constant \\(c\\), the probability of \\(y\\) is unchanged by adding a constant \\(c\\) to all the abilities and subtracting it from all the difficulties, i.e., \\[\np(y \\mid \\alpha, \\beta)\n=\np(y \\mid \\alpha + c, \\beta - c).\n\\]\nThis leads to a multivariate version of the ridge displayed by the regression with two intercepts discussed above.\n\n\n\nThe general form of the collinearity problem arises when predictors for a regression are collinear. 
For example, consider a linear regression data model \\[\ny_n \\sim \\textsf{normal}(x_n \\beta, \\sigma)\n\\] for an \\(N\\)-dimensional observation vector \\(y\\), an \\(N \\times K\\) predictor matrix \\(x\\), and a \\(K\\)-dimensional coefficient vector \\(\\beta\\).\nNow suppose that column \\(k\\) of the predictor matrix is a multiple of column \\(k'\\), i.e., there is some constant \\(c\\) such that \\(x_{n,k} = c\n\\, x_{n,k'}\\) for all \\(n\\). In this case, the coefficients \\(\\beta_k\\) and \\(\\beta_{k'}\\) can covary without changing the predictions, so that for any \\(d\\), \\[\np(y \\mid \\ldots, \\beta_k, \\dotsc, \\beta_{k'}, \\dotsc, \\sigma)\n=\np(y \\mid \\ldots, \\beta_k + d, \\dotsc, \\beta_{k'} - c \\, d, \\dotsc,\n\\sigma).\n\\]\nEven if columns of the predictor matrix are not exactly collinear as discussed above, they cause similar problems for inference if they are nearly collinear.\n\n\n\nConsider adding a discrimination parameter \\(\\delta_i\\) for each question in an IRT model, with data model \\[\ny_{i, j} \\sim \\textsf{Bernoulli}(\\operatorname{logit}^{-1}(\\delta_i(\\alpha_j - \\beta_i))).\n\\] For any constant \\(c \\neq 0\\), multiplying \\(\\delta\\) by \\(c\\) and dividing \\(\\alpha\\) and \\(\\beta\\) by \\(c\\) produces the same likelihood, \\[\np(y \\mid \\delta,\\alpha,\\beta)\n= p(y \\mid c \\delta, \\frac{1}{c}\\alpha, \\frac{1}{c}\\beta).\n\\] If \\(c < 0\\), this switches the signs of every component in \\(\\alpha\\), \\(\\beta\\), and \\(\\delta\\) without changing the density.\n\n\n\nIn order to parameterize a \\(K\\)-simplex (i.e., a \\(K\\)-vector with non-negative values that sum to one), only \\(K - 1\\) parameters are necessary because the \\(K\\)th is just one minus the sum of the first \\(K\n- 1\\) parameters, so that if \\(\\theta\\) is a \\(K\\)-simplex, \\[\n\\theta_K = 1 - \\sum_{k=1}^{K-1} \\theta_k.\n\\]\nThe softmax function maps a \\(K\\)-vector \\(\\alpha\\) of linear predictors to a 
\\(K\\)-simplex \\(\\theta = \\texttt{softmax}(\\alpha)\\) by defining \\[\n\\theta_k = \\frac{\\exp(\\alpha_k)}{\\sum_{k'=1}^K \\exp(\\alpha_{k'})}.\n\\]\nThe softmax function is many-to-one, which leads to a lack of identifiability of the unconstrained parameters \\(\\alpha\\). In particular, adding or subtracting a constant from each \\(\\alpha_k\\) produces the same simplex \\(\\theta\\).\n\n\n\n\nAll of the examples discussed in the previous section allow translation or scaling of parameters while leaving the data probability density invariant. These problems can be mitigated in several ways.\n\n\nIn the case of the multiple intercepts, \\(\\lambda_1\\) and \\(\\lambda_2\\), the simplest solution is to remove the redundant intercept, resulting in a model with a single intercept parameter \\(\\mu\\) and sampling distribution \\(y_n \\sim \\textsf{normal}(\\mu, \\sigma)\\). The same solution works for solving the problem with collinearity—just remove one of the columns of the predictor matrix \\(x\\).\n\n\n\nThe IRT model without a discrimination parameter can be fixed by pinning one of its parameters to a fixed value, typically 0. For example, the first student ability \\(\\alpha_1\\) can be fixed to 0. Now all other student ability parameters can be interpreted as being relative to student 1. Similarly, the difficulty parameters are interpretable relative to student 1’s ability to answer them.\nThis solution is not sufficient to deal with the multiplicative invariance introduced by the question discrimination parameters \\(\\delta_i\\). To solve this problem, one of the discrimination parameters, say \\(\\delta_1\\), must also be constrained. Because it’s a multiplicative and not an additive invariance, it must be constrained to a non-zero value, with 1 being a convenient choice. 
Now all of the discrimination parameters may be interpreted relative to item 1’s discrimination.\nThe many-to-one nature of \\(\\texttt{softmax}(\\alpha)\\) is typically mitigated by pinning a component of \\(\\alpha\\), for instance fixing \\(\\alpha_K = 0\\). The resulting mapping is one-to-one from \\(K-1\\) unconstrained parameters to a \\(K\\)-simplex. This is roughly how simplex-constrained parameters are defined in Stan; see the reference manual chapter on constrained parameter transforms for a precise definition. The Stan code for creating a simplex from a \\((K-1)\\)-vector can be written as\nvector softmax_id(vector alpha) {\n vector[num_elements(alpha) + 1] alphac1;\n for (k in 1:num_elements(alpha)) {\n alphac1[k] = alpha[k];\n }\n alphac1[num_elements(alphac1)] = 0;\n return softmax(alphac1);\n}\n\n\n\nSo far, the models have been discussed as if the priors on the parameters were improper uniform priors.\nA more general Bayesian solution to these invariance problems is to impose proper priors on the parameters. This approach can be used to solve problems arising from either additive or multiplicative invariance.\nFor example, normal priors on the multiple intercepts, \\[\n\\lambda_1, \\lambda_2 \\sim \\textsf{normal}(0,\\tau),\n\\] with a constant scale \\(\\tau\\), ensure that the posterior mode is located at a point where \\(\\lambda_1 = \\lambda_2\\), because this maximizes \\(\\log \\textsf{normal}(\\lambda_1 \\mid 0,\\tau) + \\log\n\\textsf{normal}(\\lambda_2 \\mid 0,\\tau)\\).3\nThe following plots show the posteriors for the two-intercept parameterization without a prior, the two-intercept parameterization with a standard normal prior, and the one-intercept reparameterization without a prior. 
For all three cases, the posterior is plotted for 100 data points drawn from a standard normal.\nThe two intercept parameterization leads to an improper posterior with a ridge extending infinitely to the northwest and southeast.\n\n\n\nTwo intercepts with improper prior\n\n\nAdding a standard normal prior for the intercepts results in a proper posterior.\n\n\n\nTwo intercepts with proper prior\n\n\nThe single intercept parameterization with no prior also has a proper posterior.\n\n\n\nSingle intercepts with improper prior\n\n\nThe addition of a prior to the two intercepts model is shown in the second plot; the final plot shows the result of reparameterizing to a single intercept.\nAn alternative strategy for identifying a \\(K\\)-simplex parameterization \\(\\theta = \\texttt{softmax}(\\alpha)\\) in terms of an unconstrained \\(K\\)-vector \\(\\alpha\\) is to place a prior on the components of \\(\\alpha\\) with a fixed location (that is, specifically avoid hierarchical priors with varying location). Unlike the approach of pinning \\(\\alpha_K =\n0\\), the prior-based approach models the \\(K\\) outcomes symmetrically rather than modeling \\(K-1\\) outcomes relative to the \\(K\\)-th. The pinned parameterization, on the other hand, is usually more efficient statistically because it does not have the extra degree of (prior constrained) wiggle room.\n\n\n\nCare must be used when adding a prior to resolve invariances. If the prior is taken to be too broad (i.e., too vague), the resolution is in theory only, and samplers will still struggle.\nIdeally, a realistic prior will be formulated based on substantive knowledge of the problem being modeled. Such a prior can be chosen to have the appropriate strength based on prior knowledge. 
A strongly informative prior makes sense if there is strong prior information.\nWhen there is not strong prior information, a weakly informative prior strikes the proper balance between controlling computational inference and not dominating the data in the posterior. In most problems, the modeler will have at least some notion of the expected scale of the estimates and be able to choose a prior for identification purposes that does not dominate the data, but provides sufficient computational control on the posterior.\nPriors can also be used in the same way to control the additive invariance of the IRT model. A typical approach is to place a strong prior on student ability parameters \\(\\alpha\\) to control scale simply to control the additive invariance of the basic IRT model and the multiplicative invariance of the model extended with item discrimination parameters; such a prior does not add any prior knowledge to the problem. Then a prior on item difficulty can be chosen that is either informative or weakly informative based on prior knowledge of the problem.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html#label-switching-problematic.section", + "href": "stan-users-guide/problematic-posteriors.html#label-switching-problematic.section", + "title": "Problematic Posteriors", + "section": "", + "text": "Where collinearity in regression models can lead to infinitely many posterior maxima, swapping components in a mixture model leads to finitely many posterior maxima.\n\n\nConsider a normal mixture model with two location parameters \\(\\mu_1\\) and \\(\\mu_2\\), a shared scale \\(\\sigma > 0\\), a mixture ratio \\(\\theta \\in\n[0,1]\\), and data model \\[\np(y \\mid \\theta,\\mu_1,\\mu_2,\\sigma)\n= \\prod_{n=1}^N \\big( \\theta \\, \\textsf{normal}(y_n \\mid \\mu_1,\\sigma)\n + (1 - \\theta) \\, \\textsf{normal}(y_n \\mid \\mu_2,\\sigma) 
\\big).\n\\] The issue here is exchangeability of the mixture components, because \\[\np(\\theta,\\mu_1,\\mu_2,\\sigma \\mid y) = p\\big((1-\\theta),\\mu_2,\\mu_1,\\sigma \\mid y\\big).\n\\] The problem is exacerbated as the number of mixture components \\(K\\) grows, as in clustering models, leading to \\(K!\\) identical posterior maxima.\n\n\n\nThe analysis of posterior convergence and effective sample size is also difficult for mixture models. For example, the \\(\\hat{R}\\) convergence statistic reported by Stan and the computation of effective sample size are both compromised by label switching. The problem is that the posterior mean, a key ingredient in these computations, is affected by label switching, resulting in a posterior mean for \\(\\mu_1\\) that is equal to that of \\(\\mu_2\\), and a posterior mean for \\(\\theta\\) that is always 1/2, no matter what the data are.\n\n\n\nIn some sense, the index (or label) of a mixture component is irrelevant. Posterior predictive inferences can still be carried out without identifying mixture components. For example, the log probability of a new observation does not depend on the identities of the mixture components. The only sound Bayesian inferences in such models are those that are invariant to label switching. Posterior means for the parameters are meaningless because they are not invariant to label switching; for example, the posterior mean for \\(\\theta\\) in the two component mixture model will always be 1/2.\n\n\n\nTheoretically, this should not present a problem for inference because all of the integrals involved in posterior predictive inference will be well behaved. The problem in practice is computation.\nBeing able to carry out such invariant inferences in practice is an altogether different matter. It is almost always intractable to find even a single posterior mode, much less balance the exploration of the neighborhoods of multiple local maxima according to the probability masses. 
In Gibbs sampling, it is unlikely for \\(\\mu_1\\) to move to a new mode when sampled conditioned on the current values of \\(\\mu_2\\) and \\(\\theta\\). For HMC and NUTS, the problem is that the sampler gets stuck in one of the two “bowls” around the modes and cannot gather enough energy from random momentum assignment to move from one mode to another.\nEven with a proper posterior, all known sampling and inference techniques are notoriously ineffective when the number of modes grows super-exponentially as it does for mixture models with increasing numbers of components.\n\n\n\nSeveral hacks (i.e., “tricks”) have been suggested and employed to deal with the problems posed by label switching in practice.\n\n\nOne common strategy is to impose a constraint on the parameters that identifies the components. For instance, we might consider constraining \\(\\mu_1 < \\mu_2\\) in the two-component normal mixture model discussed above. A problem that can arise from such an approach is when there is substantial probability mass for the opposite ordering \\(\\mu_1 > \\mu_2\\). In these cases, the posteriors are affected by the constraint and true posterior uncertainty in \\(\\mu_1\\) and \\(\\mu_2\\) is not captured by the model with the constraint. In addition, standard approaches to posterior inference for event probabilities are compromised. For instance, attempting to use \\(M\\) posterior draws to estimate \\(\\Pr[\\mu_1 > \\mu_2]\\) will fail, because the estimator \\[\n\\Pr[\\mu_1 > \\mu_2]\n\\approx\n\\frac{1}{M} \\sum_{m=1}^M \\textrm{I}\\left(\\mu_1^{(m)} > \\mu_2^{(m)}\\right)\n\\] will result in an estimate of 0 because the posterior respects the constraint in the model.\n\n\n\nAnother common approach is to run a single chain or to initialize the parameters near realistic values.4\nThis can work better than the hard constraint approach if reasonable initial values can be found and the labels do not switch within a Markov chain. 
The result is that all chains are glued to a neighborhood of a particular mode in the posterior.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html#component-collapsing-in-mixture-models", + "href": "stan-users-guide/problematic-posteriors.html#component-collapsing-in-mixture-models", + "title": "Problematic Posteriors", + "section": "", + "text": "It is possible for two mixture components in a mixture model to collapse to the same values during sampling or optimization. For example, a mixture of \\(K\\) normals might devolve to have \\(\\mu_i =\n\\mu_j\\) and \\(\\sigma_i = \\sigma_j\\) for \\(i \\neq j\\).\nThis will typically happen early in sampling due to initialization in MCMC or optimization or arise from random movement during MCMC. Once the parameters match for a given draw \\((m)\\), it can become hard to escape because there can be a trough of low-density mass between the current parameter values and the ones without collapsed components.\nIt may help to use a smaller step size during warmup or a stronger prior on each mixture component’s membership responsibility. 
A more extreme measure is to include additional mixture components to deal with the possibility that some of them may collapse.\nIn general, it is difficult to recover exactly the right \\(K\\) mixture components in a mixture model as \\(K\\) increases beyond one (yes, even a two-component mixture can have this problem).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html#posteriors-with-unbounded-densities", + "href": "stan-users-guide/problematic-posteriors.html#posteriors-with-unbounded-densities", + "title": "Problematic Posteriors", + "section": "", + "text": "In some cases, the posterior density grows without bounds as parameters approach certain poles or boundaries. In such cases, there are no posterior modes and numerical stability issues can arise as sampled parameters approach constraint boundaries.\n\n\nOne such example is a binary mixture model with scales varying by component, \\(\\sigma_1\\) and \\(\\sigma_2\\) for locations \\(\\mu_1\\) and \\(\\mu_2\\). In this situation, the density grows without bound as \\(\\sigma_1 \\rightarrow 0\\) and \\(\\mu_1 \\rightarrow y_n\\) for some \\(n\\); that is, one of the mixture components concentrates all of its mass around a single data item \\(y_n\\).\n\n\n\nAnother example of unbounded densities arises with a posterior such as \\(\\textsf{beta}(\\phi \\mid 0.5,0.5)\\), which can arise if seemingly weak beta priors are used for groups that have no data. This density is unbounded as \\(\\phi \\rightarrow 0\\) and \\(\\phi \\rightarrow 1\\). 
Similarly, a Bernoulli data model coupled with a “weak” beta prior, leads to a posterior \\[\\begin{align*}\np(\\phi \\mid y)\n&\\propto\n \\textsf{beta}(\\phi \\mid 0.5,0.5) \\times \\prod_{n=1}^N \\textsf{Bernoulli}(y_n \\mid \\phi) \\\\\n&=\n \\textsf{beta}\\left(\\phi \\,\\middle|\\, 0.5 + \\sum_{n=1}^N y_n, 0.5 + N - \\sum_{n=1}^N y_n\\right).\n\\end{align*}\\]\nIf \\(N = 9\\) and each \\(y_n = 1\\), the posterior is \\(\\textsf{beta}(\\phi \\mid 9.5,0.5)\\). This posterior is unbounded as \\(\\phi\n\\rightarrow 1\\). Nevertheless, the posterior is proper, and although there is no posterior mode, the posterior mean is well-defined with a value of exactly 0.95.\n\n\nStan does not sample directly on the constrained \\((0,1)\\) space for this problem, so it doesn’t directly deal with unbounded density values. Rather, the probability values \\(\\phi\\) are logit-transformed to \\((-\\infty,\\infty)\\). The boundaries at 0 and 1 are pushed out to \\(-\\infty\\) and \\(\\infty\\) respectively. The Jacobian adjustment that Stan automatically applies ensures the unconstrained density is proper. The adjustment for the particular case of \\((0,1)\\) is \\(\\log\n\\operatorname{logit}^{-1}(u) + \\log (1 - \\operatorname{logit}^{-1}(u))\\), where \\(u = \\operatorname{logit}(\\phi)\\) is the unconstrained parameter.\nThere are two problems that still arise, though. The first is that if the posterior mass for \\(\\phi\\) is near one of the boundaries, the logit-transformed parameter will have to sweep out long paths and thus can dominate the U-turn condition imposed by the no-U-turn sampler (NUTS). The second issue is that the inverse transform from the unconstrained space to the constrained space can underflow to 0 or overflow to 1, even when the unconstrained parameter is not infinite. 
Similar problems arise for the expectation terms in logistic regression, which is why the logit-scale parameterizations of the Bernoulli and binomial distributions are more stable.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html#posteriors-with-unbounded-parameters", + "href": "stan-users-guide/problematic-posteriors.html#posteriors-with-unbounded-parameters", + "title": "Problematic Posteriors", + "section": "", + "text": "In some cases, the posterior density will not grow without bound, but parameters will grow without bound with gradually increasing density values. Like the models discussed in the previous section that have densities that grow without bound, such models also have no posterior modes.\n\n\nConsider a logistic regression model with \\(N\\) observed outcomes \\(y_n\n\\in \\{ 0, 1 \\}\\), an \\(N \\times K\\) matrix \\(x\\) of predictors, a \\(K\\)-dimensional coefficient vector \\(\\beta\\), and data model \\[\ny_n \\sim \\textsf{Bernoulli}(\\operatorname{logit}^{-1}(x_n \\beta)).\n\\] Now suppose that column \\(k\\) of the predictor matrix is such that \\(x_{n,k} > 0\\) if and only if \\(y_n = 1\\), a condition known as “separability.” In this case, predictive accuracy on the observed data continues to improve as \\(\\beta_k \\rightarrow \\infty\\), because for cases with \\(y_n = 1\\), \\(x_n \\beta \\rightarrow \\infty\\) and hence \\(\\operatorname{logit}^{-1}(x_n \\beta) \\rightarrow 1\\).\nWith separability, there is no maximum to the likelihood and hence no maximum likelihood estimate. From the Bayesian perspective, the posterior is improper and therefore the marginal posterior mean for \\(\\beta_k\\) is also not defined. 
The usual solution to this problem in Bayesian models is to include a proper prior for \\(\\beta\\), which ensures a proper posterior.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html#uniform-posteriors", + "href": "stan-users-guide/problematic-posteriors.html#uniform-posteriors", + "title": "Problematic Posteriors", + "section": "", + "text": "Suppose your model includes a parameter \\(\\psi\\) that is defined on \\([0,1]\\) and is given a flat prior \\(\\textsf{uniform}(\\psi \\mid 0,1)\\). Now if the data don’t tell us anything about \\(\\psi\\), the posterior is also \\(\\textsf{uniform}(\\psi \\mid 0,1)\\).\nAlthough there is no maximum likelihood estimate for \\(\\psi\\), the posterior is uniform over a closed interval and hence proper. In the case of a uniform posterior on \\([0,1]\\), the posterior mean for \\(\\psi\\) is well-defined with value \\(1/2\\). Although there is no posterior mode, posterior predictive inference may nevertheless do the right thing by simply integrating (i.e., averaging) over the predictions for \\(\\psi\\) at all points in \\([0,1]\\).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html#sampling-difficulties-with-problematic-priors", + "href": "stan-users-guide/problematic-posteriors.html#sampling-difficulties-with-problematic-priors", + "title": "Problematic Posteriors", + "section": "", + "text": "With an improper posterior, it is theoretically impossible to properly explore the posterior. 
However, Gibbs sampling as performed by BUGS and JAGS, although still unable to properly sample from such an improper posterior, behaves differently in practice than the Hamiltonian Monte Carlo sampling performed by Stan when faced with an example such as the two intercept model discussed in the collinearity section and illustrated in the non-identifiable density plot.\n\n\nGibbs sampling, as performed by BUGS and JAGS, may appear to be efficient and well behaved for this unidentified model, but as discussed in the previous subsection, will not actually explore the posterior properly.\nConsider what happens with initial values \\(\\lambda_1^{(0)}, \\lambda_2^{(0)}\\). Gibbs sampling proceeds in iteration \\(m\\) by drawing \\[\\begin{align*}\n\\lambda_1^{(m)} &\\sim p(\\lambda_1 \\mid \\lambda_2^{(m-1)}, \\sigma^{(m-1)}, y) \\\\\n\\lambda_2^{(m)} &\\sim p(\\lambda_2 \\mid \\lambda_1^{(m)}, \\sigma^{(m-1)}, y) \\\\\n\\sigma^{(m)} &\\sim p(\\sigma \\mid \\lambda_1^{(m)}, \\lambda_2^{(m)}, y).\n\\end{align*}\\]\nNow consider the draw for \\(\\lambda_1\\) (the draw for \\(\\lambda_2\\) is symmetric), which is conjugate in this model and thus can be done efficiently. In this model, the range from which the next \\(\\lambda_1\\) can be drawn is highly constrained by the current values of \\(\\lambda_2\\) and \\(\\sigma\\). Gibbs will run quickly and provide seemingly reasonable inferences for \\(\\lambda_1 + \\lambda_2\\). But it will not explore the full range of the posterior; it will merely take a slow random walk from the initial values. This random walk behavior is typical of Gibbs sampling when posteriors are highly correlated and the primary reason to prefer Hamiltonian Monte Carlo to Gibbs sampling for models with parameters correlated in the posterior.\n\n\n\nHamiltonian Monte Carlo (HMC), as performed by Stan, is much more efficient at exploring posteriors in models where parameters are correlated in the posterior. 
In this particular example, the Hamiltonian dynamics (i.e., the motion of a fictitious particle given random momentum in the field defined by the negative log posterior) is going to run up and down along the valley defined by the potential energy (ridges in log posteriors correspond to valleys in potential energy). In practice, even with a random momentum for \\(\\lambda_1\\) and \\(\\lambda_2\\), the gradient of the log posterior is going to adjust for the correlation and the simulation will run \\(\\lambda_1\\) and \\(\\lambda_2\\) in opposite directions along the valley corresponding to the ridge in the posterior log density.\n\n\n\nStan’s default no-U-turn sampler (NUTS) is even more efficient at exploring the posterior (see Hoffman and Gelman 2014). Because NUTS simulates the motion of the fictitious particle representing the parameter values until it makes a U-turn, it will be defeated in most cases, as it will just move down the potential energy valley indefinitely without making a U-turn. What happens in practice is that the maximum number of leapfrog steps in the simulation will be hit in many of the iterations, causing a large number of log probability and gradient evaluations (1000 if the max tree depth is set to 10, as in the default). Thus sampling will appear to be slow. This is indicative of an improper posterior, not a bug in the NUTS algorithm or its implementation. It is simply not possible to sample from an improper posterior! 
Thus the behavior of HMC in general and NUTS in particular should be reassuring in that it will clearly fail in cases of improper posteriors, resulting in a clean diagnostic of sweeping out large paths in the posterior.\nHere are results of Stan runs with default parameters fit to \\(N=100\\) data points generated from \\(y_n \\sim \\textsf{normal}(0,1)\\):\nTwo Scale Parameters, Improper Prior\nInference for Stan model: improper_stan\nWarmup took (2.7, 2.6, 2.9, 2.9) seconds, 11 seconds total\nSampling took (3.4, 3.7, 3.6, 3.4) seconds, 14 seconds total\n\n Mean MCSE StdDev 5% 95% N_Eff N_Eff/s R_hat\nlp__ -5.3e+01 7.0e-02 8.5e-01 -5.5e+01 -5.3e+01 150 11 1.0\nn_leapfrog__ 1.4e+03 1.7e+01 9.2e+02 3.0e+00 2.0e+03 2987 212 1.0\nlambda1 1.3e+03 1.9e+03 2.7e+03 -2.3e+03 6.0e+03 2.1 0.15 5.2\nlambda2 -1.3e+03 1.9e+03 2.7e+03 -6.0e+03 2.3e+03 2.1 0.15 5.2\nsigma 1.0e+00 8.5e-03 6.2e-02 9.5e-01 1.2e+00 54 3.9 1.1\nmu 1.6e-01 1.9e-03 1.0e-01 -8.3e-03 3.3e-01 2966 211 1.0\nTwo Scale Parameters, Weak Prior\nWarmup took (0.40, 0.44, 0.40, 0.36) seconds, 1.6 seconds total\nSampling took (0.47, 0.40, 0.47, 0.39) seconds, 1.7 seconds total\n\n Mean MCSE StdDev 5% 95% N_Eff N_Eff/s R_hat\nlp__ -54 4.9e-02 1.3e+00 -5.7e+01 -53 728 421 1.0\nn_leapfrog__ 157 2.8e+00 1.5e+02 3.0e+00 511 3085 1784 1.0\nlambda1 0.31 2.8e-01 7.1e+00 -1.2e+01 12 638 369 1.0\nlambda2 -0.14 2.8e-01 7.1e+00 -1.2e+01 12 638 369 1.0\nsigma 1.0 2.6e-03 8.0e-02 9.2e-01 1.2 939 543 1.0\nmu 0.16 1.8e-03 1.0e-01 -8.1e-03 0.33 3289 1902 1.0\nOne Scale Parameter, Improper Prior\nWarmup took (0.011, 0.012, 0.011, 0.011) seconds, 0.044 seconds total\nSampling took (0.017, 0.020, 0.020, 0.019) seconds, 0.077 seconds total\n\n Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat\nlp__ -54 2.5e-02 0.91 -5.5e+01 -53 -53 1318 17198 1.0\nn_leapfrog__ 3.2 2.7e-01 1.7 1.0e+00 3.0 7.0 39 507 1.0\nmu 0.17 2.1e-03 0.10 -3.8e-03 0.17 0.33 2408 31417 1.0\nsigma 1.0 1.6e-03 0.071 9.3e-01 1.0 1.2 2094 27321 1.0\nOn the top is the 
non-identified model with improper uniform priors and data model \\(y_n \\sim \\textsf{normal}(\\lambda_1 + \\lambda_2,\n\\sigma)\\).\nIn the middle is the same data model as on the top plus priors \\(\\lambda_k \\sim \\textsf{normal}(0,10)\\).\nOn the bottom is an identified model with an improper prior, with data model \\(y_n \\sim \\textsf{normal}(\\mu,\\sigma)\\). All models estimate \\(\\mu\\) at roughly 0.16 with low Monte Carlo standard error, but a high posterior standard deviation of 0.1; the true value \\(\\mu=0\\) is within the 90% posterior intervals in all three models.\n\n\n\nTo illustrate the issues with sampling from non-identified and only weakly identified models, we fit three models with increasing degrees of identification of their parameters. The posteriors for these models are illustrated in the non-identifiable density plot. The first model is the unidentified model with two location parameters and no priors discussed in the collinearity section.\ndata {\n int N;\n array[N] real y;\n}\nparameters {\n real lambda1;\n real lambda2;\n real<lower=0> sigma;\n}\ntransformed parameters {\n real mu;\n mu = lambda1 + lambda2;\n}\nmodel {\n y ~ normal(mu, sigma);\n}\nThe second adds priors for lambda1 and lambda2 to the model block of the previous model.\nlambda1 ~ normal(0, 10);\nlambda2 ~ normal(0, 10);\nThe third involves a single location parameter, but no priors.\ndata {\n int N;\n array[N] real y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(mu, sigma);\n}\nAll three of the example models were fit in Stan 2.1.0 with default parameters (1000 warmup iterations, 1000 sampling iterations, NUTS sampler with max tree depth of 10). The results are shown in the non-identified fits figure. 
The key statistics from these outputs are the following.\n\nAs indicated by the R_hat column, all parameters have converged other than \\(\\lambda_1\\) and \\(\\lambda_2\\) in the non-identified model.\nThe average number of leapfrog steps is roughly 3 in the identified model, 150 in the model identified by a weak prior, and 1400 in the non-identified model.\nThe effective sample size per second for \\(\\mu\\) is roughly 31,000 in the identified model, 1,900 in the model identified with weakly informative priors, and 200 in the non-identified model; the results are similar for \\(\\sigma\\).\nIn the non-identified model, the 90% interval for \\(\\lambda_1\\) is (-2300,6000), whereas it is only (-12,12) in the model identified with weakly informative priors.\nIn all three models, the simulated values of \\(\\mu=0\\) and \\(\\sigma=1\\) are well within the posterior 90% intervals.\n\nThe first two points, lack of convergence and hitting the maximum number of leapfrog steps (equivalently maximum tree depth) are indicative of improper posteriors. 
Thus rather than covering up the problem with poor sampling as may be done with Gibbs samplers, Hamiltonian Monte Carlo tries to explore the posterior and its failure is a clear indication that something is amiss in the model.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/problematic-posteriors.html#footnotes", + "href": "stan-users-guide/problematic-posteriors.html#footnotes", + "title": "Problematic Posteriors", + "section": "Footnotes", + "text": "Footnotes\n\n\nThis example was raised by Richard McElreath on the Stan users group in a query about the difference in behavior between Gibbs sampling as used in BUGS and JAGS and the Hamiltonian Monte Carlo (HMC) and no-U-turn samplers (NUTS) used by Stan.↩︎\nThe marginal posterior \\(p(\\sigma \\mid y)\\) for \\(\\sigma\\) is proper here as long as there are at least two distinct data points.↩︎\nA Laplace prior (or an L1 regularizer for penalized maximum likelihood estimation) is not sufficient to remove this additive invariance. 
It provides shrinkage, but does not in and of itself identify the parameters because adding a constant to \\(\\lambda_1\\) and subtracting it from \\(\\lambda_2\\) results in the same value for the prior density.↩︎\nTempering methods may be viewed as automated ways to carry out such a search for modes, though most MCMC tempering methods continue to search for modes on an ongoing basis; see (Swendsen and Wang 1986; Neal 1996).↩︎", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Problematic Posteriors" + ] + }, + { + "objectID": "stan-users-guide/references.html", + "href": "stan-users-guide/references.html", + "title": "References", + "section": "", + "text": "References\n\n\n\n\n Back to top" + }, + { + "objectID": "stan-users-guide/reparameterization.html", + "href": "stan-users-guide/reparameterization.html", + "title": "Reparameterization and Change of Variables", + "section": "", + "text": "Stan supports a direct encoding of reparameterizations. Stan also supports changes of variables by directly incrementing the log probability accumulator with the log Jacobian of the transform.\n\n\nA Bayesian posterior is technically a probability measure, which is a parameterization-invariant, abstract mathematical object.1\nStan’s modeling language, on the other hand, defines a probability density, which is a non-unique, parameterization-dependent function in \\(\\mathbb{R}^N \\rightarrow \\mathbb{R}^{+}\\). In practice, this means a given model can be represented different ways in Stan, and different representations have different computational performances.\nAs pointed out by Gelman (2004) in a paper discussing the relation between parameterizations and Bayesian modeling, a change of parameterization often carries with it suggestions of how the model might change, because we tend to use certain natural classes of prior distributions. 
Thus, it’s not just that we have a fixed distribution that we want to sample from, with reparameterizations being computational aids. In addition, once we reparameterize and add prior information, the model itself typically changes, often in useful ways.\n\n\n\nReparameterizations may be implemented directly using the transformed parameters block or just in the model block.\n\n\nThe beta and Dirichlet distributions may both be reparameterized from a vector of counts to use a mean and total count.\n\n\nFor example, the Beta distribution is parameterized by two positive count parameters \\(\\alpha, \\beta > 0\\). The following example illustrates a hierarchical Stan model in which a vector of parameters theta is drawn i.i.d. from a Beta distribution whose parameters are themselves drawn from a hyperprior distribution.\nparameters {\n real<lower=0> alpha;\n real<lower=0> beta;\n // ...\n}\nmodel {\n alpha ~ ...\n beta ~ ...\n for (n in 1:N) {\n theta[n] ~ beta(alpha, beta);\n }\n // ...\n}\nIt is often more natural to specify hyperpriors in terms of transformed parameters. 
In the case of the Beta, the obvious choice for reparameterization is in terms of a mean parameter \\[\n\\phi = \\alpha / (\\alpha + \\beta)\n\\] and total count parameter \\[\n\\lambda = \\alpha + \\beta.\n\\] Following Gelman et al. (2013, Chapter 5), the mean gets a uniform prior and the count parameter a Pareto prior with \\(p(\\lambda) \\propto \\lambda^{-2.5}\\).\nparameters {\n real<lower=0, upper=1> phi;\n real<lower=0.1> lambda;\n // ...\n}\ntransformed parameters {\n real<lower=0> alpha = lambda * phi;\n real<lower=0> beta = lambda * (1 - phi);\n // ...\n}\nmodel {\n phi ~ beta(1, 1); // uniform on phi, could drop\n lambda ~ pareto(0.1, 1.5);\n for (n in 1:N) {\n theta[n] ~ beta(alpha, beta);\n }\n // ...\n}\nThe new parameters, phi and lambda, are declared in the parameters block and the parameters for the Beta distribution, alpha and beta, are declared and defined in the transformed parameters block. And if their values are not of interest, they could instead be defined as local variables in the model as follows.\nmodel {\n real alpha = lambda * phi;\n real beta = lambda * (1 - phi);\n // ...\n for (n in 1:N) {\n theta[n] ~ beta(alpha, beta);\n }\n // ...\n}\nWith vectorization, this could be expressed more compactly and efficiently as follows.\nmodel {\n theta ~ beta(lambda * phi, lambda * (1 - phi));\n // ...\n}\nIf the variables alpha and beta are of interest, they can be defined in the transformed parameters block and then used in the model.\n\n\n\nBecause the transformed parameters are being used, rather than given a distribution, there is no need to apply a Jacobian adjustment for the transform. For example, in the beta distribution example, alpha and beta have the correct posterior distribution.\n\n\n\nThe same thing can be done with a Dirichlet, replacing the mean for the Beta, which is a probability value, with a simplex. 
Assume there are \\(K > 0\\) dimensions being considered (\\(K=1\\) is trivial and \\(K=2\\) reduces to the beta distribution case). The traditional prior is\nparameters {\n vector[K] alpha;\n array[N] simplex[K] theta;\n // ...\n}\nmodel {\n alpha ~ // ...\n for (n in 1:N) {\n theta[n] ~ dirichlet(alpha);\n }\n}\nThis provides essentially \\(K\\) degrees of freedom, one for each dimension of alpha, and it is not obvious how to specify a reasonable prior for alpha.\nAn alternative coding is to use the mean, which is a simplex, and a total count.\nparameters {\n simplex[K] phi;\n real<lower=0> kappa;\n array[N] simplex[K] theta;\n // ...\n}\ntransformed parameters {\n vector[K] alpha = kappa * phi;\n // ...\n}\nmodel {\n phi ~ // ...\n kappa ~ // ...\n for (n in 1:N) {\n theta[n] ~ dirichlet(alpha);\n }\n // ...\n}\nNow it is much easier to formulate priors, because phi is the expected value of theta and kappa (minus K) is the strength of the prior mean measured in number of prior observations.\n\n\n\n\nIf the variable \\(u\\) has a \\(\\textsf{uniform}(0, 1)\\) distribution, then \\(\\operatorname{logit}(u)\\) is distributed as \\(\\textsf{logistic}(0, 1)\\). This is because inverse logit is the cumulative distribution function (cdf) for the logistic distribution, so that the logit function itself is the inverse CDF and thus maps a uniform draw in \\((0, 1)\\) to a logistically-distributed quantity.\nThings work the same way for the probit case: if \\(u\\) has a \\(\\textsf{uniform}(0, 1)\\) distribution, then \\(\\Phi^{-1}(u)\\) has a \\(\\textsf{normal}(0, 1)\\) distribution. The other way around, if \\(v\\) has a \\(\\textsf{normal}(0, 1)\\) distribution, then \\(\\Phi(v)\\) has a \\(\\textsf{uniform}(0, 1)\\) distribution.\nIn order to use the probit and logistic as priors on variables constrained to \\((0, 1)\\), create an unconstrained variable and transform it appropriately. 
For comparison, the following Stan program fragment declares a \\((0, 1)\\)-constrained parameter theta and gives it a beta prior, then uses it as a parameter in a distribution (here using foo as a placeholder).\nparameters {\n real<lower=0, upper=1> theta;\n // ...\n}\nmodel {\n theta ~ beta(a, b);\n // ...\n y ~ foo(theta);\n // ...\n}\nIf the variables a and b are one, then this imposes a uniform distribution on theta. If a and b are both less than one, then the density on theta has a U shape, whereas if they are both greater than one, the density of theta has an inverted-U or more bell-like shape.\nRoughly the same result can be achieved with unbounded parameters that are probit or inverse-logit-transformed. For example,\nparameters {\n real theta_raw;\n // ...\n}\ntransformed parameters {\n real<lower=0, upper=1> theta = inv_logit(theta_raw);\n // ...\n}\nmodel {\n theta_raw ~ logistic(mu, sigma);\n // ...\n y ~ foo(theta);\n // ...\n}\nIn this model, an unconstrained parameter theta_raw gets a logistic prior, and then the transformed parameter theta is defined to be the inverse logit of theta_raw. In this parameterization, inv_logit(mu) is the mean of the implied prior on theta. The prior distribution on theta will be flat if sigma is one and mu is zero, and will be U-shaped if sigma is larger than one and bell shaped if sigma is less than one.\nWhen moving from a variable in \\((0, 1)\\) to a simplex, the same trick may be performed using the softmax function, which is a multinomial generalization of the inverse logit function. First, consider a simplex parameter with a Dirichlet prior.\nparameters {\n simplex[K] theta;\n // ...\n}\nmodel {\n theta ~ dirichlet(a);\n // ...\n y ~ foo(theta);\n}\nNow a is a vector with K rows, but it has the same shape properties as the pair a and b for a beta; the beta distribution is just the distribution of the first component of a Dirichlet with parameter vector \\([a b]^{\\top}\\). 
To formulate an unconstrained prior, the exact same strategy works as for the beta.\nparameters {\n vector[K] theta_raw;\n // ...\n}\ntransformed parameters {\n simplex[K] theta = softmax(theta_raw);\n // ...\n}\nmodel {\n theta_raw ~ multi_normal_cholesky(mu, L_Sigma);\n}\nThe multivariate normal is used for convenience and efficiency with its Cholesky-factor parameterization. Now the mean is controlled by softmax(mu), but we have additional control of covariance through L_Sigma at the expense of having on the order of \\(K^2\\) parameters in the prior rather than order \\(K\\). If no covariance is desired, the number of parameters can be reduced back to \\(K\\) using a vectorized normal distribution as follows.\ntheta_raw ~ normal(mu, sigma);\nwhere either or both of mu and sigma can be vectors.\n\n\n\n\nChanges of variables are applied when the transformation of a parameter is characterized by a distribution. The standard textbook example is the lognormal distribution, which is the distribution of a variable \\(y > 0\\) whose logarithm \\(\\log y\\) has a normal distribution. The distribution is being assigned to \\(\\log y\\).\nThe change of variables requires an adjustment to the probability to account for the distortion caused by the transform. For this to work, univariate changes of variables must be monotonic and differentiable everywhere in their support. Multivariate changes of variables must be injective and differentiable everywhere in their support, and they must map \\(\\mathbb{R}^N \\rightarrow \\mathbb{R}^N\\).\nThe probability must be scaled by a Jacobian adjustment equal to the absolute determinant of the Jacobian of the transform. 
In the univariate case, the Jacobian adjustment is simply the absolute derivative of the transform.\nIn the case of log normals, if \\(y\\)’s logarithm is normal with mean \\(\\mu\\) and deviation \\(\\sigma\\), then the distribution of \\(y\\) is given by \\[\np(y)\n= \\textsf{normal}(\\log y \\mid \\mu, \\sigma) \\, \\left| \\frac{d}{dy} \\log y \\right|\n= \\textsf{normal}(\\log y \\mid \\mu, \\sigma) \\, \\frac{1}{y}.\n\\] Stan works on the log scale to prevent underflow, where \\[\n\\log p(y)\n=\n\\log \\textsf{normal}(\\log y \\mid \\mu, \\sigma) - \\log y.\n\\]\nIn Stan, the change of variables can be applied in the sampling statement. To adjust for the curvature, the log probability accumulator is incremented with the log absolute derivative of the transform. The lognormal distribution can thus be implemented directly in Stan as follows.2\nparameters {\n real<lower=0> y;\n // ...\n}\nmodel {\n log(y) ~ normal(mu, sigma);\n target += -log(y);\n // ...\n}\nIt is important, as always, to declare appropriate constraints on parameters; here y is constrained to be positive.\nIt would be slightly more efficient to define a local variable for the logarithm, as follows.\nmodel {\n real log_y;\n log_y = log(y);\n log_y ~ normal(mu, sigma);\n target += -log_y;\n // ...\n}\nIf y were declared as data instead of as a parameter, then the adjustment can be ignored because the data will be constant and Stan only requires the log probability up to a constant.\n\n\nThis section illustrates the difference between a change of variables and a simple variable transformation. A transformation samples a parameter, then transforms it, whereas a change of variables transforms a parameter, then samples it. 
Only the latter requires a Jacobian adjustment.\nIt does not matter whether the probability function is expressed using a distribution statement, such as\nlog(y) ~ normal(mu, sigma);\nor as an increment to the log probability function, as in\ntarget += normal_lpdf(log(y) | mu, sigma);\n\n\nLike the log normal, the inverse gamma distribution is a distribution of variables whose inverse has a gamma distribution. This section contrasts two approaches, first with a transform, then with a change of variables.\nThe transform based approach to defining y_inv to have an inverse gamma distribution can be coded as follows.\nparameters {\n real<lower=0> y;\n}\ntransformed parameters {\n real<lower=0> y_inv;\n y_inv = 1 / y;\n}\nmodel {\n y ~ gamma(2,4);\n}\nThe change-of-variables approach to defining y_inv to have an inverse gamma distribution can be coded as follows.\nparameters {\n real<lower=0> y_inv;\n}\ntransformed parameters {\n real<lower=0> y;\n y = 1 / y_inv; // change variables\n jacobian += -2 * log(y_inv); // Jacobian adjustment\n}\nmodel {\n y ~ gamma(2,4);\n}\nThe Jacobian adjustment is the log of the absolute derivative of the transform, which in this case is\n\\[\n\\log \\left| \\frac{d}{du} \\left( \\frac{1}{u} \\right) \\right|\n= \\log \\left| - u^{-2} \\right|\n= \\log u^{-2}\n= -2 \\log u.\n\\]\n\n\n\n\nIn the case of a multivariate transform, the log of the absolute determinant of the Jacobian of the transform must be added to the log probability accumulator. In Stan, this can be coded as follows in the general case where the Jacobian is not a full matrix.\nparameters {\n vector[K] u; // multivariate parameter\n // ...\n}\ntransformed parameters {\n vector[K] v; // transformed parameter\n matrix[K, K] J; // Jacobian matrix of transform\n // ... compute v as a function of u ...\n // ... 
compute J[m, n] = d.v[m] / d.u[n] ...\n jacobian += log(abs(determinant(J)));\n // ...\n}\nmodel {\n v ~ // ...\n // ...\n}\nIf the determinant of the Jacobian is known analytically, it will be more efficient to apply it directly than to call the determinant function, which is neither efficient nor particularly stable numerically.\nIn many cases, the Jacobian matrix will be triangular, so that only the diagonal elements will be required for the determinant calculation. Triangular Jacobians arise when each element v[k] of the transformed parameter vector only depends on elements u[1], …, u[k] of the parameter vector. For triangular matrices, the determinant is the product of the diagonal elements, so the transformed parameters block of the above model can be simplified and made more efficient by recoding as follows.\ntransformed parameters {\n // ...\n vector[K] J_diag; // diagonals of Jacobian matrix\n // ...\n // ... compute J[k, k] = d.v[k] / d.u[k] ...\n jacobian += sum(log(J_diag));\n // ...\n}\n\n\n\n\nStan allows scalar and non-scalar upper and lower bounds to be declared in the constraints for a container data type. The transforms are calculated and their log Jacobians added to the log density accumulator; the Jacobian calculations are described in detail in the reference manual chapter on constrained parameter transforms.\n\n\nFor example, suppose there is a vector parameter \\(\\alpha\\) with a vector \\(L\\) of lower bounds. 
The simplest way to deal with this if \\(L\\) is a constant is to shift a lower-bounded parameter.\ndata {\n int N;\n vector[N] L; // lower bounds\n // ...\n}\nparameters {\n vector<lower=L>[N] alpha_raw;\n // ...\n}\nThe above is equivalent to manually calculating the vector bounds by the following.\ndata {\n int N;\n vector[N] L; // lower bounds\n // ...\n}\nparameters {\n vector<lower=0>[N] alpha_raw;\n // ...\n}\ntransformed parameters {\n vector[N] alpha = L + alpha_raw;\n // ...\n}\nThe Jacobian for adding a constant is one, so its log drops out of the log density.\nEven if the lower bound is a parameter rather than data, there is no Jacobian required, because the transform from \\((L, \\alpha_{\\textrm{raw}})\\) to \\((L + \\alpha_{\\textrm{raw}}, \\alpha_{\\textrm{raw}})\\) produces a Jacobian derivative matrix with a unit determinant.\nIt’s also possible to implement the transform using an array or vector of parameters as bounds (with the requirement that the type of the variable must match the bound type) in the following.\ndata {\n int N;\n vector[N] L; // lower bounds\n // ...\n}\nparameters {\n vector<lower=0>[N] alpha_raw;\n vector<lower=L + alpha_raw>[N] alpha;\n // ...\n}\nThis is equivalent to directly transforming an unconstrained parameter and accounting for the Jacobian.\ndata {\n int N;\n vector[N] L; // lower bounds\n // ...\n}\nparameters {\n vector[N] alpha_raw;\n // ...\n}\ntransformed parameters {\n vector[N] alpha = L + exp(alpha_raw);\n jacobian += sum(alpha_raw); // log Jacobian\n // ...\n}\nmodel {\n // ...\n}\nThe adjustment is the log Jacobian determinant of the transform mapping \\(\\alpha_{\\textrm{raw}}\\) to \\(\\alpha = L + \\exp(\\alpha_{\\textrm{raw}})\\). The details are simple in this case because the Jacobian is diagonal; see the reference manual chapter on constrained parameter transforms for full details. 
Here \\(L\\) can even be a vector containing parameters that don’t depend on \\(\\alpha_{\\textrm{raw}}\\); if the bounds do depend on \\(\\alpha_{\\textrm{raw}}\\) then a revised Jacobian needs to be calculated taking into account the dependencies.\n\n\n\nSuppose there are lower and upper bounds that vary by parameter. These can be applied to shift and rescale a parameter constrained to \\((0, 1)\\). This is easily accomplished as the following.\ndata {\n int N;\n vector[N] L; // lower bounds\n vector[N] U; // upper bounds\n // ...\n}\nparameters {\n vector<lower=L, upper=U>[N] alpha;\n // ...\n}\nThe same may be accomplished by manually constructing the transform as follows.\ndata {\n int N;\n vector[N] L; // lower bounds\n vector[N] U; // upper bounds\n // ...\n}\nparameters {\n vector<lower=0, upper=1>[N] alpha_raw;\n // ...\n}\ntransformed parameters {\n vector[N] alpha = L + (U - L) .* alpha_raw;\n}\nThe expression U - L is multiplied by alpha_raw elementwise to produce a vector of variables in \\((0, U-L)\\), then adding \\(L\\) results in a variable ranging between \\((L, U)\\).\nIn this case, it is important that \\(L\\) and \\(U\\) are constants, otherwise a Jacobian would be required when multiplying by \\(U - L\\).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Reparameterization and Change of Variables" + ] + }, + { + "objectID": "stan-users-guide/reparameterization.html#theoretical-and-practical-background", + "href": "stan-users-guide/reparameterization.html#theoretical-and-practical-background", + "title": "Reparameterization and Change of Variables", + "section": "", + "text": "A Bayesian posterior is technically a probability measure, which is a parameterization-invariant, abstract mathematical object.1\nStan’s modeling language, on the other hand, defines a probability density, which is a non-unique, parameterization-dependent function in \\(\\mathbb{R}^N \\rightarrow \\mathbb{R}^{+}\\). 
In practice, this means a given model can be represented different ways in Stan, and different representations have different computational performances.\nAs pointed out by Gelman (2004) in a paper discussing the relation between parameterizations and Bayesian modeling, a change of parameterization often carries with it suggestions of how the model might change, because we tend to use certain natural classes of prior distributions. Thus, it’s not just that we have a fixed distribution that we want to sample from, with reparameterizations being computational aids. In addition, once we reparameterize and add prior information, the model itself typically changes, often in useful ways.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Reparameterization and Change of Variables" + ] + }, + { + "objectID": "stan-users-guide/reparameterization.html#reparameterizations", + "href": "stan-users-guide/reparameterization.html#reparameterizations", + "title": "Reparameterization and Change of Variables", + "section": "", + "text": "Reparameterizations may be implemented directly using the transformed parameters block or just in the model block.\n\n\nThe beta and Dirichlet distributions may both be reparameterized from a vector of counts to use a mean and total count.\n\n\nFor example, the Beta distribution is parameterized by two positive count parameters \\(\\alpha, \\beta > 0\\). The following example illustrates a hierarchical Stan model in which the elements of a vector of parameters theta are drawn i.i.d. from a Beta distribution whose parameters are themselves drawn from a hyperprior distribution.\nparameters {\n real<lower=0> alpha;\n real<lower=0> beta;\n // ...\n}\nmodel {\n alpha ~ ...\n beta ~ ...\n for (n in 1:N) {\n theta[n] ~ beta(alpha, beta);\n }\n // ...\n}\nIt is often more natural to specify hyperpriors in terms of transformed parameters. 
In the case of the Beta, the obvious choice for reparameterization is in terms of a mean parameter \\[\n\\phi = \\alpha / (\\alpha + \\beta)\n\\] and total count parameter \\[\n\\lambda = \\alpha + \\beta.\n\\] Following Gelman et al. (2013, Chapter 5), the mean gets a uniform prior and the count parameter a Pareto prior with \\(p(\\lambda) \\propto \\lambda^{-2.5}\\).\nparameters {\n real<lower=0, upper=1> phi;\n real<lower=0.1> lambda;\n // ...\n}\ntransformed parameters {\n real<lower=0> alpha = lambda * phi;\n real<lower=0> beta = lambda * (1 - phi);\n // ...\n}\nmodel {\n phi ~ beta(1, 1); // uniform on phi, could drop\n lambda ~ pareto(0.1, 1.5);\n for (n in 1:N) {\n theta[n] ~ beta(alpha, beta);\n }\n // ...\n}\nThe new parameters, phi and lambda, are declared in the parameters block and the parameters for the Beta distribution, alpha and beta, are declared and defined in the transformed parameters block. And if their values are not of interest, they could instead be defined as local variables in the model as follows.\nmodel {\n real alpha = lambda * phi;\n real beta = lambda * (1 - phi);\n // ...\n for (n in 1:N) {\n theta[n] ~ beta(alpha, beta);\n }\n // ...\n}\nWith vectorization, this could be expressed more compactly and efficiently as follows.\nmodel {\n theta ~ beta(lambda * phi, lambda * (1 - phi));\n // ...\n}\nIf the variables alpha and beta are of interest, they can be defined in the transformed parameter block and then used in the model.\n\n\n\nBecause the transformed parameters are being used, rather than given a distribution, there is no need to apply a Jacobian adjustment for the transform. For example, in the beta distribution example, alpha and beta have the correct posterior distribution.\n\n\n\nThe same thing can be done with a Dirichlet, replacing the mean for the Beta, which is a probability value, with a simplex. 
Assume there are \\(K > 0\\) dimensions being considered (\\(K=1\\) is trivial and \\(K=2\\) reduces to the beta distribution case). The traditional prior is\nparameters {\n vector[K] alpha;\n array[N] simplex[K] theta;\n // ...\n}\nmodel {\n alpha ~ // ...\n for (n in 1:N) {\n theta[n] ~ dirichlet(alpha);\n }\n}\nThis provides essentially \\(K\\) degrees of freedom, one for each dimension of alpha, and it is not obvious how to specify a reasonable prior for alpha.\nAn alternative coding is to use the mean, which is a simplex, and a total count.\nparameters {\n simplex[K] phi;\n real<lower=0> kappa;\n array[N] simplex[K] theta;\n // ...\n}\ntransformed parameters {\n vector[K] alpha = kappa * phi;\n // ...\n}\nmodel {\n phi ~ // ...\n kappa ~ // ...\n for (n in 1:N) {\n theta[n] ~ dirichlet(alpha);\n }\n // ...\n}\nNow it is much easier to formulate priors, because phi is the expected value of theta and kappa (minus K) is the strength of the prior mean measured in number of prior observations.\n\n\n\n\nIf the variable \\(u\\) has a \\(\\textsf{uniform}(0, 1)\\) distribution, then \\(\\operatorname{logit}(u)\\) is distributed as \\(\\textsf{logistic}(0, 1)\\). This is because inverse logit is the cumulative distribution function (cdf) for the logistic distribution, so that the logit function itself is the inverse CDF and thus maps a uniform draw in \\((0, 1)\\) to a logistically-distributed quantity.\nThings work the same way for the probit case: if \\(u\\) has a \\(\\textsf{uniform}(0, 1)\\) distribution, then \\(\\Phi^{-1}(u)\\) has a \\(\\textsf{normal}(0, 1)\\) distribution. The other way around, if \\(v\\) has a \\(\\textsf{normal}(0, 1)\\) distribution, then \\(\\Phi(v)\\) has a \\(\\textsf{uniform}(0, 1)\\) distribution.\nIn order to use the probit and logistic as priors on variables constrained to \\((0, 1)\\), create an unconstrained variable and transform it appropriately. 
For comparison, the following Stan program fragment declares a \\((0, 1)\\)-constrained parameter theta and gives it a beta prior, then uses it as a parameter in a distribution (here using foo as a placeholder).\nparameters {\n real<lower=0, upper=1> theta;\n // ...\n}\nmodel {\n theta ~ beta(a, b);\n // ...\n y ~ foo(theta);\n // ...\n}\nIf the variables a and b are one, then this imposes a uniform distribution on theta. If a and b are both less than one, then the density on theta has a U shape, whereas if they are both greater than one, the density of theta has an inverted-U or more bell-like shape.\nRoughly the same result can be achieved with unbounded parameters that are probit or inverse-logit-transformed. For example,\nparameters {\n real theta_raw;\n // ...\n}\ntransformed parameters {\n real<lower=0, upper=1> theta = inv_logit(theta_raw);\n // ...\n}\nmodel {\n theta_raw ~ logistic(mu, sigma);\n // ...\n y ~ foo(theta);\n // ...\n}\nIn this model, an unconstrained parameter theta_raw gets a logistic prior, and then the transformed parameter theta is defined to be the inverse logit of theta_raw. In this parameterization, inv_logit(mu) is the mean of the implied prior on theta. The prior distribution on theta will be flat if sigma is one and mu is zero, and will be U-shaped if sigma is larger than one and bell shaped if sigma is less than one.\nWhen moving from a variable in \\((0, 1)\\) to a simplex, the same trick may be performed using the softmax function, which is a multinomial generalization of the inverse logit function. First, consider a simplex parameter with a Dirichlet prior.\nparameters {\n simplex[K] theta;\n // ...\n}\nmodel {\n theta ~ dirichlet(a);\n // ...\n y ~ foo(theta);\n}\nNow a is a vector with K rows, but it has the same shape properties as the pair a and b for a beta; the beta distribution is just the distribution of the first component of a Dirichlet with parameter vector \\([a b]^{\\top}\\). 
To formulate an unconstrained prior, the exact same strategy works as for the beta.\nparameters {\n vector[K] theta_raw;\n // ...\n}\ntransformed parameters {\n simplex[K] theta = softmax(theta_raw);\n // ...\n}\nmodel {\n theta_raw ~ multi_normal_cholesky(mu, L_Sigma);\n}\nThe multivariate normal is used for convenience and efficiency with its Cholesky-factor parameterization. Now the mean is controlled by softmax(mu), but we have additional control of covariance through L_Sigma at the expense of having on the order of \\(K^2\\) parameters in the prior rather than order \\(K\\). If no covariance is desired, the number of parameters can be reduced back to \\(K\\) using a vectorized normal distribution as follows.\ntheta_raw ~ normal(mu, sigma);\nwhere either or both of mu and sigma can be vectors.", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Reparameterization and Change of Variables" + ] + }, + { + "objectID": "stan-users-guide/reparameterization.html#changes-of-variables", + "href": "stan-users-guide/reparameterization.html#changes-of-variables", + "title": "Reparameterization and Change of Variables", + "section": "", + "text": "Changes of variables are applied when the transformation of a parameter is characterized by a distribution. The standard textbook example is the lognormal distribution, which is the distribution of a variable \\(y > 0\\) whose logarithm \\(\\log y\\) has a normal distribution. The distribution is being assigned to \\(\\log y\\).\nThe change of variables requires an adjustment to the probability to account for the distortion caused by the transform. For this to work, univariate changes of variables must be monotonic and differentiable everywhere in their support. 
Multivariate changes of variables must be injective and differentiable everywhere in their support, and they must map \\(\\mathbb{R}^N \\rightarrow \\mathbb{R}^N\\).\nThe probability must be scaled by a Jacobian adjustment equal to the absolute determinant of the Jacobian of the transform. In the univariate case, the Jacobian adjustment is simply the absolute derivative of the transform.\nIn the case of log normals, if \\(y\\)’s logarithm is normal with mean \\(\\mu\\) and deviation \\(\\sigma\\), then the distribution of \\(y\\) is given by \\[\np(y)\n= \\textsf{normal}(\\log y \\mid \\mu, \\sigma) \\, \\left| \\frac{d}{dy} \\log y \\right|\n= \\textsf{normal}(\\log y \\mid \\mu, \\sigma) \\, \\frac{1}{y}.\n\\] Stan works on the log scale to prevent underflow, where \\[\n\\log p(y)\n=\n\\log \\textsf{normal}(\\log y \\mid \\mu, \\sigma) - \\log y.\n\\]\nIn Stan, the change of variables can be applied in the sampling statement. To adjust for the curvature, the log probability accumulator is incremented with the log absolute derivative of the transform. The lognormal distribution can thus be implemented directly in Stan as follows.2\nparameters {\n real<lower=0> y;\n // ...\n}\nmodel {\n log(y) ~ normal(mu, sigma);\n target += -log(y);\n // ...\n}\nIt is important, as always, to declare appropriate constraints on parameters; here y is constrained to be positive.\nIt would be slightly more efficient to define a local variable for the logarithm, as follows.\nmodel {\n real log_y;\n log_y = log(y);\n log_y ~ normal(mu, sigma);\n target += -log_y;\n // ...\n}\nIf y were declared as data instead of as a parameter, then the adjustment can be ignored because the data will be constant and Stan only requires the log probability up to a constant.\n\n\nThis section illustrates the difference between a change of variables and a simple variable transformation. 
A transformation samples a parameter, then transforms it, whereas a change of variables transforms a parameter, then samples it. Only the latter requires a Jacobian adjustment.\nIt does not matter whether the probability function is expressed using a distribution statement, such as\nlog(y) ~ normal(mu, sigma);\nor as an increment to the log probability function, as in\ntarget += normal_lpdf(log(y) | mu, sigma);\n\n\nLike the log normal, the inverse gamma distribution is a distribution of variables whose inverse has a gamma distribution. This section contrasts two approaches, first with a transform, then with a change of variables.\nThe transform based approach to defining y_inv to have an inverse gamma distribution can be coded as follows.\nparameters {\n real<lower=0> y;\n}\ntransformed parameters {\n real<lower=0> y_inv;\n y_inv = 1 / y;\n}\nmodel {\n y ~ gamma(2,4);\n}\nThe change-of-variables approach to defining y_inv to have an inverse gamma distribution can be coded as follows.\nparameters {\n real<lower=0> y_inv;\n}\ntransformed parameters {\n real<lower=0> y;\n y = 1 / y_inv; // change variables\n jacobian += -2 * log(y_inv); // Jacobian adjustment\n}\nmodel {\n y ~ gamma(2,4);\n}\nThe Jacobian adjustment is the log of the absolute derivative of the transform, which in this case is\n\\[\n\\log \\left| \\frac{d}{du} \\left( \\frac{1}{u} \\right) \\right|\n= \\log \\left| - u^{-2} \\right|\n= \\log u^{-2}\n= -2 \\log u.\n\\]\n\n\n\n\nIn the case of a multivariate transform, the log of the absolute determinant of the Jacobian of the transform must be added to the log probability accumulator. In Stan, this can be coded as follows in the general case where the Jacobian is not a full matrix.\nparameters {\n vector[K] u; // multivariate parameter\n // ...\n}\ntransformed parameters {\n vector[K] v; // transformed parameter\n matrix[K, K] J; // Jacobian matrix of transform\n // ... compute v as a function of u ...\n // ... 
compute J[m, n] = d.v[m] / d.u[n] ...\n jacobian += log(abs(determinant(J)));\n // ...\n}\nmodel {\n v ~ // ...\n // ...\n}\nIf the determinant of the Jacobian is known analytically, it will be more efficient to apply it directly than to call the determinant function, which is neither efficient nor particularly stable numerically.\nIn many cases, the Jacobian matrix will be triangular, so that only the diagonal elements will be required for the determinant calculation. Triangular Jacobians arise when each element v[k] of the transformed parameter vector only depends on elements u[1], …, u[k] of the parameter vector. For triangular matrices, the determinant is the product of the diagonal elements, so the transformed parameters block of the above model can be simplified and made more efficient by recoding as follows.\ntransformed parameters {\n // ...\n vector[K] J_diag; // diagonals of Jacobian matrix\n // ...\n // ... compute J[k, k] = d.v[k] / d.u[k] ...\n jacobian += sum(log(J_diag));\n // ...\n}", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Reparameterization and Change of Variables" + ] + }, + { + "objectID": "stan-users-guide/reparameterization.html#vectors-with-varying-bounds", + "href": "stan-users-guide/reparameterization.html#vectors-with-varying-bounds", + "title": "Reparameterization and Change of Variables", + "section": "", + "text": "Stan allows scalar and non-scalar upper and lower bounds to be declared in the constraints for a container data type. The transforms are calculated and their log Jacobians added to the log density accumulator; the Jacobian calculations are described in detail in the reference manual chapter on constrained parameter transforms.\n\n\nFor example, suppose there is a vector parameter \\(\\alpha\\) with a vector \\(L\\) of lower bounds. 
The simplest way to deal with this if \\(L\\) is a constant is to shift a lower-bounded parameter.\ndata {\n int N;\n vector[N] L; // lower bounds\n // ...\n}\nparameters {\n vector<lower=L>[N] alpha_raw;\n // ...\n}\nThe above is equivalent to manually calculating the vector bounds by the following.\ndata {\n int N;\n vector[N] L; // lower bounds\n // ...\n}\nparameters {\n vector<lower=0>[N] alpha_raw;\n // ...\n}\ntransformed parameters {\n vector[N] alpha = L + alpha_raw;\n // ...\n}\nThe Jacobian for adding a constant is one, so its log drops out of the log density.\nEven if the lower bound is a parameter rather than data, there is no Jacobian required, because the transform from \\((L, \\alpha_{\\textrm{raw}})\\) to \\((L + \\alpha_{\\textrm{raw}}, \\alpha_{\\textrm{raw}})\\) produces a Jacobian derivative matrix with a unit determinant.\nIt’s also possible to implement the transform using an array or vector of parameters as bounds (with the requirement that the type of the variable must match the bound type) in the following.\ndata {\n int N;\n vector[N] L; // lower bounds\n // ...\n}\nparameters {\n vector<lower=0>[N] alpha_raw;\n vector<lower=L + alpha_raw>[N] alpha;\n // ...\n}\nThis is equivalent to directly transforming an unconstrained parameter and accounting for the Jacobian.\ndata {\n int N;\n vector[N] L; // lower bounds\n // ...\n}\nparameters {\n vector[N] alpha_raw;\n // ...\n}\ntransformed parameters {\n vector[N] alpha = L + exp(alpha_raw);\n jacobian += sum(alpha_raw); // log Jacobian\n // ...\n}\nmodel {\n // ...\n}\nThe adjustment is the log Jacobian determinant of the transform mapping \\(\\alpha_{\\textrm{raw}}\\) to \\(\\alpha = L + \\exp(\\alpha_{\\textrm{raw}})\\). The details are simple in this case because the Jacobian is diagonal; see the reference manual chapter on constrained parameter transforms for full details. 
Here \\(L\\) can even be a vector containing parameters that don’t depend on \\(\\alpha_{\\textrm{raw}}\\); if the bounds do depend on \\(\\alpha_{\\textrm{raw}}\\) then a revised Jacobian needs to be calculated taking into account the dependencies.\n\n\n\nSuppose there are lower and upper bounds that vary by parameter. These can be applied to shift and rescale a parameter constrained to \\((0, 1)\\). This is easily accomplished as the following.\ndata {\n int N;\n vector[N] L; // lower bounds\n vector[N] U; // upper bounds\n // ...\n}\nparameters {\n vector<lower=L, upper=U>[N] alpha;\n // ...\n}\nThe same may be accomplished by manually constructing the transform as follows.\ndata {\n int N;\n vector[N] L; // lower bounds\n vector[N] U; // upper bounds\n // ...\n}\nparameters {\n vector<lower=0, upper=1>[N] alpha_raw;\n // ...\n}\ntransformed parameters {\n vector[N] alpha = L + (U - L) .* alpha_raw;\n}\nThe expression U - L is multiplied by alpha_raw elementwise to produce a vector of variables in \\((0, U-L)\\), then adding \\(L\\) results in a variable ranging between \\((L, U)\\).\nIn this case, it is important that \\(L\\) and \\(U\\) are constants, otherwise a Jacobian would be required when multiplying by \\(U - L\\).", + "crumbs": [ + "Stan Users Guide", + "Programming Techniques", + "Reparameterization and Change of Variables" + ] + }, + { + "objectID": "stan-users-guide/reparameterization.html#footnotes", + "href": "stan-users-guide/reparameterization.html#footnotes", + "title": "Reparameterization and Change of Variables", + "section": "Footnotes", + "text": "Footnotes\n\n\nThis is in contrast to (penalized) maximum likelihood estimates, which are not parameterization invariant.↩︎\nThis example is for illustrative purposes only; the recommended way to implement the lognormal distribution in Stan is with the built-in lognormal probability function; see the functions reference manual for details.↩︎", + "crumbs": [ + "Stan Users Guide", + "Programming 
Techniques", + "Reparameterization and Change of Variables" + ] + }, + { + "objectID": "stan-users-guide/sparse-ragged.html", + "href": "stan-users-guide/sparse-ragged.html", + "title": "Sparse and Ragged Data Structures", + "section": "", + "text": "Stan does not directly support either sparse or ragged data structures, though both can be accommodated with some programming effort. The sparse matrices chapter introduces a special-purpose sparse matrix times dense vector multiplication, which should be used where applicable; this chapter covers more general data structures.\n\n\nCoding sparse data structures is as easy as moving from a matrix-like data structure to a database-like data structure. For example, consider the coding of sparse data for the IRT models discussed in the item-response model section. There are \\(J\\) students and \\(K\\) questions, and if every student answers every question, then it is practical to declare the data as a \\(J \\times K\\) array of answers.\ndata {\n int<lower=1> J;\n int<lower=1> K;\n array[J, K] int<lower=0, upper=1> y;\n // ...\nmodel {\n for (j in 1:J) {\n for (k in 1:K) {\n y[j, k] ~ bernoulli_logit(delta[k] * (alpha[j] - beta[k]));\n }\n }\n // ...\n}\nWhen not every student is given every question, the dense array coding will no longer work, because Stan does not support undefined values.\nThe following missing data example shows an example with \\(J=3\\) and \\(K=4\\), with missing responses shown as NA, as in R.\n\\[\\begin{equation*}\ny\n=\n\\left[\n\\begin{array}{cccc}\n0 & 1 & \\mbox{NA} & 1\n\\\\\n0 & \\mbox{NA} & \\mbox{NA} & 1\n\\\\\n\\mbox{NA} & 0 & \\mbox{NA} & \\mbox{NA}\n\\end{array}\n\\right]\n\\end{equation*}\\]\nThere is no support within Stan for R’s NA values, so this data structure cannot be used directly. Instead, it must be converted to a “long form” as in a database, with columns indicating the indices along with the value. 
With columns \\(jj\\) and \\(kk\\) used for the indexes (following Gelman and Hill (2007)), the 2-D array \\(y\\) is recoded as a table. The number of rows in the table equals the number of defined array elements, here \\(y_{1,1} = 0\\), \\(y_{1,2} = 1\\), up to \\(y_{3,2} = 0\\). As the array becomes larger and sparser, the long form becomes the more economical encoding.\n\n\n\njj\nkk\ny\n\n\n\n\n1\n1\n0\n\n\n1\n2\n1\n\n\n1\n4\n1\n\n\n2\n1\n0\n\n\n2\n4\n1\n\n\n3\n2\n0\n\n\n\nLetting \\(N\\) be the number of \\(y\\) that are defined, here \\(N=6\\), the data and model can be formulated as follows.\ndata {\n // ...\n int<lower=1> N;\n array[N] int<lower=1, upper=J> jj;\n array[N] int<lower=1, upper=K> kk;\n array[N] int<lower=0, upper=1> y;\n // ...\n}\nmodel {\n for (n in 1:N) {\n y[n] ~ bernoulli_logit(delta[kk[n]]\n * (alpha[jj[n]] - beta[kk[n]]));\n }\n // ...\n}\nIn the situation where there are no missing values, the two model formulations produce exactly the same log posterior density.\n\n\n\nRagged arrays are arrays that are not rectangular, but have different sized entries. This kind of structure crops up when there are different numbers of observations per entry.\nA general approach to dealing with ragged structure is to move to a full database-like data structure as discussed in the previous section. 
A more compact approach is possible with some indexing into a linear array.\nFor example, consider a data structure for three groups, each of which has a different number of observations.\n\n\n\n\n\n\n\\(y_1 = \\left[1.3 \\ \\ 2.4 \\ \\ 0.9\\right]\\\\\\) \\(y_2 = \\left[-1.8 \\ \\ -0.1\\right]\\\\\\) \\(y_3 = \\left[12.9 \\ \\ 18.7 \\ \\ 42.9 \\ \\ 4.7\\right]\\)\n\n\n\\(z = [1.3 \\ \\ 2.4 \\ \\ 0.9 \\ \\ -1.8 \\ \\ -0.1 \\ \\ 12.9 \\ \\ 18.7 \\ \\ 42.9 \\ \\ 4.7]\\\\\\) \\(s = \\{ 3 \\ \\ 2 \\ \\ 4 \\}\\)\n\n\n\nOn the left is the definition of a ragged data structure \\(y\\) with three rows of different sizes (\\(y_1\\) is size 3, \\(y_2\\) size 2, and \\(y_3\\) size 4). On the right is an example of how to code the data in Stan, using a single vector \\(z\\) to hold all the values and a separate array of integers \\(s\\) to hold the group row sizes. In this example, \\(y_1 = z_{1:3}\\), \\(y_2 = z_{4:5}\\), and \\(y_3 = z_{6:9}\\).\nSuppose the model is a simple varying intercept model, which, using vectorized notation, would yield a log-likelihood \\[\\begin{equation*}\n\\sum_{n=1}^3 \\log \\textsf{normal}(y_n \\mid \\mu_n, \\sigma).\n\\end{equation*}\\] There’s no direct way to encode this in Stan.\nA full database type structure could be used, as in the sparse example, but this is inefficient, wasting space for unnecessary indices and not allowing vector-based density operations. A better way to code this data is as a single list of values, with a separate data structure indicating the sizes of each subarray. This is indicated on the right of the example. 
This coding uses a single array for the values and a separate array for the sizes of each row.\nThe model can then be coded up using slicing operations as follows.\ndata {\n int<lower=0> N; // # observations\n int<lower=0> K; // # of groups\n vector[N] y; // observations\n array[K] int s; // group sizes\n // ...\n}\nmodel {\n int pos;\n pos = 1;\n for (k in 1:K) {\n segment(y, pos, s[k]) ~ normal(mu[k], sigma);\n pos = pos + s[k];\n }\nThis coding allows for efficient vectorization, which is worth the copy cost entailed by the segment() vector slicing operation.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Sparse and Ragged Data Structures" + ] + }, + { + "objectID": "stan-users-guide/sparse-ragged.html#sparse-data-structures", + "href": "stan-users-guide/sparse-ragged.html#sparse-data-structures", + "title": "Sparse and Ragged Data Structures", + "section": "", + "text": "Coding sparse data structures is as easy as moving from a matrix-like data structure to a database-like data structure. For example, consider the coding of sparse data for the IRT models discussed in the item-response model section. 
There are \\(J\\) students and \\(K\\) questions, and if every student answers every question, then it is practical to declare the data as a \\(J \\times K\\) array of answers.\ndata {\n int<lower=1> J;\n int<lower=1> K;\n array[J, K] int<lower=0, upper=1> y;\n // ...\nmodel {\n for (j in 1:J) {\n for (k in 1:K) {\n y[j, k] ~ bernoulli_logit(delta[k] * (alpha[j] - beta[k]));\n }\n }\n // ...\n}\nWhen not every student is given every question, the dense array coding will no longer work, because Stan does not support undefined values.\nThe following missing data example shows an example with \\(J=3\\) and \\(K=4\\), with missing responses shown as NA, as in R.\n\\[\\begin{equation*}\ny\n=\n\\left[\n\\begin{array}{cccc}\n0 & 1 & \\mbox{NA} & 1\n\\\\\n0 & \\mbox{NA} & \\mbox{NA} & 1\n\\\\\n\\mbox{NA} & 0 & \\mbox{NA} & \\mbox{NA}\n\\end{array}\n\\right]\n\\end{equation*}\\]\nThere is no support within Stan for R’s NA values, so this data structure cannot be used directly. Instead, it must be converted to a “long form” as in a database, with columns indicating the indices along with the value. With columns \\(jj\\) and \\(kk\\) used for the indexes (following Gelman and Hill (2007)), the 2-D array \\(y\\) is recoded as a table. The number of rows in the table equals the number of defined array elements, here \\(y_{1,1} = 0\\), \\(y_{1,2} = 1\\), up to \\(y_{3,2} = 0\\). 
As the array becomes larger and sparser, the long form becomes the more economical encoding.\n\n\n\njj\nkk\ny\n\n\n\n\n1\n1\n0\n\n\n1\n2\n1\n\n\n1\n4\n1\n\n\n2\n1\n0\n\n\n2\n4\n1\n\n\n3\n2\n0\n\n\n\nLetting \\(N\\) be the number of \\(y\\) that are defined, here \\(N=6\\), the data and model can be formulated as follows.\ndata {\n // ...\n int<lower=1> N;\n array[N] int<lower=1, upper=J> jj;\n array[N] int<lower=1, upper=K> kk;\n array[N] int<lower=0, upper=1> y;\n // ...\n}\nmodel {\n for (n in 1:N) {\n y[n] ~ bernoulli_logit(delta[kk[n]]\n * (alpha[jj[n]] - beta[kk[n]]));\n }\n // ...\n}\nIn the situation where there are no missing values, the two model formulations produce exactly the same log posterior density.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Sparse and Ragged Data Structures" + ] + }, + { + "objectID": "stan-users-guide/sparse-ragged.html#ragged-data-structs.section", + "href": "stan-users-guide/sparse-ragged.html#ragged-data-structs.section", + "title": "Sparse and Ragged Data Structures", + "section": "", + "text": "Ragged arrays are arrays that are not rectangular, but have different sized entries. This kind of structure crops up when there are different numbers of observations per entry.\nA general approach to dealing with ragged structure is to move to a full database-like data structure as discussed in the previous section. 
A more compact approach is possible with some indexing into a linear array.\nFor example, consider a data structure for three groups, each of which has a different number of observations.\n\n\n\n\n\n\n\\(y_1 = \\left[1.3 \\ \\ 2.4 \\ \\ 0.9\\right]\\\\\\) \\(y_2 = \\left[-1.8 \\ \\ -0.1\\right]\\\\\\) \\(y_3 = \\left[12.9 \\ \\ 18.7 \\ \\ 42.9 \\ \\ 4.7\\right]\\)\n\n\n\\(z = [1.3 \\ \\ 2.4 \\ \\ 0.9 \\ \\ -1.8 \\ \\ -0.1 \\ \\ 12.9 \\ \\ 18.7 \\ \\ 42.9 \\ \\ 4.7]\\\\\\) \\(s = \\{ 3 \\ \\ 2 \\ \\ 4 \\}\\)\n\n\n\nOn the left is the definition of a ragged data structure \\(y\\) with three rows of different sizes (\\(y_1\\) is size 3, \\(y_2\\) size 2, and \\(y_3\\) size 4). On the right is an example of how to code the data in Stan, using a single vector \\(z\\) to hold all the values and a separate array of integers \\(s\\) to hold the group row sizes. In this example, \\(y_1 = z_{1:3}\\), \\(y_2 = z_{4:5}\\), and \\(y_3 = z_{6:9}\\).\nSuppose the model is a simple varying intercept model, which, using vectorized notation, would yield a log-likelihood \\[\\begin{equation*}\n\\sum_{n=1}^3 \\log \\textsf{normal}(y_n \\mid \\mu_n, \\sigma).\n\\end{equation*}\\] There’s no direct way to encode this in Stan.\nA full database type structure could be used, as in the sparse example, but this is inefficient, wasting space for unnecessary indices and not allowing vector-based density operations. A better way to code this data is as a single list of values, with a separate data structure indicating the sizes of each subarray. This is indicated on the right of the example. 
This coding uses a single array for the values and a separate array for the sizes of each row.\nThe model can then be coded up using slicing operations as follows.\ndata {\n int<lower=0> N; // # observations\n int<lower=0> K; // # of groups\n vector[N] y; // observations\n array[K] int s; // group sizes\n // ...\n}\nmodel {\n int pos;\n pos = 1;\n for (k in 1:K) {\n segment(y, pos, s[k]) ~ normal(mu[k], sigma);\n pos = pos + s[k];\n }\nThis coding allows for efficient vectorization, which is worth the copy cost entailed by the segment() vector slicing operation.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Sparse and Ragged Data Structures" + ] + }, + { + "objectID": "stan-users-guide/survival.html", + "href": "stan-users-guide/survival.html", + "title": "Survival Models", + "section": "", + "text": "Survival models apply to animals and plants as well as inanimate objects such as machine parts or electrical components. Survival models arise when there is an event of interest for a group of subjects, machine component, or other item that is\n\ncertain to occur after some amount of time,\nbut only measured for a fixed period of time, during which the event may not have occurred for all subjects.\n\nFor example, one might wish to estimate the distribution of time to failure for solid state drives in a data center, but only measure drives for a two year period, after which some number will have failed and some will still be in service.\nSurvival models are often used comparatively, such as comparing time to death of patients diagnosed with stage one liver cancer under a new treatment and a standard treatment (pure controls are not allowed when there is an effective existing treatment for a serious condition). During a two year trial, some patients will die and others will survive.\nSurvival models may involve covariates, such as the factory at which a component is manufactured, the day on which it is manufactured, and the amount of usage it gets. 
A clinical trial might be adjusted for the sex and age of a cancer patient or the hospital at which treatment is received.\nSurvival models come in two main flavors, parametric and semi-parametric. In a parametric model, the survival time of a subject is modeled explicitly using a parametric probability distribution. There is a great deal of flexibility in how the parametric probability distribution is constructed. The sections below consider exponential and Weibull distributed survival times.\nRather than explicitly modeling a parametric survival probability, semi-parametric survival models instead model the relative effect on survival of covariates. The final sections of this chapter consider the proportional hazards survival model.\n\n\nThe exponential distribution is commonly used in survival models where there is a constant risk of failure that does not go up the longer a subject survives. This is because the exponential distribution is memoryless in the sense that if \\(T \\sim \\textrm{exponential}(\\lambda)\\) for some rate \\(\\lambda > 0,\\) then \\[\\begin{equation*}\n\\Pr[T > t] = \\Pr[T > t + t' \\mid T > t'].\n\\end{equation*}\\] If component survival times are distributed exponentially, it means the distribution of time to failure is the same no matter how long the item has already survived. This can be a reasonable assumption for electronic components, but is not a reasonable model for animal survival.\nThe exponential survival model has a single parameter for the rate, which assumes all subjects have the same distribution of failure time (this assumption is relaxed in the next section by introducing per-subject covariates). 
With the rate parameterization, the expected survival time for a component with survival time represented as the random variable \\(T\\) is \\[\\begin{equation*}\n\\mathbb{E}[T \\mid \\lambda] = \\frac{1}{\\lambda}.\n\\end{equation*}\\] The exponential distribution is sometimes parameterized in terms of a scale (i.e., inverse rate) \\(\\beta = 1 / \\lambda\\).\nThe data for a survival model consists of two components. First, there is a vector \\(t \\in (0, \\infty)^N\\) of \\(N\\) observed failure times. Second, there is a censoring time \\(t^{\\textrm{cens}}\\) such that failure times greater than \\(t^{\\textrm{cens}}\\) are not observed. The censoring time assumption imposes a constraint which requires \\(t_n < t^{\\textrm{cens}}\\) for all \\(n \\in 1{:}N.\\) For the censored subjects, the only thing required in the model is their total count, \\(N^\\textrm{cens}\\) (their covariates are also required for models with covariates).\nThe model for the observed failure times is exponential, so that for \\(n \\in 1{:}N,\\) \\[\\begin{equation*}\nt_n \\sim \\textrm{exponential}(\\lambda).\n\\end{equation*}\\]\nThe model for the censored failure times is also exponential. All that is known of a censored item is that its failure time is greater than the censoring time, so each censored item contributes a factor to the likelihood of \\[\\begin{equation*}\n\\Pr[T > t^{\\textrm{cens}}] = 1 - F_T(t^{\\textrm{cens}}),\n\\end{equation*}\\] where \\(F_T\\) is the cumulative distribution function (cdf) of survival time \\(T\\) (\\(F_X(x) = \\Pr[X \\leq x]\\) is standard notation for the cdf of a random variable \\(X\\)). 
The function \\(1 - F_T(t)\\) is the complementary cumulative distribution function (ccdf), and it is used directly to define the likelihood \\[\\begin{eqnarray*}\np(t, t^{\\textrm{cens}}, N^{\\textrm{cens}} \\mid \\lambda)\n& = &\n\\prod_{n=1}^N \\textrm{exponential}(t_n \\mid \\lambda)\n\\cdot\n\\prod_{n=1}^{N^{\\textrm{cens}}}\n\\textrm{exponentialCCDF}(t^{\\textrm{cens}} \\mid \\lambda)\n\\\\\n& = &\n\\prod_{n=1}^N \\textrm{exponential}(t_n \\mid \\lambda)\n\\cdot\n\\textrm{exponentialCCDF}(t^{\\textrm{cens}} \\mid \\lambda)^{N^{\\textrm{cens}}}.\n\\end{eqnarray*}\\]\nOn the log scale, that’s \\[\\begin{eqnarray*}\n\\log p(t, t^{\\textrm{cens}}, N^{\\textrm{cens}} \\mid \\lambda)\n& = &\n\\sum_{n=1}^N \\log \\textrm{exponential}(t_n \\mid \\lambda)\n\\\\\n& & { } + N^{\\textrm{cens}} \\cdot \\log \\textrm{exponentialCCDF}(t^{\\textrm{cens}} \\mid \\lambda).\n\\end{eqnarray*}\\]\nThe model can be completed with a standard lognormal prior on \\(\\lambda,\\) \\[\\begin{equation*}\n\\lambda \\sim \\textrm{lognormal}(0, 1),\n\\end{equation*}\\] which is reasonable if failure times are in the range of 0.1 to 10 time units, because that’s roughly the 95% central interval for a variable distributed \\(\\textrm{lognormal}(0, 1)\\). In general, the range of the prior (and likelihood!) should be adjusted with prior knowledge of expected failure times.\n\n\nThe data for a simple survival analysis without covariates can be coded as follows.\ndata {\n int<lower=0> N;\n vector[N] t;\n int<lower=0> N_cens;\n real<lower=0> t_cens;\n}\nIn this program, N is the number of uncensored observations and t contains the times of the uncensored observations. There are a further N_cens items that are right censored at time t_cens. Right censoring means that if the time to failure is greater than\nt_cens, it is only observed that the part survived until time t_cens. 
In the case where there are no covariates, the model only needs the number of censored items because they all share the same censoring time.\nThere is a single rate parameter, the inverse of which is the expected time to failure.\nparameters {\n real<lower=0> lambda;\n}\nThe exponential survival model and the prior are coded directly using vectorized distribution and ccdf statements. This both simplifies the code and makes it more computationally efficient by sharing computation across instances.\nmodel {\n t ~ exponential(lambda);\n target += N_cens * exponential_lccdf(t_cens | lambda);\n\n lambda ~ lognormal(0, 1);\n}\nThe likelihood for rate lambda is just the density of the exponential distribution for observed failure time. The Stan code is vectorized, modeling each entry of the vector t as having an exponential distribution with rate lambda. This data model could have been written as\nfor (n in 1:N) {\n t[n] ~ exponential(lambda);\n}\nThe log likelihood contribution given censored items is the number of censored items times the log complementary cumulative distribution function (lccdf) at the censoring time of the exponential distribution with rate lambda. The log likelihood terms arising from the censored events could have been added to the target log density one at a time,\nfor (n in 1:N)\n target += exponential_lccdf(t_cens | lambda);\nto define the same log density, but it is much more efficient computationally to multiply by a constant than do a handful of sequential additions.\n\n\n\n\nThe Weibull distribution is a popular alternative to the exponential distribution in cases where there is a decreasing probability of survival as a subject gets older. 
The Weibull distribution models this by generalizing the exponential distribution to include a power-law trend.\nThe Weibull distribution is parameterized by a shape \\(\\alpha > 0\\) and scale \\(\\sigma > 0.\\) For an outcome \\(t \\geq 0\\), the Weibull distribution’s probability density function is \\[\\begin{equation*}\n\\textrm{Weibull}(t \\mid \\alpha, \\sigma)\n= \\frac{\\alpha}{\\sigma}\n \\cdot \\left( \\frac{t}{\\sigma} \\right)^{\\alpha - 1}\n \\cdot \\exp\\left(-\\left(\\frac{t}{\\sigma}\\right)^{\\alpha}\\right).\n\\end{equation*}\\] In contrast, recall that the exponential distribution can be expressed using a rate (inverse scale) parameter \\(\\beta > 0\\) with probability density function \\[\\begin{equation*}\n\\textrm{exponential}(t \\mid \\beta) =\n\\beta\n\\cdot\n\\exp(-\\beta \\cdot t).\n\\end{equation*}\\] When \\(\\alpha = 1,\\) the Weibull distribution reduces to an exponential distribution, \\[\\begin{equation*}\n\\textrm{Weibull}(t \\mid 1, \\sigma)\n=\n\\textrm{exponential}\\!\\left(t \\,\\bigg|\\, \\frac{1}{\\sigma}\\right).\n\\end{equation*}\\] In other words, the Weibull is a continuous expansion of the exponential distribution.\nIf \\(T \\sim \\textrm{Weibull}(\\alpha, \\sigma),\\) then the expected survival time is \\[\\begin{equation*}\n\\mathbb{E}[T] = \\sigma \\cdot \\Gamma\\!\\left(1 + \\frac{1}{\\alpha}\\right),\n\\end{equation*}\\] where the \\(\\Gamma\\) function is the continuous completion of the factorial function (i.e., \\(\\Gamma(1 + n) = n!\\ \\) for \\(n \\in\n\\mathbb{N}\\)). As \\(\\alpha \\rightarrow 0\\) for a fixed \\(\\sigma\\) or as \\(\\sigma \\rightarrow \\infty\\) for a fixed \\(\\alpha\\), the expected survival time goes to infinity.\nThere are three regimes of the Weibull distribution.\n\n\\(\\alpha < 1.\\) A subject is more likely to fail early. 
When \\(\\alpha\n< 1,\\) the Weibull density approaches infinity as \\(t \\rightarrow 0.\\)\n\\(\\alpha = 1.\\) The Weibull distribution reduces to the exponential distribution, with a constant rate of failure over time. When \\(\\alpha = 1,\\) the Weibull density approaches \\(1 / \\sigma\\) as \\(t\n\\rightarrow 0.\\)\n\\(\\alpha > 1.\\) Subjects are less likely to fail early. When \\(\\alpha > 1,\\) the Weibull density approaches zero as \\(t \\rightarrow 0.\\)\n\nWith \\(\\alpha \\leq 1,\\) the mode is zero (\\(t = 0\\)), whereas with \\(\\alpha > 1,\\) the mode is nonzero (\\(t > 0\\)).\n\n\nWith Stan, one can just swap the exponential distribution for the Weibull distribution with the appropriate parameters and the model remains essentially the same. Recall the exponential model’s parameters and model block.\nparameters {\n real<lower=0> beta;\n}\nmodel {\n t ~ exponential(beta);\n target += N_cens * exponential_lccdf(t_cens | beta);\n\n beta ~ lognormal(0, 1);\n}\nThe Stan program for the Weibull model just swaps in the Weibull distribution and complementary cumulative distribution function with shape (alpha) and scale (sigma) parameters.\nparameters {\n real<lower=0> alpha;\n real<lower=0> sigma;\n}\nmodel {\n t ~ weibull(alpha, sigma);\n target += N_cens * weibull_lccdf(t_cens | alpha, sigma);\n\n alpha ~ lognormal(0, 1);\n sigma ~ lognormal(0, 1);\n}\nAs usual, if more is known about expected survival times, alpha and sigma should be given more informative priors.\n\n\n\n\nSuppose that for each of \\(n \\in 1{:}N\\) items observed, both censored and uncensored, there is a covariate (row) vector \\(x_n \\in\n\\mathbb{R}^K.\\) For example, a clinical trial may include the age (or a one-hot encoding of an age group) and the sex of a participant; an electronic component might include a one-hot encoding of the factory at which it was manufactured and a covariate for the load under which it has been run.\nSurvival with covariates replaces what is essentially a 
simple regression with only an intercept \\(\\lambda\\) with a generalized linear model with a log link, where the rate for item \\(n\\) is \\[\\begin{equation*}\n\\lambda_n = \\exp(x_n \\cdot \\beta),\n\\end{equation*}\\] where \\(\\beta \\in \\mathbb{R}^K\\) is a \\(K\\)-vector of regression coefficients. Thus \\[\\begin{equation*}\nt_n \\sim \\textrm{exponential}(\\lambda_n).\n\\end{equation*}\\] The censored items have probability \\[\\begin{equation*}\n\\Pr[n\\textrm{-th censored}] =\n\\textrm{exponentialCCDF}(t^{\\textrm{cens}} \\mid x^{\\textrm{cens}}_n\n\\cdot \\beta).\n\\end{equation*}\\]\nThe covariates form an \\(N \\times K\\) data matrix, \\(x \\in\n\\mathbb{R}^{N \\times K}\\). An intercept can be introduced by adding a column of 1 values to \\(x\\).\nA Stan program for the exponential survival model with covariates is as follows. It relies on the fact that the order of failure times (t and t_cens) corresponds to the ordering of items in the covariate matrices (x and x_cens).\ndata {\n int<lower=0> N;\n vector[N] t;\n int<lower=0> N_cens;\n real<lower=0> t_cens;\n int<lower=0> K;\n matrix[N, K] x;\n matrix[N_cens, K] x_cens;\n}\nparameters {\n vector[K] gamma;\n}\nmodel {\n gamma ~ normal(0, 2);\n\n t ~ exponential(exp(x * gamma));\n target += exponential_lccdf(t_cens | exp(x_cens * gamma));\n}\nBoth the distribution statement for uncensored times and the log density increment statement for censored times are vectorized, one in terms of the exponential distribution and one in terms of the log complementary cumulative distribution function.\n\n\n\nSuppose \\(T\\) is a random variable representing a survival time, with a smooth cumulative distribution function \\[\\begin{equation*}\nF_T(t) = \\Pr[T \\leq t],\n\\end{equation*}\\] so that its probability density function is \\[\\begin{equation*}\np_T(t) = \\frac{\\textrm{d}}{\\textrm{d}t} F_T(t).\n\\end{equation*}\\]\nThe survival function \\(S(t)\\) is the probability of surviving until at least time 
\\(t\\), which is just the complementary cumulative distribution function (ccdf) of the survival random variable \\(T\\), \\[\\begin{equation*}\nS(t) = 1 - F_T(t).\n\\end{equation*}\\] The survival function appeared in the Stan model in the previous section as the likelihood for items that did not fail during the period of the experiment (i.e., the censored failure times for the items that survived through the trial period).\nThe hazard function \\(h(t)\\) is the instantaneous risk of not surviving past time \\(t\\) assuming survival until time \\(t\\), which is given by \\[\\begin{equation*}\nh(t) = \\frac{p_T(t)}{S(t)} = \\frac{p_T(t)}{1 - F_T(t)}.\n\\end{equation*}\\] The cumulative hazard function \\(H(t)\\) is defined to be the accumulated hazard over time, \\[\\begin{equation*}\nH(t) = \\int_0^t h(u) \\, \\textrm{d}u.\n\\end{equation*}\\]\nThe hazard function and survival function are related through the differential equation \\[\\begin{eqnarray*}\nh(t) & = & -\\frac{\\textrm{d}}{\\textrm{d}t} \\log S(t)\n\\\\[4pt]\n& = & -\\frac{1}{S(t)} \\frac{\\textrm{d}}{\\textrm{d}t} S(t)\n\\\\[4pt]\n& = & \\frac{1}{S(t)} \\frac{\\textrm{d}}{\\textrm{d}t} -(1 - F_T(t))\n\\\\[4pt]\n& = & \\frac{1}{S(t)} \\frac{\\textrm{d}}{\\textrm{d}t} (F_T(t) - 1)\n\\\\[4pt]\n& = & \\frac{1}{S(t)} \\frac{\\textrm{d}}{\\textrm{d}t} F_T(t)\n\\\\[4pt]\n& = & \\frac{p_T(t)}{S(t)}.\n\\end{eqnarray*}\\]\nIf \\(T \\sim \\textrm{exponential}(\\beta)\\) has an exponential distribution, then its hazard function is constant, \\[\\begin{eqnarray*}\nh(t \\mid \\beta)\n& = & \\frac{p_T(t \\mid \\beta)}{S(t \\mid \\beta)}\n\\\\[4pt]\n& = & \\frac{\\textrm{exponential}(t \\mid \\beta)}{1 - \\textrm{exponentialCDF}(t \\mid \\beta)}\n\\\\[4pt]\n& = & \\frac{\\beta \\cdot \\exp(-\\beta \\cdot t)}\n {1 - (1 - \\exp(-\\beta \\cdot t))}\n\\\\[4pt]\n& = & \\frac{\\beta \\cdot \\exp(-\\beta \\cdot t)}\n {\\exp(-\\beta \\cdot t)}\n\\\\[4pt]\n& = & \\beta.\n\\end{eqnarray*}\\] The exponential distribution is 
the only distribution of survival times with a constant hazard function.\nIf \\(T \\sim \\textrm{Weibull}(\\alpha, \\sigma),\\) then its hazard function is \\[\\begin{eqnarray*}\nh(t \\mid \\alpha, \\sigma)\n& = & \\frac{p_T(t \\mid \\alpha, \\sigma)}{S(t \\mid \\alpha, \\sigma)}\n\\\\[4pt]\n& = & \\frac{\\textrm{Weibull}(t \\mid \\alpha, \\sigma)}{1 - \\textrm{WeibullCDF}(t \\mid \\alpha, \\sigma)}\n\\\\[4pt]\n& = &\n\\frac{\\frac{\\alpha}{\\sigma} \\cdot \\left( \\frac{t}{\\sigma} \\right)^{\\alpha - 1}\n \\cdot \\exp\\left(-\\left(\\frac{t}{\\sigma} \\right)^\\alpha\\right)}\n {1 - \\left(1 -\n \\exp\\left(-\\left(\\frac{t}{\\sigma}\\right)^\\alpha\n \\right)\\right)} \n\\\\[4pt]\n& = & \\frac{\\alpha}{\\sigma}\n \\cdot\n \\left( \\frac{t}{\\sigma} \\right)^{\\alpha - 1}.\n\\end{eqnarray*}\\]\nIf \\(\\alpha = 1\\) the hazard is constant over time (which also follows from the fact that the Weibull distribution reduces to the exponential distribution when \\(\\alpha = 1\\)). When \\(\\alpha > 1,\\) the hazard grows as time passes, whereas when \\(\\alpha < 1,\\) it decreases as time passes.\n\n\n\nThe exponential model is parametric in that it specifies an explicit parametric form for the distribution of survival times. Cox (1972) introduced a semi-parametric survival model specified directly in terms of a hazard function \\(h(t)\\) rather than in terms of a distribution over survival times. Cox’s model is semi-parametric in that it does not model the full hazard function, instead modeling only the proportional differences in hazards among subjects.\nLet \\(x_n \\in \\mathbb{R}^K\\) be a (row) vector of covariates for subject \\(n\\) so that the full covariate data matrix is \\(x \\in \\mathbb{R}^{N \\times\nK}\\). 
In Cox’s model, the hazard function for subject \\(n\\) is defined conditionally in terms of their covariates \\(x_n\\) and the parameter vector \\(\\beta \\in \\mathbb{R}^K\\) as \\[\\begin{equation*}\nh(t \\mid x_n, \\beta) = h_0(t) \\cdot \\exp(x_n \\cdot \\beta),\n\\end{equation*}\\] where \\(h_0(t)\\) is a shared baseline hazard function and \\(x_n \\cdot\n\\beta = \\sum_{k=1}^K x_{n, k} \\cdot \\beta_k\\) is a row vector-vector product.\nIn the semi-parametric, proportional hazards model, the baseline hazard function \\(h_0(t)\\) is not modeled. This is why it is called “semi-parametric.” Only the factor \\(\\exp(x_n \\cdot \\beta),\\) which determines how individual \\(n\\) varies by a proportion from the baseline hazard, is modeled. This is why it’s called “proportional hazards.”\nCox’s proportional hazards model is not fully generative. There is no way to generate the times of failure because the baseline hazard function \\(h_0(t)\\) is unmodeled; if the baseline hazard were known, failure times could be generated. Cox’s proportional hazards model is generative for the ordering of failures conditional on a number of censored items. Proportional hazard models may also include a parametric or non-parametric model for the baseline hazard function1.\n\n\nCox’s proportional specification of the hazard function is insufficient to generate random variates because the baseline hazard function \\(h_0(t)\\) is unknown. 
On the other hand, the proportional specification is sufficient to generate a partial likelihood that accounts for the order of the survival times.\nThe hazard function \\(h(t \\mid x_n, \\beta) = h_0(t) \\cdot \\exp(x_n\n\\cdot \\beta)\\) for subject \\(n\\) represents the instantaneous probability that subject \\(n\\) fails at time \\(t\\) given that it has survived until time \\(t.\\) The probability that subject \\(n\\) is the first to fail among \\(N\\) subjects is thus proportional to subject \\(n\\)’s hazard function, \\[\\begin{equation*}\n\\Pr[n \\textrm{ first to fail at time } t]\n\\propto h(t \\mid x_n, \\beta).\n\\end{equation*}\\] Normalizing yields \\[\\begin{eqnarray*}\n\\Pr[n \\textrm{ first to fail at time } t]\n& = & \\frac{h(t \\mid x_n, \\beta)}\n {\\sum_{n' = 1}^N h(t \\mid x_{n'}, \\beta)}\n\\\\[4pt]\n& = & \\frac{h_0(t) \\cdot \\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = 1}^N h_0(t) \\cdot \\exp(x_{n'} \\cdot \\beta)}\n\\\\[4pt]\n& = & \\frac{\\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = 1}^N \\exp(x_{n'} \\cdot \\beta)}.\n\\end{eqnarray*}\\]\nSuppose there are \\(N\\) subjects with strictly ordered survival times \\(t_1 <\nt_2 < \\cdots < t_N\\) and covariate (row) vectors \\(x_1, \\ldots, x_N\\). Let \\(t^{\\textrm{cens}}\\) be the (right) censoring time and let \\(N^{\\textrm{obs}}\\) be the largest value of \\(n\\) such that \\(t_n \\leq\nt^{\\textrm{cens}}\\). This means \\(N^{\\textrm{obs}}\\) is the number of subjects whose failure time was observed. 
The ordering is for convenient indexing and does not cause any loss of generality—survival times can simply be sorted into the necessary order.\nWith failure times sorted in increasing order, the partial likelihood for each observed subject \\(n \\in 1{:}N^{\\textrm{obs}}\\) can be expressed as \\[\\begin{equation*}\n\\Pr[n \\textrm{ first to fail among } n, n + 1, \\ldots N]\n= \\frac{\\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = n}^N \\exp(x_{n'} \\cdot \\beta)}.\n\\end{equation*}\\] The group of items for comparison and hence the summation is over all items, including those with observed and censored failure times.\nThe partial likelihood, defined in this form by Breslow (1975), is just the product of the partial likelihoods for the observed subjects (i.e., excluding subjects whose failure time is censored). \\[\\begin{equation*}\n\\Pr[\\textrm{observed failures ordered } 1, \\ldots, N^{\\textrm{obs}} |\nx, \\beta]\n= \\prod_{n = 1}^{N^{\\textrm{obs}}}\n \\frac{\\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = n}^N \\exp(x_{n'} \\cdot \\beta)}.\n\\end{equation*}\\] On the log scale, \\[\\begin{eqnarray*}\n\\log \\Pr[\\textrm{obs.\\ fail ordered } 1, \\ldots, N^{\\textrm{obs}} |\nx, \\beta]\n& = &\n\\sum_{n = 1}^{N^{\\textrm{obs}}}\n \\log \\left(\n \\frac{\\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = n}^N \\exp(x_{n'} \\cdot \\beta)}\n \\right)\n\\\\[4pt]\n& = & \\sum_{n = 1}^{N^{\\textrm{obs}}} \\left( x_n \\cdot \\beta - \\log \\sum_{n' = n}^N \\exp(x_{n'} \\cdot \\beta) \\right)\n\\\\\n& = & \\sum_{n = 1}^{N^{\\textrm{obs}}} \\left( x_n \\cdot \\beta - \\textrm{logSumExp}_{n' = n}^N \\ x_{n'} \\cdot \\beta \\right),\n\\end{eqnarray*}\\] where \\[\\begin{equation*}\n\\textrm{logSumExp}_{n = a}^b \\ x_n\n= \\log \\sum_{n = a}^b \\exp(x_n)\n\\end{equation*}\\] is implemented so as to preserve numerical precision.\nThis likelihood follows the same approach to ranking as that developed by Plackett (1975) for estimating the probability of the order of the first few finishers in a horse race.\nA simple normal prior on the components of \\(\\beta\\) completes the model, 
\\[\\begin{equation*}\n\\beta \\sim \\textrm{normal}(0, 2).\n\\end{equation*}\\] This should be scaled based on knowledge of the predictors.\n\n\n\nTo simplify the Stan program, the survival times for uncensored events are sorted into decreasing order (unlike in the mathematical presentation, where they were sorted into ascending order). The covariates for censored and uncensored observations are separated into two matrices.\ndata {\n int<lower=0> K; // num covariates\n\n int<lower=0> N; // num uncensored obs\n vector[N] t; // event time (non-strict decreasing)\n matrix[N, K] x; // covariates for uncensored obs\n\n int N_c; // num censored obs\n real<lower=t[N]> t_c; // censoring time\n matrix[N_c, K] x_c; // covariates for censored obs\n}\nThe parameters are just the coefficients.\nparameters {\n vector[K] beta; // slopes (no intercept)\n}\nThe prior is a simple independent centered normal distribution on each element of the parameter vector, which is vectorized in the Stan code.\nmodel {\n beta ~ normal(0, 2);\n ...\nThe log likelihood is implemented so as to minimize duplicated effort. The first order of business is to calculate the linear predictors, which is done separately for the subjects whose event time is observed and those for which the event time is censored.\n vector[N] log_theta = x * beta;\n vector[N_c] log_theta_c = x_c * beta;\nThese vectors are computed using efficient matrix-vector multiplies. 
The log of exponential values of the censored covariates times the coefficients is reused in the denominator of each factor, which on the log scale, starts with the log sum of exponentials of the censored items’ linear predictors.\n real log_denom = log_sum_exp(log_theta_c);\nThen, for each observed survival time, going backwards from the latest to the earliest, the denominator can be incremented (which turns into a log sum of exponentials on the log scale), and then the target is updated with its likelihood contribution.\n for (n in 1:N) {\n log_denom = log_sum_exp(log_denom, log_theta[n]);\n target += log_theta[n] - log_denom; // log likelihood\n }\nThe running log sum of exponentials is why the list is iterated in reverse order of survival times. It allows the log denominator to be accumulated one term at a time. The condition that the survival times are sorted into decreasing order is not checked. It could be checked very easily in the transformed data block by adding the following code.\ntransformed data {\n for (n in 2:N) {\n if (!(t[n] < t[n - 1])) {\n reject(\"times must be strictly decreasing, but found\"\n \"!(t[\", n, \"] < t[, \", (n - 1), \"])\");\n } \n }\n}\n\n\n\nTechnically, for continuous survival times, the probability of two survival times being identical will be zero. Nevertheless, real data sets often round survival times, for instance to the nearest day or week in a multi-year clinical trial. The technically “correct” thing to do in the face of unknown survival times in a range would be to treat their order as unknown and infer it. But considering all \\(N!\\) permutations for a set of \\(N\\) subjects with tied survival times is not tractable. As an alternative, Efron (1977) introduced an approximate partial likelihood with better properties than a random permutation while not being quite as good as considering all permutations. 
Efron’s model averages the contributions as if they truly did occur simultaneously.\nIn the interest of completeness, here is the Stan code for an implementation of Efron’s estimator. It uses two user-defined functions. The first calculates how many different survival times occur in the data.\nfunctions {\n int num_unique_starts(vector t) {\n if (size(t) == 0) return 0;\n int us = 1;\n for (n in 2:size(t)) {\n if (t[n] != t[n - 1]) us += 1;\n }\n return us;\n }\nThis is then used to compute the value J to send into the function that computes the position in the array of failure times where each new failure time starts, plus an end point that goes one past the target. This is a standard way in Stan to code ragged arrays.\n array[] int unique_starts(vector t, int J) {\n array[J + 1] int starts;\n if (J == 0) return starts;\n starts[1] = 1;\n int pos = 2;\n for (n in 2:size(t)) {\n if (t[n] != t[n - 1]) {\n starts[pos] = n;\n pos += 1;\n }\n }\n starts[J + 1] = size(t) + 1;\n return starts;\n }\n}\nThe data format is exactly the same as for the model in the previous section, but in this case, the transformed data block is used to cache some precomputations required for the model, namely the ragged array grouping elements that share the same survival time.\ntransformed data {\n int<lower=0> J = num_unique_starts(t);\n array[J + 1] int<lower=0> starts = unique_starts(t, J);\n}\nFor each unique survival time j in 1:J, the subjects indexed from starts[j] to starts[j + 1] - 1 (inclusive) share the same survival time. The number of elements with survival time j is thus (starts[j + 1] - 1) - starts[j] + 1, or just starts[j + 1] - starts[j].\nThe parameters and prior are also the same—just a vector beta of coefficients with a centered normal prior. 
Although it starts with the same caching of results for later, and uses the same accumulator for the denominator, the overall partial likelihood is much more involved, and depends on the user-defined functions defining the transformed data variables J and starts.\n vector[N] log_theta = x * beta;\n vector[N_c] log_theta_c = x_c * beta;\n real log_denom_lhs = log_sum_exp(log_theta_c);\n for (j in 1:J) {\n int start = starts[j];\n int end = starts[j + 1] - 1;\n int len = end - start + 1;\n real log_len = log(len);\n real numerator = sum(log_theta[start:end]);\n log_denom_lhs = log_sum_exp(log_denom_lhs,\n log_sum_exp(log_theta[start:end]));\n vector[len] diff;\n for (ell in 1:len) {\n diff[ell] = log_diff_exp(log_denom_lhs,\n log(ell - 1) - log_len\n + log_sum_exp(log_theta[start:end]));\n }\n target += numerator - sum(diff);\n }\nThe special function log_diff_exp is defined as\n\\[\\begin{equation*}\n\\textrm{logDiffExp}(u, v) = \\log(\\exp(u) - \\exp(v)).\n\\end{equation*}\\]\nBecause of how J and starts are constructed, the length len will always be strictly positive so that the log is well defined.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Survival Models" + ] + }, + { + "objectID": "stan-users-guide/survival.html#exponential-survival-model", + "href": "stan-users-guide/survival.html#exponential-survival-model", + "title": "Survival Models", + "section": "", + "text": "The exponential distribution is commonly used in survival models where there is a constant risk of failure that does not go up the longer a subject survives. This is because the exponential distribution is memoryless in sense that if \\(T \\sim \\textrm{exponential}(\\lambda)\\) for some rate \\(\\lambda > 0,\\) then \\[\\begin{equation*}\n\\Pr[T > t] = \\Pr[T > t + t' \\mid T > t'].\n\\end{equation*}\\] If component survival times are distributed exponentially, it means the distribution of time to failure is the same no matter how long the item has already survived. 
This can be a reasonable assumption for electronic components, but is not a reasonable model for animal survival.\nThe exponential survival model has a single parameter for the rate, which assumes all subjects have the same distribution of failure time (this assumption is relaxed in the next section by introducing per-subject covariates). With the rate parameterization, the expected survival time for a component with survival time represented as the random variable \\(T\\) is \\[\\begin{equation*}\n\\mathbb{E}[T \\mid \\lambda] = \\frac{1}{\\lambda}.\n\\end{equation*}\\] The exponential distribution is sometimes parameterized in terms of a scale (i.e., inverse rate) \\(\\beta = 1 / \\lambda\\).\nThe data for a survival model consists of two components. First, there is a vector \\(t \\in (0, \\infty)^N\\) of \\(N\\) observed failure times. Second, there is a censoring time \\(t^{\\textrm{cens}}\\) such that failure times greater than \\(t^{\\textrm{cens}}\\) are not observed. The censoring time assumption imposes a constraint which requires \\(t_n < t^{\\textrm{cens}}\\) for all \\(n \\in 1{:}N.\\) For the censored subjects, the only thing required in the model is their total count, \\(N^\\textrm{cens}\\) (their covariates are also required for models with covariates).\nThe model for the observed failure times is exponential, so that for \\(n \\in 1{:}N,\\) \\[\\begin{equation*}\nt_n \\sim \\textrm{exponential}(\\lambda).\n\\end{equation*}\\]\nThe model for the censored failure times is also exponential. All that is known of a censored item is that its failure time is greater than the censoring time, so each censored item contributes a factor to the likelihood of \\[\\begin{equation*}\n\\Pr[T > t^{\\textrm{cens}}] = 1 - F_T(t^{\\textrm{cens}}),\n\\end{equation*}\\] where \\(F_T\\) is the cumulative distribution function (cdf) of survival time \\(T\\) (\\(F_X(x) = \\Pr[X \\leq x]\\) is standard notation for the cdf of a random variable \\(X\\)). 
The function \\(1 - F_T(t)\\) is the complementary cumulative distribution function (ccdf), and it is used directly to define the likelihood \\[\\begin{eqnarray*}\np(t, t^{\\textrm{cens}}, N^{\\textrm{cens}} \\mid \\lambda)\n& = &\n\\prod_{n=1}^N \\textrm{exponential}(t_n \\mid \\lambda)\n\\cdot\n\\prod_{n=1}^{N^{\\textrm{cens}}}\n\\textrm{exponentialCCDF}(t^{\\textrm{cens}} \\mid \\lambda)\n\\\\\n& = &\n\\prod_{n=1}^N \\textrm{exponential}(t_n \\mid \\lambda)\n\\cdot\n\\textrm{exponentialCCDF}(t^{\\textrm{cens}} \\mid \\lambda)^{N^{\\textrm{cens}}}.\n\\end{eqnarray*}\\]\nOn the log scale, that’s \\[\\begin{eqnarray*}\n\\log p(t, t^{\\textrm{cens}}, N^{\\textrm{cens}} \\mid \\lambda)\n& = &\n\\sum_{n=1}^N \\log \\textrm{exponential}(t_n \\mid \\lambda)\n\\\\\n& & { } + N^{\\textrm{cens}} \\cdot \\log \\textrm{exponentialCCDF}(t^{\\textrm{cens}} \\mid \\lambda).\n\\end{eqnarray*}\\]\nThe model can be completed with a standard lognormal prior on \\(\\lambda,\\) \\[\\begin{equation*}\n\\lambda \\sim \\textrm{lognormal}(0, 1),\n\\end{equation*}\\] which is reasonable if failure times are in the range of 0.1 to 10 time units, because that’s roughly the 95% central interval for a variable distributed \\(\\textrm{lognormal}(0, 1)\\). In general, the range of the prior (and likelihood!) should be adjusted with prior knowledge of expected failure times.\n\n\nThe data for a simple survival analysis without covariates can be coded as follows.\ndata {\n int<lower=0> N;\n vector[N] t;\n int<lower=0> N_cens;\n real<lower=0> t_cens;\n}\nIn this program, N is the number of uncensored observations and t contains the times of the uncensored observations. There are a further N_cens items that are right censored at time t_cens. Right censoring means that if the time to failure is greater than\nt_cens, it is only observed that the part survived until time t_cens. 
In the case where there are no covariates, the model only needs the number of censored items because they all share the same censoring time.\nThere is a single rate parameter, the inverse of which is the expected time to failure.\nparameters {\n real<lower=0> lambda;\n}\nThe exponential survival model and the prior are coded directly using vectorized distribution and ccdf statements. This both simplifies the code and makes it more computationally efficient by sharing computation across instances.\nmodel {\n t ~ exponential(lambda);\n target += N_cens * exponential_lccdf(t_cens | lambda);\n\n lambda ~ lognormal(0, 1);\n}\nThe likelihood for rate lambda is just the density of the exponential distribution for observed failure time. The Stan code is vectorized, modeling each entry of the vector t as having an exponential distribution with rate lambda. This data model could have been written as\nfor (n in 1:N) {\n t[n] ~ exponential(lambda);\n}\nThe log likelihood contribution given censored items is the number of censored items times the log complementary cumulative distribution function (lccdf) at the censoring time of the exponential distribution with rate lambda. The log likelihood terms arising from the censored events could have been added to the target log density one at a time,\nfor (n in 1:N_cens)\n target += exponential_lccdf(t_cens | lambda);\nto define the same log density, but it is much more efficient computationally to multiply by a constant than do a handful of sequential additions.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Survival Models" + ] + }, + { + "objectID": "stan-users-guide/survival.html#weibull-survival-model", + "href": "stan-users-guide/survival.html#weibull-survival-model", + "title": "Survival Models", + "section": "", + "text": "The Weibull distribution is a popular alternative to the exponential distribution in cases where there is a decreasing probability of survival as a subject gets older. 
The Weibull distribution models this by generalizing the exponential distribution to include a power-law trend.\nThe Weibull distribution is parameterized by a shape \\(\\alpha > 0\\) and scale \\(\\sigma > 0.\\) For an outcome \\(t \\geq 0\\), the Weibull distribution’s probability density function is \\[\\begin{equation*}\n\\textrm{Weibull}(t \\mid \\alpha, \\sigma)\n= \\frac{\\alpha}{\\sigma}\n \\cdot \\left( \\frac{t}{\\sigma} \\right)^{\\alpha - 1}\n \\cdot \\exp\\left(-\\left(\\frac{t}{\\sigma}\\right)^{\\alpha}\\right).\n\\end{equation*}\\] In contrast, recall that the exponential distribution can be expressed using a rate (inverse scale) parameter \\(\\beta > 0\\) with probability density function \\[\\begin{equation*}\n\\textrm{exponential}(t \\mid \\beta) =\n\\beta\n\\cdot\n\\exp(-\\beta \\cdot t).\n\\end{equation*}\\] When \\(\\alpha = 1,\\) the Weibull distribution reduces to an exponential distribution, \\[\\begin{equation*}\n\\textrm{Weibull}(t \\mid 1, \\sigma)\n=\n\\textrm{exponential}\\!\\left(t \\,\\bigg|\\, \\frac{1}{\\sigma}\\right).\n\\end{equation*}\\] In other words, the Weibull is a continuous expansion of the exponential distribution.\nIf \\(T \\sim \\textrm{Weibull}(\\alpha, \\sigma),\\) then the expected survival time is \\[\\begin{equation*}\n\\mathbb{E}[T] = \\sigma \\cdot \\Gamma\\!\\left(1 + \\frac{1}{\\alpha}\\right),\n\\end{equation*}\\] where the \\(\\Gamma\\) function is the continuous completion of the factorial function (i.e., \\(\\Gamma(1 + n) = n!\\ \\) for \\(n \\in\n\\mathbb{N}\\)). As \\(\\alpha \\rightarrow 0\\) for a fixed \\(\\sigma\\) or as \\(\\sigma \\rightarrow \\infty\\) for a fixed \\(\\alpha\\), the expected survival time goes to infinity.\nThere are three regimes of the Weibull distribution.\n\n\\(\\alpha < 1.\\) A subject is more likely to fail early. 
When \\(\\alpha\n< 1,\\) the Weibull density approaches infinity as \\(t \\rightarrow 0.\\)\n\\(\\alpha = 1.\\) The Weibull distribution reduces to the exponential distribution, with a constant rate of failure over time. When \\(\\alpha = 1,\\) the Weibull density approaches \\(1 / \\sigma\\) as \\(t\n\\rightarrow 0.\\)\n\\(\\alpha > 1.\\) Subjects are less likely to fail early. When \\(\\alpha > 1,\\) the Weibull density approaches zero as \\(t \\rightarrow 0.\\)\n\nWith \\(\\alpha \\leq 1,\\) the mode is zero (\\(t = 0\\)), whereas with \\(\\alpha > 1,\\) the mode is nonzero (\\(t > 0\\)).\n\n\nWith Stan, one can just swap the exponential distribution for the Weibull distribution with the appropriate parameters and the model remains essentially the same. Recall the exponential model’s parameters and model block.\nparameters {\n real<lower=0> beta;\n}\nmodel {\n t ~ exponential(beta);\n target += N_cens * exponential_lccdf(t_cens | beta);\n\n beta ~ lognormal(0, 1);\n}\nThe Stan program for the Weibull model just swaps in the Weibull distribution and complementary cumulative distribution function with shape (alpha) and scale (sigma) parameters.\nparameters {\n real<lower=0> alpha;\n real<lower=0> sigma;\n}\nmodel {\n t ~ weibull(alpha, sigma);\n target += N_cens * weibull_lccdf(t_cens | alpha, sigma);\n\n alpha ~ lognormal(0, 1);\n sigma ~ lognormal(0, 1);\n}\nAs usual, if more is known about expected survival times, alpha and sigma should be given more informative priors.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Survival Models" + ] + }, + { + "objectID": "stan-users-guide/survival.html#survival-with-covariates", + "href": "stan-users-guide/survival.html#survival-with-covariates", + "title": "Survival Models", + "section": "", + "text": "Suppose that for each of \\(n \\in 1{:}N\\) items observed, both censored and uncensored, there is a covariate (row) vector \\(x_n \\in\n\\mathbb{R}^K.\\) For example, a clinical trial may include the age (or 
a one-hot encoding of an age group) and the sex of a participant; an electronic component might include a one-hot encoding of the factory at which it was manufactured and a covariate for the load under which it has been run.\nSurvival with covariates replaces what is essentially a simple regression with only an intercept \\(\\lambda\\) with a generalized linear model with a log link, where the rate for item \\(n\\) is \\[\\begin{equation*}\n\\lambda_n = \\exp(x_n \\cdot \\beta),\n\\end{equation*}\\] where \\(\\beta \\in \\mathbb{R}^K\\) is a \\(K\\)-vector of regression coefficients. Thus \\[\\begin{equation*}\nt_n \\sim \\textrm{exponential}(\\lambda_n).\n\\end{equation*}\\] The censored items have probability \\[\\begin{equation*}\n\\Pr[n\\textrm{-th censored}] =\n\\textrm{exponentialCCDF}(t^{\\textrm{cens}} \\mid \\exp(x^{\\textrm{cens}}_n\n\\cdot \\beta)).\n\\end{equation*}\\]\nThe covariates form an \\(N \\times K\\) data matrix, \\(x \\in\n\\mathbb{R}^{N \\times K}\\). An intercept can be introduced by adding a column of 1 values to \\(x\\).\nA Stan program for the exponential survival model with covariates is as follows. 
It relies on the fact that the order of failure times (t and t_cens) corresponds to the ordering of items in the covariate matrices (x and x_cens).\ndata {\n int<lower=0> N;\n vector[N] t;\n int<lower=0> N_cens;\n real<lower=0> t_cens;\n int<lower=0> K;\n matrix[N, K] x;\n matrix[N_cens, K] x_cens;\n}\nparameters {\n vector[K] gamma;\n}\nmodel {\n gamma ~ normal(0, 2);\n\n t ~ exponential(exp(x * gamma));\n target += exponential_lccdf(t_cens | exp(x_cens * gamma));\n}\nBoth the distribution statement for uncensored times and the log density increment statement for censored times are vectorized, one in terms of the exponential distribution and one in terms of the log complementary cumulative distribution function.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Survival Models" + ] + }, + { + "objectID": "stan-users-guide/survival.html#hazard-and-survival-functions", + "href": "stan-users-guide/survival.html#hazard-and-survival-functions", + "title": "Survival Models", + "section": "", + "text": "Suppose \\(T\\) is a random variable representing a survival time, with a smooth cumulative distribution function \\[\\begin{equation*}\nF_T(t) = \\Pr[T \\leq t],\n\\end{equation*}\\] so that its probability density function is \\[\\begin{equation*}\np_T(t) = \\frac{\\textrm{d}}{\\textrm{d}t} F_T(t).\n\\end{equation*}\\]\nThe survival function \\(S(t)\\) is the probability of surviving until at least time \\(t\\), which is just the complementary cumulative distribution function (ccdf) of the survival random variable \\(T\\), \\[\\begin{equation*}\nS(t) = 1 - F_T(t).\n\\end{equation*}\\] The survival function appeared in the Stan model in the previous section as the likelihood for items that did not fail during the period of the experiment (i.e., the censored failure times for the items that survived through the trial period).\nThe hazard function \\(h(t)\\) is the instantaneous risk of not surviving past time \\(t\\) assuming survival until time \\(t\\), which 
is given by \\[\\begin{equation*}\nh(t) = \\frac{p_T(t)}{S(t)} = \\frac{p_T(t)}{1 - F_T(t)}.\n\\end{equation*}\\] The cumulative hazard function \\(H(t)\\) is defined to be the accumulated hazard over time, \\[\\begin{equation*}\nH(t) = \\int_0^t h(u) \\, \\textrm{d}u.\n\\end{equation*}\\]\nThe hazard function and survival function are related through the differential equation \\[\\begin{eqnarray*}\nh(t) & = & -\\frac{\\textrm{d}}{\\textrm{d}t} \\log S(t)\n\\\\[4pt]\n& = & -\\frac{1}{S(t)} \\frac{\\textrm{d}}{\\textrm{d}t} S(t)\n\\\\[4pt]\n& = & \\frac{1}{S(t)} \\frac{\\textrm{d}}{\\textrm{d}t} -(1 - F_T(t))\n\\\\[4pt]\n& = & \\frac{1}{S(t)} \\frac{\\textrm{d}}{\\textrm{d}t} (F_T(t) - 1)\n\\\\[4pt]\n& = & \\frac{1}{S(t)} \\frac{\\textrm{d}}{\\textrm{d}t} F_T(t)\n\\\\[4pt]\n& = & \\frac{p_T(t)}{S(t)}.\n\\end{eqnarray*}\\]\nIf \\(T \\sim \\textrm{exponential}(\\beta)\\) has an exponential distribution, then its hazard function is constant, \\[\\begin{eqnarray*}\nh(t \\mid \\beta)\n& = & \\frac{p_T(t \\mid \\beta)}{S(t \\mid \\beta)}\n\\\\[4pt]\n& = & \\frac{\\textrm{exponential}(t \\mid \\beta)}{1 - \\textrm{exponentialCDF}(t \\mid \\beta)}\n\\\\[4pt]\n& = & \\frac{\\beta \\cdot \\exp(-\\beta \\cdot t)}\n {1 - (1 - \\exp(-\\beta \\cdot t))}\n\\\\[4pt]\n& = & \\frac{\\beta \\cdot \\exp(-\\beta \\cdot t)}\n {\\exp(-\\beta \\cdot t)}\n\\\\[4pt]\n& = & \\beta.\n\\end{eqnarray*}\\] The exponential distribution is the only distribution of survival times with a constant hazard function.\nIf \\(T \\sim \\textrm{Weibull}(\\alpha, \\sigma),\\) then its hazard function is \\[\\begin{eqnarray*}\nh(t \\mid \\alpha, \\sigma)\n& = & \\frac{p_T(t \\mid \\alpha, \\sigma)}{S(t \\mid \\alpha, \\sigma)}\n\\\\[4pt]\n& = & \\frac{\\textrm{Weibull}(t \\mid \\alpha, \\sigma)}{1 - \\textrm{WeibullCDF}(t \\mid \\alpha, \\sigma)}\n\\\\[4pt]\n& = &\n\\frac{\\frac{\\alpha}{\\sigma} \\cdot \\left( \\frac{t}{\\sigma} \\right)^{\\alpha - 1}\n \\cdot \\exp\\left(-\\left(\\frac{t}{\\sigma} 
\\right)^\\alpha\\right)}\n {1 - \\left(1 -\n \\exp\\left(-\\left(\\frac{t}{\\sigma}\\right)^\\alpha\n \\right)\\right)} \n\\\\[4pt]\n& = & \\frac{\\alpha}{\\sigma}\n \\cdot\n \\left( \\frac{t}{\\sigma} \\right)^{\\alpha - 1}.\n\\end{eqnarray*}\\]\nIf \\(\\alpha = 1\\) the hazard is constant over time (which also follows from the fact that the Weibull distribution reduces to the exponential distribution when \\(\\alpha = 1\\)). When \\(\\alpha > 1,\\) the hazard grows as time passes, whereas when \\(\\alpha < 1,\\) it decreases as time passes.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Survival Models" + ] + }, + { + "objectID": "stan-users-guide/survival.html#proportional-hazards-model", + "href": "stan-users-guide/survival.html#proportional-hazards-model", + "title": "Survival Models", + "section": "", + "text": "The exponential model is parametric in that it specifies an explicit parametric form for the distribution of survival times. Cox (1972) introduced a semi-parametric survival model specified directly in terms of a hazard function \\(h(t)\\) rather than in terms of a distribution over survival times. Cox’s model is semi-parametric in that it does not model the full hazard function, instead modeling only the proportional differences in hazards among subjects.\nLet \\(x_n \\in \\mathbb{R}^K\\) be a (row) vector of covariates for subject \\(n\\) so that the full covariate data matrix is \\(x \\in \\mathbb{R}^{N \\times\nK}\\). 
In Cox’s model, the hazard function for subject \\(n\\) is defined conditionally in terms of their covariates \\(x_n\\) and the parameter vector \\(\\beta \\in \\mathbb{R}^K\\) as \\[\\begin{equation*}\nh(t \\mid x_n, \\beta) = h_0(t) \\cdot \\exp(x_n \\cdot \\beta),\n\\end{equation*}\\] where \\(h_0(t)\\) is a shared baseline hazard function and \\(x_n \\cdot\n\\beta = \\sum_{k=1}^K x_{n, k} \\cdot \\beta_k\\) is a row vector-vector product.\nIn the semi-parametric, proportional hazards model, the baseline hazard function \\(h_0(t)\\) is not modeled. This is why it is called “semi-parametric.” Only the factor \\(\\exp(x_n \\cdot \\beta),\\) which determines how individual \\(n\\) varies by a proportion from the baseline hazard, is modeled. This is why it’s called “proportional hazards.”\nCox’s proportional hazards model is not fully generative. There is no way to generate the times of failure because the baseline hazard function \\(h_0(t)\\) is unmodeled; if the baseline hazard were known, failure times could be generated. Cox’s proportional hazards model is generative for the ordering of failures conditional on a number of censored items. Proportional hazard models may also include a parametric or non-parametric model for the baseline hazard function1.\n\n\nCox’s proportional specification of the hazard function is insufficient to generate random variates because the baseline hazard function \\(h_0(t)\\) is unknown. 
On the other hand, the proportional specification is sufficient to generate a partial likelihood that accounts for the order of the survival times.\nThe hazard function \\(h(t \\mid x_n, \\beta) = h_0(t) \\cdot \\exp(x_n\n\\cdot \\beta)\\) for subject \\(n\\) represents the instantaneous probability that subject \\(n\\) fails at time \\(t\\) given that it has survived until time \\(t.\\) The probability that subject \\(n\\) is the first to fail among \\(N\\) subjects is thus proportional to subject \\(n\\)’s hazard function, \\[\\begin{equation*}\n\\Pr[n \\textrm{ first to fail at time } t]\n\\propto h(t \\mid x_n, \\beta).\n\\end{equation*}\\] Normalizing yields \\[\\begin{eqnarray*}\n\\Pr[n \\textrm{ first to fail at time } t]\n& = & \\frac{h(t \\mid x_n, \\beta)}\n {\\sum_{n' = 1}^N h(t \\mid x_{n'}, \\beta)}\n\\\\[4pt]\n& = & \\frac{h_0(t) \\cdot \\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = 1}^N h_0(t) \\cdot \\exp(x_{n'} \\cdot \\beta)}\n\\\\[4pt]\n& = & \\frac{\\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = 1}^N \\exp(x_{n'} \\cdot \\beta)}.\n\\end{eqnarray*}\\]\nSuppose there are \\(N\\) subjects with strictly ordered survival times \\(t_1 <\nt_2 < \\cdots < t_N\\) and covariate (row) vectors \\(x_1, \\ldots, x_N\\). Let \\(t^{\\textrm{cens}}\\) be the (right) censoring time and let \\(N^{\\textrm{obs}}\\) be the largest value of \\(n\\) such that \\(t_n \\leq\nt^{\\textrm{cens}}\\). This means \\(N^{\\textrm{obs}}\\) is the number of subjects whose failure time was observed. 
The ordering is for convenient indexing and does not cause any loss of generality—survival times can simply be sorted into the necessary order.\nWith failure times sorted in increasing order, the partial likelihood for each observed subject \\(n \\in 1{:}N^{\\textrm{obs}}\\) can be expressed as \\[\\begin{equation*}\n\\Pr[n \\textrm{ first to fail among } n, n + 1, \\ldots, N]\n= \\frac{\\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = n}^N \\exp(x_{n'} \\cdot \\beta)}.\n\\end{equation*}\\] The group of items for comparison and hence the summation is over all items, including those with observed and censored failure times.\nThe partial likelihood, defined in this form by Breslow (1975), is just the product of the partial likelihoods for the observed subjects (i.e., excluding subjects whose failure time is censored). \\[\\begin{equation*}\n\\Pr[\\textrm{observed failures ordered } 1, \\ldots, N^{\\textrm{obs}} |\nx, \\beta]\n= \\prod_{n = 1}^{N^{\\textrm{obs}}}\n \\frac{\\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = n}^N \\exp(x_{n'} \\cdot \\beta)}.\n\\end{equation*}\\] On the log scale, \\[\\begin{eqnarray*}\n\\log \\Pr[\\textrm{obs.\\ fail ordered } 1, \\ldots, N^{\\textrm{obs}} |\nx, \\beta]\n& = &\n\\sum_{n = 1}^{N^{\\textrm{obs}}}\n \\log \\left(\n \\frac{\\exp(x_n \\cdot \\beta)}\n {\\sum_{n' = n}^N \\exp(x_{n'} \\cdot \\beta)}\n \\right)\n\\\\[4pt]\n& = & \\sum_{n = 1}^{N^{\\textrm{obs}}} \\left( x_n \\cdot \\beta - \\log \\sum_{n' = n}^N \\exp(x_{n'} \\cdot \\beta) \\right)\n\\\\\n& = & \\sum_{n = 1}^{N^{\\textrm{obs}}} \\left( x_n \\cdot \\beta - \\textrm{logSumExp}_{n' = n}^N \\ x_{n'} \\cdot \\beta \\right),\n\\end{eqnarray*}\\] where \\[\\begin{equation*}\n\\textrm{logSumExp}_{n = a}^b \\ x_n\n= \\log \\sum_{n = a}^b \\exp(x_n)\n\\end{equation*}\\] is implemented so as to preserve numerical precision.\nThis likelihood follows the same approach to ranking as that developed by Plackett (1975) for estimating the probability of the order of the first few finishers in a horse race.\nA simple normal prior on the components of \\(\\beta\\) completes the model, 
\\[\\begin{equation*}\n\\beta \\sim \\textrm{normal}(0, 2).\n\\end{equation*}\\] This should be scaled based on knowledge of the predictors.\n\n\n\nTo simplify the Stan program, the survival times for uncensored events are sorted into decreasing order (unlike in the mathematical presentation, where they were sorted into ascending order). The covariates for censored and uncensored observations are separated into two matrices.\ndata {\n int<lower=0> K; // num covariates\n\n int<lower=0> N; // num uncensored obs\n vector[N] t; // event time (non-strict decreasing)\n matrix[N, K] x; // covariates for uncensored obs\n\n int N_c; // num censored obs\n real<lower=t[N]> t_c; // censoring time\n matrix[N_c, K] x_c; // covariates for censored obs\n}\nThe parameters are just the coefficients.\nparameters {\n vector[K] beta; // slopes (no intercept)\n}\nThe prior is a simple independent centered normal distribution on each element of the parameter vector, which is vectorized in the Stan code.\nmodel {\n beta ~ normal(0, 2);\n ...\nThe log likelihood is implemented so as to minimize duplicated effort. The first order of business is to calculate the linear predictors, which is done separately for the subjects whose event time is observed and those for which the event time is censored.\n vector[N] log_theta = x * beta;\n vector[N_c] log_theta_c = x_c * beta;\nThese vectors are computed using efficient matrix-vector multiplies. 
The log of exponential values of the censored covariates times the coefficients is reused in the denominator of each factor, which on the log scale, starts with the log sum of exponentials of the censored items’ linear predictors.\n real log_denom = log_sum_exp(log_theta_c);\nThen, for each observed survival time, going backwards from the latest to the earliest, the denominator can be incremented (which turns into a log sum of exponentials on the log scale), and then the target is updated with its likelihood contribution.\n for (n in 1:N) {\n log_denom = log_sum_exp(log_denom, log_theta[n]);\n target += log_theta[n] - log_denom; // log likelihood\n }\nThe running log sum of exponentials is why the list is iterated in reverse order of survival times. It allows the log denominator to be accumulated one term at a time. The condition that the survival times are sorted into decreasing order is not checked. It could be checked very easily in the transformed data block by adding the following code.\ntransformed data {\n for (n in 2:N) {\n if (!(t[n] < t[n - 1])) {\n reject(\"times must be strictly decreasing, but found\"\n \"!(t[\", n, \"] < t[, \", (n - 1), \"])\");\n } \n }\n}\n\n\n\nTechnically, for continuous survival times, the probability of two survival times being identical will be zero. Nevertheless, real data sets often round survival times, for instance to the nearest day or week in a multi-year clinical trial. The technically “correct” thing to do in the face of unknown survival times in a range would be to treat their order as unknown and infer it. But considering all \\(N!\\) permutations for a set of \\(N\\) subjects with tied survival times is not tractable. As an alternative, Efron (1977) introduced an approximate partial likelihood with better properties than a random permutation while not being quite as good as considering all permutations. 
Efron’s model averages the contributions as if they truly did occur simultaneously.\nIn the interest of completeness, here is the Stan code for an implementation of Efron’s estimator. It uses two user-defined functions. The first calculates how many different survival times occur in the data.\nfunctions {\n int num_unique_starts(vector t) {\n if (size(t) == 0) return 0;\n int us = 1;\n for (n in 2:size(t)) {\n if (t[n] != t[n - 1]) us += 1;\n }\n return us;\n }\nThis is then used to compute the value J to send into the function that computes the position in the array of failure times where each new failure time starts, plus an end point that goes one past the target. This is a standard way in Stan to code ragged arrays.\n array[] int unique_starts(vector t, int J) {\n array[J + 1] int starts;\n if (J == 0) return starts;\n starts[1] = 1;\n int pos = 2;\n for (n in 2:size(t)) {\n if (t[n] != t[n - 1]) {\n starts[pos] = n;\n pos += 1;\n }\n }\n starts[J + 1] = size(t) + 1;\n return starts;\n }\n}\nThe data format is exactly the same as for the model in the previous section, but in this case, the transformed data block is used to cache some precomputations required for the model, namely the ragged array grouping elements that share the same survival time.\ntransformed data {\n int<lower=0> J = num_unique_starts(t);\n array[J + 1] int<lower=0> starts = unique_starts(t, J);\n}\nFor each unique survival time j in 1:J, the subjects indexed from starts[j] to starts[j + 1] - 1 (inclusive) share the same survival time. The number of elements with survival time j is thus (starts[j + 1] - 1) - starts[j] + 1, or just starts[j + 1] - starts[j].\nThe parameters and prior are also the same—just a vector beta of coefficients with a centered normal prior. 
Although it starts with the same caching of results for later, and uses the same accumulator for the denominator, the overall partial likelihood is much more involved, and depends on the user-defined functions defining the transformed data variables J and starts.\n vector[N] log_theta = x * beta;\n vector[N_c] log_theta_c = x_c * beta;\n real log_denom_lhs = log_sum_exp(log_theta_c);\n for (j in 1:J) {\n int start = starts[j];\n int end = starts[j + 1] - 1;\n int len = end - start + 1;\n real log_len = log(len);\n real numerator = sum(log_theta[start:end]);\n log_denom_lhs = log_sum_exp(log_denom_lhs,\n log_sum_exp(log_theta[start:end]));\n vector[len] diff;\n for (ell in 1:len) {\n diff[ell] = log_diff_exp(log_denom_lhs,\n log(ell - 1) - log_len\n + log_sum_exp(log_theta[start:end]));\n }\n target += numerator - sum(diff);\n }\nThe special function log_diff_exp is defined as\n\\[\\begin{equation*}\n\\textrm{logDiffExp}(u, v) = \\log(\\exp(u) - \\exp(v)).\n\\end{equation*}\\]\nBecause of how J and starts are constructed, the length len will always be strictly positive so that the log is well defined.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Survival Models" + ] + }, + { + "objectID": "stan-users-guide/survival.html#footnotes", + "href": "stan-users-guide/survival.html#footnotes", + "title": "Survival Models", + "section": "Footnotes", + "text": "Footnotes\n\n\nCox mentioned in his seminal paper that modeling the baseline hazard function would improve statistical efficiency, but he did not do it for computational reasons.↩︎", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Survival Models" + ] + }, + { + "objectID": "stan-users-guide/truncation-censoring.html", + "href": "stan-users-guide/truncation-censoring.html", + "title": "Truncated or Censored Data", + "section": "", + "text": "Data in which measurements have been truncated or censored can be coded in Stan following their respective probability models.\n\n\nTruncation in Stan is 
restricted to univariate distributions for which the corresponding log cumulative distribution function (CDF) and log complementary cumulative distribution (CCDF) functions are available. See the reference manual section on truncated distributions for more information on truncated distributions, CDFs, and CCDFs.\n\n\n\nTruncated data are data for which measurements are only reported if they fall above a lower bound, below an upper bound, or between a lower and upper bound.\nTruncated data may be modeled in Stan using truncated distributions. For example, suppose the truncated data are \\(y_n\\) with an upper truncation point of \\(U = 300\\) so that \\(y_n < 300\\). In Stan, this data can be modeled as following a truncated normal distribution for the observations as follows.\ndata {\n int<lower=0> N;\n real U;\n array[N] real<upper=U> y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(mu, sigma) T[ , U];\n}\nThe model declares an upper bound U as data and constrains the data for y to respect the constraint; this will be checked when the data are loaded into the model before sampling begins.\nSee the Stan Reference Manual’s Statements chapter for how to use truncated distributions with the log probability increment statements (target += ...).\nThis model implicitly uses an improper flat prior on the scale and location parameters; these could be given priors in the model using distribution statements.\n\n\nIf the sampled variate in a truncated distribution lies outside of the truncation range, the probability is zero, so the log probability will evaluate to \\(-\\infty\\). For instance, if variate y is sampled with the statement.\ny ~ normal(mu, sigma) T[L, U];\nthen if any value inside y is less than the value of L or greater than the value of U, the distribution statement produces a zero-probability estimate. 
For user-defined truncation, this zeroing outside of truncation bounds must be handled explicitly.\nTo avoid variables straying outside of truncation bounds, appropriate constraints are required. For example, if y is a parameter in the above model, the declaration should constrain it to fall between the values of L and U.\nparameters {\n array[N] real<lower=L, upper=U> y;\n // ...\n}\nIf in the above model, L or U is a parameter and y is data, then L and U must be appropriately constrained so that all data are in range and the value of L is less than that of U (if they are equal, the parameter range collapses to a single point and the Hamiltonian dynamics used by the sampler break down). The following declarations ensure the bounds are well behaved.\nparameters {\n real<upper=min(y)> L; // L < y[n]\n real<lower=fmax(L, max(y))> U; // L < U; y[n] < U\nFor pairs of real numbers, the function fmax is used rather than max.\n\n\n\nIf the truncation points are unknown, they may be estimated as parameters. This can be done with a slight rearrangement of the variable declarations from the model in the previous section with known truncation points.\ndata {\n int<lower=1> N;\n array[N] real y;\n}\nparameters {\n real<upper=min(y)> L;\n real<lower=max(y)> U;\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n L ~ // ...\n U ~ // ...\n y ~ normal(mu, sigma) T[L, U];\n}\nHere there is a lower truncation point L which is declared to be less than or equal to the minimum value of y. The upper truncation point U is declared to be larger than the maximum value of y. This declaration, although dependent on the data, only enforces the constraint that the data fall within the truncation bounds. With N declared as type int<lower=1>, there must be at least one data point. 
The constraint that L is less than U is enforced indirectly, based on the non-empty data.\nThe ellipses where the priors for the bounds L and U should go should be filled in with an informative prior in order for this model to not concentrate L strongly around min(y) and U strongly around max(y).\n\n\n\n\nCensoring hides values from points that are too large, too small, or both. Unlike with truncated data, the number of data points that were censored is known. The textbook example is the household scale which does not report values above 300 pounds.\n\n\nOne way to model censored data is to treat the censored data as missing data that is constrained to fall in the censored range of values. Since Stan does not allow unknown values in its arrays or matrices, the censored values must be represented explicitly, as in the following right-censored case.\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_cens;\n array[N_obs] real y_obs;\n real<lower=max(y_obs)> U;\n}\nparameters {\n array[N_cens] real<lower=U> y_cens;\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y_obs ~ normal(mu, sigma);\n y_cens ~ normal(mu, sigma);\n}\nBecause the censored data array y_cens is declared to be a parameter, it will be sampled along with the location and scale parameters mu and sigma. Because the censored data array y_cens is declared to have values of type real<lower=U>, all imputed values for censored data will be greater than U. The imputed censored data affects the location and scale parameters through the last distribution statement in the model.\n\n\n\nAlthough it is wrong to ignore the censored values in estimating location and scale, it is not necessary to impute values. Instead, the values can be integrated out. 
Each censored data point has a probability of \\[\\begin{align*}\n\\Pr[y_{\\mathrm{cens},m} > U]\n &= \\int_U^{\\infty} \\textsf{normal}\\left(y_{\\mathrm{cens},m} \\mid \\mu,\\sigma \\right) \\,\\textsf{d}y_{\\mathrm{cens},m} \\\\\n &= 1 - \\Phi\\left(\\frac{U - \\mu}{\\sigma}\\right),\n\\end{align*}\\]\nwhere \\(\\Phi()\\) is the standard normal cumulative distribution function. This probability is equivalent to the likelihood contribution of knowing that \\(y_{\\mathrm{cens},m}>U\\). With \\(M\\) censored observations, the likelihood on the log scale is \\[\\begin{align*}\n\\log \\prod_{m=1}^M \\Pr[y_{\\mathrm{cens},m} > U]\n &= \\log \\left( \\left( 1 - \\Phi\\left(\\frac{U - \\mu}{\\sigma}\\right) \\right)^{M} \\right) \\\\\n &= M \\times \\texttt{normal}\\mathtt{\\_}\\texttt{lccdf}\\left(U \\mid \\mu, \\sigma \\right),\n\\end{align*}\\]\nwhere normal_lccdf is the log of the complementary CDF (Stan provides <distr>_lccdf for each distribution implemented in Stan).\nThe following right-censored model assumes that the censoring point is known, so it is declared as data.\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_cens;\n array[N_obs] real y_obs;\n real<lower=max(y_obs)> U;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y_obs ~ normal(mu, sigma);\n target += N_cens * normal_lccdf(U | mu, sigma);\n}\nFor the observed values in y_obs, the normal model is used without truncation. The likelihood contribution from the integrated out censored values cannot be coded with a distribution statement, and the log probability is directly incremented using the calculated log complementary cumulative normal probability of the censored observations.\nFor the left-censored data the CDF (normal_lcdf) has to be used instead of the complementary CDF. 
If the censoring point variable (L) is unknown, its declaration should be moved from the data to the parameters block.\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_cens;\n array[N_obs] real y_obs;\n}\nparameters {\n real<upper=min(y_obs)> L;\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n L ~ normal(mu, sigma);\n y_obs ~ normal(mu, sigma);\n target += N_cens * normal_lcdf(L | mu, sigma);\n}", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Truncated or Censored Data" + ] + }, + { + "objectID": "stan-users-guide/truncation-censoring.html#truncation.section", + "href": "stan-users-guide/truncation-censoring.html#truncation.section", + "title": "Truncated or Censored Data", + "section": "", + "text": "Truncation in Stan is restricted to univariate distributions for which the corresponding log cumulative distribution function (CDF) and log complementary cumulative distribution (CCDF) functions are available. See the reference manual section on truncated distributions for more information on truncated distributions, CDFs, and CCDFs.", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Truncated or Censored Data" + ] + }, + { + "objectID": "stan-users-guide/truncation-censoring.html#truncated-data.section", + "href": "stan-users-guide/truncation-censoring.html#truncated-data.section", + "title": "Truncated or Censored Data", + "section": "", + "text": "Truncated data are data for which measurements are only reported if they fall above a lower bound, below an upper bound, or between a lower and upper bound.\nTruncated data may be modeled in Stan using truncated distributions. For example, suppose the truncated data are \\(y_n\\) with an upper truncation point of \\(U = 300\\) so that \\(y_n < 300\\). 
In Stan, this data can be modeled as following a truncated normal distribution for the observations as follows.\ndata {\n int<lower=0> N;\n real U;\n array[N] real<upper=U> y;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y ~ normal(mu, sigma) T[ , U];\n}\nThe model declares an upper bound U as data and constrains the data for y to respect the constraint; this will be checked when the data are loaded into the model before sampling begins.\nSee the Stan Reference Manual’s Statements chapter for how to use truncated distributions with the log probability increment statements (target += ...).\nThis model implicitly uses an improper flat prior on the scale and location parameters; these could be given priors in the model using distribution statements.\n\n\nIf the sampled variate in a truncated distribution lies outside of the truncation range, the probability is zero, so the log probability will evaluate to \\(-\\infty\\). For instance, if variate y is sampled with the statement.\ny ~ normal(mu, sigma) T[L, U];\nthen if any value inside y is less than the value of L or greater than the value of U, the distribution statement produces a zero-probability estimate. For user-defined truncation, this zeroing outside of truncation bounds must be handled explicitly.\nTo avoid variables straying outside of truncation bounds, appropriate constraints are required. For example, if y is a parameter in the above model, the declaration should constrain it to fall between the values of L and U.\nparameters {\n array[N] real<lower=L, upper=U> y;\n // ...\n}\nIf in the above model, L or U is a parameter and y is data, then L and U must be appropriately constrained so that all data are in range and the value of L is less than that of U (if they are equal, the parameter range collapses to a single point and the Hamiltonian dynamics used by the sampler break down). 
The following declarations ensure the bounds are well behaved.\nparameters {\n real<upper=min(y)> L; // L < y[n]\n real<lower=fmax(L, max(y))> U; // L < U; y[n] < U\nFor pairs of real numbers, the function fmax is used rather than max.\n\n\n\nIf the truncation points are unknown, they may be estimated as parameters. This can be done with a slight rearrangement of the variable declarations from the model in the previous section with known truncation points.\ndata {\n int<lower=1> N;\n array[N] real y;\n}\nparameters {\n real<upper=min(y)> L;\n real<lower=max(y)> U;\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n L ~ // ...\n U ~ // ...\n y ~ normal(mu, sigma) T[L, U];\n}\nHere there is a lower truncation point L which is declared to be less than or equal to the minimum value of y. The upper truncation point U is declared to be larger than the maximum value of y. This declaration, although dependent on the data, only enforces the constraint that the data fall within the truncation bounds. With N declared as type int<lower=1>, there must be at least one data point. The constraint that L is less than U is enforced indirectly, based on the non-empty data.\nThe ellipses where the priors for the bounds L and U should go should be filled in with an informative prior in order for this model to not concentrate L strongly around min(y) and U strongly around max(y).", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Truncated or Censored Data" + ] + }, + { + "objectID": "stan-users-guide/truncation-censoring.html#censored.section", + "href": "stan-users-guide/truncation-censoring.html#censored.section", + "title": "Truncated or Censored Data", + "section": "", + "text": "Censoring hides values from points that are too large, too small, or both. Unlike with truncated data, the number of data points that were censored is known. 
The textbook example is the household scale which does not report values above 300 pounds.\n\n\nOne way to model censored data is to treat the censored data as missing data that is constrained to fall in the censored range of values. Since Stan does not allow unknown values in its arrays or matrices, the censored values must be represented explicitly, as in the following right-censored case.\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_cens;\n array[N_obs] real y_obs;\n real<lower=max(y_obs)> U;\n}\nparameters {\n array[N_cens] real<lower=U> y_cens;\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y_obs ~ normal(mu, sigma);\n y_cens ~ normal(mu, sigma);\n}\nBecause the censored data array y_cens is declared to be a parameter, it will be sampled along with the location and scale parameters mu and sigma. Because the censored data array y_cens is declared to have values of type real<lower=U>, all imputed values for censored data will be greater than U. The imputed censored data affects the location and scale parameters through the last distribution statement in the model.\n\n\n\nAlthough it is wrong to ignore the censored values in estimating location and scale, it is not necessary to impute values. Instead, the values can be integrated out. Each censored data point has a probability of \\[\\begin{align*}\n\\Pr[y_{\\mathrm{cens},m} > U]\n &= \\int_U^{\\infty} \\textsf{normal}\\left(y_{\\mathrm{cens},m} \\mid \\mu,\\sigma \\right) \\,\\textsf{d}y_{\\mathrm{cens},m} \\\\\n &= 1 - \\Phi\\left(\\frac{U - \\mu}{\\sigma}\\right),\n\\end{align*}\\]\nwhere \\(\\Phi()\\) is the standard normal cumulative distribution function. This probability is equivalent to the likelihood contribution of knowing that \\(y_{\\mathrm{cens},m}>U\\). 
With \\(M\\) censored observations, the likelihood on the log scale is \\[\\begin{align*}\n\\log \\prod_{m=1}^M \\Pr[y_{\\mathrm{cens},m} > U]\n &= \\log \\left( \\left( 1 - \\Phi\\left(\\frac{U - \\mu}{\\sigma}\\right) \\right)^{M} \\right) \\\\\n &= M \\times \\texttt{normal}\\mathtt{\\_}\\texttt{lccdf}\\left(U \\mid \\mu, \\sigma \\right),\n\\end{align*}\\]\nwhere normal_lccdf is the log of the complementary CDF (Stan provides <distr>_lccdf for each distribution implemented in Stan).\nThe following right-censored model assumes that the censoring point is known, so it is declared as data.\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_cens;\n array[N_obs] real y_obs;\n real<lower=max(y_obs)> U;\n}\nparameters {\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n y_obs ~ normal(mu, sigma);\n target += N_cens * normal_lccdf(U | mu, sigma);\n}\nFor the observed values in y_obs, the normal model is used without truncation. The likelihood contribution from the integrated out censored values cannot be coded with a distribution statement, and the log probability is directly incremented using the calculated log complementary cumulative normal probability of the censored observations.\nFor the left-censored data the CDF (normal_lcdf) has to be used instead of the complementary CDF. If the censoring point variable (L) is unknown, its declaration should be moved from the data to the parameters block.\ndata {\n int<lower=0> N_obs;\n int<lower=0> N_cens;\n array[N_obs] real y_obs;\n}\nparameters {\n real<upper=min(y_obs)> L;\n real mu;\n real<lower=0> sigma;\n}\nmodel {\n L ~ normal(mu, sigma);\n y_obs ~ normal(mu, sigma);\n target += N_cens * normal_lcdf(L | mu, sigma);\n}", + "crumbs": [ + "Stan Users Guide", + "Example Models", + "Truncated or Censored Data" + ] + }, + { + "objectID": "stan-users-guide/using-stanc.html", + "href": "stan-users-guide/using-stanc.html", + "title": "Using the Stan Compiler", + "section": "", + "text": "Stan is used in most of our interfaces through the Stan compiler stanc. 
Since version 2.22, the Stan compiler has been implemented in OCaml and is referred to as stanc3. The binary name is still simply stanc, so this document uses both stanc and stanc3 interchangeably.\n\n\nThe stanc3 compiler has the following command-line syntax:\n> stanc (options) <model_file>\nwhere <model_file> is either a path to a file ending in .stan or .stanfunctions (which automatically sets --standalone-functions), or '-' to read from standard input.\nThe stanc3 options are:\n\n--help / -? - Displays the complete list of stanc3 options, then exits.\n--version - Display stanc version number\n--info - Print information about the model, such as the type information for variables and the list of used distributions.\n--name=<model_name> - Specify the name of the class used for the implementation of the Stan model in the generated C++ code.\n--o=<file_name> / -o=<filename> / --output=<filename> - Specify a path to an output file for generated C++ code (default = .hpp) or auto-formatting output (default: no file/print to stdout)\n--auto-format - Pretty prints the program to the console. See more on auto formatting.\n--allow-undefined - Do not throw a parser error if there is a function in the Stan program that is declared but not defined in the functions block.\n--canonicalize - Make changes to the program before pretty-printing by specifying options in a comma separated list. Options are ‘deprecations’, ‘parentheses’, ‘braces’, ‘includes’, and ‘strip-comments’.\n--include_paths=<dir1,...dirN> - Takes a comma-separated list of directories that may contain a file in an #include directive.\n--max-line-length=<number> - Set the column number at which formatting with --auto-format attempts to split lines. 
The default value is 78, which results in most lines being shorter than 80 characters.\n--print-canonical - Synonymous with --auto-format --canonicalize=deprecations,includes,parentheses,braces.\n--print-cpp - If set, output the generated C++ Stan model class to stdout.\n--filename-in-msg=<name> - Sets the filename used in compiler and runtime errors. If absent, the <model_file> argument is used.\n--standalone-functions - If set, only generate the code for the functions defined in the file. This is the default behavior for .stanfunctions files.\n--O0 (Default) Do not apply optimizations to the Stan code.\n--O1 Apply level 1 compiler optimizations (only basic optimizations).\n--Oexperimental WARNING: This is currently an experimental feature whose components are not thoroughly tested and may not improve a programs performance! Allow the compiler to apply all optimizations to the Stan code.\n--O Synonym for --O1 as of Stan 2.37. In earlier versions this was a synonym for --Oexperimental.\n--use-opencl - If set, will use additional Stan OpenCL features enabled in the Stan-to-C++ compiler.\n--warn-pedantic - Emit warnings in Pedantic mode which warns of potential issues in the meaning of your program. Note: This may produce false positive warnings.\n--warn-uninitialized - Emit warnings about uninitialized variables to stderr. Currently an experimental feature.\n--color - Control whether errors and warnings are emitted with colored styling on terminals that support it. Valid values are auto (the default), always, never. 
Can also be controlled by the STANC_COLOR environment variable.\n\nThe compiler also provides a number of debug options which are primarily of interest to stanc3 developers; use the --help option to see the full set.\n\n\n\nDuring model compilation, stanc can produce a variety of errors (issues that prevent the model from being compiled) and warnings (non-fatal issues that should still be considered).\n\n\nEven without the optional --warn-pedantic and --warn-uninitialized command line flags, both of which enable additional warnings, stanc can still produce warnings about your program. In particular, warnings will be produced in two situations\n\nA completely blank Stan program will produce the following warning message\nWarning in 'empty.stan', line 1, column 0 to line 2, column 0:\n Empty model detected; this is a valid Stan model but likely unintended!\nThe use of any deprecated features will lead to warnings which will look as follows\n Warning in 'deprecated.stan', line 2, column 10 to column 17:\n lkj_cov is deprecated and will be removed in Stan 3.0. Use lkj_corr with\n an independent lognormal distribution on the scales, see:\n https://mc-stan.org/docs/reference-manual/deprecations.html#lkj_cov-distribution\nA single Stan program can produce many warnings during compilation.\n\n\n\n\nErrors differ from warnings in their severity and format. In particular, errors are fatal and stop compilation, so at most one error is displayed per run of stanc.\nThere are five kinds of errors emitted by stanc3\n\nFile errors occur when the file passed to stanc is either missing or cannot be opened (i.e. has permissions issues). They look like\nError: file 'notfound.stan' not found or cannot be opened\nSyntactic errors occur whenever a program violates the Stan language’s syntax requirements. 
There are three kinds of errors within syntax errors; “lexing” errors mean that the input was unable to be read properly on the character level, “include” errors which occur when the #include directive fails, and “parsing” errors which result when the structure of the program is incorrect.\n\nThe lexing errors occur due to the use of invalid characters in a program. For example, a lexing error due to the use of $ in a variable name will look like the following.\nSyntax error in 'char.stan', line 2, column 7 to column 8, lexing error:\n-------------------------------------------------\n 1: data {\n 2: int $ome_variable;\n ^\n 3: }\n-------------------------------------------------\nInvalid character found.\nWhen an include directive is used, it can lead to errors if the included file is not found, or if a file includes itself (including a recursive loop of includes, such as A -> B -> A).\nSyntax error in './incl.stan', line 1, column 0, included from\n'./incl.stan', line 1, column 0, included from\n'incl.stan', line 1, column 0, include error:\n -------------------------------------------------\n 1: #include <incl.stan>\n ^\n -------------------------------------------------\nFile incl.stan recursively included itself.\nIt is much more common to see parsing errors, which tend to have more in-depth explanations of the error found. For example, if a user forgets to put a size on a type like vector, as in the following, this raises a parsing (structural) error in the compiler.\nSyntax error in 'vec.stan', line 3, column 10 to column 11, parsing error:\n -------------------------------------------------\n 1: data {\n 2: int<lower=0> N;\n 3: vector x;\n ^\n 4: }\n -------------------------------------------------\nIll-formed type. Expected \"[\" expression \"]\" for vector size.\n\nSemantic errors (also known as type errors) occur when a program is structured correctly but features an error in the type rules imposed by the language. 
An example of this is assigning a real value to a variable defined as an integer.\nSemantic error in 'type.stan', line 2, column 3 to column 15:\n -------------------------------------------------\n 1: transformed data {\n 2: int x = 1.5;\n ^\n 3: }\n -------------------------------------------------\nIll-typed arguments supplied to assignment operator =:\nThe left hand side has type\n int\nand the right hand side has type\n real\nThe compiler will raise an error for use of any removed features for at least one version following their removal. The deprecation warnings mentioned above eventually turn into this kind of error to prompt the user to update their model. After the version of removal, these errors will be converted to one of the other types listed here, depending on the feature.\nFinally, the compiler can raise an internal error. These are caused by bugs in the compiler, not your model, and we would appreciate it if you report them on the stanc3 repo with the error message provided. These errors usually say something like “This should never happen,” and we apologize if they do.\n\n\n\n\n\nPedantic mode is a compilation option built into Stanc3 that warns you about potential issues in your Stan program.\nFor example, consider the following program.\ndata {\n int N;\n array[N] real x;\n}\nparameters {\n real sigma;\n}\nmodel {\n real mu;\n x ~ normal(mu, sigma);\n}\nWhen pedantic mode is turned on, the compiler will produce the following warnings.\nWarning in 'ped-mode-ex1.stan', line 6, column 2 to column 13:\n The parameter sigma has no priors. This means either no prior is\n provided, or the prior(s) depend on data variables. 
In the later case,\n this may be a false positive.\nWarning in 'ped-mode-ex1.stan', line 10, column 13 to column 15:\n The variable mu may not have been assigned a value before its use.\nWarning in 'ped-mode-ex1.stan', line 10, column 17 to column 22:\n A normal distribution is given parameter sigma as a scale parameter\n (argument 2), but sigma was not constrained to be strictly positive.\nHere are the kinds of issues that pedantic mode will find (which are described in more detail in following sections):\n\nDistribution usages issues. Distribution arguments don’t match the distribution specification, or some specific distribution is used in an inadvisable way.\nUnused parameter. A parameter is defined but doesn’t contribute to target.\nLarge or small constant in a distribution. Very large or very small constants are used as distribution arguments.\nControl flow depends on a parameter. Branching control flow (like if/else) depends on a parameter value .\nParameter has multiple tildes. A parameter is on the left-hand side of multiple tildes.\nParameter has zero or multiple priors. A parameter has zero or more than one prior distribution.\nVariable is used before assignment. A variable is used before being assigned a value.\nStrict or nonsensical parameter bounds. A parameter is given questionable bounds.\nNonlinear transformations. When the left-hand side of a tilde statement (or first argument of a log probability function) contains a nonlinear transform which may require a Jacobian change of variables adjustment.\n\nSome important limitations of pedantic mode are listed at the end of this chapter.\n\n\nWhen an argument to a built-in distribution certainly does not match that distribution’s specification in the Stan Functions Reference, a warning is thrown. This primarily checks if any distribution argument’s bounds at declaration, compile-time value, or subtype at declaration (e.g. simplex) is incompatible with the domain of the distribution. 
\nFor example, consider the following program.\nparameters {\n real unb_p;\n real<lower=0> pos_p;\n}\nmodel {\n 1 ~ poisson(unb_p);\n 1 ~ poisson(pos_p);\n}\nThe parameter of poisson should be strictly positive, but unb_p is not constrained to be positive.\nPedantic mode produces the following warning.\nWarning in 'ex-dist-args.stan', line 6, column 14 to column 19:\n A poisson distribution is given parameter unb_p as a rate parameter\n (argument 1), but unb_p was not constrained to be strictly positive.\n\n\n\nPedantic mode checks for some specific uses of distributions that may indicate a statistical mistake:\n\n\nAny use of the uniform distribution generates a warning, except when the variate parameter’s declared upper and lower bounds exactly match the uniform distribution bounds. In general, assigning a parameter a uniform distribution can create non-differentiable boundary conditions and is not recommended.\nFor example, consider the following program.\nparameters {\n real a;\n real<lower=0, upper=1> b;\n}\nmodel {\n a ~ uniform(0, 1);\n b ~ uniform(0, 1);\n}\na is assigned a uniform distribution that doesn’t match its constraints.\nPedantic mode produces the following warning.\nWarning in 'uniform-warn.stan', line 6, column 2 to column 20:\n Parameter a is given a uniform distribution. The uniform distribution is\n not recommended, for two reasons: (a) Except when there are logical or\n physical constraints, it is very unusual for you to be sure that a\n parameter will fall inside a specified range, and (b) The infinite gradient\n induced by a uniform density can cause difficulties for Stan's sampling\n algorithm. As a consequence, we recommend soft constraints rather than hard\n constraints; for example, instead of giving an elasticity parameter a\n uniform(0, 1) distribution, try normal(0.5, 0.5).\n\n\n\nGamma distributions are sometimes used as an attempt to assign an improper prior to a parameter. 
Pedantic mode gives a warning when the Gamma arguments indicate that this may be the case.\n\n\n\nAny use of the lkj_corr distribution generates a warning that suggests using the Cholesky variant instead. See the LKJ correlation distribution section of the Stan Functions Reference for details.\n\n\n\n\nA warning is generated when a parameter is declared but does not have any effect on the program. This is determined by checking whether the value of the target variable depends in any way on each of the parameters.\nFor example, consider the following program.\nparameters {\n real a;\n real b;\n}\nmodel {\n a ~ normal(1, 1);\n}\na participates in the density function but b does not.\nPedantic mode produces the following warning.\nWarning in 'unused.stan', line 3, column 2 to column 9:\n The parameter b was declared but was not used in the density calculation.\n\n\n\nWhen numbers with magnitude less than 0.1 or greater than 10 are used as arguments to a distribution, it indicates that some parameter is not scaled to unit value, so a warning is thrown. 
See the efficiency tuning section of the Stan User’s guide for a discussion of scaling parameters.\nFor example, consider the following program.\nparameters {\n real x;\n real y;\n}\nmodel {\n x ~ normal(-100, 100);\n y ~ normal(0, 1);\n}\nThe constants -100 and 100 suggest that x is not unit scaled.\nPedantic mode produces the following warning.\nWarning in 'constants-warn.stan', line 6, column 13 to column 17:\n Argument -100 suggests there may be parameters that are not unit scale;\n consider rescaling with a multiplier, see:\n https://mc-stan.org/docs/stan-users-guide/efficiency-tuning.html#standardizing-predictors\nWarning in 'constants-warn.stan', line 6, column 19 to column 22:\n Argument 100 suggests there may be parameters that are not unit scale;\n consider rescaling with a multiplier, see:\n https://mc-stan.org/docs/stan-users-guide/efficiency-tuning.html#standardizing-predictors\n\n\n\nControl flow statements, such as if, for and while should not depend on parameters or functions of parameters to determine their branching conditions. This is likely to introduce a discontinuity into the density function. 
Pedantic mode generates a warning when any branching condition may depend on a parameter value.\nFor example, consider the following program.\nparameters {\n real a;\n}\nmodel {\n // x depends on parameter a\n real x = a * a;\n\n int m;\n\n // the if-then-else depends on x which depends on a\n if(x > 0) {\n //now m depends on x which depends on a\n m = 1;\n } else {\n m = 2;\n }\n\n // for loop depends on m -> x -> a\n for (i in 0:m) {\n a ~ normal(i, 1);\n }\n}\nThe if and for statements are control flow that depend (indirectly) on the value of the parameter a.\nPedantic mode produces the following warning.\nWarning in 'param-dep-cf-warn.stan', line 11, column 2 to line 16, column 3:\n A control flow statement depends on parameter(s): a.\nWarning in 'param-dep-cf-warn.stan', line 19, column 2 to line 21, column 3:\n A control flow statement depends on parameter(s): a.\n\n\n\nA warning is generated when a parameter is found on the left-hand side of more than one ~ statement (or an equivalent target += conditional density statement). This pattern is not inherently an issue, but it is unusual and may indicate a mistake.\nPedantic mode only searches for repeated statements, it will not for example generate a warning when a ~ statement is executed repeatedly inside of a loop.\nFor example, consider the following program.\ndata {\n real x;\n}\nparameters {\n real a;\n real b;\n}\nmodel {\n a ~ normal(0, 1);\n a ~ normal(x, 1);\n\n b ~ normal(1, 1);\n}\nPedantic mode produces the following warning.\nWarning in 'multi-tildes.stan', line 9, column 2 to column 19:\n The parameter a is on the left-hand side of more than one tildes\n statement.\n\n\n\nA warning is generated when a parameter appears to have greater than or less than one prior distribution factor.\nThis analysis depends on a factor graph representation of a Stan program. 
A factor F that depends on a parameter P is called a prior factor for P if there is no path in the factor graph from F to any data variable except through P.\nOne limitation of this approach is that the compiler cannot distinguish between modeled data variables and other convenient uses of data variables such as data sizes or hyperparameters. This warning assumes that all data variables (except for int variables) are modeled data, which may cause extra warnings.\nFor example, consider the following program.\ndata {\n real x;\n}\nparameters {\n real a;\n real b;\n real c;\n real d;\n}\nmodel\n{\n a ~ normal(0, 1); // this is a prior\n x ~ normal(a, 1); // this is not a prior, since data is involved\n\n b ~ normal(x, 1); // this is also not a prior, since data is involved\n\n // this is not a prior for c, since data is involved through b\n // but it is a prior for b, since the data is only involved through b\n c ~ normal(b, 1);\n\n //these are multiple priors:\n d ~ normal(0, 1);\n 1 ~ normal(d, 1);\n}\nOne prior is found for a and for b, while c only has a factor that touches a data variable and d has multiple priors.\nPedantic mode produces the following warning.\nWarning in 'priors.stan', line 7, column 2 to column 9:\n The parameter c has no priors. This means either no prior is provided, or\n the prior(s) depend on data variables. 
In the later case, this may be a\n false positive.\nWarning in 'priors.stan', line 8, column 2 to column 9:\n The parameter d has 2 priors.\n\n\n\n\nA warning is generated when any variable is used before it has been assigned a value.\nFor example, consider the following program.\ntransformed data {\n real x;\n if (1 > 2) {\n x = 1;\n } else {\n print(\"oops\");\n }\n print(x);\n}\nSince x is only assigned in one of the branches of the if statement, it might get to print(x) without having been assigned to.\nPedantic mode produces the following warning.\nWarning in 'uninit-warn.stan', line 7, column 8 to column 9:\n The variable x may not have been assigned a value before its use.\n\n\n\nExcept when there are logical or physical constraints, it is very unusual for you to be sure that a parameter will fall inside a specified range. A warning is generated for all parameters declared with the bounds <lower=.., upper=..> except for <lower=0, upper=1> or <lower=-1, upper=1>.\nIn addition, a warning is generated when a parameter bound is found to have lower >= upper.\nFor example, consider the following program.\nparameters {\n real<lower=0, upper=1> a;\n real<lower=-1, upper=1> b;\n real<lower=-2, upper=1012> c;\n}\nmodel {\n c ~ normal(b, a);\n}\nPedantic mode produces the following warning.\nWarning in 'hard-constraint.stan', line 4, column 2 to column 31:\n Your Stan program has a parameter c with a lower and upper bound in its\n declaration. These hard constraints are not recommended, for two reasons:\n (a) Except when there are logical or physical constraints, it is very\n unusual for you to be sure that a parameter will fall inside a specified\n range, and (b) The infinite gradient induced by a hard constraint can\n cause difficulties for Stan's sampling algorithm. 
As a consequence, we\n recommend soft constraints rather than hard constraints; for example,\n instead of constraining an elasticity parameter to fall between 0, and 1,\n leave it unconstrained and give it a normal(0.5,0.5) prior distribution.\n\n\n\nWhen a parameter is transformed in a non-linear fashion, an adjustment must be applied to account for distortion caused by the transform. This is discussed in depth in the Changes of variables section.\nThis portion of pedantic mode tries to detect instances where such an adjustment would be necessary and remind the user.\nFor example, consider the following program.\nparameters {\n real y;\n}\nmodel {\n log(y) ~ normal(0,1);\n}\nPedantic mode produces the following warning.\nWarning in 'jacobian.stan', line 5, column 2 to column 23:\n Left-hand side of distribution statement (~) may contain a non-linear\n transform of a parameter or local variable. If it does, you need to\n include a target += statement with the log absolute determinant of the\n Jacobian of the transform. You could also consider defining a transformed\n parameter and using jacobian += in the transformed parameters block.\n\n\n\n\nConstant values are sometimes uncomputable\nPedantic mode attempts to evaluate expressions down to literal values so that they can be used to generate warnings. For example, in the code normal(x, 1 - 2), the expression 1 - 2 will be evaluated to -1, which is not a valid variance argument so a warning is generated. However, this strategy is limited; it is often impossible to fully evaluate expressions in finite time.\nContainer types\nCurrently, indexed variables are not handled intelligently, so they are treated as monolithic variables. 
Each analysis treats indexed variables conservatively (erring toward generating fewer warnings).\nData variables\nThe declaration information for data variables is currently not considered, so using data as incompatible arguments to distributions may not generate the appropriate warnings.\nControl flow dependent on parameters in nested functions\nIf a parameter is passed as an argument to a user-defined function within another user-defined function, and then some control flow depends on that argument, the appropriate warning will not be thrown.\n\n\n\n\n\nIn addition to compiling Stan programs, stanc3 features several flags which can be used to format Stan programs and update them to the most recent Stan syntax by removing any deprecated features which can be automatically replaced.\nThese flags work for both .stan model files and .stanfunctions function files. They can be combined with --o to redirect the formatted output to a new file.\n\n\nInvoking stanc --auto-format <model_file> will print a version of your model which has been re-formatted. The goal is to have this automatic formatting stay as close as possible to the Stan Program Style Guide. This means spacing, indentation, and line length are all regularized. Some deprecated features, like the use of # for line comments, are replaced, but the goal is mainly to preserve the program while formatting it.\nBy default, this will try to split lines at or before column 78. This number can be changed using --max-line-length.\n\n\n\nIn addition to automatic formatting, stanc can also “canonicalize” programs by updating deprecated syntax, removing unnecessary parentheses, and adding braces around bodies of if statements and for and while loops.\nThis can be done by using stanc --auto-format --canonicalize=... where ... is a comma-separated list of options. 
Currently these options are:\n\ndeprecations\nRemoves deprecated syntax such as replacing deprecated functions with their drop-in replacements.\nparentheses\nRemoves unnecessary extra parentheses, such as converting y = ((x-1)) to y = x - 1\nbraces\nPlaces braces around all blocks. For example, the following statement\nif (cond)\n //result\nwill be formatted as\nif (cond) {\n //result\n}\nand similarly for both kinds of loops containing a single statement.\nincludes\nThis will pretty-print code from other files included with #include as part of the program. This was the default behavior prior to Stan 2.29. When not enabled, the pretty-printer output will include the same #include directives as the input program.\n\nInvoking stanc --print-canonical <model_file> is synonymous with running stanc --auto-format --canonicalize=deprecations,braces,parentheses,includes\n\n\n\nThe formatting and canonicalizing features of stanc3 are still under development. The following are some known issues one should be aware of before using either:\n\nOddly placed comments\nIf your Stan program features comments in unexpected places, such as inside an expression, they may be moved in the process of formatting. Moved comments are prefixed with the string ^^^: to indicate they originally appeared higher in the program.\nWe hope to improve this functionality in future versions. For now, this can usually be avoided by manually moving the comment outside of an expression, either by placing it on its own line or following a separator such as a comma or keyword.\nFailure to recreate strange #include structure\nPrinting without include inlining (--canonicalize=includes) can fail when includes were used in atypical locations, such as in the middle of statements. We recommend either printing with inlining enabled or reconsidering the use of includes in this way.\n\n\n\n\n\nThe stanc3 compiler can optimize the code of Stan model during compilation. 
The optimized model code behaves the same as unoptimized code, but it may be faster, more memory efficient, or more numerically stable.\nThis section introduces the available optimization options and describes their effect.\nTo print out a representation of the optimized Stan program, use the stanc3 command-line flag --debug-optimized-mir-pretty. To print an analogous representation of the Stan program prior to optimization, use the flag --debug-transformed-mir-pretty.\n\n\nTo turn optimizations on, the user specifies the desired optimization level. The level specifies the set of optimizations to use. The chosen optimizations are used in a specific order, with some of them applied repeatedly.\nOptimization levels are specified by the numbers 0 and 1 and the ‘experimental’ tag:\n\nO0 No optimizations are applied.\nO1 Optimizations that are simple, do not dramatically change the program, and are unlikely to noticeably slow down compile times are applied.\nOexperimental All optimizations are applied. Some of these are not thoroughly tested and may not always improve a program’s performance.\n\nO0 is the default setting.\nThe levels include these optimizations:\n\nO0 includes no optimizations.\nO1 includes:\n\nDead code elimination\nCopy propagation\nConstant propagation\nPartial evaluation\nFunction inlining\nMatrix memory layout optimization\n\nOexperimental includes optimizations specified by O1 and also:\n\nAutomatic-differentiation level optimization\nOne step loop unrolling\nExpression propagation\nLazy code motion\nStatic loop unrolling\n\n\nIn addition, Oexperimental will apply more repetitions of the optimizations, which may increase compile times.\n\n\n\n\n\nDead code is code that does not affect the behavior of the program. Code is not dead if it affects target, the value of any outside-observable variable like transformed parameters or generated quantities, or side effects such as print statements. 
Removing dead code can speed up a program by avoiding unnecessary computations.\nExample Stan program:\nmodel {\n int i;\n i = 5;\n for (j in 1:10);\n if (0) {\n print(\"Dead code\");\n } else {\n print(\"Hi!\");\n }\n}\nCompiler representation of program before dead code elimination (simplified from the output of --debug-transformed-mir-pretty):\nlog_prob {\n int i = 5;\n for(j in 1:10) {\n ;\n }\n if(0) {\n FnPrint__(\"Dead code\");\n } else {\n FnPrint__(\"Hi!\");\n }\n}\nCompiler representation of program after dead code elimination (simplified from the output of --debug-optimized-mir-pretty):\nlog_prob {\n int i;\n FnPrint__(\"Hi!\");\n}\n\n\n\nConstant propagation replaces uses of a variable which is known to have a constant value C with that constant C. This removes the overhead of looking up the variable, and also makes many other optimizations possible (such as static loop unrolling and partial evaluation).\nExample Stan program:\ntransformed data {\n int n = 100;\n int a[n];\n for (i in 1:n) {\n a[i] = i;\n }\n}\nCompiler representation of program before constant propagation (simplified from the output of --debug-transformed-mir-pretty):\nprepare_data {\n data int n = 100;\n data array[int, n] a;\n for(i in 1:n) {\n a[i] = i;\n }\n}\nCompiler representation of program after constant propagation (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int n = 100;\n data array[int, 100] a;\n for(i in 1:100) {\n a[i] = i;\n }\n}\n\n\n\nCopy propagation is similar to expression propagation, but only propagates variables rather than arbitrary expressions. 
This can reduce the complexity of the code for other optimizations such as expression propagation.\nExample Stan program:\nmodel {\n int i = 1;\n int j = i;\n int k = i + j;\n}\nCompiler representation of program before copy propagation (simplified from the output of --debug-transformed-mir-pretty):\nlog_prob {\n int i = 1;\n int j = i;\n int k = (i + j);\n}\nCompiler representation of program after copy propagation (simplified from the output of --debug-optimized-mir-pretty):\nlog_prob {\n int i = 1;\n int j = i;\n int k = (i + i);\n}\n\n\n\nPartial evaluation searches for expressions that we can replace with a faster, simpler, more memory efficient, or more numerically stable expression with the same meaning.\nExample Stan program:\nmodel {\n real a = 1 + 1;\n real b = log(1 - a);\n real c = a + b * 5;\n}\nCompiler representation of program before partial evaluation (simplified from the output of --debug-transformed-mir-pretty):\nlog_prob {\n real a = (1 + 1);\n real b = log((1 - a));\n real c = (a + (b * 5));\n}\nCompiler representation of program after partial evaluation (simplified from the output of --debug-optimized-mir-pretty):\nlog_prob {\n real a = 2;\n real b = log1m(a);\n real c = fma(b, 5, a);\n}\n\n\n\nFunction inlining replaces each function call to each user-defined function f with the body of f. It does this by copying the function body to the call site and appropriately renaming the argument variables. 
This optimization can speed up a program by avoiding the overhead of a function call and providing more opportunities for further optimizations (such as partial evaluation).\nExample Stan program:\nfunctions {\n int incr(int x) {\n int y = 1;\n return x + y;\n }\n}\ntransformed data {\n int a = 2;\n int b = incr(a);\n}\nCompiler representation of program before function inlining (simplified from the output of --debug-transformed-mir-pretty):\nfunctions {\n int incr(int x) {\n int y = 1;\n return (x + y);\n }\n}\n\nprepare_data {\n data int a = 2;\n data int b = incr(a);\n}\nCompiler representation of program after function inlining (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int a;\n a = 2;\n data int b;\n data int inline_sym1__;\n data int inline_sym3__;\n inline_sym3__ = 0;\n for(inline_sym4__ in 1:1) {\n int inline_sym2__;\n inline_sym2__ = 1;\n inline_sym3__ = 1;\n inline_sym1__ = (a + inline_sym2__);\n break;\n }\n b = inline_sym1__;\n}\nIn this code, the for loop and break is used to simulate the behavior of a return statement. The value to be returned is held in inline_sym1__. The flag variable inline_sym3__ indicates whether a return has occurred and is necessary to handle return statements nested inside loops within the function body.\n\n\n\nMatrices and vector variables which require automatic-differentiation (AD) in Stan can be represented in two different forms.\nThe first (and default) representation is the “Array of Structs” (AoS) or “Matrix of vars” (matvar) layout. A “var” is the term used in the Stan implementation of autodiff for a single real. It is represented as a structure containing its value and its adjoint. The AoS representation constructs matrices and vectors by simply using those structures as the elements of the matrix internally. 
This is flexible and very general, but many operations want to deal with the values or the adjoints as blocks, requiring expensive memory access patterns.\nThe second representation is the “Struct of Arrays” (SoA) or “Var of matrices” (varmat) layout. Rather than a matrix containing tiny structures of one value and one adjoint each, this representation uses a single structure which contains separately a matrix of values and a matrix of adjoints. Some operations, like iterating over elements or assigning to specific indices, become more expensive, but many matrix operations like multiplications become much faster in this representation.\nMore general reading on AoS vs SoA can be found on Wikipedia\nThis optimization pass attempts to identify which matrix or vector variables in the Stan program are candidates for using the SoA representation. The conditions change over time, but broadly speaking:\n\nAny Stan Math Library functions the matrix is passed to must be able to support it.\nThe matrix should not be accessed/assigned elementwise in a loop.\n\nThe debug flag --debug-mem-patterns will list each variable and whether it is using the AoS representation or the SoA representation.\n\n\n\n\n\n\nStan variables can have two auto-differentiation (AD) levels: AD or non-AD. AD variables carry gradient information with them, which allows Stan to calculate the log-density gradient, but they also have more overhead than non-AD variables. It is therefore inefficient for a variable to be AD unnecessarily. 
AD-level optimization sets every variable to be a floating point type unless its gradient is necessary.\nExample Stan program:\ndata {\n real y;\n}\nmodel {\n real x = y + 1;\n}\nCompiler representation of program before AD-level optimization (simplified from the output of --debug-transformed-mir-pretty):\ninput_vars {\n real y;\n}\n\nlog_prob {\n real x = (y + 1);\n}\nCompiler representation of program after AD-level optimization (simplified from the output of --debug-optimized-mir-pretty):\ninput_vars {\n real y;\n}\n\nlog_prob {\n data real x = (y + 1);\n}\n\n\n\nOne step loop unrolling is similar to static loop unrolling. However, this optimization only ‘unrolls’ the first loop iteration, and can therefore work even when the total number of iterations is not predictable. This can speed up a program by providing more opportunities for further optimizations such as partial evaluation and lazy code motion.\nExample Stan program:\ndata {\n int n;\n}\ntransformed data {\n int x = 0;\n for (i in 1:n) {\n x += i;\n }\n}\nCompiler representation of program before one step static loop unrolling (simplified from the output of --debug-transformed-mir-pretty):\nprepare_data {\n data int n = FnReadData__(\"n\")[1];\n data int x = 0;\n for(i in 1:n) {\n x = (x + i);\n }\n}\nCompiler representation of program after one step static loop unrolling (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int n = FnReadData__(\"n\")[1];\n int x = 0;\n if((n >= 1)) {\n x = (x + 1);\n for(i in (1 + 1):n) {\n x = (x + i);\n }\n }\n}\n\n\n\nExpression propagation replaces the uses of a variable which is known to be equal to an expression E with that expression E. This often results in recalculating the expression, but provides more opportunities for further optimizations such as partial evaluation. 
Expression propagation is always followed by lazy code motion to avoid unnecessarily recomputing expressions.\nExample Stan program:\ndata {\n int m;\n}\ntransformed data {\n int n = m+1;\n int a[n];\n for (i in 1:n-1) {\n a[i] = i;\n }\n}\nCompiler representation of program before expression propagation (simplified from the output of --debug-transformed-mir-pretty):\nprepare_data {\n data int m = FnReadData__(\"m\")[1];\n data int n = (m + 1);\n data array[int, n] a;\n for(i in 1:(n - 1)) {\n a[i] = i;\n }\n}\nCompiler representation of program after expression propagation (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int m = FnReadData__(\"m\")[1];\n data int n = (m + 1);\n data array[int, (m + 1)] a;\n for(i in 1:((m + 1) - 1)) {\n a[i] = i;\n }\n}\n\n\n\nLazy code motion rearranges the statements and expressions in a program with the goals of:\n\nAvoiding computing expressions more than once, and\nComputing expressions as late as possible (to minimize the strain on the working memory set).\n\nTo accomplish these goals, lazy code motion will perform optimizations such as:\n\nMoving a repeatedly calculated expression to its own variable (also referred to as common-subexpression elimination)\nMoving an expression outside of a loop if it does not need to be in the loop (also referred to as loop-invariant code motion)\n\nLazy code motion can make some programs significantly more efficient by avoiding redundant or early computations.\nAs currently implemented in the compiler, it may move items between blocks in a way that actually increases overall computation. 
Improving this is an ongoing project.\nExample Stan program:\nmodel {\n real x;\n real y;\n real z;\n\n for (i in 1:10) {\n x = sqrt(10);\n y = sqrt(i);\n }\n z = sqrt(10);\n}\nCompiler representation of program before lazy code motion (simplified from the output of --debug-transformed-mir-pretty):\nlog_prob {\n real x;\n real y;\n real z;\n for(i in 1:10) {\n x = sqrt(10);\n y = sqrt(i);\n }\n z = sqrt(10);\n}\nCompiler representation of program after lazy code motion (simplified from the output of --debug-optimized-mir-pretty):\nlog_prob {\n data real lcm_sym4__;\n data real lcm_sym3__;\n real x;\n real y;\n lcm_sym4__ = sqrt(10);\n real z;\n for(i in 1:10) {\n x = lcm_sym4__;\n y = sqrt(i);\n }\n z = lcm_sym4__;\n}\n\n\n\nStatic loop unrolling takes a loop with a predictable number of iterations X and replaces it by writing out the loop body X times. The loop index in each repeat is replaced with the appropriate constant. This optimization can speed up a program by avoiding the overhead of a loop and providing more opportunities for further optimizations (such as partial evaluation).\nExample Stan program:\ntransformed data {\n int x = 0;\n for (i in 1:4) {\n x += i;\n }\n}\nCompiler representation of program before static loop unrolling (simplified from the output of --debug-transformed-mir-pretty):\nprepare_data {\n data int x = 0;\n for(i in 1:4) {\n x = (x + i);\n }\n}\nCompiler representation of program after static loop unrolling (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int x;\n x = 0;\n x = (x + 1);\n x = (x + 2);\n x = (x + 3);\n x = (x + 4);\n}", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Using the Stan Compiler" + ] + }, + { + "objectID": "stan-users-guide/using-stanc.html#stanc-args", + "href": "stan-users-guide/using-stanc.html#stanc-args", + "title": "Using the Stan Compiler", + "section": "", + "text": "The stanc3 compiler has the following command-line syntax:\n> stanc (options) 
<model_file>\nwhere <model_file> is either a path to a file ending in .stan or .stanfunctions (which automatically sets --standalone-functions), or '-' to read from standard input.\nThe stanc3 options are:\n\n--help / -? - Displays the complete list of stanc3 options, then exits.\n--version - Display stanc version number\n--info - Print information about the model, such as the type information for variables and the list of used distributions.\n--name=<model_name> - Specify the name of the class used for the implementation of the Stan model in the generated C++ code.\n--o=<file_name> / -o=<filename> / --output=<filename> - Specify a path to an output file for generated C++ code (default = .hpp) or auto-formatting output (default: no file/print to stdout)\n--auto-format - Pretty prints the program to the console. See more on auto formatting.\n--allow-undefined - Do not throw a parser error if there is a function in the Stan program that is declared but not defined in the functions block.\n--canonicalize - Make changes to the program before pretty-printing by specifying options in a comma separated list. Options are ‘deprecations’, ‘parentheses’, ‘braces’, ‘includes’, and ‘strip-comments’.\n--include_paths=<dir1,...dirN> - Takes a comma-separated list of directories that may contain a file in an #include directive.\n--max-line-length=<number> - Set the column number at which formatting with --auto-format attempts to split lines. The default value is 78, which results in most lines being shorter than 80 characters.\n--print-canonical - Synonymous with --auto-format --canonicalize=deprecations,includes,parentheses,braces.\n--print-cpp - If set, output the generated C++ Stan model class to stdout.\n--filename-in-msg=<name> - Sets the filename used in compiler and runtime errors. If absent, the <model_file> argument is used.\n--standalone-functions - If set, only generate the code for the functions defined in the file. 
This is the default behavior for .stanfunctions files.\n--O0 (Default) Do not apply optimizations to the Stan code.\n--O1 Apply level 1 compiler optimizations (only basic optimizations).\n--Oexperimental WARNING: This is currently an experimental feature whose components are not thoroughly tested and may not improve a program’s performance! Allow the compiler to apply all optimizations to the Stan code.\n--O Synonym for --O1 as of Stan 2.37. In earlier versions this was a synonym for --Oexperimental.\n--use-opencl - If set, will use additional Stan OpenCL features enabled in the Stan-to-C++ compiler.\n--warn-pedantic - Emit warnings in Pedantic mode which warns of potential issues in the meaning of your program. Note: This may produce false positive warnings.\n--warn-uninitialized - Emit warnings about uninitialized variables to stderr. Currently an experimental feature.\n--color - Control whether errors and warnings are emitted with colored styling on terminals that support it. Valid values are auto (the default), always, never. Can also be controlled by the STANC_COLOR environment variable.\n\nThe compiler also provides a number of debug options which are primarily of interest to stanc3 developers; use the --help option to see the full set.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Using the Stan Compiler" + ] + }, + { + "objectID": "stan-users-guide/using-stanc.html#understanding-stanc3-errors-and-warnings", + "href": "stan-users-guide/using-stanc.html#understanding-stanc3-errors-and-warnings", + "title": "Using the Stan Compiler", + "section": "", + "text": "During model compilation, stanc can produce a variety of errors (issues that prevent the model from being compiled) and warnings (non-fatal issues that should still be considered).\n\n\nEven without the optional --warn-pedantic and --warn-uninitialized command line flags, both of which enable additional warnings, stanc can still produce warnings about your program. 
In particular, warnings will be produced in two situations\n\nA completely blank Stan program will produce the following warning message\nWarning in 'empty.stan', line 1, column 0 to line 2, column 0:\n Empty model detected; this is a valid Stan model but likely unintended!\nThe use of any deprecated features will lead to warnings which will look as follows\n Warning in 'deprecated.stan', line 2, column 10 to column 17:\n lkj_cov is deprecated and will be removed in Stan 3.0. Use lkj_corr with\n an independent lognormal distribution on the scales, see:\n https://mc-stan.org/docs/reference-manual/deprecations.html#lkj_cov-distribution\nA single Stan program can produce many warnings during compilation.\n\n\n\n\nErrors differ from warnings in their severity and format. In particular, errors are fatal and stop compilation, so at most one error is displayed per run of stanc.\nThere are five kinds of errors emitted by stanc3\n\nFile errors occur when the file passed to stanc is either missing or cannot be opened (i.e. has permissions issues). They look like\nError: file 'notfound.stan' not found or cannot be opened\nSyntactic errors occur whenever a program violates the Stan language’s syntax requirements. There are three kinds of errors within syntax errors; “lexing” errors mean that the input was unable to be read properly on the character level, “include” errors which occur when the #include directive fails, and “parsing” errors which result when the structure of the program is incorrect.\n\nThe lexing errors occur due to the use of invalid characters in a program. 
For example, a lexing error due to the use of $ in a variable name will look like the following.\nSyntax error in 'char.stan', line 2, column 7 to column 8, lexing error:\n-------------------------------------------------\n 1: data {\n 2: int $ome_variable;\n ^\n 3: }\n-------------------------------------------------\nInvalid character found.\nWhen an include directive is used, it can lead to errors if the included file is not found, or if a file includes itself (including a recursive loop of includes, such as A -> B -> A).\nSyntax error in './incl.stan', line 1, column 0, included from\n'./incl.stan', line 1, column 0, included from\n'incl.stan', line 1, column 0, include error:\n -------------------------------------------------\n 1: #include <incl.stan>\n ^\n -------------------------------------------------\nFile incl.stan recursively included itself.\nIt is much more common to see parsing errors, which tend to have more in-depth explanations of the error found. For example, if a user forgets to put a size on a type like vector, as in the following, this raises a parsing (structural) error in the compiler.\nSyntax error in 'vec.stan', line 3, column 10 to column 11, parsing error:\n -------------------------------------------------\n 1: data {\n 2: int<lower=0> N;\n 3: vector x;\n ^\n 4: }\n -------------------------------------------------\nIll-formed type. Expected \"[\" expression \"]\" for vector size.\n\nSemantic errors (also known as type errors) occur when a program is structured correctly but features an error in the type rules imposed by the language. 
An example of this is assigning a real value to a variable defined as an integer.\nSemantic error in 'type.stan', line 2, column 3 to column 15:\n -------------------------------------------------\n 1: transformed data {\n 2: int x = 1.5;\n ^\n 3: }\n -------------------------------------------------\nIll-typed arguments supplied to assignment operator =:\nThe left hand side has type\n int\nand the right hand side has type\n real\nThe compiler will raise an error for use of any removed features for at least one version following their removal. The deprecation warnings mentioned above eventually turn into this kind of error to prompt the user to update their model. After the version of removal, these errors will be converted to one of the other types listed here, depending on the feature.\nFinally, the compiler can raise an internal error. These are caused by bugs in the compiler, not your model, and we would appreciate it if you report them on the stanc3 repo with the error message provided. These errors usually say something like “This should never happen,” and we apologize if they do.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Using the Stan Compiler" + ] + }, + { + "objectID": "stan-users-guide/using-stanc.html#pedantic-mode", + "href": "stan-users-guide/using-stanc.html#pedantic-mode", + "title": "Using the Stan Compiler", + "section": "", + "text": "Pedantic mode is a compilation option built into Stanc3 that warns you about potential issues in your Stan program.\nFor example, consider the following program.\ndata {\n int N;\n array[N] real x;\n}\nparameters {\n real sigma;\n}\nmodel {\n real mu;\n x ~ normal(mu, sigma);\n}\nWhen pedantic mode is turned on, the compiler will produce the following warnings.\nWarning in 'ped-mode-ex1.stan', line 6, column 2 to column 13:\n The parameter sigma has no priors. This means either no prior is\n provided, or the prior(s) depend on data variables. 
In the later case,\n this may be a false positive.\nWarning in 'ped-mode-ex1.stan', line 10, column 13 to column 15:\n The variable mu may not have been assigned a value before its use.\nWarning in 'ped-mode-ex1.stan', line 10, column 17 to column 22:\n A normal distribution is given parameter sigma as a scale parameter\n (argument 2), but sigma was not constrained to be strictly positive.\nHere are the kinds of issues that pedantic mode will find (which are described in more detail in the following sections):\n\nDistribution usage issues. Distribution arguments don’t match the distribution specification, or some specific distribution is used in an inadvisable way.\nUnused parameter. A parameter is defined but doesn’t contribute to target.\nLarge or small constant in a distribution. Very large or very small constants are used as distribution arguments.\nControl flow depends on a parameter. Branching control flow (like if/else) depends on a parameter value.\nParameter has multiple tildes. A parameter is on the left-hand side of multiple tildes.\nParameter has zero or multiple priors. A parameter has zero or more than one prior distribution.\nVariable is used before assignment. A variable is used before being assigned a value.\nStrict or nonsensical parameter bounds. A parameter is given questionable bounds.\nNonlinear transformations. The left-hand side of a tilde statement (or first argument of a log probability function) contains a nonlinear transform which may require a Jacobian change of variables adjustment.\n\nSome important limitations of pedantic mode are listed at the end of this chapter.\n\n\nWhen an argument to a built-in distribution certainly does not match that distribution’s specification in the Stan Functions Reference, a warning is thrown. This primarily checks if any distribution argument’s bounds at declaration, compile-time value, or subtype at declaration (e.g. simplex) is incompatible with the domain of the distribution. 
\nFor example, consider the following program.\nparameters {\n real unb_p;\n real<lower=0> pos_p;\n}\nmodel {\n 1 ~ poisson(unb_p);\n 1 ~ poisson(pos_p);\n}\nThe parameter of poisson should be strictly positive, but unb_p is not constrained to be positive.\nPedantic mode produces the following warning.\nWarning in 'ex-dist-args.stan', line 6, column 14 to column 19:\n A poisson distribution is given parameter unb_p as a rate parameter\n (argument 1), but unb_p was not constrained to be strictly positive.\n\n\n\nPedantic mode checks for some specific uses of distributions that may indicate a statistical mistake:\n\n\nAny use of the uniform distribution generates a warning, except when the variate parameter’s declared upper and lower bounds exactly match the uniform distribution bounds. In general, assigning a parameter a uniform distribution can create non-differentiable boundary conditions and is not recommended.\nFor example, consider the following program.\nparameters {\n real a;\n real<lower=0, upper=1> b;\n}\nmodel {\n a ~ uniform(0, 1);\n b ~ uniform(0, 1);\n}\na is assigned a uniform distribution that doesn’t match its constraints.\nPedantic mode produces the following warning.\nWarning in 'uniform-warn.stan', line 6, column 2 to column 20:\n Parameter a is given a uniform distribution. The uniform distribution is\n not recommended, for two reasons: (a) Except when there are logical or\n physical constraints, it is very unusual for you to be sure that a\n parameter will fall inside a specified range, and (b) The infinite gradient\n induced by a uniform density can cause difficulties for Stan's sampling\n algorithm. As a consequence, we recommend soft constraints rather than hard\n constraints; for example, instead of giving an elasticity parameter a\n uniform(0, 1) distribution, try normal(0.5, 0.5).\n\n\n\nGamma distributions are sometimes used as an attempt to assign an improper prior to a parameter. 
Pedantic mode gives a warning when the Gamma arguments indicate that this may be the case.\n\n\n\nAny use of the lkj_corr distribution generates a warning that suggests using the Cholesky variant instead. See the LKJ correlation distribution section of the Stan Functions Reference for details.\n\n\n\n\nA warning is generated when a parameter is declared but does not have any effect on the program. This is determined by checking whether the value of the target variable depends in any way on each of the parameters.\nFor example, consider the following program.\nparameters {\n real a;\n real b;\n}\nmodel {\n a ~ normal(1, 1);\n}\na participates in the density function but b does not.\nPedantic mode produces the following warning.\nWarning in 'unused.stan', line 3, column 2 to column 9:\n The parameter b was declared but was not used in the density calculation.\n\n\n\nWhen numbers with magnitude less than 0.1 or greater than 10 are used as arguments to a distribution, it indicates that some parameter is not scaled to unit value, so a warning is thrown. 
See the efficiency tuning section of the Stan User’s guide for a discussion of scaling parameters.\nFor example, consider the following program.\nparameters {\n real x;\n real y;\n}\nmodel {\n x ~ normal(-100, 100);\n y ~ normal(0, 1);\n}\nThe constants -100 and 100 suggest that x is not unit scaled.\nPedantic mode produces the following warning.\nWarning in 'constants-warn.stan', line 6, column 13 to column 17:\n Argument -100 suggests there may be parameters that are not unit scale;\n consider rescaling with a multiplier, see:\n https://mc-stan.org/docs/stan-users-guide/efficiency-tuning.html#standardizing-predictors\nWarning in 'constants-warn.stan', line 6, column 19 to column 22:\n Argument 100 suggests there may be parameters that are not unit scale;\n consider rescaling with a multiplier, see:\n https://mc-stan.org/docs/stan-users-guide/efficiency-tuning.html#standardizing-predictors\n\n\n\nControl flow statements, such as if, for and while should not depend on parameters or functions of parameters to determine their branching conditions. This is likely to introduce a discontinuity into the density function. 
Pedantic mode generates a warning when any branching condition may depend on a parameter value.\nFor example, consider the following program.\nparameters {\n real a;\n}\nmodel {\n // x depends on parameter a\n real x = a * a;\n\n int m;\n\n // the if-then-else depends on x which depends on a\n if(x > 0) {\n //now m depends on x which depends on a\n m = 1;\n } else {\n m = 2;\n }\n\n // for loop depends on m -> x -> a\n for (i in 0:m) {\n a ~ normal(i, 1);\n }\n}\nThe if and for statements are control flow that depend (indirectly) on the value of the parameter a.\nPedantic mode produces the following warning.\nWarning in 'param-dep-cf-warn.stan', line 11, column 2 to line 16, column 3:\n A control flow statement depends on parameter(s): a.\nWarning in 'param-dep-cf-warn.stan', line 19, column 2 to line 21, column 3:\n A control flow statement depends on parameter(s): a.\n\n\n\nA warning is generated when a parameter is found on the left-hand side of more than one ~ statement (or an equivalent target += conditional density statement). This pattern is not inherently an issue, but it is unusual and may indicate a mistake.\nPedantic mode only searches for repeated statements; it will not, for example, generate a warning when a ~ statement is executed repeatedly inside of a loop.\nFor example, consider the following program.\ndata {\n real x;\n}\nparameters {\n real a;\n real b;\n}\nmodel {\n a ~ normal(0, 1);\n a ~ normal(x, 1);\n\n b ~ normal(1, 1);\n}\nPedantic mode produces the following warning.\nWarning in 'multi-tildes.stan', line 9, column 2 to column 19:\n The parameter a is on the left-hand side of more than one tildes\n statement.\n\n\n\nA warning is generated when a parameter appears to have greater than or less than one prior distribution factor.\nThis analysis depends on a factor graph representation of a Stan program. 
A factor F that depends on a parameter P is called a prior factor for P if there is no path in the factor graph from F to any data variable except through P.\nOne limitation of this approach is that the compiler cannot distinguish between modeled data variables and other convenient uses of data variables such as data sizes or hyperparameters. This warning assumes that all data variables (except for int variables) are modeled data, which may cause extra warnings.\nFor example, consider the following program.\ndata {\n real x;\n}\nparameters {\n real a;\n real b;\n real c;\n real d;\n}\nmodel\n{\n a ~ normal(0, 1); // this is a prior\n x ~ normal(a, 1); // this is not a prior, since data is involved\n\n b ~ normal(x, 1); // this is also not a prior, since data is involved\n\n // this is not a prior for c, since data is involved through b\n // but it is a prior for b, since the data is only involved through b\n c ~ normal(b, 1);\n\n //these are multiple priors:\n d ~ normal(0, 1);\n 1 ~ normal(d, 1);\n}\nOne prior is found for a and for b, while c only has a factor that touches a data variable and d has multiple priors.\nPedantic mode produces the following warning.\nWarning in 'priors.stan', line 7, column 2 to column 9:\n The parameter c has no priors. This means either no prior is provided, or\n the prior(s) depend on data variables. 
In the later case, this may be a\n false positive.\nWarning in 'priors.stan', line 8, column 2 to column 9:\n The parameter d has 2 priors.\n\n\n\n\nA warning is generated when any variable is used before it has been assigned a value.\nFor example, consider the following program.\ntransformed data {\n real x;\n if (1 > 2) {\n x = 1;\n } else {\n print(\"oops\");\n }\n print(x);\n}\nSince x is only assigned in one of the branches of the if statement, it might get to print(x) without having been assigned to.\nPedantic mode produces the following warning.\nWarning in 'uninit-warn.stan', line 7, column 8 to column 9:\n The variable x may not have been assigned a value before its use.\n\n\n\nExcept when there are logical or physical constraints, it is very unusual for you to be sure that a parameter will fall inside a specified range. A warning is generated for all parameters declared with the bounds <lower=.., upper=..> except for <lower=0, upper=1> or <lower=-1, upper=1>.\nIn addition, a warning is generated when a parameter bound is found to have lower >= upper.\nFor example, consider the following program.\nparameters {\n real<lower=0, upper=1> a;\n real<lower=-1, upper=1> b;\n real<lower=-2, upper=1012> c;\n}\nmodel {\n c ~ normal(b, a);\n}\nPedantic mode produces the following warning.\nWarning in 'hard-constraint.stan', line 4, column 2 to column 31:\n Your Stan program has a parameter c with a lower and upper bound in its\n declaration. These hard constraints are not recommended, for two reasons:\n (a) Except when there are logical or physical constraints, it is very\n unusual for you to be sure that a parameter will fall inside a specified\n range, and (b) The infinite gradient induced by a hard constraint can\n cause difficulties for Stan's sampling algorithm. 
As a consequence, we\n recommend soft constraints rather than hard constraints; for example,\n instead of constraining an elasticity parameter to fall between 0, and 1,\n leave it unconstrained and give it a normal(0.5,0.5) prior distribution.\n\n\n\nWhen a parameter is transformed in a non-linear fashion, an adjustment must be applied to account for distortion caused by the transform. This is discussed in depth in the Changes of variables section.\nThis portion of pedantic mode tries to detect instances where such an adjustment would be necessary and remind the user.\nFor example, consider the following program.\nparameters {\n real y;\n}\nmodel {\n log(y) ~ normal(0,1);\n}\nPedantic mode produces the following warning.\nWarning in 'jacobian.stan', line 5, column 2 to column 23:\n Left-hand side of distribution statement (~) may contain a non-linear\n transform of a parameter or local variable. If it does, you need to\n include a target += statement with the log absolute determinant of the\n Jacobian of the transform. You could also consider defining a transformed\n parameter and using jacobian += in the transformed parameters block.\n\n\n\n\nConstant values are sometimes uncomputable\nPedantic mode attempts to evaluate expressions down to literal values so that they can be used to generate warnings. For example, in the code normal(x, 1 - 2), the expression 1 - 2 will be evaluated to -1, which is not a valid variance argument so a warning is generated. However, this strategy is limited; it is often impossible to fully evaluate expressions in finite time.\nContainer types\nCurrently, indexed variables are not handled intelligently, so they are treated as monolithic variables. 
Each analysis treats indexed variables conservatively (erring toward generating fewer warnings).\nData variables\nThe declaration information for data variables is currently not considered, so using data as incompatible arguments to distributions may not generate the appropriate warnings.\nControl flow dependent on parameters in nested functions\nIf a parameter is passed as an argument to a user-defined function within another user-defined function, and then some control flow depends on that argument, the appropriate warning will not be thrown.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Using the Stan Compiler" + ] + }, + { + "objectID": "stan-users-guide/using-stanc.html#stanc-pretty-printing", + "href": "stan-users-guide/using-stanc.html#stanc-pretty-printing", + "title": "Using the Stan Compiler", + "section": "", + "text": "In addition to compiling Stan programs, stanc3 features several flags which can be used to format Stan programs and update them to the most recent Stan syntax by removing any deprecation features which can be automatically replaced.\nThese flags work for both .stan model files and .stanfunctions function files. They can be combined with --o to redirect the formatted output to a new file.\n\n\nInvoking stanc --auto-format <model_file> will print a version of your model which has been re-formatted. The goal is to have this automatic formatting stay as close as possible to the Stan Program Style Guide. This means spacing, indentation, and line length are all regularized. Some deprecated features, like the use of # for line comments, are replaced, but the goal is mainly to preserve the program while formatting it.\nBy default, this will try to split lines at or before column 78. 
This number can be changed using --max-line-length.\n\n\n\nIn addition to automatic formatting, stanc can also “canonicalize” programs by updating deprecated syntax, removing unnecessary parentheses, and adding braces around bodies of if statements and for and while loops.\nThis can be done by using stanc --auto-format --canonicalize=... where ... is a comma-separated list of options. Currently these options are:\n\ndeprecations\nRemoves deprecated syntax such as replacing deprecated functions with their drop-in replacements.\nparentheses\nRemoves unnecessary extra parentheses, such as converting y = ((x-1)) to y = x - 1.\nbraces\nPlaces braces around all blocks. For example, the following statement\nif (cond)\n //result\nwill be formatted as\nif (cond) {\n //result\n}\nand similarly for both kinds of loops containing a single statement.\nincludes\nThis will pretty-print code from other files included with #include as part of the program. This was the default behavior prior to Stan 2.29. When not enabled, the pretty-printer output will include the same #include directives as the input program.\n\nInvoking stanc --print-canonical <model_file> is synonymous with running stanc --auto-format --canonicalize=deprecations,braces,parentheses,includes.\n\n\n\nThe formatting and canonicalizing features of stanc3 are still under development. The following are some known issues one should be aware of before using either:\n\nOddly placed comments\nIf your Stan program features comments in unexpected places, such as inside an expression, they may be moved in the process of formatting. Moved comments are prefixed with the string ^^^: to indicate they originally appeared higher in the program.\nWe hope to improve this functionality in future versions. 
For now, this can usually be avoided by manually moving the comment outside of an expression, either by placing it on its own line or following a separator such as a comma or keyword.\nFailure to recreate strange #include structure\nPrinting without include inlining (--canonicalize=includes) can fail when includes were used in atypical locations, such as in the middle of statements. We recommend either printing with inlining enabled or reconsidering the use of includes in this way.", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Using the Stan Compiler" + ] + }, + { + "objectID": "stan-users-guide/using-stanc.html#optimization", + "href": "stan-users-guide/using-stanc.html#optimization", + "title": "Using the Stan Compiler", + "section": "", + "text": "The stanc3 compiler can optimize the code of Stan model during compilation. The optimized model code behaves the same as unoptimized code, but it may be faster, more memory efficient, or more numerically stable.\nThis section introduces the available optimization options and describes their effect.\nTo print out a representation of the optimized Stan program, use the stanc3 command-line flag --debug-optimized-mir-pretty. To print an analogous representation of the Stan program prior to optimization, use the flag --debug-transformed-mir-pretty.\n\n\nTo turn optimizations on, the user specifies the desired optimization level. The level specifies the set of optimizations to use. The chosen optimizations are used in a specific order, with some of them applied repeatedly.\nOptimization levels are specified by the numbers 0 and 1 and the ‘experimental’ tag:\n\nO0 No optimizations are applied.\nO1 Optimizations that are simple, do not dramatically change the program, and are unlikely to noticeably slow down compile times are applied.\nOexperimental All optimizations are applied. 
Some of these are not thoroughly tested and may not always improve a program’s performance.\n\nO0 is the default setting.\nThe levels include these optimizations:\n\nO0 includes no optimizations.\nO1 includes:\n\nDead code elimination\nCopy propagation\nConstant propagation\nPartial evaluation\nFunction inlining\nMatrix memory layout optimization\n\nOexperimental includes optimizations specified by O1 and also:\n\nAutomatic-differentiation level optimization\nOne step loop unrolling\nExpression propagation\nLazy code motion\nStatic loop unrolling\n\n\nIn addition, Oexperimental will apply more repetitions of the optimizations, which may increase compile times.\n\n\n\n\n\nDead code is code that does not affect the behavior of the program. Code is not dead if it affects target, the value of any outside-observable variable like transformed parameters or generated quantities, or side effects such as print statements. Removing dead code can speed up a program by avoiding unnecessary computations.\nExample Stan program:\nmodel {\n int i;\n i = 5;\n for (j in 1:10);\n if (0) {\n print(\"Dead code\");\n } else {\n print(\"Hi!\");\n }\n}\nCompiler representation of program before dead code elimination (simplified from the output of --debug-transformed-mir-pretty):\nlog_prob {\n int i = 5;\n for(j in 1:10) {\n ;\n }\n if(0) {\n FnPrint__(\"Dead code\");\n } else {\n FnPrint__(\"Hi!\");\n }\n}\nCompiler representation of program after dead code elimination (simplified from the output of --debug-optimized-mir-pretty):\nlog_prob {\n int i;\n FnPrint__(\"Hi!\");\n}\n\n\n\nConstant propagation replaces uses of a variable which is known to have a constant value C with that constant C. 
This removes the overhead of looking up the variable, and also makes many other optimizations possible (such as static loop unrolling and partial evaluation).\nExample Stan program:\ntransformed data {\n int n = 100;\n array[n] int a;\n for (i in 1:n) {\n a[i] = i;\n }\n}\nCompiler representation of program before constant propagation (simplified from the output of --debug-transformed-mir-pretty):\nprepare_data {\n data int n = 100;\n data array[int, n] a;\n for(i in 1:n) {\n a[i] = i;\n }\n}\nCompiler representation of program after constant propagation (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int n = 100;\n data array[int, 100] a;\n for(i in 1:100) {\n a[i] = i;\n }\n}\n\n\n\nCopy propagation is similar to expression propagation, but only propagates variables rather than arbitrary expressions. This can reduce the complexity of the code for other optimizations such as expression propagation.\nExample Stan program:\nmodel {\n int i = 1;\n int j = i;\n int k = i + j;\n}\nCompiler representation of program before copy propagation (simplified from the output of --debug-transformed-mir-pretty):\nlog_prob {\n int i = 1;\n int j = i;\n int k = (i + j);\n}\nCompiler representation of program after copy propagation (simplified from the output of --debug-optimized-mir-pretty):\nlog_prob {\n int i = 1;\n int j = i;\n int k = (i + i);\n}\n\n\n\nPartial evaluation searches for expressions that we can replace with a faster, simpler, more memory efficient, or more numerically stable expression with the same meaning.\nExample Stan program:\nmodel {\n real a = 1 + 1;\n real b = log(1 - a);\n real c = a + b * 5;\n}\nCompiler representation of program before partial evaluation (simplified from the output of --debug-transformed-mir-pretty):\nlog_prob {\n real a = (1 + 1);\n real b = log((1 - a));\n real c = (a + (b * 5));\n}\nCompiler representation of program after partial evaluation (simplified from the output of 
--debug-optimized-mir-pretty):\nlog_prob {\n real a = 2;\n real b = log1m(a);\n real c = fma(b, 5, a);\n}\n\n\n\nFunction inlining replaces each function call to each user-defined function f with the body of f. It does this by copying the function body to the call site and appropriately renaming the argument variables. This optimization can speed up a program by avoiding the overhead of a function call and providing more opportunities for further optimizations (such as partial evaluation).\nExample Stan program:\nfunctions {\n int incr(int x) {\n int y = 1;\n return x + y;\n }\n}\ntransformed data {\n int a = 2;\n int b = incr(a);\n}\nCompiler representation of program before function inlining (simplified from the output of --debug-transformed-mir-pretty):\nfunctions {\n int incr(int x) {\n int y = 1;\n return (x + y);\n }\n}\n\nprepare_data {\n data int a = 2;\n data int b = incr(a);\n}\nCompiler representation of program after function inlining (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int a;\n a = 2;\n data int b;\n data int inline_sym1__;\n data int inline_sym3__;\n inline_sym3__ = 0;\n for(inline_sym4__ in 1:1) {\n int inline_sym2__;\n inline_sym2__ = 1;\n inline_sym3__ = 1;\n inline_sym1__ = (a + inline_sym2__);\n break;\n }\n b = inline_sym1__;\n}\nIn this code, the for loop and break are used to simulate the behavior of a return statement. The value to be returned is held in inline_sym1__. The flag variable inline_sym3__ indicates whether a return has occurred and is necessary to handle return statements nested inside loops within the function body.\n\n\n\nMatrix and vector variables which require automatic-differentiation (AD) in Stan can be represented in two different forms.\nThe first (and default) representation is the “Array of Structs” (AoS) or “Matrix of vars” (matvar) layout. A “var” is the term used in the Stan implementation of autodiff for a single real. 
It is represented as a structure containing its value and its adjoint. The AoS representation constructs matrices and vectors by simply using those structures as the elements of the matrix internally. This is flexible and very general, but many operations want to deal with the values or the adjoints as blocks, requiring expensive memory access patterns.\nThe second representation is the “Struct of Arrays” (SoA) or “Var of matrices” (varmat) layout. Rather than a matrix containing tiny structures of one value and one adjoint each, this representation uses a single structure which contains separately a matrix of values and a matrix of adjoints. Some operations, like iterating over elements or assigning to specific indices, become more expensive, but many matrix operations like multiplications become much faster in this representation.\nMore general reading on AoS vs SoA can be found on Wikipedia.\nThis optimization pass attempts to identify which matrix or vector variables in the Stan program are candidates for using the SoA representation. The conditions change over time, but broadly speaking:\n\nAny Stan Math Library functions the matrix is passed to must be able to support it.\nThe matrix should not be accessed/assigned elementwise in a loop.\n\nThe debug flag --debug-mem-patterns will list each variable and whether it is using the AoS representation or the SoA representation.\n\n\n\n\n\n\nStan variables can have two auto-differentiation (AD) levels: AD or non-AD. AD variables carry gradient information with them, which allows Stan to calculate the log-density gradient, but they also have more overhead than non-AD variables. It is therefore inefficient for a variable to be AD unnecessarily. 
AD-level optimization sets every variable to be a floating point type unless its gradient is necessary.\nExample Stan program:\ndata {\n real y;\n}\nmodel {\n real x = y + 1;\n}\nCompiler representation of program before AD-level optimization (simplified from the output of --debug-transformed-mir-pretty):\ninput_vars {\n real y;\n}\n\nlog_prob {\n real x = (y + 1);\n}\nCompiler representation of program after AD-level optimization (simplified from the output of --debug-optimized-mir-pretty):\ninput_vars {\n real y;\n}\n\nlog_prob {\n data real x = (y + 1);\n}\n\n\n\nOne step loop unrolling is similar to static loop unrolling. However, this optimization only ‘unrolls’ the first loop iteration, and can therefore work even when the total number of iterations is not predictable. This can speed up a program by providing more opportunities for further optimizations such as partial evaluation and lazy code motion.\nExample Stan program:\ndata {\n int n;\n}\ntransformed data {\n int x = 0;\n for (i in 1:n) {\n x += i;\n }\n}\nCompiler representation of program before one step loop unrolling (simplified from the output of --debug-transformed-mir-pretty):\nprepare_data {\n data int n = FnReadData__(\"n\")[1];\n data int x = 0;\n for(i in 1:n) {\n x = (x + i);\n }\n}\nCompiler representation of program after one step loop unrolling (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int n = FnReadData__(\"n\")[1];\n int x = 0;\n if((n >= 1)) {\n x = (x + 1);\n for(i in (1 + 1):n) {\n x = (x + i);\n }\n }\n}\n\n\n\nExpression propagation replaces the uses of a variable which is known to have the value of an expression E with that expression E. This often results in recalculating the expression, but provides more opportunities for further optimizations such as partial evaluation. 
Expression propagation is always followed by lazy code motion to avoid unnecessarily recomputing expressions.\nExample Stan program:\ndata {\n int m;\n}\ntransformed data {\n int n = m+1;\n array[n] int a;\n for (i in 1:n-1) {\n a[i] = i;\n }\n}\nCompiler representation of program before expression propagation (simplified from the output of --debug-transformed-mir-pretty):\nprepare_data {\n data int m = FnReadData__(\"m\")[1];\n data int n = (m + 1);\n data array[int, n] a;\n for(i in 1:(n - 1)) {\n a[i] = i;\n }\n}\nCompiler representation of program after expression propagation (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int m = FnReadData__(\"m\")[1];\n data int n = (m + 1);\n data array[int, (m + 1)] a;\n for(i in 1:((m + 1) - 1)) {\n a[i] = i;\n }\n}\n\n\n\nLazy code motion rearranges the statements and expressions in a program with the goals of:\n\nAvoiding computing expressions more than once, and\nComputing expressions as late as possible (to minimize the strain on the working memory set).\n\nTo accomplish these goals, lazy code motion will perform optimizations such as:\n\nMoving a repeatedly calculated expression to its own variable (also referred to as common-subexpression elimination)\nMoving an expression outside of a loop if it does not need to be in the loop (also referred to as loop-invariant code motion)\n\nLazy code motion can make some programs significantly more efficient by avoiding redundant or early computations.\nAs currently implemented in the compiler, it may move items between blocks in a way that actually increases overall computation. 
Improving this is an ongoing project.\nExample Stan program:\nmodel {\n real x;\n real y;\n real z;\n\n for (i in 1:10) {\n x = sqrt(10);\n y = sqrt(i);\n }\n z = sqrt(10);\n}\nCompiler representation of program before lazy code motion (simplified from the output of --debug-transformed-mir-pretty):\nlog_prob {\n real x;\n real y;\n real z;\n for(i in 1:10) {\n x = sqrt(10);\n y = sqrt(i);\n }\n z = sqrt(10);\n}\nCompiler representation of program after lazy code motion (simplified from the output of --debug-optimized-mir-pretty):\nlog_prob {\n data real lcm_sym4__;\n data real lcm_sym3__;\n real x;\n real y;\n lcm_sym4__ = sqrt(10);\n real z;\n for(i in 1:10) {\n x = lcm_sym4__;\n y = sqrt(i);\n }\n z = lcm_sym4__;\n}\n\n\n\nStatic loop unrolling takes a loop with a predictable number of iterations X and replaces it by writing out the loop body X times. The loop index in each repeat is replaced with the appropriate constant. This optimization can speed up a program by avoiding the overhead of a loop and providing more opportunities for further optimizations (such as partial evaluation).\nExample Stan program:\ntransformed data {\n int x = 0;\n for (i in 1:4) {\n x += i;\n }\n}\nCompiler representation of program before static loop unrolling (simplified from the output of --debug-transformed-mir-pretty):\nprepare_data {\n data int x = 0;\n for(i in 1:4) {\n x = (x + i);\n }\n}\nCompiler representation of program after static loop unrolling (simplified from the output of --debug-optimized-mir-pretty):\nprepare_data {\n data int x;\n x = 0;\n x = (x + 1);\n x = (x + 2);\n x = (x + 3);\n x = (x + 4);\n}", + "crumbs": [ + "Stan Users Guide", + "Appendices", + "Using the Stan Compiler" + ] + } +] \ No newline at end of file diff --git a/docs/2_39/site_libs/bootstrap/bootstrap-c777a6582eb78ab223b9896ab55f446e.min.css b/docs/2_39/site_libs/bootstrap/bootstrap-c777a6582eb78ab223b9896ab55f446e.min.css new file mode 100644 index 000000000..a9fb78da9 --- /dev/null +++ 
b/docs/2_39/site_libs/bootstrap/bootstrap-c777a6582eb78ab223b9896ab55f446e.min.css @@ -0,0 +1,12 @@ +@import"https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400;700&display=swap";:root{--stan-bg: #FEFCF9;--stan-highlight: #F2E5BD;--stan-secondary: #3E8EBC;--stan-dark: #052744;--stan-hero: #F2E5BD;--stan-hero-bg: #052744}/*! + * Bootstrap v5.3.1 (https://getbootstrap.com/) + * Copyright 2011-2023 The Bootstrap Authors + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) + */:root,[data-bs-theme=light]{--bs-blue: #2780e3;--bs-indigo: #6610f2;--bs-purple: #613d7c;--bs-pink: #e83e8c;--bs-red: #ff0039;--bs-orange: #f0ad4e;--bs-yellow: #ff7518;--bs-green: #3fb618;--bs-teal: #20c997;--bs-cyan: #9954bb;--bs-black: #000;--bs-white: #fff;--bs-gray: #6c757d;--bs-gray-dark: #343a40;--bs-gray-100: #f8f9fa;--bs-gray-200: #e9ecef;--bs-gray-300: #dee2e6;--bs-gray-400: #ced4da;--bs-gray-500: #adb5bd;--bs-gray-600: #6c757d;--bs-gray-700: #495057;--bs-gray-800: #343a40;--bs-gray-900: #212529;--bs-default: #343a40;--bs-primary: #2780e3;--bs-secondary: #343a40;--bs-success: #3fb618;--bs-info: #9954bb;--bs-warning: #ff7518;--bs-danger: #ff0039;--bs-light: #f8f9fa;--bs-dark: #343a40;--bs-default-rgb: 52, 58, 64;--bs-primary-rgb: 39, 128, 227;--bs-secondary-rgb: 52, 58, 64;--bs-success-rgb: 63, 182, 24;--bs-info-rgb: 153, 84, 187;--bs-warning-rgb: 255, 117, 24;--bs-danger-rgb: 255, 0, 57;--bs-light-rgb: 248, 249, 250;--bs-dark-rgb: 52, 58, 64;--bs-primary-text-emphasis: #10335b;--bs-secondary-text-emphasis: #15171a;--bs-success-text-emphasis: #19490a;--bs-info-text-emphasis: #3d224b;--bs-warning-text-emphasis: #662f0a;--bs-danger-text-emphasis: #660017;--bs-light-text-emphasis: #495057;--bs-dark-text-emphasis: #495057;--bs-primary-bg-subtle: #d4e6f9;--bs-secondary-bg-subtle: #d6d8d9;--bs-success-bg-subtle: #d9f0d1;--bs-info-bg-subtle: #ebddf1;--bs-warning-bg-subtle: #ffe3d1;--bs-danger-bg-subtle: #ffccd7;--bs-light-bg-subtle: 
#fcfcfd;--bs-dark-bg-subtle: #ced4da;--bs-primary-border-subtle: #a9ccf4;--bs-secondary-border-subtle: #aeb0b3;--bs-success-border-subtle: #b2e2a3;--bs-info-border-subtle: #d6bbe4;--bs-warning-border-subtle: #ffc8a3;--bs-danger-border-subtle: #ff99b0;--bs-light-border-subtle: #e9ecef;--bs-dark-border-subtle: #adb5bd;--bs-white-rgb: 255, 255, 255;--bs-black-rgb: 0, 0, 0;--bs-font-sans-serif: "Source Sans Pro", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";--bs-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;--bs-gradient: linear-gradient(180deg, rgba(255, 255, 255, 0.15), rgba(255, 255, 255, 0));--bs-root-font-size: 17px;--bs-body-font-family: "Source Sans Pro", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";--bs-body-font-size:1rem;--bs-body-font-weight: 400;--bs-body-line-height: 1.5;--bs-body-color: #343a40;--bs-body-color-rgb: 52, 58, 64;--bs-body-bg: #FEFBF2;--bs-body-bg-rgb: 254, 251, 242;--bs-emphasis-color: #000;--bs-emphasis-color-rgb: 0, 0, 0;--bs-secondary-color: rgba(52, 58, 64, 0.75);--bs-secondary-color-rgb: 52, 58, 64;--bs-secondary-bg: #e9ecef;--bs-secondary-bg-rgb: 233, 236, 239;--bs-tertiary-color: rgba(52, 58, 64, 0.5);--bs-tertiary-color-rgb: 52, 58, 64;--bs-tertiary-bg: #f8f9fa;--bs-tertiary-bg-rgb: 248, 249, 250;--bs-heading-color: inherit;--bs-link-color: #2761e3;--bs-link-color-rgb: 39, 97, 227;--bs-link-decoration: underline;--bs-link-hover-color: #1f4eb6;--bs-link-hover-color-rgb: 31, 78, 182;--bs-code-color: #7d12ba;--bs-highlight-bg: #ffe3d1;--bs-border-width: 1px;--bs-border-style: solid;--bs-border-color: #dee2e6;--bs-border-color-translucent: rgba(0, 0, 0, 0.175);--bs-border-radius: 0.25rem;--bs-border-radius-sm: 0.2em;--bs-border-radius-lg: 0.5rem;--bs-border-radius-xl: 
1rem;--bs-border-radius-xxl: 2rem;--bs-border-radius-2xl: var(--bs-border-radius-xxl);--bs-border-radius-pill: 50rem;--bs-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);--bs-box-shadow-sm: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.075);--bs-box-shadow-lg: 0 1rem 3rem rgba(0, 0, 0, 0.175);--bs-box-shadow-inset: inset 0 1px 2px rgba(0, 0, 0, 0.075);--bs-focus-ring-width: 0.25rem;--bs-focus-ring-opacity: 0.25;--bs-focus-ring-color: rgba(39, 128, 227, 0.25);--bs-form-valid-color: #3fb618;--bs-form-valid-border-color: #3fb618;--bs-form-invalid-color: #ff0039;--bs-form-invalid-border-color: #ff0039}[data-bs-theme=dark]{color-scheme:dark;--bs-body-color: #dee2e6;--bs-body-color-rgb: 222, 226, 230;--bs-body-bg: #212529;--bs-body-bg-rgb: 33, 37, 41;--bs-emphasis-color: #fff;--bs-emphasis-color-rgb: 255, 255, 255;--bs-secondary-color: rgba(222, 226, 230, 0.75);--bs-secondary-color-rgb: 222, 226, 230;--bs-secondary-bg: #343a40;--bs-secondary-bg-rgb: 52, 58, 64;--bs-tertiary-color: rgba(222, 226, 230, 0.5);--bs-tertiary-color-rgb: 222, 226, 230;--bs-tertiary-bg: #2b3035;--bs-tertiary-bg-rgb: 43, 48, 53;--bs-primary-text-emphasis: #7db3ee;--bs-secondary-text-emphasis: #85898c;--bs-success-text-emphasis: #8cd374;--bs-info-text-emphasis: #c298d6;--bs-warning-text-emphasis: #ffac74;--bs-danger-text-emphasis: #ff6688;--bs-light-text-emphasis: #f8f9fa;--bs-dark-text-emphasis: #dee2e6;--bs-primary-bg-subtle: #081a2d;--bs-secondary-bg-subtle: #0a0c0d;--bs-success-bg-subtle: #0d2405;--bs-info-bg-subtle: #1f1125;--bs-warning-bg-subtle: #331705;--bs-danger-bg-subtle: #33000b;--bs-light-bg-subtle: #343a40;--bs-dark-bg-subtle: #1a1d20;--bs-primary-border-subtle: #174d88;--bs-secondary-border-subtle: #1f2326;--bs-success-border-subtle: #266d0e;--bs-info-border-subtle: #5c3270;--bs-warning-border-subtle: #99460e;--bs-danger-border-subtle: #990022;--bs-light-border-subtle: #495057;--bs-dark-border-subtle: #343a40;--bs-heading-color: inherit;--bs-link-color: #7db3ee;--bs-link-hover-color: 
#97c2f1;--bs-link-color-rgb: 125, 179, 238;--bs-link-hover-color-rgb: 151, 194, 241;--bs-code-color: white;--bs-border-color: #495057;--bs-border-color-translucent: rgba(255, 255, 255, 0.15);--bs-form-valid-color: #8cd374;--bs-form-valid-border-color: #8cd374;--bs-form-invalid-color: #ff6688;--bs-form-invalid-border-color: #ff6688}*,*::before,*::after{box-sizing:border-box}:root{font-size:var(--bs-root-font-size)}body{margin:0;font-family:var(--bs-body-font-family);font-size:var(--bs-body-font-size);font-weight:var(--bs-body-font-weight);line-height:var(--bs-body-line-height);color:var(--bs-body-color);text-align:var(--bs-body-text-align);background-color:var(--bs-body-bg);-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:rgba(0,0,0,0)}hr{margin:1rem 0;color:inherit;border:0;border-top:1px solid;opacity:.25}h6,.h6,h5,.h5,h4,.h4,h3,.h3,h2,.h2,h1,.h1{margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2;color:var(--bs-heading-color)}h1,.h1{font-size:calc(1.325rem + 0.9vw)}@media(min-width: 1200px){h1,.h1{font-size:2rem}}h2,.h2{font-size:calc(1.29rem + 0.48vw)}@media(min-width: 1200px){h2,.h2{font-size:1.65rem}}h3,.h3{font-size:calc(1.27rem + 0.24vw)}@media(min-width: 1200px){h3,.h3{font-size:1.45rem}}h4,.h4{font-size:1.25rem}h5,.h5{font-size:1.1rem}h6,.h6{font-size:1rem}p{margin-top:0;margin-bottom:1rem}abbr[title]{text-decoration:underline dotted;-webkit-text-decoration:underline dotted;-moz-text-decoration:underline dotted;-ms-text-decoration:underline dotted;-o-text-decoration:underline dotted;cursor:help;text-decoration-skip-ink:none}address{margin-bottom:1rem;font-style:normal;line-height:inherit}ol,ul{padding-left:2rem}ol,ul,dl{margin-top:0;margin-bottom:1rem}ol ol,ul ul,ol ul,ul ol{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem;padding:.625rem 1.25rem;border-left:.25rem solid #e9ecef}blockquote p:last-child,blockquote ul:last-child,blockquote 
ol:last-child{margin-bottom:0}b,strong{font-weight:bolder}small,.small{font-size:0.875em}mark,.mark{padding:.1875em;background-color:var(--bs-highlight-bg)}sub,sup{position:relative;font-size:0.75em;line-height:0;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}a{color:rgba(var(--bs-link-color-rgb), var(--bs-link-opacity, 1));text-decoration:underline;-webkit-text-decoration:underline;-moz-text-decoration:underline;-ms-text-decoration:underline;-o-text-decoration:underline}a:hover{--bs-link-color-rgb: var(--bs-link-hover-color-rgb)}a:not([href]):not([class]),a:not([href]):not([class]):hover{color:inherit;text-decoration:none}pre,code,kbd,samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:1em}pre{display:block;margin-top:0;margin-bottom:1rem;overflow:auto;font-size:0.875em;color:#000;background-color:#f8f9fa;line-height:1.5;padding:.5rem;border:1px solid var(--bs-border-color, #dee2e6)}pre code{background-color:rgba(0,0,0,0);font-size:inherit;color:inherit;word-break:normal}code{font-size:0.875em;color:var(--bs-code-color);background-color:#f8f9fa;padding:.125rem .25rem;word-wrap:break-word}a>code{color:inherit}kbd{padding:.4rem .4rem;font-size:0.875em;color:#fefbf2;background-color:#343a40}kbd kbd{padding:0;font-size:1em}figure{margin:0 0 
1rem}img,svg{vertical-align:middle}table{caption-side:bottom;border-collapse:collapse}caption{padding-top:.5rem;padding-bottom:.5rem;color:rgba(52,58,64,.75);text-align:left}th{text-align:inherit;text-align:-webkit-match-parent}thead,tbody,tfoot,tr,td,th{border-color:inherit;border-style:solid;border-width:0}label{display:inline-block}button{border-radius:0}button:focus:not(:focus-visible){outline:0}input,button,select,optgroup,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}select:disabled{opacity:1}[list]:not([type=date]):not([type=datetime-local]):not([type=month]):not([type=week]):not([type=time])::-webkit-calendar-picker-indicator{display:none !important}button,[type=button],[type=reset],[type=submit]{-webkit-appearance:button}button:not(:disabled),[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled){cursor:pointer}::-moz-focus-inner{padding:0;border-style:none}textarea{resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{float:left;width:100%;padding:0;margin-bottom:.5rem;font-size:calc(1.275rem + 0.3vw);line-height:inherit}@media(min-width: 1200px){legend{font-size:1.5rem}}legend+*{clear:left}::-webkit-datetime-edit-fields-wrapper,::-webkit-datetime-edit-text,::-webkit-datetime-edit-minute,::-webkit-datetime-edit-hour-field,::-webkit-datetime-edit-day-field,::-webkit-datetime-edit-month-field,::-webkit-datetime-edit-year-field{padding:0}::-webkit-inner-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-color-swatch-wrapper{padding:0}::file-selector-button{font:inherit;-webkit-appearance:button}output{display:inline-block}iframe{border:0}summary{display:list-item;cursor:pointer}progress{vertical-align:baseline}[hidden]{display:none 
!important}.lead{font-size:1.25rem;font-weight:300}.display-1{font-size:calc(1.625rem + 4.5vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-1{font-size:5rem}}.display-2{font-size:calc(1.575rem + 3.9vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-2{font-size:4.5rem}}.display-3{font-size:calc(1.525rem + 3.3vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-3{font-size:4rem}}.display-4{font-size:calc(1.475rem + 2.7vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-4{font-size:3.5rem}}.display-5{font-size:calc(1.425rem + 2.1vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-5{font-size:3rem}}.display-6{font-size:calc(1.375rem + 1.5vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-6{font-size:2.5rem}}.list-unstyled{padding-left:0;list-style:none}.list-inline{padding-left:0;list-style:none}.list-inline-item{display:inline-block}.list-inline-item:not(:last-child){margin-right:.5rem}.initialism{font-size:0.875em;text-transform:uppercase}.blockquote{margin-bottom:1rem;font-size:1.25rem}.blockquote>:last-child{margin-bottom:0}.blockquote-footer{margin-top:-1rem;margin-bottom:1rem;font-size:0.875em;color:#6c757d}.blockquote-footer::before{content:"— "}.img-fluid{max-width:100%;height:auto}.img-thumbnail{padding:.25rem;background-color:#fefbf2;border:1px solid #dee2e6;max-width:100%;height:auto}.figure{display:inline-block}.figure-img{margin-bottom:.5rem;line-height:1}.figure-caption{font-size:0.875em;color:rgba(52,58,64,.75)}.container,.container-fluid,.container-xxl,.container-xl,.container-lg,.container-md,.container-sm{--bs-gutter-x: 1.5rem;--bs-gutter-y: 0;width:100%;padding-right:calc(var(--bs-gutter-x)*.5);padding-left:calc(var(--bs-gutter-x)*.5);margin-right:auto;margin-left:auto}@media(min-width: 576px){.container-sm,.container{max-width:540px}}@media(min-width: 768px){.container-md,.container-sm,.container{max-width:720px}}@media(min-width: 
992px){.container-lg,.container-md,.container-sm,.container{max-width:960px}}@media(min-width: 1200px){.container-xl,.container-lg,.container-md,.container-sm,.container{max-width:1140px}}@media(min-width: 1400px){.container-xxl,.container-xl,.container-lg,.container-md,.container-sm,.container{max-width:1320px}}:root{--bs-breakpoint-xs: 0;--bs-breakpoint-sm: 576px;--bs-breakpoint-md: 768px;--bs-breakpoint-lg: 992px;--bs-breakpoint-xl: 1200px;--bs-breakpoint-xxl: 1400px}.grid{display:grid;grid-template-rows:repeat(var(--bs-rows, 1), 1fr);grid-template-columns:repeat(var(--bs-columns, 12), 1fr);gap:var(--bs-gap, 1.5rem)}.grid .g-col-1{grid-column:auto/span 1}.grid .g-col-2{grid-column:auto/span 2}.grid .g-col-3{grid-column:auto/span 3}.grid .g-col-4{grid-column:auto/span 4}.grid .g-col-5{grid-column:auto/span 5}.grid .g-col-6{grid-column:auto/span 6}.grid .g-col-7{grid-column:auto/span 7}.grid .g-col-8{grid-column:auto/span 8}.grid .g-col-9{grid-column:auto/span 9}.grid .g-col-10{grid-column:auto/span 10}.grid .g-col-11{grid-column:auto/span 11}.grid .g-col-12{grid-column:auto/span 12}.grid .g-start-1{grid-column-start:1}.grid .g-start-2{grid-column-start:2}.grid .g-start-3{grid-column-start:3}.grid .g-start-4{grid-column-start:4}.grid .g-start-5{grid-column-start:5}.grid .g-start-6{grid-column-start:6}.grid .g-start-7{grid-column-start:7}.grid .g-start-8{grid-column-start:8}.grid .g-start-9{grid-column-start:9}.grid .g-start-10{grid-column-start:10}.grid .g-start-11{grid-column-start:11}@media(min-width: 576px){.grid .g-col-sm-1{grid-column:auto/span 1}.grid .g-col-sm-2{grid-column:auto/span 2}.grid .g-col-sm-3{grid-column:auto/span 3}.grid .g-col-sm-4{grid-column:auto/span 4}.grid .g-col-sm-5{grid-column:auto/span 5}.grid .g-col-sm-6{grid-column:auto/span 6}.grid .g-col-sm-7{grid-column:auto/span 7}.grid .g-col-sm-8{grid-column:auto/span 8}.grid .g-col-sm-9{grid-column:auto/span 9}.grid .g-col-sm-10{grid-column:auto/span 10}.grid .g-col-sm-11{grid-column:auto/span 
11}.grid .g-col-sm-12{grid-column:auto/span 12}.grid .g-start-sm-1{grid-column-start:1}.grid .g-start-sm-2{grid-column-start:2}.grid .g-start-sm-3{grid-column-start:3}.grid .g-start-sm-4{grid-column-start:4}.grid .g-start-sm-5{grid-column-start:5}.grid .g-start-sm-6{grid-column-start:6}.grid .g-start-sm-7{grid-column-start:7}.grid .g-start-sm-8{grid-column-start:8}.grid .g-start-sm-9{grid-column-start:9}.grid .g-start-sm-10{grid-column-start:10}.grid .g-start-sm-11{grid-column-start:11}}@media(min-width: 768px){.grid .g-col-md-1{grid-column:auto/span 1}.grid .g-col-md-2{grid-column:auto/span 2}.grid .g-col-md-3{grid-column:auto/span 3}.grid .g-col-md-4{grid-column:auto/span 4}.grid .g-col-md-5{grid-column:auto/span 5}.grid .g-col-md-6{grid-column:auto/span 6}.grid .g-col-md-7{grid-column:auto/span 7}.grid .g-col-md-8{grid-column:auto/span 8}.grid .g-col-md-9{grid-column:auto/span 9}.grid .g-col-md-10{grid-column:auto/span 10}.grid .g-col-md-11{grid-column:auto/span 11}.grid .g-col-md-12{grid-column:auto/span 12}.grid .g-start-md-1{grid-column-start:1}.grid .g-start-md-2{grid-column-start:2}.grid .g-start-md-3{grid-column-start:3}.grid .g-start-md-4{grid-column-start:4}.grid .g-start-md-5{grid-column-start:5}.grid .g-start-md-6{grid-column-start:6}.grid .g-start-md-7{grid-column-start:7}.grid .g-start-md-8{grid-column-start:8}.grid .g-start-md-9{grid-column-start:9}.grid .g-start-md-10{grid-column-start:10}.grid .g-start-md-11{grid-column-start:11}}@media(min-width: 992px){.grid .g-col-lg-1{grid-column:auto/span 1}.grid .g-col-lg-2{grid-column:auto/span 2}.grid .g-col-lg-3{grid-column:auto/span 3}.grid .g-col-lg-4{grid-column:auto/span 4}.grid .g-col-lg-5{grid-column:auto/span 5}.grid .g-col-lg-6{grid-column:auto/span 6}.grid .g-col-lg-7{grid-column:auto/span 7}.grid .g-col-lg-8{grid-column:auto/span 8}.grid .g-col-lg-9{grid-column:auto/span 9}.grid .g-col-lg-10{grid-column:auto/span 10}.grid .g-col-lg-11{grid-column:auto/span 11}.grid 
.g-col-lg-12{grid-column:auto/span 12}.grid .g-start-lg-1{grid-column-start:1}.grid .g-start-lg-2{grid-column-start:2}.grid .g-start-lg-3{grid-column-start:3}.grid .g-start-lg-4{grid-column-start:4}.grid .g-start-lg-5{grid-column-start:5}.grid .g-start-lg-6{grid-column-start:6}.grid .g-start-lg-7{grid-column-start:7}.grid .g-start-lg-8{grid-column-start:8}.grid .g-start-lg-9{grid-column-start:9}.grid .g-start-lg-10{grid-column-start:10}.grid .g-start-lg-11{grid-column-start:11}}@media(min-width: 1200px){.grid .g-col-xl-1{grid-column:auto/span 1}.grid .g-col-xl-2{grid-column:auto/span 2}.grid .g-col-xl-3{grid-column:auto/span 3}.grid .g-col-xl-4{grid-column:auto/span 4}.grid .g-col-xl-5{grid-column:auto/span 5}.grid .g-col-xl-6{grid-column:auto/span 6}.grid .g-col-xl-7{grid-column:auto/span 7}.grid .g-col-xl-8{grid-column:auto/span 8}.grid .g-col-xl-9{grid-column:auto/span 9}.grid .g-col-xl-10{grid-column:auto/span 10}.grid .g-col-xl-11{grid-column:auto/span 11}.grid .g-col-xl-12{grid-column:auto/span 12}.grid .g-start-xl-1{grid-column-start:1}.grid .g-start-xl-2{grid-column-start:2}.grid .g-start-xl-3{grid-column-start:3}.grid .g-start-xl-4{grid-column-start:4}.grid .g-start-xl-5{grid-column-start:5}.grid .g-start-xl-6{grid-column-start:6}.grid .g-start-xl-7{grid-column-start:7}.grid .g-start-xl-8{grid-column-start:8}.grid .g-start-xl-9{grid-column-start:9}.grid .g-start-xl-10{grid-column-start:10}.grid .g-start-xl-11{grid-column-start:11}}@media(min-width: 1400px){.grid .g-col-xxl-1{grid-column:auto/span 1}.grid .g-col-xxl-2{grid-column:auto/span 2}.grid .g-col-xxl-3{grid-column:auto/span 3}.grid .g-col-xxl-4{grid-column:auto/span 4}.grid .g-col-xxl-5{grid-column:auto/span 5}.grid .g-col-xxl-6{grid-column:auto/span 6}.grid .g-col-xxl-7{grid-column:auto/span 7}.grid .g-col-xxl-8{grid-column:auto/span 8}.grid .g-col-xxl-9{grid-column:auto/span 9}.grid .g-col-xxl-10{grid-column:auto/span 10}.grid .g-col-xxl-11{grid-column:auto/span 11}.grid 
.g-col-xxl-12{grid-column:auto/span 12}.grid .g-start-xxl-1{grid-column-start:1}.grid .g-start-xxl-2{grid-column-start:2}.grid .g-start-xxl-3{grid-column-start:3}.grid .g-start-xxl-4{grid-column-start:4}.grid .g-start-xxl-5{grid-column-start:5}.grid .g-start-xxl-6{grid-column-start:6}.grid .g-start-xxl-7{grid-column-start:7}.grid .g-start-xxl-8{grid-column-start:8}.grid .g-start-xxl-9{grid-column-start:9}.grid .g-start-xxl-10{grid-column-start:10}.grid .g-start-xxl-11{grid-column-start:11}}.table{--bs-table-color-type: initial;--bs-table-bg-type: initial;--bs-table-color-state: initial;--bs-table-bg-state: initial;--bs-table-color: #343a40;--bs-table-bg: #FEFBF2;--bs-table-border-color: #dee2e6;--bs-table-accent-bg: transparent;--bs-table-striped-color: #343a40;--bs-table-striped-bg: rgba(0, 0, 0, 0.05);--bs-table-active-color: #343a40;--bs-table-active-bg: rgba(0, 0, 0, 0.1);--bs-table-hover-color: #343a40;--bs-table-hover-bg: rgba(0, 0, 0, 0.075);width:100%;margin-bottom:1rem;vertical-align:top;border-color:var(--bs-table-border-color)}.table>:not(caption)>*>*{padding:.5rem .5rem;color:var(--bs-table-color-state, var(--bs-table-color-type, var(--bs-table-color)));background-color:var(--bs-table-bg);border-bottom-width:1px;box-shadow:inset 0 0 0 9999px var(--bs-table-bg-state, var(--bs-table-bg-type, var(--bs-table-accent-bg)))}.table>tbody{vertical-align:inherit}.table>thead{vertical-align:bottom}.table-group-divider{border-top:calc(1px*2) solid #999b99}.caption-top{caption-side:top}.table-sm>:not(caption)>*>*{padding:.25rem .25rem}.table-bordered>:not(caption)>*{border-width:1px 0}.table-bordered>:not(caption)>*>*{border-width:0 1px}.table-borderless>:not(caption)>*>*{border-bottom-width:0}.table-borderless>:not(:first-child){border-top-width:0}.table-striped>tbody>tr:nth-of-type(odd)>*{--bs-table-color-type: var(--bs-table-striped-color);--bs-table-bg-type: 
var(--bs-table-striped-bg)}.table-striped-columns>:not(caption)>tr>:nth-child(even){--bs-table-color-type: var(--bs-table-striped-color);--bs-table-bg-type: var(--bs-table-striped-bg)}.table-active{--bs-table-color-state: var(--bs-table-active-color);--bs-table-bg-state: var(--bs-table-active-bg)}.table-hover>tbody>tr:hover>*{--bs-table-color-state: var(--bs-table-hover-color);--bs-table-bg-state: var(--bs-table-hover-bg)}.table-primary{--bs-table-color: #000;--bs-table-bg: #d4e6f9;--bs-table-border-color: #bfcfe0;--bs-table-striped-bg: #c9dbed;--bs-table-striped-color: #000;--bs-table-active-bg: #bfcfe0;--bs-table-active-color: #000;--bs-table-hover-bg: #c4d5e6;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-secondary{--bs-table-color: #000;--bs-table-bg: #d6d8d9;--bs-table-border-color: #c1c2c3;--bs-table-striped-bg: #cbcdce;--bs-table-striped-color: #000;--bs-table-active-bg: #c1c2c3;--bs-table-active-color: #000;--bs-table-hover-bg: #c6c8c9;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-success{--bs-table-color: #000;--bs-table-bg: #d9f0d1;--bs-table-border-color: #c3d8bc;--bs-table-striped-bg: #cee4c7;--bs-table-striped-color: #000;--bs-table-active-bg: #c3d8bc;--bs-table-active-color: #000;--bs-table-hover-bg: #c9dec1;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-info{--bs-table-color: #000;--bs-table-bg: #ebddf1;--bs-table-border-color: #d4c7d9;--bs-table-striped-bg: #dfd2e5;--bs-table-striped-color: #000;--bs-table-active-bg: #d4c7d9;--bs-table-active-color: #000;--bs-table-hover-bg: #d9ccdf;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-warning{--bs-table-color: #000;--bs-table-bg: #ffe3d1;--bs-table-border-color: #e6ccbc;--bs-table-striped-bg: #f2d8c7;--bs-table-striped-color: #000;--bs-table-active-bg: 
#e6ccbc;--bs-table-active-color: #000;--bs-table-hover-bg: #ecd2c1;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-danger{--bs-table-color: #000;--bs-table-bg: #ffccd7;--bs-table-border-color: #e6b8c2;--bs-table-striped-bg: #f2c2cc;--bs-table-striped-color: #000;--bs-table-active-bg: #e6b8c2;--bs-table-active-color: #000;--bs-table-hover-bg: #ecbdc7;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-light{--bs-table-color: #000;--bs-table-bg: #f8f9fa;--bs-table-border-color: #dfe0e1;--bs-table-striped-bg: #ecedee;--bs-table-striped-color: #000;--bs-table-active-bg: #dfe0e1;--bs-table-active-color: #000;--bs-table-hover-bg: #e5e6e7;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-dark{--bs-table-color: #fff;--bs-table-bg: #343a40;--bs-table-border-color: #484e53;--bs-table-striped-bg: #3e444a;--bs-table-striped-color: #fff;--bs-table-active-bg: #484e53;--bs-table-active-color: #fff;--bs-table-hover-bg: #43494e;--bs-table-hover-color: #fff;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-responsive{overflow-x:auto;-webkit-overflow-scrolling:touch}@media(max-width: 575.98px){.table-responsive-sm{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 767.98px){.table-responsive-md{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 991.98px){.table-responsive-lg{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 1199.98px){.table-responsive-xl{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 1399.98px){.table-responsive-xxl{overflow-x:auto;-webkit-overflow-scrolling:touch}}.form-label,.shiny-input-container .control-label{margin-bottom:.5rem}.col-form-label{padding-top:calc(0.375rem + 1px);padding-bottom:calc(0.375rem + 
1px);margin-bottom:0;font-size:inherit;line-height:1.5}.col-form-label-lg{padding-top:calc(0.5rem + 1px);padding-bottom:calc(0.5rem + 1px);font-size:1.25rem}.col-form-label-sm{padding-top:calc(0.25rem + 1px);padding-bottom:calc(0.25rem + 1px);font-size:0.875rem}.form-text{margin-top:.25rem;font-size:0.875em;color:rgba(52,58,64,.75)}.form-control{display:block;width:100%;padding:.375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#343a40;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:#fefbf2;background-clip:padding-box;border:1px solid #dee2e6;border-radius:0;transition:border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-control{transition:none}}.form-control[type=file]{overflow:hidden}.form-control[type=file]:not(:disabled):not([readonly]){cursor:pointer}.form-control:focus{color:#343a40;background-color:#fefbf2;border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-control::-webkit-date-and-time-value{min-width:85px;height:1.5em;margin:0}.form-control::-webkit-datetime-edit{display:block;padding:0}.form-control::placeholder{color:rgba(52,58,64,.75);opacity:1}.form-control:disabled{background-color:#e9ecef;opacity:1}.form-control::file-selector-button{padding:.375rem .75rem;margin:-0.375rem -0.75rem;margin-inline-end:.75rem;color:#343a40;background-color:#f8f9fa;pointer-events:none;border-color:inherit;border-style:solid;border-width:0;border-inline-end-width:1px;border-radius:0;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-control::file-selector-button{transition:none}}.form-control:hover:not(:disabled):not([readonly])::file-selector-button{background-color:#e9ecef}.form-control-plaintext{display:block;width:100%;padding:.375rem 
0;margin-bottom:0;line-height:1.5;color:#343a40;background-color:rgba(0,0,0,0);border:solid rgba(0,0,0,0);border-width:1px 0}.form-control-plaintext:focus{outline:0}.form-control-plaintext.form-control-sm,.form-control-plaintext.form-control-lg{padding-right:0;padding-left:0}.form-control-sm{min-height:calc(1.5em + 0.5rem + calc(1px * 2));padding:.25rem .5rem;font-size:0.875rem}.form-control-sm::file-selector-button{padding:.25rem .5rem;margin:-0.25rem -0.5rem;margin-inline-end:.5rem}.form-control-lg{min-height:calc(1.5em + 1rem + calc(1px * 2));padding:.5rem 1rem;font-size:1.25rem}.form-control-lg::file-selector-button{padding:.5rem 1rem;margin:-0.5rem -1rem;margin-inline-end:1rem}textarea.form-control{min-height:calc(1.5em + 0.75rem + calc(1px * 2))}textarea.form-control-sm{min-height:calc(1.5em + 0.5rem + calc(1px * 2))}textarea.form-control-lg{min-height:calc(1.5em + 1rem + calc(1px * 2))}.form-control-color{width:3rem;height:calc(1.5em + 0.75rem + calc(1px * 2));padding:.375rem}.form-control-color:not(:disabled):not([readonly]){cursor:pointer}.form-control-color::-moz-color-swatch{border:0 !important}.form-control-color::-webkit-color-swatch{border:0 !important}.form-control-color.form-control-sm{height:calc(1.5em + 0.5rem + calc(1px * 2))}.form-control-color.form-control-lg{height:calc(1.5em + 1rem + calc(1px * 2))}.form-select{--bs-form-select-bg-img: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3e%3cpath fill='none' stroke='%23343a40' stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='m2 5 6 6 6-6'/%3e%3c/svg%3e");display:block;width:100%;padding:.375rem 2.25rem .375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#343a40;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:#fefbf2;background-image:var(--bs-form-select-bg-img),var(--bs-form-select-bg-icon, none);background-repeat:no-repeat;background-position:right .75rem 
center;background-size:16px 12px;border:1px solid #dee2e6;border-radius:0;transition:border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-select{transition:none}}.form-select:focus{border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-select[multiple],.form-select[size]:not([size="1"]){padding-right:.75rem;background-image:none}.form-select:disabled{background-color:#e9ecef}.form-select:-moz-focusring{color:rgba(0,0,0,0);text-shadow:0 0 0 #343a40}.form-select-sm{padding-top:.25rem;padding-bottom:.25rem;padding-left:.5rem;font-size:0.875rem}.form-select-lg{padding-top:.5rem;padding-bottom:.5rem;padding-left:1rem;font-size:1.25rem}[data-bs-theme=dark] .form-select{--bs-form-select-bg-img: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3e%3cpath fill='none' stroke='%23dee2e6' stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='m2 5 6 6 6-6'/%3e%3c/svg%3e")}.form-check,.shiny-input-container .checkbox,.shiny-input-container .radio{display:block;min-height:1.5rem;padding-left:0;margin-bottom:.125rem}.form-check .form-check-input,.form-check .shiny-input-container .checkbox input,.form-check .shiny-input-container .radio input,.shiny-input-container .checkbox .form-check-input,.shiny-input-container .checkbox .shiny-input-container .checkbox input,.shiny-input-container .checkbox .shiny-input-container .radio input,.shiny-input-container .radio .form-check-input,.shiny-input-container .radio .shiny-input-container .checkbox input,.shiny-input-container .radio .shiny-input-container .radio input{float:left;margin-left:0}.form-check-reverse{padding-right:0;padding-left:0;text-align:right}.form-check-reverse .form-check-input{float:right;margin-right:0;margin-left:0}.form-check-input,.shiny-input-container .checkbox input,.shiny-input-container .checkbox-inline input,.shiny-input-container .radio input,.shiny-input-container .radio-inline 
input{--bs-form-check-bg: #FEFBF2;width:1em;height:1em;margin-top:.25em;vertical-align:top;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:var(--bs-form-check-bg);background-image:var(--bs-form-check-bg-image);background-repeat:no-repeat;background-position:center;background-size:contain;border:1px solid #dee2e6;print-color-adjust:exact}.form-check-input[type=radio],.shiny-input-container .checkbox input[type=radio],.shiny-input-container .checkbox-inline input[type=radio],.shiny-input-container .radio input[type=radio],.shiny-input-container .radio-inline input[type=radio]{border-radius:50%}.form-check-input:active,.shiny-input-container .checkbox input:active,.shiny-input-container .checkbox-inline input:active,.shiny-input-container .radio input:active,.shiny-input-container .radio-inline input:active{filter:brightness(90%)}.form-check-input:focus,.shiny-input-container .checkbox input:focus,.shiny-input-container .checkbox-inline input:focus,.shiny-input-container .radio input:focus,.shiny-input-container .radio-inline input:focus{border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-check-input:checked,.shiny-input-container .checkbox input:checked,.shiny-input-container .checkbox-inline input:checked,.shiny-input-container .radio input:checked,.shiny-input-container .radio-inline input:checked{background-color:#2780e3;border-color:#2780e3}.form-check-input:checked[type=checkbox],.shiny-input-container .checkbox input:checked[type=checkbox],.shiny-input-container .checkbox-inline input:checked[type=checkbox],.shiny-input-container .radio input:checked[type=checkbox],.shiny-input-container .radio-inline input:checked[type=checkbox]{--bs-form-check-bg-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3e%3cpath fill='none' stroke='%23fff' stroke-linecap='round' stroke-linejoin='round' stroke-width='3' d='m6 10 3 3 
6-6'/%3e%3c/svg%3e")}.form-check-input:checked[type=radio],.shiny-input-container .checkbox input:checked[type=radio],.shiny-input-container .checkbox-inline input:checked[type=radio],.shiny-input-container .radio input:checked[type=radio],.shiny-input-container .radio-inline input:checked[type=radio]{--bs-form-check-bg-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='2' fill='%23fff'/%3e%3c/svg%3e")}.form-check-input[type=checkbox]:indeterminate,.shiny-input-container .checkbox input[type=checkbox]:indeterminate,.shiny-input-container .checkbox-inline input[type=checkbox]:indeterminate,.shiny-input-container .radio input[type=checkbox]:indeterminate,.shiny-input-container .radio-inline input[type=checkbox]:indeterminate{background-color:#2780e3;border-color:#2780e3;--bs-form-check-bg-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3e%3cpath fill='none' stroke='%23fff' stroke-linecap='round' stroke-linejoin='round' stroke-width='3' d='M6 10h8'/%3e%3c/svg%3e")}.form-check-input:disabled,.shiny-input-container .checkbox input:disabled,.shiny-input-container .checkbox-inline input:disabled,.shiny-input-container .radio input:disabled,.shiny-input-container .radio-inline input:disabled{pointer-events:none;filter:none;opacity:.5}.form-check-input[disabled]~.form-check-label,.form-check-input[disabled]~span,.form-check-input:disabled~.form-check-label,.form-check-input:disabled~span,.shiny-input-container .checkbox input[disabled]~.form-check-label,.shiny-input-container .checkbox input[disabled]~span,.shiny-input-container .checkbox input:disabled~.form-check-label,.shiny-input-container .checkbox input:disabled~span,.shiny-input-container .checkbox-inline input[disabled]~.form-check-label,.shiny-input-container .checkbox-inline input[disabled]~span,.shiny-input-container .checkbox-inline input:disabled~.form-check-label,.shiny-input-container .checkbox-inline 
input:disabled~span,.shiny-input-container .radio input[disabled]~.form-check-label,.shiny-input-container .radio input[disabled]~span,.shiny-input-container .radio input:disabled~.form-check-label,.shiny-input-container .radio input:disabled~span,.shiny-input-container .radio-inline input[disabled]~.form-check-label,.shiny-input-container .radio-inline input[disabled]~span,.shiny-input-container .radio-inline input:disabled~.form-check-label,.shiny-input-container .radio-inline input:disabled~span{cursor:default;opacity:.5}.form-check-label,.shiny-input-container .checkbox label,.shiny-input-container .checkbox-inline label,.shiny-input-container .radio label,.shiny-input-container .radio-inline label{cursor:pointer}.form-switch{padding-left:2.5em}.form-switch .form-check-input{--bs-form-switch-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='rgba%280, 0, 0, 0.25%29'/%3e%3c/svg%3e");width:2em;margin-left:-2.5em;background-image:var(--bs-form-switch-bg);background-position:left center;transition:background-position .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-switch .form-check-input{transition:none}}.form-switch .form-check-input:focus{--bs-form-switch-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='%2393c0f1'/%3e%3c/svg%3e")}.form-switch .form-check-input:checked{background-position:right center;--bs-form-switch-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='%23fff'/%3e%3c/svg%3e")}.form-switch.form-check-reverse{padding-right:2.5em;padding-left:0}.form-switch.form-check-reverse .form-check-input{margin-right:-2.5em;margin-left:0}.form-check-inline{display:inline-block;margin-right:1rem}.btn-check{position:absolute;clip:rect(0, 0, 0, 
0);pointer-events:none}.btn-check[disabled]+.btn,.btn-check:disabled+.btn{pointer-events:none;filter:none;opacity:.65}[data-bs-theme=dark] .form-switch .form-check-input:not(:checked):not(:focus){--bs-form-switch-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='rgba%28255, 255, 255, 0.25%29'/%3e%3c/svg%3e")}.form-range{width:100%;height:1.5rem;padding:0;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:rgba(0,0,0,0)}.form-range:focus{outline:0}.form-range:focus::-webkit-slider-thumb{box-shadow:0 0 0 1px #fefbf2,0 0 0 .25rem rgba(39,128,227,.25)}.form-range:focus::-moz-range-thumb{box-shadow:0 0 0 1px #fefbf2,0 0 0 .25rem rgba(39,128,227,.25)}.form-range::-moz-focus-outer{border:0}.form-range::-webkit-slider-thumb{width:1rem;height:1rem;margin-top:-0.25rem;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:#2780e3;border:0;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-range::-webkit-slider-thumb{transition:none}}.form-range::-webkit-slider-thumb:active{background-color:#bed9f7}.form-range::-webkit-slider-runnable-track{width:100%;height:.5rem;color:rgba(0,0,0,0);cursor:pointer;background-color:#f8f9fa;border-color:rgba(0,0,0,0)}.form-range::-moz-range-thumb{width:1rem;height:1rem;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:#2780e3;border:0;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: 
reduce){.form-range::-moz-range-thumb{transition:none}}.form-range::-moz-range-thumb:active{background-color:#bed9f7}.form-range::-moz-range-track{width:100%;height:.5rem;color:rgba(0,0,0,0);cursor:pointer;background-color:#f8f9fa;border-color:rgba(0,0,0,0)}.form-range:disabled{pointer-events:none}.form-range:disabled::-webkit-slider-thumb{background-color:rgba(52,58,64,.75)}.form-range:disabled::-moz-range-thumb{background-color:rgba(52,58,64,.75)}.form-floating{position:relative}.form-floating>.form-control,.form-floating>.form-control-plaintext,.form-floating>.form-select{height:calc(3.5rem + calc(1px * 2));min-height:calc(3.5rem + calc(1px * 2));line-height:1.25}.form-floating>label{position:absolute;top:0;left:0;z-index:2;height:100%;padding:1rem .75rem;overflow:hidden;text-align:start;text-overflow:ellipsis;white-space:nowrap;pointer-events:none;border:1px solid rgba(0,0,0,0);transform-origin:0 0;transition:opacity .1s ease-in-out,transform .1s ease-in-out}@media(prefers-reduced-motion: reduce){.form-floating>label{transition:none}}.form-floating>.form-control,.form-floating>.form-control-plaintext{padding:1rem .75rem}.form-floating>.form-control::placeholder,.form-floating>.form-control-plaintext::placeholder{color:rgba(0,0,0,0)}.form-floating>.form-control:focus,.form-floating>.form-control:not(:placeholder-shown),.form-floating>.form-control-plaintext:focus,.form-floating>.form-control-plaintext:not(:placeholder-shown){padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-control:-webkit-autofill,.form-floating>.form-control-plaintext:-webkit-autofill{padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-select{padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-control:focus~label,.form-floating>.form-control:not(:placeholder-shown)~label,.form-floating>.form-control-plaintext~label,.form-floating>.form-select~label{color:rgba(var(--bs-body-color-rgb), 0.65);transform:scale(0.85) translateY(-0.5rem) 
translateX(0.15rem)}.form-floating>.form-control:focus~label::after,.form-floating>.form-control:not(:placeholder-shown)~label::after,.form-floating>.form-control-plaintext~label::after,.form-floating>.form-select~label::after{position:absolute;inset:1rem .375rem;z-index:-1;height:1.5em;content:"";background-color:#fefbf2}.form-floating>.form-control:-webkit-autofill~label{color:rgba(var(--bs-body-color-rgb), 0.65);transform:scale(0.85) translateY(-0.5rem) translateX(0.15rem)}.form-floating>.form-control-plaintext~label{border-width:1px 0}.form-floating>:disabled~label,.form-floating>.form-control:disabled~label{color:#6c757d}.form-floating>:disabled~label::after,.form-floating>.form-control:disabled~label::after{background-color:#e9ecef}.input-group{position:relative;display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;align-items:stretch;-webkit-align-items:stretch;width:100%}.input-group>.form-control,.input-group>.form-select,.input-group>.form-floating{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto;width:1%;min-width:0}.input-group>.form-control:focus,.input-group>.form-select:focus,.input-group>.form-floating:focus-within{z-index:5}.input-group .btn{position:relative;z-index:2}.input-group .btn:focus{z-index:5}.input-group-text{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;padding:.375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#343a40;text-align:center;white-space:nowrap;background-color:#f8f9fa;border:1px solid #dee2e6}.input-group-lg>.form-control,.input-group-lg>.form-select,.input-group-lg>.input-group-text,.input-group-lg>.btn{padding:.5rem 1rem;font-size:1.25rem}.input-group-sm>.form-control,.input-group-sm>.form-select,.input-group-sm>.input-group-text,.input-group-sm>.btn{padding:.25rem 
.5rem;font-size:0.875rem}.input-group-lg>.form-select,.input-group-sm>.form-select{padding-right:3rem}.input-group>:not(:first-child):not(.dropdown-menu):not(.valid-tooltip):not(.valid-feedback):not(.invalid-tooltip):not(.invalid-feedback){margin-left:calc(1px*-1)}.valid-feedback{display:none;width:100%;margin-top:.25rem;font-size:0.875em;color:#3fb618}.valid-tooltip{position:absolute;top:100%;z-index:5;display:none;max-width:100%;padding:.25rem .5rem;margin-top:.1rem;font-size:0.875rem;color:#fff;background-color:#3fb618}.was-validated :valid~.valid-feedback,.was-validated :valid~.valid-tooltip,.is-valid~.valid-feedback,.is-valid~.valid-tooltip{display:block}.was-validated .form-control:valid,.form-control.is-valid{border-color:#3fb618;padding-right:calc(1.5em + 0.75rem);background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3e%3cpath fill='%233fb618' d='M2.3 6.73.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3e%3c/svg%3e");background-repeat:no-repeat;background-position:right calc(0.375em + 0.1875rem) center;background-size:calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-control:valid:focus,.form-control.is-valid:focus{border-color:#3fb618;box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated textarea.form-control:valid,textarea.form-control.is-valid{padding-right:calc(1.5em + 0.75rem);background-position:top calc(0.375em + 0.1875rem) right calc(0.375em + 0.1875rem)}.was-validated .form-select:valid,.form-select.is-valid{border-color:#3fb618}.was-validated .form-select:valid:not([multiple]):not([size]),.was-validated .form-select:valid:not([multiple])[size="1"],.form-select.is-valid:not([multiple]):not([size]),.form-select.is-valid:not([multiple])[size="1"]{--bs-form-select-bg-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3e%3cpath fill='%233fb618' d='M2.3 6.73.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 
3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3e%3c/svg%3e");padding-right:4.125rem;background-position:right .75rem center,center right 2.25rem;background-size:16px 12px,calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-select:valid:focus,.form-select.is-valid:focus{border-color:#3fb618;box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated .form-control-color:valid,.form-control-color.is-valid{width:calc(3rem + calc(1.5em + 0.75rem))}.was-validated .form-check-input:valid,.form-check-input.is-valid{border-color:#3fb618}.was-validated .form-check-input:valid:checked,.form-check-input.is-valid:checked{background-color:#3fb618}.was-validated .form-check-input:valid:focus,.form-check-input.is-valid:focus{box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated .form-check-input:valid~.form-check-label,.form-check-input.is-valid~.form-check-label{color:#3fb618}.form-check-inline .form-check-input~.valid-feedback{margin-left:.5em}.was-validated .input-group>.form-control:not(:focus):valid,.input-group>.form-control:not(:focus).is-valid,.was-validated .input-group>.form-select:not(:focus):valid,.input-group>.form-select:not(:focus).is-valid,.was-validated .input-group>.form-floating:not(:focus-within):valid,.input-group>.form-floating:not(:focus-within).is-valid{z-index:3}.invalid-feedback{display:none;width:100%;margin-top:.25rem;font-size:0.875em;color:#ff0039}.invalid-tooltip{position:absolute;top:100%;z-index:5;display:none;max-width:100%;padding:.25rem .5rem;margin-top:.1rem;font-size:0.875rem;color:#fff;background-color:#ff0039}.was-validated :invalid~.invalid-feedback,.was-validated :invalid~.invalid-tooltip,.is-invalid~.invalid-feedback,.is-invalid~.invalid-tooltip{display:block}.was-validated .form-control:invalid,.form-control.is-invalid{border-color:#ff0039;padding-right:calc(1.5em + 0.75rem);background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 12 12' width='12' height='12' 
fill='none' stroke='%23ff0039'%3e%3ccircle cx='6' cy='6' r='4.5'/%3e%3cpath stroke-linejoin='round' d='M5.8 3.6h.4L6 6.5z'/%3e%3ccircle cx='6' cy='8.2' r='.6' fill='%23ff0039' stroke='none'/%3e%3c/svg%3e");background-repeat:no-repeat;background-position:right calc(0.375em + 0.1875rem) center;background-size:calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-control:invalid:focus,.form-control.is-invalid:focus{border-color:#ff0039;box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated textarea.form-control:invalid,textarea.form-control.is-invalid{padding-right:calc(1.5em + 0.75rem);background-position:top calc(0.375em + 0.1875rem) right calc(0.375em + 0.1875rem)}.was-validated .form-select:invalid,.form-select.is-invalid{border-color:#ff0039}.was-validated .form-select:invalid:not([multiple]):not([size]),.was-validated .form-select:invalid:not([multiple])[size="1"],.form-select.is-invalid:not([multiple]):not([size]),.form-select.is-invalid:not([multiple])[size="1"]{--bs-form-select-bg-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 12 12' width='12' height='12' fill='none' stroke='%23ff0039'%3e%3ccircle cx='6' cy='6' r='4.5'/%3e%3cpath stroke-linejoin='round' d='M5.8 3.6h.4L6 6.5z'/%3e%3ccircle cx='6' cy='8.2' r='.6' fill='%23ff0039' stroke='none'/%3e%3c/svg%3e");padding-right:4.125rem;background-position:right .75rem center,center right 2.25rem;background-size:16px 12px,calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-select:invalid:focus,.form-select.is-invalid:focus{border-color:#ff0039;box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated .form-control-color:invalid,.form-control-color.is-invalid{width:calc(3rem + calc(1.5em + 0.75rem))}.was-validated .form-check-input:invalid,.form-check-input.is-invalid{border-color:#ff0039}.was-validated .form-check-input:invalid:checked,.form-check-input.is-invalid:checked{background-color:#ff0039}.was-validated 
.form-check-input:invalid:focus,.form-check-input.is-invalid:focus{box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated .form-check-input:invalid~.form-check-label,.form-check-input.is-invalid~.form-check-label{color:#ff0039}.form-check-inline .form-check-input~.invalid-feedback{margin-left:.5em}.was-validated .input-group>.form-control:not(:focus):invalid,.input-group>.form-control:not(:focus).is-invalid,.was-validated .input-group>.form-select:not(:focus):invalid,.input-group>.form-select:not(:focus).is-invalid,.was-validated .input-group>.form-floating:not(:focus-within):invalid,.input-group>.form-floating:not(:focus-within).is-invalid{z-index:4}.btn{--bs-btn-padding-x: 0.75rem;--bs-btn-padding-y: 0.375rem;--bs-btn-font-family: ;--bs-btn-font-size:1rem;--bs-btn-font-weight: 400;--bs-btn-line-height: 1.5;--bs-btn-color: #343a40;--bs-btn-bg: transparent;--bs-btn-border-width: 1px;--bs-btn-border-color: transparent;--bs-btn-border-radius: 0.25rem;--bs-btn-hover-border-color: transparent;--bs-btn-box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.15), 0 1px 1px rgba(0, 0, 0, 0.075);--bs-btn-disabled-opacity: 0.65;--bs-btn-focus-box-shadow: 0 0 0 0.25rem rgba(var(--bs-btn-focus-shadow-rgb), .5);display:inline-block;padding:var(--bs-btn-padding-y) var(--bs-btn-padding-x);font-family:var(--bs-btn-font-family);font-size:var(--bs-btn-font-size);font-weight:var(--bs-btn-font-weight);line-height:var(--bs-btn-line-height);color:var(--bs-btn-color);text-align:center;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;vertical-align:middle;cursor:pointer;user-select:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;border:var(--bs-btn-border-width) solid var(--bs-btn-border-color);background-color:var(--bs-btn-bg);transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s 
ease-in-out}@media(prefers-reduced-motion: reduce){.btn{transition:none}}.btn:hover{color:var(--bs-btn-hover-color);background-color:var(--bs-btn-hover-bg);border-color:var(--bs-btn-hover-border-color)}.btn-check+.btn:hover{color:var(--bs-btn-color);background-color:var(--bs-btn-bg);border-color:var(--bs-btn-border-color)}.btn:focus-visible{color:var(--bs-btn-hover-color);background-color:var(--bs-btn-hover-bg);border-color:var(--bs-btn-hover-border-color);outline:0;box-shadow:var(--bs-btn-focus-box-shadow)}.btn-check:focus-visible+.btn{border-color:var(--bs-btn-hover-border-color);outline:0;box-shadow:var(--bs-btn-focus-box-shadow)}.btn-check:checked+.btn,:not(.btn-check)+.btn:active,.btn:first-child:active,.btn.active,.btn.show{color:var(--bs-btn-active-color);background-color:var(--bs-btn-active-bg);border-color:var(--bs-btn-active-border-color)}.btn-check:checked+.btn:focus-visible,:not(.btn-check)+.btn:active:focus-visible,.btn:first-child:active:focus-visible,.btn.active:focus-visible,.btn.show:focus-visible{box-shadow:var(--bs-btn-focus-box-shadow)}.btn:disabled,.btn.disabled,fieldset:disabled .btn{color:var(--bs-btn-disabled-color);pointer-events:none;background-color:var(--bs-btn-disabled-bg);border-color:var(--bs-btn-disabled-border-color);opacity:var(--bs-btn-disabled-opacity)}.btn-default{--bs-btn-color: #fff;--bs-btn-bg: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #2c3136;--bs-btn-hover-border-color: #2a2e33;--bs-btn-focus-shadow-rgb: 82, 88, 93;--bs-btn-active-color: #fff;--bs-btn-active-bg: #2a2e33;--bs-btn-active-border-color: #272c30;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #343a40;--bs-btn-disabled-border-color: #343a40}.btn-primary{--bs-btn-color: #fff;--bs-btn-bg: #2780e3;--bs-btn-border-color: #2780e3;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #216dc1;--bs-btn-hover-border-color: #1f66b6;--bs-btn-focus-shadow-rgb: 71, 147, 
231;--bs-btn-active-color: #fff;--bs-btn-active-bg: #1f66b6;--bs-btn-active-border-color: #1d60aa;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #2780e3;--bs-btn-disabled-border-color: #2780e3}.btn-secondary{--bs-btn-color: #fff;--bs-btn-bg: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #2c3136;--bs-btn-hover-border-color: #2a2e33;--bs-btn-focus-shadow-rgb: 82, 88, 93;--bs-btn-active-color: #fff;--bs-btn-active-bg: #2a2e33;--bs-btn-active-border-color: #272c30;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #343a40;--bs-btn-disabled-border-color: #343a40}.btn-success{--bs-btn-color: #fff;--bs-btn-bg: #3fb618;--bs-btn-border-color: #3fb618;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #369b14;--bs-btn-hover-border-color: #329213;--bs-btn-focus-shadow-rgb: 92, 193, 59;--bs-btn-active-color: #fff;--bs-btn-active-bg: #329213;--bs-btn-active-border-color: #2f8912;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #3fb618;--bs-btn-disabled-border-color: #3fb618}.btn-info{--bs-btn-color: #fff;--bs-btn-bg: #9954bb;--bs-btn-border-color: #9954bb;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #82479f;--bs-btn-hover-border-color: #7a4396;--bs-btn-focus-shadow-rgb: 168, 110, 197;--bs-btn-active-color: #fff;--bs-btn-active-bg: #7a4396;--bs-btn-active-border-color: #733f8c;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #9954bb;--bs-btn-disabled-border-color: #9954bb}.btn-warning{--bs-btn-color: #fff;--bs-btn-bg: #ff7518;--bs-btn-border-color: #ff7518;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #d96314;--bs-btn-hover-border-color: #cc5e13;--bs-btn-focus-shadow-rgb: 255, 138, 59;--bs-btn-active-color: #fff;--bs-btn-active-bg: #cc5e13;--bs-btn-active-border-color: 
#bf5812;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #ff7518;--bs-btn-disabled-border-color: #ff7518}.btn-danger{--bs-btn-color: #fff;--bs-btn-bg: #ff0039;--bs-btn-border-color: #ff0039;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #d90030;--bs-btn-hover-border-color: #cc002e;--bs-btn-focus-shadow-rgb: 255, 38, 87;--bs-btn-active-color: #fff;--bs-btn-active-bg: #cc002e;--bs-btn-active-border-color: #bf002b;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #ff0039;--bs-btn-disabled-border-color: #ff0039}.btn-light{--bs-btn-color: #000;--bs-btn-bg: #f8f9fa;--bs-btn-border-color: #f8f9fa;--bs-btn-hover-color: #000;--bs-btn-hover-bg: #d3d4d5;--bs-btn-hover-border-color: #c6c7c8;--bs-btn-focus-shadow-rgb: 211, 212, 213;--bs-btn-active-color: #000;--bs-btn-active-bg: #c6c7c8;--bs-btn-active-border-color: #babbbc;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #000;--bs-btn-disabled-bg: #f8f9fa;--bs-btn-disabled-border-color: #f8f9fa}.btn-dark{--bs-btn-color: #fff;--bs-btn-bg: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #52585d;--bs-btn-hover-border-color: #484e53;--bs-btn-focus-shadow-rgb: 82, 88, 93;--bs-btn-active-color: #fff;--bs-btn-active-bg: #5d6166;--bs-btn-active-border-color: #484e53;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #343a40;--bs-btn-disabled-border-color: #343a40}.btn-outline-default{--bs-btn-color: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #343a40;--bs-btn-hover-border-color: #343a40;--bs-btn-focus-shadow-rgb: 52, 58, 64;--bs-btn-active-color: #fff;--bs-btn-active-bg: #343a40;--bs-btn-active-border-color: #343a40;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #343a40;--bs-btn-disabled-bg: 
transparent;--bs-btn-disabled-border-color: #343a40;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-primary{--bs-btn-color: #2780e3;--bs-btn-border-color: #2780e3;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #2780e3;--bs-btn-hover-border-color: #2780e3;--bs-btn-focus-shadow-rgb: 39, 128, 227;--bs-btn-active-color: #fff;--bs-btn-active-bg: #2780e3;--bs-btn-active-border-color: #2780e3;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #2780e3;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #2780e3;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-secondary{--bs-btn-color: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #343a40;--bs-btn-hover-border-color: #343a40;--bs-btn-focus-shadow-rgb: 52, 58, 64;--bs-btn-active-color: #fff;--bs-btn-active-bg: #343a40;--bs-btn-active-border-color: #343a40;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #343a40;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #343a40;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-success{--bs-btn-color: #3fb618;--bs-btn-border-color: #3fb618;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #3fb618;--bs-btn-hover-border-color: #3fb618;--bs-btn-focus-shadow-rgb: 63, 182, 24;--bs-btn-active-color: #fff;--bs-btn-active-bg: #3fb618;--bs-btn-active-border-color: #3fb618;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #3fb618;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #3fb618;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-info{--bs-btn-color: #9954bb;--bs-btn-border-color: #9954bb;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #9954bb;--bs-btn-hover-border-color: #9954bb;--bs-btn-focus-shadow-rgb: 153, 84, 187;--bs-btn-active-color: #fff;--bs-btn-active-bg: #9954bb;--bs-btn-active-border-color: #9954bb;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 
0.125);--bs-btn-disabled-color: #9954bb;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #9954bb;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-warning{--bs-btn-color: #ff7518;--bs-btn-border-color: #ff7518;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #ff7518;--bs-btn-hover-border-color: #ff7518;--bs-btn-focus-shadow-rgb: 255, 117, 24;--bs-btn-active-color: #fff;--bs-btn-active-bg: #ff7518;--bs-btn-active-border-color: #ff7518;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #ff7518;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #ff7518;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-danger{--bs-btn-color: #ff0039;--bs-btn-border-color: #ff0039;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #ff0039;--bs-btn-hover-border-color: #ff0039;--bs-btn-focus-shadow-rgb: 255, 0, 57;--bs-btn-active-color: #fff;--bs-btn-active-bg: #ff0039;--bs-btn-active-border-color: #ff0039;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #ff0039;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #ff0039;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-light{--bs-btn-color: #f8f9fa;--bs-btn-border-color: #f8f9fa;--bs-btn-hover-color: #000;--bs-btn-hover-bg: #f8f9fa;--bs-btn-hover-border-color: #f8f9fa;--bs-btn-focus-shadow-rgb: 248, 249, 250;--bs-btn-active-color: #000;--bs-btn-active-bg: #f8f9fa;--bs-btn-active-border-color: #f8f9fa;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #f8f9fa;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #f8f9fa;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-dark{--bs-btn-color: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #343a40;--bs-btn-hover-border-color: #343a40;--bs-btn-focus-shadow-rgb: 52, 58, 64;--bs-btn-active-color: #fff;--bs-btn-active-bg: #343a40;--bs-btn-active-border-color: 
#343a40;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #343a40;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #343a40;--bs-btn-bg: transparent;--bs-gradient: none}.btn-link{--bs-btn-font-weight: 400;--bs-btn-color: #2761e3;--bs-btn-bg: transparent;--bs-btn-border-color: transparent;--bs-btn-hover-color: #1f4eb6;--bs-btn-hover-border-color: transparent;--bs-btn-active-color: #1f4eb6;--bs-btn-active-border-color: transparent;--bs-btn-disabled-color: #6c757d;--bs-btn-disabled-border-color: transparent;--bs-btn-box-shadow: 0 0 0 #000;--bs-btn-focus-shadow-rgb: 71, 121, 231;text-decoration:underline;-webkit-text-decoration:underline;-moz-text-decoration:underline;-ms-text-decoration:underline;-o-text-decoration:underline}.btn-link:focus-visible{color:var(--bs-btn-color)}.btn-link:hover{color:var(--bs-btn-hover-color)}.btn-lg,.btn-group-lg>.btn{--bs-btn-padding-y: 0.5rem;--bs-btn-padding-x: 1rem;--bs-btn-font-size:1.25rem;--bs-btn-border-radius: 0.5rem}.btn-sm,.btn-group-sm>.btn{--bs-btn-padding-y: 0.25rem;--bs-btn-padding-x: 0.5rem;--bs-btn-font-size:0.875rem;--bs-btn-border-radius: 0.2em}.fade{transition:opacity .15s linear}@media(prefers-reduced-motion: reduce){.fade{transition:none}}.fade:not(.show){opacity:0}.collapse:not(.show){display:none}.collapsing{height:0;overflow:hidden;transition:height .2s ease}@media(prefers-reduced-motion: reduce){.collapsing{transition:none}}.collapsing.collapse-horizontal{width:0;height:auto;transition:width .35s ease}@media(prefers-reduced-motion: reduce){.collapsing.collapse-horizontal{transition:none}}.dropup,.dropend,.dropdown,.dropstart,.dropup-center,.dropdown-center{position:relative}.dropdown-toggle{white-space:nowrap}.dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:.3em solid;border-right:.3em solid rgba(0,0,0,0);border-bottom:0;border-left:.3em solid 
rgba(0,0,0,0)}.dropdown-toggle:empty::after{margin-left:0}.dropdown-menu{--bs-dropdown-zindex: 1000;--bs-dropdown-min-width: 10rem;--bs-dropdown-padding-x: 0;--bs-dropdown-padding-y: 0.5rem;--bs-dropdown-spacer: 0.125rem;--bs-dropdown-font-size:1rem;--bs-dropdown-color: #343a40;--bs-dropdown-bg: #FEFBF2;--bs-dropdown-border-color: rgba(0, 0, 0, 0.175);--bs-dropdown-border-radius: 0.25rem;--bs-dropdown-border-width: 1px;--bs-dropdown-inner-border-radius: calc(0.25rem - 1px);--bs-dropdown-divider-bg: rgba(0, 0, 0, 0.175);--bs-dropdown-divider-margin-y: 0.5rem;--bs-dropdown-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);--bs-dropdown-link-color: #343a40;--bs-dropdown-link-hover-color: #343a40;--bs-dropdown-link-hover-bg: #f8f9fa;--bs-dropdown-link-active-color: #fff;--bs-dropdown-link-active-bg: #2780e3;--bs-dropdown-link-disabled-color: rgba(52, 58, 64, 0.5);--bs-dropdown-item-padding-x: 1rem;--bs-dropdown-item-padding-y: 0.25rem;--bs-dropdown-header-color: #6c757d;--bs-dropdown-header-padding-x: 1rem;--bs-dropdown-header-padding-y: 0.5rem;position:absolute;z-index:var(--bs-dropdown-zindex);display:none;min-width:var(--bs-dropdown-min-width);padding:var(--bs-dropdown-padding-y) var(--bs-dropdown-padding-x);margin:0;font-size:var(--bs-dropdown-font-size);color:var(--bs-dropdown-color);text-align:left;list-style:none;background-color:var(--bs-dropdown-bg);background-clip:padding-box;border:var(--bs-dropdown-border-width) solid var(--bs-dropdown-border-color)}.dropdown-menu[data-bs-popper]{top:100%;left:0;margin-top:var(--bs-dropdown-spacer)}.dropdown-menu-start{--bs-position: start}.dropdown-menu-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-end{--bs-position: end}.dropdown-menu-end[data-bs-popper]{right:0;left:auto}@media(min-width: 576px){.dropdown-menu-sm-start{--bs-position: start}.dropdown-menu-sm-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-sm-end{--bs-position: end}.dropdown-menu-sm-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 
768px){.dropdown-menu-md-start{--bs-position: start}.dropdown-menu-md-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-md-end{--bs-position: end}.dropdown-menu-md-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 992px){.dropdown-menu-lg-start{--bs-position: start}.dropdown-menu-lg-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-lg-end{--bs-position: end}.dropdown-menu-lg-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 1200px){.dropdown-menu-xl-start{--bs-position: start}.dropdown-menu-xl-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-xl-end{--bs-position: end}.dropdown-menu-xl-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 1400px){.dropdown-menu-xxl-start{--bs-position: start}.dropdown-menu-xxl-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-xxl-end{--bs-position: end}.dropdown-menu-xxl-end[data-bs-popper]{right:0;left:auto}}.dropup .dropdown-menu[data-bs-popper]{top:auto;bottom:100%;margin-top:0;margin-bottom:var(--bs-dropdown-spacer)}.dropup .dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:0;border-right:.3em solid rgba(0,0,0,0);border-bottom:.3em solid;border-left:.3em solid rgba(0,0,0,0)}.dropup .dropdown-toggle:empty::after{margin-left:0}.dropend .dropdown-menu[data-bs-popper]{top:0;right:auto;left:100%;margin-top:0;margin-left:var(--bs-dropdown-spacer)}.dropend .dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:.3em solid rgba(0,0,0,0);border-right:0;border-bottom:.3em solid rgba(0,0,0,0);border-left:.3em solid}.dropend .dropdown-toggle:empty::after{margin-left:0}.dropend .dropdown-toggle::after{vertical-align:0}.dropstart .dropdown-menu[data-bs-popper]{top:0;right:100%;left:auto;margin-top:0;margin-right:var(--bs-dropdown-spacer)}.dropstart .dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:""}.dropstart 
.dropdown-toggle::after{display:none}.dropstart .dropdown-toggle::before{display:inline-block;margin-right:.255em;vertical-align:.255em;content:"";border-top:.3em solid rgba(0,0,0,0);border-right:.3em solid;border-bottom:.3em solid rgba(0,0,0,0)}.dropstart .dropdown-toggle:empty::after{margin-left:0}.dropstart .dropdown-toggle::before{vertical-align:0}.dropdown-divider{height:0;margin:var(--bs-dropdown-divider-margin-y) 0;overflow:hidden;border-top:1px solid var(--bs-dropdown-divider-bg);opacity:1}.dropdown-item{display:block;width:100%;padding:var(--bs-dropdown-item-padding-y) var(--bs-dropdown-item-padding-x);clear:both;font-weight:400;color:var(--bs-dropdown-link-color);text-align:inherit;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;white-space:nowrap;background-color:rgba(0,0,0,0);border:0}.dropdown-item:hover,.dropdown-item:focus{color:var(--bs-dropdown-link-hover-color);background-color:var(--bs-dropdown-link-hover-bg)}.dropdown-item.active,.dropdown-item:active{color:var(--bs-dropdown-link-active-color);text-decoration:none;background-color:var(--bs-dropdown-link-active-bg)}.dropdown-item.disabled,.dropdown-item:disabled{color:var(--bs-dropdown-link-disabled-color);pointer-events:none;background-color:rgba(0,0,0,0)}.dropdown-menu.show{display:block}.dropdown-header{display:block;padding:var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x);margin-bottom:0;font-size:0.875rem;color:var(--bs-dropdown-header-color);white-space:nowrap}.dropdown-item-text{display:block;padding:var(--bs-dropdown-item-padding-y) var(--bs-dropdown-item-padding-x);color:var(--bs-dropdown-link-color)}.dropdown-menu-dark{--bs-dropdown-color: #dee2e6;--bs-dropdown-bg: #343a40;--bs-dropdown-border-color: rgba(0, 0, 0, 0.175);--bs-dropdown-box-shadow: ;--bs-dropdown-link-color: #dee2e6;--bs-dropdown-link-hover-color: #fff;--bs-dropdown-divider-bg: rgba(0, 0, 0, 
0.175);--bs-dropdown-link-hover-bg: rgba(255, 255, 255, 0.15);--bs-dropdown-link-active-color: #fff;--bs-dropdown-link-active-bg: #2780e3;--bs-dropdown-link-disabled-color: #adb5bd;--bs-dropdown-header-color: #adb5bd}.btn-group,.btn-group-vertical{position:relative;display:inline-flex;vertical-align:middle}.btn-group>.btn,.btn-group-vertical>.btn{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto}.btn-group>.btn-check:checked+.btn,.btn-group>.btn-check:focus+.btn,.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active,.btn-group-vertical>.btn-check:checked+.btn,.btn-group-vertical>.btn-check:focus+.btn,.btn-group-vertical>.btn:hover,.btn-group-vertical>.btn:focus,.btn-group-vertical>.btn:active,.btn-group-vertical>.btn.active{z-index:1}.btn-toolbar{display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;justify-content:flex-start;-webkit-justify-content:flex-start}.btn-toolbar .input-group{width:auto}.btn-group>:not(.btn-check:first-child)+.btn,.btn-group>.btn-group:not(:first-child){margin-left:calc(1px*-1)}.dropdown-toggle-split{padding-right:.5625rem;padding-left:.5625rem}.dropdown-toggle-split::after,.dropup .dropdown-toggle-split::after,.dropend .dropdown-toggle-split::after{margin-left:0}.dropstart .dropdown-toggle-split::before{margin-right:0}.btn-sm+.dropdown-toggle-split,.btn-group-sm>.btn+.dropdown-toggle-split{padding-right:.375rem;padding-left:.375rem}.btn-lg+.dropdown-toggle-split,.btn-group-lg>.btn+.dropdown-toggle-split{padding-right:.75rem;padding-left:.75rem}.btn-group-vertical{flex-direction:column;-webkit-flex-direction:column;align-items:flex-start;-webkit-align-items:flex-start;justify-content:center;-webkit-justify-content:center}.btn-group-vertical>.btn,.btn-group-vertical>.btn-group{width:100%}.btn-group-vertical>.btn:not(:first-child),.btn-group-vertical>.btn-group:not(:first-child){margin-top:calc(1px*-1)}.nav{--bs-nav-link-padding-x: 1rem;--bs-nav-link-padding-y: 
0.5rem;--bs-nav-link-font-weight: ;--bs-nav-link-color: #2761e3;--bs-nav-link-hover-color: #1f4eb6;--bs-nav-link-disabled-color: rgba(52, 58, 64, 0.75);display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;padding-left:0;margin-bottom:0;list-style:none}.nav-link{display:block;padding:var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x);font-size:var(--bs-nav-link-font-size);font-weight:var(--bs-nav-link-font-weight);color:var(--bs-nav-link-color);text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;background:none;border:0;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out}@media(prefers-reduced-motion: reduce){.nav-link{transition:none}}.nav-link:hover,.nav-link:focus{color:var(--bs-nav-link-hover-color)}.nav-link:focus-visible{outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.nav-link.disabled,.nav-link:disabled{color:var(--bs-nav-link-disabled-color);pointer-events:none;cursor:default}.nav-tabs{--bs-nav-tabs-border-width: 1px;--bs-nav-tabs-border-color: #dee2e6;--bs-nav-tabs-border-radius: 0.25rem;--bs-nav-tabs-link-hover-border-color: #e9ecef #e9ecef #dee2e6;--bs-nav-tabs-link-active-color: #000;--bs-nav-tabs-link-active-bg: #FEFBF2;--bs-nav-tabs-link-active-border-color: #dee2e6 #dee2e6 #FEFBF2;border-bottom:var(--bs-nav-tabs-border-width) solid var(--bs-nav-tabs-border-color)}.nav-tabs .nav-link{margin-bottom:calc(-1*var(--bs-nav-tabs-border-width));border:var(--bs-nav-tabs-border-width) solid rgba(0,0,0,0)}.nav-tabs .nav-link:hover,.nav-tabs .nav-link:focus{isolation:isolate;border-color:var(--bs-nav-tabs-link-hover-border-color)}.nav-tabs .nav-link.active,.nav-tabs .nav-item.show .nav-link{color:var(--bs-nav-tabs-link-active-color);background-color:var(--bs-nav-tabs-link-active-bg);border-color:var(--bs-nav-tabs-link-active-border-color)}.nav-tabs 
.dropdown-menu{margin-top:calc(-1*var(--bs-nav-tabs-border-width))}.nav-pills{--bs-nav-pills-border-radius: 0.25rem;--bs-nav-pills-link-active-color: #fff;--bs-nav-pills-link-active-bg: #2780e3}.nav-pills .nav-link.active,.nav-pills .show>.nav-link{color:var(--bs-nav-pills-link-active-color);background-color:var(--bs-nav-pills-link-active-bg)}.nav-underline{--bs-nav-underline-gap: 1rem;--bs-nav-underline-border-width: 0.125rem;--bs-nav-underline-link-active-color: #000;gap:var(--bs-nav-underline-gap)}.nav-underline .nav-link{padding-right:0;padding-left:0;border-bottom:var(--bs-nav-underline-border-width) solid rgba(0,0,0,0)}.nav-underline .nav-link:hover,.nav-underline .nav-link:focus{border-bottom-color:currentcolor}.nav-underline .nav-link.active,.nav-underline .show>.nav-link{font-weight:700;color:var(--bs-nav-underline-link-active-color);border-bottom-color:currentcolor}.nav-fill>.nav-link,.nav-fill .nav-item{flex:1 1 auto;-webkit-flex:1 1 auto;text-align:center}.nav-justified>.nav-link,.nav-justified .nav-item{flex-basis:0;-webkit-flex-basis:0;flex-grow:1;-webkit-flex-grow:1;text-align:center}.nav-fill .nav-item .nav-link,.nav-justified .nav-item .nav-link{width:100%}.tab-content>.tab-pane{display:none}.tab-content>.active{display:block}.navbar{--bs-navbar-padding-x: 0;--bs-navbar-padding-y: 0.5rem;--bs-navbar-color: #F2E5BD;--bs-navbar-hover-color: rgba(147, 176, 241, 0.8);--bs-navbar-disabled-color: rgba(242, 229, 189, 0.75);--bs-navbar-active-color: #93b0f1;--bs-navbar-brand-padding-y: 0.3125rem;--bs-navbar-brand-margin-end: 1rem;--bs-navbar-brand-font-size: 1.25rem;--bs-navbar-brand-color: #F2E5BD;--bs-navbar-brand-hover-color: #93b0f1;--bs-navbar-nav-link-padding-x: 0.5rem;--bs-navbar-toggler-padding-y: 0.25;--bs-navbar-toggler-padding-x: 0;--bs-navbar-toggler-font-size: 1.25rem;--bs-navbar-toggler-icon-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='%23F2E5BD' stroke-linecap='round' 
stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e");--bs-navbar-toggler-border-color: rgba(242, 229, 189, 0);--bs-navbar-toggler-border-radius: 0.25rem;--bs-navbar-toggler-focus-width: 0.25rem;--bs-navbar-toggler-transition: box-shadow 0.15s ease-in-out;position:relative;display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding:var(--bs-navbar-padding-y) var(--bs-navbar-padding-x)}.navbar>.container,.navbar>.container-fluid,.navbar>.container-sm,.navbar>.container-md,.navbar>.container-lg,.navbar>.container-xl,.navbar>.container-xxl{display:flex;display:-webkit-flex;flex-wrap:inherit;-webkit-flex-wrap:inherit;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between}.navbar-brand{padding-top:var(--bs-navbar-brand-padding-y);padding-bottom:var(--bs-navbar-brand-padding-y);margin-right:var(--bs-navbar-brand-margin-end);font-size:var(--bs-navbar-brand-font-size);color:var(--bs-navbar-brand-color);text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;white-space:nowrap}.navbar-brand:hover,.navbar-brand:focus{color:var(--bs-navbar-brand-hover-color)}.navbar-nav{--bs-nav-link-padding-x: 0;--bs-nav-link-padding-y: 0.5rem;--bs-nav-link-font-weight: ;--bs-nav-link-color: var(--bs-navbar-color);--bs-nav-link-hover-color: var(--bs-navbar-hover-color);--bs-nav-link-disabled-color: var(--bs-navbar-disabled-color);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;padding-left:0;margin-bottom:0;list-style:none}.navbar-nav .nav-link.active,.navbar-nav .nav-link.show{color:var(--bs-navbar-active-color)}.navbar-nav .dropdown-menu{position:static}.navbar-text{padding-top:.5rem;padding-bottom:.5rem;color:var(--bs-navbar-color)}.navbar-text a,.navbar-text 
a:hover,.navbar-text a:focus{color:var(--bs-navbar-active-color)}.navbar-collapse{flex-basis:100%;-webkit-flex-basis:100%;flex-grow:1;-webkit-flex-grow:1;align-items:center;-webkit-align-items:center}.navbar-toggler{padding:var(--bs-navbar-toggler-padding-y) var(--bs-navbar-toggler-padding-x);font-size:var(--bs-navbar-toggler-font-size);line-height:1;color:var(--bs-navbar-color);background-color:rgba(0,0,0,0);border:var(--bs-border-width) solid var(--bs-navbar-toggler-border-color);transition:var(--bs-navbar-toggler-transition)}@media(prefers-reduced-motion: reduce){.navbar-toggler{transition:none}}.navbar-toggler:hover{text-decoration:none}.navbar-toggler:focus{text-decoration:none;outline:0;box-shadow:0 0 0 var(--bs-navbar-toggler-focus-width)}.navbar-toggler-icon{display:inline-block;width:1.5em;height:1.5em;vertical-align:middle;background-image:var(--bs-navbar-toggler-icon-bg);background-repeat:no-repeat;background-position:center;background-size:100%}.navbar-nav-scroll{max-height:var(--bs-scroll-height, 75vh);overflow-y:auto}@media(min-width: 576px){.navbar-expand-sm{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-sm .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-sm .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-sm .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-sm .navbar-nav-scroll{overflow:visible}.navbar-expand-sm .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-sm .navbar-toggler{display:none}.navbar-expand-sm .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-sm 
.offcanvas .offcanvas-header{display:none}.navbar-expand-sm .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 768px){.navbar-expand-md{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-md .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-md .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-md .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-md .navbar-nav-scroll{overflow:visible}.navbar-expand-md .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-md .navbar-toggler{display:none}.navbar-expand-md .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-md .offcanvas .offcanvas-header{display:none}.navbar-expand-md .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 992px){.navbar-expand-lg{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-lg .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-lg .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-lg .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-lg .navbar-nav-scroll{overflow:visible}.navbar-expand-lg .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-lg .navbar-toggler{display:none}.navbar-expand-lg 
.offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-lg .offcanvas .offcanvas-header{display:none}.navbar-expand-lg .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 1200px){.navbar-expand-xl{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-xl .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-xl .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-xl .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-xl .navbar-nav-scroll{overflow:visible}.navbar-expand-xl .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-xl .navbar-toggler{display:none}.navbar-expand-xl .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-xl .offcanvas .offcanvas-header{display:none}.navbar-expand-xl .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 1400px){.navbar-expand-xxl{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-xxl .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-xxl .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-xxl .navbar-nav 
.nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-xxl .navbar-nav-scroll{overflow:visible}.navbar-expand-xxl .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-xxl .navbar-toggler{display:none}.navbar-expand-xxl .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-xxl .offcanvas .offcanvas-header{display:none}.navbar-expand-xxl .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}.navbar-expand{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand .navbar-nav .dropdown-menu{position:absolute}.navbar-expand .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand .navbar-nav-scroll{overflow:visible}.navbar-expand .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand .navbar-toggler{display:none}.navbar-expand .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand .offcanvas .offcanvas-header{display:none}.navbar-expand .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}.navbar-dark,.navbar[data-bs-theme=dark]{--bs-navbar-color: #F2E5BD;--bs-navbar-hover-color: rgba(147, 176, 
241, 0.8);--bs-navbar-disabled-color: rgba(242, 229, 189, 0.75);--bs-navbar-active-color: #93b0f1;--bs-navbar-brand-color: #F2E5BD;--bs-navbar-brand-hover-color: #93b0f1;--bs-navbar-toggler-border-color: rgba(242, 229, 189, 0);--bs-navbar-toggler-icon-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='%23F2E5BD' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e")}[data-bs-theme=dark] .navbar-toggler-icon{--bs-navbar-toggler-icon-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='%23F2E5BD' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e")}.card{--bs-card-spacer-y: 1rem;--bs-card-spacer-x: 1rem;--bs-card-title-spacer-y: 0.5rem;--bs-card-title-color: ;--bs-card-subtitle-color: ;--bs-card-border-width: 1px;--bs-card-border-color: rgba(0, 0, 0, 0.175);--bs-card-border-radius: 0.25rem;--bs-card-box-shadow: ;--bs-card-inner-border-radius: calc(0.25rem - 1px);--bs-card-cap-padding-y: 0.5rem;--bs-card-cap-padding-x: 1rem;--bs-card-cap-bg: rgba(52, 58, 64, 0.25);--bs-card-cap-color: ;--bs-card-height: ;--bs-card-color: ;--bs-card-bg: #FEFBF2;--bs-card-img-overlay-padding: 1rem;--bs-card-group-margin: 0.75rem;position:relative;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;min-width:0;height:var(--bs-card-height);color:var(--bs-body-color);word-wrap:break-word;background-color:var(--bs-card-bg);background-clip:border-box;border:var(--bs-card-border-width) solid var(--bs-card-border-color)}.card>hr{margin-right:0;margin-left:0}.card>.list-group{border-top:inherit;border-bottom:inherit}.card>.list-group:first-child{border-top-width:0}.card>.list-group:last-child{border-bottom-width:0}.card>.card-header+.list-group,.card>.list-group+.card-footer{border-top:0}.card-body{flex:1 1 auto;-webkit-flex:1 1 
auto;padding:var(--bs-card-spacer-y) var(--bs-card-spacer-x);color:var(--bs-card-color)}.card-title{margin-bottom:var(--bs-card-title-spacer-y);color:var(--bs-card-title-color)}.card-subtitle{margin-top:calc(-0.5*var(--bs-card-title-spacer-y));margin-bottom:0;color:var(--bs-card-subtitle-color)}.card-text:last-child{margin-bottom:0}.card-link+.card-link{margin-left:var(--bs-card-spacer-x)}.card-header{padding:var(--bs-card-cap-padding-y) var(--bs-card-cap-padding-x);margin-bottom:0;color:var(--bs-card-cap-color);background-color:var(--bs-card-cap-bg);border-bottom:var(--bs-card-border-width) solid var(--bs-card-border-color)}.card-footer{padding:var(--bs-card-cap-padding-y) var(--bs-card-cap-padding-x);color:var(--bs-card-cap-color);background-color:var(--bs-card-cap-bg);border-top:var(--bs-card-border-width) solid var(--bs-card-border-color)}.card-header-tabs{margin-right:calc(-0.5*var(--bs-card-cap-padding-x));margin-bottom:calc(-1*var(--bs-card-cap-padding-y));margin-left:calc(-0.5*var(--bs-card-cap-padding-x));border-bottom:0}.card-header-tabs .nav-link.active{background-color:var(--bs-card-bg);border-bottom-color:var(--bs-card-bg)}.card-header-pills{margin-right:calc(-0.5*var(--bs-card-cap-padding-x));margin-left:calc(-0.5*var(--bs-card-cap-padding-x))}.card-img-overlay{position:absolute;top:0;right:0;bottom:0;left:0;padding:var(--bs-card-img-overlay-padding)}.card-img,.card-img-top,.card-img-bottom{width:100%}.card-group>.card{margin-bottom:var(--bs-card-group-margin)}@media(min-width: 576px){.card-group{display:flex;display:-webkit-flex;flex-flow:row wrap;-webkit-flex-flow:row wrap}.card-group>.card{flex:1 0 0%;-webkit-flex:1 0 0%;margin-bottom:0}.card-group>.card+.card{margin-left:0;border-left:0}}.accordion{--bs-accordion-color: #343a40;--bs-accordion-bg: #FEFBF2;--bs-accordion-transition: color 0.15s ease-in-out, background-color 0.15s ease-in-out, border-color 0.15s ease-in-out, box-shadow 0.15s ease-in-out, border-radius 0.15s 
ease;--bs-accordion-border-color: #dee2e6;--bs-accordion-border-width: 1px;--bs-accordion-border-radius: 0.25rem;--bs-accordion-inner-border-radius: calc(0.25rem - 1px);--bs-accordion-btn-padding-x: 1.25rem;--bs-accordion-btn-padding-y: 1rem;--bs-accordion-btn-color: #343a40;--bs-accordion-btn-bg: #FEFBF2;--bs-accordion-btn-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23343a40'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e");--bs-accordion-btn-icon-width: 1.25rem;--bs-accordion-btn-icon-transform: rotate(-180deg);--bs-accordion-btn-icon-transition: transform 0.2s ease-in-out;--bs-accordion-btn-active-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%2310335b'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e");--bs-accordion-btn-focus-border-color: #93c0f1;--bs-accordion-btn-focus-box-shadow: 0 0 0 0.25rem rgba(39, 128, 227, 0.25);--bs-accordion-body-padding-x: 1.25rem;--bs-accordion-body-padding-y: 1rem;--bs-accordion-active-color: #10335b;--bs-accordion-active-bg: #d4e6f9}.accordion-button{position:relative;display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;width:100%;padding:var(--bs-accordion-btn-padding-y) var(--bs-accordion-btn-padding-x);font-size:1rem;color:var(--bs-accordion-btn-color);text-align:left;background-color:var(--bs-accordion-btn-bg);border:0;overflow-anchor:none;transition:var(--bs-accordion-transition)}@media(prefers-reduced-motion: reduce){.accordion-button{transition:none}}.accordion-button:not(.collapsed){color:var(--bs-accordion-active-color);background-color:var(--bs-accordion-active-bg);box-shadow:inset 0 calc(-1*var(--bs-accordion-border-width)) 0 
var(--bs-accordion-border-color)}.accordion-button:not(.collapsed)::after{background-image:var(--bs-accordion-btn-active-icon);transform:var(--bs-accordion-btn-icon-transform)}.accordion-button::after{flex-shrink:0;-webkit-flex-shrink:0;width:var(--bs-accordion-btn-icon-width);height:var(--bs-accordion-btn-icon-width);margin-left:auto;content:"";background-image:var(--bs-accordion-btn-icon);background-repeat:no-repeat;background-size:var(--bs-accordion-btn-icon-width);transition:var(--bs-accordion-btn-icon-transition)}@media(prefers-reduced-motion: reduce){.accordion-button::after{transition:none}}.accordion-button:hover{z-index:2}.accordion-button:focus{z-index:3;border-color:var(--bs-accordion-btn-focus-border-color);outline:0;box-shadow:var(--bs-accordion-btn-focus-box-shadow)}.accordion-header{margin-bottom:0}.accordion-item{color:var(--bs-accordion-color);background-color:var(--bs-accordion-bg);border:var(--bs-accordion-border-width) solid var(--bs-accordion-border-color)}.accordion-item:not(:first-of-type){border-top:0}.accordion-body{padding:var(--bs-accordion-body-padding-y) var(--bs-accordion-body-padding-x)}.accordion-flush .accordion-collapse{border-width:0}.accordion-flush .accordion-item{border-right:0;border-left:0}.accordion-flush .accordion-item:first-child{border-top:0}.accordion-flush .accordion-item:last-child{border-bottom:0}[data-bs-theme=dark] .accordion-button::after{--bs-accordion-btn-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%237db3ee'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e");--bs-accordion-btn-active-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%237db3ee'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 
0-.708z'/%3e%3c/svg%3e")}.breadcrumb{--bs-breadcrumb-padding-x: 0;--bs-breadcrumb-padding-y: 0;--bs-breadcrumb-margin-bottom: 1rem;--bs-breadcrumb-bg: ;--bs-breadcrumb-border-radius: ;--bs-breadcrumb-divider-color: rgba(52, 58, 64, 0.75);--bs-breadcrumb-item-padding-x: 0.5rem;--bs-breadcrumb-item-active-color: rgba(52, 58, 64, 0.75);display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;padding:var(--bs-breadcrumb-padding-y) var(--bs-breadcrumb-padding-x);margin-bottom:var(--bs-breadcrumb-margin-bottom);font-size:var(--bs-breadcrumb-font-size);list-style:none;background-color:var(--bs-breadcrumb-bg)}.breadcrumb-item+.breadcrumb-item{padding-left:var(--bs-breadcrumb-item-padding-x)}.breadcrumb-item+.breadcrumb-item::before{float:left;padding-right:var(--bs-breadcrumb-item-padding-x);color:var(--bs-breadcrumb-divider-color);content:var(--bs-breadcrumb-divider, ">") /* rtl: var(--bs-breadcrumb-divider, ">") */}.breadcrumb-item.active{color:var(--bs-breadcrumb-item-active-color)}.pagination{--bs-pagination-padding-x: 0.75rem;--bs-pagination-padding-y: 0.375rem;--bs-pagination-font-size:1rem;--bs-pagination-color: #2761e3;--bs-pagination-bg: #FEFBF2;--bs-pagination-border-width: 1px;--bs-pagination-border-color: #dee2e6;--bs-pagination-border-radius: 0.25rem;--bs-pagination-hover-color: #1f4eb6;--bs-pagination-hover-bg: #f8f9fa;--bs-pagination-hover-border-color: #dee2e6;--bs-pagination-focus-color: #1f4eb6;--bs-pagination-focus-bg: #e9ecef;--bs-pagination-focus-box-shadow: 0 0 0 0.25rem rgba(39, 128, 227, 0.25);--bs-pagination-active-color: #fff;--bs-pagination-active-bg: #2780e3;--bs-pagination-active-border-color: #2780e3;--bs-pagination-disabled-color: rgba(52, 58, 64, 0.75);--bs-pagination-disabled-bg: #e9ecef;--bs-pagination-disabled-border-color: #dee2e6;display:flex;display:-webkit-flex;padding-left:0;list-style:none}.page-link{position:relative;display:block;padding:var(--bs-pagination-padding-y) 
var(--bs-pagination-padding-x);font-size:var(--bs-pagination-font-size);color:var(--bs-pagination-color);text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;background-color:var(--bs-pagination-bg);border:var(--bs-pagination-border-width) solid var(--bs-pagination-border-color);transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.page-link{transition:none}}.page-link:hover{z-index:2;color:var(--bs-pagination-hover-color);background-color:var(--bs-pagination-hover-bg);border-color:var(--bs-pagination-hover-border-color)}.page-link:focus{z-index:3;color:var(--bs-pagination-focus-color);background-color:var(--bs-pagination-focus-bg);outline:0;box-shadow:var(--bs-pagination-focus-box-shadow)}.page-link.active,.active>.page-link{z-index:3;color:var(--bs-pagination-active-color);background-color:var(--bs-pagination-active-bg);border-color:var(--bs-pagination-active-border-color)}.page-link.disabled,.disabled>.page-link{color:var(--bs-pagination-disabled-color);pointer-events:none;background-color:var(--bs-pagination-disabled-bg);border-color:var(--bs-pagination-disabled-border-color)}.page-item:not(:first-child) .page-link{margin-left:calc(1px*-1)}.pagination-lg{--bs-pagination-padding-x: 1.5rem;--bs-pagination-padding-y: 0.75rem;--bs-pagination-font-size:1.25rem;--bs-pagination-border-radius: 0.5rem}.pagination-sm{--bs-pagination-padding-x: 0.5rem;--bs-pagination-padding-y: 0.25rem;--bs-pagination-font-size:0.875rem;--bs-pagination-border-radius: 0.2em}.badge{--bs-badge-padding-x: 0.65em;--bs-badge-padding-y: 0.35em;--bs-badge-font-size:0.75em;--bs-badge-font-weight: 700;--bs-badge-color: #fff;--bs-badge-border-radius: 0.25rem;display:inline-block;padding:var(--bs-badge-padding-y) 
var(--bs-badge-padding-x);font-size:var(--bs-badge-font-size);font-weight:var(--bs-badge-font-weight);line-height:1;color:var(--bs-badge-color);text-align:center;white-space:nowrap;vertical-align:baseline}.badge:empty{display:none}.btn .badge{position:relative;top:-1px}.alert{--bs-alert-bg: transparent;--bs-alert-padding-x: 1rem;--bs-alert-padding-y: 1rem;--bs-alert-margin-bottom: 1rem;--bs-alert-color: inherit;--bs-alert-border-color: transparent;--bs-alert-border: 0 solid var(--bs-alert-border-color);--bs-alert-border-radius: 0.25rem;--bs-alert-link-color: inherit;position:relative;padding:var(--bs-alert-padding-y) var(--bs-alert-padding-x);margin-bottom:var(--bs-alert-margin-bottom);color:var(--bs-alert-color);background-color:var(--bs-alert-bg);border:var(--bs-alert-border)}.alert-heading{color:inherit}.alert-link{font-weight:700;color:var(--bs-alert-link-color)}.alert-dismissible{padding-right:3rem}.alert-dismissible .btn-close{position:absolute;top:0;right:0;z-index:2;padding:1.25rem 1rem}.alert-default{--bs-alert-color: var(--bs-default-text-emphasis);--bs-alert-bg: var(--bs-default-bg-subtle);--bs-alert-border-color: var(--bs-default-border-subtle);--bs-alert-link-color: var(--bs-default-text-emphasis)}.alert-primary{--bs-alert-color: var(--bs-primary-text-emphasis);--bs-alert-bg: var(--bs-primary-bg-subtle);--bs-alert-border-color: var(--bs-primary-border-subtle);--bs-alert-link-color: var(--bs-primary-text-emphasis)}.alert-secondary{--bs-alert-color: var(--bs-secondary-text-emphasis);--bs-alert-bg: var(--bs-secondary-bg-subtle);--bs-alert-border-color: var(--bs-secondary-border-subtle);--bs-alert-link-color: var(--bs-secondary-text-emphasis)}.alert-success{--bs-alert-color: var(--bs-success-text-emphasis);--bs-alert-bg: var(--bs-success-bg-subtle);--bs-alert-border-color: var(--bs-success-border-subtle);--bs-alert-link-color: var(--bs-success-text-emphasis)}.alert-info{--bs-alert-color: var(--bs-info-text-emphasis);--bs-alert-bg: 
var(--bs-info-bg-subtle);--bs-alert-border-color: var(--bs-info-border-subtle);--bs-alert-link-color: var(--bs-info-text-emphasis)}.alert-warning{--bs-alert-color: var(--bs-warning-text-emphasis);--bs-alert-bg: var(--bs-warning-bg-subtle);--bs-alert-border-color: var(--bs-warning-border-subtle);--bs-alert-link-color: var(--bs-warning-text-emphasis)}.alert-danger{--bs-alert-color: var(--bs-danger-text-emphasis);--bs-alert-bg: var(--bs-danger-bg-subtle);--bs-alert-border-color: var(--bs-danger-border-subtle);--bs-alert-link-color: var(--bs-danger-text-emphasis)}.alert-light{--bs-alert-color: var(--bs-light-text-emphasis);--bs-alert-bg: var(--bs-light-bg-subtle);--bs-alert-border-color: var(--bs-light-border-subtle);--bs-alert-link-color: var(--bs-light-text-emphasis)}.alert-dark{--bs-alert-color: var(--bs-dark-text-emphasis);--bs-alert-bg: var(--bs-dark-bg-subtle);--bs-alert-border-color: var(--bs-dark-border-subtle);--bs-alert-link-color: var(--bs-dark-text-emphasis)}@keyframes progress-bar-stripes{0%{background-position-x:.5rem}}.progress,.progress-stacked{--bs-progress-height: 0.5rem;--bs-progress-font-size:0.75rem;--bs-progress-bg: #e9ecef;--bs-progress-border-radius: 0.25rem;--bs-progress-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.075);--bs-progress-bar-color: #fff;--bs-progress-bar-bg: #2780e3;--bs-progress-bar-transition: width 0.6s ease;display:flex;display:-webkit-flex;height:var(--bs-progress-height);overflow:hidden;font-size:var(--bs-progress-font-size);background-color:var(--bs-progress-bg)}.progress-bar{display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;justify-content:center;-webkit-justify-content:center;overflow:hidden;color:var(--bs-progress-bar-color);text-align:center;white-space:nowrap;background-color:var(--bs-progress-bar-bg);transition:var(--bs-progress-bar-transition)}@media(prefers-reduced-motion: reduce){.progress-bar{transition:none}}.progress-bar-striped{background-image:linear-gradient(45deg, rgba(255, 
255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-size:var(--bs-progress-height) var(--bs-progress-height)}.progress-stacked>.progress{overflow:visible}.progress-stacked>.progress>.progress-bar{width:100%}.progress-bar-animated{animation:1s linear infinite progress-bar-stripes}@media(prefers-reduced-motion: reduce){.progress-bar-animated{animation:none}}.list-group{--bs-list-group-color: #343a40;--bs-list-group-bg: #FEFBF2;--bs-list-group-border-color: #dee2e6;--bs-list-group-border-width: 1px;--bs-list-group-border-radius: 0.25rem;--bs-list-group-item-padding-x: 1rem;--bs-list-group-item-padding-y: 0.5rem;--bs-list-group-action-color: rgba(52, 58, 64, 0.75);--bs-list-group-action-hover-color: #000;--bs-list-group-action-hover-bg: #f8f9fa;--bs-list-group-action-active-color: #343a40;--bs-list-group-action-active-bg: #e9ecef;--bs-list-group-disabled-color: rgba(52, 58, 64, 0.75);--bs-list-group-disabled-bg: #FEFBF2;--bs-list-group-active-color: #fff;--bs-list-group-active-bg: #2780e3;--bs-list-group-active-border-color: #2780e3;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;padding-left:0;margin-bottom:0}.list-group-numbered{list-style-type:none;counter-reset:section}.list-group-numbered>.list-group-item::before{content:counters(section, ".") ". 
";counter-increment:section}.list-group-item-action{width:100%;color:var(--bs-list-group-action-color);text-align:inherit}.list-group-item-action:hover,.list-group-item-action:focus{z-index:1;color:var(--bs-list-group-action-hover-color);text-decoration:none;background-color:var(--bs-list-group-action-hover-bg)}.list-group-item-action:active{color:var(--bs-list-group-action-active-color);background-color:var(--bs-list-group-action-active-bg)}.list-group-item{position:relative;display:block;padding:var(--bs-list-group-item-padding-y) var(--bs-list-group-item-padding-x);color:var(--bs-list-group-color);text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;background-color:var(--bs-list-group-bg);border:var(--bs-list-group-border-width) solid var(--bs-list-group-border-color)}.list-group-item.disabled,.list-group-item:disabled{color:var(--bs-list-group-disabled-color);pointer-events:none;background-color:var(--bs-list-group-disabled-bg)}.list-group-item.active{z-index:2;color:var(--bs-list-group-active-color);background-color:var(--bs-list-group-active-bg);border-color:var(--bs-list-group-active-border-color)}.list-group-item+.list-group-item{border-top-width:0}.list-group-item+.list-group-item.active{margin-top:calc(-1*var(--bs-list-group-border-width));border-top-width:var(--bs-list-group-border-width)}.list-group-horizontal{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal>.list-group-item.active{margin-top:0}.list-group-horizontal>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}@media(min-width: 
576px){.list-group-horizontal-sm{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-sm>.list-group-item.active{margin-top:0}.list-group-horizontal-sm>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-sm>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}@media(min-width: 768px){.list-group-horizontal-md{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-md>.list-group-item.active{margin-top:0}.list-group-horizontal-md>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-md>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}@media(min-width: 992px){.list-group-horizontal-lg{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-lg>.list-group-item.active{margin-top:0}.list-group-horizontal-lg>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-lg>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}@media(min-width: 1200px){.list-group-horizontal-xl{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-xl>.list-group-item.active{margin-top:0}.list-group-horizontal-xl>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-xl>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}@media(min-width: 
1400px){.list-group-horizontal-xxl{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-xxl>.list-group-item.active{margin-top:0}.list-group-horizontal-xxl>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-xxl>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}.list-group-flush>.list-group-item{border-width:0 0 var(--bs-list-group-border-width)}.list-group-flush>.list-group-item:last-child{border-bottom-width:0}.list-group-item-default{--bs-list-group-color: var(--bs-default-text-emphasis);--bs-list-group-bg: var(--bs-default-bg-subtle);--bs-list-group-border-color: var(--bs-default-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-default-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-default-border-subtle);--bs-list-group-active-color: var(--bs-default-bg-subtle);--bs-list-group-active-bg: var(--bs-default-text-emphasis);--bs-list-group-active-border-color: var(--bs-default-text-emphasis)}.list-group-item-primary{--bs-list-group-color: var(--bs-primary-text-emphasis);--bs-list-group-bg: var(--bs-primary-bg-subtle);--bs-list-group-border-color: var(--bs-primary-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-primary-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-primary-border-subtle);--bs-list-group-active-color: var(--bs-primary-bg-subtle);--bs-list-group-active-bg: var(--bs-primary-text-emphasis);--bs-list-group-active-border-color: var(--bs-primary-text-emphasis)}.list-group-item-secondary{--bs-list-group-color: var(--bs-secondary-text-emphasis);--bs-list-group-bg: 
var(--bs-secondary-bg-subtle);--bs-list-group-border-color: var(--bs-secondary-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-secondary-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-secondary-border-subtle);--bs-list-group-active-color: var(--bs-secondary-bg-subtle);--bs-list-group-active-bg: var(--bs-secondary-text-emphasis);--bs-list-group-active-border-color: var(--bs-secondary-text-emphasis)}.list-group-item-success{--bs-list-group-color: var(--bs-success-text-emphasis);--bs-list-group-bg: var(--bs-success-bg-subtle);--bs-list-group-border-color: var(--bs-success-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-success-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-success-border-subtle);--bs-list-group-active-color: var(--bs-success-bg-subtle);--bs-list-group-active-bg: var(--bs-success-text-emphasis);--bs-list-group-active-border-color: var(--bs-success-text-emphasis)}.list-group-item-info{--bs-list-group-color: var(--bs-info-text-emphasis);--bs-list-group-bg: var(--bs-info-bg-subtle);--bs-list-group-border-color: var(--bs-info-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-info-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-info-border-subtle);--bs-list-group-active-color: var(--bs-info-bg-subtle);--bs-list-group-active-bg: var(--bs-info-text-emphasis);--bs-list-group-active-border-color: var(--bs-info-text-emphasis)}.list-group-item-warning{--bs-list-group-color: var(--bs-warning-text-emphasis);--bs-list-group-bg: var(--bs-warning-bg-subtle);--bs-list-group-border-color: 
var(--bs-warning-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-warning-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-warning-border-subtle);--bs-list-group-active-color: var(--bs-warning-bg-subtle);--bs-list-group-active-bg: var(--bs-warning-text-emphasis);--bs-list-group-active-border-color: var(--bs-warning-text-emphasis)}.list-group-item-danger{--bs-list-group-color: var(--bs-danger-text-emphasis);--bs-list-group-bg: var(--bs-danger-bg-subtle);--bs-list-group-border-color: var(--bs-danger-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-danger-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-danger-border-subtle);--bs-list-group-active-color: var(--bs-danger-bg-subtle);--bs-list-group-active-bg: var(--bs-danger-text-emphasis);--bs-list-group-active-border-color: var(--bs-danger-text-emphasis)}.list-group-item-light{--bs-list-group-color: var(--bs-light-text-emphasis);--bs-list-group-bg: var(--bs-light-bg-subtle);--bs-list-group-border-color: var(--bs-light-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-light-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-light-border-subtle);--bs-list-group-active-color: var(--bs-light-bg-subtle);--bs-list-group-active-bg: var(--bs-light-text-emphasis);--bs-list-group-active-border-color: var(--bs-light-text-emphasis)}.list-group-item-dark{--bs-list-group-color: var(--bs-dark-text-emphasis);--bs-list-group-bg: var(--bs-dark-bg-subtle);--bs-list-group-border-color: var(--bs-dark-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: 
var(--bs-dark-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-dark-border-subtle);--bs-list-group-active-color: var(--bs-dark-bg-subtle);--bs-list-group-active-bg: var(--bs-dark-text-emphasis);--bs-list-group-active-border-color: var(--bs-dark-text-emphasis)}.btn-close{--bs-btn-close-color: #000;--bs-btn-close-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23000'%3e%3cpath d='M.293.293a1 1 0 0 1 1.414 0L8 6.586 14.293.293a1 1 0 1 1 1.414 1.414L9.414 8l6.293 6.293a1 1 0 0 1-1.414 1.414L8 9.414l-6.293 6.293a1 1 0 0 1-1.414-1.414L6.586 8 .293 1.707a1 1 0 0 1 0-1.414z'/%3e%3c/svg%3e");--bs-btn-close-opacity: 0.5;--bs-btn-close-hover-opacity: 0.75;--bs-btn-close-focus-shadow: 0 0 0 0.25rem rgba(39, 128, 227, 0.25);--bs-btn-close-focus-opacity: 1;--bs-btn-close-disabled-opacity: 0.25;--bs-btn-close-white-filter: invert(1) grayscale(100%) brightness(200%);box-sizing:content-box;width:1em;height:1em;padding:.25em .25em;color:var(--bs-btn-close-color);background:rgba(0,0,0,0) var(--bs-btn-close-bg) center/1em auto no-repeat;border:0;opacity:var(--bs-btn-close-opacity)}.btn-close:hover{color:var(--bs-btn-close-color);text-decoration:none;opacity:var(--bs-btn-close-hover-opacity)}.btn-close:focus{outline:0;box-shadow:var(--bs-btn-close-focus-shadow);opacity:var(--bs-btn-close-focus-opacity)}.btn-close:disabled,.btn-close.disabled{pointer-events:none;user-select:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;opacity:var(--bs-btn-close-disabled-opacity)}.btn-close-white{filter:var(--bs-btn-close-white-filter)}[data-bs-theme=dark] .btn-close{filter:var(--bs-btn-close-white-filter)}.toast{--bs-toast-zindex: 1090;--bs-toast-padding-x: 0.75rem;--bs-toast-padding-y: 0.5rem;--bs-toast-spacing: 1.5rem;--bs-toast-max-width: 350px;--bs-toast-font-size:0.875rem;--bs-toast-color: ;--bs-toast-bg: rgba(254, 251, 242, 
0.85);--bs-toast-border-width: 1px;--bs-toast-border-color: rgba(0, 0, 0, 0.175);--bs-toast-border-radius: 0.25rem;--bs-toast-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);--bs-toast-header-color: rgba(52, 58, 64, 0.75);--bs-toast-header-bg: rgba(254, 251, 242, 0.85);--bs-toast-header-border-color: rgba(0, 0, 0, 0.175);width:var(--bs-toast-max-width);max-width:100%;font-size:var(--bs-toast-font-size);color:var(--bs-toast-color);pointer-events:auto;background-color:var(--bs-toast-bg);background-clip:padding-box;border:var(--bs-toast-border-width) solid var(--bs-toast-border-color);box-shadow:var(--bs-toast-box-shadow)}.toast.showing{opacity:0}.toast:not(.show){display:none}.toast-container{--bs-toast-zindex: 1090;position:absolute;z-index:var(--bs-toast-zindex);width:max-content;width:-webkit-max-content;width:-moz-max-content;width:-ms-max-content;width:-o-max-content;max-width:100%;pointer-events:none}.toast-container>:not(:last-child){margin-bottom:var(--bs-toast-spacing)}.toast-header{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;padding:var(--bs-toast-padding-y) var(--bs-toast-padding-x);color:var(--bs-toast-header-color);background-color:var(--bs-toast-header-bg);background-clip:padding-box;border-bottom:var(--bs-toast-border-width) solid var(--bs-toast-header-border-color)}.toast-header .btn-close{margin-right:calc(-0.5*var(--bs-toast-padding-x));margin-left:var(--bs-toast-padding-x)}.toast-body{padding:var(--bs-toast-padding-x);word-wrap:break-word}.modal{--bs-modal-zindex: 1055;--bs-modal-width: 500px;--bs-modal-padding: 1rem;--bs-modal-margin: 0.5rem;--bs-modal-color: ;--bs-modal-bg: #FEFBF2;--bs-modal-border-color: rgba(0, 0, 0, 0.175);--bs-modal-border-width: 1px;--bs-modal-border-radius: 0.5rem;--bs-modal-box-shadow: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.075);--bs-modal-inner-border-radius: calc(0.5rem - 1px);--bs-modal-header-padding-x: 1rem;--bs-modal-header-padding-y: 1rem;--bs-modal-header-padding: 1rem 
1rem;--bs-modal-header-border-color: #dee2e6;--bs-modal-header-border-width: 1px;--bs-modal-title-line-height: 1.5;--bs-modal-footer-gap: 0.5rem;--bs-modal-footer-bg: ;--bs-modal-footer-border-color: #dee2e6;--bs-modal-footer-border-width: 1px;position:fixed;top:0;left:0;z-index:var(--bs-modal-zindex);display:none;width:100%;height:100%;overflow-x:hidden;overflow-y:auto;outline:0}.modal-dialog{position:relative;width:auto;margin:var(--bs-modal-margin);pointer-events:none}.modal.fade .modal-dialog{transition:transform .3s ease-out;transform:translate(0, -50px)}@media(prefers-reduced-motion: reduce){.modal.fade .modal-dialog{transition:none}}.modal.show .modal-dialog{transform:none}.modal.modal-static .modal-dialog{transform:scale(1.02)}.modal-dialog-scrollable{height:calc(100% - var(--bs-modal-margin)*2)}.modal-dialog-scrollable .modal-content{max-height:100%;overflow:hidden}.modal-dialog-scrollable .modal-body{overflow-y:auto}.modal-dialog-centered{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;min-height:calc(100% - var(--bs-modal-margin)*2)}.modal-content{position:relative;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;width:100%;color:var(--bs-modal-color);pointer-events:auto;background-color:var(--bs-modal-bg);background-clip:padding-box;border:var(--bs-modal-border-width) solid var(--bs-modal-border-color);outline:0}.modal-backdrop{--bs-backdrop-zindex: 1050;--bs-backdrop-bg: #000;--bs-backdrop-opacity: 
0.5;position:fixed;top:0;left:0;z-index:var(--bs-backdrop-zindex);width:100vw;height:100vh;background-color:var(--bs-backdrop-bg)}.modal-backdrop.fade{opacity:0}.modal-backdrop.show{opacity:var(--bs-backdrop-opacity)}.modal-header{display:flex;display:-webkit-flex;flex-shrink:0;-webkit-flex-shrink:0;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding:var(--bs-modal-header-padding);border-bottom:var(--bs-modal-header-border-width) solid var(--bs-modal-header-border-color)}.modal-header .btn-close{padding:calc(var(--bs-modal-header-padding-y)*.5) calc(var(--bs-modal-header-padding-x)*.5);margin:calc(-0.5*var(--bs-modal-header-padding-y)) calc(-0.5*var(--bs-modal-header-padding-x)) calc(-0.5*var(--bs-modal-header-padding-y)) auto}.modal-title{margin-bottom:0;line-height:var(--bs-modal-title-line-height)}.modal-body{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto;padding:var(--bs-modal-padding)}.modal-footer{display:flex;display:-webkit-flex;flex-shrink:0;-webkit-flex-shrink:0;flex-wrap:wrap;-webkit-flex-wrap:wrap;align-items:center;-webkit-align-items:center;justify-content:flex-end;-webkit-justify-content:flex-end;padding:calc(var(--bs-modal-padding) - var(--bs-modal-footer-gap)*.5);background-color:var(--bs-modal-footer-bg);border-top:var(--bs-modal-footer-border-width) solid var(--bs-modal-footer-border-color)}.modal-footer>*{margin:calc(var(--bs-modal-footer-gap)*.5)}@media(min-width: 576px){.modal{--bs-modal-margin: 1.75rem;--bs-modal-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15)}.modal-dialog{max-width:var(--bs-modal-width);margin-right:auto;margin-left:auto}.modal-sm{--bs-modal-width: 300px}}@media(min-width: 992px){.modal-lg,.modal-xl{--bs-modal-width: 800px}}@media(min-width: 1200px){.modal-xl{--bs-modal-width: 1140px}}.modal-fullscreen{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen .modal-content{height:100%;border:0}.modal-fullscreen 
.modal-body{overflow-y:auto}@media(max-width: 575.98px){.modal-fullscreen-sm-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-sm-down .modal-content{height:100%;border:0}.modal-fullscreen-sm-down .modal-body{overflow-y:auto}}@media(max-width: 767.98px){.modal-fullscreen-md-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-md-down .modal-content{height:100%;border:0}.modal-fullscreen-md-down .modal-body{overflow-y:auto}}@media(max-width: 991.98px){.modal-fullscreen-lg-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-lg-down .modal-content{height:100%;border:0}.modal-fullscreen-lg-down .modal-body{overflow-y:auto}}@media(max-width: 1199.98px){.modal-fullscreen-xl-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-xl-down .modal-content{height:100%;border:0}.modal-fullscreen-xl-down .modal-body{overflow-y:auto}}@media(max-width: 1399.98px){.modal-fullscreen-xxl-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-xxl-down .modal-content{height:100%;border:0}.modal-fullscreen-xxl-down .modal-body{overflow-y:auto}}.tooltip{--bs-tooltip-zindex: 1080;--bs-tooltip-max-width: 200px;--bs-tooltip-padding-x: 0.5rem;--bs-tooltip-padding-y: 0.25rem;--bs-tooltip-margin: ;--bs-tooltip-font-size:0.875rem;--bs-tooltip-color: #FEFBF2;--bs-tooltip-bg: #000;--bs-tooltip-border-radius: 0.25rem;--bs-tooltip-opacity: 0.9;--bs-tooltip-arrow-width: 0.8rem;--bs-tooltip-arrow-height: 0.4rem;z-index:var(--bs-tooltip-zindex);display:block;margin:var(--bs-tooltip-margin);font-family:"Source Sans Pro",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI 
Symbol";font-style:normal;font-weight:400;line-height:1.5;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;letter-spacing:normal;word-break:normal;white-space:normal;word-spacing:normal;line-break:auto;font-size:var(--bs-tooltip-font-size);word-wrap:break-word;opacity:0}.tooltip.show{opacity:var(--bs-tooltip-opacity)}.tooltip .tooltip-arrow{display:block;width:var(--bs-tooltip-arrow-width);height:var(--bs-tooltip-arrow-height)}.tooltip .tooltip-arrow::before{position:absolute;content:"";border-color:rgba(0,0,0,0);border-style:solid}.bs-tooltip-top .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=top] .tooltip-arrow{bottom:calc(-1*var(--bs-tooltip-arrow-height))}.bs-tooltip-top .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=top] .tooltip-arrow::before{top:-1px;border-width:var(--bs-tooltip-arrow-height) calc(var(--bs-tooltip-arrow-width)*.5) 0;border-top-color:var(--bs-tooltip-bg)}.bs-tooltip-end .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=right] .tooltip-arrow{left:calc(-1*var(--bs-tooltip-arrow-height));width:var(--bs-tooltip-arrow-height);height:var(--bs-tooltip-arrow-width)}.bs-tooltip-end .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=right] .tooltip-arrow::before{right:-1px;border-width:calc(var(--bs-tooltip-arrow-width)*.5) var(--bs-tooltip-arrow-height) calc(var(--bs-tooltip-arrow-width)*.5) 0;border-right-color:var(--bs-tooltip-bg)}.bs-tooltip-bottom .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=bottom] .tooltip-arrow{top:calc(-1*var(--bs-tooltip-arrow-height))}.bs-tooltip-bottom .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=bottom] .tooltip-arrow::before{bottom:-1px;border-width:0 calc(var(--bs-tooltip-arrow-width)*.5) var(--bs-tooltip-arrow-height);border-bottom-color:var(--bs-tooltip-bg)}.bs-tooltip-start .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=left] 
.tooltip-arrow{right:calc(-1*var(--bs-tooltip-arrow-height));width:var(--bs-tooltip-arrow-height);height:var(--bs-tooltip-arrow-width)}.bs-tooltip-start .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=left] .tooltip-arrow::before{left:-1px;border-width:calc(var(--bs-tooltip-arrow-width)*.5) 0 calc(var(--bs-tooltip-arrow-width)*.5) var(--bs-tooltip-arrow-height);border-left-color:var(--bs-tooltip-bg)}.tooltip-inner{max-width:var(--bs-tooltip-max-width);padding:var(--bs-tooltip-padding-y) var(--bs-tooltip-padding-x);color:var(--bs-tooltip-color);text-align:center;background-color:var(--bs-tooltip-bg)}.popover{--bs-popover-zindex: 1070;--bs-popover-max-width: 276px;--bs-popover-font-size:0.875rem;--bs-popover-bg: #FEFBF2;--bs-popover-border-width: 1px;--bs-popover-border-color: rgba(0, 0, 0, 0.175);--bs-popover-border-radius: 0.5rem;--bs-popover-inner-border-radius: calc(0.5rem - 1px);--bs-popover-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);--bs-popover-header-padding-x: 1rem;--bs-popover-header-padding-y: 0.5rem;--bs-popover-header-font-size:1rem;--bs-popover-header-color: inherit;--bs-popover-header-bg: #e9ecef;--bs-popover-body-padding-x: 1rem;--bs-popover-body-padding-y: 1rem;--bs-popover-body-color: #343a40;--bs-popover-arrow-width: 1rem;--bs-popover-arrow-height: 0.5rem;--bs-popover-arrow-border: var(--bs-popover-border-color);z-index:var(--bs-popover-zindex);display:block;max-width:var(--bs-popover-max-width);font-family:"Source Sans Pro",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI 
Symbol";font-style:normal;font-weight:400;line-height:1.5;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;letter-spacing:normal;word-break:normal;white-space:normal;word-spacing:normal;line-break:auto;font-size:var(--bs-popover-font-size);word-wrap:break-word;background-color:var(--bs-popover-bg);background-clip:padding-box;border:var(--bs-popover-border-width) solid var(--bs-popover-border-color)}.popover .popover-arrow{display:block;width:var(--bs-popover-arrow-width);height:var(--bs-popover-arrow-height)}.popover .popover-arrow::before,.popover .popover-arrow::after{position:absolute;display:block;content:"";border-color:rgba(0,0,0,0);border-style:solid;border-width:0}.bs-popover-top>.popover-arrow,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow{bottom:calc(-1*(var(--bs-popover-arrow-height)) - var(--bs-popover-border-width))}.bs-popover-top>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::before,.bs-popover-top>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::after{border-width:var(--bs-popover-arrow-height) calc(var(--bs-popover-arrow-width)*.5) 0}.bs-popover-top>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::before{bottom:0;border-top-color:var(--bs-popover-arrow-border)}.bs-popover-top>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::after{bottom:var(--bs-popover-border-width);border-top-color:var(--bs-popover-bg)}.bs-popover-end>.popover-arrow,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow{left:calc(-1*(var(--bs-popover-arrow-height)) - 
var(--bs-popover-border-width));width:var(--bs-popover-arrow-height);height:var(--bs-popover-arrow-width)}.bs-popover-end>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::before,.bs-popover-end>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::after{border-width:calc(var(--bs-popover-arrow-width)*.5) var(--bs-popover-arrow-height) calc(var(--bs-popover-arrow-width)*.5) 0}.bs-popover-end>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::before{left:0;border-right-color:var(--bs-popover-arrow-border)}.bs-popover-end>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::after{left:var(--bs-popover-border-width);border-right-color:var(--bs-popover-bg)}.bs-popover-bottom>.popover-arrow,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow{top:calc(-1*(var(--bs-popover-arrow-height)) - var(--bs-popover-border-width))}.bs-popover-bottom>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::before,.bs-popover-bottom>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::after{border-width:0 calc(var(--bs-popover-arrow-width)*.5) var(--bs-popover-arrow-height)}.bs-popover-bottom>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::before{top:0;border-bottom-color:var(--bs-popover-arrow-border)}.bs-popover-bottom>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::after{top:var(--bs-popover-border-width);border-bottom-color:var(--bs-popover-bg)}.bs-popover-bottom .popover-header::before,.bs-popover-auto[data-popper-placement^=bottom] .popover-header::before{position:absolute;top:0;left:50%;display:block;width:var(--bs-popover-arrow-width);margin-left:calc(-0.5*var(--bs-popover-arrow-width));content:"";border-bottom:var(--bs-popover-border-width) solid 
var(--bs-popover-header-bg)}.bs-popover-start>.popover-arrow,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow{right:calc(-1*(var(--bs-popover-arrow-height)) - var(--bs-popover-border-width));width:var(--bs-popover-arrow-height);height:var(--bs-popover-arrow-width)}.bs-popover-start>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::before,.bs-popover-start>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::after{border-width:calc(var(--bs-popover-arrow-width)*.5) 0 calc(var(--bs-popover-arrow-width)*.5) var(--bs-popover-arrow-height)}.bs-popover-start>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::before{right:0;border-left-color:var(--bs-popover-arrow-border)}.bs-popover-start>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::after{right:var(--bs-popover-border-width);border-left-color:var(--bs-popover-bg)}.popover-header{padding:var(--bs-popover-header-padding-y) var(--bs-popover-header-padding-x);margin-bottom:0;font-size:var(--bs-popover-header-font-size);color:var(--bs-popover-header-color);background-color:var(--bs-popover-header-bg);border-bottom:var(--bs-popover-border-width) solid var(--bs-popover-border-color)}.popover-header:empty{display:none}.popover-body{padding:var(--bs-popover-body-padding-y) var(--bs-popover-body-padding-x);color:var(--bs-popover-body-color)}.carousel{position:relative}.carousel.pointer-event{touch-action:pan-y;-webkit-touch-action:pan-y;-moz-touch-action:pan-y;-ms-touch-action:pan-y;-o-touch-action:pan-y}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel-inner::after{display:block;clear:both;content:""}.carousel-item{position:relative;display:none;float:left;width:100%;margin-right:-100%;backface-visibility:hidden;-webkit-backface-visibility:hidden;-moz-backface-visibility:hidden;-ms-backface-visibility:hidden;-o-backface-visibility:hidden;transition:transform 
.6s ease-in-out}@media(prefers-reduced-motion: reduce){.carousel-item{transition:none}}.carousel-item.active,.carousel-item-next,.carousel-item-prev{display:block}.carousel-item-next:not(.carousel-item-start),.active.carousel-item-end{transform:translateX(100%)}.carousel-item-prev:not(.carousel-item-end),.active.carousel-item-start{transform:translateX(-100%)}.carousel-fade .carousel-item{opacity:0;transition-property:opacity;transform:none}.carousel-fade .carousel-item.active,.carousel-fade .carousel-item-next.carousel-item-start,.carousel-fade .carousel-item-prev.carousel-item-end{z-index:1;opacity:1}.carousel-fade .active.carousel-item-start,.carousel-fade .active.carousel-item-end{z-index:0;opacity:0;transition:opacity 0s .6s}@media(prefers-reduced-motion: reduce){.carousel-fade .active.carousel-item-start,.carousel-fade .active.carousel-item-end{transition:none}}.carousel-control-prev,.carousel-control-next{position:absolute;top:0;bottom:0;z-index:1;display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;justify-content:center;-webkit-justify-content:center;width:15%;padding:0;color:#fff;text-align:center;background:none;border:0;opacity:.5;transition:opacity .15s ease}@media(prefers-reduced-motion: reduce){.carousel-control-prev,.carousel-control-next{transition:none}}.carousel-control-prev:hover,.carousel-control-prev:focus,.carousel-control-next:hover,.carousel-control-next:focus{color:#fff;text-decoration:none;outline:0;opacity:.9}.carousel-control-prev{left:0}.carousel-control-next{right:0}.carousel-control-prev-icon,.carousel-control-next-icon{display:inline-block;width:2rem;height:2rem;background-repeat:no-repeat;background-position:50%;background-size:100% 100%}.carousel-control-prev-icon{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23fff'%3e%3cpath d='M11.354 1.646a.5.5 0 0 1 0 .708L5.707 8l5.647 5.646a.5.5 0 0 1-.708.708l-6-6a.5.5 0 0 1 0-.708l6-6a.5.5 0 0 1 .708 
0z'/%3e%3c/svg%3e")}.carousel-control-next-icon{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23fff'%3e%3cpath d='M4.646 1.646a.5.5 0 0 1 .708 0l6 6a.5.5 0 0 1 0 .708l-6 6a.5.5 0 0 1-.708-.708L10.293 8 4.646 2.354a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e")}.carousel-indicators{position:absolute;right:0;bottom:0;left:0;z-index:2;display:flex;display:-webkit-flex;justify-content:center;-webkit-justify-content:center;padding:0;margin-right:15%;margin-bottom:1rem;margin-left:15%}.carousel-indicators [data-bs-target]{box-sizing:content-box;flex:0 1 auto;-webkit-flex:0 1 auto;width:30px;height:3px;padding:0;margin-right:3px;margin-left:3px;text-indent:-999px;cursor:pointer;background-color:#fff;background-clip:padding-box;border:0;border-top:10px solid rgba(0,0,0,0);border-bottom:10px solid rgba(0,0,0,0);opacity:.5;transition:opacity .6s ease}@media(prefers-reduced-motion: reduce){.carousel-indicators [data-bs-target]{transition:none}}.carousel-indicators .active{opacity:1}.carousel-caption{position:absolute;right:15%;bottom:1.25rem;left:15%;padding-top:1.25rem;padding-bottom:1.25rem;color:#fff;text-align:center}.carousel-dark .carousel-control-prev-icon,.carousel-dark .carousel-control-next-icon{filter:invert(1) grayscale(100)}.carousel-dark .carousel-indicators [data-bs-target]{background-color:#000}.carousel-dark .carousel-caption{color:#000}[data-bs-theme=dark] .carousel .carousel-control-prev-icon,[data-bs-theme=dark] .carousel .carousel-control-next-icon,[data-bs-theme=dark].carousel .carousel-control-prev-icon,[data-bs-theme=dark].carousel .carousel-control-next-icon{filter:invert(1) grayscale(100)}[data-bs-theme=dark] .carousel .carousel-indicators [data-bs-target],[data-bs-theme=dark].carousel .carousel-indicators [data-bs-target]{background-color:#000}[data-bs-theme=dark] .carousel .carousel-caption,[data-bs-theme=dark].carousel 
.carousel-caption{color:#000}.spinner-grow,.spinner-border{display:inline-block;width:var(--bs-spinner-width);height:var(--bs-spinner-height);vertical-align:var(--bs-spinner-vertical-align);border-radius:50%;animation:var(--bs-spinner-animation-speed) linear infinite var(--bs-spinner-animation-name)}@keyframes spinner-border{to{transform:rotate(360deg) /* rtl:ignore */}}.spinner-border{--bs-spinner-width: 2rem;--bs-spinner-height: 2rem;--bs-spinner-vertical-align: -0.125em;--bs-spinner-border-width: 0.25em;--bs-spinner-animation-speed: 0.75s;--bs-spinner-animation-name: spinner-border;border:var(--bs-spinner-border-width) solid currentcolor;border-right-color:rgba(0,0,0,0)}.spinner-border-sm{--bs-spinner-width: 1rem;--bs-spinner-height: 1rem;--bs-spinner-border-width: 0.2em}@keyframes spinner-grow{0%{transform:scale(0)}50%{opacity:1;transform:none}}.spinner-grow{--bs-spinner-width: 2rem;--bs-spinner-height: 2rem;--bs-spinner-vertical-align: -0.125em;--bs-spinner-animation-speed: 0.75s;--bs-spinner-animation-name: spinner-grow;background-color:currentcolor;opacity:0}.spinner-grow-sm{--bs-spinner-width: 1rem;--bs-spinner-height: 1rem}@media(prefers-reduced-motion: reduce){.spinner-border,.spinner-grow{--bs-spinner-animation-speed: 1.5s}}.offcanvas,.offcanvas-xxl,.offcanvas-xl,.offcanvas-lg,.offcanvas-md,.offcanvas-sm{--bs-offcanvas-zindex: 1045;--bs-offcanvas-width: 400px;--bs-offcanvas-height: 30vh;--bs-offcanvas-padding-x: 1rem;--bs-offcanvas-padding-y: 1rem;--bs-offcanvas-color: #343a40;--bs-offcanvas-bg: #FEFBF2;--bs-offcanvas-border-width: 1px;--bs-offcanvas-border-color: rgba(0, 0, 0, 0.175);--bs-offcanvas-box-shadow: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.075);--bs-offcanvas-transition: transform 0.3s ease-in-out;--bs-offcanvas-title-line-height: 1.5}@media(max-width: 
575.98px){.offcanvas-sm{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 575.98px)and (prefers-reduced-motion: reduce){.offcanvas-sm{transition:none}}@media(max-width: 575.98px){.offcanvas-sm.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-sm.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-sm.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-sm.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-sm.showing,.offcanvas-sm.show:not(.hiding){transform:none}.offcanvas-sm.showing,.offcanvas-sm.hiding,.offcanvas-sm.show{visibility:visible}}@media(min-width: 576px){.offcanvas-sm{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-sm .offcanvas-header{display:none}.offcanvas-sm .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) !important}}@media(max-width: 
767.98px){.offcanvas-md{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 767.98px)and (prefers-reduced-motion: reduce){.offcanvas-md{transition:none}}@media(max-width: 767.98px){.offcanvas-md.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-md.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-md.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-md.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-md.showing,.offcanvas-md.show:not(.hiding){transform:none}.offcanvas-md.showing,.offcanvas-md.hiding,.offcanvas-md.show{visibility:visible}}@media(min-width: 768px){.offcanvas-md{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-md .offcanvas-header{display:none}.offcanvas-md .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) !important}}@media(max-width: 
991.98px){.offcanvas-lg{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 991.98px)and (prefers-reduced-motion: reduce){.offcanvas-lg{transition:none}}@media(max-width: 991.98px){.offcanvas-lg.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-lg.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-lg.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-lg.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-lg.showing,.offcanvas-lg.show:not(.hiding){transform:none}.offcanvas-lg.showing,.offcanvas-lg.hiding,.offcanvas-lg.show{visibility:visible}}@media(min-width: 992px){.offcanvas-lg{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-lg .offcanvas-header{display:none}.offcanvas-lg .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) !important}}@media(max-width: 
1199.98px){.offcanvas-xl{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 1199.98px)and (prefers-reduced-motion: reduce){.offcanvas-xl{transition:none}}@media(max-width: 1199.98px){.offcanvas-xl.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-xl.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-xl.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-xl.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-xl.showing,.offcanvas-xl.show:not(.hiding){transform:none}.offcanvas-xl.showing,.offcanvas-xl.hiding,.offcanvas-xl.show{visibility:visible}}@media(min-width: 1200px){.offcanvas-xl{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-xl .offcanvas-header{display:none}.offcanvas-xl .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) !important}}@media(max-width: 
1399.98px){.offcanvas-xxl{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 1399.98px)and (prefers-reduced-motion: reduce){.offcanvas-xxl{transition:none}}@media(max-width: 1399.98px){.offcanvas-xxl.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-xxl.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-xxl.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-xxl.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-xxl.showing,.offcanvas-xxl.show:not(.hiding){transform:none}.offcanvas-xxl.showing,.offcanvas-xxl.hiding,.offcanvas-xxl.show{visibility:visible}}@media(min-width: 1400px){.offcanvas-xxl{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-xxl .offcanvas-header{display:none}.offcanvas-xxl .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) 
!important}}.offcanvas{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}@media(prefers-reduced-motion: reduce){.offcanvas{transition:none}}.offcanvas.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas.showing,.offcanvas.show:not(.hiding){transform:none}.offcanvas.showing,.offcanvas.hiding,.offcanvas.show{visibility:visible}.offcanvas-backdrop{position:fixed;top:0;left:0;z-index:1040;width:100vw;height:100vh;background-color:#000}.offcanvas-backdrop.fade{opacity:0}.offcanvas-backdrop.show{opacity:.5}.offcanvas-header{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding:var(--bs-offcanvas-padding-y) var(--bs-offcanvas-padding-x)}.offcanvas-header .btn-close{padding:calc(var(--bs-offcanvas-padding-y)*.5) 
calc(var(--bs-offcanvas-padding-x)*.5);margin-top:calc(-0.5*var(--bs-offcanvas-padding-y));margin-right:calc(-0.5*var(--bs-offcanvas-padding-x));margin-bottom:calc(-0.5*var(--bs-offcanvas-padding-y))}.offcanvas-title{margin-bottom:0;line-height:var(--bs-offcanvas-title-line-height)}.offcanvas-body{flex-grow:1;-webkit-flex-grow:1;padding:var(--bs-offcanvas-padding-y) var(--bs-offcanvas-padding-x);overflow-y:auto}.placeholder{display:inline-block;min-height:1em;vertical-align:middle;cursor:wait;background-color:currentcolor;opacity:.5}.placeholder.btn::before{display:inline-block;content:""}.placeholder-xs{min-height:.6em}.placeholder-sm{min-height:.8em}.placeholder-lg{min-height:1.2em}.placeholder-glow .placeholder{animation:placeholder-glow 2s ease-in-out infinite}@keyframes placeholder-glow{50%{opacity:.2}}.placeholder-wave{mask-image:linear-gradient(130deg, #000 55%, rgba(0, 0, 0, 0.8) 75%, #000 95%);-webkit-mask-image:linear-gradient(130deg, #000 55%, rgba(0, 0, 0, 0.8) 75%, #000 95%);mask-size:200% 100%;-webkit-mask-size:200% 100%;animation:placeholder-wave 2s linear infinite}@keyframes placeholder-wave{100%{mask-position:-200% 0%;-webkit-mask-position:-200% 0%}}.clearfix::after{display:block;clear:both;content:""}.text-bg-default{color:#fff !important;background-color:RGBA(var(--bs-default-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-primary{color:#fff !important;background-color:RGBA(var(--bs-primary-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-secondary{color:#fff !important;background-color:RGBA(var(--bs-secondary-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-success{color:#fff !important;background-color:RGBA(var(--bs-success-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-info{color:#fff !important;background-color:RGBA(var(--bs-info-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-warning{color:#fff !important;background-color:RGBA(var(--bs-warning-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-danger{color:#fff 
!important;background-color:RGBA(var(--bs-danger-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-light{color:#000 !important;background-color:RGBA(var(--bs-light-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-dark{color:#fff !important;background-color:RGBA(var(--bs-dark-rgb), var(--bs-bg-opacity, 1)) !important}.link-default{color:RGBA(var(--bs-default-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-default-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-default:hover,.link-default:focus{color:RGBA(42, 46, 51, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(42, 46, 51, var(--bs-link-underline-opacity, 1)) !important}.link-primary{color:RGBA(var(--bs-primary-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-primary-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-primary:hover,.link-primary:focus{color:RGBA(31, 102, 182, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(31, 102, 182, var(--bs-link-underline-opacity, 1)) !important}.link-secondary{color:RGBA(var(--bs-secondary-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-secondary-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-secondary:hover,.link-secondary:focus{color:RGBA(42, 46, 51, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(42, 46, 51, var(--bs-link-underline-opacity, 1)) !important}.link-success{color:RGBA(var(--bs-success-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-success-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-success:hover,.link-success:focus{color:RGBA(50, 146, 19, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(50, 146, 19, var(--bs-link-underline-opacity, 1)) !important}.link-info{color:RGBA(var(--bs-info-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-info-rgb), var(--bs-link-underline-opacity, 1)) 
!important}.link-info:hover,.link-info:focus{color:RGBA(122, 67, 150, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(122, 67, 150, var(--bs-link-underline-opacity, 1)) !important}.link-warning{color:RGBA(var(--bs-warning-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-warning-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-warning:hover,.link-warning:focus{color:RGBA(204, 94, 19, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(204, 94, 19, var(--bs-link-underline-opacity, 1)) !important}.link-danger{color:RGBA(var(--bs-danger-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-danger-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-danger:hover,.link-danger:focus{color:RGBA(204, 0, 46, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(204, 0, 46, var(--bs-link-underline-opacity, 1)) !important}.link-light{color:RGBA(var(--bs-light-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-light-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-light:hover,.link-light:focus{color:RGBA(249, 250, 251, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(249, 250, 251, var(--bs-link-underline-opacity, 1)) !important}.link-dark{color:RGBA(var(--bs-dark-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-dark-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-dark:hover,.link-dark:focus{color:RGBA(42, 46, 51, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(42, 46, 51, var(--bs-link-underline-opacity, 1)) !important}.link-body-emphasis{color:RGBA(var(--bs-emphasis-color-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-emphasis-color-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-body-emphasis:hover,.link-body-emphasis:focus{color:RGBA(var(--bs-emphasis-color-rgb), var(--bs-link-opacity, 0.75)) 
!important;text-decoration-color:RGBA(var(--bs-emphasis-color-rgb), var(--bs-link-underline-opacity, 0.75)) !important}.focus-ring:focus{outline:0;box-shadow:var(--bs-focus-ring-x, 0) var(--bs-focus-ring-y, 0) var(--bs-focus-ring-blur, 0) var(--bs-focus-ring-width) var(--bs-focus-ring-color)}.icon-link{display:inline-flex;gap:.375rem;align-items:center;-webkit-align-items:center;text-decoration-color:rgba(var(--bs-link-color-rgb), var(--bs-link-opacity, 0.5));text-underline-offset:.25em;backface-visibility:hidden;-webkit-backface-visibility:hidden;-moz-backface-visibility:hidden;-ms-backface-visibility:hidden;-o-backface-visibility:hidden}.icon-link>.bi{flex-shrink:0;-webkit-flex-shrink:0;width:1em;height:1em;fill:currentcolor;transition:.2s ease-in-out transform}@media(prefers-reduced-motion: reduce){.icon-link>.bi{transition:none}}.icon-link-hover:hover>.bi,.icon-link-hover:focus-visible>.bi{transform:var(--bs-icon-link-transform, translate3d(0.25em, 0, 0))}.ratio{position:relative;width:100%}.ratio::before{display:block;padding-top:var(--bs-aspect-ratio);content:""}.ratio>*{position:absolute;top:0;left:0;width:100%;height:100%}.ratio-1x1{--bs-aspect-ratio: 100%}.ratio-4x3{--bs-aspect-ratio: 75%}.ratio-16x9{--bs-aspect-ratio: 56.25%}.ratio-21x9{--bs-aspect-ratio: 42.8571428571%}.fixed-top{position:fixed;top:0;right:0;left:0;z-index:1030}.fixed-bottom{position:fixed;right:0;bottom:0;left:0;z-index:1030}.sticky-top{position:sticky;top:0;z-index:1020}.sticky-bottom{position:sticky;bottom:0;z-index:1020}@media(min-width: 576px){.sticky-sm-top{position:sticky;top:0;z-index:1020}.sticky-sm-bottom{position:sticky;bottom:0;z-index:1020}}@media(min-width: 768px){.sticky-md-top{position:sticky;top:0;z-index:1020}.sticky-md-bottom{position:sticky;bottom:0;z-index:1020}}@media(min-width: 992px){.sticky-lg-top{position:sticky;top:0;z-index:1020}.sticky-lg-bottom{position:sticky;bottom:0;z-index:1020}}@media(min-width: 
1200px){.sticky-xl-top{position:sticky;top:0;z-index:1020}.sticky-xl-bottom{position:sticky;bottom:0;z-index:1020}}@media(min-width: 1400px){.sticky-xxl-top{position:sticky;top:0;z-index:1020}.sticky-xxl-bottom{position:sticky;bottom:0;z-index:1020}}.hstack{display:flex;display:-webkit-flex;flex-direction:row;-webkit-flex-direction:row;align-items:center;-webkit-align-items:center;align-self:stretch;-webkit-align-self:stretch}.vstack{display:flex;display:-webkit-flex;flex:1 1 auto;-webkit-flex:1 1 auto;flex-direction:column;-webkit-flex-direction:column;align-self:stretch;-webkit-align-self:stretch}.visually-hidden,.visually-hidden-focusable:not(:focus):not(:focus-within){width:1px !important;height:1px !important;padding:0 !important;margin:-1px !important;overflow:hidden !important;clip:rect(0, 0, 0, 0) !important;white-space:nowrap !important;border:0 !important}.visually-hidden:not(caption),.visually-hidden-focusable:not(:focus):not(:focus-within):not(caption){position:absolute !important}.stretched-link::after{position:absolute;top:0;right:0;bottom:0;left:0;z-index:1;content:""}.text-truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.vr{display:inline-block;align-self:stretch;-webkit-align-self:stretch;width:1px;min-height:1em;background-color:currentcolor;opacity:.25}.align-baseline{vertical-align:baseline !important}.align-top{vertical-align:top !important}.align-middle{vertical-align:middle !important}.align-bottom{vertical-align:bottom !important}.align-text-bottom{vertical-align:text-bottom !important}.align-text-top{vertical-align:text-top !important}.float-start{float:left !important}.float-end{float:right !important}.float-none{float:none !important}.object-fit-contain{object-fit:contain !important}.object-fit-cover{object-fit:cover !important}.object-fit-fill{object-fit:fill !important}.object-fit-scale{object-fit:scale-down !important}.object-fit-none{object-fit:none !important}.opacity-0{opacity:0 !important}.opacity-25{opacity:.25 
!important}.opacity-50{opacity:.5 !important}.opacity-75{opacity:.75 !important}.opacity-100{opacity:1 !important}.overflow-auto{overflow:auto !important}.overflow-hidden{overflow:hidden !important}.overflow-visible{overflow:visible !important}.overflow-scroll{overflow:scroll !important}.overflow-x-auto{overflow-x:auto !important}.overflow-x-hidden{overflow-x:hidden !important}.overflow-x-visible{overflow-x:visible !important}.overflow-x-scroll{overflow-x:scroll !important}.overflow-y-auto{overflow-y:auto !important}.overflow-y-hidden{overflow-y:hidden !important}.overflow-y-visible{overflow-y:visible !important}.overflow-y-scroll{overflow-y:scroll !important}.d-inline{display:inline !important}.d-inline-block{display:inline-block !important}.d-block{display:block !important}.d-grid{display:grid !important}.d-inline-grid{display:inline-grid !important}.d-table{display:table !important}.d-table-row{display:table-row !important}.d-table-cell{display:table-cell !important}.d-flex{display:flex !important}.d-inline-flex{display:inline-flex !important}.d-none{display:none !important}.shadow{box-shadow:0 .5rem 1rem rgba(0,0,0,.15) !important}.shadow-sm{box-shadow:0 .125rem .25rem rgba(0,0,0,.075) !important}.shadow-lg{box-shadow:0 1rem 3rem rgba(0,0,0,.175) !important}.shadow-none{box-shadow:none !important}.focus-ring-default{--bs-focus-ring-color: rgba(var(--bs-default-rgb), var(--bs-focus-ring-opacity))}.focus-ring-primary{--bs-focus-ring-color: rgba(var(--bs-primary-rgb), var(--bs-focus-ring-opacity))}.focus-ring-secondary{--bs-focus-ring-color: rgba(var(--bs-secondary-rgb), var(--bs-focus-ring-opacity))}.focus-ring-success{--bs-focus-ring-color: rgba(var(--bs-success-rgb), var(--bs-focus-ring-opacity))}.focus-ring-info{--bs-focus-ring-color: rgba(var(--bs-info-rgb), var(--bs-focus-ring-opacity))}.focus-ring-warning{--bs-focus-ring-color: rgba(var(--bs-warning-rgb), var(--bs-focus-ring-opacity))}.focus-ring-danger{--bs-focus-ring-color: rgba(var(--bs-danger-rgb), 
var(--bs-focus-ring-opacity))}.focus-ring-light{--bs-focus-ring-color: rgba(var(--bs-light-rgb), var(--bs-focus-ring-opacity))}.focus-ring-dark{--bs-focus-ring-color: rgba(var(--bs-dark-rgb), var(--bs-focus-ring-opacity))}.position-static{position:static !important}.position-relative{position:relative !important}.position-absolute{position:absolute !important}.position-fixed{position:fixed !important}.position-sticky{position:sticky !important}.top-0{top:0 !important}.top-50{top:50% !important}.top-100{top:100% !important}.bottom-0{bottom:0 !important}.bottom-50{bottom:50% !important}.bottom-100{bottom:100% !important}.start-0{left:0 !important}.start-50{left:50% !important}.start-100{left:100% !important}.end-0{right:0 !important}.end-50{right:50% !important}.end-100{right:100% !important}.translate-middle{transform:translate(-50%, -50%) !important}.translate-middle-x{transform:translateX(-50%) !important}.translate-middle-y{transform:translateY(-50%) !important}.border{border:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-0{border:0 !important}.border-top{border-top:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-top-0{border-top:0 !important}.border-end{border-right:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-end-0{border-right:0 !important}.border-bottom{border-bottom:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-bottom-0{border-bottom:0 !important}.border-start{border-left:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-start-0{border-left:0 !important}.border-default{--bs-border-opacity: 1;border-color:rgba(var(--bs-default-rgb), var(--bs-border-opacity)) !important}.border-primary{--bs-border-opacity: 1;border-color:rgba(var(--bs-primary-rgb), var(--bs-border-opacity)) !important}.border-secondary{--bs-border-opacity: 
1;border-color:rgba(var(--bs-secondary-rgb), var(--bs-border-opacity)) !important}.border-success{--bs-border-opacity: 1;border-color:rgba(var(--bs-success-rgb), var(--bs-border-opacity)) !important}.border-info{--bs-border-opacity: 1;border-color:rgba(var(--bs-info-rgb), var(--bs-border-opacity)) !important}.border-warning{--bs-border-opacity: 1;border-color:rgba(var(--bs-warning-rgb), var(--bs-border-opacity)) !important}.border-danger{--bs-border-opacity: 1;border-color:rgba(var(--bs-danger-rgb), var(--bs-border-opacity)) !important}.border-light{--bs-border-opacity: 1;border-color:rgba(var(--bs-light-rgb), var(--bs-border-opacity)) !important}.border-dark{--bs-border-opacity: 1;border-color:rgba(var(--bs-dark-rgb), var(--bs-border-opacity)) !important}.border-black{--bs-border-opacity: 1;border-color:rgba(var(--bs-black-rgb), var(--bs-border-opacity)) !important}.border-white{--bs-border-opacity: 1;border-color:rgba(var(--bs-white-rgb), var(--bs-border-opacity)) !important}.border-primary-subtle{border-color:var(--bs-primary-border-subtle) !important}.border-secondary-subtle{border-color:var(--bs-secondary-border-subtle) !important}.border-success-subtle{border-color:var(--bs-success-border-subtle) !important}.border-info-subtle{border-color:var(--bs-info-border-subtle) !important}.border-warning-subtle{border-color:var(--bs-warning-border-subtle) !important}.border-danger-subtle{border-color:var(--bs-danger-border-subtle) !important}.border-light-subtle{border-color:var(--bs-light-border-subtle) !important}.border-dark-subtle{border-color:var(--bs-dark-border-subtle) !important}.border-1{border-width:1px !important}.border-2{border-width:2px !important}.border-3{border-width:3px !important}.border-4{border-width:4px !important}.border-5{border-width:5px !important}.border-opacity-10{--bs-border-opacity: 0.1}.border-opacity-25{--bs-border-opacity: 0.25}.border-opacity-50{--bs-border-opacity: 0.5}.border-opacity-75{--bs-border-opacity: 
0.75}.border-opacity-100{--bs-border-opacity: 1}.w-25{width:25% !important}.w-50{width:50% !important}.w-75{width:75% !important}.w-100{width:100% !important}.w-auto{width:auto !important}.mw-100{max-width:100% !important}.vw-100{width:100vw !important}.min-vw-100{min-width:100vw !important}.h-25{height:25% !important}.h-50{height:50% !important}.h-75{height:75% !important}.h-100{height:100% !important}.h-auto{height:auto !important}.mh-100{max-height:100% !important}.vh-100{height:100vh !important}.min-vh-100{min-height:100vh !important}.flex-fill{flex:1 1 auto !important}.flex-row{flex-direction:row !important}.flex-column{flex-direction:column !important}.flex-row-reverse{flex-direction:row-reverse !important}.flex-column-reverse{flex-direction:column-reverse !important}.flex-grow-0{flex-grow:0 !important}.flex-grow-1{flex-grow:1 !important}.flex-shrink-0{flex-shrink:0 !important}.flex-shrink-1{flex-shrink:1 !important}.flex-wrap{flex-wrap:wrap !important}.flex-nowrap{flex-wrap:nowrap !important}.flex-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-start{justify-content:flex-start !important}.justify-content-end{justify-content:flex-end !important}.justify-content-center{justify-content:center !important}.justify-content-between{justify-content:space-between !important}.justify-content-around{justify-content:space-around !important}.justify-content-evenly{justify-content:space-evenly !important}.align-items-start{align-items:flex-start !important}.align-items-end{align-items:flex-end !important}.align-items-center{align-items:center !important}.align-items-baseline{align-items:baseline !important}.align-items-stretch{align-items:stretch !important}.align-content-start{align-content:flex-start !important}.align-content-end{align-content:flex-end !important}.align-content-center{align-content:center !important}.align-content-between{align-content:space-between !important}.align-content-around{align-content:space-around 
!important}.align-content-stretch{align-content:stretch !important}.align-self-auto{align-self:auto !important}.align-self-start{align-self:flex-start !important}.align-self-end{align-self:flex-end !important}.align-self-center{align-self:center !important}.align-self-baseline{align-self:baseline !important}.align-self-stretch{align-self:stretch !important}.order-first{order:-1 !important}.order-0{order:0 !important}.order-1{order:1 !important}.order-2{order:2 !important}.order-3{order:3 !important}.order-4{order:4 !important}.order-5{order:5 !important}.order-last{order:6 !important}.m-0{margin:0 !important}.m-1{margin:.25rem !important}.m-2{margin:.5rem !important}.m-3{margin:1rem !important}.m-4{margin:1.5rem !important}.m-5{margin:3rem !important}.m-auto{margin:auto !important}.mx-0{margin-right:0 !important;margin-left:0 !important}.mx-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-3{margin-right:1rem !important;margin-left:1rem !important}.mx-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-5{margin-right:3rem !important;margin-left:3rem !important}.mx-auto{margin-right:auto !important;margin-left:auto !important}.my-0{margin-top:0 !important;margin-bottom:0 !important}.my-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-0{margin-top:0 !important}.mt-1{margin-top:.25rem !important}.mt-2{margin-top:.5rem !important}.mt-3{margin-top:1rem !important}.mt-4{margin-top:1.5rem !important}.mt-5{margin-top:3rem !important}.mt-auto{margin-top:auto !important}.me-0{margin-right:0 !important}.me-1{margin-right:.25rem 
!important}.me-2{margin-right:.5rem !important}.me-3{margin-right:1rem !important}.me-4{margin-right:1.5rem !important}.me-5{margin-right:3rem !important}.me-auto{margin-right:auto !important}.mb-0{margin-bottom:0 !important}.mb-1{margin-bottom:.25rem !important}.mb-2{margin-bottom:.5rem !important}.mb-3{margin-bottom:1rem !important}.mb-4{margin-bottom:1.5rem !important}.mb-5{margin-bottom:3rem !important}.mb-auto{margin-bottom:auto !important}.ms-0{margin-left:0 !important}.ms-1{margin-left:.25rem !important}.ms-2{margin-left:.5rem !important}.ms-3{margin-left:1rem !important}.ms-4{margin-left:1.5rem !important}.ms-5{margin-left:3rem !important}.ms-auto{margin-left:auto !important}.p-0{padding:0 !important}.p-1{padding:.25rem !important}.p-2{padding:.5rem !important}.p-3{padding:1rem !important}.p-4{padding:1.5rem !important}.p-5{padding:3rem !important}.px-0{padding-right:0 !important;padding-left:0 !important}.px-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-3{padding-right:1rem !important;padding-left:1rem !important}.px-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-5{padding-right:3rem !important;padding-left:3rem !important}.py-0{padding-top:0 !important;padding-bottom:0 !important}.py-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-0{padding-top:0 !important}.pt-1{padding-top:.25rem !important}.pt-2{padding-top:.5rem !important}.pt-3{padding-top:1rem !important}.pt-4{padding-top:1.5rem !important}.pt-5{padding-top:3rem !important}.pe-0{padding-right:0 !important}.pe-1{padding-right:.25rem !important}.pe-2{padding-right:.5rem !important}.pe-3{padding-right:1rem 
!important}.pe-4{padding-right:1.5rem !important}.pe-5{padding-right:3rem !important}.pb-0{padding-bottom:0 !important}.pb-1{padding-bottom:.25rem !important}.pb-2{padding-bottom:.5rem !important}.pb-3{padding-bottom:1rem !important}.pb-4{padding-bottom:1.5rem !important}.pb-5{padding-bottom:3rem !important}.ps-0{padding-left:0 !important}.ps-1{padding-left:.25rem !important}.ps-2{padding-left:.5rem !important}.ps-3{padding-left:1rem !important}.ps-4{padding-left:1.5rem !important}.ps-5{padding-left:3rem !important}.gap-0{gap:0 !important}.gap-1{gap:.25rem !important}.gap-2{gap:.5rem !important}.gap-3{gap:1rem !important}.gap-4{gap:1.5rem !important}.gap-5{gap:3rem !important}.row-gap-0{row-gap:0 !important}.row-gap-1{row-gap:.25rem !important}.row-gap-2{row-gap:.5rem !important}.row-gap-3{row-gap:1rem !important}.row-gap-4{row-gap:1.5rem !important}.row-gap-5{row-gap:3rem !important}.column-gap-0{column-gap:0 !important}.column-gap-1{column-gap:.25rem !important}.column-gap-2{column-gap:.5rem !important}.column-gap-3{column-gap:1rem !important}.column-gap-4{column-gap:1.5rem !important}.column-gap-5{column-gap:3rem !important}.font-monospace{font-family:var(--bs-font-monospace) !important}.fs-1{font-size:calc(1.325rem + 0.9vw) !important}.fs-2{font-size:calc(1.29rem + 0.48vw) !important}.fs-3{font-size:calc(1.27rem + 0.24vw) !important}.fs-4{font-size:1.25rem !important}.fs-5{font-size:1.1rem !important}.fs-6{font-size:1rem !important}.fst-italic{font-style:italic !important}.fst-normal{font-style:normal !important}.fw-lighter{font-weight:lighter !important}.fw-light{font-weight:300 !important}.fw-normal{font-weight:400 !important}.fw-medium{font-weight:500 !important}.fw-semibold{font-weight:600 !important}.fw-bold{font-weight:700 !important}.fw-bolder{font-weight:bolder !important}.lh-1{line-height:1 !important}.lh-sm{line-height:1.25 !important}.lh-base{line-height:1.5 !important}.lh-lg{line-height:2 !important}.text-start{text-align:left 
!important}.text-end{text-align:right !important}.text-center{text-align:center !important}.text-decoration-none{text-decoration:none !important}.text-decoration-underline{text-decoration:underline !important}.text-decoration-line-through{text-decoration:line-through !important}.text-lowercase{text-transform:lowercase !important}.text-uppercase{text-transform:uppercase !important}.text-capitalize{text-transform:capitalize !important}.text-wrap{white-space:normal !important}.text-nowrap{white-space:nowrap !important}.text-break{word-wrap:break-word !important;word-break:break-word !important}.text-default{--bs-text-opacity: 1;color:rgba(var(--bs-default-rgb), var(--bs-text-opacity)) !important}.text-primary{--bs-text-opacity: 1;color:rgba(var(--bs-primary-rgb), var(--bs-text-opacity)) !important}.text-secondary{--bs-text-opacity: 1;color:rgba(var(--bs-secondary-rgb), var(--bs-text-opacity)) !important}.text-success{--bs-text-opacity: 1;color:rgba(var(--bs-success-rgb), var(--bs-text-opacity)) !important}.text-info{--bs-text-opacity: 1;color:rgba(var(--bs-info-rgb), var(--bs-text-opacity)) !important}.text-warning{--bs-text-opacity: 1;color:rgba(var(--bs-warning-rgb), var(--bs-text-opacity)) !important}.text-danger{--bs-text-opacity: 1;color:rgba(var(--bs-danger-rgb), var(--bs-text-opacity)) !important}.text-light{--bs-text-opacity: 1;color:rgba(var(--bs-light-rgb), var(--bs-text-opacity)) !important}.text-dark{--bs-text-opacity: 1;color:rgba(var(--bs-dark-rgb), var(--bs-text-opacity)) !important}.text-black{--bs-text-opacity: 1;color:rgba(var(--bs-black-rgb), var(--bs-text-opacity)) !important}.text-white{--bs-text-opacity: 1;color:rgba(var(--bs-white-rgb), var(--bs-text-opacity)) !important}.text-body{--bs-text-opacity: 1;color:rgba(var(--bs-body-color-rgb), var(--bs-text-opacity)) !important}.text-muted{--bs-text-opacity: 1;color:var(--bs-secondary-color) !important}.text-black-50{--bs-text-opacity: 1;color:rgba(0,0,0,.5) 
!important}.text-white-50{--bs-text-opacity: 1;color:rgba(255,255,255,.5) !important}.text-body-secondary{--bs-text-opacity: 1;color:var(--bs-secondary-color) !important}.text-body-tertiary{--bs-text-opacity: 1;color:var(--bs-tertiary-color) !important}.text-body-emphasis{--bs-text-opacity: 1;color:var(--bs-emphasis-color) !important}.text-reset{--bs-text-opacity: 1;color:inherit !important}.text-opacity-25{--bs-text-opacity: 0.25}.text-opacity-50{--bs-text-opacity: 0.5}.text-opacity-75{--bs-text-opacity: 0.75}.text-opacity-100{--bs-text-opacity: 1}.text-primary-emphasis{color:var(--bs-primary-text-emphasis) !important}.text-secondary-emphasis{color:var(--bs-secondary-text-emphasis) !important}.text-success-emphasis{color:var(--bs-success-text-emphasis) !important}.text-info-emphasis{color:var(--bs-info-text-emphasis) !important}.text-warning-emphasis{color:var(--bs-warning-text-emphasis) !important}.text-danger-emphasis{color:var(--bs-danger-text-emphasis) !important}.text-light-emphasis{color:var(--bs-light-text-emphasis) !important}.text-dark-emphasis{color:var(--bs-dark-text-emphasis) !important}.link-opacity-10{--bs-link-opacity: 0.1}.link-opacity-10-hover:hover{--bs-link-opacity: 0.1}.link-opacity-25{--bs-link-opacity: 0.25}.link-opacity-25-hover:hover{--bs-link-opacity: 0.25}.link-opacity-50{--bs-link-opacity: 0.5}.link-opacity-50-hover:hover{--bs-link-opacity: 0.5}.link-opacity-75{--bs-link-opacity: 0.75}.link-opacity-75-hover:hover{--bs-link-opacity: 0.75}.link-opacity-100{--bs-link-opacity: 1}.link-opacity-100-hover:hover{--bs-link-opacity: 1}.link-offset-1{text-underline-offset:.125em !important}.link-offset-1-hover:hover{text-underline-offset:.125em !important}.link-offset-2{text-underline-offset:.25em !important}.link-offset-2-hover:hover{text-underline-offset:.25em !important}.link-offset-3{text-underline-offset:.375em !important}.link-offset-3-hover:hover{text-underline-offset:.375em !important}.link-underline-default{--bs-link-underline-opacity: 
1;text-decoration-color:rgba(var(--bs-default-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-primary{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-primary-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-secondary{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-secondary-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-success{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-success-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-info{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-info-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-warning{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-warning-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-danger{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-danger-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-light{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-light-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-dark{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-dark-rgb), var(--bs-link-underline-opacity)) !important}.link-underline{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-link-color-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-underline-opacity-0{--bs-link-underline-opacity: 0}.link-underline-opacity-0-hover:hover{--bs-link-underline-opacity: 0}.link-underline-opacity-10{--bs-link-underline-opacity: 0.1}.link-underline-opacity-10-hover:hover{--bs-link-underline-opacity: 0.1}.link-underline-opacity-25{--bs-link-underline-opacity: 0.25}.link-underline-opacity-25-hover:hover{--bs-link-underline-opacity: 0.25}.link-underline-opacity-50{--bs-link-underline-opacity: 0.5}.link-underline-opacity-50-hover:hover{--bs-link-underline-opacity: 
0.5}.link-underline-opacity-75{--bs-link-underline-opacity: 0.75}.link-underline-opacity-75-hover:hover{--bs-link-underline-opacity: 0.75}.link-underline-opacity-100{--bs-link-underline-opacity: 1}.link-underline-opacity-100-hover:hover{--bs-link-underline-opacity: 1}.bg-default{--bs-bg-opacity: 1;background-color:rgba(var(--bs-default-rgb), var(--bs-bg-opacity)) !important}.bg-primary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-primary-rgb), var(--bs-bg-opacity)) !important}.bg-secondary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-secondary-rgb), var(--bs-bg-opacity)) !important}.bg-success{--bs-bg-opacity: 1;background-color:rgba(var(--bs-success-rgb), var(--bs-bg-opacity)) !important}.bg-info{--bs-bg-opacity: 1;background-color:rgba(var(--bs-info-rgb), var(--bs-bg-opacity)) !important}.bg-warning{--bs-bg-opacity: 1;background-color:rgba(var(--bs-warning-rgb), var(--bs-bg-opacity)) !important}.bg-danger{--bs-bg-opacity: 1;background-color:rgba(var(--bs-danger-rgb), var(--bs-bg-opacity)) !important}.bg-light{--bs-bg-opacity: 1;background-color:rgba(var(--bs-light-rgb), var(--bs-bg-opacity)) !important}.bg-dark{--bs-bg-opacity: 1;background-color:rgba(var(--bs-dark-rgb), var(--bs-bg-opacity)) !important}.bg-black{--bs-bg-opacity: 1;background-color:rgba(var(--bs-black-rgb), var(--bs-bg-opacity)) !important}.bg-white{--bs-bg-opacity: 1;background-color:rgba(var(--bs-white-rgb), var(--bs-bg-opacity)) !important}.bg-body{--bs-bg-opacity: 1;background-color:rgba(var(--bs-body-bg-rgb), var(--bs-bg-opacity)) !important}.bg-transparent{--bs-bg-opacity: 1;background-color:rgba(0,0,0,0) !important}.bg-body-secondary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-secondary-bg-rgb), var(--bs-bg-opacity)) !important}.bg-body-tertiary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-tertiary-bg-rgb), var(--bs-bg-opacity)) !important}.bg-opacity-10{--bs-bg-opacity: 0.1}.bg-opacity-25{--bs-bg-opacity: 0.25}.bg-opacity-50{--bs-bg-opacity: 
0.5}.bg-opacity-75{--bs-bg-opacity: 0.75}.bg-opacity-100{--bs-bg-opacity: 1}.bg-primary-subtle{background-color:var(--bs-primary-bg-subtle) !important}.bg-secondary-subtle{background-color:var(--bs-secondary-bg-subtle) !important}.bg-success-subtle{background-color:var(--bs-success-bg-subtle) !important}.bg-info-subtle{background-color:var(--bs-info-bg-subtle) !important}.bg-warning-subtle{background-color:var(--bs-warning-bg-subtle) !important}.bg-danger-subtle{background-color:var(--bs-danger-bg-subtle) !important}.bg-light-subtle{background-color:var(--bs-light-bg-subtle) !important}.bg-dark-subtle{background-color:var(--bs-dark-bg-subtle) !important}.bg-gradient{background-image:var(--bs-gradient) !important}.user-select-all{user-select:all !important}.user-select-auto{user-select:auto !important}.user-select-none{user-select:none !important}.pe-none{pointer-events:none !important}.pe-auto{pointer-events:auto !important}.rounded{border-radius:var(--bs-border-radius) !important}.rounded-0{border-radius:0 !important}.rounded-1{border-radius:var(--bs-border-radius-sm) !important}.rounded-2{border-radius:var(--bs-border-radius) !important}.rounded-3{border-radius:var(--bs-border-radius-lg) !important}.rounded-4{border-radius:var(--bs-border-radius-xl) !important}.rounded-5{border-radius:var(--bs-border-radius-xxl) !important}.rounded-circle{border-radius:50% !important}.rounded-pill{border-radius:var(--bs-border-radius-pill) !important}.rounded-top{border-top-left-radius:var(--bs-border-radius) !important;border-top-right-radius:var(--bs-border-radius) !important}.rounded-top-0{border-top-left-radius:0 !important;border-top-right-radius:0 !important}.rounded-top-1{border-top-left-radius:var(--bs-border-radius-sm) !important;border-top-right-radius:var(--bs-border-radius-sm) !important}.rounded-top-2{border-top-left-radius:var(--bs-border-radius) !important;border-top-right-radius:var(--bs-border-radius) 
!important}.rounded-top-3{border-top-left-radius:var(--bs-border-radius-lg) !important;border-top-right-radius:var(--bs-border-radius-lg) !important}.rounded-top-4{border-top-left-radius:var(--bs-border-radius-xl) !important;border-top-right-radius:var(--bs-border-radius-xl) !important}.rounded-top-5{border-top-left-radius:var(--bs-border-radius-xxl) !important;border-top-right-radius:var(--bs-border-radius-xxl) !important}.rounded-top-circle{border-top-left-radius:50% !important;border-top-right-radius:50% !important}.rounded-top-pill{border-top-left-radius:var(--bs-border-radius-pill) !important;border-top-right-radius:var(--bs-border-radius-pill) !important}.rounded-end{border-top-right-radius:var(--bs-border-radius) !important;border-bottom-right-radius:var(--bs-border-radius) !important}.rounded-end-0{border-top-right-radius:0 !important;border-bottom-right-radius:0 !important}.rounded-end-1{border-top-right-radius:var(--bs-border-radius-sm) !important;border-bottom-right-radius:var(--bs-border-radius-sm) !important}.rounded-end-2{border-top-right-radius:var(--bs-border-radius) !important;border-bottom-right-radius:var(--bs-border-radius) !important}.rounded-end-3{border-top-right-radius:var(--bs-border-radius-lg) !important;border-bottom-right-radius:var(--bs-border-radius-lg) !important}.rounded-end-4{border-top-right-radius:var(--bs-border-radius-xl) !important;border-bottom-right-radius:var(--bs-border-radius-xl) !important}.rounded-end-5{border-top-right-radius:var(--bs-border-radius-xxl) !important;border-bottom-right-radius:var(--bs-border-radius-xxl) !important}.rounded-end-circle{border-top-right-radius:50% !important;border-bottom-right-radius:50% !important}.rounded-end-pill{border-top-right-radius:var(--bs-border-radius-pill) !important;border-bottom-right-radius:var(--bs-border-radius-pill) !important}.rounded-bottom{border-bottom-right-radius:var(--bs-border-radius) !important;border-bottom-left-radius:var(--bs-border-radius) 
!important}.rounded-bottom-0{border-bottom-right-radius:0 !important;border-bottom-left-radius:0 !important}.rounded-bottom-1{border-bottom-right-radius:var(--bs-border-radius-sm) !important;border-bottom-left-radius:var(--bs-border-radius-sm) !important}.rounded-bottom-2{border-bottom-right-radius:var(--bs-border-radius) !important;border-bottom-left-radius:var(--bs-border-radius) !important}.rounded-bottom-3{border-bottom-right-radius:var(--bs-border-radius-lg) !important;border-bottom-left-radius:var(--bs-border-radius-lg) !important}.rounded-bottom-4{border-bottom-right-radius:var(--bs-border-radius-xl) !important;border-bottom-left-radius:var(--bs-border-radius-xl) !important}.rounded-bottom-5{border-bottom-right-radius:var(--bs-border-radius-xxl) !important;border-bottom-left-radius:var(--bs-border-radius-xxl) !important}.rounded-bottom-circle{border-bottom-right-radius:50% !important;border-bottom-left-radius:50% !important}.rounded-bottom-pill{border-bottom-right-radius:var(--bs-border-radius-pill) !important;border-bottom-left-radius:var(--bs-border-radius-pill) !important}.rounded-start{border-bottom-left-radius:var(--bs-border-radius) !important;border-top-left-radius:var(--bs-border-radius) !important}.rounded-start-0{border-bottom-left-radius:0 !important;border-top-left-radius:0 !important}.rounded-start-1{border-bottom-left-radius:var(--bs-border-radius-sm) !important;border-top-left-radius:var(--bs-border-radius-sm) !important}.rounded-start-2{border-bottom-left-radius:var(--bs-border-radius) !important;border-top-left-radius:var(--bs-border-radius) !important}.rounded-start-3{border-bottom-left-radius:var(--bs-border-radius-lg) !important;border-top-left-radius:var(--bs-border-radius-lg) !important}.rounded-start-4{border-bottom-left-radius:var(--bs-border-radius-xl) !important;border-top-left-radius:var(--bs-border-radius-xl) !important}.rounded-start-5{border-bottom-left-radius:var(--bs-border-radius-xxl) 
!important;border-top-left-radius:var(--bs-border-radius-xxl) !important}.rounded-start-circle{border-bottom-left-radius:50% !important;border-top-left-radius:50% !important}.rounded-start-pill{border-bottom-left-radius:var(--bs-border-radius-pill) !important;border-top-left-radius:var(--bs-border-radius-pill) !important}.visible{visibility:visible !important}.invisible{visibility:hidden !important}.z-n1{z-index:-1 !important}.z-0{z-index:0 !important}.z-1{z-index:1 !important}.z-2{z-index:2 !important}.z-3{z-index:3 !important}@media(min-width: 576px){.float-sm-start{float:left !important}.float-sm-end{float:right !important}.float-sm-none{float:none !important}.object-fit-sm-contain{object-fit:contain !important}.object-fit-sm-cover{object-fit:cover !important}.object-fit-sm-fill{object-fit:fill !important}.object-fit-sm-scale{object-fit:scale-down !important}.object-fit-sm-none{object-fit:none !important}.d-sm-inline{display:inline !important}.d-sm-inline-block{display:inline-block !important}.d-sm-block{display:block !important}.d-sm-grid{display:grid !important}.d-sm-inline-grid{display:inline-grid !important}.d-sm-table{display:table !important}.d-sm-table-row{display:table-row !important}.d-sm-table-cell{display:table-cell !important}.d-sm-flex{display:flex !important}.d-sm-inline-flex{display:inline-flex !important}.d-sm-none{display:none !important}.flex-sm-fill{flex:1 1 auto !important}.flex-sm-row{flex-direction:row !important}.flex-sm-column{flex-direction:column !important}.flex-sm-row-reverse{flex-direction:row-reverse !important}.flex-sm-column-reverse{flex-direction:column-reverse !important}.flex-sm-grow-0{flex-grow:0 !important}.flex-sm-grow-1{flex-grow:1 !important}.flex-sm-shrink-0{flex-shrink:0 !important}.flex-sm-shrink-1{flex-shrink:1 !important}.flex-sm-wrap{flex-wrap:wrap !important}.flex-sm-nowrap{flex-wrap:nowrap !important}.flex-sm-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-sm-start{justify-content:flex-start 
!important}.justify-content-sm-end{justify-content:flex-end !important}.justify-content-sm-center{justify-content:center !important}.justify-content-sm-between{justify-content:space-between !important}.justify-content-sm-around{justify-content:space-around !important}.justify-content-sm-evenly{justify-content:space-evenly !important}.align-items-sm-start{align-items:flex-start !important}.align-items-sm-end{align-items:flex-end !important}.align-items-sm-center{align-items:center !important}.align-items-sm-baseline{align-items:baseline !important}.align-items-sm-stretch{align-items:stretch !important}.align-content-sm-start{align-content:flex-start !important}.align-content-sm-end{align-content:flex-end !important}.align-content-sm-center{align-content:center !important}.align-content-sm-between{align-content:space-between !important}.align-content-sm-around{align-content:space-around !important}.align-content-sm-stretch{align-content:stretch !important}.align-self-sm-auto{align-self:auto !important}.align-self-sm-start{align-self:flex-start !important}.align-self-sm-end{align-self:flex-end !important}.align-self-sm-center{align-self:center !important}.align-self-sm-baseline{align-self:baseline !important}.align-self-sm-stretch{align-self:stretch !important}.order-sm-first{order:-1 !important}.order-sm-0{order:0 !important}.order-sm-1{order:1 !important}.order-sm-2{order:2 !important}.order-sm-3{order:3 !important}.order-sm-4{order:4 !important}.order-sm-5{order:5 !important}.order-sm-last{order:6 !important}.m-sm-0{margin:0 !important}.m-sm-1{margin:.25rem !important}.m-sm-2{margin:.5rem !important}.m-sm-3{margin:1rem !important}.m-sm-4{margin:1.5rem !important}.m-sm-5{margin:3rem !important}.m-sm-auto{margin:auto !important}.mx-sm-0{margin-right:0 !important;margin-left:0 !important}.mx-sm-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-sm-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-sm-3{margin-right:1rem 
!important;margin-left:1rem !important}.mx-sm-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-sm-5{margin-right:3rem !important;margin-left:3rem !important}.mx-sm-auto{margin-right:auto !important;margin-left:auto !important}.my-sm-0{margin-top:0 !important;margin-bottom:0 !important}.my-sm-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-sm-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-sm-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-sm-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-sm-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-sm-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-sm-0{margin-top:0 !important}.mt-sm-1{margin-top:.25rem !important}.mt-sm-2{margin-top:.5rem !important}.mt-sm-3{margin-top:1rem !important}.mt-sm-4{margin-top:1.5rem !important}.mt-sm-5{margin-top:3rem !important}.mt-sm-auto{margin-top:auto !important}.me-sm-0{margin-right:0 !important}.me-sm-1{margin-right:.25rem !important}.me-sm-2{margin-right:.5rem !important}.me-sm-3{margin-right:1rem !important}.me-sm-4{margin-right:1.5rem !important}.me-sm-5{margin-right:3rem !important}.me-sm-auto{margin-right:auto !important}.mb-sm-0{margin-bottom:0 !important}.mb-sm-1{margin-bottom:.25rem !important}.mb-sm-2{margin-bottom:.5rem !important}.mb-sm-3{margin-bottom:1rem !important}.mb-sm-4{margin-bottom:1.5rem !important}.mb-sm-5{margin-bottom:3rem !important}.mb-sm-auto{margin-bottom:auto !important}.ms-sm-0{margin-left:0 !important}.ms-sm-1{margin-left:.25rem !important}.ms-sm-2{margin-left:.5rem !important}.ms-sm-3{margin-left:1rem !important}.ms-sm-4{margin-left:1.5rem !important}.ms-sm-5{margin-left:3rem !important}.ms-sm-auto{margin-left:auto !important}.p-sm-0{padding:0 !important}.p-sm-1{padding:.25rem !important}.p-sm-2{padding:.5rem !important}.p-sm-3{padding:1rem !important}.p-sm-4{padding:1.5rem !important}.p-sm-5{padding:3rem 
!important}.px-sm-0{padding-right:0 !important;padding-left:0 !important}.px-sm-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-sm-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-sm-3{padding-right:1rem !important;padding-left:1rem !important}.px-sm-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-sm-5{padding-right:3rem !important;padding-left:3rem !important}.py-sm-0{padding-top:0 !important;padding-bottom:0 !important}.py-sm-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-sm-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-sm-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-sm-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-sm-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-sm-0{padding-top:0 !important}.pt-sm-1{padding-top:.25rem !important}.pt-sm-2{padding-top:.5rem !important}.pt-sm-3{padding-top:1rem !important}.pt-sm-4{padding-top:1.5rem !important}.pt-sm-5{padding-top:3rem !important}.pe-sm-0{padding-right:0 !important}.pe-sm-1{padding-right:.25rem !important}.pe-sm-2{padding-right:.5rem !important}.pe-sm-3{padding-right:1rem !important}.pe-sm-4{padding-right:1.5rem !important}.pe-sm-5{padding-right:3rem !important}.pb-sm-0{padding-bottom:0 !important}.pb-sm-1{padding-bottom:.25rem !important}.pb-sm-2{padding-bottom:.5rem !important}.pb-sm-3{padding-bottom:1rem !important}.pb-sm-4{padding-bottom:1.5rem !important}.pb-sm-5{padding-bottom:3rem !important}.ps-sm-0{padding-left:0 !important}.ps-sm-1{padding-left:.25rem !important}.ps-sm-2{padding-left:.5rem !important}.ps-sm-3{padding-left:1rem !important}.ps-sm-4{padding-left:1.5rem !important}.ps-sm-5{padding-left:3rem !important}.gap-sm-0{gap:0 !important}.gap-sm-1{gap:.25rem !important}.gap-sm-2{gap:.5rem !important}.gap-sm-3{gap:1rem !important}.gap-sm-4{gap:1.5rem !important}.gap-sm-5{gap:3rem !important}.row-gap-sm-0{row-gap:0 
!important}.row-gap-sm-1{row-gap:.25rem !important}.row-gap-sm-2{row-gap:.5rem !important}.row-gap-sm-3{row-gap:1rem !important}.row-gap-sm-4{row-gap:1.5rem !important}.row-gap-sm-5{row-gap:3rem !important}.column-gap-sm-0{column-gap:0 !important}.column-gap-sm-1{column-gap:.25rem !important}.column-gap-sm-2{column-gap:.5rem !important}.column-gap-sm-3{column-gap:1rem !important}.column-gap-sm-4{column-gap:1.5rem !important}.column-gap-sm-5{column-gap:3rem !important}.text-sm-start{text-align:left !important}.text-sm-end{text-align:right !important}.text-sm-center{text-align:center !important}}@media(min-width: 768px){.float-md-start{float:left !important}.float-md-end{float:right !important}.float-md-none{float:none !important}.object-fit-md-contain{object-fit:contain !important}.object-fit-md-cover{object-fit:cover !important}.object-fit-md-fill{object-fit:fill !important}.object-fit-md-scale{object-fit:scale-down !important}.object-fit-md-none{object-fit:none !important}.d-md-inline{display:inline !important}.d-md-inline-block{display:inline-block !important}.d-md-block{display:block !important}.d-md-grid{display:grid !important}.d-md-inline-grid{display:inline-grid !important}.d-md-table{display:table !important}.d-md-table-row{display:table-row !important}.d-md-table-cell{display:table-cell !important}.d-md-flex{display:flex !important}.d-md-inline-flex{display:inline-flex !important}.d-md-none{display:none !important}.flex-md-fill{flex:1 1 auto !important}.flex-md-row{flex-direction:row !important}.flex-md-column{flex-direction:column !important}.flex-md-row-reverse{flex-direction:row-reverse !important}.flex-md-column-reverse{flex-direction:column-reverse !important}.flex-md-grow-0{flex-grow:0 !important}.flex-md-grow-1{flex-grow:1 !important}.flex-md-shrink-0{flex-shrink:0 !important}.flex-md-shrink-1{flex-shrink:1 !important}.flex-md-wrap{flex-wrap:wrap !important}.flex-md-nowrap{flex-wrap:nowrap !important}.flex-md-wrap-reverse{flex-wrap:wrap-reverse 
!important}.justify-content-md-start{justify-content:flex-start !important}.justify-content-md-end{justify-content:flex-end !important}.justify-content-md-center{justify-content:center !important}.justify-content-md-between{justify-content:space-between !important}.justify-content-md-around{justify-content:space-around !important}.justify-content-md-evenly{justify-content:space-evenly !important}.align-items-md-start{align-items:flex-start !important}.align-items-md-end{align-items:flex-end !important}.align-items-md-center{align-items:center !important}.align-items-md-baseline{align-items:baseline !important}.align-items-md-stretch{align-items:stretch !important}.align-content-md-start{align-content:flex-start !important}.align-content-md-end{align-content:flex-end !important}.align-content-md-center{align-content:center !important}.align-content-md-between{align-content:space-between !important}.align-content-md-around{align-content:space-around !important}.align-content-md-stretch{align-content:stretch !important}.align-self-md-auto{align-self:auto !important}.align-self-md-start{align-self:flex-start !important}.align-self-md-end{align-self:flex-end !important}.align-self-md-center{align-self:center !important}.align-self-md-baseline{align-self:baseline !important}.align-self-md-stretch{align-self:stretch !important}.order-md-first{order:-1 !important}.order-md-0{order:0 !important}.order-md-1{order:1 !important}.order-md-2{order:2 !important}.order-md-3{order:3 !important}.order-md-4{order:4 !important}.order-md-5{order:5 !important}.order-md-last{order:6 !important}.m-md-0{margin:0 !important}.m-md-1{margin:.25rem !important}.m-md-2{margin:.5rem !important}.m-md-3{margin:1rem !important}.m-md-4{margin:1.5rem !important}.m-md-5{margin:3rem !important}.m-md-auto{margin:auto !important}.mx-md-0{margin-right:0 !important;margin-left:0 !important}.mx-md-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-md-2{margin-right:.5rem 
!important;margin-left:.5rem !important}.mx-md-3{margin-right:1rem !important;margin-left:1rem !important}.mx-md-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-md-5{margin-right:3rem !important;margin-left:3rem !important}.mx-md-auto{margin-right:auto !important;margin-left:auto !important}.my-md-0{margin-top:0 !important;margin-bottom:0 !important}.my-md-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-md-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-md-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-md-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-md-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-md-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-md-0{margin-top:0 !important}.mt-md-1{margin-top:.25rem !important}.mt-md-2{margin-top:.5rem !important}.mt-md-3{margin-top:1rem !important}.mt-md-4{margin-top:1.5rem !important}.mt-md-5{margin-top:3rem !important}.mt-md-auto{margin-top:auto !important}.me-md-0{margin-right:0 !important}.me-md-1{margin-right:.25rem !important}.me-md-2{margin-right:.5rem !important}.me-md-3{margin-right:1rem !important}.me-md-4{margin-right:1.5rem !important}.me-md-5{margin-right:3rem !important}.me-md-auto{margin-right:auto !important}.mb-md-0{margin-bottom:0 !important}.mb-md-1{margin-bottom:.25rem !important}.mb-md-2{margin-bottom:.5rem !important}.mb-md-3{margin-bottom:1rem !important}.mb-md-4{margin-bottom:1.5rem !important}.mb-md-5{margin-bottom:3rem !important}.mb-md-auto{margin-bottom:auto !important}.ms-md-0{margin-left:0 !important}.ms-md-1{margin-left:.25rem !important}.ms-md-2{margin-left:.5rem !important}.ms-md-3{margin-left:1rem !important}.ms-md-4{margin-left:1.5rem !important}.ms-md-5{margin-left:3rem !important}.ms-md-auto{margin-left:auto !important}.p-md-0{padding:0 !important}.p-md-1{padding:.25rem !important}.p-md-2{padding:.5rem !important}.p-md-3{padding:1rem 
!important}.p-md-4{padding:1.5rem !important}.p-md-5{padding:3rem !important}.px-md-0{padding-right:0 !important;padding-left:0 !important}.px-md-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-md-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-md-3{padding-right:1rem !important;padding-left:1rem !important}.px-md-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-md-5{padding-right:3rem !important;padding-left:3rem !important}.py-md-0{padding-top:0 !important;padding-bottom:0 !important}.py-md-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-md-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-md-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-md-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-md-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-md-0{padding-top:0 !important}.pt-md-1{padding-top:.25rem !important}.pt-md-2{padding-top:.5rem !important}.pt-md-3{padding-top:1rem !important}.pt-md-4{padding-top:1.5rem !important}.pt-md-5{padding-top:3rem !important}.pe-md-0{padding-right:0 !important}.pe-md-1{padding-right:.25rem !important}.pe-md-2{padding-right:.5rem !important}.pe-md-3{padding-right:1rem !important}.pe-md-4{padding-right:1.5rem !important}.pe-md-5{padding-right:3rem !important}.pb-md-0{padding-bottom:0 !important}.pb-md-1{padding-bottom:.25rem !important}.pb-md-2{padding-bottom:.5rem !important}.pb-md-3{padding-bottom:1rem !important}.pb-md-4{padding-bottom:1.5rem !important}.pb-md-5{padding-bottom:3rem !important}.ps-md-0{padding-left:0 !important}.ps-md-1{padding-left:.25rem !important}.ps-md-2{padding-left:.5rem !important}.ps-md-3{padding-left:1rem !important}.ps-md-4{padding-left:1.5rem !important}.ps-md-5{padding-left:3rem !important}.gap-md-0{gap:0 !important}.gap-md-1{gap:.25rem !important}.gap-md-2{gap:.5rem !important}.gap-md-3{gap:1rem !important}.gap-md-4{gap:1.5rem 
!important}.gap-md-5{gap:3rem !important}.row-gap-md-0{row-gap:0 !important}.row-gap-md-1{row-gap:.25rem !important}.row-gap-md-2{row-gap:.5rem !important}.row-gap-md-3{row-gap:1rem !important}.row-gap-md-4{row-gap:1.5rem !important}.row-gap-md-5{row-gap:3rem !important}.column-gap-md-0{column-gap:0 !important}.column-gap-md-1{column-gap:.25rem !important}.column-gap-md-2{column-gap:.5rem !important}.column-gap-md-3{column-gap:1rem !important}.column-gap-md-4{column-gap:1.5rem !important}.column-gap-md-5{column-gap:3rem !important}.text-md-start{text-align:left !important}.text-md-end{text-align:right !important}.text-md-center{text-align:center !important}}@media(min-width: 992px){.float-lg-start{float:left !important}.float-lg-end{float:right !important}.float-lg-none{float:none !important}.object-fit-lg-contain{object-fit:contain !important}.object-fit-lg-cover{object-fit:cover !important}.object-fit-lg-fill{object-fit:fill !important}.object-fit-lg-scale{object-fit:scale-down !important}.object-fit-lg-none{object-fit:none !important}.d-lg-inline{display:inline !important}.d-lg-inline-block{display:inline-block !important}.d-lg-block{display:block !important}.d-lg-grid{display:grid !important}.d-lg-inline-grid{display:inline-grid !important}.d-lg-table{display:table !important}.d-lg-table-row{display:table-row !important}.d-lg-table-cell{display:table-cell !important}.d-lg-flex{display:flex !important}.d-lg-inline-flex{display:inline-flex !important}.d-lg-none{display:none !important}.flex-lg-fill{flex:1 1 auto !important}.flex-lg-row{flex-direction:row !important}.flex-lg-column{flex-direction:column !important}.flex-lg-row-reverse{flex-direction:row-reverse !important}.flex-lg-column-reverse{flex-direction:column-reverse !important}.flex-lg-grow-0{flex-grow:0 !important}.flex-lg-grow-1{flex-grow:1 !important}.flex-lg-shrink-0{flex-shrink:0 !important}.flex-lg-shrink-1{flex-shrink:1 !important}.flex-lg-wrap{flex-wrap:wrap 
!important}.flex-lg-nowrap{flex-wrap:nowrap !important}.flex-lg-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-lg-start{justify-content:flex-start !important}.justify-content-lg-end{justify-content:flex-end !important}.justify-content-lg-center{justify-content:center !important}.justify-content-lg-between{justify-content:space-between !important}.justify-content-lg-around{justify-content:space-around !important}.justify-content-lg-evenly{justify-content:space-evenly !important}.align-items-lg-start{align-items:flex-start !important}.align-items-lg-end{align-items:flex-end !important}.align-items-lg-center{align-items:center !important}.align-items-lg-baseline{align-items:baseline !important}.align-items-lg-stretch{align-items:stretch !important}.align-content-lg-start{align-content:flex-start !important}.align-content-lg-end{align-content:flex-end !important}.align-content-lg-center{align-content:center !important}.align-content-lg-between{align-content:space-between !important}.align-content-lg-around{align-content:space-around !important}.align-content-lg-stretch{align-content:stretch !important}.align-self-lg-auto{align-self:auto !important}.align-self-lg-start{align-self:flex-start !important}.align-self-lg-end{align-self:flex-end !important}.align-self-lg-center{align-self:center !important}.align-self-lg-baseline{align-self:baseline !important}.align-self-lg-stretch{align-self:stretch !important}.order-lg-first{order:-1 !important}.order-lg-0{order:0 !important}.order-lg-1{order:1 !important}.order-lg-2{order:2 !important}.order-lg-3{order:3 !important}.order-lg-4{order:4 !important}.order-lg-5{order:5 !important}.order-lg-last{order:6 !important}.m-lg-0{margin:0 !important}.m-lg-1{margin:.25rem !important}.m-lg-2{margin:.5rem !important}.m-lg-3{margin:1rem !important}.m-lg-4{margin:1.5rem !important}.m-lg-5{margin:3rem !important}.m-lg-auto{margin:auto !important}.mx-lg-0{margin-right:0 !important;margin-left:0 
!important}.mx-lg-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-lg-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-lg-3{margin-right:1rem !important;margin-left:1rem !important}.mx-lg-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-lg-5{margin-right:3rem !important;margin-left:3rem !important}.mx-lg-auto{margin-right:auto !important;margin-left:auto !important}.my-lg-0{margin-top:0 !important;margin-bottom:0 !important}.my-lg-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-lg-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-lg-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-lg-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-lg-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-lg-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-lg-0{margin-top:0 !important}.mt-lg-1{margin-top:.25rem !important}.mt-lg-2{margin-top:.5rem !important}.mt-lg-3{margin-top:1rem !important}.mt-lg-4{margin-top:1.5rem !important}.mt-lg-5{margin-top:3rem !important}.mt-lg-auto{margin-top:auto !important}.me-lg-0{margin-right:0 !important}.me-lg-1{margin-right:.25rem !important}.me-lg-2{margin-right:.5rem !important}.me-lg-3{margin-right:1rem !important}.me-lg-4{margin-right:1.5rem !important}.me-lg-5{margin-right:3rem !important}.me-lg-auto{margin-right:auto !important}.mb-lg-0{margin-bottom:0 !important}.mb-lg-1{margin-bottom:.25rem !important}.mb-lg-2{margin-bottom:.5rem !important}.mb-lg-3{margin-bottom:1rem !important}.mb-lg-4{margin-bottom:1.5rem !important}.mb-lg-5{margin-bottom:3rem !important}.mb-lg-auto{margin-bottom:auto !important}.ms-lg-0{margin-left:0 !important}.ms-lg-1{margin-left:.25rem !important}.ms-lg-2{margin-left:.5rem !important}.ms-lg-3{margin-left:1rem !important}.ms-lg-4{margin-left:1.5rem !important}.ms-lg-5{margin-left:3rem !important}.ms-lg-auto{margin-left:auto !important}.p-lg-0{padding:0 
!important}.p-lg-1{padding:.25rem !important}.p-lg-2{padding:.5rem !important}.p-lg-3{padding:1rem !important}.p-lg-4{padding:1.5rem !important}.p-lg-5{padding:3rem !important}.px-lg-0{padding-right:0 !important;padding-left:0 !important}.px-lg-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-lg-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-lg-3{padding-right:1rem !important;padding-left:1rem !important}.px-lg-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-lg-5{padding-right:3rem !important;padding-left:3rem !important}.py-lg-0{padding-top:0 !important;padding-bottom:0 !important}.py-lg-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-lg-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-lg-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-lg-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-lg-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-lg-0{padding-top:0 !important}.pt-lg-1{padding-top:.25rem !important}.pt-lg-2{padding-top:.5rem !important}.pt-lg-3{padding-top:1rem !important}.pt-lg-4{padding-top:1.5rem !important}.pt-lg-5{padding-top:3rem !important}.pe-lg-0{padding-right:0 !important}.pe-lg-1{padding-right:.25rem !important}.pe-lg-2{padding-right:.5rem !important}.pe-lg-3{padding-right:1rem !important}.pe-lg-4{padding-right:1.5rem !important}.pe-lg-5{padding-right:3rem !important}.pb-lg-0{padding-bottom:0 !important}.pb-lg-1{padding-bottom:.25rem !important}.pb-lg-2{padding-bottom:.5rem !important}.pb-lg-3{padding-bottom:1rem !important}.pb-lg-4{padding-bottom:1.5rem !important}.pb-lg-5{padding-bottom:3rem !important}.ps-lg-0{padding-left:0 !important}.ps-lg-1{padding-left:.25rem !important}.ps-lg-2{padding-left:.5rem !important}.ps-lg-3{padding-left:1rem !important}.ps-lg-4{padding-left:1.5rem !important}.ps-lg-5{padding-left:3rem !important}.gap-lg-0{gap:0 !important}.gap-lg-1{gap:.25rem 
!important}.gap-lg-2{gap:.5rem !important}.gap-lg-3{gap:1rem !important}.gap-lg-4{gap:1.5rem !important}.gap-lg-5{gap:3rem !important}.row-gap-lg-0{row-gap:0 !important}.row-gap-lg-1{row-gap:.25rem !important}.row-gap-lg-2{row-gap:.5rem !important}.row-gap-lg-3{row-gap:1rem !important}.row-gap-lg-4{row-gap:1.5rem !important}.row-gap-lg-5{row-gap:3rem !important}.column-gap-lg-0{column-gap:0 !important}.column-gap-lg-1{column-gap:.25rem !important}.column-gap-lg-2{column-gap:.5rem !important}.column-gap-lg-3{column-gap:1rem !important}.column-gap-lg-4{column-gap:1.5rem !important}.column-gap-lg-5{column-gap:3rem !important}.text-lg-start{text-align:left !important}.text-lg-end{text-align:right !important}.text-lg-center{text-align:center !important}}@media(min-width: 1200px){.float-xl-start{float:left !important}.float-xl-end{float:right !important}.float-xl-none{float:none !important}.object-fit-xl-contain{object-fit:contain !important}.object-fit-xl-cover{object-fit:cover !important}.object-fit-xl-fill{object-fit:fill !important}.object-fit-xl-scale{object-fit:scale-down !important}.object-fit-xl-none{object-fit:none !important}.d-xl-inline{display:inline !important}.d-xl-inline-block{display:inline-block !important}.d-xl-block{display:block !important}.d-xl-grid{display:grid !important}.d-xl-inline-grid{display:inline-grid !important}.d-xl-table{display:table !important}.d-xl-table-row{display:table-row !important}.d-xl-table-cell{display:table-cell !important}.d-xl-flex{display:flex !important}.d-xl-inline-flex{display:inline-flex !important}.d-xl-none{display:none !important}.flex-xl-fill{flex:1 1 auto !important}.flex-xl-row{flex-direction:row !important}.flex-xl-column{flex-direction:column !important}.flex-xl-row-reverse{flex-direction:row-reverse !important}.flex-xl-column-reverse{flex-direction:column-reverse !important}.flex-xl-grow-0{flex-grow:0 !important}.flex-xl-grow-1{flex-grow:1 !important}.flex-xl-shrink-0{flex-shrink:0 
!important}.flex-xl-shrink-1{flex-shrink:1 !important}.flex-xl-wrap{flex-wrap:wrap !important}.flex-xl-nowrap{flex-wrap:nowrap !important}.flex-xl-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-xl-start{justify-content:flex-start !important}.justify-content-xl-end{justify-content:flex-end !important}.justify-content-xl-center{justify-content:center !important}.justify-content-xl-between{justify-content:space-between !important}.justify-content-xl-around{justify-content:space-around !important}.justify-content-xl-evenly{justify-content:space-evenly !important}.align-items-xl-start{align-items:flex-start !important}.align-items-xl-end{align-items:flex-end !important}.align-items-xl-center{align-items:center !important}.align-items-xl-baseline{align-items:baseline !important}.align-items-xl-stretch{align-items:stretch !important}.align-content-xl-start{align-content:flex-start !important}.align-content-xl-end{align-content:flex-end !important}.align-content-xl-center{align-content:center !important}.align-content-xl-between{align-content:space-between !important}.align-content-xl-around{align-content:space-around !important}.align-content-xl-stretch{align-content:stretch !important}.align-self-xl-auto{align-self:auto !important}.align-self-xl-start{align-self:flex-start !important}.align-self-xl-end{align-self:flex-end !important}.align-self-xl-center{align-self:center !important}.align-self-xl-baseline{align-self:baseline !important}.align-self-xl-stretch{align-self:stretch !important}.order-xl-first{order:-1 !important}.order-xl-0{order:0 !important}.order-xl-1{order:1 !important}.order-xl-2{order:2 !important}.order-xl-3{order:3 !important}.order-xl-4{order:4 !important}.order-xl-5{order:5 !important}.order-xl-last{order:6 !important}.m-xl-0{margin:0 !important}.m-xl-1{margin:.25rem !important}.m-xl-2{margin:.5rem !important}.m-xl-3{margin:1rem !important}.m-xl-4{margin:1.5rem !important}.m-xl-5{margin:3rem !important}.m-xl-auto{margin:auto 
!important}.mx-xl-0{margin-right:0 !important;margin-left:0 !important}.mx-xl-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-xl-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-xl-3{margin-right:1rem !important;margin-left:1rem !important}.mx-xl-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-xl-5{margin-right:3rem !important;margin-left:3rem !important}.mx-xl-auto{margin-right:auto !important;margin-left:auto !important}.my-xl-0{margin-top:0 !important;margin-bottom:0 !important}.my-xl-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-xl-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-xl-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-xl-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-xl-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-xl-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-xl-0{margin-top:0 !important}.mt-xl-1{margin-top:.25rem !important}.mt-xl-2{margin-top:.5rem !important}.mt-xl-3{margin-top:1rem !important}.mt-xl-4{margin-top:1.5rem !important}.mt-xl-5{margin-top:3rem !important}.mt-xl-auto{margin-top:auto !important}.me-xl-0{margin-right:0 !important}.me-xl-1{margin-right:.25rem !important}.me-xl-2{margin-right:.5rem !important}.me-xl-3{margin-right:1rem !important}.me-xl-4{margin-right:1.5rem !important}.me-xl-5{margin-right:3rem !important}.me-xl-auto{margin-right:auto !important}.mb-xl-0{margin-bottom:0 !important}.mb-xl-1{margin-bottom:.25rem !important}.mb-xl-2{margin-bottom:.5rem !important}.mb-xl-3{margin-bottom:1rem !important}.mb-xl-4{margin-bottom:1.5rem !important}.mb-xl-5{margin-bottom:3rem !important}.mb-xl-auto{margin-bottom:auto !important}.ms-xl-0{margin-left:0 !important}.ms-xl-1{margin-left:.25rem !important}.ms-xl-2{margin-left:.5rem !important}.ms-xl-3{margin-left:1rem !important}.ms-xl-4{margin-left:1.5rem !important}.ms-xl-5{margin-left:3rem 
!important}.ms-xl-auto{margin-left:auto !important}.p-xl-0{padding:0 !important}.p-xl-1{padding:.25rem !important}.p-xl-2{padding:.5rem !important}.p-xl-3{padding:1rem !important}.p-xl-4{padding:1.5rem !important}.p-xl-5{padding:3rem !important}.px-xl-0{padding-right:0 !important;padding-left:0 !important}.px-xl-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-xl-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-xl-3{padding-right:1rem !important;padding-left:1rem !important}.px-xl-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-xl-5{padding-right:3rem !important;padding-left:3rem !important}.py-xl-0{padding-top:0 !important;padding-bottom:0 !important}.py-xl-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-xl-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-xl-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-xl-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-xl-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-xl-0{padding-top:0 !important}.pt-xl-1{padding-top:.25rem !important}.pt-xl-2{padding-top:.5rem !important}.pt-xl-3{padding-top:1rem !important}.pt-xl-4{padding-top:1.5rem !important}.pt-xl-5{padding-top:3rem !important}.pe-xl-0{padding-right:0 !important}.pe-xl-1{padding-right:.25rem !important}.pe-xl-2{padding-right:.5rem !important}.pe-xl-3{padding-right:1rem !important}.pe-xl-4{padding-right:1.5rem !important}.pe-xl-5{padding-right:3rem !important}.pb-xl-0{padding-bottom:0 !important}.pb-xl-1{padding-bottom:.25rem !important}.pb-xl-2{padding-bottom:.5rem !important}.pb-xl-3{padding-bottom:1rem !important}.pb-xl-4{padding-bottom:1.5rem !important}.pb-xl-5{padding-bottom:3rem !important}.ps-xl-0{padding-left:0 !important}.ps-xl-1{padding-left:.25rem !important}.ps-xl-2{padding-left:.5rem !important}.ps-xl-3{padding-left:1rem !important}.ps-xl-4{padding-left:1.5rem !important}.ps-xl-5{padding-left:3rem 
!important}.gap-xl-0{gap:0 !important}.gap-xl-1{gap:.25rem !important}.gap-xl-2{gap:.5rem !important}.gap-xl-3{gap:1rem !important}.gap-xl-4{gap:1.5rem !important}.gap-xl-5{gap:3rem !important}.row-gap-xl-0{row-gap:0 !important}.row-gap-xl-1{row-gap:.25rem !important}.row-gap-xl-2{row-gap:.5rem !important}.row-gap-xl-3{row-gap:1rem !important}.row-gap-xl-4{row-gap:1.5rem !important}.row-gap-xl-5{row-gap:3rem !important}.column-gap-xl-0{column-gap:0 !important}.column-gap-xl-1{column-gap:.25rem !important}.column-gap-xl-2{column-gap:.5rem !important}.column-gap-xl-3{column-gap:1rem !important}.column-gap-xl-4{column-gap:1.5rem !important}.column-gap-xl-5{column-gap:3rem !important}.text-xl-start{text-align:left !important}.text-xl-end{text-align:right !important}.text-xl-center{text-align:center !important}}@media(min-width: 1400px){.float-xxl-start{float:left !important}.float-xxl-end{float:right !important}.float-xxl-none{float:none !important}.object-fit-xxl-contain{object-fit:contain !important}.object-fit-xxl-cover{object-fit:cover !important}.object-fit-xxl-fill{object-fit:fill !important}.object-fit-xxl-scale{object-fit:scale-down !important}.object-fit-xxl-none{object-fit:none !important}.d-xxl-inline{display:inline !important}.d-xxl-inline-block{display:inline-block !important}.d-xxl-block{display:block !important}.d-xxl-grid{display:grid !important}.d-xxl-inline-grid{display:inline-grid !important}.d-xxl-table{display:table !important}.d-xxl-table-row{display:table-row !important}.d-xxl-table-cell{display:table-cell !important}.d-xxl-flex{display:flex !important}.d-xxl-inline-flex{display:inline-flex !important}.d-xxl-none{display:none !important}.flex-xxl-fill{flex:1 1 auto !important}.flex-xxl-row{flex-direction:row !important}.flex-xxl-column{flex-direction:column !important}.flex-xxl-row-reverse{flex-direction:row-reverse !important}.flex-xxl-column-reverse{flex-direction:column-reverse !important}.flex-xxl-grow-0{flex-grow:0 
!important}.flex-xxl-grow-1{flex-grow:1 !important}.flex-xxl-shrink-0{flex-shrink:0 !important}.flex-xxl-shrink-1{flex-shrink:1 !important}.flex-xxl-wrap{flex-wrap:wrap !important}.flex-xxl-nowrap{flex-wrap:nowrap !important}.flex-xxl-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-xxl-start{justify-content:flex-start !important}.justify-content-xxl-end{justify-content:flex-end !important}.justify-content-xxl-center{justify-content:center !important}.justify-content-xxl-between{justify-content:space-between !important}.justify-content-xxl-around{justify-content:space-around !important}.justify-content-xxl-evenly{justify-content:space-evenly !important}.align-items-xxl-start{align-items:flex-start !important}.align-items-xxl-end{align-items:flex-end !important}.align-items-xxl-center{align-items:center !important}.align-items-xxl-baseline{align-items:baseline !important}.align-items-xxl-stretch{align-items:stretch !important}.align-content-xxl-start{align-content:flex-start !important}.align-content-xxl-end{align-content:flex-end !important}.align-content-xxl-center{align-content:center !important}.align-content-xxl-between{align-content:space-between !important}.align-content-xxl-around{align-content:space-around !important}.align-content-xxl-stretch{align-content:stretch !important}.align-self-xxl-auto{align-self:auto !important}.align-self-xxl-start{align-self:flex-start !important}.align-self-xxl-end{align-self:flex-end !important}.align-self-xxl-center{align-self:center !important}.align-self-xxl-baseline{align-self:baseline !important}.align-self-xxl-stretch{align-self:stretch !important}.order-xxl-first{order:-1 !important}.order-xxl-0{order:0 !important}.order-xxl-1{order:1 !important}.order-xxl-2{order:2 !important}.order-xxl-3{order:3 !important}.order-xxl-4{order:4 !important}.order-xxl-5{order:5 !important}.order-xxl-last{order:6 !important}.m-xxl-0{margin:0 !important}.m-xxl-1{margin:.25rem !important}.m-xxl-2{margin:.5rem 
!important}.m-xxl-3{margin:1rem !important}.m-xxl-4{margin:1.5rem !important}.m-xxl-5{margin:3rem !important}.m-xxl-auto{margin:auto !important}.mx-xxl-0{margin-right:0 !important;margin-left:0 !important}.mx-xxl-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-xxl-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-xxl-3{margin-right:1rem !important;margin-left:1rem !important}.mx-xxl-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-xxl-5{margin-right:3rem !important;margin-left:3rem !important}.mx-xxl-auto{margin-right:auto !important;margin-left:auto !important}.my-xxl-0{margin-top:0 !important;margin-bottom:0 !important}.my-xxl-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-xxl-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-xxl-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-xxl-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-xxl-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-xxl-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-xxl-0{margin-top:0 !important}.mt-xxl-1{margin-top:.25rem !important}.mt-xxl-2{margin-top:.5rem !important}.mt-xxl-3{margin-top:1rem !important}.mt-xxl-4{margin-top:1.5rem !important}.mt-xxl-5{margin-top:3rem !important}.mt-xxl-auto{margin-top:auto !important}.me-xxl-0{margin-right:0 !important}.me-xxl-1{margin-right:.25rem !important}.me-xxl-2{margin-right:.5rem !important}.me-xxl-3{margin-right:1rem !important}.me-xxl-4{margin-right:1.5rem !important}.me-xxl-5{margin-right:3rem !important}.me-xxl-auto{margin-right:auto !important}.mb-xxl-0{margin-bottom:0 !important}.mb-xxl-1{margin-bottom:.25rem !important}.mb-xxl-2{margin-bottom:.5rem !important}.mb-xxl-3{margin-bottom:1rem !important}.mb-xxl-4{margin-bottom:1.5rem !important}.mb-xxl-5{margin-bottom:3rem !important}.mb-xxl-auto{margin-bottom:auto !important}.ms-xxl-0{margin-left:0 !important}.ms-xxl-1{margin-left:.25rem 
!important}.ms-xxl-2{margin-left:.5rem !important}.ms-xxl-3{margin-left:1rem !important}.ms-xxl-4{margin-left:1.5rem !important}.ms-xxl-5{margin-left:3rem !important}.ms-xxl-auto{margin-left:auto !important}.p-xxl-0{padding:0 !important}.p-xxl-1{padding:.25rem !important}.p-xxl-2{padding:.5rem !important}.p-xxl-3{padding:1rem !important}.p-xxl-4{padding:1.5rem !important}.p-xxl-5{padding:3rem !important}.px-xxl-0{padding-right:0 !important;padding-left:0 !important}.px-xxl-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-xxl-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-xxl-3{padding-right:1rem !important;padding-left:1rem !important}.px-xxl-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-xxl-5{padding-right:3rem !important;padding-left:3rem !important}.py-xxl-0{padding-top:0 !important;padding-bottom:0 !important}.py-xxl-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-xxl-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-xxl-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-xxl-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-xxl-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-xxl-0{padding-top:0 !important}.pt-xxl-1{padding-top:.25rem !important}.pt-xxl-2{padding-top:.5rem !important}.pt-xxl-3{padding-top:1rem !important}.pt-xxl-4{padding-top:1.5rem !important}.pt-xxl-5{padding-top:3rem !important}.pe-xxl-0{padding-right:0 !important}.pe-xxl-1{padding-right:.25rem !important}.pe-xxl-2{padding-right:.5rem !important}.pe-xxl-3{padding-right:1rem !important}.pe-xxl-4{padding-right:1.5rem !important}.pe-xxl-5{padding-right:3rem !important}.pb-xxl-0{padding-bottom:0 !important}.pb-xxl-1{padding-bottom:.25rem !important}.pb-xxl-2{padding-bottom:.5rem !important}.pb-xxl-3{padding-bottom:1rem !important}.pb-xxl-4{padding-bottom:1.5rem !important}.pb-xxl-5{padding-bottom:3rem !important}.ps-xxl-0{padding-left:0 
!important}.ps-xxl-1{padding-left:.25rem !important}.ps-xxl-2{padding-left:.5rem !important}.ps-xxl-3{padding-left:1rem !important}.ps-xxl-4{padding-left:1.5rem !important}.ps-xxl-5{padding-left:3rem !important}.gap-xxl-0{gap:0 !important}.gap-xxl-1{gap:.25rem !important}.gap-xxl-2{gap:.5rem !important}.gap-xxl-3{gap:1rem !important}.gap-xxl-4{gap:1.5rem !important}.gap-xxl-5{gap:3rem !important}.row-gap-xxl-0{row-gap:0 !important}.row-gap-xxl-1{row-gap:.25rem !important}.row-gap-xxl-2{row-gap:.5rem !important}.row-gap-xxl-3{row-gap:1rem !important}.row-gap-xxl-4{row-gap:1.5rem !important}.row-gap-xxl-5{row-gap:3rem !important}.column-gap-xxl-0{column-gap:0 !important}.column-gap-xxl-1{column-gap:.25rem !important}.column-gap-xxl-2{column-gap:.5rem !important}.column-gap-xxl-3{column-gap:1rem !important}.column-gap-xxl-4{column-gap:1.5rem !important}.column-gap-xxl-5{column-gap:3rem !important}.text-xxl-start{text-align:left !important}.text-xxl-end{text-align:right !important}.text-xxl-center{text-align:center !important}}.bg-default{color:#fff}.bg-primary{color:#fff}.bg-secondary{color:#fff}.bg-success{color:#fff}.bg-info{color:#fff}.bg-warning{color:#fff}.bg-danger{color:#fff}.bg-light{color:#000}.bg-dark{color:#fff}@media(min-width: 1200px){.fs-1{font-size:2rem !important}.fs-2{font-size:1.65rem !important}.fs-3{font-size:1.45rem !important}}@media print{.d-print-inline{display:inline !important}.d-print-inline-block{display:inline-block !important}.d-print-block{display:block !important}.d-print-grid{display:grid !important}.d-print-inline-grid{display:inline-grid !important}.d-print-table{display:table !important}.d-print-table-row{display:table-row !important}.d-print-table-cell{display:table-cell !important}.d-print-flex{display:flex !important}.d-print-inline-flex{display:inline-flex !important}.d-print-none{display:none !important}}:root{--bslib-spacer: 1rem;--bslib-mb-spacer: var(--bslib-spacer, 
1rem)}.bslib-mb-spacing{margin-bottom:var(--bslib-mb-spacer)}.bslib-gap-spacing{gap:var(--bslib-mb-spacer)}.bslib-gap-spacing>.bslib-mb-spacing,.bslib-gap-spacing>.form-group,.bslib-gap-spacing>p,.bslib-gap-spacing>pre{margin-bottom:0}.html-fill-container>.html-fill-item.bslib-mb-spacing{margin-bottom:0}.tab-content>.tab-pane.html-fill-container{display:none}.tab-content>.active.html-fill-container{display:flex}.tab-content.html-fill-container{padding:0}.bg-blue{--bslib-color-bg: #2780e3;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-blue{--bslib-color-fg: #2780e3;color:var(--bslib-color-fg)}.bg-indigo{--bslib-color-bg: #6610f2;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-indigo{--bslib-color-fg: #6610f2;color:var(--bslib-color-fg)}.bg-purple{--bslib-color-bg: #613d7c;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-purple{--bslib-color-fg: #613d7c;color:var(--bslib-color-fg)}.bg-pink{--bslib-color-bg: #e83e8c;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-pink{--bslib-color-fg: #e83e8c;color:var(--bslib-color-fg)}.bg-red{--bslib-color-bg: #ff0039;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-red{--bslib-color-fg: #ff0039;color:var(--bslib-color-fg)}.bg-orange{--bslib-color-bg: #f0ad4e;--bslib-color-fg: #000;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-orange{--bslib-color-fg: #f0ad4e;color:var(--bslib-color-fg)}.bg-yellow{--bslib-color-bg: #ff7518;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-yellow{--bslib-color-fg: #ff7518;color:var(--bslib-color-fg)}.bg-green{--bslib-color-bg: #3fb618;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-green{--bslib-color-fg: 
#3fb618;color:var(--bslib-color-fg)}.bg-teal{--bslib-color-bg: #20c997;--bslib-color-fg: #000;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-teal{--bslib-color-fg: #20c997;color:var(--bslib-color-fg)}.bg-cyan{--bslib-color-bg: #9954bb;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-cyan{--bslib-color-fg: #9954bb;color:var(--bslib-color-fg)}.text-default{--bslib-color-fg: #343a40}.bg-default{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.text-primary{--bslib-color-fg: #2780e3}.bg-primary{--bslib-color-bg: #2780e3;--bslib-color-fg: #fff}.text-secondary{--bslib-color-fg: #343a40}.bg-secondary{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.text-success{--bslib-color-fg: #3fb618}.bg-success{--bslib-color-bg: #3fb618;--bslib-color-fg: #fff}.text-info{--bslib-color-fg: #9954bb}.bg-info{--bslib-color-bg: #9954bb;--bslib-color-fg: #fff}.text-warning{--bslib-color-fg: #ff7518}.bg-warning{--bslib-color-bg: #ff7518;--bslib-color-fg: #fff}.text-danger{--bslib-color-fg: #ff0039}.bg-danger{--bslib-color-bg: #ff0039;--bslib-color-fg: #fff}.text-light{--bslib-color-fg: #f8f9fa}.bg-light{--bslib-color-bg: #f8f9fa;--bslib-color-fg: #000}.text-dark{--bslib-color-fg: #343a40}.bg-dark{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.bg-gradient-blue-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #4053e9;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #4053e9;color:#fff}.bg-gradient-blue-purple{--bslib-color-fg: #fff;--bslib-color-bg: #3e65ba;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #3e65ba;color:#fff}.bg-gradient-blue-pink{--bslib-color-fg: #fff;--bslib-color-bg: #7466c0;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) 
#7466c0;color:#fff}.bg-gradient-blue-red{--bslib-color-fg: #fff;--bslib-color-bg: #7d4d9f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #7d4d9f;color:#fff}.bg-gradient-blue-orange{--bslib-color-fg: #fff;--bslib-color-bg: #7792a7;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #7792a7;color:#fff}.bg-gradient-blue-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #7d7c92;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #7d7c92;color:#fff}.bg-gradient-blue-green{--bslib-color-fg: #fff;--bslib-color-bg: #319692;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #319692;color:#fff}.bg-gradient-blue-teal{--bslib-color-fg: #fff;--bslib-color-bg: #249dc5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #249dc5;color:#fff}.bg-gradient-blue-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #556ed3;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #556ed3;color:#fff}.bg-gradient-indigo-blue{--bslib-color-fg: #fff;--bslib-color-bg: #4d3dec;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #4d3dec;color:#fff}.bg-gradient-indigo-purple{--bslib-color-fg: #fff;--bslib-color-bg: #6422c3;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #6422c3;color:#fff}.bg-gradient-indigo-pink{--bslib-color-fg: #fff;--bslib-color-bg: #9a22c9;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 
var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #9a22c9;color:#fff}.bg-gradient-indigo-red{--bslib-color-fg: #fff;--bslib-color-bg: #a30aa8;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #a30aa8;color:#fff}.bg-gradient-indigo-orange{--bslib-color-fg: #fff;--bslib-color-bg: #9d4fb0;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #9d4fb0;color:#fff}.bg-gradient-indigo-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #a3389b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #a3389b;color:#fff}.bg-gradient-indigo-green{--bslib-color-fg: #fff;--bslib-color-bg: #56529b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #56529b;color:#fff}.bg-gradient-indigo-teal{--bslib-color-fg: #fff;--bslib-color-bg: #4a5ace;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #4a5ace;color:#fff}.bg-gradient-indigo-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #7a2bdc;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #7a2bdc;color:#fff}.bg-gradient-purple-blue{--bslib-color-fg: #fff;--bslib-color-bg: #4a58a5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #4a58a5;color:#fff}.bg-gradient-purple-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #632bab;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #632bab;color:#fff}.bg-gradient-purple-pink{--bslib-color-fg: #fff;--bslib-color-bg: 
#973d82;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #973d82;color:#fff}.bg-gradient-purple-red{--bslib-color-fg: #fff;--bslib-color-bg: #a02561;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #a02561;color:#fff}.bg-gradient-purple-orange{--bslib-color-fg: #fff;--bslib-color-bg: #9a6a6a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #9a6a6a;color:#fff}.bg-gradient-purple-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #a05354;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #a05354;color:#fff}.bg-gradient-purple-green{--bslib-color-fg: #fff;--bslib-color-bg: #536d54;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #536d54;color:#fff}.bg-gradient-purple-teal{--bslib-color-fg: #fff;--bslib-color-bg: #477587;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #477587;color:#fff}.bg-gradient-purple-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #774695;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #774695;color:#fff}.bg-gradient-pink-blue{--bslib-color-fg: #fff;--bslib-color-bg: #9b58af;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #9b58af;color:#fff}.bg-gradient-pink-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #b42cb5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) 
#b42cb5;color:#fff}.bg-gradient-pink-purple{--bslib-color-fg: #fff;--bslib-color-bg: #b23e86;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #b23e86;color:#fff}.bg-gradient-pink-red{--bslib-color-fg: #fff;--bslib-color-bg: #f1256b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #f1256b;color:#fff}.bg-gradient-pink-orange{--bslib-color-fg: #fff;--bslib-color-bg: #eb6a73;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #eb6a73;color:#fff}.bg-gradient-pink-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #f1545e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #f1545e;color:#fff}.bg-gradient-pink-green{--bslib-color-fg: #fff;--bslib-color-bg: #a46e5e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #a46e5e;color:#fff}.bg-gradient-pink-teal{--bslib-color-fg: #fff;--bslib-color-bg: #987690;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #987690;color:#fff}.bg-gradient-pink-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #c8479f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #c8479f;color:#fff}.bg-gradient-red-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a9337d;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a9337d;color:#fff}.bg-gradient-red-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #c20683;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 
var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #c20683;color:#fff}.bg-gradient-red-purple{--bslib-color-fg: #fff;--bslib-color-bg: #c01854;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #c01854;color:#fff}.bg-gradient-red-pink{--bslib-color-fg: #fff;--bslib-color-bg: #f6195a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #f6195a;color:#fff}.bg-gradient-red-orange{--bslib-color-fg: #fff;--bslib-color-bg: #f94541;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #f94541;color:#fff}.bg-gradient-red-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #ff2f2c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #ff2f2c;color:#fff}.bg-gradient-red-green{--bslib-color-fg: #fff;--bslib-color-bg: #b2492c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #b2492c;color:#fff}.bg-gradient-red-teal{--bslib-color-fg: #fff;--bslib-color-bg: #a6505f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #a6505f;color:#fff}.bg-gradient-red-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #d6226d;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #d6226d;color:#fff}.bg-gradient-orange-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a09b8a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a09b8a;color:#fff}.bg-gradient-orange-indigo{--bslib-color-fg: #fff;--bslib-color-bg: 
#b96e90;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #b96e90;color:#fff}.bg-gradient-orange-purple{--bslib-color-fg: #fff;--bslib-color-bg: #b78060;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #b78060;color:#fff}.bg-gradient-orange-pink{--bslib-color-fg: #fff;--bslib-color-bg: #ed8167;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #ed8167;color:#fff}.bg-gradient-orange-red{--bslib-color-fg: #fff;--bslib-color-bg: #f66846;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #f66846;color:#fff}.bg-gradient-orange-yellow{--bslib-color-fg: #000;--bslib-color-bg: #f69738;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #f69738;color:#000}.bg-gradient-orange-green{--bslib-color-fg: #000;--bslib-color-bg: #a9b138;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #a9b138;color:#000}.bg-gradient-orange-teal{--bslib-color-fg: #000;--bslib-color-bg: #9db86b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #9db86b;color:#000}.bg-gradient-orange-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #cd897a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #cd897a;color:#fff}.bg-gradient-yellow-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a97969;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) 
#a97969;color:#fff}.bg-gradient-yellow-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #c24d6f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #c24d6f;color:#fff}.bg-gradient-yellow-purple{--bslib-color-fg: #fff;--bslib-color-bg: #c05f40;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #c05f40;color:#fff}.bg-gradient-yellow-pink{--bslib-color-fg: #fff;--bslib-color-bg: #f65f46;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #f65f46;color:#fff}.bg-gradient-yellow-red{--bslib-color-fg: #fff;--bslib-color-bg: #ff4625;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #ff4625;color:#fff}.bg-gradient-yellow-orange{--bslib-color-fg: #000;--bslib-color-bg: #f98b2e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #f98b2e;color:#000}.bg-gradient-yellow-green{--bslib-color-fg: #fff;--bslib-color-bg: #b28f18;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #b28f18;color:#fff}.bg-gradient-yellow-teal{--bslib-color-fg: #fff;--bslib-color-bg: #a6974b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #a6974b;color:#fff}.bg-gradient-yellow-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #d66859;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #d66859;color:#fff}.bg-gradient-green-blue{--bslib-color-fg: #fff;--bslib-color-bg: #35a069;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 
var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #35a069;color:#fff}.bg-gradient-green-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #4f746f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #4f746f;color:#fff}.bg-gradient-green-purple{--bslib-color-fg: #fff;--bslib-color-bg: #4d8640;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #4d8640;color:#fff}.bg-gradient-green-pink{--bslib-color-fg: #fff;--bslib-color-bg: #838646;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #838646;color:#fff}.bg-gradient-green-red{--bslib-color-fg: #fff;--bslib-color-bg: #8c6d25;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #8c6d25;color:#fff}.bg-gradient-green-orange{--bslib-color-fg: #000;--bslib-color-bg: #86b22e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #86b22e;color:#000}.bg-gradient-green-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #8c9c18;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #8c9c18;color:#fff}.bg-gradient-green-teal{--bslib-color-fg: #000;--bslib-color-bg: #33be4b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #33be4b;color:#000}.bg-gradient-green-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #638f59;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #638f59;color:#fff}.bg-gradient-teal-blue{--bslib-color-fg: #fff;--bslib-color-bg: 
#23acb5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #23acb5;color:#fff}.bg-gradient-teal-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #3c7fbb;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #3c7fbb;color:#fff}.bg-gradient-teal-purple{--bslib-color-fg: #fff;--bslib-color-bg: #3a918c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #3a918c;color:#fff}.bg-gradient-teal-pink{--bslib-color-fg: #fff;--bslib-color-bg: #709193;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #709193;color:#fff}.bg-gradient-teal-red{--bslib-color-fg: #fff;--bslib-color-bg: #797971;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #797971;color:#fff}.bg-gradient-teal-orange{--bslib-color-fg: #000;--bslib-color-bg: #73be7a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #73be7a;color:#000}.bg-gradient-teal-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #79a764;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #79a764;color:#fff}.bg-gradient-teal-green{--bslib-color-fg: #000;--bslib-color-bg: #2cc164;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #2cc164;color:#000}.bg-gradient-teal-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #509aa5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) 
#509aa5;color:#fff}.bg-gradient-cyan-blue{--bslib-color-fg: #fff;--bslib-color-bg: #6b66cb;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #6b66cb;color:#fff}.bg-gradient-cyan-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #8539d1;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #8539d1;color:#fff}.bg-gradient-cyan-purple{--bslib-color-fg: #fff;--bslib-color-bg: #834ba2;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #834ba2;color:#fff}.bg-gradient-cyan-pink{--bslib-color-fg: #fff;--bslib-color-bg: #b94ba8;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #b94ba8;color:#fff}.bg-gradient-cyan-red{--bslib-color-fg: #fff;--bslib-color-bg: #c23287;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #c23287;color:#fff}.bg-gradient-cyan-orange{--bslib-color-fg: #fff;--bslib-color-bg: #bc788f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #bc788f;color:#fff}.bg-gradient-cyan-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #c2617a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #c2617a;color:#fff}.bg-gradient-cyan-green{--bslib-color-fg: #fff;--bslib-color-bg: #757b7a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #757b7a;color:#fff}.bg-gradient-cyan-teal{--bslib-color-fg: #fff;--bslib-color-bg: #6983ad;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb 
var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #6983ad;color:#fff}.bg-blue{--bslib-color-bg: #2780e3;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-blue{--bslib-color-fg: #2780e3;color:var(--bslib-color-fg)}.bg-indigo{--bslib-color-bg: #6610f2;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-indigo{--bslib-color-fg: #6610f2;color:var(--bslib-color-fg)}.bg-purple{--bslib-color-bg: #613d7c;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-purple{--bslib-color-fg: #613d7c;color:var(--bslib-color-fg)}.bg-pink{--bslib-color-bg: #e83e8c;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-pink{--bslib-color-fg: #e83e8c;color:var(--bslib-color-fg)}.bg-red{--bslib-color-bg: #ff0039;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-red{--bslib-color-fg: #ff0039;color:var(--bslib-color-fg)}.bg-orange{--bslib-color-bg: #f0ad4e;--bslib-color-fg: #000;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-orange{--bslib-color-fg: #f0ad4e;color:var(--bslib-color-fg)}.bg-yellow{--bslib-color-bg: #ff7518;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-yellow{--bslib-color-fg: #ff7518;color:var(--bslib-color-fg)}.bg-green{--bslib-color-bg: #3fb618;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-green{--bslib-color-fg: #3fb618;color:var(--bslib-color-fg)}.bg-teal{--bslib-color-bg: #20c997;--bslib-color-fg: #000;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-teal{--bslib-color-fg: #20c997;color:var(--bslib-color-fg)}.bg-cyan{--bslib-color-bg: #9954bb;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-cyan{--bslib-color-fg: 
#9954bb;color:var(--bslib-color-fg)}.text-default{--bslib-color-fg: #343a40}.bg-default{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.text-primary{--bslib-color-fg: #2780e3}.bg-primary{--bslib-color-bg: #2780e3;--bslib-color-fg: #fff}.text-secondary{--bslib-color-fg: #343a40}.bg-secondary{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.text-success{--bslib-color-fg: #3fb618}.bg-success{--bslib-color-bg: #3fb618;--bslib-color-fg: #fff}.text-info{--bslib-color-fg: #9954bb}.bg-info{--bslib-color-bg: #9954bb;--bslib-color-fg: #fff}.text-warning{--bslib-color-fg: #ff7518}.bg-warning{--bslib-color-bg: #ff7518;--bslib-color-fg: #fff}.text-danger{--bslib-color-fg: #ff0039}.bg-danger{--bslib-color-bg: #ff0039;--bslib-color-fg: #fff}.text-light{--bslib-color-fg: #f8f9fa}.bg-light{--bslib-color-bg: #f8f9fa;--bslib-color-fg: #000}.text-dark{--bslib-color-fg: #343a40}.bg-dark{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.bg-gradient-blue-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #4053e9;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #4053e9;color:#fff}.bg-gradient-blue-purple{--bslib-color-fg: #fff;--bslib-color-bg: #3e65ba;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #3e65ba;color:#fff}.bg-gradient-blue-pink{--bslib-color-fg: #fff;--bslib-color-bg: #7466c0;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #7466c0;color:#fff}.bg-gradient-blue-red{--bslib-color-fg: #fff;--bslib-color-bg: #7d4d9f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #7d4d9f;color:#fff}.bg-gradient-blue-orange{--bslib-color-fg: #fff;--bslib-color-bg: #7792a7;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 
var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #7792a7;color:#fff}.bg-gradient-blue-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #7d7c92;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #7d7c92;color:#fff}.bg-gradient-blue-green{--bslib-color-fg: #fff;--bslib-color-bg: #319692;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #319692;color:#fff}.bg-gradient-blue-teal{--bslib-color-fg: #fff;--bslib-color-bg: #249dc5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #249dc5;color:#fff}.bg-gradient-blue-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #556ed3;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #556ed3;color:#fff}.bg-gradient-indigo-blue{--bslib-color-fg: #fff;--bslib-color-bg: #4d3dec;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #4d3dec;color:#fff}.bg-gradient-indigo-purple{--bslib-color-fg: #fff;--bslib-color-bg: #6422c3;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #6422c3;color:#fff}.bg-gradient-indigo-pink{--bslib-color-fg: #fff;--bslib-color-bg: #9a22c9;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #9a22c9;color:#fff}.bg-gradient-indigo-red{--bslib-color-fg: #fff;--bslib-color-bg: #a30aa8;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #a30aa8;color:#fff}.bg-gradient-indigo-orange{--bslib-color-fg: #fff;--bslib-color-bg: 
#9d4fb0;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #9d4fb0;color:#fff}.bg-gradient-indigo-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #a3389b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #a3389b;color:#fff}.bg-gradient-indigo-green{--bslib-color-fg: #fff;--bslib-color-bg: #56529b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #56529b;color:#fff}.bg-gradient-indigo-teal{--bslib-color-fg: #fff;--bslib-color-bg: #4a5ace;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #4a5ace;color:#fff}.bg-gradient-indigo-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #7a2bdc;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #7a2bdc;color:#fff}.bg-gradient-purple-blue{--bslib-color-fg: #fff;--bslib-color-bg: #4a58a5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #4a58a5;color:#fff}.bg-gradient-purple-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #632bab;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #632bab;color:#fff}.bg-gradient-purple-pink{--bslib-color-fg: #fff;--bslib-color-bg: #973d82;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #973d82;color:#fff}.bg-gradient-purple-red{--bslib-color-fg: #fff;--bslib-color-bg: #a02561;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) 
#a02561;color:#fff}.bg-gradient-purple-orange{--bslib-color-fg: #fff;--bslib-color-bg: #9a6a6a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #9a6a6a;color:#fff}.bg-gradient-purple-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #a05354;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #a05354;color:#fff}.bg-gradient-purple-green{--bslib-color-fg: #fff;--bslib-color-bg: #536d54;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #536d54;color:#fff}.bg-gradient-purple-teal{--bslib-color-fg: #fff;--bslib-color-bg: #477587;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #477587;color:#fff}.bg-gradient-purple-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #774695;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #774695;color:#fff}.bg-gradient-pink-blue{--bslib-color-fg: #fff;--bslib-color-bg: #9b58af;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #9b58af;color:#fff}.bg-gradient-pink-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #b42cb5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #b42cb5;color:#fff}.bg-gradient-pink-purple{--bslib-color-fg: #fff;--bslib-color-bg: #b23e86;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #b23e86;color:#fff}.bg-gradient-pink-red{--bslib-color-fg: #fff;--bslib-color-bg: #f1256b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c 
var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #f1256b;color:#fff}.bg-gradient-pink-orange{--bslib-color-fg: #fff;--bslib-color-bg: #eb6a73;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #eb6a73;color:#fff}.bg-gradient-pink-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #f1545e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #f1545e;color:#fff}.bg-gradient-pink-green{--bslib-color-fg: #fff;--bslib-color-bg: #a46e5e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #a46e5e;color:#fff}.bg-gradient-pink-teal{--bslib-color-fg: #fff;--bslib-color-bg: #987690;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #987690;color:#fff}.bg-gradient-pink-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #c8479f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #c8479f;color:#fff}.bg-gradient-red-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a9337d;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a9337d;color:#fff}.bg-gradient-red-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #c20683;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #c20683;color:#fff}.bg-gradient-red-purple{--bslib-color-fg: #fff;--bslib-color-bg: #c01854;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #c01854;color:#fff}.bg-gradient-red-pink{--bslib-color-fg: #fff;--bslib-color-bg: 
#f6195a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #f6195a;color:#fff}.bg-gradient-red-orange{--bslib-color-fg: #fff;--bslib-color-bg: #f94541;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #f94541;color:#fff}.bg-gradient-red-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #ff2f2c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #ff2f2c;color:#fff}.bg-gradient-red-green{--bslib-color-fg: #fff;--bslib-color-bg: #b2492c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #b2492c;color:#fff}.bg-gradient-red-teal{--bslib-color-fg: #fff;--bslib-color-bg: #a6505f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #a6505f;color:#fff}.bg-gradient-red-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #d6226d;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #d6226d;color:#fff}.bg-gradient-orange-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a09b8a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a09b8a;color:#fff}.bg-gradient-orange-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #b96e90;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #b96e90;color:#fff}.bg-gradient-orange-purple{--bslib-color-fg: #fff;--bslib-color-bg: #b78060;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) 
#b78060;color:#fff}.bg-gradient-orange-pink{--bslib-color-fg: #fff;--bslib-color-bg: #ed8167;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #ed8167;color:#fff}.bg-gradient-orange-red{--bslib-color-fg: #fff;--bslib-color-bg: #f66846;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #f66846;color:#fff}.bg-gradient-orange-yellow{--bslib-color-fg: #000;--bslib-color-bg: #f69738;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #f69738;color:#000}.bg-gradient-orange-green{--bslib-color-fg: #000;--bslib-color-bg: #a9b138;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #a9b138;color:#000}.bg-gradient-orange-teal{--bslib-color-fg: #000;--bslib-color-bg: #9db86b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #9db86b;color:#000}.bg-gradient-orange-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #cd897a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #cd897a;color:#fff}.bg-gradient-yellow-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a97969;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a97969;color:#fff}.bg-gradient-yellow-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #c24d6f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #c24d6f;color:#fff}.bg-gradient-yellow-purple{--bslib-color-fg: #fff;--bslib-color-bg: #c05f40;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 
var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #c05f40;color:#fff}.bg-gradient-yellow-pink{--bslib-color-fg: #fff;--bslib-color-bg: #f65f46;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #f65f46;color:#fff}.bg-gradient-yellow-red{--bslib-color-fg: #fff;--bslib-color-bg: #ff4625;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #ff4625;color:#fff}.bg-gradient-yellow-orange{--bslib-color-fg: #000;--bslib-color-bg: #f98b2e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #f98b2e;color:#000}.bg-gradient-yellow-green{--bslib-color-fg: #fff;--bslib-color-bg: #b28f18;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #b28f18;color:#fff}.bg-gradient-yellow-teal{--bslib-color-fg: #fff;--bslib-color-bg: #a6974b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #a6974b;color:#fff}.bg-gradient-yellow-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #d66859;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #d66859;color:#fff}.bg-gradient-green-blue{--bslib-color-fg: #fff;--bslib-color-bg: #35a069;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #35a069;color:#fff}.bg-gradient-green-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #4f746f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #4f746f;color:#fff}.bg-gradient-green-purple{--bslib-color-fg: #fff;--bslib-color-bg: 
#4d8640;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #4d8640;color:#fff}.bg-gradient-green-pink{--bslib-color-fg: #fff;--bslib-color-bg: #838646;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #838646;color:#fff}.bg-gradient-green-red{--bslib-color-fg: #fff;--bslib-color-bg: #8c6d25;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #8c6d25;color:#fff}.bg-gradient-green-orange{--bslib-color-fg: #000;--bslib-color-bg: #86b22e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #86b22e;color:#000}.bg-gradient-green-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #8c9c18;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #8c9c18;color:#fff}.bg-gradient-green-teal{--bslib-color-fg: #000;--bslib-color-bg: #33be4b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #33be4b;color:#000}.bg-gradient-green-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #638f59;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #638f59;color:#fff}.bg-gradient-teal-blue{--bslib-color-fg: #fff;--bslib-color-bg: #23acb5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #23acb5;color:#fff}.bg-gradient-teal-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #3c7fbb;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) 
#3c7fbb;color:#fff}.bg-gradient-teal-purple{--bslib-color-fg: #fff;--bslib-color-bg: #3a918c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #3a918c;color:#fff}.bg-gradient-teal-pink{--bslib-color-fg: #fff;--bslib-color-bg: #709193;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #709193;color:#fff}.bg-gradient-teal-red{--bslib-color-fg: #fff;--bslib-color-bg: #797971;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #797971;color:#fff}.bg-gradient-teal-orange{--bslib-color-fg: #000;--bslib-color-bg: #73be7a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #73be7a;color:#000}.bg-gradient-teal-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #79a764;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #79a764;color:#fff}.bg-gradient-teal-green{--bslib-color-fg: #000;--bslib-color-bg: #2cc164;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #2cc164;color:#000}.bg-gradient-teal-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #509aa5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #509aa5;color:#fff}.bg-gradient-cyan-blue{--bslib-color-fg: #fff;--bslib-color-bg: #6b66cb;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #6b66cb;color:#fff}.bg-gradient-cyan-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #8539d1;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb 
var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #8539d1;color:#fff}.bg-gradient-cyan-purple{--bslib-color-fg: #fff;--bslib-color-bg: #834ba2;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #834ba2;color:#fff}.bg-gradient-cyan-pink{--bslib-color-fg: #fff;--bslib-color-bg: #b94ba8;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #b94ba8;color:#fff}.bg-gradient-cyan-red{--bslib-color-fg: #fff;--bslib-color-bg: #c23287;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #c23287;color:#fff}.bg-gradient-cyan-orange{--bslib-color-fg: #fff;--bslib-color-bg: #bc788f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #bc788f;color:#fff}.bg-gradient-cyan-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #c2617a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #c2617a;color:#fff}.bg-gradient-cyan-green{--bslib-color-fg: #fff;--bslib-color-bg: #757b7a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #757b7a;color:#fff}.bg-gradient-cyan-teal{--bslib-color-fg: #fff;--bslib-color-bg: #6983ad;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #6983ad;color:#fff}.tab-content>.tab-pane.html-fill-container{display:none}.tab-content>.active.html-fill-container{display:flex}.tab-content.html-fill-container{padding:0}:root{--bslib-spacer: 1rem;--bslib-mb-spacer: var(--bslib-spacer, 
1rem)}.bslib-mb-spacing{margin-bottom:var(--bslib-mb-spacer)}.bslib-gap-spacing{gap:var(--bslib-mb-spacer)}.bslib-gap-spacing>.bslib-mb-spacing,.bslib-gap-spacing>.form-group,.bslib-gap-spacing>p,.bslib-gap-spacing>pre{margin-bottom:0}.html-fill-container>.html-fill-item.bslib-mb-spacing{margin-bottom:0}.bslib-grid{display:grid !important;gap:var(--bslib-spacer, 1rem);height:var(--bslib-grid-height)}.bslib-grid.grid{grid-template-columns:repeat(var(--bs-columns, 12), minmax(0, 1fr));grid-template-rows:unset;grid-auto-rows:var(--bslib-grid--row-heights);--bslib-grid--row-heights--xs: unset;--bslib-grid--row-heights--sm: unset;--bslib-grid--row-heights--md: unset;--bslib-grid--row-heights--lg: unset;--bslib-grid--row-heights--xl: unset;--bslib-grid--row-heights--xxl: unset}.bslib-grid.grid.bslib-grid--row-heights--xs{--bslib-grid--row-heights: var(--bslib-grid--row-heights--xs)}@media(min-width: 576px){.bslib-grid.grid.bslib-grid--row-heights--sm{--bslib-grid--row-heights: var(--bslib-grid--row-heights--sm)}}@media(min-width: 768px){.bslib-grid.grid.bslib-grid--row-heights--md{--bslib-grid--row-heights: var(--bslib-grid--row-heights--md)}}@media(min-width: 992px){.bslib-grid.grid.bslib-grid--row-heights--lg{--bslib-grid--row-heights: var(--bslib-grid--row-heights--lg)}}@media(min-width: 1200px){.bslib-grid.grid.bslib-grid--row-heights--xl{--bslib-grid--row-heights: var(--bslib-grid--row-heights--xl)}}@media(min-width: 1400px){.bslib-grid.grid.bslib-grid--row-heights--xxl{--bslib-grid--row-heights: var(--bslib-grid--row-heights--xxl)}}.bslib-grid>*>.shiny-input-container{width:100%}.bslib-grid-item{grid-column:auto/span 1}@media(max-width: 767.98px){.bslib-grid-item{grid-column:1/-1}}@media(max-width: 575.98px){.bslib-grid{grid-template-columns:1fr !important;height:var(--bslib-grid-height-mobile)}.bslib-grid.grid{height:unset !important;grid-auto-rows:var(--bslib-grid--row-heights--xs, auto)}}.bslib-card{overflow:auto}.bslib-card 
.card-body+.card-body{padding-top:0}.bslib-card .card-body{overflow:auto}.bslib-card .card-body p{margin-top:0}.bslib-card .card-body p:last-child{margin-bottom:0}.bslib-card .card-body{max-height:var(--bslib-card-body-max-height, none)}.bslib-card[data-full-screen=true]>.card-body{max-height:var(--bslib-card-body-max-height-full-screen, none)}.bslib-card .card-header .form-group{margin-bottom:0}.bslib-card .card-header .selectize-control{margin-bottom:0}.bslib-card .card-header .selectize-control .item{margin-right:1.15rem}.bslib-card .card-footer{margin-top:auto}.bslib-card .bslib-navs-card-title{display:flex;flex-wrap:wrap;justify-content:space-between;align-items:center}.bslib-card .bslib-navs-card-title .nav{margin-left:auto}.bslib-card .bslib-sidebar-layout:not([data-bslib-sidebar-border=true]){border:none}.bslib-card .bslib-sidebar-layout:not([data-bslib-sidebar-border-radius=true]){border-top-left-radius:0;border-top-right-radius:0}[data-full-screen=true]{position:fixed;inset:3.5rem 1rem 1rem;height:auto !important;max-height:none !important;width:auto !important;z-index:1070}.bslib-full-screen-enter{display:none;position:absolute;bottom:var(--bslib-full-screen-enter-bottom, 0.2rem);right:var(--bslib-full-screen-enter-right, 0);top:var(--bslib-full-screen-enter-top);left:var(--bslib-full-screen-enter-left);color:var(--bslib-color-fg, var(--bs-card-color));background-color:var(--bslib-color-bg, var(--bs-card-bg, var(--bs-body-bg)));border:var(--bs-card-border-width) solid var(--bslib-color-fg, var(--bs-card-border-color));box-shadow:0 2px 4px rgba(0,0,0,.15);margin:.2rem .4rem;padding:.55rem !important;font-size:.8rem;cursor:pointer;opacity:.7;z-index:1070}.bslib-full-screen-enter:hover{opacity:1}.card[data-full-screen=false]:hover>*>.bslib-full-screen-enter{display:block}.bslib-has-full-screen .card:hover>*>.bslib-full-screen-enter{display:none}@media(max-width: 575.98px){.bslib-full-screen-enter{display:none 
!important}}.bslib-full-screen-exit{position:relative;top:1.35rem;font-size:.9rem;cursor:pointer;text-decoration:none;display:flex;float:right;margin-right:2.15rem;align-items:center;color:rgba(var(--bs-body-bg-rgb), 0.8)}.bslib-full-screen-exit:hover{color:rgba(var(--bs-body-bg-rgb), 1)}.bslib-full-screen-exit svg{margin-left:.5rem;font-size:1.5rem}#bslib-full-screen-overlay{position:fixed;inset:0;background-color:rgba(var(--bs-body-color-rgb), 0.6);backdrop-filter:blur(2px);-webkit-backdrop-filter:blur(2px);z-index:1069;animation:bslib-full-screen-overlay-enter 400ms cubic-bezier(0.6, 0.02, 0.65, 1) forwards}@keyframes bslib-full-screen-overlay-enter{0%{opacity:0}100%{opacity:1}}:root{--bslib-value-box-shadow: none;--bslib-value-box-border-width-auto-yes: var(--bslib-value-box-border-width-baseline);--bslib-value-box-border-width-auto-no: 0;--bslib-value-box-border-width-baseline: 1px}.bslib-value-box{border-width:var(--bslib-value-box-border-width-auto-no, var(--bslib-value-box-border-width-baseline));container-name:bslib-value-box;container-type:inline-size}.bslib-value-box.card{box-shadow:var(--bslib-value-box-shadow)}.bslib-value-box.border-auto{border-width:var(--bslib-value-box-border-width-auto-yes, var(--bslib-value-box-border-width-baseline))}.bslib-value-box.default{--bslib-value-box-bg-default: var(--bs-card-bg, #FEFBF2);--bslib-value-box-border-color-default: var(--bs-card-border-color, rgba(0, 0, 0, 0.175));color:var(--bslib-value-box-color);background-color:var(--bslib-value-box-bg, var(--bslib-value-box-bg-default));border-color:var(--bslib-value-box-border-color, var(--bslib-value-box-border-color-default))}.bslib-value-box .value-box-grid{display:grid;grid-template-areas:"left right";align-items:center;overflow:hidden}.bslib-value-box .value-box-showcase{height:100%;max-height:var(---bslib-value-box-showcase-max-h, 100%)}.bslib-value-box .value-box-showcase,.bslib-value-box 
.value-box-showcase>.html-fill-item{width:100%}.bslib-value-box[data-full-screen=true] .value-box-showcase{max-height:var(---bslib-value-box-showcase-max-h-fs, 100%)}@media screen and (min-width: 575.98px){@container bslib-value-box (max-width: 300px){.bslib-value-box:not(.showcase-bottom) .value-box-grid{grid-template-columns:1fr !important;grid-template-rows:auto auto;grid-template-areas:"top" "bottom"}.bslib-value-box:not(.showcase-bottom) .value-box-grid .value-box-showcase{grid-area:top !important}.bslib-value-box:not(.showcase-bottom) .value-box-grid .value-box-area{grid-area:bottom !important;justify-content:end}}}.bslib-value-box .value-box-area{justify-content:center;padding:1.5rem 1rem;font-size:.9rem;font-weight:500}.bslib-value-box .value-box-area *{margin-bottom:0;margin-top:0}.bslib-value-box .value-box-title{font-size:1rem;margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2}.bslib-value-box .value-box-title:empty::after{content:" "}.bslib-value-box .value-box-value{font-size:calc(1.29rem + 0.48vw);margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2}@media(min-width: 1200px){.bslib-value-box .value-box-value{font-size:1.65rem}}.bslib-value-box .value-box-value:empty::after{content:" "}.bslib-value-box .value-box-showcase{align-items:center;justify-content:center;margin-top:auto;margin-bottom:auto;padding:1rem}.bslib-value-box .value-box-showcase .bi,.bslib-value-box .value-box-showcase .fa,.bslib-value-box .value-box-showcase .fab,.bslib-value-box .value-box-showcase .fas,.bslib-value-box .value-box-showcase .far{opacity:.85;min-width:50px;max-width:125%}.bslib-value-box .value-box-showcase .bi,.bslib-value-box .value-box-showcase .fa,.bslib-value-box .value-box-showcase .fab,.bslib-value-box .value-box-showcase .fas,.bslib-value-box .value-box-showcase .far{font-size:4rem}.bslib-value-box.showcase-top-right .value-box-grid{grid-template-columns:1fr var(---bslib-value-box-showcase-w, 50%)}.bslib-value-box.showcase-top-right 
.value-box-grid .value-box-showcase{grid-area:right;margin-left:auto;align-self:start;align-items:end;padding-left:0;padding-bottom:0}.bslib-value-box.showcase-top-right .value-box-grid .value-box-area{grid-area:left;align-self:end}.bslib-value-box.showcase-top-right[data-full-screen=true] .value-box-grid{grid-template-columns:auto var(---bslib-value-box-showcase-w-fs, 1fr)}.bslib-value-box.showcase-top-right[data-full-screen=true] .value-box-grid>div{align-self:center}.bslib-value-box.showcase-top-right:not([data-full-screen=true]) .value-box-showcase{margin-top:0}@container bslib-value-box (max-width: 300px){.bslib-value-box.showcase-top-right:not([data-full-screen=true]) .value-box-grid .value-box-showcase{padding-left:1rem}}.bslib-value-box.showcase-left-center .value-box-grid{grid-template-columns:var(---bslib-value-box-showcase-w, 30%) auto}.bslib-value-box.showcase-left-center[data-full-screen=true] .value-box-grid{grid-template-columns:var(---bslib-value-box-showcase-w-fs, 1fr) auto}.bslib-value-box.showcase-left-center:not([data-fill-screen=true]) .value-box-grid .value-box-showcase{grid-area:left}.bslib-value-box.showcase-left-center:not([data-fill-screen=true]) .value-box-grid .value-box-area{grid-area:right}.bslib-value-box.showcase-bottom .value-box-grid{grid-template-columns:1fr;grid-template-rows:1fr var(---bslib-value-box-showcase-h, auto);grid-template-areas:"top" "bottom";overflow:hidden}.bslib-value-box.showcase-bottom .value-box-grid .value-box-showcase{grid-area:bottom;padding:0;margin:0}.bslib-value-box.showcase-bottom .value-box-grid .value-box-area{grid-area:top}.bslib-value-box.showcase-bottom[data-full-screen=true] .value-box-grid{grid-template-rows:1fr var(---bslib-value-box-showcase-h-fs, 2fr)}.bslib-value-box.showcase-bottom[data-full-screen=true] .value-box-grid .value-box-showcase{padding:1rem}[data-bs-theme=dark] .bslib-value-box{--bslib-value-box-shadow: 0 0.5rem 1rem rgb(0 0 0 / 50%)}:root{--bslib-page-sidebar-title-bg: 
#052744;--bslib-page-sidebar-title-color: #fff}.bslib-page-title{background-color:var(--bslib-page-sidebar-title-bg);color:var(--bslib-page-sidebar-title-color);font-size:1.25rem;font-weight:300;padding:var(--bslib-spacer, 1rem);padding-left:1.5rem;margin-bottom:0;border-bottom:1px solid #dee2e6}html{height:100%}.bslib-page-fill{width:100%;height:100%;margin:0;padding:var(--bslib-spacer, 1rem);gap:var(--bslib-spacer, 1rem)}@media(max-width: 575.98px){.bslib-page-fill{height:var(--bslib-page-fill-mobile-height, auto)}}@media(min-width: 576px){.nav:not(.nav-hidden){display:flex !important;display:-webkit-flex !important}.nav:not(.nav-hidden):not(.nav-stacked):not(.flex-column){float:none !important}.nav:not(.nav-hidden):not(.nav-stacked):not(.flex-column)>.bslib-nav-spacer{margin-left:auto !important}.nav:not(.nav-hidden):not(.nav-stacked):not(.flex-column)>.form-inline{margin-top:auto;margin-bottom:auto}.nav:not(.nav-hidden).nav-stacked{flex-direction:column;-webkit-flex-direction:column;height:100%}.nav:not(.nav-hidden).nav-stacked>.bslib-nav-spacer{margin-top:auto !important}}.accordion .accordion-header{font-size:calc(1.29rem + 0.48vw);margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2;color:var(--bs-heading-color);margin-bottom:0}@media(min-width: 1200px){.accordion .accordion-header{font-size:1.65rem}}.accordion .accordion-icon:not(:empty){margin-right:.75rem;display:flex}.accordion .accordion-button:not(.collapsed){box-shadow:none}.accordion .accordion-button:not(.collapsed):focus{box-shadow:var(--bs-accordion-btn-focus-box-shadow)}.bslib-sidebar-layout{--bslib-sidebar-transition-duration: 500ms;--bslib-sidebar-transition-easing-x: cubic-bezier(0.8, 0.78, 0.22, 1.07);--bslib-sidebar-border: var(--bs-card-border-width, 1px) solid var(--bs-card-border-color, rgba(0, 0, 0, 0.175));--bslib-sidebar-border-radius: var(--bs-border-radius);--bslib-sidebar-vert-border: var(--bs-card-border-width, 1px) solid var(--bs-card-border-color, rgba(0, 0, 0, 
0.175));--bslib-sidebar-bg: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.05);--bslib-sidebar-fg: var(--bs-emphasis-color, black);--bslib-sidebar-main-fg: var(--bs-card-color, var(--bs-body-color));--bslib-sidebar-main-bg: var(--bs-card-bg, var(--bs-body-bg));--bslib-sidebar-toggle-bg: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.1);--bslib-sidebar-padding: calc(var(--bslib-spacer) * 1.5);--bslib-sidebar-icon-size: var(--bslib-spacer, 1rem);--bslib-sidebar-icon-button-size: calc(var(--bslib-sidebar-icon-size, 1rem) * 2);--bslib-sidebar-padding-icon: calc(var(--bslib-sidebar-icon-button-size, 2rem) * 1.5);--bslib-collapse-toggle-border-radius: var(--bs-border-radius, 0.25rem);--bslib-collapse-toggle-transform: 0deg;--bslib-sidebar-toggle-transition-easing: cubic-bezier(1, 0, 0, 1);--bslib-collapse-toggle-right-transform: 180deg;--bslib-sidebar-column-main: minmax(0, 1fr);display:grid !important;grid-template-columns:min(100% - var(--bslib-sidebar-icon-size),var(--bslib-sidebar-width, 250px)) var(--bslib-sidebar-column-main);position:relative;transition:grid-template-columns ease-in-out var(--bslib-sidebar-transition-duration);border:var(--bslib-sidebar-border);border-radius:var(--bslib-sidebar-border-radius)}@media(prefers-reduced-motion: reduce){.bslib-sidebar-layout{transition:none}}.bslib-sidebar-layout[data-bslib-sidebar-border=false]{border:none}.bslib-sidebar-layout[data-bslib-sidebar-border-radius=false]{border-radius:initial}.bslib-sidebar-layout>.main,.bslib-sidebar-layout>.sidebar{grid-row:1/2;border-radius:inherit;overflow:auto}.bslib-sidebar-layout>.main{grid-column:2/3;border-top-left-radius:0;border-bottom-left-radius:0;padding:var(--bslib-sidebar-padding);transition:padding var(--bslib-sidebar-transition-easing-x) 
var(--bslib-sidebar-transition-duration);color:var(--bslib-sidebar-main-fg);background-color:var(--bslib-sidebar-main-bg)}.bslib-sidebar-layout>.sidebar{grid-column:1/2;width:100%;height:100%;border-right:var(--bslib-sidebar-vert-border);border-top-right-radius:0;border-bottom-right-radius:0;color:var(--bslib-sidebar-fg);background-color:var(--bslib-sidebar-bg);backdrop-filter:blur(5px)}.bslib-sidebar-layout>.sidebar>.sidebar-content{display:flex;flex-direction:column;gap:var(--bslib-spacer, 1rem);padding:var(--bslib-sidebar-padding);padding-top:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout>.sidebar>.sidebar-content>:last-child:not(.sidebar-title){margin-bottom:0}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion{margin-left:calc(-1*var(--bslib-sidebar-padding));margin-right:calc(-1*var(--bslib-sidebar-padding))}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion:last-child{margin-bottom:calc(-1*var(--bslib-sidebar-padding))}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion:not(:last-child){margin-bottom:1rem}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion .accordion-body{display:flex;flex-direction:column}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion:not(:first-child) .accordion-item:first-child{border-top:var(--bs-accordion-border-width) solid var(--bs-accordion-border-color)}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion:not(:last-child) .accordion-item:last-child{border-bottom:var(--bs-accordion-border-width) solid var(--bs-accordion-border-color)}.bslib-sidebar-layout>.sidebar>.sidebar-content.has-accordion>.sidebar-title{border-bottom:none;padding-bottom:0}.bslib-sidebar-layout>.sidebar 
.shiny-input-container{width:100%}.bslib-sidebar-layout[data-bslib-sidebar-open=always]>.sidebar>.sidebar-content{padding-top:var(--bslib-sidebar-padding)}.bslib-sidebar-layout>.collapse-toggle{grid-row:1/2;grid-column:1/2;display:inline-flex;align-items:center;position:absolute;right:calc(var(--bslib-sidebar-icon-size));top:calc(var(--bslib-sidebar-icon-size, 1rem)/2);border:none;border-radius:var(--bslib-collapse-toggle-border-radius);height:var(--bslib-sidebar-icon-button-size, 2rem);width:var(--bslib-sidebar-icon-button-size, 2rem);display:flex;align-items:center;justify-content:center;padding:0;color:var(--bslib-sidebar-fg);background-color:unset;transition:color var(--bslib-sidebar-transition-easing-x) var(--bslib-sidebar-transition-duration),top var(--bslib-sidebar-transition-easing-x) var(--bslib-sidebar-transition-duration),right var(--bslib-sidebar-transition-easing-x) var(--bslib-sidebar-transition-duration),left var(--bslib-sidebar-transition-easing-x) var(--bslib-sidebar-transition-duration)}.bslib-sidebar-layout>.collapse-toggle:hover{background-color:var(--bslib-sidebar-toggle-bg)}.bslib-sidebar-layout>.collapse-toggle>.collapse-icon{opacity:.8;width:var(--bslib-sidebar-icon-size);height:var(--bslib-sidebar-icon-size);transform:rotateY(var(--bslib-collapse-toggle-transform));transition:transform var(--bslib-sidebar-toggle-transition-easing) var(--bslib-sidebar-transition-duration)}.bslib-sidebar-layout>.collapse-toggle:hover>.collapse-icon{opacity:1}.bslib-sidebar-layout .sidebar-title{font-size:1.25rem;line-height:1.25;margin-top:0;margin-bottom:1rem;padding-bottom:1rem;border-bottom:var(--bslib-sidebar-border)}.bslib-sidebar-layout.sidebar-right{grid-template-columns:var(--bslib-sidebar-column-main) min(100% - var(--bslib-sidebar-icon-size),var(--bslib-sidebar-width, 
250px))}.bslib-sidebar-layout.sidebar-right>.main{grid-column:1/2;border-top-right-radius:0;border-bottom-right-radius:0;border-top-left-radius:inherit;border-bottom-left-radius:inherit}.bslib-sidebar-layout.sidebar-right>.sidebar{grid-column:2/3;border-right:none;border-left:var(--bslib-sidebar-vert-border);border-top-left-radius:0;border-bottom-left-radius:0}.bslib-sidebar-layout.sidebar-right>.collapse-toggle{grid-column:2/3;left:var(--bslib-sidebar-icon-size);right:unset;border:var(--bslib-collapse-toggle-border)}.bslib-sidebar-layout.sidebar-right>.collapse-toggle>.collapse-icon{transform:rotateY(var(--bslib-collapse-toggle-right-transform))}.bslib-sidebar-layout.sidebar-collapsed{--bslib-collapse-toggle-transform: 180deg;--bslib-collapse-toggle-right-transform: 0deg;--bslib-sidebar-vert-border: none;grid-template-columns:0 minmax(0, 1fr)}.bslib-sidebar-layout.sidebar-collapsed.sidebar-right{grid-template-columns:minmax(0, 1fr) 0}.bslib-sidebar-layout.sidebar-collapsed:not(.transitioning)>.sidebar>*{display:none}.bslib-sidebar-layout.sidebar-collapsed>.main{border-radius:inherit}.bslib-sidebar-layout.sidebar-collapsed:not(.sidebar-right)>.main{padding-left:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout.sidebar-collapsed.sidebar-right>.main{padding-right:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout.sidebar-collapsed>.collapse-toggle{color:var(--bslib-sidebar-main-fg);top:calc(var(--bslib-sidebar-overlap-counter, 0)*(var(--bslib-sidebar-icon-size) + var(--bslib-sidebar-padding)) + var(--bslib-sidebar-icon-size, 1rem)/2);right:calc(-2.5*var(--bslib-sidebar-icon-size) - var(--bs-card-border-width, 1px))}.bslib-sidebar-layout.sidebar-collapsed.sidebar-right>.collapse-toggle{left:calc(-2.5*var(--bslib-sidebar-icon-size) - var(--bs-card-border-width, 1px));right:unset}@media(min-width: 576px){.bslib-sidebar-layout.transitioning>.sidebar>.sidebar-content{display:none}}@media(max-width: 
575.98px){.bslib-sidebar-layout[data-bslib-sidebar-open=desktop]{--bslib-sidebar-js-init-collapsed: true}.bslib-sidebar-layout>.sidebar,.bslib-sidebar-layout.sidebar-right>.sidebar{border:none}.bslib-sidebar-layout>.main,.bslib-sidebar-layout.sidebar-right>.main{grid-column:1/3}.bslib-sidebar-layout[data-bslib-sidebar-open=always]{display:block !important}.bslib-sidebar-layout[data-bslib-sidebar-open=always]>.sidebar{max-height:var(--bslib-sidebar-max-height-mobile);overflow-y:auto;border-top:var(--bslib-sidebar-vert-border)}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]){grid-template-columns:100% 0}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]):not(.sidebar-collapsed)>.sidebar{z-index:1}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]):not(.sidebar-collapsed)>.collapse-toggle{z-index:1}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-right{grid-template-columns:0 100%}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-collapsed{grid-template-columns:0 100%}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-collapsed.sidebar-right{grid-template-columns:100% 0}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]):not(.sidebar-right)>.main{padding-left:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-right>.main{padding-right:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always])>.main{opacity:0;transition:opacity var(--bslib-sidebar-transition-easing-x) 
var(--bslib-sidebar-transition-duration)}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-collapsed>.main{opacity:1}}.navbar+.container-fluid:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-sm:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-md:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-lg:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-xl:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-xxl:has(>.tab-content>.tab-pane.active.html-fill-container){padding-left:0;padding-right:0}.navbar+.container-fluid>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-sm>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-md>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-lg>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-xl>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-xxl>.tab-content>.tab-pane.active.html-fill-container{padding:var(--bslib-spacer, 1rem);gap:var(--bslib-spacer, 
1rem)}.navbar+.container-fluid>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-sm>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-md>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-lg>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-xl>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-xxl>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child){padding:0}.navbar+.container-fluid>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-sm>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-md>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-lg>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-xl>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-xxl>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]){border-left:none;border-right:none;border-bottom:none}.navbar+.container-fluid>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-sm>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-md>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:o
nly-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-lg>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-xl>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-xxl>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]){border-radius:0}.navbar+div>.bslib-sidebar-layout{border-top:var(--bslib-sidebar-border)}.html-fill-container{display:flex;flex-direction:column;min-height:0;min-width:0}.html-fill-container>.html-fill-item{flex:1 1 auto;min-height:0;min-width:0}.html-fill-container>:not(.html-fill-item){flex:0 0 auto}.quarto-container{min-height:calc(100vh - 132px)}body.hypothesis-enabled #quarto-header{margin-right:16px}footer.footer .nav-footer,#quarto-header>nav{padding-left:1em;padding-right:1em}footer.footer div.nav-footer p:first-child{margin-top:0}footer.footer div.nav-footer p:last-child{margin-bottom:0}#quarto-content>*{padding-top:14px}#quarto-content>#quarto-sidebar-glass{padding-top:0px}@media(max-width: 991.98px){#quarto-content>*{padding-top:0}#quarto-content .subtitle{padding-top:14px}#quarto-content section:first-of-type h2:first-of-type,#quarto-content section:first-of-type .h2:first-of-type{margin-top:1rem}}.headroom-target,header.headroom{will-change:transform;transition:position 200ms linear;transition:all 200ms linear}header.headroom--pinned{transform:translateY(0%)}header.headroom--unpinned{transform:translateY(-100%)}.navbar-container{width:100%}.navbar-brand{overflow:hidden;text-overflow:ellipsis}.navbar-brand-container{max-width:calc(100% - 115px);min-width:0;display:flex;align-items:center}@media(min-width: 
992px){.navbar-brand-container{margin-right:1em}}.navbar-brand.navbar-brand-logo{margin-right:4px;display:inline-flex}.navbar-toggler{flex-basis:content;flex-shrink:0}.navbar .navbar-brand-container{order:2}.navbar .navbar-toggler{order:1}.navbar .navbar-container>.navbar-nav{order:20}.navbar .navbar-container>.navbar-brand-container{margin-left:0 !important;margin-right:0 !important}.navbar .navbar-collapse{order:20}.navbar #quarto-search{order:4;margin-left:auto}.navbar .navbar-toggler{margin-right:.5em}.navbar-collapse .quarto-navbar-tools{margin-left:.5em}.navbar-logo{max-height:24px;width:auto;padding-right:4px}nav .nav-item:not(.compact){padding-top:1px}nav .nav-link i,nav .dropdown-item i{padding-right:1px}.navbar-expand-lg .navbar-nav .nav-link{padding-left:.6rem;padding-right:.6rem}nav .nav-item.compact .nav-link{padding-left:.5rem;padding-right:.5rem;font-size:1.1rem}.navbar .quarto-navbar-tools{order:3}.navbar .quarto-navbar-tools div.dropdown{display:inline-block}.navbar .quarto-navbar-tools .quarto-navigation-tool{color:#f2e5bd}.navbar .quarto-navbar-tools .quarto-navigation-tool:hover{color:#93b0f1}.navbar-nav .dropdown-menu{min-width:220px;font-size:.9rem}.navbar .navbar-nav .nav-link.dropdown-toggle::after{opacity:.75;vertical-align:.175em}.navbar ul.dropdown-menu{padding-top:0;padding-bottom:0}.navbar .dropdown-header{text-transform:uppercase;font-size:.8rem;padding:0 .5rem}.navbar .dropdown-item{padding:.4rem .5rem}.navbar .dropdown-item>i.bi{margin-left:.1rem;margin-right:.25em}.sidebar #quarto-search{margin-top:-1px}.sidebar #quarto-search svg.aa-SubmitIcon{width:16px;height:16px}.sidebar-navigation a{color:inherit}.sidebar-title{margin-top:.25rem;padding-bottom:.5rem;font-size:1.3rem;line-height:1.6rem;visibility:visible}.sidebar-title>a{font-size:inherit;text-decoration:none}.sidebar-title .sidebar-tools-main{margin-top:-6px}@media(max-width: 991.98px){#quarto-sidebar div.sidebar-header{padding-top:.2em}}.sidebar-header-stacked 
.sidebar-title{margin-top:.6rem}.sidebar-logo{max-width:90%;padding-bottom:.5rem}.sidebar-logo-link{text-decoration:none}.sidebar-navigation li a{text-decoration:none}.sidebar-navigation .quarto-navigation-tool{opacity:.7;font-size:.875rem}#quarto-sidebar>nav>.sidebar-tools-main{margin-left:14px}.sidebar-tools-main{display:inline-flex;margin-left:0px;order:2}.sidebar-tools-main:not(.tools-wide){vertical-align:middle}.sidebar-navigation .quarto-navigation-tool.dropdown-toggle::after{display:none}.sidebar.sidebar-navigation>*{padding-top:1em}.sidebar-item{margin-bottom:.2em;line-height:1rem;margin-top:.4rem}.sidebar-section{padding-left:.5em;padding-bottom:.2em}.sidebar-item .sidebar-item-container{display:flex;justify-content:space-between;cursor:pointer}.sidebar-item-toggle:hover{cursor:pointer}.sidebar-item .sidebar-item-toggle .bi{font-size:.7rem;text-align:center}.sidebar-item .sidebar-item-toggle .bi-chevron-right::before{transition:transform 200ms ease}.sidebar-item .sidebar-item-toggle[aria-expanded=false] .bi-chevron-right::before{transform:none}.sidebar-item .sidebar-item-toggle[aria-expanded=true] .bi-chevron-right::before{transform:rotate(90deg)}.sidebar-item-text{width:100%}.sidebar-navigation .sidebar-divider{margin-left:0;margin-right:0;margin-top:.5rem;margin-bottom:.5rem}@media(max-width: 991.98px){.quarto-secondary-nav{display:block}.quarto-secondary-nav button.quarto-search-button{padding-right:0em;padding-left:2em}.quarto-secondary-nav button.quarto-btn-toggle{margin-left:-0.75rem;margin-right:.15rem}.quarto-secondary-nav nav.quarto-title-breadcrumbs{display:none}.quarto-secondary-nav nav.quarto-page-breadcrumbs{display:flex;align-items:center;padding-right:1em;margin-left:-0.25em}.quarto-secondary-nav nav.quarto-page-breadcrumbs a{text-decoration:none}.quarto-secondary-nav nav.quarto-page-breadcrumbs ol.breadcrumb{margin-bottom:0}}@media(min-width: 992px){.quarto-secondary-nav{display:none}}.quarto-title-breadcrumbs 
.breadcrumb{margin-bottom:.5em;font-size:.9rem}.quarto-title-breadcrumbs .breadcrumb li:last-of-type a{color:#6c757d}.quarto-secondary-nav .quarto-btn-toggle{color:#052744}.quarto-secondary-nav[aria-expanded=false] .quarto-btn-toggle .bi-chevron-right::before{transform:none}.quarto-secondary-nav[aria-expanded=true] .quarto-btn-toggle .bi-chevron-right::before{transform:rotate(90deg)}.quarto-secondary-nav .quarto-btn-toggle .bi-chevron-right::before{transition:transform 200ms ease}.quarto-secondary-nav{cursor:pointer}.no-decor{text-decoration:none}.quarto-secondary-nav-title{margin-top:.3em;color:#052744;padding-top:4px}.quarto-secondary-nav nav.quarto-page-breadcrumbs{color:#052744}.quarto-secondary-nav nav.quarto-page-breadcrumbs a{color:#052744}.quarto-secondary-nav nav.quarto-page-breadcrumbs a:hover{color:rgba(32,80,186,.8)}.quarto-secondary-nav nav.quarto-page-breadcrumbs .breadcrumb-item::before{color:#0c5da3}.breadcrumb-item{line-height:1.2rem}div.sidebar-item-container{color:#052744}div.sidebar-item-container:hover,div.sidebar-item-container:focus{color:rgba(32,80,186,.8)}div.sidebar-item-container.disabled{color:rgba(5,39,68,.75)}div.sidebar-item-container .active,div.sidebar-item-container .show>.nav-link,div.sidebar-item-container .sidebar-link>code{color:#2050ba}div.sidebar.sidebar-navigation.rollup.quarto-sidebar-toggle-contents,nav.sidebar.sidebar-navigation:not(.rollup){background-color:#fefcf9}@media(max-width: 991.98px){.sidebar-navigation .sidebar-item a,.nav-page .nav-page-text,.sidebar-navigation{font-size:1rem}.sidebar-navigation ul.sidebar-section.depth1 .sidebar-section-item{font-size:1.1rem}.sidebar-logo{display:none}.sidebar.sidebar-navigation{position:static;border-bottom:1px solid #dee2e6}.sidebar.sidebar-navigation.collapsing{position:fixed;z-index:1000}.sidebar.sidebar-navigation.show{position:fixed;z-index:1000}.sidebar.sidebar-navigation{min-height:100%}nav.quarto-secondary-nav{background-color:#fefcf9;border-bottom:1px solid 
#dee2e6}.quarto-banner nav.quarto-secondary-nav{background-color:#052744;color:#f2e5bd;border-top:1px solid #dee2e6}.sidebar .sidebar-footer{visibility:visible;padding-top:1rem;position:inherit}.sidebar-tools-collapse{display:block}}#quarto-sidebar{transition:width .15s ease-in}#quarto-sidebar>*{padding-right:1em}@media(max-width: 991.98px){#quarto-sidebar .sidebar-menu-container{white-space:nowrap;min-width:225px}#quarto-sidebar.show{transition:width .15s ease-out}}@media(min-width: 992px){#quarto-sidebar{display:flex;flex-direction:column}.nav-page .nav-page-text,.sidebar-navigation .sidebar-section .sidebar-item{font-size:.875rem}.sidebar-navigation .sidebar-item{font-size:.925rem}.sidebar.sidebar-navigation{display:block;position:sticky}.sidebar-search{width:100%}.sidebar .sidebar-footer{visibility:visible}}@media(min-width: 992px){#quarto-sidebar-glass{display:none}}@media(max-width: 991.98px){#quarto-sidebar-glass{position:fixed;top:0;bottom:0;left:0;right:0;background-color:rgba(255,255,255,0);transition:background-color .15s ease-in;z-index:-1}#quarto-sidebar-glass.collapsing{z-index:1000}#quarto-sidebar-glass.show{transition:background-color .15s ease-out;background-color:rgba(102,102,102,.4);z-index:1000}}.sidebar .sidebar-footer{padding:.5rem 1rem;align-self:flex-end;color:#6c757d;width:100%}.quarto-page-breadcrumbs .breadcrumb-item+.breadcrumb-item,.quarto-page-breadcrumbs .breadcrumb-item{padding-right:.33em;padding-left:0}.quarto-page-breadcrumbs .breadcrumb-item::before{padding-right:.33em}.quarto-sidebar-footer{font-size:.875em}.sidebar-section .bi-chevron-right{vertical-align:middle}.sidebar-section .bi-chevron-right::before{font-size:.9em}.notransition{-webkit-transition:none !important;-moz-transition:none !important;-o-transition:none !important;transition:none !important}.btn:focus:not(:focus-visible){box-shadow:none}.page-navigation{display:flex;justify-content:space-between}.nav-page{padding-bottom:.75em}.nav-page 
.bi{font-size:1.8rem;vertical-align:middle}.nav-page .nav-page-text{padding-left:.25em;padding-right:.25em}.nav-page a{color:#6c757d;text-decoration:none;display:flex;align-items:center}.nav-page a:hover{color:#1f4eb6}.nav-footer .toc-actions{padding-bottom:.5em;padding-top:.5em}.nav-footer .toc-actions a,.nav-footer .toc-actions a:hover{text-decoration:none}.nav-footer .toc-actions ul{display:flex;list-style:none}.nav-footer .toc-actions ul :first-child{margin-left:auto}.nav-footer .toc-actions ul :last-child{margin-right:auto}.nav-footer .toc-actions ul li{padding-right:1.5em}.nav-footer .toc-actions ul li i.bi{padding-right:.4em}.nav-footer .toc-actions ul li:last-of-type{padding-right:0}.nav-footer{display:flex;flex-direction:row;flex-wrap:wrap;justify-content:space-between;align-items:baseline;text-align:center;padding-top:.5rem;padding-bottom:.5rem;background-color:#fefbf2}body.nav-fixed{padding-top:64px}.nav-footer-contents{color:#6c757d;margin-top:.25rem}.nav-footer{min-height:3.5em;color:#75736f}.nav-footer a{color:#75736f}.nav-footer .nav-footer-left{font-size:.825em}.nav-footer .nav-footer-center{font-size:.825em}.nav-footer .nav-footer-right{font-size:.825em}.nav-footer-left .footer-items,.nav-footer-center .footer-items,.nav-footer-right .footer-items{display:inline-flex;padding-top:.3em;padding-bottom:.3em;margin-bottom:0em}.nav-footer-left .footer-items .nav-link,.nav-footer-center .footer-items .nav-link,.nav-footer-right .footer-items .nav-link{padding-left:.6em;padding-right:.6em}@media(min-width: 768px){.nav-footer-left{flex:1 1 0px;text-align:left}}@media(max-width: 575.98px){.nav-footer-left{margin-bottom:1em;flex:100%}}@media(min-width: 768px){.nav-footer-right{flex:1 1 0px;text-align:right}}@media(max-width: 575.98px){.nav-footer-right{margin-bottom:1em;flex:100%}}.nav-footer-center{text-align:center;min-height:3em}@media(min-width: 768px){.nav-footer-center{flex:1 1 0px}}.nav-footer-center 
.footer-items{justify-content:center}@media(max-width: 767.98px){.nav-footer-center{margin-bottom:1em;flex:100%}}@media(max-width: 767.98px){.nav-footer-center{margin-top:3em;order:10}}.navbar .quarto-reader-toggle.reader .quarto-reader-toggle-btn{background-color:#f2e5bd;border-radius:3px}@media(max-width: 991.98px){.quarto-reader-toggle{display:none}}.quarto-reader-toggle.reader.quarto-navigation-tool .quarto-reader-toggle-btn{background-color:#052744;border-radius:3px}.quarto-reader-toggle .quarto-reader-toggle-btn{display:inline-flex;padding-left:.2em;padding-right:.2em;margin-left:-0.2em;margin-right:-0.2em;text-align:center}.navbar .quarto-reader-toggle:not(.reader) .bi::before{background-image:url('data:image/svg+xml,')}.navbar .quarto-reader-toggle.reader .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-reader-toggle:not(.reader) .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-reader-toggle.reader .bi::before{background-image:url('data:image/svg+xml,')}#quarto-back-to-top{display:none;position:fixed;bottom:50px;background-color:#fefbf2;border-radius:.25rem;box-shadow:0 .2rem .5rem #6c757d,0 0 .05rem #6c757d;color:#6c757d;text-decoration:none;font-size:.9em;text-align:center;left:50%;padding:.4rem .8rem;transform:translate(-50%, 0)}#quarto-announcement{padding:.5em;display:flex;justify-content:space-between;margin-bottom:0;font-size:.9em}#quarto-announcement .quarto-announcement-content{margin-right:auto}#quarto-announcement .quarto-announcement-content p{margin-bottom:0}#quarto-announcement .quarto-announcement-icon{margin-right:.5em;font-size:1.2em;margin-top:-0.15em}#quarto-announcement .quarto-announcement-action{cursor:pointer}.aa-DetachedSearchButtonQuery{display:none}.aa-DetachedOverlay ul.aa-List,#quarto-search-results ul.aa-List{list-style:none;padding-left:0}.aa-DetachedOverlay .aa-Panel,#quarto-search-results 
.aa-Panel{background-color:#fefbf2;position:absolute;z-index:2000}#quarto-search-results .aa-Panel{max-width:400px}#quarto-search input{font-size:.925rem}@media(min-width: 992px){.navbar #quarto-search{margin-left:.25rem;order:999}}.navbar.navbar-expand-sm #quarto-search,.navbar.navbar-expand-md #quarto-search{order:999}@media(min-width: 992px){.navbar .quarto-navbar-tools{order:900}}@media(min-width: 992px){.navbar .quarto-navbar-tools.tools-end{margin-left:auto !important}}@media(max-width: 991.98px){#quarto-sidebar .sidebar-search{display:none}}#quarto-sidebar .sidebar-search .aa-Autocomplete{width:100%}.navbar .aa-Autocomplete .aa-Form{width:180px}.navbar #quarto-search.type-overlay .aa-Autocomplete{width:40px}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form{background-color:inherit;border:none}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form:focus-within{box-shadow:none;outline:none}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-InputWrapper{display:none}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-InputWrapper:focus-within{display:inherit}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-Label svg,.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-LoadingIndicator svg{width:26px;height:26px;color:#f2e5bd;opacity:1}.navbar #quarto-search.type-overlay .aa-Autocomplete svg.aa-SubmitIcon{width:26px;height:26px;color:#f2e5bd;opacity:1}.aa-Autocomplete .aa-Form,.aa-DetachedFormContainer .aa-Form{align-items:center;background-color:#fefbf2;border:1px solid #dee2e6;border-radius:.25rem;color:#343a40;display:flex;line-height:1em;margin:0;position:relative;width:100%}.aa-Autocomplete .aa-Form:focus-within,.aa-DetachedFormContainer .aa-Form:focus-within{box-shadow:rgba(39,128,227,.6) 0 0 0 1px;outline:currentColor none medium}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix,.aa-DetachedFormContainer .aa-Form 
.aa-InputWrapperPrefix{align-items:center;display:flex;flex-shrink:0;order:1}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-Label,.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-Label,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator{cursor:initial;flex-shrink:0;padding:0;text-align:left}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-Label svg,.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-Label svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator svg{color:#343a40;opacity:.5}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-SubmitButton,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-SubmitButton{appearance:none;background:none;border:0;margin:0}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator{align-items:center;display:flex;justify-content:center}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator[hidden]{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapper,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper{order:3;position:relative;width:100%}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input{appearance:none;background:none;border:0;color:#343a40;font:inherit;height:calc(1.5em + .1rem + 2px);padding:0;width:100%}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::placeholder,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::placeholder{color:#343a40;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper 
.aa-Input:focus{border-color:none;box-shadow:none;outline:none}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-decoration,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-cancel-button,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-button,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-decoration,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-decoration,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-cancel-button,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-button,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-decoration{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix{align-items:center;display:flex;order:4}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton{align-items:center;background:none;border:0;color:#343a40;opacity:.8;cursor:pointer;display:flex;margin:0;width:calc(1.5em + .1rem + 2px)}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:hover,.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:hover,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:focus{color:#343a40;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton[hidden]{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton svg{width:calc(1.5em + 0.75rem + calc(1px * 2))}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton,.aa-DetachedFormContainer .aa-Form 
.aa-InputWrapperSuffix .aa-CopyButton{border:none;align-items:center;background:none;color:#343a40;opacity:.4;font-size:.7rem;cursor:pointer;display:none;margin:0;width:calc(1em + .1rem + 2px)}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:hover,.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:hover,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:focus{color:#343a40;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton[hidden]{display:none}.aa-PanelLayout:empty{display:none}.quarto-search-no-results.no-query{display:none}.aa-Source:has(.no-query){display:none}#quarto-search-results .aa-Panel{border:solid #dee2e6 1px}#quarto-search-results .aa-SourceNoResults{width:398px}.aa-DetachedOverlay .aa-Panel,#quarto-search-results .aa-Panel{max-height:65vh;overflow-y:auto;font-size:.925rem}.aa-DetachedOverlay .aa-SourceNoResults,#quarto-search-results .aa-SourceNoResults{height:60px;display:flex;justify-content:center;align-items:center}.aa-DetachedOverlay .search-error,#quarto-search-results .search-error{padding-top:10px;padding-left:20px;padding-right:20px;cursor:default}.aa-DetachedOverlay .search-error .search-error-title,#quarto-search-results .search-error .search-error-title{font-size:1.1rem;margin-bottom:.5rem}.aa-DetachedOverlay .search-error .search-error-title .search-error-icon,#quarto-search-results .search-error .search-error-title .search-error-icon{margin-right:8px}.aa-DetachedOverlay .search-error .search-error-text,#quarto-search-results .search-error .search-error-text{font-weight:300}.aa-DetachedOverlay .search-result-text,#quarto-search-results 
.search-result-text{font-weight:300;overflow:hidden;text-overflow:ellipsis;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;line-height:1.2rem;max-height:2.4rem}.aa-DetachedOverlay .aa-SourceHeader .search-result-header,#quarto-search-results .aa-SourceHeader .search-result-header{font-size:.875rem;background-color:#fcf4da;padding-left:14px;padding-bottom:4px;padding-top:4px}.aa-DetachedOverlay .aa-SourceHeader .search-result-header-no-results,#quarto-search-results .aa-SourceHeader .search-result-header-no-results{display:none}.aa-DetachedOverlay .aa-SourceFooter .algolia-search-logo,#quarto-search-results .aa-SourceFooter .algolia-search-logo{width:110px;opacity:.85;margin:8px;float:right}.aa-DetachedOverlay .search-result-section,#quarto-search-results .search-result-section{font-size:.925em}.aa-DetachedOverlay a.search-result-link,#quarto-search-results a.search-result-link{color:inherit;text-decoration:none}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item,#quarto-search-results li.aa-Item[aria-selected=true] .search-item{background-color:#2780e3}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item.search-result-more,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-section,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-text,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-title-container,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-text-container,#quarto-search-results li.aa-Item[aria-selected=true] .search-item.search-result-more,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-section,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-text,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-title-container,#quarto-search-results li.aa-Item[aria-selected=true] .search-item 
.search-result-text-container{color:#fff;background-color:#2780e3}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item mark.search-match,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-match.mark,#quarto-search-results li.aa-Item[aria-selected=true] .search-item mark.search-match,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-match.mark{color:#fff;background-color:#4b95e8}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item,#quarto-search-results li.aa-Item[aria-selected=false] .search-item{background-color:#fefbf2}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item.search-result-more,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-section,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-text,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-title-container,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-text-container,#quarto-search-results li.aa-Item[aria-selected=false] .search-item.search-result-more,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-section,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-text,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-title-container,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-text-container{color:#343a40}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item mark.search-match,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-match.mark,#quarto-search-results li.aa-Item[aria-selected=false] .search-item mark.search-match,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-match.mark{color:inherit;background-color:#e5effc}.aa-DetachedOverlay .aa-Item .search-result-doc:not(.document-selectable) 
.search-result-title-container,#quarto-search-results .aa-Item .search-result-doc:not(.document-selectable) .search-result-title-container{background-color:#fefbf2;color:#343a40}.aa-DetachedOverlay .aa-Item .search-result-doc:not(.document-selectable) .search-result-text-container,#quarto-search-results .aa-Item .search-result-doc:not(.document-selectable) .search-result-text-container{padding-top:0px}.aa-DetachedOverlay li.aa-Item .search-result-doc.document-selectable .search-result-text-container,#quarto-search-results li.aa-Item .search-result-doc.document-selectable .search-result-text-container{margin-top:-4px}.aa-DetachedOverlay .aa-Item,#quarto-search-results .aa-Item{cursor:pointer}.aa-DetachedOverlay .aa-Item .search-item,#quarto-search-results .aa-Item .search-item{border-left:none;border-right:none;border-top:none;background-color:#fefbf2;border-color:#dee2e6;color:#343a40}.aa-DetachedOverlay .aa-Item .search-item p,#quarto-search-results .aa-Item .search-item p{margin-top:0;margin-bottom:0}.aa-DetachedOverlay .aa-Item .search-item i.bi,#quarto-search-results .aa-Item .search-item i.bi{padding-left:8px;padding-right:8px;font-size:1.3em}.aa-DetachedOverlay .aa-Item .search-item .search-result-title,#quarto-search-results .aa-Item .search-item .search-result-title{margin-top:.3em;margin-bottom:0em}.aa-DetachedOverlay .aa-Item .search-item .search-result-crumbs,#quarto-search-results .aa-Item .search-item .search-result-crumbs{white-space:nowrap;text-overflow:ellipsis;font-size:.8em;font-weight:300;margin-right:1em}.aa-DetachedOverlay .aa-Item .search-item .search-result-crumbs:not(.search-result-crumbs-wrap),#quarto-search-results .aa-Item .search-item .search-result-crumbs:not(.search-result-crumbs-wrap){max-width:30%;margin-left:auto;margin-top:.5em;margin-bottom:.1rem}.aa-DetachedOverlay .aa-Item .search-item .search-result-crumbs.search-result-crumbs-wrap,#quarto-search-results .aa-Item .search-item 
.search-result-crumbs.search-result-crumbs-wrap{flex-basis:100%;margin-top:0em;margin-bottom:.2em;margin-left:37px}.aa-DetachedOverlay .aa-Item .search-result-title-container,#quarto-search-results .aa-Item .search-result-title-container{font-size:1em;display:flex;flex-wrap:wrap;padding:6px 4px 6px 4px}.aa-DetachedOverlay .aa-Item .search-result-text-container,#quarto-search-results .aa-Item .search-result-text-container{padding-bottom:8px;padding-right:8px;margin-left:42px}.aa-DetachedOverlay .aa-Item .search-result-doc-section,.aa-DetachedOverlay .aa-Item .search-result-more,#quarto-search-results .aa-Item .search-result-doc-section,#quarto-search-results .aa-Item .search-result-more{padding-top:8px;padding-bottom:8px;padding-left:44px}.aa-DetachedOverlay .aa-Item .search-result-more,#quarto-search-results .aa-Item .search-result-more{font-size:.8em;font-weight:400}.aa-DetachedOverlay .aa-Item .search-result-doc,#quarto-search-results .aa-Item .search-result-doc{border-top:1px solid #dee2e6}.aa-DetachedSearchButton{background:none;border:none}.aa-DetachedSearchButton .aa-DetachedSearchButtonPlaceholder{display:none}.navbar .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon{color:#f2e5bd}.sidebar-tools-collapse #quarto-search,.sidebar-tools-main #quarto-search{display:inline}.sidebar-tools-collapse #quarto-search .aa-Autocomplete,.sidebar-tools-main #quarto-search .aa-Autocomplete{display:inline}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton{padding-left:4px;padding-right:4px}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon{color:#052744}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon .aa-SubmitIcon,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon 
.aa-SubmitIcon{margin-top:-3px}.aa-DetachedContainer{background:rgba(254,251,242,.65);width:90%;bottom:0;box-shadow:rgba(222,226,230,.6) 0 0 0 1px;outline:currentColor none medium;display:flex;flex-direction:column;left:0;margin:0;overflow:hidden;padding:0;position:fixed;right:0;top:0;z-index:1101}.aa-DetachedContainer::after{height:32px}.aa-DetachedContainer .aa-SourceHeader{margin:var(--aa-spacing-half) 0 var(--aa-spacing-half) 2px}.aa-DetachedContainer .aa-Panel{background-color:#fefbf2;border-radius:0;box-shadow:none;flex-grow:1;margin:0;padding:0;position:relative}.aa-DetachedContainer .aa-PanelLayout{bottom:0;box-shadow:none;left:0;margin:0;max-height:none;overflow-y:auto;position:absolute;right:0;top:0;width:100%}.aa-DetachedFormContainer{background-color:#fefbf2;border-bottom:1px solid #dee2e6;display:flex;flex-direction:row;justify-content:space-between;margin:0;padding:.5em}.aa-DetachedCancelButton{background:none;font-size:.8em;border:0;border-radius:3px;color:#343a40;cursor:pointer;margin:0 0 0 .5em;padding:0 .5em}.aa-DetachedCancelButton:hover,.aa-DetachedCancelButton:focus{box-shadow:rgba(39,128,227,.6) 0 0 0 1px;outline:currentColor none medium}.aa-DetachedContainer--modal{bottom:inherit;height:auto;margin:0 auto;position:absolute;top:100px;border-radius:6px;max-width:850px}@media(max-width: 575.98px){.aa-DetachedContainer--modal{width:100%;top:0px;border-radius:0px;border:none}}.aa-DetachedContainer--modal .aa-PanelLayout{max-height:var(--aa-detached-modal-max-height);padding-bottom:var(--aa-spacing-half);position:static}.aa-Detached{height:100vh;overflow:hidden}.aa-DetachedOverlay{background-color:rgba(52,58,64,.4);position:fixed;left:0;right:0;top:0;margin:0;padding:0;height:100vh;z-index:1100}.quarto-dashboard.nav-fixed.dashboard-sidebar #quarto-content.quarto-dashboard-content{padding:0em}.quarto-dashboard #quarto-content.quarto-dashboard-content{padding:1em}.quarto-dashboard 
#quarto-content.quarto-dashboard-content>*{padding-top:0}@media(min-width: 576px){.quarto-dashboard{height:100%}}.quarto-dashboard .card.valuebox.bslib-card.bg-primary{background-color:#5397e9 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-secondary{background-color:#343a40 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-success{background-color:#3aa716 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-info{background-color:rgba(153,84,187,.7019607843) !important}.quarto-dashboard .card.valuebox.bslib-card.bg-warning{background-color:#fa6400 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-danger{background-color:rgba(255,0,57,.7019607843) !important}.quarto-dashboard .card.valuebox.bslib-card.bg-light{background-color:#f8f9fa !important}.quarto-dashboard .card.valuebox.bslib-card.bg-dark{background-color:#343a40 !important}.quarto-dashboard.dashboard-fill{display:flex;flex-direction:column}.quarto-dashboard #quarto-appendix{display:none}.quarto-dashboard #quarto-header #quarto-dashboard-header{border-top:solid 1px #084274;border-bottom:solid 1px #084274}.quarto-dashboard #quarto-header #quarto-dashboard-header>nav{padding-left:1em;padding-right:1em}.quarto-dashboard #quarto-header #quarto-dashboard-header>nav .navbar-brand-container{padding-left:0}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-toggler{margin-right:0}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-toggler-icon{height:1em;width:1em;background-image:url('data:image/svg+xml,')}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-brand-container{padding-right:1em}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-title{font-size:1.1em}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-nav{font-size:.9em}.quarto-dashboard #quarto-dashboard-header .navbar{padding:0}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-container{padding-left:1em}.quarto-dashboard 
#quarto-dashboard-header .navbar.slim .navbar-brand-container .nav-link,.quarto-dashboard #quarto-dashboard-header .navbar.slim .navbar-nav .nav-link{padding:.7em}.quarto-dashboard #quarto-dashboard-header .navbar .quarto-color-scheme-toggle{order:9}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-toggler{margin-left:.5em;order:10}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-nav .nav-link{padding:.5em;height:100%;display:flex;align-items:center}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-nav .active{background-color:#083d6a}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-brand-container{padding:.5em .5em .5em 0;display:flex;flex-direction:row;margin-right:2em;align-items:center}@media(max-width: 767.98px){.quarto-dashboard #quarto-dashboard-header .navbar .navbar-brand-container{margin-right:auto}}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-collapse{align-self:stretch}@media(min-width: 768px){.quarto-dashboard #quarto-dashboard-header .navbar .navbar-collapse{order:8}}@media(max-width: 767.98px){.quarto-dashboard #quarto-dashboard-header .navbar .navbar-collapse{order:1000;padding-bottom:.5em}}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-collapse .navbar-nav{align-self:stretch}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-title{font-size:1.25em;line-height:1.1em;display:flex;flex-direction:row;flex-wrap:wrap;align-items:baseline}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-title .navbar-title-text{margin-right:.4em}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-title a{text-decoration:none;color:inherit}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-subtitle,.quarto-dashboard #quarto-dashboard-header .navbar .navbar-author{font-size:.9rem;margin-right:.5em}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-author{margin-left:auto}.quarto-dashboard #quarto-dashboard-header .navbar 
.navbar-logo{max-height:48px;min-height:30px;object-fit:cover;margin-right:1em}.quarto-dashboard #quarto-dashboard-header .navbar .quarto-dashboard-links{order:9;padding-right:1em}.quarto-dashboard #quarto-dashboard-header .navbar .quarto-dashboard-link-text{margin-left:.25em}.quarto-dashboard #quarto-dashboard-header .navbar .quarto-dashboard-link{padding-right:0em;padding-left:.7em;text-decoration:none;color:#f2e5bd}.quarto-dashboard .page-layout-custom .tab-content{padding:0;border:none}.quarto-dashboard-img-contain{height:100%;width:100%;object-fit:contain}@media(max-width: 575.98px){.quarto-dashboard .bslib-grid{grid-template-rows:minmax(1em, max-content) !important}.quarto-dashboard .sidebar-content{height:inherit}.quarto-dashboard .page-layout-custom{min-height:100vh}}.quarto-dashboard.dashboard-toolbar>.page-layout-custom,.quarto-dashboard.dashboard-sidebar>.page-layout-custom{padding:0}.quarto-dashboard .quarto-dashboard-content.quarto-dashboard-pages{padding:0}.quarto-dashboard .callout{margin-bottom:0;margin-top:0}.quarto-dashboard .html-fill-container figure{overflow:hidden}.quarto-dashboard bslib-tooltip .rounded-pill{border:solid #6c757d 1px}.quarto-dashboard bslib-tooltip .rounded-pill .svg{fill:#343a40}.quarto-dashboard .tabset .dashboard-card-no-title .nav-tabs{margin-left:0;margin-right:auto}.quarto-dashboard .tabset .tab-content{border:none}.quarto-dashboard .tabset .card-header .nav-link[role=tab]{margin-top:-6px;padding-top:6px;padding-bottom:6px}.quarto-dashboard .card.valuebox,.quarto-dashboard .card.bslib-value-box{min-height:3rem}.quarto-dashboard .card.valuebox .card-body,.quarto-dashboard .card.bslib-value-box .card-body{padding:0}.quarto-dashboard .bslib-value-box .value-box-value{font-size:clamp(.1em,15cqw,5em)}.quarto-dashboard .bslib-value-box .value-box-showcase .bi{font-size:clamp(.1em,max(18cqw,5.2cqh),5em);text-align:center;height:1em}.quarto-dashboard .bslib-value-box .value-box-showcase 
.bi::before{vertical-align:1em}.quarto-dashboard .bslib-value-box .value-box-area{margin-top:auto;margin-bottom:auto}.quarto-dashboard .card figure.quarto-float{display:flex;flex-direction:column;align-items:center}.quarto-dashboard .dashboard-scrolling{padding:1em}.quarto-dashboard .full-height{height:100%}.quarto-dashboard .showcase-bottom .value-box-grid{display:grid;grid-template-columns:1fr;grid-template-rows:1fr auto;grid-template-areas:"top" "bottom"}.quarto-dashboard .showcase-bottom .value-box-grid .value-box-showcase{grid-area:bottom;padding:0;margin:0}.quarto-dashboard .showcase-bottom .value-box-grid .value-box-showcase i.bi{font-size:4rem}.quarto-dashboard .showcase-bottom .value-box-grid .value-box-area{grid-area:top}.quarto-dashboard .tab-content{margin-bottom:0}.quarto-dashboard .bslib-card .bslib-navs-card-title{justify-content:stretch;align-items:end}.quarto-dashboard .card-header{display:flex;flex-wrap:wrap;justify-content:space-between}.quarto-dashboard .card-header .card-title{display:flex;flex-direction:column;justify-content:center;margin-bottom:0}.quarto-dashboard .tabset .card-toolbar{margin-bottom:1em}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout{border:none;gap:var(--bslib-spacer, 1rem)}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout>.main{padding:0}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout>.sidebar{border-radius:.25rem;border:1px solid rgba(0,0,0,.175)}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout>.collapse-toggle{display:none}@media(max-width: 767.98px){.quarto-dashboard .bslib-grid>.bslib-sidebar-layout{grid-template-columns:1fr;grid-template-rows:max-content 1fr}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout>.main{grid-column:1;grid-row:2}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout .sidebar{grid-column:1;grid-row:1}}.quarto-dashboard .sidebar-right .sidebar{padding-left:2.5em}.quarto-dashboard .sidebar-right .collapse-toggle{left:2px}.quarto-dashboard .quarto-dashboard .sidebar-right 
button.collapse-toggle:not(.transitioning){left:unset}.quarto-dashboard aside.sidebar{padding-left:1em;padding-right:1em;background-color:rgba(52,58,64,.25);color:#343a40}.quarto-dashboard .bslib-sidebar-layout>div.main{padding:.7em}.quarto-dashboard .bslib-sidebar-layout button.collapse-toggle{margin-top:.3em}.quarto-dashboard .bslib-sidebar-layout .collapse-toggle{top:0}.quarto-dashboard .bslib-sidebar-layout.sidebar-collapsed:not(.transitioning):not(.sidebar-right) .collapse-toggle{left:2px}.quarto-dashboard .sidebar>section>.h3:first-of-type{margin-top:0em}.quarto-dashboard .sidebar .h3,.quarto-dashboard .sidebar .h4,.quarto-dashboard .sidebar .h5,.quarto-dashboard .sidebar .h6{margin-top:.5em}.quarto-dashboard .sidebar form{flex-direction:column;align-items:start;margin-bottom:1em}.quarto-dashboard .sidebar form div[class*=oi-][class$=-input]{flex-direction:column}.quarto-dashboard .sidebar form[class*=oi-][class$=-toggle]{flex-direction:row-reverse;align-items:center;justify-content:start}.quarto-dashboard .sidebar form input[type=range]{margin-top:.5em;margin-right:.8em;margin-left:1em}.quarto-dashboard .sidebar label{width:fit-content}.quarto-dashboard .sidebar .card-body{margin-bottom:2em}.quarto-dashboard .sidebar .shiny-input-container{margin-bottom:1em}.quarto-dashboard .sidebar .shiny-options-group{margin-top:0}.quarto-dashboard .sidebar .control-label{margin-bottom:.3em}.quarto-dashboard .card .card-body .quarto-layout-row{align-items:stretch}.quarto-dashboard .toolbar{font-size:.9em;display:flex;flex-direction:row;border-top:solid 1px #bcbfc0;padding:1em;flex-wrap:wrap;background-color:rgba(52,58,64,.25)}.quarto-dashboard .toolbar .cell-output-display{display:flex}.quarto-dashboard .toolbar .shiny-input-container{padding-bottom:.5em;margin-bottom:.5em;width:inherit}.quarto-dashboard .toolbar .shiny-input-container>.checkbox:first-child{margin-top:6px}.quarto-dashboard .toolbar>*:last-child{margin-right:0}.quarto-dashboard 
.toolbar>*>*{margin-right:1em;align-items:baseline}.quarto-dashboard .toolbar>*>*>a{text-decoration:none;margin-top:auto;margin-bottom:auto}.quarto-dashboard .toolbar .shiny-input-container{padding-bottom:0;margin-bottom:0}.quarto-dashboard .toolbar .shiny-input-container>*{flex-shrink:0;flex-grow:0}.quarto-dashboard .toolbar .form-group.shiny-input-container:not([role=group])>label{margin-bottom:0}.quarto-dashboard .toolbar .shiny-input-container.no-baseline{align-items:start;padding-top:6px}.quarto-dashboard .toolbar .shiny-input-container{display:flex;align-items:baseline}.quarto-dashboard .toolbar .shiny-input-container label{padding-right:.4em}.quarto-dashboard .toolbar .shiny-input-container .bslib-input-switch{margin-top:6px}.quarto-dashboard .toolbar input[type=text]{line-height:1;width:inherit}.quarto-dashboard .toolbar .input-daterange{width:inherit}.quarto-dashboard .toolbar .input-daterange input[type=text]{height:2.4em;width:10em}.quarto-dashboard .toolbar .input-daterange .input-group-addon{height:auto;padding:0;margin-left:-5px !important;margin-right:-5px}.quarto-dashboard .toolbar .input-daterange .input-group-addon .input-group-text{padding-top:0;padding-bottom:0;height:100%}.quarto-dashboard .toolbar span.irs.irs--shiny{width:10em}.quarto-dashboard .toolbar span.irs.irs--shiny .irs-line{top:9px}.quarto-dashboard .toolbar span.irs.irs--shiny .irs-min,.quarto-dashboard .toolbar span.irs.irs--shiny .irs-max,.quarto-dashboard .toolbar span.irs.irs--shiny .irs-from,.quarto-dashboard .toolbar span.irs.irs--shiny .irs-to,.quarto-dashboard .toolbar span.irs.irs--shiny .irs-single{top:20px}.quarto-dashboard .toolbar span.irs.irs--shiny .irs-bar{top:8px}.quarto-dashboard .toolbar span.irs.irs--shiny .irs-handle{top:0px}.quarto-dashboard .toolbar .shiny-input-checkboxgroup>label{margin-top:6px}.quarto-dashboard .toolbar .shiny-input-checkboxgroup>.shiny-options-group{margin-top:0;align-items:baseline}.quarto-dashboard .toolbar 
.shiny-input-radiogroup>label{margin-top:6px}.quarto-dashboard .toolbar .shiny-input-radiogroup>.shiny-options-group{align-items:baseline;margin-top:0}.quarto-dashboard .toolbar .shiny-input-radiogroup>.shiny-options-group>.radio{margin-right:.3em}.quarto-dashboard .toolbar .form-select{padding-top:.2em;padding-bottom:.2em}.quarto-dashboard .toolbar .shiny-input-select{min-width:6em}.quarto-dashboard .toolbar div.checkbox{margin-bottom:0px}.quarto-dashboard .toolbar>.checkbox:first-child{margin-top:6px}.quarto-dashboard .toolbar form{width:fit-content}.quarto-dashboard .toolbar form label{padding-top:.2em;padding-bottom:.2em;width:fit-content}.quarto-dashboard .toolbar form input[type=date]{width:fit-content}.quarto-dashboard .toolbar form input[type=color]{width:3em}.quarto-dashboard .toolbar form button{padding:.4em}.quarto-dashboard .toolbar form select{width:fit-content}.quarto-dashboard .toolbar>*{font-size:.9em;flex-grow:0}.quarto-dashboard .toolbar .shiny-input-container label{margin-bottom:1px}.quarto-dashboard .toolbar-bottom{margin-top:1em;margin-bottom:0 !important;order:2}.quarto-dashboard .quarto-dashboard-content>.dashboard-toolbar-container>.toolbar-content>.tab-content>.tab-pane>*:not(.bslib-sidebar-layout){padding:1em}.quarto-dashboard .quarto-dashboard-content>.dashboard-toolbar-container>.toolbar-content>*:not(.tab-content){padding:1em}.quarto-dashboard .quarto-dashboard-content>.tab-content>.dashboard-page>.dashboard-toolbar-container>.toolbar-content,.quarto-dashboard .quarto-dashboard-content>.tab-content>.dashboard-page:not(.dashboard-sidebar-container)>*:not(.dashboard-toolbar-container){padding:1em}.quarto-dashboard .toolbar-content{padding:0}.quarto-dashboard .quarto-dashboard-content.quarto-dashboard-pages .tab-pane>.dashboard-toolbar-container .toolbar{border-radius:0;margin-bottom:0}.quarto-dashboard .dashboard-toolbar-container.toolbar-toplevel .toolbar{border-bottom:1px solid rgba(0,0,0,.175)}.quarto-dashboard 
.dashboard-toolbar-container.toolbar-toplevel .toolbar-bottom{margin-top:0}.quarto-dashboard .dashboard-toolbar-container:not(.toolbar-toplevel) .toolbar{margin-bottom:1em;border-top:none;border-radius:.25rem;border:1px solid rgba(0,0,0,.175)}.quarto-dashboard .vega-embed.has-actions details{width:1.7em;height:2em;position:absolute !important;top:0;right:0}.quarto-dashboard .dashboard-toolbar-container{padding:0}.quarto-dashboard .card .card-header p:last-child,.quarto-dashboard .card .card-footer p:last-child{margin-bottom:0}.quarto-dashboard .card .card-body>.h4:first-child{margin-top:0}.quarto-dashboard .card .card-body{z-index:4}@media(max-width: 767.98px){.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_length,.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_info,.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_paginate{text-align:initial}.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_filter{text-align:right}.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_paginate ul.pagination{justify-content:initial}}.quarto-dashboard .card .card-body .itables .dataTables_wrapper{display:flex;flex-wrap:wrap;justify-content:space-between;align-items:center;padding-top:0}.quarto-dashboard .card .card-body .itables .dataTables_wrapper table{flex-shrink:0}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dt-buttons{margin-bottom:.5em;margin-left:auto;width:fit-content;float:right}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dt-buttons.btn-group{background:#fefbf2;border:none}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dt-buttons .btn-secondary{background-color:#fefbf2;background-image:none;border:solid #dee2e6 1px;padding:.2em .7em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dt-buttons .btn 
span{font-size:.8em;color:#343a40}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_info{margin-left:.5em;margin-bottom:.5em;padding-top:0}@media(min-width: 768px){.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_info{font-size:.875em}}@media(max-width: 767.98px){.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_info{font-size:.8em}}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_filter{margin-bottom:.5em;font-size:.875em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_filter input[type=search]{padding:1px 5px 1px 5px;font-size:.875em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_length{flex-basis:1 1 50%;margin-bottom:.5em;font-size:.875em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_length select{padding:.4em 3em .4em .5em;font-size:.875em;margin-left:.2em;margin-right:.2em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_paginate{flex-shrink:0}@media(min-width: 768px){.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_paginate{margin-left:auto}}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_paginate ul.pagination .paginate_button .page-link{font-size:.8em}.quarto-dashboard .card .card-footer{font-size:.9em}.quarto-dashboard .card .card-toolbar{display:flex;flex-grow:1;flex-direction:row;width:100%;flex-wrap:wrap}.quarto-dashboard .card .card-toolbar>*{font-size:.8em;flex-grow:0}.quarto-dashboard .card .card-toolbar>.card-title{font-size:1em;flex-grow:1;align-self:flex-start;margin-top:.1em}.quarto-dashboard .card .card-toolbar .cell-output-display{display:flex}.quarto-dashboard .card .card-toolbar .shiny-input-container{padding-bottom:.5em;margin-bottom:.5em;width:inherit}.quarto-dashboard .card .card-toolbar .shiny-input-container>.checkbox:first-child{margin-top:6px}.quarto-dashboard 
.card .card-toolbar>*:last-child{margin-right:0}.quarto-dashboard .card .card-toolbar>*>*{margin-right:1em;align-items:baseline}.quarto-dashboard .card .card-toolbar>*>*>a{text-decoration:none;margin-top:auto;margin-bottom:auto}.quarto-dashboard .card .card-toolbar form{width:fit-content}.quarto-dashboard .card .card-toolbar form label{padding-top:.2em;padding-bottom:.2em;width:fit-content}.quarto-dashboard .card .card-toolbar form input[type=date]{width:fit-content}.quarto-dashboard .card .card-toolbar form input[type=color]{width:3em}.quarto-dashboard .card .card-toolbar form button{padding:.4em}.quarto-dashboard .card .card-toolbar form select{width:fit-content}.quarto-dashboard .card .card-toolbar .cell-output-display{display:flex}.quarto-dashboard .card .card-toolbar .shiny-input-container{padding-bottom:.5em;margin-bottom:.5em;width:inherit}.quarto-dashboard .card .card-toolbar .shiny-input-container>.checkbox:first-child{margin-top:6px}.quarto-dashboard .card .card-toolbar>*:last-child{margin-right:0}.quarto-dashboard .card .card-toolbar>*>*{margin-right:1em;align-items:baseline}.quarto-dashboard .card .card-toolbar>*>*>a{text-decoration:none;margin-top:auto;margin-bottom:auto}.quarto-dashboard .card .card-toolbar .shiny-input-container{padding-bottom:0;margin-bottom:0}.quarto-dashboard .card .card-toolbar .shiny-input-container>*{flex-shrink:0;flex-grow:0}.quarto-dashboard .card .card-toolbar .form-group.shiny-input-container:not([role=group])>label{margin-bottom:0}.quarto-dashboard .card .card-toolbar .shiny-input-container.no-baseline{align-items:start;padding-top:6px}.quarto-dashboard .card .card-toolbar .shiny-input-container{display:flex;align-items:baseline}.quarto-dashboard .card .card-toolbar .shiny-input-container label{padding-right:.4em}.quarto-dashboard .card .card-toolbar .shiny-input-container .bslib-input-switch{margin-top:6px}.quarto-dashboard .card .card-toolbar input[type=text]{line-height:1;width:inherit}.quarto-dashboard .card 
.card-toolbar .input-daterange{width:inherit}.quarto-dashboard .card .card-toolbar .input-daterange input[type=text]{height:2.4em;width:10em}.quarto-dashboard .card .card-toolbar .input-daterange .input-group-addon{height:auto;padding:0;margin-left:-5px !important;margin-right:-5px}.quarto-dashboard .card .card-toolbar .input-daterange .input-group-addon .input-group-text{padding-top:0;padding-bottom:0;height:100%}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny{width:10em}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-line{top:9px}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-min,.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-max,.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-from,.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-to,.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-single{top:20px}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-bar{top:8px}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-handle{top:0px}.quarto-dashboard .card .card-toolbar .shiny-input-checkboxgroup>label{margin-top:6px}.quarto-dashboard .card .card-toolbar .shiny-input-checkboxgroup>.shiny-options-group{margin-top:0;align-items:baseline}.quarto-dashboard .card .card-toolbar .shiny-input-radiogroup>label{margin-top:6px}.quarto-dashboard .card .card-toolbar .shiny-input-radiogroup>.shiny-options-group{align-items:baseline;margin-top:0}.quarto-dashboard .card .card-toolbar .shiny-input-radiogroup>.shiny-options-group>.radio{margin-right:.3em}.quarto-dashboard .card .card-toolbar .form-select{padding-top:.2em;padding-bottom:.2em}.quarto-dashboard .card .card-toolbar .shiny-input-select{min-width:6em}.quarto-dashboard .card .card-toolbar div.checkbox{margin-bottom:0px}.quarto-dashboard .card .card-toolbar>.checkbox:first-child{margin-top:6px}.quarto-dashboard .card-body>table>thead{border-top:none}.quarto-dashboard 
.card-body>.table>:not(caption)>*>*{background-color:#fefbf2}.tableFloatingHeaderOriginal{background-color:#fefbf2;position:sticky !important;top:0 !important}.dashboard-data-table{margin-top:-1px}div.value-box-area span.observablehq--number{font-size:calc(clamp(.1em,15cqw,5em)*1.25);line-height:1.2;color:inherit;font-family:var(--bs-body-font-family)}.quarto-listing{padding-bottom:1em}.listing-pagination{padding-top:.5em}ul.pagination{float:right;padding-left:8px;padding-top:.5em}ul.pagination li{padding-right:.75em}ul.pagination li.disabled a,ul.pagination li.active a{color:#fff;text-decoration:none}ul.pagination li:last-of-type{padding-right:0}.listing-actions-group{display:flex}.quarto-listing-filter{margin-bottom:1em;width:200px;margin-left:auto}.quarto-listing-sort{margin-bottom:1em;margin-right:auto;width:auto}.quarto-listing-sort .input-group-text{font-size:.8em}.input-group-text{border-right:none}.quarto-listing-sort select.form-select{font-size:.8em}.listing-no-matching{text-align:center;padding-top:2em;padding-bottom:3em;font-size:1em}#quarto-margin-sidebar .quarto-listing-category{padding-top:0;font-size:1rem}#quarto-margin-sidebar .quarto-listing-category-title{cursor:pointer;font-weight:600;font-size:1rem}.quarto-listing-category .category{cursor:pointer}.quarto-listing-category .category.active{font-weight:600}.quarto-listing-category.category-cloud{display:flex;flex-wrap:wrap;align-items:baseline}.quarto-listing-category.category-cloud .category{padding-right:5px}.quarto-listing-category.category-cloud .category-cloud-1{font-size:.75em}.quarto-listing-category.category-cloud .category-cloud-2{font-size:.95em}.quarto-listing-category.category-cloud .category-cloud-3{font-size:1.15em}.quarto-listing-category.category-cloud .category-cloud-4{font-size:1.35em}.quarto-listing-category.category-cloud .category-cloud-5{font-size:1.55em}.quarto-listing-category.category-cloud .category-cloud-6{font-size:1.75em}.quarto-listing-category.category-cloud 
.category-cloud-7{font-size:1.95em}.quarto-listing-category.category-cloud .category-cloud-8{font-size:2.15em}.quarto-listing-category.category-cloud .category-cloud-9{font-size:2.35em}.quarto-listing-category.category-cloud .category-cloud-10{font-size:2.55em}.quarto-listing-cols-1{grid-template-columns:repeat(1, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-1{grid-template-columns:repeat(1, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-1{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-2{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-2{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-2{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-3{grid-template-columns:repeat(3, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-3{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-3{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-4{grid-template-columns:repeat(4, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-4{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-4{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-5{grid-template-columns:repeat(5, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-5{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-5{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-6{grid-template-columns:repeat(6, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-6{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-6{grid-template-columns:minmax(0, 
1fr);gap:1.5em}}.quarto-listing-cols-7{grid-template-columns:repeat(7, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-7{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-7{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-8{grid-template-columns:repeat(8, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-8{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-8{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-9{grid-template-columns:repeat(9, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-9{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-9{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-10{grid-template-columns:repeat(10, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-10{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-10{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-11{grid-template-columns:repeat(11, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-11{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-11{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-12{grid-template-columns:repeat(12, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-12{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-12{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-grid{gap:1.5em}.quarto-grid-item.borderless{border:none}.quarto-grid-item.borderless .listing-categories .listing-category:last-of-type,.quarto-grid-item.borderless .listing-categories 
.listing-category:first-of-type{padding-left:0}.quarto-grid-item.borderless .listing-categories .listing-category{border:0}.quarto-grid-link{text-decoration:none;color:inherit}.quarto-grid-link:hover{text-decoration:none;color:inherit}.quarto-grid-item h5.title,.quarto-grid-item .title.h5{margin-top:0;margin-bottom:0}.quarto-grid-item .card-footer{display:flex;justify-content:space-between;font-size:.8em}.quarto-grid-item .card-footer p{margin-bottom:0}.quarto-grid-item p.card-img-top{margin-bottom:0}.quarto-grid-item p.card-img-top>img{object-fit:cover}.quarto-grid-item .card-other-values{margin-top:.5em;font-size:.8em}.quarto-grid-item .card-other-values tr{margin-bottom:.5em}.quarto-grid-item .card-other-values tr>td:first-of-type{font-weight:600;padding-right:1em;padding-left:1em;vertical-align:top}.quarto-grid-item div.post-contents{display:flex;flex-direction:column;text-decoration:none;height:100%}.quarto-grid-item .listing-item-img-placeholder{background-color:rgba(52,58,64,.25);flex-shrink:0}.quarto-grid-item .card-attribution{padding-top:1em;display:flex;gap:1em;text-transform:uppercase;color:#6c757d;font-weight:500;flex-grow:10;align-items:flex-end}.quarto-grid-item .description{padding-bottom:1em}.quarto-grid-item .card-attribution .date{align-self:flex-end}.quarto-grid-item .card-attribution.justify{justify-content:space-between}.quarto-grid-item .card-attribution.start{justify-content:flex-start}.quarto-grid-item .card-attribution.end{justify-content:flex-end}.quarto-grid-item .card-title{margin-bottom:.1em}.quarto-grid-item .card-subtitle{padding-top:.25em}.quarto-grid-item .card-text{font-size:.9em}.quarto-grid-item .listing-reading-time{padding-bottom:.25em}.quarto-grid-item .card-text-small{font-size:.8em}.quarto-grid-item .card-subtitle.subtitle{font-size:.9em;font-weight:600;padding-bottom:.5em}.quarto-grid-item .listing-categories{display:flex;flex-wrap:wrap;padding-bottom:5px}.quarto-grid-item .listing-categories 
.listing-category{color:#6c757d;border:solid 1px #dee2e6;border-radius:.25rem;text-transform:uppercase;font-size:.65em;padding-left:.5em;padding-right:.5em;padding-top:.15em;padding-bottom:.15em;cursor:pointer;margin-right:4px;margin-bottom:4px}.quarto-grid-item.card-right{text-align:right}.quarto-grid-item.card-right .listing-categories{justify-content:flex-end}.quarto-grid-item.card-left{text-align:left}.quarto-grid-item.card-center{text-align:center}.quarto-grid-item.card-center .listing-description{text-align:justify}.quarto-grid-item.card-center .listing-categories{justify-content:center}table.quarto-listing-table td.image{padding:0px}table.quarto-listing-table td.image img{width:100%;max-width:50px;object-fit:contain}table.quarto-listing-table a{text-decoration:none;word-break:keep-all}table.quarto-listing-table th a{color:inherit}table.quarto-listing-table th a.asc:after{margin-bottom:-2px;margin-left:5px;display:inline-block;height:1rem;width:1rem;background-repeat:no-repeat;background-size:1rem 1rem;background-image:url('data:image/svg+xml,');content:""}table.quarto-listing-table th a.desc:after{margin-bottom:-2px;margin-left:5px;display:inline-block;height:1rem;width:1rem;background-repeat:no-repeat;background-size:1rem 1rem;background-image:url('data:image/svg+xml,');content:""}table.quarto-listing-table.table-hover td{cursor:pointer}.quarto-post.image-left{flex-direction:row}.quarto-post.image-right{flex-direction:row-reverse}@media(max-width: 767.98px){.quarto-post.image-right,.quarto-post.image-left{gap:0em;flex-direction:column}.quarto-post .metadata{padding-bottom:1em;order:2}.quarto-post .body{order:1}.quarto-post .thumbnail{order:3}}.list.quarto-listing-default div:last-of-type{border-bottom:none}@media(min-width: 992px){.quarto-listing-container-default{margin-right:2em}}div.quarto-post{display:flex;gap:2em;margin-bottom:1.5em;border-bottom:1px solid #dee2e6}@media(max-width: 767.98px){div.quarto-post{padding-bottom:1em}}div.quarto-post 
.metadata{flex-basis:20%;flex-grow:0;margin-top:.2em;flex-shrink:10}div.quarto-post .thumbnail{flex-basis:30%;flex-grow:0;flex-shrink:0}div.quarto-post .thumbnail img{margin-top:.4em;width:100%;object-fit:cover}div.quarto-post .body{flex-basis:45%;flex-grow:1;flex-shrink:0}div.quarto-post .body h3.listing-title,div.quarto-post .body .listing-title.h3{margin-top:0px;margin-bottom:0px;border-bottom:none}div.quarto-post .body .listing-subtitle{font-size:.875em;margin-bottom:.5em;margin-top:.2em}div.quarto-post .body .description{font-size:.9em}div.quarto-post .body pre code{white-space:pre-wrap}div.quarto-post a{color:#343a40;text-decoration:none}div.quarto-post .metadata{display:flex;flex-direction:column;font-size:.8em;font-family:"Source Sans Pro",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol";flex-basis:33%}div.quarto-post .listing-categories{display:flex;flex-wrap:wrap;padding-bottom:5px}div.quarto-post .listing-categories .listing-category{color:#6c757d;border:solid 1px #dee2e6;border-radius:.25rem;text-transform:uppercase;font-size:.65em;padding-left:.5em;padding-right:.5em;padding-top:.15em;padding-bottom:.15em;cursor:pointer;margin-right:4px;margin-bottom:4px}div.quarto-post .listing-description{margin-bottom:.5em}div.quarto-about-jolla{display:flex !important;flex-direction:column;align-items:center;margin-top:10%;padding-bottom:1em}div.quarto-about-jolla .about-image{object-fit:cover;margin-left:auto;margin-right:auto;margin-bottom:1.5em}div.quarto-about-jolla img.round{border-radius:50%}div.quarto-about-jolla img.rounded{border-radius:10px}div.quarto-about-jolla .quarto-title h1.title,div.quarto-about-jolla .quarto-title .title.h1{text-align:center}div.quarto-about-jolla .quarto-title .description{text-align:center}div.quarto-about-jolla h2,div.quarto-about-jolla .h2{border-bottom:none}div.quarto-about-jolla .about-sep{width:60%}div.quarto-about-jolla 
main{text-align:center}div.quarto-about-jolla .about-links{display:flex}@media(min-width: 992px){div.quarto-about-jolla .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-jolla .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-jolla .about-link{color:#626d78;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-jolla .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-jolla .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-jolla .about-link:hover{color:#2761e3}div.quarto-about-jolla .about-link i.bi{margin-right:.15em}div.quarto-about-solana{display:flex !important;flex-direction:column;padding-top:3em !important;padding-bottom:1em}div.quarto-about-solana .about-entity{display:flex !important;align-items:start;justify-content:space-between}@media(min-width: 992px){div.quarto-about-solana .about-entity{flex-direction:row}}@media(max-width: 991.98px){div.quarto-about-solana .about-entity{flex-direction:column-reverse;align-items:center;text-align:center}}div.quarto-about-solana .about-entity .entity-contents{display:flex;flex-direction:column}@media(max-width: 767.98px){div.quarto-about-solana .about-entity .entity-contents{width:100%}}div.quarto-about-solana .about-entity .about-image{object-fit:cover}@media(max-width: 991.98px){div.quarto-about-solana .about-entity .about-image{margin-bottom:1.5em}}div.quarto-about-solana .about-entity img.round{border-radius:50%}div.quarto-about-solana .about-entity img.rounded{border-radius:10px}div.quarto-about-solana .about-entity .about-links{display:flex;justify-content:left;padding-bottom:1.2em}@media(min-width: 992px){div.quarto-about-solana .about-entity .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 
991.98px){div.quarto-about-solana .about-entity .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-solana .about-entity .about-link{color:#626d78;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-solana .about-entity .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-solana .about-entity .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-solana .about-entity .about-link:hover{color:#2761e3}div.quarto-about-solana .about-entity .about-link i.bi{margin-right:.15em}div.quarto-about-solana .about-contents{padding-right:1.5em;flex-basis:0;flex-grow:1}div.quarto-about-solana .about-contents main.content{margin-top:0}div.quarto-about-solana .about-contents h2,div.quarto-about-solana .about-contents .h2{border-bottom:none}div.quarto-about-trestles{display:flex !important;flex-direction:row;padding-top:3em !important;padding-bottom:1em}@media(max-width: 991.98px){div.quarto-about-trestles{flex-direction:column;padding-top:0em !important}}div.quarto-about-trestles .about-entity{display:flex !important;flex-direction:column;align-items:center;text-align:center;padding-right:1em}@media(min-width: 992px){div.quarto-about-trestles .about-entity{flex:0 0 42%}}div.quarto-about-trestles .about-entity .about-image{object-fit:cover;margin-bottom:1.5em}div.quarto-about-trestles .about-entity img.round{border-radius:50%}div.quarto-about-trestles .about-entity img.rounded{border-radius:10px}div.quarto-about-trestles .about-entity .about-links{display:flex;justify-content:center}@media(min-width: 992px){div.quarto-about-trestles .about-entity .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-trestles .about-entity .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-trestles .about-entity 
.about-link{color:#626d78;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-trestles .about-entity .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-trestles .about-entity .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-trestles .about-entity .about-link:hover{color:#2761e3}div.quarto-about-trestles .about-entity .about-link i.bi{margin-right:.15em}div.quarto-about-trestles .about-contents{flex-basis:0;flex-grow:1}div.quarto-about-trestles .about-contents h2,div.quarto-about-trestles .about-contents .h2{border-bottom:none}@media(min-width: 992px){div.quarto-about-trestles .about-contents{border-left:solid 1px #dee2e6;padding-left:1.5em}}div.quarto-about-trestles .about-contents main.content{margin-top:0}div.quarto-about-marquee{padding-bottom:1em}div.quarto-about-marquee .about-contents{display:flex;flex-direction:column}div.quarto-about-marquee .about-image{max-height:550px;margin-bottom:1.5em;object-fit:cover}div.quarto-about-marquee img.round{border-radius:50%}div.quarto-about-marquee img.rounded{border-radius:10px}div.quarto-about-marquee h2,div.quarto-about-marquee .h2{border-bottom:none}div.quarto-about-marquee .about-links{display:flex;justify-content:center;padding-top:1.5em}@media(min-width: 992px){div.quarto-about-marquee .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-marquee .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-marquee .about-link{color:#626d78;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-marquee .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-marquee .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-marquee 
.about-link:hover{color:#2761e3}div.quarto-about-marquee .about-link i.bi{margin-right:.15em}@media(min-width: 992px){div.quarto-about-marquee .about-link{border:none}}div.quarto-about-broadside{display:flex;flex-direction:column;padding-bottom:1em}div.quarto-about-broadside .about-main{display:flex !important;padding-top:0 !important}@media(min-width: 992px){div.quarto-about-broadside .about-main{flex-direction:row;align-items:flex-start}}@media(max-width: 991.98px){div.quarto-about-broadside .about-main{flex-direction:column}}@media(max-width: 991.98px){div.quarto-about-broadside .about-main .about-entity{flex-shrink:0;width:100%;height:450px;margin-bottom:1.5em;background-size:cover;background-repeat:no-repeat}}@media(min-width: 992px){div.quarto-about-broadside .about-main .about-entity{flex:0 10 50%;margin-right:1.5em;width:100%;height:100%;background-size:100%;background-repeat:no-repeat}}div.quarto-about-broadside .about-main .about-contents{padding-top:14px;flex:0 0 50%}div.quarto-about-broadside h2,div.quarto-about-broadside .h2{border-bottom:none}div.quarto-about-broadside .about-sep{margin-top:1.5em;width:60%;align-self:center}div.quarto-about-broadside .about-links{display:flex;justify-content:center;column-gap:20px;padding-top:1.5em}@media(min-width: 992px){div.quarto-about-broadside .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-broadside .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-broadside .about-link{color:#626d78;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-broadside .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-broadside .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-broadside .about-link:hover{color:#2761e3}div.quarto-about-broadside .about-link 
i.bi{margin-right:.15em}@media(min-width: 992px){div.quarto-about-broadside .about-link{border:none}}.tippy-box[data-theme~=quarto]{background-color:#fefbf2;border:solid 1px #dee2e6;border-radius:.25rem;color:#343a40;font-size:.875rem}.tippy-box[data-theme~=quarto]>.tippy-backdrop{background-color:#fefbf2}.tippy-box[data-theme~=quarto]>.tippy-arrow:after,.tippy-box[data-theme~=quarto]>.tippy-svg-arrow:after{content:"";position:absolute;z-index:-1}.tippy-box[data-theme~=quarto]>.tippy-arrow:after{border-color:rgba(0,0,0,0);border-style:solid}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-6px}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-6px}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-6px}.tippy-box[data-placement^=left]>.tippy-arrow:before{right:-6px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-arrow:before{border-top-color:#fefbf2}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-arrow:after{border-top-color:#dee2e6;border-width:7px 7px 0;top:17px;left:1px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-svg-arrow>svg{top:16px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-svg-arrow:after{top:17px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-arrow:before{border-bottom-color:#fefbf2;bottom:16px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-arrow:after{border-bottom-color:#dee2e6;border-width:0 7px 7px;bottom:17px;left:1px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-svg-arrow>svg{bottom:15px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-svg-arrow:after{bottom:17px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-arrow:before{border-left-color:#fefbf2}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-arrow:after{border-left-color:#dee2e6;border-width:7px 0 7px 
7px;left:17px;top:1px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-svg-arrow>svg{left:11px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-svg-arrow:after{left:12px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-arrow:before{border-right-color:#fefbf2;right:16px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-arrow:after{border-width:7px 7px 7px 0;right:17px;top:1px;border-right-color:#dee2e6}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-svg-arrow>svg{right:11px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-svg-arrow:after{right:12px}.tippy-box[data-theme~=quarto]>.tippy-svg-arrow{fill:#343a40}.tippy-box[data-theme~=quarto]>.tippy-svg-arrow:after{background-image:url(data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iNiIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMCA2czEuNzk2LS4wMTMgNC42Ny0zLjYxNUM1Ljg1MS45IDYuOTMuMDA2IDggMGMxLjA3LS4wMDYgMi4xNDguODg3IDMuMzQzIDIuMzg1QzE0LjIzMyA2LjAwNSAxNiA2IDE2IDZIMHoiIGZpbGw9InJnYmEoMCwgOCwgMTYsIDAuMikiLz48L3N2Zz4=);background-size:16px 6px;width:16px;height:6px}.top-right{position:absolute;top:1em;right:1em}.visually-hidden{border:0;clip:rect(0 0 0 0);height:auto;margin:0;overflow:hidden;padding:0;position:absolute;width:1px;white-space:nowrap}.hidden{display:none !important}.zindex-bottom{z-index:-1 !important}figure.figure{display:block}.quarto-layout-panel{margin-bottom:1em}.quarto-layout-panel>figure{width:100%}.quarto-layout-panel>figure>figcaption,.quarto-layout-panel>.panel-caption{margin-top:10pt}.quarto-layout-panel>.table-caption{margin-top:0px}.table-caption 
p{margin-bottom:.5em}.quarto-layout-row{display:flex;flex-direction:row;align-items:flex-start}.quarto-layout-valign-top{align-items:flex-start}.quarto-layout-valign-bottom{align-items:flex-end}.quarto-layout-valign-center{align-items:center}.quarto-layout-cell{position:relative;margin-right:20px}.quarto-layout-cell:last-child{margin-right:0}.quarto-layout-cell figure,.quarto-layout-cell>p{margin:.2em}.quarto-layout-cell img{max-width:100%}.quarto-layout-cell .html-widget{width:100% !important}.quarto-layout-cell div figure p{margin:0}.quarto-layout-cell figure{display:block;margin-inline-start:0;margin-inline-end:0}.quarto-layout-cell table{display:inline-table}.quarto-layout-cell-subref figcaption,figure .quarto-layout-row figure figcaption{text-align:center;font-style:italic}.quarto-figure{position:relative;margin-bottom:1em}.quarto-figure>figure{width:100%;margin-bottom:0}.quarto-figure-left>figure>p,.quarto-figure-left>figure>div{text-align:left}.quarto-figure-center>figure>p,.quarto-figure-center>figure>div{text-align:center}.quarto-figure-right>figure>p,.quarto-figure-right>figure>div{text-align:right}.quarto-figure>figure>div.cell-annotation,.quarto-figure>figure>div 
code{text-align:left}figure>p:empty{display:none}figure>p:first-child{margin-top:0;margin-bottom:0}figure>figcaption.quarto-float-caption-bottom{margin-bottom:.5em}figure>figcaption.quarto-float-caption-top{margin-top:.5em}div[id^=tbl-]{position:relative}.quarto-figure>.anchorjs-link{position:absolute;top:.6em;right:.5em}div[id^=tbl-]>.anchorjs-link{position:absolute;top:.7em;right:.3em}.quarto-figure:hover>.anchorjs-link,div[id^=tbl-]:hover>.anchorjs-link,h2:hover>.anchorjs-link,.h2:hover>.anchorjs-link,h3:hover>.anchorjs-link,.h3:hover>.anchorjs-link,h4:hover>.anchorjs-link,.h4:hover>.anchorjs-link,h5:hover>.anchorjs-link,.h5:hover>.anchorjs-link,h6:hover>.anchorjs-link,.h6:hover>.anchorjs-link,.reveal-anchorjs-link>.anchorjs-link{opacity:1}#title-block-header{margin-block-end:1rem;position:relative;margin-top:-1px}#title-block-header .abstract{margin-block-start:1rem}#title-block-header .abstract .abstract-title{font-weight:600}#title-block-header a{text-decoration:none}#title-block-header .author,#title-block-header .date,#title-block-header .doi{margin-block-end:.2rem}#title-block-header .quarto-title-block>div{display:flex}#title-block-header .quarto-title-block>div>h1,#title-block-header .quarto-title-block>div>.h1{flex-grow:1}#title-block-header .quarto-title-block>div>button{flex-shrink:0;height:2.25rem;margin-top:0}@media(min-width: 992px){#title-block-header .quarto-title-block>div>button{margin-top:5px}}tr.header>th>p:last-of-type{margin-bottom:0px}table,table.table{margin-top:.5rem;margin-bottom:.5rem}caption,.table-caption{padding-top:.5rem;padding-bottom:.5rem;text-align:center}figure.quarto-float-tbl figcaption.quarto-float-caption-top{margin-top:.5rem;margin-bottom:.25rem;text-align:center}figure.quarto-float-tbl 
figcaption.quarto-float-caption-bottom{padding-top:.25rem;margin-bottom:.5rem;text-align:center}.utterances{max-width:none;margin-left:-8px}iframe{margin-bottom:1em}details{margin-bottom:1em}details[show]{margin-bottom:0}details>summary{color:#6c757d}details>summary>p:only-child{display:inline}pre.sourceCode,code.sourceCode{position:relative}dd code:not(.sourceCode),p code:not(.sourceCode){white-space:pre-wrap}code{white-space:pre}@media print{code{white-space:pre-wrap}}pre>code{display:block}pre>code.sourceCode{white-space:pre-wrap}pre>code.sourceCode>span>a:first-child::before{text-decoration:none}pre.code-overflow-wrap>code.sourceCode{white-space:pre-wrap}pre.code-overflow-scroll>code.sourceCode{white-space:pre}code a:any-link{color:inherit;text-decoration:none}code a:hover{color:inherit;text-decoration:underline}ul.task-list{padding-left:1em}[data-tippy-root]{display:inline-block}.tippy-content .footnote-back{display:none}.footnote-back{margin-left:.2em}.tippy-content{overflow-x:auto}.quarto-embedded-source-code{display:none}.quarto-unresolved-ref{font-weight:600}.quarto-cover-image{max-width:35%;float:right;margin-left:30px}.cell-output-display .widget-subarea{margin-bottom:1em}.cell-output-display:not(.no-overflow-x),.knitsql-table:not(.no-overflow-x){overflow-x:auto}.panel-input{margin-bottom:1em}.panel-input>div,.panel-input>div>div{display:inline-block;vertical-align:top;padding-right:12px}.panel-input>p:last-child{margin-bottom:0}.layout-sidebar{margin-bottom:1em}.layout-sidebar .tab-content{border:none}.tab-content>.page-columns.active{display:grid}div.sourceCode>iframe{width:100%;height:300px;margin-bottom:-0.5em}a{text-underline-offset:3px}.callout pre.sourceCode{padding-left:0}div.ansi-escaped-output{font-family:monospace;display:block}/*! 
+* +* ansi colors from IPython notebook's +* +* we also add `bright-[color]-` synonyms for the `-[color]-intense` classes since +* that seems to be what ansi_up emits +* +*/.ansi-black-fg{color:#3e424d}.ansi-black-bg{background-color:#3e424d}.ansi-black-intense-black,.ansi-bright-black-fg{color:#282c36}.ansi-black-intense-black,.ansi-bright-black-bg{background-color:#282c36}.ansi-red-fg{color:#e75c58}.ansi-red-bg{background-color:#e75c58}.ansi-red-intense-red,.ansi-bright-red-fg{color:#b22b31}.ansi-red-intense-red,.ansi-bright-red-bg{background-color:#b22b31}.ansi-green-fg{color:#00a250}.ansi-green-bg{background-color:#00a250}.ansi-green-intense-green,.ansi-bright-green-fg{color:#007427}.ansi-green-intense-green,.ansi-bright-green-bg{background-color:#007427}.ansi-yellow-fg{color:#ddb62b}.ansi-yellow-bg{background-color:#ddb62b}.ansi-yellow-intense-yellow,.ansi-bright-yellow-fg{color:#b27d12}.ansi-yellow-intense-yellow,.ansi-bright-yellow-bg{background-color:#b27d12}.ansi-blue-fg{color:#208ffb}.ansi-blue-bg{background-color:#208ffb}.ansi-blue-intense-blue,.ansi-bright-blue-fg{color:#0065ca}.ansi-blue-intense-blue,.ansi-bright-blue-bg{background-color:#0065ca}.ansi-magenta-fg{color:#d160c4}.ansi-magenta-bg{background-color:#d160c4}.ansi-magenta-intense-magenta,.ansi-bright-magenta-fg{color:#a03196}.ansi-magenta-intense-magenta,.ansi-bright-magenta-bg{background-color:#a03196}.ansi-cyan-fg{color:#60c6c8}.ansi-cyan-bg{background-color:#60c6c8}.ansi-cyan-intense-cyan,.ansi-bright-cyan-fg{color:#258f8f}.ansi-cyan-intense-cyan,.ansi-bright-cyan-bg{background-color:#258f8f}.ansi-white-fg{color:#c5c1b4}.ansi-white-bg{background-color:#c5c1b4}.ansi-white-intense-white,.ansi-bright-white-fg{color:#a1a6b2}.ansi-white-intense-white,.ansi-bright-white-bg{background-color:#a1a6b2}.ansi-default-inverse-fg{color:#fff}.ansi-default-inverse-bg{background-color:#000}.ansi-bold{font-weight:bold}.ansi-underline{text-decoration:underline}:root{--quarto-body-bg: 
#FEFBF2;--quarto-body-color: #343a40;--quarto-text-muted: #6c757d;--quarto-border-color: #dee2e6;--quarto-border-width: 1px}table.gt_table{color:var(--quarto-body-color);font-size:1em;width:100%;background-color:rgba(0,0,0,0);border-top-width:inherit;border-bottom-width:inherit;border-color:var(--quarto-border-color)}table.gt_table th.gt_column_spanner_outer{color:var(--quarto-body-color);background-color:rgba(0,0,0,0);border-top-width:inherit;border-bottom-width:inherit;border-color:var(--quarto-border-color)}table.gt_table th.gt_col_heading{color:var(--quarto-body-color);font-weight:bold;background-color:rgba(0,0,0,0)}table.gt_table thead.gt_col_headings{border-bottom:1px solid currentColor;border-top-width:inherit;border-top-color:var(--quarto-border-color)}table.gt_table thead.gt_col_headings:not(:first-child){border-top-width:1px;border-top-color:var(--quarto-border-color)}table.gt_table td.gt_row{border-bottom-width:1px;border-bottom-color:var(--quarto-border-color);border-top-width:0px}table.gt_table tbody.gt_table_body{border-top-width:1px;border-bottom-width:1px;border-bottom-color:var(--quarto-border-color);border-top-color:currentColor}div.columns{display:initial;gap:initial}div.column{display:inline-block;overflow-x:initial;vertical-align:top;width:50%}.code-annotation-tip-content{word-wrap:break-word}.code-annotation-container-hidden{display:none !important}dl.code-annotation-container-grid{display:grid;grid-template-columns:min-content auto}dl.code-annotation-container-grid dt{grid-column:1}dl.code-annotation-container-grid dd{grid-column:2}pre.sourceCode.code-annotation-code{padding-right:0}code.sourceCode .code-annotation-anchor{z-index:100;position:relative;float:right;background-color:rgba(0,0,0,0)}input[type=checkbox]{margin-right:.5ch}:root{--mermaid-bg-color: #FEFBF2;--mermaid-edge-color: #343a40;--mermaid-node-fg-color: #343a40;--mermaid-fg-color: #343a40;--mermaid-fg-color--lighter: #4b545c;--mermaid-fg-color--lightest: 
#626d78;--mermaid-font-family: Source Sans Pro, -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Helvetica Neue, Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol;--mermaid-label-bg-color: #FEFBF2;--mermaid-label-fg-color: #2780e3;--mermaid-node-bg-color: rgba(39, 128, 227, 0.1);--mermaid-node-fg-color: #343a40}@media print{:root{font-size:11pt}#quarto-sidebar,#TOC,.nav-page{display:none}.page-columns .content{grid-column-start:page-start}.fixed-top{position:relative}.panel-caption,.figure-caption,figcaption{color:#666}}.code-copy-button{position:absolute;top:0;right:0;border:0;margin-top:5px;margin-right:5px;background-color:rgba(0,0,0,0);z-index:3}.code-copy-button:focus{outline:none}.code-copy-button-tooltip{font-size:.75em}.code-copy-button>.bi::before{display:inline-block;height:1rem;width:1rem;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:1rem 1rem}.code-copy-button-checked>.bi::before{background-image:url('data:image/svg+xml,')}.code-copy-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}.code-copy-button-checked:hover>.bi::before{background-image:url('data:image/svg+xml,')}main ol ol,main ul ul,main ol ul,main ul ol{margin-bottom:1em}ul>li:not(:has(>p))>ul,ol>li:not(:has(>p))>ul,ul>li:not(:has(>p))>ol,ol>li:not(:has(>p))>ol{margin-bottom:0}ul>li:not(:has(>p))>ul>li:has(>p),ol>li:not(:has(>p))>ul>li:has(>p),ul>li:not(:has(>p))>ol>li:has(>p),ol>li:not(:has(>p))>ol>li:has(>p){margin-top:1rem}body{margin:0}main.page-columns>header>h1.title,main.page-columns>header>.title.h1{margin-bottom:0}@media(min-width: 992px){body .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc(950px - 3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] minmax(90px, 174px) [page-end-inset] 
42px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.fullcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc(950px - 3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] 42px [page-end-inset page-end] 5fr [screen-end-inset] 1.5em}body.slimcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc(950px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 35px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.listing:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(950px - 3em)) [body-content-end] 3em [body-end] 50px [body-end-outset] minmax(0px, 300px) [page-end-inset] minmax(50px, 100px) [page-end] 1fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 175px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] 
minmax(0px, 175px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] minmax(25px, 50px) [page-start-inset] minmax(50px, 150px) [body-start-outset] minmax(25px, 50px) [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] minmax(30px, 60px) [body-end-outset] minmax(60px, 180px) [page-end-inset] minmax(30px, 60px) [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(1100px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(60px, 120px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(1100px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 50px [page-start-inset] minmax(50px, 150px) [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.slimcontent 
.page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(450px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(1100px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 50px [page-start-inset] minmax(50px, 150px) [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(450px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(60px, 180px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] minmax(25px, 50px) [page-start-inset] minmax(50px, 150px) [body-start-outset] minmax(25px, 50px) [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] minmax(30px, 60px) [body-end-outset] minmax(60px, 180px) [page-end-inset] minmax(30px, 60px) [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}}@media(max-width: 991.98px){body .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 
3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] minmax(90px, 174px) [page-end-inset] 42px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.fullcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.slimcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] minmax(90px, 174px) [page-end-inset] 42px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.listing:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc(1350px - 3em)) [body-content-end body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 145px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 145px) [body-start-outset] 35px [body-start] 
1.5em [body-content-start] minmax(450px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1.5em [body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(90px, 180px) [page-end-inset] 30px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(30px, 60px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc(1100px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 4fr [screen-end-inset] 1.5em [screen-end]}body.docked.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(30px, 60px) [page-end-inset] 60px [page-end] 5fr 
[screen-end-inset] 1.5em [screen-end]}body.docked.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(30px, 60px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] minmax(90px, 174px) [page-end-inset] 42px [page-end] 4fr [screen-end-inset] 1.5em [screen-end]}body.floating.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(90px, 180px) [page-end-inset] 30px [page-end] 4fr [screen-end-inset] 1.5em [screen-end]}}@media(max-width: 767.98px){body .page-columns,body.fullcontent:not(.floating):not(.docked) .page-columns,body.slimcontent:not(.floating):not(.docked) .page-columns,body.docked .page-columns,body.docked.slimcontent .page-columns,body.docked.fullcontent .page-columns,body.floating .page-columns,body.floating.slimcontent .page-columns,body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em 
[screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}nav[role=doc-toc]{display:none}}body,.page-row-navigation{grid-template-rows:[page-top] max-content [contents-top] max-content [contents-bottom] max-content [page-bottom]}.page-rows-contents{grid-template-rows:[content-top] minmax(max-content, 1fr) [content-bottom] minmax(60px, max-content) [page-bottom]}.page-full{grid-column:screen-start/screen-end !important}.page-columns>*{grid-column:body-content-start/body-content-end}.page-columns.column-page>*{grid-column:page-start/page-end}.page-columns.column-page-left .page-columns.page-full>*,.page-columns.column-page-left>*{grid-column:page-start/body-content-end}.page-columns.column-page-right .page-columns.page-full>*,.page-columns.column-page-right>*{grid-column:body-content-start/page-end}.page-rows{grid-auto-rows:auto}.header{grid-column:screen-start/screen-end;grid-row:page-top/contents-top}#quarto-content{padding:0;grid-column:screen-start/screen-end;grid-row:contents-top/contents-bottom}body.floating .sidebar.sidebar-navigation{grid-column:page-start/body-start;grid-row:content-top/page-bottom}body.docked .sidebar.sidebar-navigation{grid-column:screen-start/body-start;grid-row:content-top/page-bottom}.sidebar.toc-left{grid-column:page-start/body-start;grid-row:content-top/page-bottom}.sidebar.margin-sidebar{grid-column:body-end/page-end;grid-row:content-top/page-bottom}.page-columns 
.content{grid-column:body-content-start/body-content-end;grid-row:content-top/content-bottom;align-content:flex-start}.page-columns .page-navigation{grid-column:body-content-start/body-content-end;grid-row:content-bottom/page-bottom}.page-columns .footer{grid-column:screen-start/screen-end;grid-row:contents-bottom/page-bottom}.page-columns .column-body{grid-column:body-content-start/body-content-end}.page-columns .column-body-fullbleed{grid-column:body-start/body-end}.page-columns .column-body-outset{grid-column:body-start-outset/body-end-outset;z-index:998;opacity:.999}.page-columns .column-body-outset table{background:#fefbf2}.page-columns .column-body-outset-left{grid-column:body-start-outset/body-content-end;z-index:998;opacity:.999}.page-columns .column-body-outset-left table{background:#fefbf2}.page-columns .column-body-outset-right{grid-column:body-content-start/body-end-outset;z-index:998;opacity:.999}.page-columns .column-body-outset-right table{background:#fefbf2}.page-columns .column-page{grid-column:page-start/page-end;z-index:998;opacity:.999}.page-columns .column-page table{background:#fefbf2}.page-columns .column-page-inset{grid-column:page-start-inset/page-end-inset;z-index:998;opacity:.999}.page-columns .column-page-inset table{background:#fefbf2}.page-columns .column-page-inset-left{grid-column:page-start-inset/body-content-end;z-index:998;opacity:.999}.page-columns .column-page-inset-left table{background:#fefbf2}.page-columns .column-page-inset-right{grid-column:body-content-start/page-end-inset;z-index:998;opacity:.999}.page-columns .column-page-inset-right figcaption table{background:#fefbf2}.page-columns .column-page-left{grid-column:page-start/body-content-end;z-index:998;opacity:.999}.page-columns .column-page-left table{background:#fefbf2}.page-columns .column-page-right{grid-column:body-content-start/page-end;z-index:998;opacity:.999}.page-columns .column-page-right figcaption table{background:#fefbf2}#quarto-content.page-columns 
#quarto-margin-sidebar,#quarto-content.page-columns #quarto-sidebar{z-index:1}@media(max-width: 991.98px){#quarto-content.page-columns #quarto-margin-sidebar.collapse,#quarto-content.page-columns #quarto-sidebar.collapse,#quarto-content.page-columns #quarto-margin-sidebar.collapsing,#quarto-content.page-columns #quarto-sidebar.collapsing{z-index:1055}}#quarto-content.page-columns main.column-page,#quarto-content.page-columns main.column-page-right,#quarto-content.page-columns main.column-page-left{z-index:0}.page-columns .column-screen-inset{grid-column:screen-start-inset/screen-end-inset;z-index:998;opacity:.999}.page-columns .column-screen-inset table{background:#fefbf2}.page-columns .column-screen-inset-left{grid-column:screen-start-inset/body-content-end;z-index:998;opacity:.999}.page-columns .column-screen-inset-left table{background:#fefbf2}.page-columns .column-screen-inset-right{grid-column:body-content-start/screen-end-inset;z-index:998;opacity:.999}.page-columns .column-screen-inset-right table{background:#fefbf2}.page-columns .column-screen{grid-column:screen-start/screen-end;z-index:998;opacity:.999}.page-columns .column-screen table{background:#fefbf2}.page-columns .column-screen-left{grid-column:screen-start/body-content-end;z-index:998;opacity:.999}.page-columns .column-screen-left table{background:#fefbf2}.page-columns .column-screen-right{grid-column:body-content-start/screen-end;z-index:998;opacity:.999}.page-columns .column-screen-right table{background:#fefbf2}.page-columns 
.column-screen-inset-shaded{grid-column:screen-start/screen-end;padding:1em;background:#f8f9fa;z-index:998;opacity:.999;margin-bottom:1em}.zindex-content{z-index:998;opacity:.999}.zindex-modal{z-index:1055;opacity:.999}.zindex-over-content{z-index:999;opacity:.999}img.img-fluid.column-screen,img.img-fluid.column-screen-inset-shaded,img.img-fluid.column-screen-inset,img.img-fluid.column-screen-inset-left,img.img-fluid.column-screen-inset-right,img.img-fluid.column-screen-left,img.img-fluid.column-screen-right{width:100%}@media(min-width: 992px){.margin-caption,div.aside,aside:not(.footnotes):not(.sidebar),.column-margin{grid-column:body-end/page-end !important;z-index:998}.column-sidebar{grid-column:page-start/body-start !important;z-index:998}.column-leftmargin{grid-column:screen-start-inset/body-start !important;z-index:998}.no-row-height{height:1em;overflow:visible}}@media(max-width: 991.98px){.margin-caption,div.aside,aside:not(.footnotes):not(.sidebar),.column-margin{grid-column:body-end/page-end !important;z-index:998}.no-row-height{height:1em;overflow:visible}.page-columns.page-full{overflow:visible}.page-columns.toc-left .margin-caption,.page-columns.toc-left div.aside,.page-columns.toc-left aside:not(.footnotes):not(.sidebar),.page-columns.toc-left .column-margin{grid-column:body-content-start/body-content-end !important;z-index:998;opacity:.999}.page-columns.toc-left .no-row-height{height:initial;overflow:initial}}@media(max-width: 767.98px){.margin-caption,div.aside,aside:not(.footnotes):not(.sidebar),.column-margin{grid-column:body-content-start/body-content-end !important;z-index:998;opacity:.999}.no-row-height{height:initial;overflow:initial}#quarto-margin-sidebar{display:none}#quarto-sidebar-toc-left{display:none}.hidden-sm{display:none}}.panel-grid{display:grid;grid-template-rows:repeat(1, 1fr);grid-template-columns:repeat(24, 1fr);gap:1em}.panel-grid .g-col-1{grid-column:auto/span 1}.panel-grid .g-col-2{grid-column:auto/span 2}.panel-grid 
.g-col-3{grid-column:auto/span 3}.panel-grid .g-col-4{grid-column:auto/span 4}.panel-grid .g-col-5{grid-column:auto/span 5}.panel-grid .g-col-6{grid-column:auto/span 6}.panel-grid .g-col-7{grid-column:auto/span 7}.panel-grid .g-col-8{grid-column:auto/span 8}.panel-grid .g-col-9{grid-column:auto/span 9}.panel-grid .g-col-10{grid-column:auto/span 10}.panel-grid .g-col-11{grid-column:auto/span 11}.panel-grid .g-col-12{grid-column:auto/span 12}.panel-grid .g-col-13{grid-column:auto/span 13}.panel-grid .g-col-14{grid-column:auto/span 14}.panel-grid .g-col-15{grid-column:auto/span 15}.panel-grid .g-col-16{grid-column:auto/span 16}.panel-grid .g-col-17{grid-column:auto/span 17}.panel-grid .g-col-18{grid-column:auto/span 18}.panel-grid .g-col-19{grid-column:auto/span 19}.panel-grid .g-col-20{grid-column:auto/span 20}.panel-grid .g-col-21{grid-column:auto/span 21}.panel-grid .g-col-22{grid-column:auto/span 22}.panel-grid .g-col-23{grid-column:auto/span 23}.panel-grid .g-col-24{grid-column:auto/span 24}.panel-grid .g-start-1{grid-column-start:1}.panel-grid .g-start-2{grid-column-start:2}.panel-grid .g-start-3{grid-column-start:3}.panel-grid .g-start-4{grid-column-start:4}.panel-grid .g-start-5{grid-column-start:5}.panel-grid .g-start-6{grid-column-start:6}.panel-grid .g-start-7{grid-column-start:7}.panel-grid .g-start-8{grid-column-start:8}.panel-grid .g-start-9{grid-column-start:9}.panel-grid .g-start-10{grid-column-start:10}.panel-grid .g-start-11{grid-column-start:11}.panel-grid .g-start-12{grid-column-start:12}.panel-grid .g-start-13{grid-column-start:13}.panel-grid .g-start-14{grid-column-start:14}.panel-grid .g-start-15{grid-column-start:15}.panel-grid .g-start-16{grid-column-start:16}.panel-grid .g-start-17{grid-column-start:17}.panel-grid .g-start-18{grid-column-start:18}.panel-grid .g-start-19{grid-column-start:19}.panel-grid .g-start-20{grid-column-start:20}.panel-grid .g-start-21{grid-column-start:21}.panel-grid .g-start-22{grid-column-start:22}.panel-grid 
.g-start-23{grid-column-start:23}@media(min-width: 576px){.panel-grid .g-col-sm-1{grid-column:auto/span 1}.panel-grid .g-col-sm-2{grid-column:auto/span 2}.panel-grid .g-col-sm-3{grid-column:auto/span 3}.panel-grid .g-col-sm-4{grid-column:auto/span 4}.panel-grid .g-col-sm-5{grid-column:auto/span 5}.panel-grid .g-col-sm-6{grid-column:auto/span 6}.panel-grid .g-col-sm-7{grid-column:auto/span 7}.panel-grid .g-col-sm-8{grid-column:auto/span 8}.panel-grid .g-col-sm-9{grid-column:auto/span 9}.panel-grid .g-col-sm-10{grid-column:auto/span 10}.panel-grid .g-col-sm-11{grid-column:auto/span 11}.panel-grid .g-col-sm-12{grid-column:auto/span 12}.panel-grid .g-col-sm-13{grid-column:auto/span 13}.panel-grid .g-col-sm-14{grid-column:auto/span 14}.panel-grid .g-col-sm-15{grid-column:auto/span 15}.panel-grid .g-col-sm-16{grid-column:auto/span 16}.panel-grid .g-col-sm-17{grid-column:auto/span 17}.panel-grid .g-col-sm-18{grid-column:auto/span 18}.panel-grid .g-col-sm-19{grid-column:auto/span 19}.panel-grid .g-col-sm-20{grid-column:auto/span 20}.panel-grid .g-col-sm-21{grid-column:auto/span 21}.panel-grid .g-col-sm-22{grid-column:auto/span 22}.panel-grid .g-col-sm-23{grid-column:auto/span 23}.panel-grid .g-col-sm-24{grid-column:auto/span 24}.panel-grid .g-start-sm-1{grid-column-start:1}.panel-grid .g-start-sm-2{grid-column-start:2}.panel-grid .g-start-sm-3{grid-column-start:3}.panel-grid .g-start-sm-4{grid-column-start:4}.panel-grid .g-start-sm-5{grid-column-start:5}.panel-grid .g-start-sm-6{grid-column-start:6}.panel-grid .g-start-sm-7{grid-column-start:7}.panel-grid .g-start-sm-8{grid-column-start:8}.panel-grid .g-start-sm-9{grid-column-start:9}.panel-grid .g-start-sm-10{grid-column-start:10}.panel-grid .g-start-sm-11{grid-column-start:11}.panel-grid .g-start-sm-12{grid-column-start:12}.panel-grid .g-start-sm-13{grid-column-start:13}.panel-grid .g-start-sm-14{grid-column-start:14}.panel-grid .g-start-sm-15{grid-column-start:15}.panel-grid 
.g-start-sm-16{grid-column-start:16}.panel-grid .g-start-sm-17{grid-column-start:17}.panel-grid .g-start-sm-18{grid-column-start:18}.panel-grid .g-start-sm-19{grid-column-start:19}.panel-grid .g-start-sm-20{grid-column-start:20}.panel-grid .g-start-sm-21{grid-column-start:21}.panel-grid .g-start-sm-22{grid-column-start:22}.panel-grid .g-start-sm-23{grid-column-start:23}}@media(min-width: 768px){.panel-grid .g-col-md-1{grid-column:auto/span 1}.panel-grid .g-col-md-2{grid-column:auto/span 2}.panel-grid .g-col-md-3{grid-column:auto/span 3}.panel-grid .g-col-md-4{grid-column:auto/span 4}.panel-grid .g-col-md-5{grid-column:auto/span 5}.panel-grid .g-col-md-6{grid-column:auto/span 6}.panel-grid .g-col-md-7{grid-column:auto/span 7}.panel-grid .g-col-md-8{grid-column:auto/span 8}.panel-grid .g-col-md-9{grid-column:auto/span 9}.panel-grid .g-col-md-10{grid-column:auto/span 10}.panel-grid .g-col-md-11{grid-column:auto/span 11}.panel-grid .g-col-md-12{grid-column:auto/span 12}.panel-grid .g-col-md-13{grid-column:auto/span 13}.panel-grid .g-col-md-14{grid-column:auto/span 14}.panel-grid .g-col-md-15{grid-column:auto/span 15}.panel-grid .g-col-md-16{grid-column:auto/span 16}.panel-grid .g-col-md-17{grid-column:auto/span 17}.panel-grid .g-col-md-18{grid-column:auto/span 18}.panel-grid .g-col-md-19{grid-column:auto/span 19}.panel-grid .g-col-md-20{grid-column:auto/span 20}.panel-grid .g-col-md-21{grid-column:auto/span 21}.panel-grid .g-col-md-22{grid-column:auto/span 22}.panel-grid .g-col-md-23{grid-column:auto/span 23}.panel-grid .g-col-md-24{grid-column:auto/span 24}.panel-grid .g-start-md-1{grid-column-start:1}.panel-grid .g-start-md-2{grid-column-start:2}.panel-grid .g-start-md-3{grid-column-start:3}.panel-grid .g-start-md-4{grid-column-start:4}.panel-grid .g-start-md-5{grid-column-start:5}.panel-grid .g-start-md-6{grid-column-start:6}.panel-grid .g-start-md-7{grid-column-start:7}.panel-grid .g-start-md-8{grid-column-start:8}.panel-grid 
.g-start-md-9{grid-column-start:9}.panel-grid .g-start-md-10{grid-column-start:10}.panel-grid .g-start-md-11{grid-column-start:11}.panel-grid .g-start-md-12{grid-column-start:12}.panel-grid .g-start-md-13{grid-column-start:13}.panel-grid .g-start-md-14{grid-column-start:14}.panel-grid .g-start-md-15{grid-column-start:15}.panel-grid .g-start-md-16{grid-column-start:16}.panel-grid .g-start-md-17{grid-column-start:17}.panel-grid .g-start-md-18{grid-column-start:18}.panel-grid .g-start-md-19{grid-column-start:19}.panel-grid .g-start-md-20{grid-column-start:20}.panel-grid .g-start-md-21{grid-column-start:21}.panel-grid .g-start-md-22{grid-column-start:22}.panel-grid .g-start-md-23{grid-column-start:23}}@media(min-width: 992px){.panel-grid .g-col-lg-1{grid-column:auto/span 1}.panel-grid .g-col-lg-2{grid-column:auto/span 2}.panel-grid .g-col-lg-3{grid-column:auto/span 3}.panel-grid .g-col-lg-4{grid-column:auto/span 4}.panel-grid .g-col-lg-5{grid-column:auto/span 5}.panel-grid .g-col-lg-6{grid-column:auto/span 6}.panel-grid .g-col-lg-7{grid-column:auto/span 7}.panel-grid .g-col-lg-8{grid-column:auto/span 8}.panel-grid .g-col-lg-9{grid-column:auto/span 9}.panel-grid .g-col-lg-10{grid-column:auto/span 10}.panel-grid .g-col-lg-11{grid-column:auto/span 11}.panel-grid .g-col-lg-12{grid-column:auto/span 12}.panel-grid .g-col-lg-13{grid-column:auto/span 13}.panel-grid .g-col-lg-14{grid-column:auto/span 14}.panel-grid .g-col-lg-15{grid-column:auto/span 15}.panel-grid .g-col-lg-16{grid-column:auto/span 16}.panel-grid .g-col-lg-17{grid-column:auto/span 17}.panel-grid .g-col-lg-18{grid-column:auto/span 18}.panel-grid .g-col-lg-19{grid-column:auto/span 19}.panel-grid .g-col-lg-20{grid-column:auto/span 20}.panel-grid .g-col-lg-21{grid-column:auto/span 21}.panel-grid .g-col-lg-22{grid-column:auto/span 22}.panel-grid .g-col-lg-23{grid-column:auto/span 23}.panel-grid .g-col-lg-24{grid-column:auto/span 24}.panel-grid .g-start-lg-1{grid-column-start:1}.panel-grid 
.g-start-lg-2{grid-column-start:2}.panel-grid .g-start-lg-3{grid-column-start:3}.panel-grid .g-start-lg-4{grid-column-start:4}.panel-grid .g-start-lg-5{grid-column-start:5}.panel-grid .g-start-lg-6{grid-column-start:6}.panel-grid .g-start-lg-7{grid-column-start:7}.panel-grid .g-start-lg-8{grid-column-start:8}.panel-grid .g-start-lg-9{grid-column-start:9}.panel-grid .g-start-lg-10{grid-column-start:10}.panel-grid .g-start-lg-11{grid-column-start:11}.panel-grid .g-start-lg-12{grid-column-start:12}.panel-grid .g-start-lg-13{grid-column-start:13}.panel-grid .g-start-lg-14{grid-column-start:14}.panel-grid .g-start-lg-15{grid-column-start:15}.panel-grid .g-start-lg-16{grid-column-start:16}.panel-grid .g-start-lg-17{grid-column-start:17}.panel-grid .g-start-lg-18{grid-column-start:18}.panel-grid .g-start-lg-19{grid-column-start:19}.panel-grid .g-start-lg-20{grid-column-start:20}.panel-grid .g-start-lg-21{grid-column-start:21}.panel-grid .g-start-lg-22{grid-column-start:22}.panel-grid .g-start-lg-23{grid-column-start:23}}@media(min-width: 1200px){.panel-grid .g-col-xl-1{grid-column:auto/span 1}.panel-grid .g-col-xl-2{grid-column:auto/span 2}.panel-grid .g-col-xl-3{grid-column:auto/span 3}.panel-grid .g-col-xl-4{grid-column:auto/span 4}.panel-grid .g-col-xl-5{grid-column:auto/span 5}.panel-grid .g-col-xl-6{grid-column:auto/span 6}.panel-grid .g-col-xl-7{grid-column:auto/span 7}.panel-grid .g-col-xl-8{grid-column:auto/span 8}.panel-grid .g-col-xl-9{grid-column:auto/span 9}.panel-grid .g-col-xl-10{grid-column:auto/span 10}.panel-grid .g-col-xl-11{grid-column:auto/span 11}.panel-grid .g-col-xl-12{grid-column:auto/span 12}.panel-grid .g-col-xl-13{grid-column:auto/span 13}.panel-grid .g-col-xl-14{grid-column:auto/span 14}.panel-grid .g-col-xl-15{grid-column:auto/span 15}.panel-grid .g-col-xl-16{grid-column:auto/span 16}.panel-grid .g-col-xl-17{grid-column:auto/span 17}.panel-grid .g-col-xl-18{grid-column:auto/span 18}.panel-grid .g-col-xl-19{grid-column:auto/span 19}.panel-grid 
.g-col-xl-20{grid-column:auto/span 20}.panel-grid .g-col-xl-21{grid-column:auto/span 21}.panel-grid .g-col-xl-22{grid-column:auto/span 22}.panel-grid .g-col-xl-23{grid-column:auto/span 23}.panel-grid .g-col-xl-24{grid-column:auto/span 24}.panel-grid .g-start-xl-1{grid-column-start:1}.panel-grid .g-start-xl-2{grid-column-start:2}.panel-grid .g-start-xl-3{grid-column-start:3}.panel-grid .g-start-xl-4{grid-column-start:4}.panel-grid .g-start-xl-5{grid-column-start:5}.panel-grid .g-start-xl-6{grid-column-start:6}.panel-grid .g-start-xl-7{grid-column-start:7}.panel-grid .g-start-xl-8{grid-column-start:8}.panel-grid .g-start-xl-9{grid-column-start:9}.panel-grid .g-start-xl-10{grid-column-start:10}.panel-grid .g-start-xl-11{grid-column-start:11}.panel-grid .g-start-xl-12{grid-column-start:12}.panel-grid .g-start-xl-13{grid-column-start:13}.panel-grid .g-start-xl-14{grid-column-start:14}.panel-grid .g-start-xl-15{grid-column-start:15}.panel-grid .g-start-xl-16{grid-column-start:16}.panel-grid .g-start-xl-17{grid-column-start:17}.panel-grid .g-start-xl-18{grid-column-start:18}.panel-grid .g-start-xl-19{grid-column-start:19}.panel-grid .g-start-xl-20{grid-column-start:20}.panel-grid .g-start-xl-21{grid-column-start:21}.panel-grid .g-start-xl-22{grid-column-start:22}.panel-grid .g-start-xl-23{grid-column-start:23}}@media(min-width: 1400px){.panel-grid .g-col-xxl-1{grid-column:auto/span 1}.panel-grid .g-col-xxl-2{grid-column:auto/span 2}.panel-grid .g-col-xxl-3{grid-column:auto/span 3}.panel-grid .g-col-xxl-4{grid-column:auto/span 4}.panel-grid .g-col-xxl-5{grid-column:auto/span 5}.panel-grid .g-col-xxl-6{grid-column:auto/span 6}.panel-grid .g-col-xxl-7{grid-column:auto/span 7}.panel-grid .g-col-xxl-8{grid-column:auto/span 8}.panel-grid .g-col-xxl-9{grid-column:auto/span 9}.panel-grid .g-col-xxl-10{grid-column:auto/span 10}.panel-grid .g-col-xxl-11{grid-column:auto/span 11}.panel-grid .g-col-xxl-12{grid-column:auto/span 12}.panel-grid .g-col-xxl-13{grid-column:auto/span 
13}.panel-grid .g-col-xxl-14{grid-column:auto/span 14}.panel-grid .g-col-xxl-15{grid-column:auto/span 15}.panel-grid .g-col-xxl-16{grid-column:auto/span 16}.panel-grid .g-col-xxl-17{grid-column:auto/span 17}.panel-grid .g-col-xxl-18{grid-column:auto/span 18}.panel-grid .g-col-xxl-19{grid-column:auto/span 19}.panel-grid .g-col-xxl-20{grid-column:auto/span 20}.panel-grid .g-col-xxl-21{grid-column:auto/span 21}.panel-grid .g-col-xxl-22{grid-column:auto/span 22}.panel-grid .g-col-xxl-23{grid-column:auto/span 23}.panel-grid .g-col-xxl-24{grid-column:auto/span 24}.panel-grid .g-start-xxl-1{grid-column-start:1}.panel-grid .g-start-xxl-2{grid-column-start:2}.panel-grid .g-start-xxl-3{grid-column-start:3}.panel-grid .g-start-xxl-4{grid-column-start:4}.panel-grid .g-start-xxl-5{grid-column-start:5}.panel-grid .g-start-xxl-6{grid-column-start:6}.panel-grid .g-start-xxl-7{grid-column-start:7}.panel-grid .g-start-xxl-8{grid-column-start:8}.panel-grid .g-start-xxl-9{grid-column-start:9}.panel-grid .g-start-xxl-10{grid-column-start:10}.panel-grid .g-start-xxl-11{grid-column-start:11}.panel-grid .g-start-xxl-12{grid-column-start:12}.panel-grid .g-start-xxl-13{grid-column-start:13}.panel-grid .g-start-xxl-14{grid-column-start:14}.panel-grid .g-start-xxl-15{grid-column-start:15}.panel-grid .g-start-xxl-16{grid-column-start:16}.panel-grid .g-start-xxl-17{grid-column-start:17}.panel-grid .g-start-xxl-18{grid-column-start:18}.panel-grid .g-start-xxl-19{grid-column-start:19}.panel-grid .g-start-xxl-20{grid-column-start:20}.panel-grid .g-start-xxl-21{grid-column-start:21}.panel-grid .g-start-xxl-22{grid-column-start:22}.panel-grid .g-start-xxl-23{grid-column-start:23}}main{margin-top:1em;margin-bottom:1em}h1,.h1,h2,.h2{color:inherit;margin-top:2rem;margin-bottom:1rem;font-weight:600}h1.title,.title.h1{margin-top:0}main.content>section:first-of-type>h2:first-child,main.content>section:first-of-type>.h2:first-child{margin-top:0}h2,.h2{border-bottom:1px solid 
#dee2e6;padding-bottom:.5rem}h3,.h3{font-weight:600}h3,.h3,h4,.h4{opacity:.9;margin-top:1.5rem}h5,.h5,h6,.h6{opacity:.9}.header-section-number{color:#6d7a86}.nav-link.active .header-section-number{color:inherit}mark,.mark{padding:0em}.panel-caption,.figure-caption,.subfigure-caption,.table-caption,figcaption,caption{font-size:.9rem;color:#6d7a86}.quarto-layout-cell[data-ref-parent] caption{color:#6d7a86}.column-margin figcaption,.margin-caption,div.aside,aside,.column-margin{color:#6d7a86;font-size:.825rem}.panel-caption.margin-caption{text-align:inherit}.column-margin.column-container p{margin-bottom:0}.column-margin.column-container>*:not(.collapse):first-child{padding-bottom:.5em;display:block}.column-margin.column-container>*:not(.collapse):not(:first-child){padding-top:.5em;padding-bottom:.5em;display:block}.column-margin.column-container>*.collapse:not(.show){display:none}@media(min-width: 768px){.column-margin.column-container .callout-margin-content:first-child{margin-top:4.5em}.column-margin.column-container .callout-margin-content-simple:first-child{margin-top:3.5em}}.margin-caption>*{padding-top:.5em;padding-bottom:.5em}@media(max-width: 767.98px){.quarto-layout-row{flex-direction:column}}.nav-tabs .nav-item{margin-top:1px;cursor:pointer}.tab-content{margin-top:0px;border-left:#dee2e6 1px solid;border-right:#dee2e6 1px solid;border-bottom:#dee2e6 1px solid;margin-left:0;padding:1em;margin-bottom:1em}@media(max-width: 767.98px){.layout-sidebar{margin-left:0;margin-right:0}}.panel-sidebar,.panel-sidebar .form-control,.panel-input,.panel-input .form-control,.selectize-dropdown{font-size:.9rem}.panel-sidebar .form-control,.panel-input .form-control{padding-top:.1rem}.tab-pane div.sourceCode{margin-top:0px}.tab-pane>p{padding-top:0}.tab-pane>p:nth-child(1){padding-top:0}.tab-pane>p:last-child{margin-bottom:0}.tab-pane>pre:last-child{margin-bottom:0}.tab-content>.tab-pane:not(.active){display:none 
!important}div.sourceCode{background-color:rgba(233,236,239,.65);border:1px solid rgba(233,236,239,.65)}pre.sourceCode{background-color:rgba(0,0,0,0)}pre.sourceCode{border:none;font-size:.875em;overflow:visible !important;padding:.4em}div.sourceCode{overflow-y:hidden}.callout div.sourceCode{margin-left:initial}.blockquote{font-size:inherit;padding-left:1rem;padding-right:1.5rem;color:#6d7a86}.blockquote h1:first-child,.blockquote .h1:first-child,.blockquote h2:first-child,.blockquote .h2:first-child,.blockquote h3:first-child,.blockquote .h3:first-child,.blockquote h4:first-child,.blockquote .h4:first-child,.blockquote h5:first-child,.blockquote .h5:first-child{margin-top:0}pre{background-color:initial;padding:initial;border:initial}p pre code:not(.sourceCode),li pre code:not(.sourceCode),pre code:not(.sourceCode){background-color:initial}p code:not(.sourceCode),li code:not(.sourceCode),td code:not(.sourceCode){background-color:#f8f9fa;padding:.2em}nav p code:not(.sourceCode),nav li code:not(.sourceCode),nav td code:not(.sourceCode){background-color:rgba(0,0,0,0);padding:0}td code:not(.sourceCode){white-space:pre-wrap}#quarto-embedded-source-code-modal>.modal-dialog{max-width:1000px;padding-left:1.75rem;padding-right:1.75rem}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-body{padding:0}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-body div.sourceCode{margin:0;padding:.2rem .2rem;border-radius:0px;border:none}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-header{padding:.7rem}.code-tools-button{font-size:1rem;padding:.15rem .15rem;margin-left:5px;color:#6c757d;background-color:rgba(0,0,0,0);transition:initial;cursor:pointer}.code-tools-button>.bi::before{display:inline-block;height:1rem;width:1rem;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:1rem 
1rem}.code-tools-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}#quarto-embedded-source-code-modal .code-copy-button>.bi::before{background-image:url('data:image/svg+xml,')}#quarto-embedded-source-code-modal .code-copy-button-checked>.bi::before{background-image:url('data:image/svg+xml,')}.sidebar{will-change:top;transition:top 200ms linear;position:sticky;overflow-y:auto;padding-top:1.2em;max-height:100vh}.sidebar.toc-left,.sidebar.margin-sidebar{top:0px;padding-top:1em}.sidebar.quarto-banner-title-block-sidebar>*{padding-top:1.65em}figure .quarto-notebook-link{margin-top:.5em}.quarto-notebook-link{font-size:.75em;color:#6c757d;margin-bottom:1em;text-decoration:none;display:block}.quarto-notebook-link:hover{text-decoration:underline;color:#2761e3}.quarto-notebook-link::before{display:inline-block;height:.75rem;width:.75rem;margin-bottom:0em;margin-right:.25em;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:.75rem .75rem}.toc-actions i.bi,.quarto-code-links i.bi,.quarto-other-links i.bi,.quarto-alternate-notebooks i.bi,.quarto-alternate-formats i.bi{margin-right:.4em;font-size:.8rem}.quarto-other-links-text-target .quarto-code-links i.bi,.quarto-other-links-text-target .quarto-other-links i.bi{margin-right:.2em}.quarto-other-formats-text-target .quarto-alternate-formats i.bi{margin-right:.1em}.toc-actions i.bi.empty,.quarto-code-links i.bi.empty,.quarto-other-links i.bi.empty,.quarto-alternate-notebooks i.bi.empty,.quarto-alternate-formats i.bi.empty{padding-left:1em}.quarto-notebook h2,.quarto-notebook .h2{border-bottom:none}.quarto-notebook .cell-container{display:flex}.quarto-notebook .cell-container .cell{flex-grow:4}.quarto-notebook .cell-container .cell-decorator{padding-top:1.5em;padding-right:1em;text-align:right}.quarto-notebook .cell-container.code-fold .cell-decorator{padding-top:3em}.quarto-notebook .cell-code code{white-space:pre-wrap}.quarto-notebook .cell 
.cell-output-stderr pre code,.quarto-notebook .cell .cell-output-stdout pre code{white-space:pre-wrap;overflow-wrap:anywhere}.toc-actions,.quarto-alternate-formats,.quarto-other-links,.quarto-code-links,.quarto-alternate-notebooks{padding-left:0em}.sidebar .toc-actions a,.sidebar .quarto-alternate-formats a,.sidebar .quarto-other-links a,.sidebar .quarto-code-links a,.sidebar .quarto-alternate-notebooks a,.sidebar nav[role=doc-toc] a{text-decoration:none}.sidebar .toc-actions a:hover,.sidebar .quarto-other-links a:hover,.sidebar .quarto-code-links a:hover,.sidebar .quarto-alternate-formats a:hover,.sidebar .quarto-alternate-notebooks a:hover{color:#2761e3}.sidebar .toc-actions h2,.sidebar .toc-actions .h2,.sidebar .quarto-code-links h2,.sidebar .quarto-code-links .h2,.sidebar .quarto-other-links h2,.sidebar .quarto-other-links .h2,.sidebar .quarto-alternate-notebooks h2,.sidebar .quarto-alternate-notebooks .h2,.sidebar .quarto-alternate-formats h2,.sidebar .quarto-alternate-formats .h2,.sidebar nav[role=doc-toc]>h2,.sidebar nav[role=doc-toc]>.h2{font-weight:500;margin-bottom:.2rem;margin-top:.3rem;font-family:inherit;border-bottom:0;padding-bottom:0;padding-top:0px}.sidebar .toc-actions>h2,.sidebar .toc-actions>.h2,.sidebar .quarto-code-links>h2,.sidebar .quarto-code-links>.h2,.sidebar .quarto-other-links>h2,.sidebar .quarto-other-links>.h2,.sidebar .quarto-alternate-notebooks>h2,.sidebar .quarto-alternate-notebooks>.h2,.sidebar .quarto-alternate-formats>h2,.sidebar .quarto-alternate-formats>.h2{font-size:.8rem}.sidebar nav[role=doc-toc]>h2,.sidebar nav[role=doc-toc]>.h2{font-size:.875rem}.sidebar nav[role=doc-toc]>ul a{border-left:1px solid #e9ecef;padding-left:.6rem}.sidebar .toc-actions h2>ul a,.sidebar .toc-actions .h2>ul a,.sidebar .quarto-code-links h2>ul a,.sidebar .quarto-code-links .h2>ul a,.sidebar .quarto-other-links h2>ul a,.sidebar .quarto-other-links .h2>ul a,.sidebar .quarto-alternate-notebooks h2>ul a,.sidebar .quarto-alternate-notebooks .h2>ul 
a,.sidebar .quarto-alternate-formats h2>ul a,.sidebar .quarto-alternate-formats .h2>ul a{border-left:none;padding-left:.6rem}.sidebar .toc-actions ul a:empty,.sidebar .quarto-code-links ul a:empty,.sidebar .quarto-other-links ul a:empty,.sidebar .quarto-alternate-notebooks ul a:empty,.sidebar .quarto-alternate-formats ul a:empty,.sidebar nav[role=doc-toc]>ul a:empty{display:none}.sidebar .toc-actions ul,.sidebar .quarto-code-links ul,.sidebar .quarto-other-links ul,.sidebar .quarto-alternate-notebooks ul,.sidebar .quarto-alternate-formats ul{padding-left:0;list-style:none}.sidebar nav[role=doc-toc] ul{list-style:none;padding-left:0;list-style:none}.sidebar nav[role=doc-toc]>ul{margin-left:.45em}.quarto-margin-sidebar nav[role=doc-toc]{padding-left:.5em}.sidebar .toc-actions>ul,.sidebar .quarto-code-links>ul,.sidebar .quarto-other-links>ul,.sidebar .quarto-alternate-notebooks>ul,.sidebar .quarto-alternate-formats>ul{font-size:.8rem}.sidebar nav[role=doc-toc]>ul{font-size:.875rem}.sidebar .toc-actions ul li a,.sidebar .quarto-code-links ul li a,.sidebar .quarto-other-links ul li a,.sidebar .quarto-alternate-notebooks ul li a,.sidebar .quarto-alternate-formats ul li a,.sidebar nav[role=doc-toc]>ul li a{line-height:1.1rem;padding-bottom:.2rem;padding-top:.2rem;color:inherit}.sidebar nav[role=doc-toc] ul>li>ul>li>a{padding-left:1.2em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>a{padding-left:2.4em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>a{padding-left:3.6em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>ul>li>a{padding-left:4.8em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>ul>li>ul>li>a{padding-left:6em}.sidebar nav[role=doc-toc] ul>li>a.active,.sidebar nav[role=doc-toc] ul>li>ul>li>a.active{border-left:1px solid #2761e3;color:#2761e3 !important}.sidebar nav[role=doc-toc] ul>li>a:hover,.sidebar nav[role=doc-toc] ul>li>ul>li>a:hover{color:#2761e3 !important}kbd,.kbd{color:#343a40;background-color:#f8f9fa;border:1px 
solid;border-radius:5px;border-color:#dee2e6}.quarto-appendix-contents div.hanging-indent{margin-left:0em}.quarto-appendix-contents div.hanging-indent div.csl-entry{margin-left:1em;text-indent:-1em}.citation a,.footnote-ref{text-decoration:none}.footnotes ol{padding-left:1em}.tippy-content>*{margin-bottom:.7em}.tippy-content>*:last-child{margin-bottom:0}.callout{margin-top:1.25rem;margin-bottom:1.25rem;border-radius:.25rem;overflow-wrap:break-word}.callout .callout-title-container{overflow-wrap:anywhere}.callout.callout-style-simple{padding:.4em .7em;border-left:5px solid;border-right:1px solid #dee2e6;border-top:1px solid #dee2e6;border-bottom:1px solid #dee2e6}.callout.callout-style-default{border-left:5px solid;border-right:1px solid #dee2e6;border-top:1px solid #dee2e6;border-bottom:1px solid #dee2e6}.callout .callout-body-container{flex-grow:1}.callout.callout-style-simple .callout-body{font-size:.9rem;font-weight:400}.callout.callout-style-default .callout-body{font-size:.9rem;font-weight:400}.callout:not(.no-icon).callout-titled.callout-style-simple .callout-body{padding-left:1.6em}.callout.callout-titled>.callout-header{padding-top:.2em;margin-bottom:-0.2em}.callout.callout-style-simple>div.callout-header{border-bottom:none;font-size:.9rem;font-weight:600;opacity:75%}.callout.callout-style-default>div.callout-header{border-bottom:none;font-weight:600;opacity:85%;font-size:.9rem;padding-left:.5em;padding-right:.5em}.callout.callout-style-default .callout-body{padding-left:.5em;padding-right:.5em}.callout.callout-style-default .callout-body>:first-child{padding-top:.5rem;margin-top:0}.callout>div.callout-header[data-bs-toggle=collapse]{cursor:pointer}.callout.callout-style-default .callout-header[aria-expanded=false],.callout.callout-style-default .callout-header[aria-expanded=true]{padding-top:0px;margin-bottom:0px;align-items:center}.callout.callout-titled .callout-body>:last-child:not(.sourceCode),.callout.callout-titled 
.callout-body>div>:last-child:not(.sourceCode){padding-bottom:.5rem;margin-bottom:0}.callout:not(.callout-titled) .callout-body>:first-child,.callout:not(.callout-titled) .callout-body>div>:first-child{margin-top:.25rem}.callout:not(.callout-titled) .callout-body>:last-child,.callout:not(.callout-titled) .callout-body>div>:last-child{margin-bottom:.2rem}.callout.callout-style-simple .callout-icon::before,.callout.callout-style-simple .callout-toggle::before{height:1rem;width:1rem;display:inline-block;content:"";background-repeat:no-repeat;background-size:1rem 1rem}.callout.callout-style-default .callout-icon::before,.callout.callout-style-default .callout-toggle::before{height:.9rem;width:.9rem;display:inline-block;content:"";background-repeat:no-repeat;background-size:.9rem .9rem}.callout.callout-style-default .callout-toggle::before{margin-top:5px}.callout .callout-btn-toggle .callout-toggle::before{transition:transform .2s linear}.callout .callout-header[aria-expanded=false] .callout-toggle::before{transform:rotate(-90deg)}.callout .callout-header[aria-expanded=true] .callout-toggle::before{transform:none}.callout.callout-style-simple:not(.no-icon) div.callout-icon-container{padding-top:.2em;padding-right:.55em}.callout.callout-style-default:not(.no-icon) div.callout-icon-container{padding-top:.1em;padding-right:.35em}.callout.callout-style-default:not(.no-icon) div.callout-title-container{margin-top:-1px}.callout.callout-style-default.callout-caution:not(.no-icon) div.callout-icon-container{padding-top:.3em;padding-right:.35em}.callout>.callout-body>.callout-icon-container>.no-icon,.callout>.callout-header>.callout-icon-container>.no-icon{display:none}div.callout.callout{border-left-color:#6c757d}div.callout.callout-style-default>.callout-header{background-color:#6c757d}div.callout-note.callout{border-left-color:#2780e3}div.callout-note.callout-style-default>.callout-header{background-color:#e9f2fc}div.callout-note:not(.callout-titled) 
.callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-note.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-note .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-tip.callout{border-left-color:#3fb618}div.callout-tip.callout-style-default>.callout-header{background-color:#ecf8e8}div.callout-tip:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-tip.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-tip .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-warning.callout{border-left-color:#ff7518}div.callout-warning.callout-style-default>.callout-header{background-color:#fff1e8}div.callout-warning:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-warning.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-warning .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-caution.callout{border-left-color:#f0ad4e}div.callout-caution.callout-style-default>.callout-header{background-color:#fef7ed}div.callout-caution:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-caution.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-caution .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-important.callout{border-left-color:#ff0039}div.callout-important.callout-style-default>.callout-header{background-color:#ffe6eb}div.callout-important:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-important.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-important 
.callout-toggle::before{background-image:url('data:image/svg+xml,')}.quarto-toggle-container{display:flex;align-items:center}.quarto-reader-toggle .bi::before,.quarto-color-scheme-toggle .bi::before{display:inline-block;height:1rem;width:1rem;content:"";background-repeat:no-repeat;background-size:1rem 1rem}.sidebar-navigation{padding-left:20px}.navbar{background-color:#052744;color:#f2e5bd}.navbar .quarto-color-scheme-toggle:not(.alternate) .bi::before{background-image:url('data:image/svg+xml,')}.navbar .quarto-color-scheme-toggle.alternate .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-color-scheme-toggle:not(.alternate) .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-color-scheme-toggle.alternate .bi::before{background-image:url('data:image/svg+xml,')}.quarto-sidebar-toggle{border-color:#dee2e6;border-bottom-left-radius:.25rem;border-bottom-right-radius:.25rem;border-style:solid;border-width:1px;overflow:hidden;border-top-width:0px;padding-top:0px !important}.quarto-sidebar-toggle-title{cursor:pointer;padding-bottom:2px;margin-left:.25em;text-align:center;font-weight:400;font-size:.775em}#quarto-content .quarto-sidebar-toggle{background:#fdf8e9}#quarto-content .quarto-sidebar-toggle-title{color:#343a40}.quarto-sidebar-toggle-icon{color:#dee2e6;margin-right:.5em;float:right;transition:transform .2s ease}.quarto-sidebar-toggle-icon::before{padding-top:5px}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-icon{transform:rotate(-180deg)}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-title{border-bottom:solid #dee2e6 1px}.quarto-sidebar-toggle-contents{background-color:#fefbf2;padding-right:10px;padding-left:10px;margin-top:0px !important;transition:max-height .5s ease}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-contents{padding-top:1em;padding-bottom:10px}@media(max-width: 767.98px){.sidebar-menu-container{padding-bottom:5em}}.quarto-sidebar-toggle:not(.expanded) 
.quarto-sidebar-toggle-contents{padding-top:0px !important;padding-bottom:0px}nav[role=doc-toc]{z-index:1020}#quarto-sidebar>*,nav[role=doc-toc]>*{transition:opacity .1s ease,border .1s ease}#quarto-sidebar.slow>*,nav[role=doc-toc].slow>*{transition:opacity .4s ease,border .4s ease}.quarto-color-scheme-toggle:not(.alternate).top-right .bi::before{background-image:url('data:image/svg+xml,')}.quarto-color-scheme-toggle.alternate.top-right .bi::before{background-image:url('data:image/svg+xml,')}#quarto-appendix.default{border-top:1px solid #dee2e6}#quarto-appendix.default{background-color:#fefbf2;padding-top:1.5em;margin-top:2em;z-index:998}#quarto-appendix.default .quarto-appendix-heading{margin-top:0;line-height:1.4em;font-weight:600;opacity:.9;border-bottom:none;margin-bottom:0}#quarto-appendix.default .footnotes ol,#quarto-appendix.default .footnotes ol li>p:last-of-type,#quarto-appendix.default .quarto-appendix-contents>p:last-of-type{margin-bottom:0}#quarto-appendix.default .footnotes ol{margin-left:.5em}#quarto-appendix.default .quarto-appendix-secondary-label{margin-bottom:.4em}#quarto-appendix.default .quarto-appendix-bibtex{font-size:.7em;padding:1em;border:solid 1px #dee2e6;margin-bottom:1em}#quarto-appendix.default .quarto-appendix-bibtex code.sourceCode{white-space:pre-wrap}#quarto-appendix.default .quarto-appendix-citeas{font-size:.9em;padding:1em;border:solid 1px #dee2e6;margin-bottom:1em}#quarto-appendix.default .quarto-appendix-heading{font-size:1em !important}#quarto-appendix.default *[role=doc-endnotes]>ol,#quarto-appendix.default .quarto-appendix-contents>*:not(h2):not(.h2){font-size:.9em}#quarto-appendix.default section{padding-bottom:1.5em}#quarto-appendix.default section *[role=doc-endnotes],#quarto-appendix.default section>*:not(a){opacity:.9;word-wrap:break-word}.btn.btn-quarto,div.cell-output-display .btn-quarto{--bs-btn-color: #cacccd;--bs-btn-bg: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #cacccd;--bs-btn-hover-bg: 
#52585d;--bs-btn-hover-border-color: #484e53;--bs-btn-focus-shadow-rgb: 75, 80, 85;--bs-btn-active-color: #fff;--bs-btn-active-bg: #5d6166;--bs-btn-active-border-color: #484e53;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #343a40;--bs-btn-disabled-border-color: #343a40}nav.quarto-secondary-nav.color-navbar{background-color:#052744;color:#f2e5bd}nav.quarto-secondary-nav.color-navbar h1,nav.quarto-secondary-nav.color-navbar .h1,nav.quarto-secondary-nav.color-navbar .quarto-btn-toggle{color:#f2e5bd}@media(max-width: 991.98px){body.nav-sidebar .quarto-title-banner{margin-bottom:0;padding-bottom:1em}body.nav-sidebar #title-block-header{margin-block-end:0}}p.subtitle{margin-top:.25em;margin-bottom:.5em}code a:any-link{color:inherit;text-decoration-color:#6c757d}/*! light */div.observablehq table thead tr th{background-color:var(--bs-body-bg)}input,button,select,optgroup,textarea{background-color:var(--bs-body-bg)}.code-annotated .code-copy-button{margin-right:1.25em;margin-top:0;padding-bottom:0;padding-top:3px}.code-annotation-gutter-bg{background-color:#fefbf2}.code-annotation-gutter{background-color:rgba(233,236,239,.65)}.code-annotation-gutter,.code-annotation-gutter-bg{height:100%;width:calc(20px + .5em);position:absolute;top:0;right:0}dl.code-annotation-container-grid dt{margin-right:1em;margin-top:.25rem}dl.code-annotation-container-grid dt{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;color:#4b545c;border:solid #4b545c 1px;border-radius:50%;height:22px;width:22px;line-height:22px;font-size:11px;text-align:center;vertical-align:middle;text-decoration:none}dl.code-annotation-container-grid dt[data-target-cell]{cursor:pointer}dl.code-annotation-container-grid dt[data-target-cell].code-annotation-active{color:#fefbf2;border:solid #aaa 1px;background-color:#aaa}pre.code-annotation-code{padding-top:0;padding-bottom:0}pre.code-annotation-code 
code{z-index:3}#code-annotation-line-highlight-gutter{width:100%;border-top:solid rgba(170,170,170,.2666666667) 1px;border-bottom:solid rgba(170,170,170,.2666666667) 1px;z-index:2;background-color:rgba(170,170,170,.1333333333)}#code-annotation-line-highlight{margin-left:-4em;width:calc(100% + 4em);border-top:solid rgba(170,170,170,.2666666667) 1px;border-bottom:solid rgba(170,170,170,.2666666667) 1px;z-index:2;background-color:rgba(170,170,170,.1333333333)}code.sourceCode .code-annotation-anchor.code-annotation-active{background-color:var(--quarto-hl-normal-color, #aaaaaa);border:solid var(--quarto-hl-normal-color, #aaaaaa) 1px;color:#e9ecef;font-weight:bolder}code.sourceCode .code-annotation-anchor{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;color:var(--quarto-hl-co-color);border:solid var(--quarto-hl-co-color) 1px;border-radius:50%;height:18px;width:18px;font-size:9px;margin-top:2px}code.sourceCode button.code-annotation-anchor{padding:2px;user-select:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none}code.sourceCode a.code-annotation-anchor{line-height:18px;text-align:center;vertical-align:middle;cursor:default;text-decoration:none}@media print{.page-columns .column-screen-inset{grid-column:page-start-inset/page-end-inset;z-index:998;opacity:.999}.page-columns .column-screen-inset table{background:#fefbf2}.page-columns .column-screen-inset-left{grid-column:page-start-inset/body-content-end;z-index:998;opacity:.999}.page-columns .column-screen-inset-left table{background:#fefbf2}.page-columns .column-screen-inset-right{grid-column:body-content-start/page-end-inset;z-index:998;opacity:.999}.page-columns .column-screen-inset-right table{background:#fefbf2}.page-columns .column-screen{grid-column:page-start/page-end;z-index:998;opacity:.999}.page-columns .column-screen table{background:#fefbf2}.page-columns 
.column-screen-left{grid-column:page-start/body-content-end;z-index:998;opacity:.999}.page-columns .column-screen-left table{background:#fefbf2}.page-columns .column-screen-right{grid-column:body-content-start/page-end;z-index:998;opacity:.999}.page-columns .column-screen-right table{background:#fefbf2}.page-columns .column-screen-inset-shaded{grid-column:page-start-inset/page-end-inset;padding:1em;background:#f8f9fa;z-index:998;opacity:.999;margin-bottom:1em}}.quarto-video{margin-bottom:1em}.table{border-top:1px solid #d6d4ce;border-bottom:1px solid #d6d4ce}.table>thead{border-top-width:0;border-bottom:1px solid #999b99}.table a{word-break:break-word}.table>:not(caption)>*>*{background-color:unset;color:unset}#quarto-document-content .crosstalk-input .checkbox input[type=checkbox],#quarto-document-content .crosstalk-input .checkbox-inline input[type=checkbox]{position:unset;margin-top:unset;margin-left:unset}#quarto-document-content .row{margin-left:unset;margin-right:unset}.quarto-xref{white-space:nowrap}#quarto-draft-alert{margin-top:0px;margin-bottom:0px;padding:.3em;text-align:center;font-size:.9em}#quarto-draft-alert i{margin-right:.3em}#quarto-back-to-top{z-index:1000}pre{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:0.875em;font-weight:400}pre code{font-family:inherit;font-size:inherit;font-weight:inherit}code{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:0.875em;font-weight:400}a{background-color:rgba(0,0,0,0);font-weight:400;text-decoration:underline}a.external:after{content:"";background-image:url('data:image/svg+xml,');background-size:contain;background-repeat:no-repeat;background-position:center center;margin-left:.2em;padding-right:.75em}div.sourceCode code a.external:after{content:none}a.external:after:hover{cursor:pointer}.quarto-ext-icon{display:inline-block;font-size:.75em;padding-left:.3em}.code-with-filename 
.code-with-filename-file{margin-bottom:0;padding-bottom:2px;padding-top:2px;padding-left:.7em;border:var(--quarto-border-width) solid var(--quarto-border-color);border-radius:var(--quarto-border-radius);border-bottom:0;border-bottom-left-radius:0%;border-bottom-right-radius:0%}.code-with-filename div.sourceCode,.reveal .code-with-filename div.sourceCode{margin-top:0;border-top-left-radius:0%;border-top-right-radius:0%}.code-with-filename .code-with-filename-file pre{margin-bottom:0}.code-with-filename .code-with-filename-file{background-color:rgba(219,219,219,.8)}.quarto-dark .code-with-filename .code-with-filename-file{background-color:#555}.code-with-filename .code-with-filename-file strong{font-weight:400}.quarto-title-banner{margin-bottom:1em;color:#f2e5bd;background:#052744}.quarto-title-banner a{color:#f2e5bd}.quarto-title-banner h1,.quarto-title-banner .h1,.quarto-title-banner h2,.quarto-title-banner .h2{color:#f2e5bd}.quarto-title-banner .code-tools-button{color:#e1c368}.quarto-title-banner .code-tools-button:hover{color:#f2e5bd}.quarto-title-banner .code-tools-button>.bi::before{background-image:url('data:image/svg+xml,')}.quarto-title-banner .code-tools-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}.quarto-title-banner .quarto-title .title{font-weight:600}.quarto-title-banner .quarto-categories{margin-top:.75em}@media(min-width: 992px){.quarto-title-banner{padding-top:2.5em;padding-bottom:2.5em}}@media(max-width: 991.98px){.quarto-title-banner{padding-top:1em;padding-bottom:1em}}@media(max-width: 767.98px){body.hypothesis-enabled #title-block-header>*{padding-right:20px}}main.quarto-banner-title-block>section:first-child>h2,main.quarto-banner-title-block>section:first-child>.h2,main.quarto-banner-title-block>section:first-child>h3,main.quarto-banner-title-block>section:first-child>.h3,main.quarto-banner-title-block>section:first-child>h4,main.quarto-banner-title-block>section:first-child>.h4{margin-top:0}.quarto-title 
.quarto-categories{display:flex;flex-wrap:wrap;row-gap:.5em;column-gap:.4em;padding-bottom:.5em;margin-top:.75em}.quarto-title .quarto-categories .quarto-category{padding:.25em .75em;font-size:.65em;text-transform:uppercase;border:solid 1px;border-radius:.25rem;opacity:.6}.quarto-title .quarto-categories .quarto-category a{color:inherit}.quarto-title-meta-container{display:grid;grid-template-columns:1fr auto}.quarto-title-meta-column-end{display:flex;flex-direction:column;padding-left:1em}.quarto-title-meta-column-end a .bi{margin-right:.3em}#title-block-header.quarto-title-block.default .quarto-title-meta{display:grid;grid-template-columns:repeat(2, 1fr);grid-column-gap:1em}#title-block-header.quarto-title-block.default .quarto-title .title{margin-bottom:0}#title-block-header.quarto-title-block.default .quarto-title-author-orcid img{margin-top:-0.2em;height:.8em;width:.8em}#title-block-header.quarto-title-block.default .quarto-title-author-email{opacity:.7}#title-block-header.quarto-title-block.default .quarto-description p:last-of-type{margin-bottom:0}#title-block-header.quarto-title-block.default .quarto-title-meta-contents p,#title-block-header.quarto-title-block.default .quarto-title-authors p,#title-block-header.quarto-title-block.default .quarto-title-affiliations p{margin-bottom:.1em}#title-block-header.quarto-title-block.default .quarto-title-meta-heading{text-transform:uppercase;margin-top:1em;font-size:.8em;opacity:.8;font-weight:400}#title-block-header.quarto-title-block.default .quarto-title-meta-contents{font-size:.9em}#title-block-header.quarto-title-block.default .quarto-title-meta-contents p.affiliation:last-of-type{margin-bottom:.1em}#title-block-header.quarto-title-block.default p.affiliation{margin-bottom:.1em}#title-block-header.quarto-title-block.default .keywords,#title-block-header.quarto-title-block.default .description,#title-block-header.quarto-title-block.default .abstract{margin-top:0}#title-block-header.quarto-title-block.default 
.keywords>p,#title-block-header.quarto-title-block.default .description>p,#title-block-header.quarto-title-block.default .abstract>p{font-size:.9em}#title-block-header.quarto-title-block.default .keywords>p:last-of-type,#title-block-header.quarto-title-block.default .description>p:last-of-type,#title-block-header.quarto-title-block.default .abstract>p:last-of-type{margin-bottom:0}#title-block-header.quarto-title-block.default .keywords .block-title,#title-block-header.quarto-title-block.default .description .block-title,#title-block-header.quarto-title-block.default .abstract .block-title{margin-top:1em;text-transform:uppercase;font-size:.8em;opacity:.8;font-weight:400}#title-block-header.quarto-title-block.default .quarto-title-meta-author{display:grid;grid-template-columns:minmax(max-content, 1fr) 1fr;grid-column-gap:1em}.quarto-title-tools-only{display:flex;justify-content:right}body{-webkit-font-smoothing:antialiased}.badge.bg-light{color:#343a40}.progress .progress-bar{font-size:8px;line-height:8px}:root{--quarto-scss-export-gray-300: #dee2e6;--quarto-scss-export-gray-500: #adb5bd;--quarto-scss-export-gray-600: #6c757d;--quarto-scss-export-gray-800: #343a40;--quarto-scss-export-card-cap-bg: rgba(52, 58, 64, 0.25);--quarto-scss-export-border-color: #dee2e6;--quarto-scss-export-text-muted: #6c757d;--quarto-scss-export-old-quarto-body-bg: #FEFBF2;--quarto-scss-export-body-bg: #FEFBF2;--quarto-scss-export-white: #fff;--quarto-scss-export-gray-100: #f8f9fa;--quarto-scss-export-gray-200: #e9ecef;--quarto-scss-export-gray-400: #ced4da;--quarto-scss-export-gray-700: #495057;--quarto-scss-export-gray-900: #212529;--quarto-scss-export-black: #000;--quarto-scss-export-blue: #2780e3;--quarto-scss-export-indigo: #6610f2;--quarto-scss-export-purple: #613d7c;--quarto-scss-export-pink: #e83e8c;--quarto-scss-export-red: #ff0039;--quarto-scss-export-orange: #f0ad4e;--quarto-scss-export-yellow: #ff7518;--quarto-scss-export-green: #3fb618;--quarto-scss-export-teal: 
#20c997;--quarto-scss-export-cyan: #9954bb;--quarto-scss-export-primary: #2780e3;--quarto-scss-export-secondary: #343a40;--quarto-scss-export-success: #3fb618;--quarto-scss-export-info: #9954bb;--quarto-scss-export-warning: #ff7518;--quarto-scss-export-danger: #ff0039;--quarto-scss-export-light: #f8f9fa;--quarto-scss-export-dark: #343a40;--quarto-scss-export-body-color: #343a40;--quarto-scss-export-title-banner-color: ;--quarto-scss-export-title-banner-bg: ;--quarto-scss-export-btn-code-copy-color: #8f5902;--quarto-scss-export-btn-code-copy-color-active: #204a87;--quarto-scss-export-link-color: #2761e3;--quarto-scss-export-link-color-bg: transparent;--quarto-scss-export-code-color: #7d12ba;--quarto-scss-export-code-bg: #f8f9fa;--quarto-scss-export-toc-color: #2761e3;--quarto-scss-export-toc-active-border: #2761e3;--quarto-scss-export-toc-inactive-border: #e9ecef;--quarto-scss-export-navbar-default: #2780e3;--quarto-scss-export-navbar-hl-override: #93b0f1;--quarto-scss-export-btn-bg: #343a40;--quarto-scss-export-btn-fg: #cacccd;--quarto-scss-export-body-contrast-bg: #FEFBF2;--quarto-scss-export-body-contrast-color: #343a40;--quarto-scss-export-navbar-hl: #93b0f1;--quarto-scss-export-navbar-brand-hl: #93b0f1;--quarto-scss-export-navbar-toggler-border-color: rgba(242, 229, 189, 0);--quarto-scss-export-navbar-hover-color: rgba(147, 176, 241, 0.8);--quarto-scss-export-navbar-disabled-color: rgba(242, 229, 189, 0.75);--quarto-scss-export-title-block-color: #343a40;--quarto-scss-export-title-block-contast-color: #FEFBF2;--quarto-scss-export-footer-bg: #FEFBF2;--quarto-scss-export-footer-fg: #75736f;--quarto-scss-export-popover-bg: #FEFBF2;--quarto-scss-export-input-bg: #FEFBF2;--quarto-scss-export-input-border-color: #dee2e6;--quarto-scss-export-code-annotation-higlight-color: rgba(170, 170, 170, 0.2666666667);--quarto-scss-export-code-annotation-higlight-bg: rgba(170, 170, 170, 0.1333333333);--quarto-scss-export-table-group-separator-color: 
#999b99;--quarto-scss-export-table-group-separator-color-lighter: #d6d4ce;--quarto-scss-export-link-decoration: underline;--quarto-scss-export-table-border-color: #dee2e6;--quarto-scss-export-sidebar-glass-bg: rgba(102, 102, 102, 0.4);--quarto-scss-export-color-contrast-dark: #000;--quarto-scss-export-color-contrast-light: #fff;--quarto-scss-export-blue-100: #d4e6f9;--quarto-scss-export-blue-200: #a9ccf4;--quarto-scss-export-blue-300: #7db3ee;--quarto-scss-export-blue-400: #5299e9;--quarto-scss-export-blue-500: #2780e3;--quarto-scss-export-blue-600: #1f66b6;--quarto-scss-export-blue-700: #174d88;--quarto-scss-export-blue-800: #10335b;--quarto-scss-export-blue-900: #081a2d;--quarto-scss-export-indigo-100: #e0cffc;--quarto-scss-export-indigo-200: #c29ffa;--quarto-scss-export-indigo-300: #a370f7;--quarto-scss-export-indigo-400: #8540f5;--quarto-scss-export-indigo-500: #6610f2;--quarto-scss-export-indigo-600: #520dc2;--quarto-scss-export-indigo-700: #3d0a91;--quarto-scss-export-indigo-800: #290661;--quarto-scss-export-indigo-900: #140330;--quarto-scss-export-purple-100: #dfd8e5;--quarto-scss-export-purple-200: #c0b1cb;--quarto-scss-export-purple-300: #a08bb0;--quarto-scss-export-purple-400: #816496;--quarto-scss-export-purple-500: #613d7c;--quarto-scss-export-purple-600: #4e3163;--quarto-scss-export-purple-700: #3a254a;--quarto-scss-export-purple-800: #271832;--quarto-scss-export-purple-900: #130c19;--quarto-scss-export-pink-100: #fad8e8;--quarto-scss-export-pink-200: #f6b2d1;--quarto-scss-export-pink-300: #f18bba;--quarto-scss-export-pink-400: #ed65a3;--quarto-scss-export-pink-500: #e83e8c;--quarto-scss-export-pink-600: #ba3270;--quarto-scss-export-pink-700: #8b2554;--quarto-scss-export-pink-800: #5d1938;--quarto-scss-export-pink-900: #2e0c1c;--quarto-scss-export-red-100: #ffccd7;--quarto-scss-export-red-200: #ff99b0;--quarto-scss-export-red-300: #ff6688;--quarto-scss-export-red-400: #ff3361;--quarto-scss-export-red-500: #ff0039;--quarto-scss-export-red-600: 
#cc002e;--quarto-scss-export-red-700: #990022;--quarto-scss-export-red-800: #660017;--quarto-scss-export-red-900: #33000b;--quarto-scss-export-orange-100: #fcefdc;--quarto-scss-export-orange-200: #f9deb8;--quarto-scss-export-orange-300: #f6ce95;--quarto-scss-export-orange-400: #f3bd71;--quarto-scss-export-orange-500: #f0ad4e;--quarto-scss-export-orange-600: #c08a3e;--quarto-scss-export-orange-700: #90682f;--quarto-scss-export-orange-800: #60451f;--quarto-scss-export-orange-900: #302310;--quarto-scss-export-yellow-100: #ffe3d1;--quarto-scss-export-yellow-200: #ffc8a3;--quarto-scss-export-yellow-300: #ffac74;--quarto-scss-export-yellow-400: #ff9146;--quarto-scss-export-yellow-500: #ff7518;--quarto-scss-export-yellow-600: #cc5e13;--quarto-scss-export-yellow-700: #99460e;--quarto-scss-export-yellow-800: #662f0a;--quarto-scss-export-yellow-900: #331705;--quarto-scss-export-green-100: #d9f0d1;--quarto-scss-export-green-200: #b2e2a3;--quarto-scss-export-green-300: #8cd374;--quarto-scss-export-green-400: #65c546;--quarto-scss-export-green-500: #3fb618;--quarto-scss-export-green-600: #329213;--quarto-scss-export-green-700: #266d0e;--quarto-scss-export-green-800: #19490a;--quarto-scss-export-green-900: #0d2405;--quarto-scss-export-teal-100: #d2f4ea;--quarto-scss-export-teal-200: #a6e9d5;--quarto-scss-export-teal-300: #79dfc1;--quarto-scss-export-teal-400: #4dd4ac;--quarto-scss-export-teal-500: #20c997;--quarto-scss-export-teal-600: #1aa179;--quarto-scss-export-teal-700: #13795b;--quarto-scss-export-teal-800: #0d503c;--quarto-scss-export-teal-900: #06281e;--quarto-scss-export-cyan-100: #ebddf1;--quarto-scss-export-cyan-200: #d6bbe4;--quarto-scss-export-cyan-300: #c298d6;--quarto-scss-export-cyan-400: #ad76c9;--quarto-scss-export-cyan-500: #9954bb;--quarto-scss-export-cyan-600: #7a4396;--quarto-scss-export-cyan-700: #5c3270;--quarto-scss-export-cyan-800: #3d224b;--quarto-scss-export-cyan-900: #1f1125;--quarto-scss-export-default: 
#343a40;--quarto-scss-export-primary-text-emphasis: #10335b;--quarto-scss-export-secondary-text-emphasis: #15171a;--quarto-scss-export-success-text-emphasis: #19490a;--quarto-scss-export-info-text-emphasis: #3d224b;--quarto-scss-export-warning-text-emphasis: #662f0a;--quarto-scss-export-danger-text-emphasis: #660017;--quarto-scss-export-light-text-emphasis: #495057;--quarto-scss-export-dark-text-emphasis: #495057;--quarto-scss-export-primary-bg-subtle: #d4e6f9;--quarto-scss-export-secondary-bg-subtle: #d6d8d9;--quarto-scss-export-success-bg-subtle: #d9f0d1;--quarto-scss-export-info-bg-subtle: #ebddf1;--quarto-scss-export-warning-bg-subtle: #ffe3d1;--quarto-scss-export-danger-bg-subtle: #ffccd7;--quarto-scss-export-light-bg-subtle: #fcfcfd;--quarto-scss-export-dark-bg-subtle: #ced4da;--quarto-scss-export-primary-border-subtle: #a9ccf4;--quarto-scss-export-secondary-border-subtle: #aeb0b3;--quarto-scss-export-success-border-subtle: #b2e2a3;--quarto-scss-export-info-border-subtle: #d6bbe4;--quarto-scss-export-warning-border-subtle: #ffc8a3;--quarto-scss-export-danger-border-subtle: #ff99b0;--quarto-scss-export-light-border-subtle: #e9ecef;--quarto-scss-export-dark-border-subtle: #adb5bd;--quarto-scss-export-body-text-align: ;--quarto-scss-export-body-secondary-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-body-secondary-bg: #e9ecef;--quarto-scss-export-body-tertiary-color: rgba(52, 58, 64, 0.5);--quarto-scss-export-body-tertiary-bg: #f8f9fa;--quarto-scss-export-body-emphasis-color: #000;--quarto-scss-export-link-hover-color: #1f4eb6;--quarto-scss-export-link-hover-decoration: ;--quarto-scss-export-border-color-translucent: rgba(0, 0, 0, 0.175);--quarto-scss-export-component-active-bg: #2780e3;--quarto-scss-export-component-active-color: #fff;--quarto-scss-export-focus-ring-color: rgba(39, 128, 227, 0.25);--quarto-scss-export-headings-font-family: ;--quarto-scss-export-headings-font-style: ;--quarto-scss-export-display-font-family: 
;--quarto-scss-export-display-font-style: ;--quarto-scss-export-blockquote-footer-color: #6c757d;--quarto-scss-export-blockquote-border-color: #e9ecef;--quarto-scss-export-hr-bg-color: ;--quarto-scss-export-hr-height: ;--quarto-scss-export-hr-border-color: ;--quarto-scss-export-legend-font-weight: ;--quarto-scss-export-mark-bg: #ffe3d1;--quarto-scss-export-table-color: #343a40;--quarto-scss-export-table-bg: #FEFBF2;--quarto-scss-export-table-accent-bg: transparent;--quarto-scss-export-table-th-font-weight: ;--quarto-scss-export-table-striped-color: #343a40;--quarto-scss-export-table-striped-bg: rgba(0, 0, 0, 0.05);--quarto-scss-export-table-active-color: #343a40;--quarto-scss-export-table-active-bg: rgba(0, 0, 0, 0.1);--quarto-scss-export-table-hover-color: #343a40;--quarto-scss-export-table-hover-bg: rgba(0, 0, 0, 0.075);--quarto-scss-export-table-caption-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-input-btn-font-family: ;--quarto-scss-export-input-btn-focus-color: rgba(39, 128, 227, 0.25);--quarto-scss-export-btn-color: #343a40;--quarto-scss-export-btn-font-family: ;--quarto-scss-export-btn-white-space: ;--quarto-scss-export-btn-link-color: #2761e3;--quarto-scss-export-btn-link-hover-color: #1f4eb6;--quarto-scss-export-btn-link-disabled-color: #6c757d;--quarto-scss-export-form-text-font-style: ;--quarto-scss-export-form-text-font-weight: ;--quarto-scss-export-form-text-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-form-label-font-size: ;--quarto-scss-export-form-label-font-style: ;--quarto-scss-export-form-label-font-weight: ;--quarto-scss-export-form-label-color: ;--quarto-scss-export-input-font-family: ;--quarto-scss-export-input-disabled-color: ;--quarto-scss-export-input-disabled-bg: #e9ecef;--quarto-scss-export-input-disabled-border-color: ;--quarto-scss-export-input-color: #343a40;--quarto-scss-export-input-focus-bg: #FEFBF2;--quarto-scss-export-input-focus-border-color: #93c0f1;--quarto-scss-export-input-focus-color: 
#343a40;--quarto-scss-export-input-placeholder-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-input-plaintext-color: #343a40;--quarto-scss-export-form-check-label-color: ;--quarto-scss-export-form-check-transition: ;--quarto-scss-export-form-check-input-bg: #FEFBF2;--quarto-scss-export-form-check-input-focus-border: #93c0f1;--quarto-scss-export-form-check-input-checked-color: #fff;--quarto-scss-export-form-check-input-checked-bg-color: #2780e3;--quarto-scss-export-form-check-input-checked-border-color: #2780e3;--quarto-scss-export-form-check-input-indeterminate-color: #fff;--quarto-scss-export-form-check-input-indeterminate-bg-color: #2780e3;--quarto-scss-export-form-check-input-indeterminate-border-color: #2780e3;--quarto-scss-export-form-switch-color: rgba(0, 0, 0, 0.25);--quarto-scss-export-form-switch-focus-color: #93c0f1;--quarto-scss-export-form-switch-checked-color: #fff;--quarto-scss-export-input-group-addon-color: #343a40;--quarto-scss-export-input-group-addon-bg: #f8f9fa;--quarto-scss-export-input-group-addon-border-color: #dee2e6;--quarto-scss-export-form-select-font-family: ;--quarto-scss-export-form-select-color: #343a40;--quarto-scss-export-form-select-bg: #FEFBF2;--quarto-scss-export-form-select-disabled-color: ;--quarto-scss-export-form-select-disabled-bg: #e9ecef;--quarto-scss-export-form-select-disabled-border-color: ;--quarto-scss-export-form-select-indicator-color: #343a40;--quarto-scss-export-form-select-border-color: #dee2e6;--quarto-scss-export-form-select-focus-border-color: #93c0f1;--quarto-scss-export-form-range-track-bg: #f8f9fa;--quarto-scss-export-form-range-thumb-bg: #2780e3;--quarto-scss-export-form-range-thumb-active-bg: #bed9f7;--quarto-scss-export-form-range-thumb-disabled-bg: rgba(52, 58, 64, 0.75);--quarto-scss-export-form-file-button-color: #343a40;--quarto-scss-export-form-file-button-bg: #f8f9fa;--quarto-scss-export-form-file-button-hover-bg: #e9ecef;--quarto-scss-export-form-floating-label-disabled-color: 
#6c757d;--quarto-scss-export-form-feedback-font-style: ;--quarto-scss-export-form-feedback-valid-color: #3fb618;--quarto-scss-export-form-feedback-invalid-color: #ff0039;--quarto-scss-export-form-feedback-icon-valid-color: #3fb618;--quarto-scss-export-form-feedback-icon-invalid-color: #ff0039;--quarto-scss-export-form-valid-color: #3fb618;--quarto-scss-export-form-valid-border-color: #3fb618;--quarto-scss-export-form-invalid-color: #ff0039;--quarto-scss-export-form-invalid-border-color: #ff0039;--quarto-scss-export-nav-link-font-size: ;--quarto-scss-export-nav-link-font-weight: ;--quarto-scss-export-nav-link-color: #2761e3;--quarto-scss-export-nav-link-hover-color: #1f4eb6;--quarto-scss-export-nav-link-disabled-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-nav-tabs-border-color: #dee2e6;--quarto-scss-export-nav-tabs-link-hover-border-color: #e9ecef #e9ecef #dee2e6;--quarto-scss-export-nav-tabs-link-active-color: #000;--quarto-scss-export-nav-tabs-link-active-bg: #FEFBF2;--quarto-scss-export-nav-pills-link-active-bg: #2780e3;--quarto-scss-export-nav-pills-link-active-color: #fff;--quarto-scss-export-nav-underline-link-active-color: #000;--quarto-scss-export-navbar-padding-x: ;--quarto-scss-export-navbar-light-contrast: #fff;--quarto-scss-export-navbar-dark-contrast: #fff;--quarto-scss-export-navbar-light-icon-color: rgba(255, 255, 255, 0.75);--quarto-scss-export-navbar-dark-icon-color: rgba(255, 255, 255, 0.75);--quarto-scss-export-dropdown-color: #343a40;--quarto-scss-export-dropdown-bg: #FEFBF2;--quarto-scss-export-dropdown-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-dropdown-divider-bg: rgba(0, 0, 0, 0.175);--quarto-scss-export-dropdown-link-color: #343a40;--quarto-scss-export-dropdown-link-hover-color: #343a40;--quarto-scss-export-dropdown-link-hover-bg: #f8f9fa;--quarto-scss-export-dropdown-link-active-bg: #2780e3;--quarto-scss-export-dropdown-link-active-color: #fff;--quarto-scss-export-dropdown-link-disabled-color: rgba(52, 58, 64, 
0.5);--quarto-scss-export-dropdown-header-color: #6c757d;--quarto-scss-export-dropdown-dark-color: #dee2e6;--quarto-scss-export-dropdown-dark-bg: #343a40;--quarto-scss-export-dropdown-dark-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-dropdown-dark-divider-bg: rgba(0, 0, 0, 0.175);--quarto-scss-export-dropdown-dark-box-shadow: ;--quarto-scss-export-dropdown-dark-link-color: #dee2e6;--quarto-scss-export-dropdown-dark-link-hover-color: #fff;--quarto-scss-export-dropdown-dark-link-hover-bg: rgba(255, 255, 255, 0.15);--quarto-scss-export-dropdown-dark-link-active-color: #fff;--quarto-scss-export-dropdown-dark-link-active-bg: #2780e3;--quarto-scss-export-dropdown-dark-link-disabled-color: #adb5bd;--quarto-scss-export-dropdown-dark-header-color: #adb5bd;--quarto-scss-export-pagination-color: #2761e3;--quarto-scss-export-pagination-bg: #FEFBF2;--quarto-scss-export-pagination-border-color: #dee2e6;--quarto-scss-export-pagination-focus-color: #1f4eb6;--quarto-scss-export-pagination-focus-bg: #e9ecef;--quarto-scss-export-pagination-hover-color: #1f4eb6;--quarto-scss-export-pagination-hover-bg: #f8f9fa;--quarto-scss-export-pagination-hover-border-color: #dee2e6;--quarto-scss-export-pagination-active-color: #fff;--quarto-scss-export-pagination-active-bg: #2780e3;--quarto-scss-export-pagination-active-border-color: #2780e3;--quarto-scss-export-pagination-disabled-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-pagination-disabled-bg: #e9ecef;--quarto-scss-export-pagination-disabled-border-color: #dee2e6;--quarto-scss-export-card-title-color: ;--quarto-scss-export-card-subtitle-color: ;--quarto-scss-export-card-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-card-box-shadow: ;--quarto-scss-export-card-cap-color: ;--quarto-scss-export-card-height: ;--quarto-scss-export-card-color: ;--quarto-scss-export-card-bg: #FEFBF2;--quarto-scss-export-accordion-color: #343a40;--quarto-scss-export-accordion-bg: #FEFBF2;--quarto-scss-export-accordion-border-color: 
#dee2e6;--quarto-scss-export-accordion-button-color: #343a40;--quarto-scss-export-accordion-button-bg: #FEFBF2;--quarto-scss-export-accordion-button-active-bg: #d4e6f9;--quarto-scss-export-accordion-button-active-color: #10335b;--quarto-scss-export-accordion-button-focus-border-color: #93c0f1;--quarto-scss-export-accordion-icon-color: #343a40;--quarto-scss-export-accordion-icon-active-color: #10335b;--quarto-scss-export-tooltip-color: #FEFBF2;--quarto-scss-export-tooltip-bg: #000;--quarto-scss-export-tooltip-margin: ;--quarto-scss-export-tooltip-arrow-color: ;--quarto-scss-export-form-feedback-tooltip-line-height: ;--quarto-scss-export-popover-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-popover-header-bg: #e9ecef;--quarto-scss-export-popover-body-color: #343a40;--quarto-scss-export-popover-arrow-color: #FEFBF2;--quarto-scss-export-popover-arrow-outer-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-toast-color: ;--quarto-scss-export-toast-background-color: rgba(254, 251, 242, 0.85);--quarto-scss-export-toast-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-toast-header-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-toast-header-background-color: rgba(254, 251, 242, 0.85);--quarto-scss-export-toast-header-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-badge-color: #fff;--quarto-scss-export-modal-content-color: ;--quarto-scss-export-modal-content-bg: #FEFBF2;--quarto-scss-export-modal-content-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-modal-backdrop-bg: #000;--quarto-scss-export-modal-header-border-color: #dee2e6;--quarto-scss-export-modal-footer-bg: ;--quarto-scss-export-modal-footer-border-color: #dee2e6;--quarto-scss-export-progress-bg: #e9ecef;--quarto-scss-export-progress-bar-color: #fff;--quarto-scss-export-progress-bar-bg: #2780e3;--quarto-scss-export-list-group-color: #343a40;--quarto-scss-export-list-group-bg: #FEFBF2;--quarto-scss-export-list-group-border-color: #dee2e6;--quarto-scss-export-list-group-hover-bg: 
#f8f9fa;--quarto-scss-export-list-group-active-bg: #2780e3;--quarto-scss-export-list-group-active-color: #fff;--quarto-scss-export-list-group-active-border-color: #2780e3;--quarto-scss-export-list-group-disabled-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-list-group-disabled-bg: #FEFBF2;--quarto-scss-export-list-group-action-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-list-group-action-hover-color: #000;--quarto-scss-export-list-group-action-active-color: #343a40;--quarto-scss-export-list-group-action-active-bg: #e9ecef;--quarto-scss-export-thumbnail-bg: #FEFBF2;--quarto-scss-export-thumbnail-border-color: #dee2e6;--quarto-scss-export-figure-caption-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-breadcrumb-font-size: ;--quarto-scss-export-breadcrumb-bg: ;--quarto-scss-export-breadcrumb-divider-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-breadcrumb-active-color: rgba(52, 58, 64, 0.75);--quarto-scss-export-breadcrumb-border-radius: ;--quarto-scss-export-carousel-control-color: #fff;--quarto-scss-export-carousel-indicator-active-bg: #fff;--quarto-scss-export-carousel-caption-color: #fff;--quarto-scss-export-carousel-dark-indicator-active-bg: #000;--quarto-scss-export-carousel-dark-caption-color: #000;--quarto-scss-export-btn-close-color: #000;--quarto-scss-export-offcanvas-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-offcanvas-bg-color: #FEFBF2;--quarto-scss-export-offcanvas-color: #343a40;--quarto-scss-export-offcanvas-backdrop-bg: #000;--quarto-scss-export-code-color-dark: white;--quarto-scss-export-kbd-color: #FEFBF2;--quarto-scss-export-kbd-bg: #343a40;--quarto-scss-export-nested-kbd-font-weight: ;--quarto-scss-export-pre-bg: #f8f9fa;--quarto-scss-export-pre-color: #000;--quarto-scss-export-bslib-page-sidebar-title-bg: #052744;--quarto-scss-export-bslib-page-sidebar-title-color: #fff;--quarto-scss-export-bslib-sidebar-bg: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.05);--quarto-scss-export-bslib-sidebar-toggle-bg: 
rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.1);--quarto-scss-export-sidebar-color: #052744;--quarto-scss-export-sidebar-hover-color: rgba(32, 80, 186, 0.8);--quarto-scss-export-sidebar-disabled-color: rgba(5, 39, 68, 0.75);--quarto-scss-export-valuebox-bg-primary: #5397e9;--quarto-scss-export-valuebox-bg-secondary: #343a40;--quarto-scss-export-valuebox-bg-success: #3aa716;--quarto-scss-export-valuebox-bg-info: rgba(153, 84, 187, 0.7019607843);--quarto-scss-export-valuebox-bg-warning: #fa6400;--quarto-scss-export-valuebox-bg-danger: rgba(255, 0, 57, 0.7019607843);--quarto-scss-export-valuebox-bg-light: #f8f9fa;--quarto-scss-export-valuebox-bg-dark: #343a40;--quarto-scss-export-mermaid-bg-color: #FEFBF2;--quarto-scss-export-mermaid-edge-color: #343a40;--quarto-scss-export-mermaid-node-fg-color: #343a40;--quarto-scss-export-mermaid-fg-color: #343a40;--quarto-scss-export-mermaid-fg-color--lighter: #4b545c;--quarto-scss-export-mermaid-fg-color--lightest: #626d78;--quarto-scss-export-mermaid-label-bg-color: #FEFBF2;--quarto-scss-export-mermaid-label-fg-color: #2780e3;--quarto-scss-export-mermaid-node-bg-color: rgba(39, 128, 227, 0.1);--quarto-scss-export-code-block-border-left-color: #dee2e6;--quarto-scss-export-callout-color-note: #2780e3;--quarto-scss-export-callout-color-tip: #3fb618;--quarto-scss-export-callout-color-important: #ff0039;--quarto-scss-export-callout-color-caution: #f0ad4e;--quarto-scss-export-callout-color-warning: #ff7518} \ No newline at end of file diff --git a/docs/2_39/site_libs/bootstrap/bootstrap-dark-2df8a8547a8386440a781d5ef2c5f5b1.min.css b/docs/2_39/site_libs/bootstrap/bootstrap-dark-2df8a8547a8386440a781d5ef2c5f5b1.min.css new file mode 100644 index 000000000..05acf2182 --- /dev/null +++ b/docs/2_39/site_libs/bootstrap/bootstrap-dark-2df8a8547a8386440a781d5ef2c5f5b1.min.css @@ -0,0 +1,12 @@ +@import"https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400;700&display=swap";:root{--stan-bg: #242424;--stan-highlight: 
#053A42;--stan-secondary: #AED1E4;--stan-dark: #3E8EBC;--stan-hero: #cee9f7;--stan-hero-bg: #052744}/*! + * Bootstrap v5.3.1 (https://getbootstrap.com/) + * Copyright 2011-2023 The Bootstrap Authors + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) + */:root,[data-bs-theme=light]{--bs-blue: #2780e3;--bs-indigo: #6610f2;--bs-purple: #613d7c;--bs-pink: #e83e8c;--bs-red: #ff0039;--bs-orange: #f0ad4e;--bs-yellow: #ff7518;--bs-green: #3fb618;--bs-teal: #20c997;--bs-cyan: #9954bb;--bs-black: #000;--bs-white: #fff;--bs-gray: #6c757d;--bs-gray-dark: #343a40;--bs-gray-100: #f8f9fa;--bs-gray-200: #e9ecef;--bs-gray-300: #dee2e6;--bs-gray-400: #ced4da;--bs-gray-500: #adb5bd;--bs-gray-600: #6c757d;--bs-gray-700: #495057;--bs-gray-800: #343a40;--bs-gray-900: #212529;--bs-default: #343a40;--bs-primary: #2780e3;--bs-secondary: #343a40;--bs-success: #3fb618;--bs-info: #9954bb;--bs-warning: #ff7518;--bs-danger: #ff0039;--bs-light: #525252;--bs-dark: #343a40;--bs-default-rgb: 52, 58, 64;--bs-primary-rgb: 39, 128, 227;--bs-secondary-rgb: 52, 58, 64;--bs-success-rgb: 63, 182, 24;--bs-info-rgb: 153, 84, 187;--bs-warning-rgb: 255, 117, 24;--bs-danger-rgb: 255, 0, 57;--bs-light-rgb: 82, 82, 82;--bs-dark-rgb: 52, 58, 64;--bs-primary-text-emphasis: #10335b;--bs-secondary-text-emphasis: #15171a;--bs-success-text-emphasis: #19490a;--bs-info-text-emphasis: #3d224b;--bs-warning-text-emphasis: #662f0a;--bs-danger-text-emphasis: #660017;--bs-light-text-emphasis: #495057;--bs-dark-text-emphasis: #495057;--bs-primary-bg-subtle: #d4e6f9;--bs-secondary-bg-subtle: #d6d8d9;--bs-success-bg-subtle: #d9f0d1;--bs-info-bg-subtle: #ebddf1;--bs-warning-bg-subtle: #ffe3d1;--bs-danger-bg-subtle: #ffccd7;--bs-light-bg-subtle: #fcfcfd;--bs-dark-bg-subtle: #ced4da;--bs-primary-border-subtle: #a9ccf4;--bs-secondary-border-subtle: #aeb0b3;--bs-success-border-subtle: #b2e2a3;--bs-info-border-subtle: #d6bbe4;--bs-warning-border-subtle: #ffc8a3;--bs-danger-border-subtle: 
#ff99b0;--bs-light-border-subtle: #e9ecef;--bs-dark-border-subtle: #adb5bd;--bs-white-rgb: 255, 255, 255;--bs-black-rgb: 0, 0, 0;--bs-font-sans-serif: "Source Sans Pro", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";--bs-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;--bs-gradient: linear-gradient(180deg, rgba(255, 255, 255, 0.15), rgba(255, 255, 255, 0));--bs-root-font-size: 17px;--bs-body-font-family: "Source Sans Pro", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";--bs-body-font-size:1rem;--bs-body-font-weight: 400;--bs-body-line-height: 1.5;--bs-body-color: #FAF1E4;--bs-body-color-rgb: 250, 241, 228;--bs-body-bg: #181818;--bs-body-bg-rgb: 24, 24, 24;--bs-emphasis-color: #000;--bs-emphasis-color-rgb: 0, 0, 0;--bs-secondary-color: rgba(250, 241, 228, 0.75);--bs-secondary-color-rgb: 250, 241, 228;--bs-secondary-bg: #e9ecef;--bs-secondary-bg-rgb: 233, 236, 239;--bs-tertiary-color: rgba(250, 241, 228, 0.5);--bs-tertiary-color-rgb: 250, 241, 228;--bs-tertiary-bg: #f8f9fa;--bs-tertiary-bg-rgb: 248, 249, 250;--bs-heading-color: inherit;--bs-link-color: #AED1E4;--bs-link-color-rgb: 174, 209, 228;--bs-link-decoration: underline;--bs-link-hover-color: #8ba7b6;--bs-link-hover-color-rgb: 139, 167, 182;--bs-code-color: #7d12ba;--bs-highlight-bg: #ffe3d1;--bs-border-width: 1px;--bs-border-style: solid;--bs-border-color: #dee2e6;--bs-border-color-translucent: rgba(0, 0, 0, 0.175);--bs-border-radius: 0.25rem;--bs-border-radius-sm: 0.2em;--bs-border-radius-lg: 0.5rem;--bs-border-radius-xl: 1rem;--bs-border-radius-xxl: 2rem;--bs-border-radius-2xl: var(--bs-border-radius-xxl);--bs-border-radius-pill: 50rem;--bs-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);--bs-box-shadow-sm: 0 0.125rem 0.25rem rgba(0, 0, 0, 
0.075);--bs-box-shadow-lg: 0 1rem 3rem rgba(0, 0, 0, 0.175);--bs-box-shadow-inset: inset 0 1px 2px rgba(0, 0, 0, 0.075);--bs-focus-ring-width: 0.25rem;--bs-focus-ring-opacity: 0.25;--bs-focus-ring-color: rgba(39, 128, 227, 0.25);--bs-form-valid-color: #3fb618;--bs-form-valid-border-color: #3fb618;--bs-form-invalid-color: #ff0039;--bs-form-invalid-border-color: #ff0039}[data-bs-theme=dark]{color-scheme:dark;--bs-body-color: #dee2e6;--bs-body-color-rgb: 222, 226, 230;--bs-body-bg: #212529;--bs-body-bg-rgb: 33, 37, 41;--bs-emphasis-color: #fff;--bs-emphasis-color-rgb: 255, 255, 255;--bs-secondary-color: rgba(222, 226, 230, 0.75);--bs-secondary-color-rgb: 222, 226, 230;--bs-secondary-bg: #343a40;--bs-secondary-bg-rgb: 52, 58, 64;--bs-tertiary-color: rgba(222, 226, 230, 0.5);--bs-tertiary-color-rgb: 222, 226, 230;--bs-tertiary-bg: #2b3035;--bs-tertiary-bg-rgb: 43, 48, 53;--bs-primary-text-emphasis: #7db3ee;--bs-secondary-text-emphasis: #85898c;--bs-success-text-emphasis: #8cd374;--bs-info-text-emphasis: #c298d6;--bs-warning-text-emphasis: #ffac74;--bs-danger-text-emphasis: #ff6688;--bs-light-text-emphasis: #f8f9fa;--bs-dark-text-emphasis: #dee2e6;--bs-primary-bg-subtle: #081a2d;--bs-secondary-bg-subtle: #0a0c0d;--bs-success-bg-subtle: #0d2405;--bs-info-bg-subtle: #1f1125;--bs-warning-bg-subtle: #331705;--bs-danger-bg-subtle: #33000b;--bs-light-bg-subtle: #343a40;--bs-dark-bg-subtle: #1a1d20;--bs-primary-border-subtle: #174d88;--bs-secondary-border-subtle: #1f2326;--bs-success-border-subtle: #266d0e;--bs-info-border-subtle: #5c3270;--bs-warning-border-subtle: #99460e;--bs-danger-border-subtle: #990022;--bs-light-border-subtle: #495057;--bs-dark-border-subtle: #343a40;--bs-heading-color: inherit;--bs-link-color: #7db3ee;--bs-link-hover-color: #97c2f1;--bs-link-color-rgb: 125, 179, 238;--bs-link-hover-color-rgb: 151, 194, 241;--bs-code-color: white;--bs-border-color: #495057;--bs-border-color-translucent: rgba(255, 255, 255, 0.15);--bs-form-valid-color: 
#8cd374;--bs-form-valid-border-color: #8cd374;--bs-form-invalid-color: #ff6688;--bs-form-invalid-border-color: #ff6688}*,*::before,*::after{box-sizing:border-box}:root{font-size:var(--bs-root-font-size)}body{margin:0;font-family:var(--bs-body-font-family);font-size:var(--bs-body-font-size);font-weight:var(--bs-body-font-weight);line-height:var(--bs-body-line-height);color:var(--bs-body-color);text-align:var(--bs-body-text-align);background-color:var(--bs-body-bg);-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:rgba(0,0,0,0)}hr{margin:1rem 0;color:inherit;border:0;border-top:1px solid;opacity:.25}h6,.h6,h5,.h5,h4,.h4,h3,.h3,h2,.h2,h1,.h1{margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2;color:var(--bs-heading-color)}h1,.h1{font-size:calc(1.325rem + 0.9vw)}@media(min-width: 1200px){h1,.h1{font-size:2rem}}h2,.h2{font-size:calc(1.29rem + 0.48vw)}@media(min-width: 1200px){h2,.h2{font-size:1.65rem}}h3,.h3{font-size:calc(1.27rem + 0.24vw)}@media(min-width: 1200px){h3,.h3{font-size:1.45rem}}h4,.h4{font-size:1.25rem}h5,.h5{font-size:1.1rem}h6,.h6{font-size:1rem}p{margin-top:0;margin-bottom:1rem}abbr[title]{text-decoration:underline dotted;-webkit-text-decoration:underline dotted;-moz-text-decoration:underline dotted;-ms-text-decoration:underline dotted;-o-text-decoration:underline dotted;cursor:help;text-decoration-skip-ink:none}address{margin-bottom:1rem;font-style:normal;line-height:inherit}ol,ul{padding-left:2rem}ol,ul,dl{margin-top:0;margin-bottom:1rem}ol ol,ul ul,ol ul,ul ol{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem;padding:.625rem 1.25rem;border-left:.25rem solid #e9ecef}blockquote p:last-child,blockquote ul:last-child,blockquote 
ol:last-child{margin-bottom:0}b,strong{font-weight:bolder}small,.small{font-size:0.875em}mark,.mark{padding:.1875em;background-color:var(--bs-highlight-bg)}sub,sup{position:relative;font-size:0.75em;line-height:0;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}a{color:rgba(var(--bs-link-color-rgb), var(--bs-link-opacity, 1));text-decoration:underline;-webkit-text-decoration:underline;-moz-text-decoration:underline;-ms-text-decoration:underline;-o-text-decoration:underline}a:hover{--bs-link-color-rgb: var(--bs-link-hover-color-rgb)}a:not([href]):not([class]),a:not([href]):not([class]):hover{color:inherit;text-decoration:none}pre,code,kbd,samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:1em}pre{display:block;margin-top:0;margin-bottom:1rem;overflow:auto;font-size:0.875em;color:#000;background-color:#f8f9fa;line-height:1.5;padding:.5rem;border:1px solid var(--bs-border-color, #dee2e6)}pre code{background-color:rgba(0,0,0,0);font-size:inherit;color:inherit;word-break:normal}code{font-size:0.875em;color:var(--bs-code-color);background-color:#f8f9fa;padding:.125rem .25rem;word-wrap:break-word}a>code{color:inherit}kbd{padding:.4rem .4rem;font-size:0.875em;color:#181818;background-color:#faf1e4}kbd kbd{padding:0;font-size:1em}figure{margin:0 0 
1rem}img,svg{vertical-align:middle}table{caption-side:bottom;border-collapse:collapse}caption{padding-top:.5rem;padding-bottom:.5rem;color:rgba(250,241,228,.75);text-align:left}th{text-align:inherit;text-align:-webkit-match-parent}thead,tbody,tfoot,tr,td,th{border-color:inherit;border-style:solid;border-width:0}label{display:inline-block}button{border-radius:0}button:focus:not(:focus-visible){outline:0}input,button,select,optgroup,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}select:disabled{opacity:1}[list]:not([type=date]):not([type=datetime-local]):not([type=month]):not([type=week]):not([type=time])::-webkit-calendar-picker-indicator{display:none !important}button,[type=button],[type=reset],[type=submit]{-webkit-appearance:button}button:not(:disabled),[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled){cursor:pointer}::-moz-focus-inner{padding:0;border-style:none}textarea{resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{float:left;width:100%;padding:0;margin-bottom:.5rem;font-size:calc(1.275rem + 0.3vw);line-height:inherit}@media(min-width: 1200px){legend{font-size:1.5rem}}legend+*{clear:left}::-webkit-datetime-edit-fields-wrapper,::-webkit-datetime-edit-text,::-webkit-datetime-edit-minute,::-webkit-datetime-edit-hour-field,::-webkit-datetime-edit-day-field,::-webkit-datetime-edit-month-field,::-webkit-datetime-edit-year-field{padding:0}::-webkit-inner-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-color-swatch-wrapper{padding:0}::file-selector-button{font:inherit;-webkit-appearance:button}output{display:inline-block}iframe{border:0}summary{display:list-item;cursor:pointer}progress{vertical-align:baseline}[hidden]{display:none 
!important}.lead{font-size:1.25rem;font-weight:300}.display-1{font-size:calc(1.625rem + 4.5vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-1{font-size:5rem}}.display-2{font-size:calc(1.575rem + 3.9vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-2{font-size:4.5rem}}.display-3{font-size:calc(1.525rem + 3.3vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-3{font-size:4rem}}.display-4{font-size:calc(1.475rem + 2.7vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-4{font-size:3.5rem}}.display-5{font-size:calc(1.425rem + 2.1vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-5{font-size:3rem}}.display-6{font-size:calc(1.375rem + 1.5vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-6{font-size:2.5rem}}.list-unstyled{padding-left:0;list-style:none}.list-inline{padding-left:0;list-style:none}.list-inline-item{display:inline-block}.list-inline-item:not(:last-child){margin-right:.5rem}.initialism{font-size:0.875em;text-transform:uppercase}.blockquote{margin-bottom:1rem;font-size:1.25rem}.blockquote>:last-child{margin-bottom:0}.blockquote-footer{margin-top:-1rem;margin-bottom:1rem;font-size:0.875em;color:#6c757d}.blockquote-footer::before{content:"— "}.img-fluid{max-width:100%;height:auto}.img-thumbnail{padding:.25rem;background-color:#181818;border:1px solid #dee2e6;max-width:100%;height:auto}.figure{display:inline-block}.figure-img{margin-bottom:.5rem;line-height:1}.figure-caption{font-size:0.875em;color:rgba(250,241,228,.75)}.container,.container-fluid,.container-xxl,.container-xl,.container-lg,.container-md,.container-sm{--bs-gutter-x: 1.5rem;--bs-gutter-y: 0;width:100%;padding-right:calc(var(--bs-gutter-x)*.5);padding-left:calc(var(--bs-gutter-x)*.5);margin-right:auto;margin-left:auto}@media(min-width: 576px){.container-sm,.container{max-width:540px}}@media(min-width: 768px){.container-md,.container-sm,.container{max-width:720px}}@media(min-width: 
992px){.container-lg,.container-md,.container-sm,.container{max-width:960px}}@media(min-width: 1200px){.container-xl,.container-lg,.container-md,.container-sm,.container{max-width:1140px}}@media(min-width: 1400px){.container-xxl,.container-xl,.container-lg,.container-md,.container-sm,.container{max-width:1320px}}:root{--bs-breakpoint-xs: 0;--bs-breakpoint-sm: 576px;--bs-breakpoint-md: 768px;--bs-breakpoint-lg: 992px;--bs-breakpoint-xl: 1200px;--bs-breakpoint-xxl: 1400px}.grid{display:grid;grid-template-rows:repeat(var(--bs-rows, 1), 1fr);grid-template-columns:repeat(var(--bs-columns, 12), 1fr);gap:var(--bs-gap, 1.5rem)}.grid .g-col-1{grid-column:auto/span 1}.grid .g-col-2{grid-column:auto/span 2}.grid .g-col-3{grid-column:auto/span 3}.grid .g-col-4{grid-column:auto/span 4}.grid .g-col-5{grid-column:auto/span 5}.grid .g-col-6{grid-column:auto/span 6}.grid .g-col-7{grid-column:auto/span 7}.grid .g-col-8{grid-column:auto/span 8}.grid .g-col-9{grid-column:auto/span 9}.grid .g-col-10{grid-column:auto/span 10}.grid .g-col-11{grid-column:auto/span 11}.grid .g-col-12{grid-column:auto/span 12}.grid .g-start-1{grid-column-start:1}.grid .g-start-2{grid-column-start:2}.grid .g-start-3{grid-column-start:3}.grid .g-start-4{grid-column-start:4}.grid .g-start-5{grid-column-start:5}.grid .g-start-6{grid-column-start:6}.grid .g-start-7{grid-column-start:7}.grid .g-start-8{grid-column-start:8}.grid .g-start-9{grid-column-start:9}.grid .g-start-10{grid-column-start:10}.grid .g-start-11{grid-column-start:11}@media(min-width: 576px){.grid .g-col-sm-1{grid-column:auto/span 1}.grid .g-col-sm-2{grid-column:auto/span 2}.grid .g-col-sm-3{grid-column:auto/span 3}.grid .g-col-sm-4{grid-column:auto/span 4}.grid .g-col-sm-5{grid-column:auto/span 5}.grid .g-col-sm-6{grid-column:auto/span 6}.grid .g-col-sm-7{grid-column:auto/span 7}.grid .g-col-sm-8{grid-column:auto/span 8}.grid .g-col-sm-9{grid-column:auto/span 9}.grid .g-col-sm-10{grid-column:auto/span 10}.grid .g-col-sm-11{grid-column:auto/span 
11}.grid .g-col-sm-12{grid-column:auto/span 12}.grid .g-start-sm-1{grid-column-start:1}.grid .g-start-sm-2{grid-column-start:2}.grid .g-start-sm-3{grid-column-start:3}.grid .g-start-sm-4{grid-column-start:4}.grid .g-start-sm-5{grid-column-start:5}.grid .g-start-sm-6{grid-column-start:6}.grid .g-start-sm-7{grid-column-start:7}.grid .g-start-sm-8{grid-column-start:8}.grid .g-start-sm-9{grid-column-start:9}.grid .g-start-sm-10{grid-column-start:10}.grid .g-start-sm-11{grid-column-start:11}}@media(min-width: 768px){.grid .g-col-md-1{grid-column:auto/span 1}.grid .g-col-md-2{grid-column:auto/span 2}.grid .g-col-md-3{grid-column:auto/span 3}.grid .g-col-md-4{grid-column:auto/span 4}.grid .g-col-md-5{grid-column:auto/span 5}.grid .g-col-md-6{grid-column:auto/span 6}.grid .g-col-md-7{grid-column:auto/span 7}.grid .g-col-md-8{grid-column:auto/span 8}.grid .g-col-md-9{grid-column:auto/span 9}.grid .g-col-md-10{grid-column:auto/span 10}.grid .g-col-md-11{grid-column:auto/span 11}.grid .g-col-md-12{grid-column:auto/span 12}.grid .g-start-md-1{grid-column-start:1}.grid .g-start-md-2{grid-column-start:2}.grid .g-start-md-3{grid-column-start:3}.grid .g-start-md-4{grid-column-start:4}.grid .g-start-md-5{grid-column-start:5}.grid .g-start-md-6{grid-column-start:6}.grid .g-start-md-7{grid-column-start:7}.grid .g-start-md-8{grid-column-start:8}.grid .g-start-md-9{grid-column-start:9}.grid .g-start-md-10{grid-column-start:10}.grid .g-start-md-11{grid-column-start:11}}@media(min-width: 992px){.grid .g-col-lg-1{grid-column:auto/span 1}.grid .g-col-lg-2{grid-column:auto/span 2}.grid .g-col-lg-3{grid-column:auto/span 3}.grid .g-col-lg-4{grid-column:auto/span 4}.grid .g-col-lg-5{grid-column:auto/span 5}.grid .g-col-lg-6{grid-column:auto/span 6}.grid .g-col-lg-7{grid-column:auto/span 7}.grid .g-col-lg-8{grid-column:auto/span 8}.grid .g-col-lg-9{grid-column:auto/span 9}.grid .g-col-lg-10{grid-column:auto/span 10}.grid .g-col-lg-11{grid-column:auto/span 11}.grid 
.g-col-lg-12{grid-column:auto/span 12}.grid .g-start-lg-1{grid-column-start:1}.grid .g-start-lg-2{grid-column-start:2}.grid .g-start-lg-3{grid-column-start:3}.grid .g-start-lg-4{grid-column-start:4}.grid .g-start-lg-5{grid-column-start:5}.grid .g-start-lg-6{grid-column-start:6}.grid .g-start-lg-7{grid-column-start:7}.grid .g-start-lg-8{grid-column-start:8}.grid .g-start-lg-9{grid-column-start:9}.grid .g-start-lg-10{grid-column-start:10}.grid .g-start-lg-11{grid-column-start:11}}@media(min-width: 1200px){.grid .g-col-xl-1{grid-column:auto/span 1}.grid .g-col-xl-2{grid-column:auto/span 2}.grid .g-col-xl-3{grid-column:auto/span 3}.grid .g-col-xl-4{grid-column:auto/span 4}.grid .g-col-xl-5{grid-column:auto/span 5}.grid .g-col-xl-6{grid-column:auto/span 6}.grid .g-col-xl-7{grid-column:auto/span 7}.grid .g-col-xl-8{grid-column:auto/span 8}.grid .g-col-xl-9{grid-column:auto/span 9}.grid .g-col-xl-10{grid-column:auto/span 10}.grid .g-col-xl-11{grid-column:auto/span 11}.grid .g-col-xl-12{grid-column:auto/span 12}.grid .g-start-xl-1{grid-column-start:1}.grid .g-start-xl-2{grid-column-start:2}.grid .g-start-xl-3{grid-column-start:3}.grid .g-start-xl-4{grid-column-start:4}.grid .g-start-xl-5{grid-column-start:5}.grid .g-start-xl-6{grid-column-start:6}.grid .g-start-xl-7{grid-column-start:7}.grid .g-start-xl-8{grid-column-start:8}.grid .g-start-xl-9{grid-column-start:9}.grid .g-start-xl-10{grid-column-start:10}.grid .g-start-xl-11{grid-column-start:11}}@media(min-width: 1400px){.grid .g-col-xxl-1{grid-column:auto/span 1}.grid .g-col-xxl-2{grid-column:auto/span 2}.grid .g-col-xxl-3{grid-column:auto/span 3}.grid .g-col-xxl-4{grid-column:auto/span 4}.grid .g-col-xxl-5{grid-column:auto/span 5}.grid .g-col-xxl-6{grid-column:auto/span 6}.grid .g-col-xxl-7{grid-column:auto/span 7}.grid .g-col-xxl-8{grid-column:auto/span 8}.grid .g-col-xxl-9{grid-column:auto/span 9}.grid .g-col-xxl-10{grid-column:auto/span 10}.grid .g-col-xxl-11{grid-column:auto/span 11}.grid 
.g-col-xxl-12{grid-column:auto/span 12}.grid .g-start-xxl-1{grid-column-start:1}.grid .g-start-xxl-2{grid-column-start:2}.grid .g-start-xxl-3{grid-column-start:3}.grid .g-start-xxl-4{grid-column-start:4}.grid .g-start-xxl-5{grid-column-start:5}.grid .g-start-xxl-6{grid-column-start:6}.grid .g-start-xxl-7{grid-column-start:7}.grid .g-start-xxl-8{grid-column-start:8}.grid .g-start-xxl-9{grid-column-start:9}.grid .g-start-xxl-10{grid-column-start:10}.grid .g-start-xxl-11{grid-column-start:11}}.table{--bs-table-color-type: initial;--bs-table-bg-type: initial;--bs-table-color-state: initial;--bs-table-bg-state: initial;--bs-table-color: #FAF1E4;--bs-table-bg: #181818;--bs-table-border-color: #dee2e6;--bs-table-accent-bg: transparent;--bs-table-striped-color: #FAF1E4;--bs-table-striped-bg: rgba(0, 0, 0, 0.05);--bs-table-active-color: #FAF1E4;--bs-table-active-bg: rgba(0, 0, 0, 0.1);--bs-table-hover-color: #FAF1E4;--bs-table-hover-bg: rgba(0, 0, 0, 0.075);width:100%;margin-bottom:1rem;vertical-align:top;border-color:var(--bs-table-border-color)}.table>:not(caption)>*>*{padding:.5rem .5rem;color:var(--bs-table-color-state, var(--bs-table-color-type, var(--bs-table-color)));background-color:var(--bs-table-bg);border-bottom-width:1px;box-shadow:inset 0 0 0 9999px var(--bs-table-bg-state, var(--bs-table-bg-type, var(--bs-table-accent-bg)))}.table>tbody{vertical-align:inherit}.table>thead{vertical-align:bottom}.table-group-divider{border-top:calc(1px*2) solid #89857e}.caption-top{caption-side:top}.table-sm>:not(caption)>*>*{padding:.25rem .25rem}.table-bordered>:not(caption)>*{border-width:1px 0}.table-bordered>:not(caption)>*>*{border-width:0 1px}.table-borderless>:not(caption)>*>*{border-bottom-width:0}.table-borderless>:not(:first-child){border-top-width:0}.table-striped>tbody>tr:nth-of-type(odd)>*{--bs-table-color-type: var(--bs-table-striped-color);--bs-table-bg-type: 
var(--bs-table-striped-bg)}.table-striped-columns>:not(caption)>tr>:nth-child(even){--bs-table-color-type: var(--bs-table-striped-color);--bs-table-bg-type: var(--bs-table-striped-bg)}.table-active{--bs-table-color-state: var(--bs-table-active-color);--bs-table-bg-state: var(--bs-table-active-bg)}.table-hover>tbody>tr:hover>*{--bs-table-color-state: var(--bs-table-hover-color);--bs-table-bg-state: var(--bs-table-hover-bg)}.table-primary{--bs-table-color: #000;--bs-table-bg: #d4e6f9;--bs-table-border-color: #bfcfe0;--bs-table-striped-bg: #c9dbed;--bs-table-striped-color: #000;--bs-table-active-bg: #bfcfe0;--bs-table-active-color: #000;--bs-table-hover-bg: #c4d5e6;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-secondary{--bs-table-color: #000;--bs-table-bg: #d6d8d9;--bs-table-border-color: #c1c2c3;--bs-table-striped-bg: #cbcdce;--bs-table-striped-color: #000;--bs-table-active-bg: #c1c2c3;--bs-table-active-color: #000;--bs-table-hover-bg: #c6c8c9;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-success{--bs-table-color: #000;--bs-table-bg: #d9f0d1;--bs-table-border-color: #c3d8bc;--bs-table-striped-bg: #cee4c7;--bs-table-striped-color: #000;--bs-table-active-bg: #c3d8bc;--bs-table-active-color: #000;--bs-table-hover-bg: #c9dec1;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-info{--bs-table-color: #000;--bs-table-bg: #ebddf1;--bs-table-border-color: #d4c7d9;--bs-table-striped-bg: #dfd2e5;--bs-table-striped-color: #000;--bs-table-active-bg: #d4c7d9;--bs-table-active-color: #000;--bs-table-hover-bg: #d9ccdf;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-warning{--bs-table-color: #000;--bs-table-bg: #ffe3d1;--bs-table-border-color: #e6ccbc;--bs-table-striped-bg: #f2d8c7;--bs-table-striped-color: #000;--bs-table-active-bg: 
#e6ccbc;--bs-table-active-color: #000;--bs-table-hover-bg: #ecd2c1;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-danger{--bs-table-color: #000;--bs-table-bg: #ffccd7;--bs-table-border-color: #e6b8c2;--bs-table-striped-bg: #f2c2cc;--bs-table-striped-color: #000;--bs-table-active-bg: #e6b8c2;--bs-table-active-color: #000;--bs-table-hover-bg: #ecbdc7;--bs-table-hover-color: #000;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-light{--bs-table-color: #fff;--bs-table-bg: #525252;--bs-table-border-color: #636363;--bs-table-striped-bg: #5b5b5b;--bs-table-striped-color: #fff;--bs-table-active-bg: #636363;--bs-table-active-color: #fff;--bs-table-hover-bg: #5f5f5f;--bs-table-hover-color: #fff;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-dark{--bs-table-color: #fff;--bs-table-bg: #343a40;--bs-table-border-color: #484e53;--bs-table-striped-bg: #3e444a;--bs-table-striped-color: #fff;--bs-table-active-bg: #484e53;--bs-table-active-color: #fff;--bs-table-hover-bg: #43494e;--bs-table-hover-color: #fff;color:var(--bs-table-color);border-color:var(--bs-table-border-color)}.table-responsive{overflow-x:auto;-webkit-overflow-scrolling:touch}@media(max-width: 575.98px){.table-responsive-sm{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 767.98px){.table-responsive-md{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 991.98px){.table-responsive-lg{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 1199.98px){.table-responsive-xl{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 1399.98px){.table-responsive-xxl{overflow-x:auto;-webkit-overflow-scrolling:touch}}.form-label,.shiny-input-container .control-label{margin-bottom:.5rem}.col-form-label{padding-top:calc(0.375rem + 1px);padding-bottom:calc(0.375rem + 
1px);margin-bottom:0;font-size:inherit;line-height:1.5}.col-form-label-lg{padding-top:calc(0.5rem + 1px);padding-bottom:calc(0.5rem + 1px);font-size:1.25rem}.col-form-label-sm{padding-top:calc(0.25rem + 1px);padding-bottom:calc(0.25rem + 1px);font-size:0.875rem}.form-text{margin-top:.25rem;font-size:0.875em;color:rgba(250,241,228,.75)}.form-control{display:block;width:100%;padding:.375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#faf1e4;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:#242424;background-clip:padding-box;border:1px solid #dee2e6;border-radius:0;transition:border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-control{transition:none}}.form-control[type=file]{overflow:hidden}.form-control[type=file]:not(:disabled):not([readonly]){cursor:pointer}.form-control:focus{color:#faf1e4;background-color:#242424;border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-control::-webkit-date-and-time-value{min-width:85px;height:1.5em;margin:0}.form-control::-webkit-datetime-edit{display:block;padding:0}.form-control::placeholder{color:rgba(250,241,228,.75);opacity:1}.form-control:disabled{background-color:#e9ecef;opacity:1}.form-control::file-selector-button{padding:.375rem .75rem;margin:-0.375rem -0.75rem;margin-inline-end:.75rem;color:#faf1e4;background-color:#f8f9fa;pointer-events:none;border-color:inherit;border-style:solid;border-width:0;border-inline-end-width:1px;border-radius:0;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-control::file-selector-button{transition:none}}.form-control:hover:not(:disabled):not([readonly])::file-selector-button{background-color:#e9ecef}.form-control-plaintext{display:block;width:100%;padding:.375rem 
0;margin-bottom:0;line-height:1.5;color:#faf1e4;background-color:rgba(0,0,0,0);border:solid rgba(0,0,0,0);border-width:1px 0}.form-control-plaintext:focus{outline:0}.form-control-plaintext.form-control-sm,.form-control-plaintext.form-control-lg{padding-right:0;padding-left:0}.form-control-sm{min-height:calc(1.5em + 0.5rem + calc(1px * 2));padding:.25rem .5rem;font-size:0.875rem}.form-control-sm::file-selector-button{padding:.25rem .5rem;margin:-0.25rem -0.5rem;margin-inline-end:.5rem}.form-control-lg{min-height:calc(1.5em + 1rem + calc(1px * 2));padding:.5rem 1rem;font-size:1.25rem}.form-control-lg::file-selector-button{padding:.5rem 1rem;margin:-0.5rem -1rem;margin-inline-end:1rem}textarea.form-control{min-height:calc(1.5em + 0.75rem + calc(1px * 2))}textarea.form-control-sm{min-height:calc(1.5em + 0.5rem + calc(1px * 2))}textarea.form-control-lg{min-height:calc(1.5em + 1rem + calc(1px * 2))}.form-control-color{width:3rem;height:calc(1.5em + 0.75rem + calc(1px * 2));padding:.375rem}.form-control-color:not(:disabled):not([readonly]){cursor:pointer}.form-control-color::-moz-color-swatch{border:0 !important}.form-control-color::-webkit-color-swatch{border:0 !important}.form-control-color.form-control-sm{height:calc(1.5em + 0.5rem + calc(1px * 2))}.form-control-color.form-control-lg{height:calc(1.5em + 1rem + calc(1px * 2))}.form-select{--bs-form-select-bg-img: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3e%3cpath fill='none' stroke='%23343a40' stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='m2 5 6 6 6-6'/%3e%3c/svg%3e");display:block;width:100%;padding:.375rem 2.25rem .375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#faf1e4;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:#242424;background-image:var(--bs-form-select-bg-img),var(--bs-form-select-bg-icon, none);background-repeat:no-repeat;background-position:right .75rem 
center;background-size:16px 12px;border:1px solid #dee2e6;border-radius:0;transition:border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-select{transition:none}}.form-select:focus{border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-select[multiple],.form-select[size]:not([size="1"]){padding-right:.75rem;background-image:none}.form-select:disabled{background-color:#e9ecef}.form-select:-moz-focusring{color:rgba(0,0,0,0);text-shadow:0 0 0 #faf1e4}.form-select-sm{padding-top:.25rem;padding-bottom:.25rem;padding-left:.5rem;font-size:0.875rem}.form-select-lg{padding-top:.5rem;padding-bottom:.5rem;padding-left:1rem;font-size:1.25rem}[data-bs-theme=dark] .form-select{--bs-form-select-bg-img: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3e%3cpath fill='none' stroke='%23dee2e6' stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='m2 5 6 6 6-6'/%3e%3c/svg%3e")}.form-check,.shiny-input-container .checkbox,.shiny-input-container .radio{display:block;min-height:1.5rem;padding-left:0;margin-bottom:.125rem}.form-check .form-check-input,.form-check .shiny-input-container .checkbox input,.form-check .shiny-input-container .radio input,.shiny-input-container .checkbox .form-check-input,.shiny-input-container .checkbox .shiny-input-container .checkbox input,.shiny-input-container .checkbox .shiny-input-container .radio input,.shiny-input-container .radio .form-check-input,.shiny-input-container .radio .shiny-input-container .checkbox input,.shiny-input-container .radio .shiny-input-container .radio input{float:left;margin-left:0}.form-check-reverse{padding-right:0;padding-left:0;text-align:right}.form-check-reverse .form-check-input{float:right;margin-right:0;margin-left:0}.form-check-input,.shiny-input-container .checkbox input,.shiny-input-container .checkbox-inline input,.shiny-input-container .radio input,.shiny-input-container .radio-inline 
input{--bs-form-check-bg: #242424;width:1em;height:1em;margin-top:.25em;vertical-align:top;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:var(--bs-form-check-bg);background-image:var(--bs-form-check-bg-image);background-repeat:no-repeat;background-position:center;background-size:contain;border:1px solid #dee2e6;print-color-adjust:exact}.form-check-input[type=radio],.shiny-input-container .checkbox input[type=radio],.shiny-input-container .checkbox-inline input[type=radio],.shiny-input-container .radio input[type=radio],.shiny-input-container .radio-inline input[type=radio]{border-radius:50%}.form-check-input:active,.shiny-input-container .checkbox input:active,.shiny-input-container .checkbox-inline input:active,.shiny-input-container .radio input:active,.shiny-input-container .radio-inline input:active{filter:brightness(90%)}.form-check-input:focus,.shiny-input-container .checkbox input:focus,.shiny-input-container .checkbox-inline input:focus,.shiny-input-container .radio input:focus,.shiny-input-container .radio-inline input:focus{border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-check-input:checked,.shiny-input-container .checkbox input:checked,.shiny-input-container .checkbox-inline input:checked,.shiny-input-container .radio input:checked,.shiny-input-container .radio-inline input:checked{background-color:#2780e3;border-color:#2780e3}.form-check-input:checked[type=checkbox],.shiny-input-container .checkbox input:checked[type=checkbox],.shiny-input-container .checkbox-inline input:checked[type=checkbox],.shiny-input-container .radio input:checked[type=checkbox],.shiny-input-container .radio-inline input:checked[type=checkbox]{--bs-form-check-bg-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3e%3cpath fill='none' stroke='%23fff' stroke-linecap='round' stroke-linejoin='round' stroke-width='3' d='m6 10 3 3 
6-6'/%3e%3c/svg%3e")}.form-check-input:checked[type=radio],.shiny-input-container .checkbox input:checked[type=radio],.shiny-input-container .checkbox-inline input:checked[type=radio],.shiny-input-container .radio input:checked[type=radio],.shiny-input-container .radio-inline input:checked[type=radio]{--bs-form-check-bg-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='2' fill='%23fff'/%3e%3c/svg%3e")}.form-check-input[type=checkbox]:indeterminate,.shiny-input-container .checkbox input[type=checkbox]:indeterminate,.shiny-input-container .checkbox-inline input[type=checkbox]:indeterminate,.shiny-input-container .radio input[type=checkbox]:indeterminate,.shiny-input-container .radio-inline input[type=checkbox]:indeterminate{background-color:#2780e3;border-color:#2780e3;--bs-form-check-bg-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3e%3cpath fill='none' stroke='%23fff' stroke-linecap='round' stroke-linejoin='round' stroke-width='3' d='M6 10h8'/%3e%3c/svg%3e")}.form-check-input:disabled,.shiny-input-container .checkbox input:disabled,.shiny-input-container .checkbox-inline input:disabled,.shiny-input-container .radio input:disabled,.shiny-input-container .radio-inline input:disabled{pointer-events:none;filter:none;opacity:.5}.form-check-input[disabled]~.form-check-label,.form-check-input[disabled]~span,.form-check-input:disabled~.form-check-label,.form-check-input:disabled~span,.shiny-input-container .checkbox input[disabled]~.form-check-label,.shiny-input-container .checkbox input[disabled]~span,.shiny-input-container .checkbox input:disabled~.form-check-label,.shiny-input-container .checkbox input:disabled~span,.shiny-input-container .checkbox-inline input[disabled]~.form-check-label,.shiny-input-container .checkbox-inline input[disabled]~span,.shiny-input-container .checkbox-inline input:disabled~.form-check-label,.shiny-input-container .checkbox-inline 
input:disabled~span,.shiny-input-container .radio input[disabled]~.form-check-label,.shiny-input-container .radio input[disabled]~span,.shiny-input-container .radio input:disabled~.form-check-label,.shiny-input-container .radio input:disabled~span,.shiny-input-container .radio-inline input[disabled]~.form-check-label,.shiny-input-container .radio-inline input[disabled]~span,.shiny-input-container .radio-inline input:disabled~.form-check-label,.shiny-input-container .radio-inline input:disabled~span{cursor:default;opacity:.5}.form-check-label,.shiny-input-container .checkbox label,.shiny-input-container .checkbox-inline label,.shiny-input-container .radio label,.shiny-input-container .radio-inline label{cursor:pointer}.form-switch{padding-left:2.5em}.form-switch .form-check-input{--bs-form-switch-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='rgba%280, 0, 0, 0.25%29'/%3e%3c/svg%3e");width:2em;margin-left:-2.5em;background-image:var(--bs-form-switch-bg);background-position:left center;transition:background-position .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-switch .form-check-input{transition:none}}.form-switch .form-check-input:focus{--bs-form-switch-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='%2393c0f1'/%3e%3c/svg%3e")}.form-switch .form-check-input:checked{background-position:right center;--bs-form-switch-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='%23fff'/%3e%3c/svg%3e")}.form-switch.form-check-reverse{padding-right:2.5em;padding-left:0}.form-switch.form-check-reverse .form-check-input{margin-right:-2.5em;margin-left:0}.form-check-inline{display:inline-block;margin-right:1rem}.btn-check{position:absolute;clip:rect(0, 0, 0, 
0);pointer-events:none}.btn-check[disabled]+.btn,.btn-check:disabled+.btn{pointer-events:none;filter:none;opacity:.65}[data-bs-theme=dark] .form-switch .form-check-input:not(:checked):not(:focus){--bs-form-switch-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='rgba%28255, 255, 255, 0.25%29'/%3e%3c/svg%3e")}.form-range{width:100%;height:1.5rem;padding:0;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:rgba(0,0,0,0)}.form-range:focus{outline:0}.form-range:focus::-webkit-slider-thumb{box-shadow:0 0 0 1px #181818,0 0 0 .25rem rgba(39,128,227,.25)}.form-range:focus::-moz-range-thumb{box-shadow:0 0 0 1px #181818,0 0 0 .25rem rgba(39,128,227,.25)}.form-range::-moz-focus-outer{border:0}.form-range::-webkit-slider-thumb{width:1rem;height:1rem;margin-top:-0.25rem;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:#2780e3;border:0;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-range::-webkit-slider-thumb{transition:none}}.form-range::-webkit-slider-thumb:active{background-color:#bed9f7}.form-range::-webkit-slider-runnable-track{width:100%;height:.5rem;color:rgba(0,0,0,0);cursor:pointer;background-color:#f8f9fa;border-color:rgba(0,0,0,0)}.form-range::-moz-range-thumb{width:1rem;height:1rem;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;background-color:#2780e3;border:0;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: 
reduce){.form-range::-moz-range-thumb{transition:none}}.form-range::-moz-range-thumb:active{background-color:#bed9f7}.form-range::-moz-range-track{width:100%;height:.5rem;color:rgba(0,0,0,0);cursor:pointer;background-color:#f8f9fa;border-color:rgba(0,0,0,0)}.form-range:disabled{pointer-events:none}.form-range:disabled::-webkit-slider-thumb{background-color:rgba(250,241,228,.75)}.form-range:disabled::-moz-range-thumb{background-color:rgba(250,241,228,.75)}.form-floating{position:relative}.form-floating>.form-control,.form-floating>.form-control-plaintext,.form-floating>.form-select{height:calc(3.5rem + calc(1px * 2));min-height:calc(3.5rem + calc(1px * 2));line-height:1.25}.form-floating>label{position:absolute;top:0;left:0;z-index:2;height:100%;padding:1rem .75rem;overflow:hidden;text-align:start;text-overflow:ellipsis;white-space:nowrap;pointer-events:none;border:1px solid rgba(0,0,0,0);transform-origin:0 0;transition:opacity .1s ease-in-out,transform .1s ease-in-out}@media(prefers-reduced-motion: reduce){.form-floating>label{transition:none}}.form-floating>.form-control,.form-floating>.form-control-plaintext{padding:1rem .75rem}.form-floating>.form-control::placeholder,.form-floating>.form-control-plaintext::placeholder{color:rgba(0,0,0,0)}.form-floating>.form-control:focus,.form-floating>.form-control:not(:placeholder-shown),.form-floating>.form-control-plaintext:focus,.form-floating>.form-control-plaintext:not(:placeholder-shown){padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-control:-webkit-autofill,.form-floating>.form-control-plaintext:-webkit-autofill{padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-select{padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-control:focus~label,.form-floating>.form-control:not(:placeholder-shown)~label,.form-floating>.form-control-plaintext~label,.form-floating>.form-select~label{color:rgba(var(--bs-body-color-rgb), 0.65);transform:scale(0.85) translateY(-0.5rem) 
translateX(0.15rem)}.form-floating>.form-control:focus~label::after,.form-floating>.form-control:not(:placeholder-shown)~label::after,.form-floating>.form-control-plaintext~label::after,.form-floating>.form-select~label::after{position:absolute;inset:1rem .375rem;z-index:-1;height:1.5em;content:"";background-color:#242424}.form-floating>.form-control:-webkit-autofill~label{color:rgba(var(--bs-body-color-rgb), 0.65);transform:scale(0.85) translateY(-0.5rem) translateX(0.15rem)}.form-floating>.form-control-plaintext~label{border-width:1px 0}.form-floating>:disabled~label,.form-floating>.form-control:disabled~label{color:#6c757d}.form-floating>:disabled~label::after,.form-floating>.form-control:disabled~label::after{background-color:#e9ecef}.input-group{position:relative;display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;align-items:stretch;-webkit-align-items:stretch;width:100%}.input-group>.form-control,.input-group>.form-select,.input-group>.form-floating{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto;width:1%;min-width:0}.input-group>.form-control:focus,.input-group>.form-select:focus,.input-group>.form-floating:focus-within{z-index:5}.input-group .btn{position:relative;z-index:2}.input-group .btn:focus{z-index:5}.input-group-text{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;padding:.375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#faf1e4;text-align:center;white-space:nowrap;background-color:#f8f9fa;border:1px solid #dee2e6}.input-group-lg>.form-control,.input-group-lg>.form-select,.input-group-lg>.input-group-text,.input-group-lg>.btn{padding:.5rem 1rem;font-size:1.25rem}.input-group-sm>.form-control,.input-group-sm>.form-select,.input-group-sm>.input-group-text,.input-group-sm>.btn{padding:.25rem 
.5rem;font-size:0.875rem}.input-group-lg>.form-select,.input-group-sm>.form-select{padding-right:3rem}.input-group>:not(:first-child):not(.dropdown-menu):not(.valid-tooltip):not(.valid-feedback):not(.invalid-tooltip):not(.invalid-feedback){margin-left:calc(1px*-1)}.valid-feedback{display:none;width:100%;margin-top:.25rem;font-size:0.875em;color:#3fb618}.valid-tooltip{position:absolute;top:100%;z-index:5;display:none;max-width:100%;padding:.25rem .5rem;margin-top:.1rem;font-size:0.875rem;color:#fff;background-color:#3fb618}.was-validated :valid~.valid-feedback,.was-validated :valid~.valid-tooltip,.is-valid~.valid-feedback,.is-valid~.valid-tooltip{display:block}.was-validated .form-control:valid,.form-control.is-valid{border-color:#3fb618;padding-right:calc(1.5em + 0.75rem);background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3e%3cpath fill='%233fb618' d='M2.3 6.73.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3e%3c/svg%3e");background-repeat:no-repeat;background-position:right calc(0.375em + 0.1875rem) center;background-size:calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-control:valid:focus,.form-control.is-valid:focus{border-color:#3fb618;box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated textarea.form-control:valid,textarea.form-control.is-valid{padding-right:calc(1.5em + 0.75rem);background-position:top calc(0.375em + 0.1875rem) right calc(0.375em + 0.1875rem)}.was-validated .form-select:valid,.form-select.is-valid{border-color:#3fb618}.was-validated .form-select:valid:not([multiple]):not([size]),.was-validated .form-select:valid:not([multiple])[size="1"],.form-select.is-valid:not([multiple]):not([size]),.form-select.is-valid:not([multiple])[size="1"]{--bs-form-select-bg-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3e%3cpath fill='%233fb618' d='M2.3 6.73.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 
3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3e%3c/svg%3e");padding-right:4.125rem;background-position:right .75rem center,center right 2.25rem;background-size:16px 12px,calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-select:valid:focus,.form-select.is-valid:focus{border-color:#3fb618;box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated .form-control-color:valid,.form-control-color.is-valid{width:calc(3rem + calc(1.5em + 0.75rem))}.was-validated .form-check-input:valid,.form-check-input.is-valid{border-color:#3fb618}.was-validated .form-check-input:valid:checked,.form-check-input.is-valid:checked{background-color:#3fb618}.was-validated .form-check-input:valid:focus,.form-check-input.is-valid:focus{box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated .form-check-input:valid~.form-check-label,.form-check-input.is-valid~.form-check-label{color:#3fb618}.form-check-inline .form-check-input~.valid-feedback{margin-left:.5em}.was-validated .input-group>.form-control:not(:focus):valid,.input-group>.form-control:not(:focus).is-valid,.was-validated .input-group>.form-select:not(:focus):valid,.input-group>.form-select:not(:focus).is-valid,.was-validated .input-group>.form-floating:not(:focus-within):valid,.input-group>.form-floating:not(:focus-within).is-valid{z-index:3}.invalid-feedback{display:none;width:100%;margin-top:.25rem;font-size:0.875em;color:#ff0039}.invalid-tooltip{position:absolute;top:100%;z-index:5;display:none;max-width:100%;padding:.25rem .5rem;margin-top:.1rem;font-size:0.875rem;color:#fff;background-color:#ff0039}.was-validated :invalid~.invalid-feedback,.was-validated :invalid~.invalid-tooltip,.is-invalid~.invalid-feedback,.is-invalid~.invalid-tooltip{display:block}.was-validated .form-control:invalid,.form-control.is-invalid{border-color:#ff0039;padding-right:calc(1.5em + 0.75rem);background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 12 12' width='12' height='12' 
fill='none' stroke='%23ff0039'%3e%3ccircle cx='6' cy='6' r='4.5'/%3e%3cpath stroke-linejoin='round' d='M5.8 3.6h.4L6 6.5z'/%3e%3ccircle cx='6' cy='8.2' r='.6' fill='%23ff0039' stroke='none'/%3e%3c/svg%3e");background-repeat:no-repeat;background-position:right calc(0.375em + 0.1875rem) center;background-size:calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-control:invalid:focus,.form-control.is-invalid:focus{border-color:#ff0039;box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated textarea.form-control:invalid,textarea.form-control.is-invalid{padding-right:calc(1.5em + 0.75rem);background-position:top calc(0.375em + 0.1875rem) right calc(0.375em + 0.1875rem)}.was-validated .form-select:invalid,.form-select.is-invalid{border-color:#ff0039}.was-validated .form-select:invalid:not([multiple]):not([size]),.was-validated .form-select:invalid:not([multiple])[size="1"],.form-select.is-invalid:not([multiple]):not([size]),.form-select.is-invalid:not([multiple])[size="1"]{--bs-form-select-bg-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 12 12' width='12' height='12' fill='none' stroke='%23ff0039'%3e%3ccircle cx='6' cy='6' r='4.5'/%3e%3cpath stroke-linejoin='round' d='M5.8 3.6h.4L6 6.5z'/%3e%3ccircle cx='6' cy='8.2' r='.6' fill='%23ff0039' stroke='none'/%3e%3c/svg%3e");padding-right:4.125rem;background-position:right .75rem center,center right 2.25rem;background-size:16px 12px,calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-select:invalid:focus,.form-select.is-invalid:focus{border-color:#ff0039;box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated .form-control-color:invalid,.form-control-color.is-invalid{width:calc(3rem + calc(1.5em + 0.75rem))}.was-validated .form-check-input:invalid,.form-check-input.is-invalid{border-color:#ff0039}.was-validated .form-check-input:invalid:checked,.form-check-input.is-invalid:checked{background-color:#ff0039}.was-validated 
.form-check-input:invalid:focus,.form-check-input.is-invalid:focus{box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated .form-check-input:invalid~.form-check-label,.form-check-input.is-invalid~.form-check-label{color:#ff0039}.form-check-inline .form-check-input~.invalid-feedback{margin-left:.5em}.was-validated .input-group>.form-control:not(:focus):invalid,.input-group>.form-control:not(:focus).is-invalid,.was-validated .input-group>.form-select:not(:focus):invalid,.input-group>.form-select:not(:focus).is-invalid,.was-validated .input-group>.form-floating:not(:focus-within):invalid,.input-group>.form-floating:not(:focus-within).is-invalid{z-index:4}.btn{--bs-btn-padding-x: 0.75rem;--bs-btn-padding-y: 0.375rem;--bs-btn-font-family: ;--bs-btn-font-size:1rem;--bs-btn-font-weight: 400;--bs-btn-line-height: 1.5;--bs-btn-color: #FAF1E4;--bs-btn-bg: transparent;--bs-btn-border-width: 1px;--bs-btn-border-color: transparent;--bs-btn-border-radius: 0.25rem;--bs-btn-hover-border-color: transparent;--bs-btn-box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.15), 0 1px 1px rgba(0, 0, 0, 0.075);--bs-btn-disabled-opacity: 0.65;--bs-btn-focus-box-shadow: 0 0 0 0.25rem rgba(var(--bs-btn-focus-shadow-rgb), .5);display:inline-block;padding:var(--bs-btn-padding-y) var(--bs-btn-padding-x);font-family:var(--bs-btn-font-family);font-size:var(--bs-btn-font-size);font-weight:var(--bs-btn-font-weight);line-height:var(--bs-btn-line-height);color:var(--bs-btn-color);text-align:center;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;vertical-align:middle;cursor:pointer;user-select:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;border:var(--bs-btn-border-width) solid var(--bs-btn-border-color);background-color:var(--bs-btn-bg);transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s 
ease-in-out}@media(prefers-reduced-motion: reduce){.btn{transition:none}}.btn:hover{color:var(--bs-btn-hover-color);background-color:var(--bs-btn-hover-bg);border-color:var(--bs-btn-hover-border-color)}.btn-check+.btn:hover{color:var(--bs-btn-color);background-color:var(--bs-btn-bg);border-color:var(--bs-btn-border-color)}.btn:focus-visible{color:var(--bs-btn-hover-color);background-color:var(--bs-btn-hover-bg);border-color:var(--bs-btn-hover-border-color);outline:0;box-shadow:var(--bs-btn-focus-box-shadow)}.btn-check:focus-visible+.btn{border-color:var(--bs-btn-hover-border-color);outline:0;box-shadow:var(--bs-btn-focus-box-shadow)}.btn-check:checked+.btn,:not(.btn-check)+.btn:active,.btn:first-child:active,.btn.active,.btn.show{color:var(--bs-btn-active-color);background-color:var(--bs-btn-active-bg);border-color:var(--bs-btn-active-border-color)}.btn-check:checked+.btn:focus-visible,:not(.btn-check)+.btn:active:focus-visible,.btn:first-child:active:focus-visible,.btn.active:focus-visible,.btn.show:focus-visible{box-shadow:var(--bs-btn-focus-box-shadow)}.btn:disabled,.btn.disabled,fieldset:disabled .btn{color:var(--bs-btn-disabled-color);pointer-events:none;background-color:var(--bs-btn-disabled-bg);border-color:var(--bs-btn-disabled-border-color);opacity:var(--bs-btn-disabled-opacity)}.btn-default{--bs-btn-color: #fff;--bs-btn-bg: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #2c3136;--bs-btn-hover-border-color: #2a2e33;--bs-btn-focus-shadow-rgb: 82, 88, 93;--bs-btn-active-color: #fff;--bs-btn-active-bg: #2a2e33;--bs-btn-active-border-color: #272c30;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #343a40;--bs-btn-disabled-border-color: #343a40}.btn-primary{--bs-btn-color: #fff;--bs-btn-bg: #2780e3;--bs-btn-border-color: #2780e3;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #216dc1;--bs-btn-hover-border-color: #1f66b6;--bs-btn-focus-shadow-rgb: 71, 147, 
231;--bs-btn-active-color: #fff;--bs-btn-active-bg: #1f66b6;--bs-btn-active-border-color: #1d60aa;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #2780e3;--bs-btn-disabled-border-color: #2780e3}.btn-secondary{--bs-btn-color: #fff;--bs-btn-bg: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #2c3136;--bs-btn-hover-border-color: #2a2e33;--bs-btn-focus-shadow-rgb: 82, 88, 93;--bs-btn-active-color: #fff;--bs-btn-active-bg: #2a2e33;--bs-btn-active-border-color: #272c30;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #343a40;--bs-btn-disabled-border-color: #343a40}.btn-success{--bs-btn-color: #fff;--bs-btn-bg: #3fb618;--bs-btn-border-color: #3fb618;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #369b14;--bs-btn-hover-border-color: #329213;--bs-btn-focus-shadow-rgb: 92, 193, 59;--bs-btn-active-color: #fff;--bs-btn-active-bg: #329213;--bs-btn-active-border-color: #2f8912;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #3fb618;--bs-btn-disabled-border-color: #3fb618}.btn-info{--bs-btn-color: #fff;--bs-btn-bg: #9954bb;--bs-btn-border-color: #9954bb;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #82479f;--bs-btn-hover-border-color: #7a4396;--bs-btn-focus-shadow-rgb: 168, 110, 197;--bs-btn-active-color: #fff;--bs-btn-active-bg: #7a4396;--bs-btn-active-border-color: #733f8c;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #9954bb;--bs-btn-disabled-border-color: #9954bb}.btn-warning{--bs-btn-color: #fff;--bs-btn-bg: #ff7518;--bs-btn-border-color: #ff7518;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #d96314;--bs-btn-hover-border-color: #cc5e13;--bs-btn-focus-shadow-rgb: 255, 138, 59;--bs-btn-active-color: #fff;--bs-btn-active-bg: #cc5e13;--bs-btn-active-border-color: 
#bf5812;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #ff7518;--bs-btn-disabled-border-color: #ff7518}.btn-danger{--bs-btn-color: #fff;--bs-btn-bg: #ff0039;--bs-btn-border-color: #ff0039;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #d90030;--bs-btn-hover-border-color: #cc002e;--bs-btn-focus-shadow-rgb: 255, 38, 87;--bs-btn-active-color: #fff;--bs-btn-active-bg: #cc002e;--bs-btn-active-border-color: #bf002b;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #ff0039;--bs-btn-disabled-border-color: #ff0039}.btn-light{--bs-btn-color: #fff;--bs-btn-bg: #525252;--bs-btn-border-color: #525252;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #464646;--bs-btn-hover-border-color: #424242;--bs-btn-focus-shadow-rgb: 108, 108, 108;--bs-btn-active-color: #fff;--bs-btn-active-bg: #424242;--bs-btn-active-border-color: #3e3e3e;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #525252;--bs-btn-disabled-border-color: #525252}.btn-dark{--bs-btn-color: #fff;--bs-btn-bg: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #52585d;--bs-btn-hover-border-color: #484e53;--bs-btn-focus-shadow-rgb: 82, 88, 93;--bs-btn-active-color: #fff;--bs-btn-active-bg: #5d6166;--bs-btn-active-border-color: #484e53;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #343a40;--bs-btn-disabled-border-color: #343a40}.btn-outline-default{--bs-btn-color: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #343a40;--bs-btn-hover-border-color: #343a40;--bs-btn-focus-shadow-rgb: 52, 58, 64;--bs-btn-active-color: #fff;--bs-btn-active-bg: #343a40;--bs-btn-active-border-color: #343a40;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #343a40;--bs-btn-disabled-bg: 
transparent;--bs-btn-disabled-border-color: #343a40;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-primary{--bs-btn-color: #2780e3;--bs-btn-border-color: #2780e3;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #2780e3;--bs-btn-hover-border-color: #2780e3;--bs-btn-focus-shadow-rgb: 39, 128, 227;--bs-btn-active-color: #fff;--bs-btn-active-bg: #2780e3;--bs-btn-active-border-color: #2780e3;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #2780e3;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #2780e3;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-secondary{--bs-btn-color: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #343a40;--bs-btn-hover-border-color: #343a40;--bs-btn-focus-shadow-rgb: 52, 58, 64;--bs-btn-active-color: #fff;--bs-btn-active-bg: #343a40;--bs-btn-active-border-color: #343a40;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #343a40;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #343a40;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-success{--bs-btn-color: #3fb618;--bs-btn-border-color: #3fb618;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #3fb618;--bs-btn-hover-border-color: #3fb618;--bs-btn-focus-shadow-rgb: 63, 182, 24;--bs-btn-active-color: #fff;--bs-btn-active-bg: #3fb618;--bs-btn-active-border-color: #3fb618;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #3fb618;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #3fb618;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-info{--bs-btn-color: #9954bb;--bs-btn-border-color: #9954bb;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #9954bb;--bs-btn-hover-border-color: #9954bb;--bs-btn-focus-shadow-rgb: 153, 84, 187;--bs-btn-active-color: #fff;--bs-btn-active-bg: #9954bb;--bs-btn-active-border-color: #9954bb;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 
0.125);--bs-btn-disabled-color: #9954bb;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #9954bb;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-warning{--bs-btn-color: #ff7518;--bs-btn-border-color: #ff7518;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #ff7518;--bs-btn-hover-border-color: #ff7518;--bs-btn-focus-shadow-rgb: 255, 117, 24;--bs-btn-active-color: #fff;--bs-btn-active-bg: #ff7518;--bs-btn-active-border-color: #ff7518;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #ff7518;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #ff7518;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-danger{--bs-btn-color: #ff0039;--bs-btn-border-color: #ff0039;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #ff0039;--bs-btn-hover-border-color: #ff0039;--bs-btn-focus-shadow-rgb: 255, 0, 57;--bs-btn-active-color: #fff;--bs-btn-active-bg: #ff0039;--bs-btn-active-border-color: #ff0039;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #ff0039;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #ff0039;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-light{--bs-btn-color: #525252;--bs-btn-border-color: #525252;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #525252;--bs-btn-hover-border-color: #525252;--bs-btn-focus-shadow-rgb: 82, 82, 82;--bs-btn-active-color: #fff;--bs-btn-active-bg: #525252;--bs-btn-active-border-color: #525252;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #525252;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #525252;--bs-btn-bg: transparent;--bs-gradient: none}.btn-outline-dark{--bs-btn-color: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #fff;--bs-btn-hover-bg: #343a40;--bs-btn-hover-border-color: #343a40;--bs-btn-focus-shadow-rgb: 52, 58, 64;--bs-btn-active-color: #fff;--bs-btn-active-bg: #343a40;--bs-btn-active-border-color: 
#343a40;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #343a40;--bs-btn-disabled-bg: transparent;--bs-btn-disabled-border-color: #343a40;--bs-btn-bg: transparent;--bs-gradient: none}.btn-link{--bs-btn-font-weight: 400;--bs-btn-color: #AED1E4;--bs-btn-bg: transparent;--bs-btn-border-color: transparent;--bs-btn-hover-color: #8ba7b6;--bs-btn-hover-border-color: transparent;--bs-btn-active-color: #8ba7b6;--bs-btn-active-border-color: transparent;--bs-btn-disabled-color: #6c757d;--bs-btn-disabled-border-color: transparent;--bs-btn-box-shadow: 0 0 0 #000;--bs-btn-focus-shadow-rgb: 148, 178, 194;text-decoration:underline;-webkit-text-decoration:underline;-moz-text-decoration:underline;-ms-text-decoration:underline;-o-text-decoration:underline}.btn-link:focus-visible{color:var(--bs-btn-color)}.btn-link:hover{color:var(--bs-btn-hover-color)}.btn-lg,.btn-group-lg>.btn{--bs-btn-padding-y: 0.5rem;--bs-btn-padding-x: 1rem;--bs-btn-font-size:1.25rem;--bs-btn-border-radius: 0.5rem}.btn-sm,.btn-group-sm>.btn{--bs-btn-padding-y: 0.25rem;--bs-btn-padding-x: 0.5rem;--bs-btn-font-size:0.875rem;--bs-btn-border-radius: 0.2em}.fade{transition:opacity .15s linear}@media(prefers-reduced-motion: reduce){.fade{transition:none}}.fade:not(.show){opacity:0}.collapse:not(.show){display:none}.collapsing{height:0;overflow:hidden;transition:height .2s ease}@media(prefers-reduced-motion: reduce){.collapsing{transition:none}}.collapsing.collapse-horizontal{width:0;height:auto;transition:width .35s ease}@media(prefers-reduced-motion: reduce){.collapsing.collapse-horizontal{transition:none}}.dropup,.dropend,.dropdown,.dropstart,.dropup-center,.dropdown-center{position:relative}.dropdown-toggle{white-space:nowrap}.dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:.3em solid;border-right:.3em solid rgba(0,0,0,0);border-bottom:0;border-left:.3em solid 
rgba(0,0,0,0)}.dropdown-toggle:empty::after{margin-left:0}.dropdown-menu{--bs-dropdown-zindex: 1000;--bs-dropdown-min-width: 10rem;--bs-dropdown-padding-x: 0;--bs-dropdown-padding-y: 0.5rem;--bs-dropdown-spacer: 0.125rem;--bs-dropdown-font-size:1rem;--bs-dropdown-color: #FAF1E4;--bs-dropdown-bg: #181818;--bs-dropdown-border-color: rgba(0, 0, 0, 0.175);--bs-dropdown-border-radius: 0.25rem;--bs-dropdown-border-width: 1px;--bs-dropdown-inner-border-radius: calc(0.25rem - 1px);--bs-dropdown-divider-bg: rgba(0, 0, 0, 0.175);--bs-dropdown-divider-margin-y: 0.5rem;--bs-dropdown-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);--bs-dropdown-link-color: #FAF1E4;--bs-dropdown-link-hover-color: #FAF1E4;--bs-dropdown-link-hover-bg: #6c757d;--bs-dropdown-link-active-color: #fff;--bs-dropdown-link-active-bg: #2780e3;--bs-dropdown-link-disabled-color: rgba(250, 241, 228, 0.5);--bs-dropdown-item-padding-x: 1rem;--bs-dropdown-item-padding-y: 0.25rem;--bs-dropdown-header-color: #6c757d;--bs-dropdown-header-padding-x: 1rem;--bs-dropdown-header-padding-y: 0.5rem;position:absolute;z-index:var(--bs-dropdown-zindex);display:none;min-width:var(--bs-dropdown-min-width);padding:var(--bs-dropdown-padding-y) var(--bs-dropdown-padding-x);margin:0;font-size:var(--bs-dropdown-font-size);color:var(--bs-dropdown-color);text-align:left;list-style:none;background-color:var(--bs-dropdown-bg);background-clip:padding-box;border:var(--bs-dropdown-border-width) solid var(--bs-dropdown-border-color)}.dropdown-menu[data-bs-popper]{top:100%;left:0;margin-top:var(--bs-dropdown-spacer)}.dropdown-menu-start{--bs-position: start}.dropdown-menu-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-end{--bs-position: end}.dropdown-menu-end[data-bs-popper]{right:0;left:auto}@media(min-width: 576px){.dropdown-menu-sm-start{--bs-position: start}.dropdown-menu-sm-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-sm-end{--bs-position: end}.dropdown-menu-sm-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 
768px){.dropdown-menu-md-start{--bs-position: start}.dropdown-menu-md-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-md-end{--bs-position: end}.dropdown-menu-md-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 992px){.dropdown-menu-lg-start{--bs-position: start}.dropdown-menu-lg-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-lg-end{--bs-position: end}.dropdown-menu-lg-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 1200px){.dropdown-menu-xl-start{--bs-position: start}.dropdown-menu-xl-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-xl-end{--bs-position: end}.dropdown-menu-xl-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 1400px){.dropdown-menu-xxl-start{--bs-position: start}.dropdown-menu-xxl-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-xxl-end{--bs-position: end}.dropdown-menu-xxl-end[data-bs-popper]{right:0;left:auto}}.dropup .dropdown-menu[data-bs-popper]{top:auto;bottom:100%;margin-top:0;margin-bottom:var(--bs-dropdown-spacer)}.dropup .dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:0;border-right:.3em solid rgba(0,0,0,0);border-bottom:.3em solid;border-left:.3em solid rgba(0,0,0,0)}.dropup .dropdown-toggle:empty::after{margin-left:0}.dropend .dropdown-menu[data-bs-popper]{top:0;right:auto;left:100%;margin-top:0;margin-left:var(--bs-dropdown-spacer)}.dropend .dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:.3em solid rgba(0,0,0,0);border-right:0;border-bottom:.3em solid rgba(0,0,0,0);border-left:.3em solid}.dropend .dropdown-toggle:empty::after{margin-left:0}.dropend .dropdown-toggle::after{vertical-align:0}.dropstart .dropdown-menu[data-bs-popper]{top:0;right:100%;left:auto;margin-top:0;margin-right:var(--bs-dropdown-spacer)}.dropstart .dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:""}.dropstart 
.dropdown-toggle::after{display:none}.dropstart .dropdown-toggle::before{display:inline-block;margin-right:.255em;vertical-align:.255em;content:"";border-top:.3em solid rgba(0,0,0,0);border-right:.3em solid;border-bottom:.3em solid rgba(0,0,0,0)}.dropstart .dropdown-toggle:empty::after{margin-left:0}.dropstart .dropdown-toggle::before{vertical-align:0}.dropdown-divider{height:0;margin:var(--bs-dropdown-divider-margin-y) 0;overflow:hidden;border-top:1px solid var(--bs-dropdown-divider-bg);opacity:1}.dropdown-item{display:block;width:100%;padding:var(--bs-dropdown-item-padding-y) var(--bs-dropdown-item-padding-x);clear:both;font-weight:400;color:var(--bs-dropdown-link-color);text-align:inherit;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;white-space:nowrap;background-color:rgba(0,0,0,0);border:0}.dropdown-item:hover,.dropdown-item:focus{color:var(--bs-dropdown-link-hover-color);background-color:var(--bs-dropdown-link-hover-bg)}.dropdown-item.active,.dropdown-item:active{color:var(--bs-dropdown-link-active-color);text-decoration:none;background-color:var(--bs-dropdown-link-active-bg)}.dropdown-item.disabled,.dropdown-item:disabled{color:var(--bs-dropdown-link-disabled-color);pointer-events:none;background-color:rgba(0,0,0,0)}.dropdown-menu.show{display:block}.dropdown-header{display:block;padding:var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x);margin-bottom:0;font-size:0.875rem;color:var(--bs-dropdown-header-color);white-space:nowrap}.dropdown-item-text{display:block;padding:var(--bs-dropdown-item-padding-y) var(--bs-dropdown-item-padding-x);color:var(--bs-dropdown-link-color)}.dropdown-menu-dark{--bs-dropdown-color: #dee2e6;--bs-dropdown-bg: #343a40;--bs-dropdown-border-color: rgba(0, 0, 0, 0.175);--bs-dropdown-box-shadow: ;--bs-dropdown-link-color: #dee2e6;--bs-dropdown-link-hover-color: #fff;--bs-dropdown-divider-bg: rgba(0, 0, 0, 
0.175);--bs-dropdown-link-hover-bg: rgba(255, 255, 255, 0.15);--bs-dropdown-link-active-color: #fff;--bs-dropdown-link-active-bg: #2780e3;--bs-dropdown-link-disabled-color: #adb5bd;--bs-dropdown-header-color: #adb5bd}.btn-group,.btn-group-vertical{position:relative;display:inline-flex;vertical-align:middle}.btn-group>.btn,.btn-group-vertical>.btn{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto}.btn-group>.btn-check:checked+.btn,.btn-group>.btn-check:focus+.btn,.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active,.btn-group-vertical>.btn-check:checked+.btn,.btn-group-vertical>.btn-check:focus+.btn,.btn-group-vertical>.btn:hover,.btn-group-vertical>.btn:focus,.btn-group-vertical>.btn:active,.btn-group-vertical>.btn.active{z-index:1}.btn-toolbar{display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;justify-content:flex-start;-webkit-justify-content:flex-start}.btn-toolbar .input-group{width:auto}.btn-group>:not(.btn-check:first-child)+.btn,.btn-group>.btn-group:not(:first-child){margin-left:calc(1px*-1)}.dropdown-toggle-split{padding-right:.5625rem;padding-left:.5625rem}.dropdown-toggle-split::after,.dropup .dropdown-toggle-split::after,.dropend .dropdown-toggle-split::after{margin-left:0}.dropstart .dropdown-toggle-split::before{margin-right:0}.btn-sm+.dropdown-toggle-split,.btn-group-sm>.btn+.dropdown-toggle-split{padding-right:.375rem;padding-left:.375rem}.btn-lg+.dropdown-toggle-split,.btn-group-lg>.btn+.dropdown-toggle-split{padding-right:.75rem;padding-left:.75rem}.btn-group-vertical{flex-direction:column;-webkit-flex-direction:column;align-items:flex-start;-webkit-align-items:flex-start;justify-content:center;-webkit-justify-content:center}.btn-group-vertical>.btn,.btn-group-vertical>.btn-group{width:100%}.btn-group-vertical>.btn:not(:first-child),.btn-group-vertical>.btn-group:not(:first-child){margin-top:calc(1px*-1)}.nav{--bs-nav-link-padding-x: 1rem;--bs-nav-link-padding-y: 
0.5rem;--bs-nav-link-font-weight: ;--bs-nav-link-color: #AED1E4;--bs-nav-link-hover-color: #8ba7b6;--bs-nav-link-disabled-color: rgba(250, 241, 228, 0.75);display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;padding-left:0;margin-bottom:0;list-style:none}.nav-link{display:block;padding:var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x);font-size:var(--bs-nav-link-font-size);font-weight:var(--bs-nav-link-font-weight);color:var(--bs-nav-link-color);text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;background:none;border:0;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out}@media(prefers-reduced-motion: reduce){.nav-link{transition:none}}.nav-link:hover,.nav-link:focus{color:var(--bs-nav-link-hover-color)}.nav-link:focus-visible{outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.nav-link.disabled,.nav-link:disabled{color:var(--bs-nav-link-disabled-color);pointer-events:none;cursor:default}.nav-tabs{--bs-nav-tabs-border-width: 1px;--bs-nav-tabs-border-color: #dee2e6;--bs-nav-tabs-border-radius: 0.25rem;--bs-nav-tabs-link-hover-border-color: #e9ecef #e9ecef #dee2e6;--bs-nav-tabs-link-active-color: #000;--bs-nav-tabs-link-active-bg: #181818;--bs-nav-tabs-link-active-border-color: #dee2e6 #dee2e6 #181818;border-bottom:var(--bs-nav-tabs-border-width) solid var(--bs-nav-tabs-border-color)}.nav-tabs .nav-link{margin-bottom:calc(-1*var(--bs-nav-tabs-border-width));border:var(--bs-nav-tabs-border-width) solid rgba(0,0,0,0)}.nav-tabs .nav-link:hover,.nav-tabs .nav-link:focus{isolation:isolate;border-color:var(--bs-nav-tabs-link-hover-border-color)}.nav-tabs .nav-link.active,.nav-tabs .nav-item.show .nav-link{color:var(--bs-nav-tabs-link-active-color);background-color:var(--bs-nav-tabs-link-active-bg);border-color:var(--bs-nav-tabs-link-active-border-color)}.nav-tabs 
.dropdown-menu{margin-top:calc(-1*var(--bs-nav-tabs-border-width))}.nav-pills{--bs-nav-pills-border-radius: 0.25rem;--bs-nav-pills-link-active-color: #fff;--bs-nav-pills-link-active-bg: #2780e3}.nav-pills .nav-link.active,.nav-pills .show>.nav-link{color:var(--bs-nav-pills-link-active-color);background-color:var(--bs-nav-pills-link-active-bg)}.nav-underline{--bs-nav-underline-gap: 1rem;--bs-nav-underline-border-width: 0.125rem;--bs-nav-underline-link-active-color: #000;gap:var(--bs-nav-underline-gap)}.nav-underline .nav-link{padding-right:0;padding-left:0;border-bottom:var(--bs-nav-underline-border-width) solid rgba(0,0,0,0)}.nav-underline .nav-link:hover,.nav-underline .nav-link:focus{border-bottom-color:currentcolor}.nav-underline .nav-link.active,.nav-underline .show>.nav-link{font-weight:700;color:var(--bs-nav-underline-link-active-color);border-bottom-color:currentcolor}.nav-fill>.nav-link,.nav-fill .nav-item{flex:1 1 auto;-webkit-flex:1 1 auto;text-align:center}.nav-justified>.nav-link,.nav-justified .nav-item{flex-basis:0;-webkit-flex-basis:0;flex-grow:1;-webkit-flex-grow:1;text-align:center}.nav-fill .nav-item .nav-link,.nav-justified .nav-item .nav-link{width:100%}.tab-content>.tab-pane{display:none}.tab-content>.active{display:block}.navbar{--bs-navbar-padding-x: 0;--bs-navbar-padding-y: 0.5rem;--bs-navbar-color: #bdbdbd;--bs-navbar-hover-color: rgba(174, 209, 228, 0.8);--bs-navbar-disabled-color: rgba(189, 189, 189, 0.75);--bs-navbar-active-color: #aed1e4;--bs-navbar-brand-padding-y: 0.3125rem;--bs-navbar-brand-margin-end: 1rem;--bs-navbar-brand-font-size: 1.25rem;--bs-navbar-brand-color: #bdbdbd;--bs-navbar-brand-hover-color: #aed1e4;--bs-navbar-nav-link-padding-x: 0.5rem;--bs-navbar-toggler-padding-y: 0.25;--bs-navbar-toggler-padding-x: 0;--bs-navbar-toggler-font-size: 1.25rem;--bs-navbar-toggler-icon-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='%23bdbdbd' stroke-linecap='round' 
stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e");--bs-navbar-toggler-border-color: rgba(189, 189, 189, 0);--bs-navbar-toggler-border-radius: 0.25rem;--bs-navbar-toggler-focus-width: 0.25rem;--bs-navbar-toggler-transition: box-shadow 0.15s ease-in-out;position:relative;display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding:var(--bs-navbar-padding-y) var(--bs-navbar-padding-x)}.navbar>.container,.navbar>.container-fluid,.navbar>.container-sm,.navbar>.container-md,.navbar>.container-lg,.navbar>.container-xl,.navbar>.container-xxl{display:flex;display:-webkit-flex;flex-wrap:inherit;-webkit-flex-wrap:inherit;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between}.navbar-brand{padding-top:var(--bs-navbar-brand-padding-y);padding-bottom:var(--bs-navbar-brand-padding-y);margin-right:var(--bs-navbar-brand-margin-end);font-size:var(--bs-navbar-brand-font-size);color:var(--bs-navbar-brand-color);text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;white-space:nowrap}.navbar-brand:hover,.navbar-brand:focus{color:var(--bs-navbar-brand-hover-color)}.navbar-nav{--bs-nav-link-padding-x: 0;--bs-nav-link-padding-y: 0.5rem;--bs-nav-link-font-weight: ;--bs-nav-link-color: var(--bs-navbar-color);--bs-nav-link-hover-color: var(--bs-navbar-hover-color);--bs-nav-link-disabled-color: var(--bs-navbar-disabled-color);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;padding-left:0;margin-bottom:0;list-style:none}.navbar-nav .nav-link.active,.navbar-nav .nav-link.show{color:var(--bs-navbar-active-color)}.navbar-nav .dropdown-menu{position:static}.navbar-text{padding-top:.5rem;padding-bottom:.5rem;color:var(--bs-navbar-color)}.navbar-text a,.navbar-text 
a:hover,.navbar-text a:focus{color:var(--bs-navbar-active-color)}.navbar-collapse{flex-basis:100%;-webkit-flex-basis:100%;flex-grow:1;-webkit-flex-grow:1;align-items:center;-webkit-align-items:center}.navbar-toggler{padding:var(--bs-navbar-toggler-padding-y) var(--bs-navbar-toggler-padding-x);font-size:var(--bs-navbar-toggler-font-size);line-height:1;color:var(--bs-navbar-color);background-color:rgba(0,0,0,0);border:var(--bs-border-width) solid var(--bs-navbar-toggler-border-color);transition:var(--bs-navbar-toggler-transition)}@media(prefers-reduced-motion: reduce){.navbar-toggler{transition:none}}.navbar-toggler:hover{text-decoration:none}.navbar-toggler:focus{text-decoration:none;outline:0;box-shadow:0 0 0 var(--bs-navbar-toggler-focus-width)}.navbar-toggler-icon{display:inline-block;width:1.5em;height:1.5em;vertical-align:middle;background-image:var(--bs-navbar-toggler-icon-bg);background-repeat:no-repeat;background-position:center;background-size:100%}.navbar-nav-scroll{max-height:var(--bs-scroll-height, 75vh);overflow-y:auto}@media(min-width: 576px){.navbar-expand-sm{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-sm .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-sm .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-sm .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-sm .navbar-nav-scroll{overflow:visible}.navbar-expand-sm .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-sm .navbar-toggler{display:none}.navbar-expand-sm .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-sm 
.offcanvas .offcanvas-header{display:none}.navbar-expand-sm .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 768px){.navbar-expand-md{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-md .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-md .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-md .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-md .navbar-nav-scroll{overflow:visible}.navbar-expand-md .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-md .navbar-toggler{display:none}.navbar-expand-md .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-md .offcanvas .offcanvas-header{display:none}.navbar-expand-md .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 992px){.navbar-expand-lg{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-lg .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-lg .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-lg .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-lg .navbar-nav-scroll{overflow:visible}.navbar-expand-lg .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-lg .navbar-toggler{display:none}.navbar-expand-lg 
.offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-lg .offcanvas .offcanvas-header{display:none}.navbar-expand-lg .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 1200px){.navbar-expand-xl{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-xl .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-xl .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-xl .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-xl .navbar-nav-scroll{overflow:visible}.navbar-expand-xl .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-xl .navbar-toggler{display:none}.navbar-expand-xl .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-xl .offcanvas .offcanvas-header{display:none}.navbar-expand-xl .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 1400px){.navbar-expand-xxl{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-xxl .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-xxl .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-xxl .navbar-nav 
.nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand-xxl .navbar-nav-scroll{overflow:visible}.navbar-expand-xxl .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-xxl .navbar-toggler{display:none}.navbar-expand-xxl .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand-xxl .offcanvas .offcanvas-header{display:none}.navbar-expand-xxl .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}.navbar-expand{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand .navbar-nav .dropdown-menu{position:absolute}.navbar-expand .navbar-nav .nav-link{padding-right:var(--bs-navbar-nav-link-padding-x);padding-left:var(--bs-navbar-nav-link-padding-x)}.navbar-expand .navbar-nav-scroll{overflow:visible}.navbar-expand .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand .navbar-toggler{display:none}.navbar-expand .offcanvas{position:static;z-index:auto;flex-grow:1;-webkit-flex-grow:1;width:auto !important;height:auto !important;visibility:visible !important;background-color:rgba(0,0,0,0) !important;border:0 !important;transform:none !important;transition:none}.navbar-expand .offcanvas .offcanvas-header{display:none}.navbar-expand .offcanvas .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}.navbar-dark,.navbar[data-bs-theme=dark]{--bs-navbar-color: #bdbdbd;--bs-navbar-hover-color: rgba(174, 209, 
228, 0.8);--bs-navbar-disabled-color: rgba(189, 189, 189, 0.75);--bs-navbar-active-color: #aed1e4;--bs-navbar-brand-color: #bdbdbd;--bs-navbar-brand-hover-color: #aed1e4;--bs-navbar-toggler-border-color: rgba(189, 189, 189, 0);--bs-navbar-toggler-icon-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='%23bdbdbd' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e")}[data-bs-theme=dark] .navbar-toggler-icon{--bs-navbar-toggler-icon-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='%23bdbdbd' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e")}.card{--bs-card-spacer-y: 1rem;--bs-card-spacer-x: 1rem;--bs-card-title-spacer-y: 0.5rem;--bs-card-title-color: ;--bs-card-subtitle-color: ;--bs-card-border-width: 1px;--bs-card-border-color: rgba(0, 0, 0, 0.175);--bs-card-border-radius: 0.25rem;--bs-card-box-shadow: ;--bs-card-inner-border-radius: calc(0.25rem - 1px);--bs-card-cap-padding-y: 0.5rem;--bs-card-cap-padding-x: 1rem;--bs-card-cap-bg: rgba(52, 58, 64, 0.25);--bs-card-cap-color: ;--bs-card-height: ;--bs-card-color: ;--bs-card-bg: #181818;--bs-card-img-overlay-padding: 1rem;--bs-card-group-margin: 0.75rem;position:relative;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;min-width:0;height:var(--bs-card-height);color:var(--bs-body-color);word-wrap:break-word;background-color:var(--bs-card-bg);background-clip:border-box;border:var(--bs-card-border-width) solid var(--bs-card-border-color)}.card>hr{margin-right:0;margin-left:0}.card>.list-group{border-top:inherit;border-bottom:inherit}.card>.list-group:first-child{border-top-width:0}.card>.list-group:last-child{border-bottom-width:0}.card>.card-header+.list-group,.card>.list-group+.card-footer{border-top:0}.card-body{flex:1 1 auto;-webkit-flex:1 1 
auto;padding:var(--bs-card-spacer-y) var(--bs-card-spacer-x);color:var(--bs-card-color)}.card-title{margin-bottom:var(--bs-card-title-spacer-y);color:var(--bs-card-title-color)}.card-subtitle{margin-top:calc(-0.5*var(--bs-card-title-spacer-y));margin-bottom:0;color:var(--bs-card-subtitle-color)}.card-text:last-child{margin-bottom:0}.card-link+.card-link{margin-left:var(--bs-card-spacer-x)}.card-header{padding:var(--bs-card-cap-padding-y) var(--bs-card-cap-padding-x);margin-bottom:0;color:var(--bs-card-cap-color);background-color:var(--bs-card-cap-bg);border-bottom:var(--bs-card-border-width) solid var(--bs-card-border-color)}.card-footer{padding:var(--bs-card-cap-padding-y) var(--bs-card-cap-padding-x);color:var(--bs-card-cap-color);background-color:var(--bs-card-cap-bg);border-top:var(--bs-card-border-width) solid var(--bs-card-border-color)}.card-header-tabs{margin-right:calc(-0.5*var(--bs-card-cap-padding-x));margin-bottom:calc(-1*var(--bs-card-cap-padding-y));margin-left:calc(-0.5*var(--bs-card-cap-padding-x));border-bottom:0}.card-header-tabs .nav-link.active{background-color:var(--bs-card-bg);border-bottom-color:var(--bs-card-bg)}.card-header-pills{margin-right:calc(-0.5*var(--bs-card-cap-padding-x));margin-left:calc(-0.5*var(--bs-card-cap-padding-x))}.card-img-overlay{position:absolute;top:0;right:0;bottom:0;left:0;padding:var(--bs-card-img-overlay-padding)}.card-img,.card-img-top,.card-img-bottom{width:100%}.card-group>.card{margin-bottom:var(--bs-card-group-margin)}@media(min-width: 576px){.card-group{display:flex;display:-webkit-flex;flex-flow:row wrap;-webkit-flex-flow:row wrap}.card-group>.card{flex:1 0 0%;-webkit-flex:1 0 0%;margin-bottom:0}.card-group>.card+.card{margin-left:0;border-left:0}}.accordion{--bs-accordion-color: #FAF1E4;--bs-accordion-bg: #181818;--bs-accordion-transition: color 0.15s ease-in-out, background-color 0.15s ease-in-out, border-color 0.15s ease-in-out, box-shadow 0.15s ease-in-out, border-radius 0.15s 
ease;--bs-accordion-border-color: #dee2e6;--bs-accordion-border-width: 1px;--bs-accordion-border-radius: 0.25rem;--bs-accordion-inner-border-radius: calc(0.25rem - 1px);--bs-accordion-btn-padding-x: 1.25rem;--bs-accordion-btn-padding-y: 1rem;--bs-accordion-btn-color: #FAF1E4;--bs-accordion-btn-bg: #181818;--bs-accordion-btn-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23FAF1E4'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e");--bs-accordion-btn-icon-width: 1.25rem;--bs-accordion-btn-icon-transform: rotate(-180deg);--bs-accordion-btn-icon-transition: transform 0.2s ease-in-out;--bs-accordion-btn-active-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%2310335b'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e");--bs-accordion-btn-focus-border-color: #93c0f1;--bs-accordion-btn-focus-box-shadow: 0 0 0 0.25rem rgba(39, 128, 227, 0.25);--bs-accordion-body-padding-x: 1.25rem;--bs-accordion-body-padding-y: 1rem;--bs-accordion-active-color: #10335b;--bs-accordion-active-bg: #d4e6f9}.accordion-button{position:relative;display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;width:100%;padding:var(--bs-accordion-btn-padding-y) var(--bs-accordion-btn-padding-x);font-size:1rem;color:var(--bs-accordion-btn-color);text-align:left;background-color:var(--bs-accordion-btn-bg);border:0;overflow-anchor:none;transition:var(--bs-accordion-transition)}@media(prefers-reduced-motion: reduce){.accordion-button{transition:none}}.accordion-button:not(.collapsed){color:var(--bs-accordion-active-color);background-color:var(--bs-accordion-active-bg);box-shadow:inset 0 calc(-1*var(--bs-accordion-border-width)) 0 
var(--bs-accordion-border-color)}.accordion-button:not(.collapsed)::after{background-image:var(--bs-accordion-btn-active-icon);transform:var(--bs-accordion-btn-icon-transform)}.accordion-button::after{flex-shrink:0;-webkit-flex-shrink:0;width:var(--bs-accordion-btn-icon-width);height:var(--bs-accordion-btn-icon-width);margin-left:auto;content:"";background-image:var(--bs-accordion-btn-icon);background-repeat:no-repeat;background-size:var(--bs-accordion-btn-icon-width);transition:var(--bs-accordion-btn-icon-transition)}@media(prefers-reduced-motion: reduce){.accordion-button::after{transition:none}}.accordion-button:hover{z-index:2}.accordion-button:focus{z-index:3;border-color:var(--bs-accordion-btn-focus-border-color);outline:0;box-shadow:var(--bs-accordion-btn-focus-box-shadow)}.accordion-header{margin-bottom:0}.accordion-item{color:var(--bs-accordion-color);background-color:var(--bs-accordion-bg);border:var(--bs-accordion-border-width) solid var(--bs-accordion-border-color)}.accordion-item:not(:first-of-type){border-top:0}.accordion-body{padding:var(--bs-accordion-body-padding-y) var(--bs-accordion-body-padding-x)}.accordion-flush .accordion-collapse{border-width:0}.accordion-flush .accordion-item{border-right:0;border-left:0}.accordion-flush .accordion-item:first-child{border-top:0}.accordion-flush .accordion-item:last-child{border-bottom:0}[data-bs-theme=dark] .accordion-button::after{--bs-accordion-btn-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%237db3ee'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e");--bs-accordion-btn-active-icon: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%237db3ee'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 
0-.708z'/%3e%3c/svg%3e")}.breadcrumb{--bs-breadcrumb-padding-x: 0;--bs-breadcrumb-padding-y: 0;--bs-breadcrumb-margin-bottom: 1rem;--bs-breadcrumb-bg: ;--bs-breadcrumb-border-radius: ;--bs-breadcrumb-divider-color: rgba(250, 241, 228, 0.75);--bs-breadcrumb-item-padding-x: 0.5rem;--bs-breadcrumb-item-active-color: rgba(250, 241, 228, 0.75);display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;padding:var(--bs-breadcrumb-padding-y) var(--bs-breadcrumb-padding-x);margin-bottom:var(--bs-breadcrumb-margin-bottom);font-size:var(--bs-breadcrumb-font-size);list-style:none;background-color:var(--bs-breadcrumb-bg)}.breadcrumb-item+.breadcrumb-item{padding-left:var(--bs-breadcrumb-item-padding-x)}.breadcrumb-item+.breadcrumb-item::before{float:left;padding-right:var(--bs-breadcrumb-item-padding-x);color:var(--bs-breadcrumb-divider-color);content:var(--bs-breadcrumb-divider, ">") /* rtl: var(--bs-breadcrumb-divider, ">") */}.breadcrumb-item.active{color:var(--bs-breadcrumb-item-active-color)}.pagination{--bs-pagination-padding-x: 0.75rem;--bs-pagination-padding-y: 0.375rem;--bs-pagination-font-size:1rem;--bs-pagination-color: #AED1E4;--bs-pagination-bg: #181818;--bs-pagination-border-width: 1px;--bs-pagination-border-color: #dee2e6;--bs-pagination-border-radius: 0.25rem;--bs-pagination-hover-color: #8ba7b6;--bs-pagination-hover-bg: #f8f9fa;--bs-pagination-hover-border-color: #dee2e6;--bs-pagination-focus-color: #8ba7b6;--bs-pagination-focus-bg: #e9ecef;--bs-pagination-focus-box-shadow: 0 0 0 0.25rem rgba(39, 128, 227, 0.25);--bs-pagination-active-color: #fff;--bs-pagination-active-bg: #2780e3;--bs-pagination-active-border-color: #2780e3;--bs-pagination-disabled-color: rgba(250, 241, 228, 0.75);--bs-pagination-disabled-bg: #e9ecef;--bs-pagination-disabled-border-color: #dee2e6;display:flex;display:-webkit-flex;padding-left:0;list-style:none}.page-link{position:relative;display:block;padding:var(--bs-pagination-padding-y) 
var(--bs-pagination-padding-x);font-size:var(--bs-pagination-font-size);color:var(--bs-pagination-color);text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;background-color:var(--bs-pagination-bg);border:var(--bs-pagination-border-width) solid var(--bs-pagination-border-color);transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.page-link{transition:none}}.page-link:hover{z-index:2;color:var(--bs-pagination-hover-color);background-color:var(--bs-pagination-hover-bg);border-color:var(--bs-pagination-hover-border-color)}.page-link:focus{z-index:3;color:var(--bs-pagination-focus-color);background-color:var(--bs-pagination-focus-bg);outline:0;box-shadow:var(--bs-pagination-focus-box-shadow)}.page-link.active,.active>.page-link{z-index:3;color:var(--bs-pagination-active-color);background-color:var(--bs-pagination-active-bg);border-color:var(--bs-pagination-active-border-color)}.page-link.disabled,.disabled>.page-link{color:var(--bs-pagination-disabled-color);pointer-events:none;background-color:var(--bs-pagination-disabled-bg);border-color:var(--bs-pagination-disabled-border-color)}.page-item:not(:first-child) .page-link{margin-left:calc(1px*-1)}.pagination-lg{--bs-pagination-padding-x: 1.5rem;--bs-pagination-padding-y: 0.75rem;--bs-pagination-font-size:1.25rem;--bs-pagination-border-radius: 0.5rem}.pagination-sm{--bs-pagination-padding-x: 0.5rem;--bs-pagination-padding-y: 0.25rem;--bs-pagination-font-size:0.875rem;--bs-pagination-border-radius: 0.2em}.badge{--bs-badge-padding-x: 0.65em;--bs-badge-padding-y: 0.35em;--bs-badge-font-size:0.75em;--bs-badge-font-weight: 700;--bs-badge-color: #fff;--bs-badge-border-radius: 0.25rem;display:inline-block;padding:var(--bs-badge-padding-y) 
var(--bs-badge-padding-x);font-size:var(--bs-badge-font-size);font-weight:var(--bs-badge-font-weight);line-height:1;color:var(--bs-badge-color);text-align:center;white-space:nowrap;vertical-align:baseline}.badge:empty{display:none}.btn .badge{position:relative;top:-1px}.alert{--bs-alert-bg: transparent;--bs-alert-padding-x: 1rem;--bs-alert-padding-y: 1rem;--bs-alert-margin-bottom: 1rem;--bs-alert-color: inherit;--bs-alert-border-color: transparent;--bs-alert-border: 0 solid var(--bs-alert-border-color);--bs-alert-border-radius: 0.25rem;--bs-alert-link-color: inherit;position:relative;padding:var(--bs-alert-padding-y) var(--bs-alert-padding-x);margin-bottom:var(--bs-alert-margin-bottom);color:var(--bs-alert-color);background-color:var(--bs-alert-bg);border:var(--bs-alert-border)}.alert-heading{color:inherit}.alert-link{font-weight:700;color:var(--bs-alert-link-color)}.alert-dismissible{padding-right:3rem}.alert-dismissible .btn-close{position:absolute;top:0;right:0;z-index:2;padding:1.25rem 1rem}.alert-default{--bs-alert-color: var(--bs-default-text-emphasis);--bs-alert-bg: var(--bs-default-bg-subtle);--bs-alert-border-color: var(--bs-default-border-subtle);--bs-alert-link-color: var(--bs-default-text-emphasis)}.alert-primary{--bs-alert-color: var(--bs-primary-text-emphasis);--bs-alert-bg: var(--bs-primary-bg-subtle);--bs-alert-border-color: var(--bs-primary-border-subtle);--bs-alert-link-color: var(--bs-primary-text-emphasis)}.alert-secondary{--bs-alert-color: var(--bs-secondary-text-emphasis);--bs-alert-bg: var(--bs-secondary-bg-subtle);--bs-alert-border-color: var(--bs-secondary-border-subtle);--bs-alert-link-color: var(--bs-secondary-text-emphasis)}.alert-success{--bs-alert-color: var(--bs-success-text-emphasis);--bs-alert-bg: var(--bs-success-bg-subtle);--bs-alert-border-color: var(--bs-success-border-subtle);--bs-alert-link-color: var(--bs-success-text-emphasis)}.alert-info{--bs-alert-color: var(--bs-info-text-emphasis);--bs-alert-bg: 
var(--bs-info-bg-subtle);--bs-alert-border-color: var(--bs-info-border-subtle);--bs-alert-link-color: var(--bs-info-text-emphasis)}.alert-warning{--bs-alert-color: var(--bs-warning-text-emphasis);--bs-alert-bg: var(--bs-warning-bg-subtle);--bs-alert-border-color: var(--bs-warning-border-subtle);--bs-alert-link-color: var(--bs-warning-text-emphasis)}.alert-danger{--bs-alert-color: var(--bs-danger-text-emphasis);--bs-alert-bg: var(--bs-danger-bg-subtle);--bs-alert-border-color: var(--bs-danger-border-subtle);--bs-alert-link-color: var(--bs-danger-text-emphasis)}.alert-light{--bs-alert-color: var(--bs-light-text-emphasis);--bs-alert-bg: var(--bs-light-bg-subtle);--bs-alert-border-color: var(--bs-light-border-subtle);--bs-alert-link-color: var(--bs-light-text-emphasis)}.alert-dark{--bs-alert-color: var(--bs-dark-text-emphasis);--bs-alert-bg: var(--bs-dark-bg-subtle);--bs-alert-border-color: var(--bs-dark-border-subtle);--bs-alert-link-color: var(--bs-dark-text-emphasis)}@keyframes progress-bar-stripes{0%{background-position-x:.5rem}}.progress,.progress-stacked{--bs-progress-height: 0.5rem;--bs-progress-font-size:0.75rem;--bs-progress-bg: #e9ecef;--bs-progress-border-radius: 0.25rem;--bs-progress-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.075);--bs-progress-bar-color: #fff;--bs-progress-bar-bg: #2780e3;--bs-progress-bar-transition: width 0.6s ease;display:flex;display:-webkit-flex;height:var(--bs-progress-height);overflow:hidden;font-size:var(--bs-progress-font-size);background-color:var(--bs-progress-bg)}.progress-bar{display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;justify-content:center;-webkit-justify-content:center;overflow:hidden;color:var(--bs-progress-bar-color);text-align:center;white-space:nowrap;background-color:var(--bs-progress-bar-bg);transition:var(--bs-progress-bar-transition)}@media(prefers-reduced-motion: reduce){.progress-bar{transition:none}}.progress-bar-striped{background-image:linear-gradient(45deg, rgba(255, 
255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-size:var(--bs-progress-height) var(--bs-progress-height)}.progress-stacked>.progress{overflow:visible}.progress-stacked>.progress>.progress-bar{width:100%}.progress-bar-animated{animation:1s linear infinite progress-bar-stripes}@media(prefers-reduced-motion: reduce){.progress-bar-animated{animation:none}}.list-group{--bs-list-group-color: #FAF1E4;--bs-list-group-bg: #181818;--bs-list-group-border-color: #dee2e6;--bs-list-group-border-width: 1px;--bs-list-group-border-radius: 0.25rem;--bs-list-group-item-padding-x: 1rem;--bs-list-group-item-padding-y: 0.5rem;--bs-list-group-action-color: rgba(250, 241, 228, 0.75);--bs-list-group-action-hover-color: #000;--bs-list-group-action-hover-bg: #f8f9fa;--bs-list-group-action-active-color: #FAF1E4;--bs-list-group-action-active-bg: #e9ecef;--bs-list-group-disabled-color: rgba(250, 241, 228, 0.75);--bs-list-group-disabled-bg: #181818;--bs-list-group-active-color: #fff;--bs-list-group-active-bg: #2780e3;--bs-list-group-active-border-color: #2780e3;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;padding-left:0;margin-bottom:0}.list-group-numbered{list-style-type:none;counter-reset:section}.list-group-numbered>.list-group-item::before{content:counters(section, ".") ". 
";counter-increment:section}.list-group-item-action{width:100%;color:var(--bs-list-group-action-color);text-align:inherit}.list-group-item-action:hover,.list-group-item-action:focus{z-index:1;color:var(--bs-list-group-action-hover-color);text-decoration:none;background-color:var(--bs-list-group-action-hover-bg)}.list-group-item-action:active{color:var(--bs-list-group-action-active-color);background-color:var(--bs-list-group-action-active-bg)}.list-group-item{position:relative;display:block;padding:var(--bs-list-group-item-padding-y) var(--bs-list-group-item-padding-x);color:var(--bs-list-group-color);text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;background-color:var(--bs-list-group-bg);border:var(--bs-list-group-border-width) solid var(--bs-list-group-border-color)}.list-group-item.disabled,.list-group-item:disabled{color:var(--bs-list-group-disabled-color);pointer-events:none;background-color:var(--bs-list-group-disabled-bg)}.list-group-item.active{z-index:2;color:var(--bs-list-group-active-color);background-color:var(--bs-list-group-active-bg);border-color:var(--bs-list-group-active-border-color)}.list-group-item+.list-group-item{border-top-width:0}.list-group-item+.list-group-item.active{margin-top:calc(-1*var(--bs-list-group-border-width));border-top-width:var(--bs-list-group-border-width)}.list-group-horizontal{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal>.list-group-item.active{margin-top:0}.list-group-horizontal>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}@media(min-width: 
576px){.list-group-horizontal-sm{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-sm>.list-group-item.active{margin-top:0}.list-group-horizontal-sm>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-sm>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}@media(min-width: 768px){.list-group-horizontal-md{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-md>.list-group-item.active{margin-top:0}.list-group-horizontal-md>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-md>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}@media(min-width: 992px){.list-group-horizontal-lg{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-lg>.list-group-item.active{margin-top:0}.list-group-horizontal-lg>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-lg>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}@media(min-width: 1200px){.list-group-horizontal-xl{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-xl>.list-group-item.active{margin-top:0}.list-group-horizontal-xl>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-xl>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}@media(min-width: 
1400px){.list-group-horizontal-xxl{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-xxl>.list-group-item.active{margin-top:0}.list-group-horizontal-xxl>.list-group-item+.list-group-item{border-top-width:var(--bs-list-group-border-width);border-left-width:0}.list-group-horizontal-xxl>.list-group-item+.list-group-item.active{margin-left:calc(-1*var(--bs-list-group-border-width));border-left-width:var(--bs-list-group-border-width)}}.list-group-flush>.list-group-item{border-width:0 0 var(--bs-list-group-border-width)}.list-group-flush>.list-group-item:last-child{border-bottom-width:0}.list-group-item-default{--bs-list-group-color: var(--bs-default-text-emphasis);--bs-list-group-bg: var(--bs-default-bg-subtle);--bs-list-group-border-color: var(--bs-default-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-default-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-default-border-subtle);--bs-list-group-active-color: var(--bs-default-bg-subtle);--bs-list-group-active-bg: var(--bs-default-text-emphasis);--bs-list-group-active-border-color: var(--bs-default-text-emphasis)}.list-group-item-primary{--bs-list-group-color: var(--bs-primary-text-emphasis);--bs-list-group-bg: var(--bs-primary-bg-subtle);--bs-list-group-border-color: var(--bs-primary-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-primary-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-primary-border-subtle);--bs-list-group-active-color: var(--bs-primary-bg-subtle);--bs-list-group-active-bg: var(--bs-primary-text-emphasis);--bs-list-group-active-border-color: var(--bs-primary-text-emphasis)}.list-group-item-secondary{--bs-list-group-color: var(--bs-secondary-text-emphasis);--bs-list-group-bg: 
var(--bs-secondary-bg-subtle);--bs-list-group-border-color: var(--bs-secondary-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-secondary-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-secondary-border-subtle);--bs-list-group-active-color: var(--bs-secondary-bg-subtle);--bs-list-group-active-bg: var(--bs-secondary-text-emphasis);--bs-list-group-active-border-color: var(--bs-secondary-text-emphasis)}.list-group-item-success{--bs-list-group-color: var(--bs-success-text-emphasis);--bs-list-group-bg: var(--bs-success-bg-subtle);--bs-list-group-border-color: var(--bs-success-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-success-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-success-border-subtle);--bs-list-group-active-color: var(--bs-success-bg-subtle);--bs-list-group-active-bg: var(--bs-success-text-emphasis);--bs-list-group-active-border-color: var(--bs-success-text-emphasis)}.list-group-item-info{--bs-list-group-color: var(--bs-info-text-emphasis);--bs-list-group-bg: var(--bs-info-bg-subtle);--bs-list-group-border-color: var(--bs-info-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-info-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-info-border-subtle);--bs-list-group-active-color: var(--bs-info-bg-subtle);--bs-list-group-active-bg: var(--bs-info-text-emphasis);--bs-list-group-active-border-color: var(--bs-info-text-emphasis)}.list-group-item-warning{--bs-list-group-color: var(--bs-warning-text-emphasis);--bs-list-group-bg: var(--bs-warning-bg-subtle);--bs-list-group-border-color: 
var(--bs-warning-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-warning-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-warning-border-subtle);--bs-list-group-active-color: var(--bs-warning-bg-subtle);--bs-list-group-active-bg: var(--bs-warning-text-emphasis);--bs-list-group-active-border-color: var(--bs-warning-text-emphasis)}.list-group-item-danger{--bs-list-group-color: var(--bs-danger-text-emphasis);--bs-list-group-bg: var(--bs-danger-bg-subtle);--bs-list-group-border-color: var(--bs-danger-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-danger-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-danger-border-subtle);--bs-list-group-active-color: var(--bs-danger-bg-subtle);--bs-list-group-active-bg: var(--bs-danger-text-emphasis);--bs-list-group-active-border-color: var(--bs-danger-text-emphasis)}.list-group-item-light{--bs-list-group-color: var(--bs-light-text-emphasis);--bs-list-group-bg: var(--bs-light-bg-subtle);--bs-list-group-border-color: var(--bs-light-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: var(--bs-light-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-light-border-subtle);--bs-list-group-active-color: var(--bs-light-bg-subtle);--bs-list-group-active-bg: var(--bs-light-text-emphasis);--bs-list-group-active-border-color: var(--bs-light-text-emphasis)}.list-group-item-dark{--bs-list-group-color: var(--bs-dark-text-emphasis);--bs-list-group-bg: var(--bs-dark-bg-subtle);--bs-list-group-border-color: var(--bs-dark-border-subtle);--bs-list-group-action-hover-color: var(--bs-emphasis-color);--bs-list-group-action-hover-bg: 
var(--bs-dark-border-subtle);--bs-list-group-action-active-color: var(--bs-emphasis-color);--bs-list-group-action-active-bg: var(--bs-dark-border-subtle);--bs-list-group-active-color: var(--bs-dark-bg-subtle);--bs-list-group-active-bg: var(--bs-dark-text-emphasis);--bs-list-group-active-border-color: var(--bs-dark-text-emphasis)}.btn-close{--bs-btn-close-color: #000;--bs-btn-close-bg: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23000'%3e%3cpath d='M.293.293a1 1 0 0 1 1.414 0L8 6.586 14.293.293a1 1 0 1 1 1.414 1.414L9.414 8l6.293 6.293a1 1 0 0 1-1.414 1.414L8 9.414l-6.293 6.293a1 1 0 0 1-1.414-1.414L6.586 8 .293 1.707a1 1 0 0 1 0-1.414z'/%3e%3c/svg%3e");--bs-btn-close-opacity: 0.5;--bs-btn-close-hover-opacity: 0.75;--bs-btn-close-focus-shadow: 0 0 0 0.25rem rgba(39, 128, 227, 0.25);--bs-btn-close-focus-opacity: 1;--bs-btn-close-disabled-opacity: 0.25;--bs-btn-close-white-filter: invert(1) grayscale(100%) brightness(200%);box-sizing:content-box;width:1em;height:1em;padding:.25em .25em;color:var(--bs-btn-close-color);background:rgba(0,0,0,0) var(--bs-btn-close-bg) center/1em auto no-repeat;border:0;opacity:var(--bs-btn-close-opacity)}.btn-close:hover{color:var(--bs-btn-close-color);text-decoration:none;opacity:var(--bs-btn-close-hover-opacity)}.btn-close:focus{outline:0;box-shadow:var(--bs-btn-close-focus-shadow);opacity:var(--bs-btn-close-focus-opacity)}.btn-close:disabled,.btn-close.disabled{pointer-events:none;user-select:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;opacity:var(--bs-btn-close-disabled-opacity)}.btn-close-white{filter:var(--bs-btn-close-white-filter)}[data-bs-theme=dark] .btn-close{filter:var(--bs-btn-close-white-filter)}.toast{--bs-toast-zindex: 1090;--bs-toast-padding-x: 0.75rem;--bs-toast-padding-y: 0.5rem;--bs-toast-spacing: 1.5rem;--bs-toast-max-width: 350px;--bs-toast-font-size:0.875rem;--bs-toast-color: ;--bs-toast-bg: rgba(24, 24, 24, 
0.85);--bs-toast-border-width: 1px;--bs-toast-border-color: rgba(0, 0, 0, 0.175);--bs-toast-border-radius: 0.25rem;--bs-toast-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);--bs-toast-header-color: rgba(250, 241, 228, 0.75);--bs-toast-header-bg: rgba(24, 24, 24, 0.85);--bs-toast-header-border-color: rgba(0, 0, 0, 0.175);width:var(--bs-toast-max-width);max-width:100%;font-size:var(--bs-toast-font-size);color:var(--bs-toast-color);pointer-events:auto;background-color:var(--bs-toast-bg);background-clip:padding-box;border:var(--bs-toast-border-width) solid var(--bs-toast-border-color);box-shadow:var(--bs-toast-box-shadow)}.toast.showing{opacity:0}.toast:not(.show){display:none}.toast-container{--bs-toast-zindex: 1090;position:absolute;z-index:var(--bs-toast-zindex);width:max-content;width:-webkit-max-content;width:-moz-max-content;width:-ms-max-content;width:-o-max-content;max-width:100%;pointer-events:none}.toast-container>:not(:last-child){margin-bottom:var(--bs-toast-spacing)}.toast-header{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;padding:var(--bs-toast-padding-y) var(--bs-toast-padding-x);color:var(--bs-toast-header-color);background-color:var(--bs-toast-header-bg);background-clip:padding-box;border-bottom:var(--bs-toast-border-width) solid var(--bs-toast-header-border-color)}.toast-header .btn-close{margin-right:calc(-0.5*var(--bs-toast-padding-x));margin-left:var(--bs-toast-padding-x)}.toast-body{padding:var(--bs-toast-padding-x);word-wrap:break-word}.modal{--bs-modal-zindex: 1055;--bs-modal-width: 500px;--bs-modal-padding: 1rem;--bs-modal-margin: 0.5rem;--bs-modal-color: ;--bs-modal-bg: #181818;--bs-modal-border-color: rgba(0, 0, 0, 0.175);--bs-modal-border-width: 1px;--bs-modal-border-radius: 0.5rem;--bs-modal-box-shadow: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.075);--bs-modal-inner-border-radius: calc(0.5rem - 1px);--bs-modal-header-padding-x: 1rem;--bs-modal-header-padding-y: 1rem;--bs-modal-header-padding: 1rem 
1rem;--bs-modal-header-border-color: #dee2e6;--bs-modal-header-border-width: 1px;--bs-modal-title-line-height: 1.5;--bs-modal-footer-gap: 0.5rem;--bs-modal-footer-bg: ;--bs-modal-footer-border-color: #dee2e6;--bs-modal-footer-border-width: 1px;position:fixed;top:0;left:0;z-index:var(--bs-modal-zindex);display:none;width:100%;height:100%;overflow-x:hidden;overflow-y:auto;outline:0}.modal-dialog{position:relative;width:auto;margin:var(--bs-modal-margin);pointer-events:none}.modal.fade .modal-dialog{transition:transform .3s ease-out;transform:translate(0, -50px)}@media(prefers-reduced-motion: reduce){.modal.fade .modal-dialog{transition:none}}.modal.show .modal-dialog{transform:none}.modal.modal-static .modal-dialog{transform:scale(1.02)}.modal-dialog-scrollable{height:calc(100% - var(--bs-modal-margin)*2)}.modal-dialog-scrollable .modal-content{max-height:100%;overflow:hidden}.modal-dialog-scrollable .modal-body{overflow-y:auto}.modal-dialog-centered{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;min-height:calc(100% - var(--bs-modal-margin)*2)}.modal-content{position:relative;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;width:100%;color:var(--bs-modal-color);pointer-events:auto;background-color:var(--bs-modal-bg);background-clip:padding-box;border:var(--bs-modal-border-width) solid var(--bs-modal-border-color);outline:0}.modal-backdrop{--bs-backdrop-zindex: 1050;--bs-backdrop-bg: #000;--bs-backdrop-opacity: 
0.5;position:fixed;top:0;left:0;z-index:var(--bs-backdrop-zindex);width:100vw;height:100vh;background-color:var(--bs-backdrop-bg)}.modal-backdrop.fade{opacity:0}.modal-backdrop.show{opacity:var(--bs-backdrop-opacity)}.modal-header{display:flex;display:-webkit-flex;flex-shrink:0;-webkit-flex-shrink:0;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding:var(--bs-modal-header-padding);border-bottom:var(--bs-modal-header-border-width) solid var(--bs-modal-header-border-color)}.modal-header .btn-close{padding:calc(var(--bs-modal-header-padding-y)*.5) calc(var(--bs-modal-header-padding-x)*.5);margin:calc(-0.5*var(--bs-modal-header-padding-y)) calc(-0.5*var(--bs-modal-header-padding-x)) calc(-0.5*var(--bs-modal-header-padding-y)) auto}.modal-title{margin-bottom:0;line-height:var(--bs-modal-title-line-height)}.modal-body{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto;padding:var(--bs-modal-padding)}.modal-footer{display:flex;display:-webkit-flex;flex-shrink:0;-webkit-flex-shrink:0;flex-wrap:wrap;-webkit-flex-wrap:wrap;align-items:center;-webkit-align-items:center;justify-content:flex-end;-webkit-justify-content:flex-end;padding:calc(var(--bs-modal-padding) - var(--bs-modal-footer-gap)*.5);background-color:var(--bs-modal-footer-bg);border-top:var(--bs-modal-footer-border-width) solid var(--bs-modal-footer-border-color)}.modal-footer>*{margin:calc(var(--bs-modal-footer-gap)*.5)}@media(min-width: 576px){.modal{--bs-modal-margin: 1.75rem;--bs-modal-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15)}.modal-dialog{max-width:var(--bs-modal-width);margin-right:auto;margin-left:auto}.modal-sm{--bs-modal-width: 300px}}@media(min-width: 992px){.modal-lg,.modal-xl{--bs-modal-width: 800px}}@media(min-width: 1200px){.modal-xl{--bs-modal-width: 1140px}}.modal-fullscreen{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen .modal-content{height:100%;border:0}.modal-fullscreen 
.modal-body{overflow-y:auto}@media(max-width: 575.98px){.modal-fullscreen-sm-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-sm-down .modal-content{height:100%;border:0}.modal-fullscreen-sm-down .modal-body{overflow-y:auto}}@media(max-width: 767.98px){.modal-fullscreen-md-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-md-down .modal-content{height:100%;border:0}.modal-fullscreen-md-down .modal-body{overflow-y:auto}}@media(max-width: 991.98px){.modal-fullscreen-lg-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-lg-down .modal-content{height:100%;border:0}.modal-fullscreen-lg-down .modal-body{overflow-y:auto}}@media(max-width: 1199.98px){.modal-fullscreen-xl-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-xl-down .modal-content{height:100%;border:0}.modal-fullscreen-xl-down .modal-body{overflow-y:auto}}@media(max-width: 1399.98px){.modal-fullscreen-xxl-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-xxl-down .modal-content{height:100%;border:0}.modal-fullscreen-xxl-down .modal-body{overflow-y:auto}}.tooltip{--bs-tooltip-zindex: 1080;--bs-tooltip-max-width: 200px;--bs-tooltip-padding-x: 0.5rem;--bs-tooltip-padding-y: 0.25rem;--bs-tooltip-margin: ;--bs-tooltip-font-size:0.875rem;--bs-tooltip-color: #181818;--bs-tooltip-bg: #000;--bs-tooltip-border-radius: 0.25rem;--bs-tooltip-opacity: 0.9;--bs-tooltip-arrow-width: 0.8rem;--bs-tooltip-arrow-height: 0.4rem;z-index:var(--bs-tooltip-zindex);display:block;margin:var(--bs-tooltip-margin);font-family:"Source Sans Pro",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI 
Symbol";font-style:normal;font-weight:400;line-height:1.5;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;letter-spacing:normal;word-break:normal;white-space:normal;word-spacing:normal;line-break:auto;font-size:var(--bs-tooltip-font-size);word-wrap:break-word;opacity:0}.tooltip.show{opacity:var(--bs-tooltip-opacity)}.tooltip .tooltip-arrow{display:block;width:var(--bs-tooltip-arrow-width);height:var(--bs-tooltip-arrow-height)}.tooltip .tooltip-arrow::before{position:absolute;content:"";border-color:rgba(0,0,0,0);border-style:solid}.bs-tooltip-top .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=top] .tooltip-arrow{bottom:calc(-1*var(--bs-tooltip-arrow-height))}.bs-tooltip-top .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=top] .tooltip-arrow::before{top:-1px;border-width:var(--bs-tooltip-arrow-height) calc(var(--bs-tooltip-arrow-width)*.5) 0;border-top-color:var(--bs-tooltip-bg)}.bs-tooltip-end .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=right] .tooltip-arrow{left:calc(-1*var(--bs-tooltip-arrow-height));width:var(--bs-tooltip-arrow-height);height:var(--bs-tooltip-arrow-width)}.bs-tooltip-end .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=right] .tooltip-arrow::before{right:-1px;border-width:calc(var(--bs-tooltip-arrow-width)*.5) var(--bs-tooltip-arrow-height) calc(var(--bs-tooltip-arrow-width)*.5) 0;border-right-color:var(--bs-tooltip-bg)}.bs-tooltip-bottom .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=bottom] .tooltip-arrow{top:calc(-1*var(--bs-tooltip-arrow-height))}.bs-tooltip-bottom .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=bottom] .tooltip-arrow::before{bottom:-1px;border-width:0 calc(var(--bs-tooltip-arrow-width)*.5) var(--bs-tooltip-arrow-height);border-bottom-color:var(--bs-tooltip-bg)}.bs-tooltip-start .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=left] 
.tooltip-arrow{right:calc(-1*var(--bs-tooltip-arrow-height));width:var(--bs-tooltip-arrow-height);height:var(--bs-tooltip-arrow-width)}.bs-tooltip-start .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=left] .tooltip-arrow::before{left:-1px;border-width:calc(var(--bs-tooltip-arrow-width)*.5) 0 calc(var(--bs-tooltip-arrow-width)*.5) var(--bs-tooltip-arrow-height);border-left-color:var(--bs-tooltip-bg)}.tooltip-inner{max-width:var(--bs-tooltip-max-width);padding:var(--bs-tooltip-padding-y) var(--bs-tooltip-padding-x);color:var(--bs-tooltip-color);text-align:center;background-color:var(--bs-tooltip-bg)}.popover{--bs-popover-zindex: 1070;--bs-popover-max-width: 276px;--bs-popover-font-size:0.875rem;--bs-popover-bg: #242424;--bs-popover-border-width: 1px;--bs-popover-border-color: rgba(0, 0, 0, 0.175);--bs-popover-border-radius: 0.5rem;--bs-popover-inner-border-radius: calc(0.5rem - 1px);--bs-popover-box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);--bs-popover-header-padding-x: 1rem;--bs-popover-header-padding-y: 0.5rem;--bs-popover-header-font-size:1rem;--bs-popover-header-color: inherit;--bs-popover-header-bg: #e9ecef;--bs-popover-body-padding-x: 1rem;--bs-popover-body-padding-y: 1rem;--bs-popover-body-color: #FAF1E4;--bs-popover-arrow-width: 1rem;--bs-popover-arrow-height: 0.5rem;--bs-popover-arrow-border: var(--bs-popover-border-color);z-index:var(--bs-popover-zindex);display:block;max-width:var(--bs-popover-max-width);font-family:"Source Sans Pro",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI 
Symbol";font-style:normal;font-weight:400;line-height:1.5;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;letter-spacing:normal;word-break:normal;white-space:normal;word-spacing:normal;line-break:auto;font-size:var(--bs-popover-font-size);word-wrap:break-word;background-color:var(--bs-popover-bg);background-clip:padding-box;border:var(--bs-popover-border-width) solid var(--bs-popover-border-color)}.popover .popover-arrow{display:block;width:var(--bs-popover-arrow-width);height:var(--bs-popover-arrow-height)}.popover .popover-arrow::before,.popover .popover-arrow::after{position:absolute;display:block;content:"";border-color:rgba(0,0,0,0);border-style:solid;border-width:0}.bs-popover-top>.popover-arrow,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow{bottom:calc(-1*(var(--bs-popover-arrow-height)) - var(--bs-popover-border-width))}.bs-popover-top>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::before,.bs-popover-top>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::after{border-width:var(--bs-popover-arrow-height) calc(var(--bs-popover-arrow-width)*.5) 0}.bs-popover-top>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::before{bottom:0;border-top-color:var(--bs-popover-arrow-border)}.bs-popover-top>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::after{bottom:var(--bs-popover-border-width);border-top-color:var(--bs-popover-bg)}.bs-popover-end>.popover-arrow,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow{left:calc(-1*(var(--bs-popover-arrow-height)) - 
var(--bs-popover-border-width));width:var(--bs-popover-arrow-height);height:var(--bs-popover-arrow-width)}.bs-popover-end>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::before,.bs-popover-end>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::after{border-width:calc(var(--bs-popover-arrow-width)*.5) var(--bs-popover-arrow-height) calc(var(--bs-popover-arrow-width)*.5) 0}.bs-popover-end>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::before{left:0;border-right-color:var(--bs-popover-arrow-border)}.bs-popover-end>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::after{left:var(--bs-popover-border-width);border-right-color:var(--bs-popover-bg)}.bs-popover-bottom>.popover-arrow,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow{top:calc(-1*(var(--bs-popover-arrow-height)) - var(--bs-popover-border-width))}.bs-popover-bottom>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::before,.bs-popover-bottom>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::after{border-width:0 calc(var(--bs-popover-arrow-width)*.5) var(--bs-popover-arrow-height)}.bs-popover-bottom>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::before{top:0;border-bottom-color:var(--bs-popover-arrow-border)}.bs-popover-bottom>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::after{top:var(--bs-popover-border-width);border-bottom-color:var(--bs-popover-bg)}.bs-popover-bottom .popover-header::before,.bs-popover-auto[data-popper-placement^=bottom] .popover-header::before{position:absolute;top:0;left:50%;display:block;width:var(--bs-popover-arrow-width);margin-left:calc(-0.5*var(--bs-popover-arrow-width));content:"";border-bottom:var(--bs-popover-border-width) solid 
var(--bs-popover-header-bg)}.bs-popover-start>.popover-arrow,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow{right:calc(-1*(var(--bs-popover-arrow-height)) - var(--bs-popover-border-width));width:var(--bs-popover-arrow-height);height:var(--bs-popover-arrow-width)}.bs-popover-start>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::before,.bs-popover-start>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::after{border-width:calc(var(--bs-popover-arrow-width)*.5) 0 calc(var(--bs-popover-arrow-width)*.5) var(--bs-popover-arrow-height)}.bs-popover-start>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::before{right:0;border-left-color:var(--bs-popover-arrow-border)}.bs-popover-start>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::after{right:var(--bs-popover-border-width);border-left-color:var(--bs-popover-bg)}.popover-header{padding:var(--bs-popover-header-padding-y) var(--bs-popover-header-padding-x);margin-bottom:0;font-size:var(--bs-popover-header-font-size);color:var(--bs-popover-header-color);background-color:var(--bs-popover-header-bg);border-bottom:var(--bs-popover-border-width) solid var(--bs-popover-border-color)}.popover-header:empty{display:none}.popover-body{padding:var(--bs-popover-body-padding-y) var(--bs-popover-body-padding-x);color:var(--bs-popover-body-color)}.carousel{position:relative}.carousel.pointer-event{touch-action:pan-y;-webkit-touch-action:pan-y;-moz-touch-action:pan-y;-ms-touch-action:pan-y;-o-touch-action:pan-y}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel-inner::after{display:block;clear:both;content:""}.carousel-item{position:relative;display:none;float:left;width:100%;margin-right:-100%;backface-visibility:hidden;-webkit-backface-visibility:hidden;-moz-backface-visibility:hidden;-ms-backface-visibility:hidden;-o-backface-visibility:hidden;transition:transform 
.6s ease-in-out}@media(prefers-reduced-motion: reduce){.carousel-item{transition:none}}.carousel-item.active,.carousel-item-next,.carousel-item-prev{display:block}.carousel-item-next:not(.carousel-item-start),.active.carousel-item-end{transform:translateX(100%)}.carousel-item-prev:not(.carousel-item-end),.active.carousel-item-start{transform:translateX(-100%)}.carousel-fade .carousel-item{opacity:0;transition-property:opacity;transform:none}.carousel-fade .carousel-item.active,.carousel-fade .carousel-item-next.carousel-item-start,.carousel-fade .carousel-item-prev.carousel-item-end{z-index:1;opacity:1}.carousel-fade .active.carousel-item-start,.carousel-fade .active.carousel-item-end{z-index:0;opacity:0;transition:opacity 0s .6s}@media(prefers-reduced-motion: reduce){.carousel-fade .active.carousel-item-start,.carousel-fade .active.carousel-item-end{transition:none}}.carousel-control-prev,.carousel-control-next{position:absolute;top:0;bottom:0;z-index:1;display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;justify-content:center;-webkit-justify-content:center;width:15%;padding:0;color:#fff;text-align:center;background:none;border:0;opacity:.5;transition:opacity .15s ease}@media(prefers-reduced-motion: reduce){.carousel-control-prev,.carousel-control-next{transition:none}}.carousel-control-prev:hover,.carousel-control-prev:focus,.carousel-control-next:hover,.carousel-control-next:focus{color:#fff;text-decoration:none;outline:0;opacity:.9}.carousel-control-prev{left:0}.carousel-control-next{right:0}.carousel-control-prev-icon,.carousel-control-next-icon{display:inline-block;width:2rem;height:2rem;background-repeat:no-repeat;background-position:50%;background-size:100% 100%}.carousel-control-prev-icon{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23fff'%3e%3cpath d='M11.354 1.646a.5.5 0 0 1 0 .708L5.707 8l5.647 5.646a.5.5 0 0 1-.708.708l-6-6a.5.5 0 0 1 0-.708l6-6a.5.5 0 0 1 .708 
0z'/%3e%3c/svg%3e")}.carousel-control-next-icon{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23fff'%3e%3cpath d='M4.646 1.646a.5.5 0 0 1 .708 0l6 6a.5.5 0 0 1 0 .708l-6 6a.5.5 0 0 1-.708-.708L10.293 8 4.646 2.354a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e")}.carousel-indicators{position:absolute;right:0;bottom:0;left:0;z-index:2;display:flex;display:-webkit-flex;justify-content:center;-webkit-justify-content:center;padding:0;margin-right:15%;margin-bottom:1rem;margin-left:15%}.carousel-indicators [data-bs-target]{box-sizing:content-box;flex:0 1 auto;-webkit-flex:0 1 auto;width:30px;height:3px;padding:0;margin-right:3px;margin-left:3px;text-indent:-999px;cursor:pointer;background-color:#fff;background-clip:padding-box;border:0;border-top:10px solid rgba(0,0,0,0);border-bottom:10px solid rgba(0,0,0,0);opacity:.5;transition:opacity .6s ease}@media(prefers-reduced-motion: reduce){.carousel-indicators [data-bs-target]{transition:none}}.carousel-indicators .active{opacity:1}.carousel-caption{position:absolute;right:15%;bottom:1.25rem;left:15%;padding-top:1.25rem;padding-bottom:1.25rem;color:#fff;text-align:center}.carousel-dark .carousel-control-prev-icon,.carousel-dark .carousel-control-next-icon{filter:invert(1) grayscale(100)}.carousel-dark .carousel-indicators [data-bs-target]{background-color:#000}.carousel-dark .carousel-caption{color:#000}[data-bs-theme=dark] .carousel .carousel-control-prev-icon,[data-bs-theme=dark] .carousel .carousel-control-next-icon,[data-bs-theme=dark].carousel .carousel-control-prev-icon,[data-bs-theme=dark].carousel .carousel-control-next-icon{filter:invert(1) grayscale(100)}[data-bs-theme=dark] .carousel .carousel-indicators [data-bs-target],[data-bs-theme=dark].carousel .carousel-indicators [data-bs-target]{background-color:#000}[data-bs-theme=dark] .carousel .carousel-caption,[data-bs-theme=dark].carousel 
.carousel-caption{color:#000}.spinner-grow,.spinner-border{display:inline-block;width:var(--bs-spinner-width);height:var(--bs-spinner-height);vertical-align:var(--bs-spinner-vertical-align);border-radius:50%;animation:var(--bs-spinner-animation-speed) linear infinite var(--bs-spinner-animation-name)}@keyframes spinner-border{to{transform:rotate(360deg) /* rtl:ignore */}}.spinner-border{--bs-spinner-width: 2rem;--bs-spinner-height: 2rem;--bs-spinner-vertical-align: -0.125em;--bs-spinner-border-width: 0.25em;--bs-spinner-animation-speed: 0.75s;--bs-spinner-animation-name: spinner-border;border:var(--bs-spinner-border-width) solid currentcolor;border-right-color:rgba(0,0,0,0)}.spinner-border-sm{--bs-spinner-width: 1rem;--bs-spinner-height: 1rem;--bs-spinner-border-width: 0.2em}@keyframes spinner-grow{0%{transform:scale(0)}50%{opacity:1;transform:none}}.spinner-grow{--bs-spinner-width: 2rem;--bs-spinner-height: 2rem;--bs-spinner-vertical-align: -0.125em;--bs-spinner-animation-speed: 0.75s;--bs-spinner-animation-name: spinner-grow;background-color:currentcolor;opacity:0}.spinner-grow-sm{--bs-spinner-width: 1rem;--bs-spinner-height: 1rem}@media(prefers-reduced-motion: reduce){.spinner-border,.spinner-grow{--bs-spinner-animation-speed: 1.5s}}.offcanvas,.offcanvas-xxl,.offcanvas-xl,.offcanvas-lg,.offcanvas-md,.offcanvas-sm{--bs-offcanvas-zindex: 1045;--bs-offcanvas-width: 400px;--bs-offcanvas-height: 30vh;--bs-offcanvas-padding-x: 1rem;--bs-offcanvas-padding-y: 1rem;--bs-offcanvas-color: #FAF1E4;--bs-offcanvas-bg: #181818;--bs-offcanvas-border-width: 1px;--bs-offcanvas-border-color: rgba(0, 0, 0, 0.175);--bs-offcanvas-box-shadow: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.075);--bs-offcanvas-transition: transform 0.3s ease-in-out;--bs-offcanvas-title-line-height: 1.5}@media(max-width: 
575.98px){.offcanvas-sm{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 575.98px)and (prefers-reduced-motion: reduce){.offcanvas-sm{transition:none}}@media(max-width: 575.98px){.offcanvas-sm.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-sm.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-sm.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-sm.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-sm.showing,.offcanvas-sm.show:not(.hiding){transform:none}.offcanvas-sm.showing,.offcanvas-sm.hiding,.offcanvas-sm.show{visibility:visible}}@media(min-width: 576px){.offcanvas-sm{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-sm .offcanvas-header{display:none}.offcanvas-sm .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) !important}}@media(max-width: 
767.98px){.offcanvas-md{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 767.98px)and (prefers-reduced-motion: reduce){.offcanvas-md{transition:none}}@media(max-width: 767.98px){.offcanvas-md.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-md.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-md.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-md.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-md.showing,.offcanvas-md.show:not(.hiding){transform:none}.offcanvas-md.showing,.offcanvas-md.hiding,.offcanvas-md.show{visibility:visible}}@media(min-width: 768px){.offcanvas-md{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-md .offcanvas-header{display:none}.offcanvas-md .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) !important}}@media(max-width: 
991.98px){.offcanvas-lg{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 991.98px)and (prefers-reduced-motion: reduce){.offcanvas-lg{transition:none}}@media(max-width: 991.98px){.offcanvas-lg.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-lg.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-lg.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-lg.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-lg.showing,.offcanvas-lg.show:not(.hiding){transform:none}.offcanvas-lg.showing,.offcanvas-lg.hiding,.offcanvas-lg.show{visibility:visible}}@media(min-width: 992px){.offcanvas-lg{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-lg .offcanvas-header{display:none}.offcanvas-lg .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) !important}}@media(max-width: 
1199.98px){.offcanvas-xl{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 1199.98px)and (prefers-reduced-motion: reduce){.offcanvas-xl{transition:none}}@media(max-width: 1199.98px){.offcanvas-xl.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-xl.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-xl.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-xl.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-xl.showing,.offcanvas-xl.show:not(.hiding){transform:none}.offcanvas-xl.showing,.offcanvas-xl.hiding,.offcanvas-xl.show{visibility:visible}}@media(min-width: 1200px){.offcanvas-xl{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-xl .offcanvas-header{display:none}.offcanvas-xl .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) !important}}@media(max-width: 
1399.98px){.offcanvas-xxl{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}}@media(max-width: 1399.98px)and (prefers-reduced-motion: reduce){.offcanvas-xxl{transition:none}}@media(max-width: 1399.98px){.offcanvas-xxl.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas-xxl.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas-xxl.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas-xxl.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas-xxl.showing,.offcanvas-xxl.show:not(.hiding){transform:none}.offcanvas-xxl.showing,.offcanvas-xxl.hiding,.offcanvas-xxl.show{visibility:visible}}@media(min-width: 1400px){.offcanvas-xxl{--bs-offcanvas-height: auto;--bs-offcanvas-border-width: 0;background-color:rgba(0,0,0,0) !important}.offcanvas-xxl .offcanvas-header{display:none}.offcanvas-xxl .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible;background-color:rgba(0,0,0,0) 
!important}}.offcanvas{position:fixed;bottom:0;z-index:var(--bs-offcanvas-zindex);display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;color:var(--bs-offcanvas-color);visibility:hidden;background-color:var(--bs-offcanvas-bg);background-clip:padding-box;outline:0;transition:var(--bs-offcanvas-transition)}@media(prefers-reduced-motion: reduce){.offcanvas{transition:none}}.offcanvas.offcanvas-start{top:0;left:0;width:var(--bs-offcanvas-width);border-right:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(-100%)}.offcanvas.offcanvas-end{top:0;right:0;width:var(--bs-offcanvas-width);border-left:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateX(100%)}.offcanvas.offcanvas-top{top:0;right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-bottom:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(-100%)}.offcanvas.offcanvas-bottom{right:0;left:0;height:var(--bs-offcanvas-height);max-height:100%;border-top:var(--bs-offcanvas-border-width) solid var(--bs-offcanvas-border-color);transform:translateY(100%)}.offcanvas.showing,.offcanvas.show:not(.hiding){transform:none}.offcanvas.showing,.offcanvas.hiding,.offcanvas.show{visibility:visible}.offcanvas-backdrop{position:fixed;top:0;left:0;z-index:1040;width:100vw;height:100vh;background-color:#000}.offcanvas-backdrop.fade{opacity:0}.offcanvas-backdrop.show{opacity:.5}.offcanvas-header{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding:var(--bs-offcanvas-padding-y) var(--bs-offcanvas-padding-x)}.offcanvas-header .btn-close{padding:calc(var(--bs-offcanvas-padding-y)*.5) 
calc(var(--bs-offcanvas-padding-x)*.5);margin-top:calc(-0.5*var(--bs-offcanvas-padding-y));margin-right:calc(-0.5*var(--bs-offcanvas-padding-x));margin-bottom:calc(-0.5*var(--bs-offcanvas-padding-y))}.offcanvas-title{margin-bottom:0;line-height:var(--bs-offcanvas-title-line-height)}.offcanvas-body{flex-grow:1;-webkit-flex-grow:1;padding:var(--bs-offcanvas-padding-y) var(--bs-offcanvas-padding-x);overflow-y:auto}.placeholder{display:inline-block;min-height:1em;vertical-align:middle;cursor:wait;background-color:currentcolor;opacity:.5}.placeholder.btn::before{display:inline-block;content:""}.placeholder-xs{min-height:.6em}.placeholder-sm{min-height:.8em}.placeholder-lg{min-height:1.2em}.placeholder-glow .placeholder{animation:placeholder-glow 2s ease-in-out infinite}@keyframes placeholder-glow{50%{opacity:.2}}.placeholder-wave{mask-image:linear-gradient(130deg, #000 55%, rgba(0, 0, 0, 0.8) 75%, #000 95%);-webkit-mask-image:linear-gradient(130deg, #000 55%, rgba(0, 0, 0, 0.8) 75%, #000 95%);mask-size:200% 100%;-webkit-mask-size:200% 100%;animation:placeholder-wave 2s linear infinite}@keyframes placeholder-wave{100%{mask-position:-200% 0%;-webkit-mask-position:-200% 0%}}.clearfix::after{display:block;clear:both;content:""}.text-bg-default{color:#fff !important;background-color:RGBA(var(--bs-default-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-primary{color:#fff !important;background-color:RGBA(var(--bs-primary-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-secondary{color:#fff !important;background-color:RGBA(var(--bs-secondary-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-success{color:#fff !important;background-color:RGBA(var(--bs-success-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-info{color:#fff !important;background-color:RGBA(var(--bs-info-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-warning{color:#fff !important;background-color:RGBA(var(--bs-warning-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-danger{color:#fff 
!important;background-color:RGBA(var(--bs-danger-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-light{color:#fff !important;background-color:RGBA(var(--bs-light-rgb), var(--bs-bg-opacity, 1)) !important}.text-bg-dark{color:#fff !important;background-color:RGBA(var(--bs-dark-rgb), var(--bs-bg-opacity, 1)) !important}.link-default{color:RGBA(var(--bs-default-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-default-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-default:hover,.link-default:focus{color:RGBA(42, 46, 51, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(42, 46, 51, var(--bs-link-underline-opacity, 1)) !important}.link-primary{color:RGBA(var(--bs-primary-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-primary-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-primary:hover,.link-primary:focus{color:RGBA(31, 102, 182, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(31, 102, 182, var(--bs-link-underline-opacity, 1)) !important}.link-secondary{color:RGBA(var(--bs-secondary-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-secondary-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-secondary:hover,.link-secondary:focus{color:RGBA(42, 46, 51, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(42, 46, 51, var(--bs-link-underline-opacity, 1)) !important}.link-success{color:RGBA(var(--bs-success-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-success-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-success:hover,.link-success:focus{color:RGBA(50, 146, 19, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(50, 146, 19, var(--bs-link-underline-opacity, 1)) !important}.link-info{color:RGBA(var(--bs-info-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-info-rgb), var(--bs-link-underline-opacity, 1)) 
!important}.link-info:hover,.link-info:focus{color:RGBA(122, 67, 150, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(122, 67, 150, var(--bs-link-underline-opacity, 1)) !important}.link-warning{color:RGBA(var(--bs-warning-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-warning-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-warning:hover,.link-warning:focus{color:RGBA(204, 94, 19, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(204, 94, 19, var(--bs-link-underline-opacity, 1)) !important}.link-danger{color:RGBA(var(--bs-danger-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-danger-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-danger:hover,.link-danger:focus{color:RGBA(204, 0, 46, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(204, 0, 46, var(--bs-link-underline-opacity, 1)) !important}.link-light{color:RGBA(var(--bs-light-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-light-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-light:hover,.link-light:focus{color:RGBA(66, 66, 66, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(66, 66, 66, var(--bs-link-underline-opacity, 1)) !important}.link-dark{color:RGBA(var(--bs-dark-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-dark-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-dark:hover,.link-dark:focus{color:RGBA(42, 46, 51, var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(42, 46, 51, var(--bs-link-underline-opacity, 1)) !important}.link-body-emphasis{color:RGBA(var(--bs-emphasis-color-rgb), var(--bs-link-opacity, 1)) !important;text-decoration-color:RGBA(var(--bs-emphasis-color-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-body-emphasis:hover,.link-body-emphasis:focus{color:RGBA(var(--bs-emphasis-color-rgb), var(--bs-link-opacity, 0.75)) 
!important;text-decoration-color:RGBA(var(--bs-emphasis-color-rgb), var(--bs-link-underline-opacity, 0.75)) !important}.focus-ring:focus{outline:0;box-shadow:var(--bs-focus-ring-x, 0) var(--bs-focus-ring-y, 0) var(--bs-focus-ring-blur, 0) var(--bs-focus-ring-width) var(--bs-focus-ring-color)}.icon-link{display:inline-flex;gap:.375rem;align-items:center;-webkit-align-items:center;text-decoration-color:rgba(var(--bs-link-color-rgb), var(--bs-link-opacity, 0.5));text-underline-offset:.25em;backface-visibility:hidden;-webkit-backface-visibility:hidden;-moz-backface-visibility:hidden;-ms-backface-visibility:hidden;-o-backface-visibility:hidden}.icon-link>.bi{flex-shrink:0;-webkit-flex-shrink:0;width:1em;height:1em;fill:currentcolor;transition:.2s ease-in-out transform}@media(prefers-reduced-motion: reduce){.icon-link>.bi{transition:none}}.icon-link-hover:hover>.bi,.icon-link-hover:focus-visible>.bi{transform:var(--bs-icon-link-transform, translate3d(0.25em, 0, 0))}.ratio{position:relative;width:100%}.ratio::before{display:block;padding-top:var(--bs-aspect-ratio);content:""}.ratio>*{position:absolute;top:0;left:0;width:100%;height:100%}.ratio-1x1{--bs-aspect-ratio: 100%}.ratio-4x3{--bs-aspect-ratio: 75%}.ratio-16x9{--bs-aspect-ratio: 56.25%}.ratio-21x9{--bs-aspect-ratio: 42.8571428571%}.fixed-top{position:fixed;top:0;right:0;left:0;z-index:1030}.fixed-bottom{position:fixed;right:0;bottom:0;left:0;z-index:1030}.sticky-top{position:sticky;top:0;z-index:1020}.sticky-bottom{position:sticky;bottom:0;z-index:1020}@media(min-width: 576px){.sticky-sm-top{position:sticky;top:0;z-index:1020}.sticky-sm-bottom{position:sticky;bottom:0;z-index:1020}}@media(min-width: 768px){.sticky-md-top{position:sticky;top:0;z-index:1020}.sticky-md-bottom{position:sticky;bottom:0;z-index:1020}}@media(min-width: 992px){.sticky-lg-top{position:sticky;top:0;z-index:1020}.sticky-lg-bottom{position:sticky;bottom:0;z-index:1020}}@media(min-width: 
1200px){.sticky-xl-top{position:sticky;top:0;z-index:1020}.sticky-xl-bottom{position:sticky;bottom:0;z-index:1020}}@media(min-width: 1400px){.sticky-xxl-top{position:sticky;top:0;z-index:1020}.sticky-xxl-bottom{position:sticky;bottom:0;z-index:1020}}.hstack{display:flex;display:-webkit-flex;flex-direction:row;-webkit-flex-direction:row;align-items:center;-webkit-align-items:center;align-self:stretch;-webkit-align-self:stretch}.vstack{display:flex;display:-webkit-flex;flex:1 1 auto;-webkit-flex:1 1 auto;flex-direction:column;-webkit-flex-direction:column;align-self:stretch;-webkit-align-self:stretch}.visually-hidden,.visually-hidden-focusable:not(:focus):not(:focus-within){width:1px !important;height:1px !important;padding:0 !important;margin:-1px !important;overflow:hidden !important;clip:rect(0, 0, 0, 0) !important;white-space:nowrap !important;border:0 !important}.visually-hidden:not(caption),.visually-hidden-focusable:not(:focus):not(:focus-within):not(caption){position:absolute !important}.stretched-link::after{position:absolute;top:0;right:0;bottom:0;left:0;z-index:1;content:""}.text-truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.vr{display:inline-block;align-self:stretch;-webkit-align-self:stretch;width:1px;min-height:1em;background-color:currentcolor;opacity:.25}.align-baseline{vertical-align:baseline !important}.align-top{vertical-align:top !important}.align-middle{vertical-align:middle !important}.align-bottom{vertical-align:bottom !important}.align-text-bottom{vertical-align:text-bottom !important}.align-text-top{vertical-align:text-top !important}.float-start{float:left !important}.float-end{float:right !important}.float-none{float:none !important}.object-fit-contain{object-fit:contain !important}.object-fit-cover{object-fit:cover !important}.object-fit-fill{object-fit:fill !important}.object-fit-scale{object-fit:scale-down !important}.object-fit-none{object-fit:none !important}.opacity-0{opacity:0 !important}.opacity-25{opacity:.25 
!important}.opacity-50{opacity:.5 !important}.opacity-75{opacity:.75 !important}.opacity-100{opacity:1 !important}.overflow-auto{overflow:auto !important}.overflow-hidden{overflow:hidden !important}.overflow-visible{overflow:visible !important}.overflow-scroll{overflow:scroll !important}.overflow-x-auto{overflow-x:auto !important}.overflow-x-hidden{overflow-x:hidden !important}.overflow-x-visible{overflow-x:visible !important}.overflow-x-scroll{overflow-x:scroll !important}.overflow-y-auto{overflow-y:auto !important}.overflow-y-hidden{overflow-y:hidden !important}.overflow-y-visible{overflow-y:visible !important}.overflow-y-scroll{overflow-y:scroll !important}.d-inline{display:inline !important}.d-inline-block{display:inline-block !important}.d-block{display:block !important}.d-grid{display:grid !important}.d-inline-grid{display:inline-grid !important}.d-table{display:table !important}.d-table-row{display:table-row !important}.d-table-cell{display:table-cell !important}.d-flex{display:flex !important}.d-inline-flex{display:inline-flex !important}.d-none{display:none !important}.shadow{box-shadow:0 .5rem 1rem rgba(0,0,0,.15) !important}.shadow-sm{box-shadow:0 .125rem .25rem rgba(0,0,0,.075) !important}.shadow-lg{box-shadow:0 1rem 3rem rgba(0,0,0,.175) !important}.shadow-none{box-shadow:none !important}.focus-ring-default{--bs-focus-ring-color: rgba(var(--bs-default-rgb), var(--bs-focus-ring-opacity))}.focus-ring-primary{--bs-focus-ring-color: rgba(var(--bs-primary-rgb), var(--bs-focus-ring-opacity))}.focus-ring-secondary{--bs-focus-ring-color: rgba(var(--bs-secondary-rgb), var(--bs-focus-ring-opacity))}.focus-ring-success{--bs-focus-ring-color: rgba(var(--bs-success-rgb), var(--bs-focus-ring-opacity))}.focus-ring-info{--bs-focus-ring-color: rgba(var(--bs-info-rgb), var(--bs-focus-ring-opacity))}.focus-ring-warning{--bs-focus-ring-color: rgba(var(--bs-warning-rgb), var(--bs-focus-ring-opacity))}.focus-ring-danger{--bs-focus-ring-color: rgba(var(--bs-danger-rgb), 
var(--bs-focus-ring-opacity))}.focus-ring-light{--bs-focus-ring-color: rgba(var(--bs-light-rgb), var(--bs-focus-ring-opacity))}.focus-ring-dark{--bs-focus-ring-color: rgba(var(--bs-dark-rgb), var(--bs-focus-ring-opacity))}.position-static{position:static !important}.position-relative{position:relative !important}.position-absolute{position:absolute !important}.position-fixed{position:fixed !important}.position-sticky{position:sticky !important}.top-0{top:0 !important}.top-50{top:50% !important}.top-100{top:100% !important}.bottom-0{bottom:0 !important}.bottom-50{bottom:50% !important}.bottom-100{bottom:100% !important}.start-0{left:0 !important}.start-50{left:50% !important}.start-100{left:100% !important}.end-0{right:0 !important}.end-50{right:50% !important}.end-100{right:100% !important}.translate-middle{transform:translate(-50%, -50%) !important}.translate-middle-x{transform:translateX(-50%) !important}.translate-middle-y{transform:translateY(-50%) !important}.border{border:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-0{border:0 !important}.border-top{border-top:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-top-0{border-top:0 !important}.border-end{border-right:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-end-0{border-right:0 !important}.border-bottom{border-bottom:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-bottom-0{border-bottom:0 !important}.border-start{border-left:var(--bs-border-width) var(--bs-border-style) var(--bs-border-color) !important}.border-start-0{border-left:0 !important}.border-default{--bs-border-opacity: 1;border-color:rgba(var(--bs-default-rgb), var(--bs-border-opacity)) !important}.border-primary{--bs-border-opacity: 1;border-color:rgba(var(--bs-primary-rgb), var(--bs-border-opacity)) !important}.border-secondary{--bs-border-opacity: 
1;border-color:rgba(var(--bs-secondary-rgb), var(--bs-border-opacity)) !important}.border-success{--bs-border-opacity: 1;border-color:rgba(var(--bs-success-rgb), var(--bs-border-opacity)) !important}.border-info{--bs-border-opacity: 1;border-color:rgba(var(--bs-info-rgb), var(--bs-border-opacity)) !important}.border-warning{--bs-border-opacity: 1;border-color:rgba(var(--bs-warning-rgb), var(--bs-border-opacity)) !important}.border-danger{--bs-border-opacity: 1;border-color:rgba(var(--bs-danger-rgb), var(--bs-border-opacity)) !important}.border-light{--bs-border-opacity: 1;border-color:rgba(var(--bs-light-rgb), var(--bs-border-opacity)) !important}.border-dark{--bs-border-opacity: 1;border-color:rgba(var(--bs-dark-rgb), var(--bs-border-opacity)) !important}.border-black{--bs-border-opacity: 1;border-color:rgba(var(--bs-black-rgb), var(--bs-border-opacity)) !important}.border-white{--bs-border-opacity: 1;border-color:rgba(var(--bs-white-rgb), var(--bs-border-opacity)) !important}.border-primary-subtle{border-color:var(--bs-primary-border-subtle) !important}.border-secondary-subtle{border-color:var(--bs-secondary-border-subtle) !important}.border-success-subtle{border-color:var(--bs-success-border-subtle) !important}.border-info-subtle{border-color:var(--bs-info-border-subtle) !important}.border-warning-subtle{border-color:var(--bs-warning-border-subtle) !important}.border-danger-subtle{border-color:var(--bs-danger-border-subtle) !important}.border-light-subtle{border-color:var(--bs-light-border-subtle) !important}.border-dark-subtle{border-color:var(--bs-dark-border-subtle) !important}.border-1{border-width:1px !important}.border-2{border-width:2px !important}.border-3{border-width:3px !important}.border-4{border-width:4px !important}.border-5{border-width:5px !important}.border-opacity-10{--bs-border-opacity: 0.1}.border-opacity-25{--bs-border-opacity: 0.25}.border-opacity-50{--bs-border-opacity: 0.5}.border-opacity-75{--bs-border-opacity: 
0.75}.border-opacity-100{--bs-border-opacity: 1}.w-25{width:25% !important}.w-50{width:50% !important}.w-75{width:75% !important}.w-100{width:100% !important}.w-auto{width:auto !important}.mw-100{max-width:100% !important}.vw-100{width:100vw !important}.min-vw-100{min-width:100vw !important}.h-25{height:25% !important}.h-50{height:50% !important}.h-75{height:75% !important}.h-100{height:100% !important}.h-auto{height:auto !important}.mh-100{max-height:100% !important}.vh-100{height:100vh !important}.min-vh-100{min-height:100vh !important}.flex-fill{flex:1 1 auto !important}.flex-row{flex-direction:row !important}.flex-column{flex-direction:column !important}.flex-row-reverse{flex-direction:row-reverse !important}.flex-column-reverse{flex-direction:column-reverse !important}.flex-grow-0{flex-grow:0 !important}.flex-grow-1{flex-grow:1 !important}.flex-shrink-0{flex-shrink:0 !important}.flex-shrink-1{flex-shrink:1 !important}.flex-wrap{flex-wrap:wrap !important}.flex-nowrap{flex-wrap:nowrap !important}.flex-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-start{justify-content:flex-start !important}.justify-content-end{justify-content:flex-end !important}.justify-content-center{justify-content:center !important}.justify-content-between{justify-content:space-between !important}.justify-content-around{justify-content:space-around !important}.justify-content-evenly{justify-content:space-evenly !important}.align-items-start{align-items:flex-start !important}.align-items-end{align-items:flex-end !important}.align-items-center{align-items:center !important}.align-items-baseline{align-items:baseline !important}.align-items-stretch{align-items:stretch !important}.align-content-start{align-content:flex-start !important}.align-content-end{align-content:flex-end !important}.align-content-center{align-content:center !important}.align-content-between{align-content:space-between !important}.align-content-around{align-content:space-around 
!important}.align-content-stretch{align-content:stretch !important}.align-self-auto{align-self:auto !important}.align-self-start{align-self:flex-start !important}.align-self-end{align-self:flex-end !important}.align-self-center{align-self:center !important}.align-self-baseline{align-self:baseline !important}.align-self-stretch{align-self:stretch !important}.order-first{order:-1 !important}.order-0{order:0 !important}.order-1{order:1 !important}.order-2{order:2 !important}.order-3{order:3 !important}.order-4{order:4 !important}.order-5{order:5 !important}.order-last{order:6 !important}.m-0{margin:0 !important}.m-1{margin:.25rem !important}.m-2{margin:.5rem !important}.m-3{margin:1rem !important}.m-4{margin:1.5rem !important}.m-5{margin:3rem !important}.m-auto{margin:auto !important}.mx-0{margin-right:0 !important;margin-left:0 !important}.mx-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-3{margin-right:1rem !important;margin-left:1rem !important}.mx-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-5{margin-right:3rem !important;margin-left:3rem !important}.mx-auto{margin-right:auto !important;margin-left:auto !important}.my-0{margin-top:0 !important;margin-bottom:0 !important}.my-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-0{margin-top:0 !important}.mt-1{margin-top:.25rem !important}.mt-2{margin-top:.5rem !important}.mt-3{margin-top:1rem !important}.mt-4{margin-top:1.5rem !important}.mt-5{margin-top:3rem !important}.mt-auto{margin-top:auto !important}.me-0{margin-right:0 !important}.me-1{margin-right:.25rem 
!important}.me-2{margin-right:.5rem !important}.me-3{margin-right:1rem !important}.me-4{margin-right:1.5rem !important}.me-5{margin-right:3rem !important}.me-auto{margin-right:auto !important}.mb-0{margin-bottom:0 !important}.mb-1{margin-bottom:.25rem !important}.mb-2{margin-bottom:.5rem !important}.mb-3{margin-bottom:1rem !important}.mb-4{margin-bottom:1.5rem !important}.mb-5{margin-bottom:3rem !important}.mb-auto{margin-bottom:auto !important}.ms-0{margin-left:0 !important}.ms-1{margin-left:.25rem !important}.ms-2{margin-left:.5rem !important}.ms-3{margin-left:1rem !important}.ms-4{margin-left:1.5rem !important}.ms-5{margin-left:3rem !important}.ms-auto{margin-left:auto !important}.p-0{padding:0 !important}.p-1{padding:.25rem !important}.p-2{padding:.5rem !important}.p-3{padding:1rem !important}.p-4{padding:1.5rem !important}.p-5{padding:3rem !important}.px-0{padding-right:0 !important;padding-left:0 !important}.px-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-3{padding-right:1rem !important;padding-left:1rem !important}.px-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-5{padding-right:3rem !important;padding-left:3rem !important}.py-0{padding-top:0 !important;padding-bottom:0 !important}.py-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-0{padding-top:0 !important}.pt-1{padding-top:.25rem !important}.pt-2{padding-top:.5rem !important}.pt-3{padding-top:1rem !important}.pt-4{padding-top:1.5rem !important}.pt-5{padding-top:3rem !important}.pe-0{padding-right:0 !important}.pe-1{padding-right:.25rem !important}.pe-2{padding-right:.5rem !important}.pe-3{padding-right:1rem 
!important}.pe-4{padding-right:1.5rem !important}.pe-5{padding-right:3rem !important}.pb-0{padding-bottom:0 !important}.pb-1{padding-bottom:.25rem !important}.pb-2{padding-bottom:.5rem !important}.pb-3{padding-bottom:1rem !important}.pb-4{padding-bottom:1.5rem !important}.pb-5{padding-bottom:3rem !important}.ps-0{padding-left:0 !important}.ps-1{padding-left:.25rem !important}.ps-2{padding-left:.5rem !important}.ps-3{padding-left:1rem !important}.ps-4{padding-left:1.5rem !important}.ps-5{padding-left:3rem !important}.gap-0{gap:0 !important}.gap-1{gap:.25rem !important}.gap-2{gap:.5rem !important}.gap-3{gap:1rem !important}.gap-4{gap:1.5rem !important}.gap-5{gap:3rem !important}.row-gap-0{row-gap:0 !important}.row-gap-1{row-gap:.25rem !important}.row-gap-2{row-gap:.5rem !important}.row-gap-3{row-gap:1rem !important}.row-gap-4{row-gap:1.5rem !important}.row-gap-5{row-gap:3rem !important}.column-gap-0{column-gap:0 !important}.column-gap-1{column-gap:.25rem !important}.column-gap-2{column-gap:.5rem !important}.column-gap-3{column-gap:1rem !important}.column-gap-4{column-gap:1.5rem !important}.column-gap-5{column-gap:3rem !important}.font-monospace{font-family:var(--bs-font-monospace) !important}.fs-1{font-size:calc(1.325rem + 0.9vw) !important}.fs-2{font-size:calc(1.29rem + 0.48vw) !important}.fs-3{font-size:calc(1.27rem + 0.24vw) !important}.fs-4{font-size:1.25rem !important}.fs-5{font-size:1.1rem !important}.fs-6{font-size:1rem !important}.fst-italic{font-style:italic !important}.fst-normal{font-style:normal !important}.fw-lighter{font-weight:lighter !important}.fw-light{font-weight:300 !important}.fw-normal{font-weight:400 !important}.fw-medium{font-weight:500 !important}.fw-semibold{font-weight:600 !important}.fw-bold{font-weight:700 !important}.fw-bolder{font-weight:bolder !important}.lh-1{line-height:1 !important}.lh-sm{line-height:1.25 !important}.lh-base{line-height:1.5 !important}.lh-lg{line-height:2 !important}.text-start{text-align:left 
!important}.text-end{text-align:right !important}.text-center{text-align:center !important}.text-decoration-none{text-decoration:none !important}.text-decoration-underline{text-decoration:underline !important}.text-decoration-line-through{text-decoration:line-through !important}.text-lowercase{text-transform:lowercase !important}.text-uppercase{text-transform:uppercase !important}.text-capitalize{text-transform:capitalize !important}.text-wrap{white-space:normal !important}.text-nowrap{white-space:nowrap !important}.text-break{word-wrap:break-word !important;word-break:break-word !important}.text-default{--bs-text-opacity: 1;color:rgba(var(--bs-default-rgb), var(--bs-text-opacity)) !important}.text-primary{--bs-text-opacity: 1;color:rgba(var(--bs-primary-rgb), var(--bs-text-opacity)) !important}.text-secondary{--bs-text-opacity: 1;color:rgba(var(--bs-secondary-rgb), var(--bs-text-opacity)) !important}.text-success{--bs-text-opacity: 1;color:rgba(var(--bs-success-rgb), var(--bs-text-opacity)) !important}.text-info{--bs-text-opacity: 1;color:rgba(var(--bs-info-rgb), var(--bs-text-opacity)) !important}.text-warning{--bs-text-opacity: 1;color:rgba(var(--bs-warning-rgb), var(--bs-text-opacity)) !important}.text-danger{--bs-text-opacity: 1;color:rgba(var(--bs-danger-rgb), var(--bs-text-opacity)) !important}.text-light{--bs-text-opacity: 1;color:rgba(var(--bs-light-rgb), var(--bs-text-opacity)) !important}.text-dark{--bs-text-opacity: 1;color:rgba(var(--bs-dark-rgb), var(--bs-text-opacity)) !important}.text-black{--bs-text-opacity: 1;color:rgba(var(--bs-black-rgb), var(--bs-text-opacity)) !important}.text-white{--bs-text-opacity: 1;color:rgba(var(--bs-white-rgb), var(--bs-text-opacity)) !important}.text-body{--bs-text-opacity: 1;color:rgba(var(--bs-body-color-rgb), var(--bs-text-opacity)) !important}.text-muted{--bs-text-opacity: 1;color:var(--bs-secondary-color) !important}.text-black-50{--bs-text-opacity: 1;color:rgba(0,0,0,.5) 
!important}.text-white-50{--bs-text-opacity: 1;color:rgba(255,255,255,.5) !important}.text-body-secondary{--bs-text-opacity: 1;color:var(--bs-secondary-color) !important}.text-body-tertiary{--bs-text-opacity: 1;color:var(--bs-tertiary-color) !important}.text-body-emphasis{--bs-text-opacity: 1;color:var(--bs-emphasis-color) !important}.text-reset{--bs-text-opacity: 1;color:inherit !important}.text-opacity-25{--bs-text-opacity: 0.25}.text-opacity-50{--bs-text-opacity: 0.5}.text-opacity-75{--bs-text-opacity: 0.75}.text-opacity-100{--bs-text-opacity: 1}.text-primary-emphasis{color:var(--bs-primary-text-emphasis) !important}.text-secondary-emphasis{color:var(--bs-secondary-text-emphasis) !important}.text-success-emphasis{color:var(--bs-success-text-emphasis) !important}.text-info-emphasis{color:var(--bs-info-text-emphasis) !important}.text-warning-emphasis{color:var(--bs-warning-text-emphasis) !important}.text-danger-emphasis{color:var(--bs-danger-text-emphasis) !important}.text-light-emphasis{color:var(--bs-light-text-emphasis) !important}.text-dark-emphasis{color:var(--bs-dark-text-emphasis) !important}.link-opacity-10{--bs-link-opacity: 0.1}.link-opacity-10-hover:hover{--bs-link-opacity: 0.1}.link-opacity-25{--bs-link-opacity: 0.25}.link-opacity-25-hover:hover{--bs-link-opacity: 0.25}.link-opacity-50{--bs-link-opacity: 0.5}.link-opacity-50-hover:hover{--bs-link-opacity: 0.5}.link-opacity-75{--bs-link-opacity: 0.75}.link-opacity-75-hover:hover{--bs-link-opacity: 0.75}.link-opacity-100{--bs-link-opacity: 1}.link-opacity-100-hover:hover{--bs-link-opacity: 1}.link-offset-1{text-underline-offset:.125em !important}.link-offset-1-hover:hover{text-underline-offset:.125em !important}.link-offset-2{text-underline-offset:.25em !important}.link-offset-2-hover:hover{text-underline-offset:.25em !important}.link-offset-3{text-underline-offset:.375em !important}.link-offset-3-hover:hover{text-underline-offset:.375em !important}.link-underline-default{--bs-link-underline-opacity: 
1;text-decoration-color:rgba(var(--bs-default-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-primary{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-primary-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-secondary{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-secondary-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-success{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-success-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-info{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-info-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-warning{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-warning-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-danger{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-danger-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-light{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-light-rgb), var(--bs-link-underline-opacity)) !important}.link-underline-dark{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-dark-rgb), var(--bs-link-underline-opacity)) !important}.link-underline{--bs-link-underline-opacity: 1;text-decoration-color:rgba(var(--bs-link-color-rgb), var(--bs-link-underline-opacity, 1)) !important}.link-underline-opacity-0{--bs-link-underline-opacity: 0}.link-underline-opacity-0-hover:hover{--bs-link-underline-opacity: 0}.link-underline-opacity-10{--bs-link-underline-opacity: 0.1}.link-underline-opacity-10-hover:hover{--bs-link-underline-opacity: 0.1}.link-underline-opacity-25{--bs-link-underline-opacity: 0.25}.link-underline-opacity-25-hover:hover{--bs-link-underline-opacity: 0.25}.link-underline-opacity-50{--bs-link-underline-opacity: 0.5}.link-underline-opacity-50-hover:hover{--bs-link-underline-opacity: 
0.5}.link-underline-opacity-75{--bs-link-underline-opacity: 0.75}.link-underline-opacity-75-hover:hover{--bs-link-underline-opacity: 0.75}.link-underline-opacity-100{--bs-link-underline-opacity: 1}.link-underline-opacity-100-hover:hover{--bs-link-underline-opacity: 1}.bg-default{--bs-bg-opacity: 1;background-color:rgba(var(--bs-default-rgb), var(--bs-bg-opacity)) !important}.bg-primary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-primary-rgb), var(--bs-bg-opacity)) !important}.bg-secondary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-secondary-rgb), var(--bs-bg-opacity)) !important}.bg-success{--bs-bg-opacity: 1;background-color:rgba(var(--bs-success-rgb), var(--bs-bg-opacity)) !important}.bg-info{--bs-bg-opacity: 1;background-color:rgba(var(--bs-info-rgb), var(--bs-bg-opacity)) !important}.bg-warning{--bs-bg-opacity: 1;background-color:rgba(var(--bs-warning-rgb), var(--bs-bg-opacity)) !important}.bg-danger{--bs-bg-opacity: 1;background-color:rgba(var(--bs-danger-rgb), var(--bs-bg-opacity)) !important}.bg-light{--bs-bg-opacity: 1;background-color:rgba(var(--bs-light-rgb), var(--bs-bg-opacity)) !important}.bg-dark{--bs-bg-opacity: 1;background-color:rgba(var(--bs-dark-rgb), var(--bs-bg-opacity)) !important}.bg-black{--bs-bg-opacity: 1;background-color:rgba(var(--bs-black-rgb), var(--bs-bg-opacity)) !important}.bg-white{--bs-bg-opacity: 1;background-color:rgba(var(--bs-white-rgb), var(--bs-bg-opacity)) !important}.bg-body{--bs-bg-opacity: 1;background-color:rgba(var(--bs-body-bg-rgb), var(--bs-bg-opacity)) !important}.bg-transparent{--bs-bg-opacity: 1;background-color:rgba(0,0,0,0) !important}.bg-body-secondary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-secondary-bg-rgb), var(--bs-bg-opacity)) !important}.bg-body-tertiary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-tertiary-bg-rgb), var(--bs-bg-opacity)) !important}.bg-opacity-10{--bs-bg-opacity: 0.1}.bg-opacity-25{--bs-bg-opacity: 0.25}.bg-opacity-50{--bs-bg-opacity: 
0.5}.bg-opacity-75{--bs-bg-opacity: 0.75}.bg-opacity-100{--bs-bg-opacity: 1}.bg-primary-subtle{background-color:var(--bs-primary-bg-subtle) !important}.bg-secondary-subtle{background-color:var(--bs-secondary-bg-subtle) !important}.bg-success-subtle{background-color:var(--bs-success-bg-subtle) !important}.bg-info-subtle{background-color:var(--bs-info-bg-subtle) !important}.bg-warning-subtle{background-color:var(--bs-warning-bg-subtle) !important}.bg-danger-subtle{background-color:var(--bs-danger-bg-subtle) !important}.bg-light-subtle{background-color:var(--bs-light-bg-subtle) !important}.bg-dark-subtle{background-color:var(--bs-dark-bg-subtle) !important}.bg-gradient{background-image:var(--bs-gradient) !important}.user-select-all{user-select:all !important}.user-select-auto{user-select:auto !important}.user-select-none{user-select:none !important}.pe-none{pointer-events:none !important}.pe-auto{pointer-events:auto !important}.rounded{border-radius:var(--bs-border-radius) !important}.rounded-0{border-radius:0 !important}.rounded-1{border-radius:var(--bs-border-radius-sm) !important}.rounded-2{border-radius:var(--bs-border-radius) !important}.rounded-3{border-radius:var(--bs-border-radius-lg) !important}.rounded-4{border-radius:var(--bs-border-radius-xl) !important}.rounded-5{border-radius:var(--bs-border-radius-xxl) !important}.rounded-circle{border-radius:50% !important}.rounded-pill{border-radius:var(--bs-border-radius-pill) !important}.rounded-top{border-top-left-radius:var(--bs-border-radius) !important;border-top-right-radius:var(--bs-border-radius) !important}.rounded-top-0{border-top-left-radius:0 !important;border-top-right-radius:0 !important}.rounded-top-1{border-top-left-radius:var(--bs-border-radius-sm) !important;border-top-right-radius:var(--bs-border-radius-sm) !important}.rounded-top-2{border-top-left-radius:var(--bs-border-radius) !important;border-top-right-radius:var(--bs-border-radius) 
!important}.rounded-top-3{border-top-left-radius:var(--bs-border-radius-lg) !important;border-top-right-radius:var(--bs-border-radius-lg) !important}.rounded-top-4{border-top-left-radius:var(--bs-border-radius-xl) !important;border-top-right-radius:var(--bs-border-radius-xl) !important}.rounded-top-5{border-top-left-radius:var(--bs-border-radius-xxl) !important;border-top-right-radius:var(--bs-border-radius-xxl) !important}.rounded-top-circle{border-top-left-radius:50% !important;border-top-right-radius:50% !important}.rounded-top-pill{border-top-left-radius:var(--bs-border-radius-pill) !important;border-top-right-radius:var(--bs-border-radius-pill) !important}.rounded-end{border-top-right-radius:var(--bs-border-radius) !important;border-bottom-right-radius:var(--bs-border-radius) !important}.rounded-end-0{border-top-right-radius:0 !important;border-bottom-right-radius:0 !important}.rounded-end-1{border-top-right-radius:var(--bs-border-radius-sm) !important;border-bottom-right-radius:var(--bs-border-radius-sm) !important}.rounded-end-2{border-top-right-radius:var(--bs-border-radius) !important;border-bottom-right-radius:var(--bs-border-radius) !important}.rounded-end-3{border-top-right-radius:var(--bs-border-radius-lg) !important;border-bottom-right-radius:var(--bs-border-radius-lg) !important}.rounded-end-4{border-top-right-radius:var(--bs-border-radius-xl) !important;border-bottom-right-radius:var(--bs-border-radius-xl) !important}.rounded-end-5{border-top-right-radius:var(--bs-border-radius-xxl) !important;border-bottom-right-radius:var(--bs-border-radius-xxl) !important}.rounded-end-circle{border-top-right-radius:50% !important;border-bottom-right-radius:50% !important}.rounded-end-pill{border-top-right-radius:var(--bs-border-radius-pill) !important;border-bottom-right-radius:var(--bs-border-radius-pill) !important}.rounded-bottom{border-bottom-right-radius:var(--bs-border-radius) !important;border-bottom-left-radius:var(--bs-border-radius) 
!important}.rounded-bottom-0{border-bottom-right-radius:0 !important;border-bottom-left-radius:0 !important}.rounded-bottom-1{border-bottom-right-radius:var(--bs-border-radius-sm) !important;border-bottom-left-radius:var(--bs-border-radius-sm) !important}.rounded-bottom-2{border-bottom-right-radius:var(--bs-border-radius) !important;border-bottom-left-radius:var(--bs-border-radius) !important}.rounded-bottom-3{border-bottom-right-radius:var(--bs-border-radius-lg) !important;border-bottom-left-radius:var(--bs-border-radius-lg) !important}.rounded-bottom-4{border-bottom-right-radius:var(--bs-border-radius-xl) !important;border-bottom-left-radius:var(--bs-border-radius-xl) !important}.rounded-bottom-5{border-bottom-right-radius:var(--bs-border-radius-xxl) !important;border-bottom-left-radius:var(--bs-border-radius-xxl) !important}.rounded-bottom-circle{border-bottom-right-radius:50% !important;border-bottom-left-radius:50% !important}.rounded-bottom-pill{border-bottom-right-radius:var(--bs-border-radius-pill) !important;border-bottom-left-radius:var(--bs-border-radius-pill) !important}.rounded-start{border-bottom-left-radius:var(--bs-border-radius) !important;border-top-left-radius:var(--bs-border-radius) !important}.rounded-start-0{border-bottom-left-radius:0 !important;border-top-left-radius:0 !important}.rounded-start-1{border-bottom-left-radius:var(--bs-border-radius-sm) !important;border-top-left-radius:var(--bs-border-radius-sm) !important}.rounded-start-2{border-bottom-left-radius:var(--bs-border-radius) !important;border-top-left-radius:var(--bs-border-radius) !important}.rounded-start-3{border-bottom-left-radius:var(--bs-border-radius-lg) !important;border-top-left-radius:var(--bs-border-radius-lg) !important}.rounded-start-4{border-bottom-left-radius:var(--bs-border-radius-xl) !important;border-top-left-radius:var(--bs-border-radius-xl) !important}.rounded-start-5{border-bottom-left-radius:var(--bs-border-radius-xxl) 
!important;border-top-left-radius:var(--bs-border-radius-xxl) !important}.rounded-start-circle{border-bottom-left-radius:50% !important;border-top-left-radius:50% !important}.rounded-start-pill{border-bottom-left-radius:var(--bs-border-radius-pill) !important;border-top-left-radius:var(--bs-border-radius-pill) !important}.visible{visibility:visible !important}.invisible{visibility:hidden !important}.z-n1{z-index:-1 !important}.z-0{z-index:0 !important}.z-1{z-index:1 !important}.z-2{z-index:2 !important}.z-3{z-index:3 !important}@media(min-width: 576px){.float-sm-start{float:left !important}.float-sm-end{float:right !important}.float-sm-none{float:none !important}.object-fit-sm-contain{object-fit:contain !important}.object-fit-sm-cover{object-fit:cover !important}.object-fit-sm-fill{object-fit:fill !important}.object-fit-sm-scale{object-fit:scale-down !important}.object-fit-sm-none{object-fit:none !important}.d-sm-inline{display:inline !important}.d-sm-inline-block{display:inline-block !important}.d-sm-block{display:block !important}.d-sm-grid{display:grid !important}.d-sm-inline-grid{display:inline-grid !important}.d-sm-table{display:table !important}.d-sm-table-row{display:table-row !important}.d-sm-table-cell{display:table-cell !important}.d-sm-flex{display:flex !important}.d-sm-inline-flex{display:inline-flex !important}.d-sm-none{display:none !important}.flex-sm-fill{flex:1 1 auto !important}.flex-sm-row{flex-direction:row !important}.flex-sm-column{flex-direction:column !important}.flex-sm-row-reverse{flex-direction:row-reverse !important}.flex-sm-column-reverse{flex-direction:column-reverse !important}.flex-sm-grow-0{flex-grow:0 !important}.flex-sm-grow-1{flex-grow:1 !important}.flex-sm-shrink-0{flex-shrink:0 !important}.flex-sm-shrink-1{flex-shrink:1 !important}.flex-sm-wrap{flex-wrap:wrap !important}.flex-sm-nowrap{flex-wrap:nowrap !important}.flex-sm-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-sm-start{justify-content:flex-start 
!important}.justify-content-sm-end{justify-content:flex-end !important}.justify-content-sm-center{justify-content:center !important}.justify-content-sm-between{justify-content:space-between !important}.justify-content-sm-around{justify-content:space-around !important}.justify-content-sm-evenly{justify-content:space-evenly !important}.align-items-sm-start{align-items:flex-start !important}.align-items-sm-end{align-items:flex-end !important}.align-items-sm-center{align-items:center !important}.align-items-sm-baseline{align-items:baseline !important}.align-items-sm-stretch{align-items:stretch !important}.align-content-sm-start{align-content:flex-start !important}.align-content-sm-end{align-content:flex-end !important}.align-content-sm-center{align-content:center !important}.align-content-sm-between{align-content:space-between !important}.align-content-sm-around{align-content:space-around !important}.align-content-sm-stretch{align-content:stretch !important}.align-self-sm-auto{align-self:auto !important}.align-self-sm-start{align-self:flex-start !important}.align-self-sm-end{align-self:flex-end !important}.align-self-sm-center{align-self:center !important}.align-self-sm-baseline{align-self:baseline !important}.align-self-sm-stretch{align-self:stretch !important}.order-sm-first{order:-1 !important}.order-sm-0{order:0 !important}.order-sm-1{order:1 !important}.order-sm-2{order:2 !important}.order-sm-3{order:3 !important}.order-sm-4{order:4 !important}.order-sm-5{order:5 !important}.order-sm-last{order:6 !important}.m-sm-0{margin:0 !important}.m-sm-1{margin:.25rem !important}.m-sm-2{margin:.5rem !important}.m-sm-3{margin:1rem !important}.m-sm-4{margin:1.5rem !important}.m-sm-5{margin:3rem !important}.m-sm-auto{margin:auto !important}.mx-sm-0{margin-right:0 !important;margin-left:0 !important}.mx-sm-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-sm-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-sm-3{margin-right:1rem 
!important;margin-left:1rem !important}.mx-sm-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-sm-5{margin-right:3rem !important;margin-left:3rem !important}.mx-sm-auto{margin-right:auto !important;margin-left:auto !important}.my-sm-0{margin-top:0 !important;margin-bottom:0 !important}.my-sm-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-sm-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-sm-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-sm-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-sm-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-sm-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-sm-0{margin-top:0 !important}.mt-sm-1{margin-top:.25rem !important}.mt-sm-2{margin-top:.5rem !important}.mt-sm-3{margin-top:1rem !important}.mt-sm-4{margin-top:1.5rem !important}.mt-sm-5{margin-top:3rem !important}.mt-sm-auto{margin-top:auto !important}.me-sm-0{margin-right:0 !important}.me-sm-1{margin-right:.25rem !important}.me-sm-2{margin-right:.5rem !important}.me-sm-3{margin-right:1rem !important}.me-sm-4{margin-right:1.5rem !important}.me-sm-5{margin-right:3rem !important}.me-sm-auto{margin-right:auto !important}.mb-sm-0{margin-bottom:0 !important}.mb-sm-1{margin-bottom:.25rem !important}.mb-sm-2{margin-bottom:.5rem !important}.mb-sm-3{margin-bottom:1rem !important}.mb-sm-4{margin-bottom:1.5rem !important}.mb-sm-5{margin-bottom:3rem !important}.mb-sm-auto{margin-bottom:auto !important}.ms-sm-0{margin-left:0 !important}.ms-sm-1{margin-left:.25rem !important}.ms-sm-2{margin-left:.5rem !important}.ms-sm-3{margin-left:1rem !important}.ms-sm-4{margin-left:1.5rem !important}.ms-sm-5{margin-left:3rem !important}.ms-sm-auto{margin-left:auto !important}.p-sm-0{padding:0 !important}.p-sm-1{padding:.25rem !important}.p-sm-2{padding:.5rem !important}.p-sm-3{padding:1rem !important}.p-sm-4{padding:1.5rem !important}.p-sm-5{padding:3rem 
!important}.px-sm-0{padding-right:0 !important;padding-left:0 !important}.px-sm-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-sm-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-sm-3{padding-right:1rem !important;padding-left:1rem !important}.px-sm-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-sm-5{padding-right:3rem !important;padding-left:3rem !important}.py-sm-0{padding-top:0 !important;padding-bottom:0 !important}.py-sm-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-sm-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-sm-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-sm-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-sm-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-sm-0{padding-top:0 !important}.pt-sm-1{padding-top:.25rem !important}.pt-sm-2{padding-top:.5rem !important}.pt-sm-3{padding-top:1rem !important}.pt-sm-4{padding-top:1.5rem !important}.pt-sm-5{padding-top:3rem !important}.pe-sm-0{padding-right:0 !important}.pe-sm-1{padding-right:.25rem !important}.pe-sm-2{padding-right:.5rem !important}.pe-sm-3{padding-right:1rem !important}.pe-sm-4{padding-right:1.5rem !important}.pe-sm-5{padding-right:3rem !important}.pb-sm-0{padding-bottom:0 !important}.pb-sm-1{padding-bottom:.25rem !important}.pb-sm-2{padding-bottom:.5rem !important}.pb-sm-3{padding-bottom:1rem !important}.pb-sm-4{padding-bottom:1.5rem !important}.pb-sm-5{padding-bottom:3rem !important}.ps-sm-0{padding-left:0 !important}.ps-sm-1{padding-left:.25rem !important}.ps-sm-2{padding-left:.5rem !important}.ps-sm-3{padding-left:1rem !important}.ps-sm-4{padding-left:1.5rem !important}.ps-sm-5{padding-left:3rem !important}.gap-sm-0{gap:0 !important}.gap-sm-1{gap:.25rem !important}.gap-sm-2{gap:.5rem !important}.gap-sm-3{gap:1rem !important}.gap-sm-4{gap:1.5rem !important}.gap-sm-5{gap:3rem !important}.row-gap-sm-0{row-gap:0 
!important}.row-gap-sm-1{row-gap:.25rem !important}.row-gap-sm-2{row-gap:.5rem !important}.row-gap-sm-3{row-gap:1rem !important}.row-gap-sm-4{row-gap:1.5rem !important}.row-gap-sm-5{row-gap:3rem !important}.column-gap-sm-0{column-gap:0 !important}.column-gap-sm-1{column-gap:.25rem !important}.column-gap-sm-2{column-gap:.5rem !important}.column-gap-sm-3{column-gap:1rem !important}.column-gap-sm-4{column-gap:1.5rem !important}.column-gap-sm-5{column-gap:3rem !important}.text-sm-start{text-align:left !important}.text-sm-end{text-align:right !important}.text-sm-center{text-align:center !important}}@media(min-width: 768px){.float-md-start{float:left !important}.float-md-end{float:right !important}.float-md-none{float:none !important}.object-fit-md-contain{object-fit:contain !important}.object-fit-md-cover{object-fit:cover !important}.object-fit-md-fill{object-fit:fill !important}.object-fit-md-scale{object-fit:scale-down !important}.object-fit-md-none{object-fit:none !important}.d-md-inline{display:inline !important}.d-md-inline-block{display:inline-block !important}.d-md-block{display:block !important}.d-md-grid{display:grid !important}.d-md-inline-grid{display:inline-grid !important}.d-md-table{display:table !important}.d-md-table-row{display:table-row !important}.d-md-table-cell{display:table-cell !important}.d-md-flex{display:flex !important}.d-md-inline-flex{display:inline-flex !important}.d-md-none{display:none !important}.flex-md-fill{flex:1 1 auto !important}.flex-md-row{flex-direction:row !important}.flex-md-column{flex-direction:column !important}.flex-md-row-reverse{flex-direction:row-reverse !important}.flex-md-column-reverse{flex-direction:column-reverse !important}.flex-md-grow-0{flex-grow:0 !important}.flex-md-grow-1{flex-grow:1 !important}.flex-md-shrink-0{flex-shrink:0 !important}.flex-md-shrink-1{flex-shrink:1 !important}.flex-md-wrap{flex-wrap:wrap !important}.flex-md-nowrap{flex-wrap:nowrap !important}.flex-md-wrap-reverse{flex-wrap:wrap-reverse 
!important}.justify-content-md-start{justify-content:flex-start !important}.justify-content-md-end{justify-content:flex-end !important}.justify-content-md-center{justify-content:center !important}.justify-content-md-between{justify-content:space-between !important}.justify-content-md-around{justify-content:space-around !important}.justify-content-md-evenly{justify-content:space-evenly !important}.align-items-md-start{align-items:flex-start !important}.align-items-md-end{align-items:flex-end !important}.align-items-md-center{align-items:center !important}.align-items-md-baseline{align-items:baseline !important}.align-items-md-stretch{align-items:stretch !important}.align-content-md-start{align-content:flex-start !important}.align-content-md-end{align-content:flex-end !important}.align-content-md-center{align-content:center !important}.align-content-md-between{align-content:space-between !important}.align-content-md-around{align-content:space-around !important}.align-content-md-stretch{align-content:stretch !important}.align-self-md-auto{align-self:auto !important}.align-self-md-start{align-self:flex-start !important}.align-self-md-end{align-self:flex-end !important}.align-self-md-center{align-self:center !important}.align-self-md-baseline{align-self:baseline !important}.align-self-md-stretch{align-self:stretch !important}.order-md-first{order:-1 !important}.order-md-0{order:0 !important}.order-md-1{order:1 !important}.order-md-2{order:2 !important}.order-md-3{order:3 !important}.order-md-4{order:4 !important}.order-md-5{order:5 !important}.order-md-last{order:6 !important}.m-md-0{margin:0 !important}.m-md-1{margin:.25rem !important}.m-md-2{margin:.5rem !important}.m-md-3{margin:1rem !important}.m-md-4{margin:1.5rem !important}.m-md-5{margin:3rem !important}.m-md-auto{margin:auto !important}.mx-md-0{margin-right:0 !important;margin-left:0 !important}.mx-md-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-md-2{margin-right:.5rem 
!important;margin-left:.5rem !important}.mx-md-3{margin-right:1rem !important;margin-left:1rem !important}.mx-md-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-md-5{margin-right:3rem !important;margin-left:3rem !important}.mx-md-auto{margin-right:auto !important;margin-left:auto !important}.my-md-0{margin-top:0 !important;margin-bottom:0 !important}.my-md-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-md-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-md-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-md-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-md-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-md-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-md-0{margin-top:0 !important}.mt-md-1{margin-top:.25rem !important}.mt-md-2{margin-top:.5rem !important}.mt-md-3{margin-top:1rem !important}.mt-md-4{margin-top:1.5rem !important}.mt-md-5{margin-top:3rem !important}.mt-md-auto{margin-top:auto !important}.me-md-0{margin-right:0 !important}.me-md-1{margin-right:.25rem !important}.me-md-2{margin-right:.5rem !important}.me-md-3{margin-right:1rem !important}.me-md-4{margin-right:1.5rem !important}.me-md-5{margin-right:3rem !important}.me-md-auto{margin-right:auto !important}.mb-md-0{margin-bottom:0 !important}.mb-md-1{margin-bottom:.25rem !important}.mb-md-2{margin-bottom:.5rem !important}.mb-md-3{margin-bottom:1rem !important}.mb-md-4{margin-bottom:1.5rem !important}.mb-md-5{margin-bottom:3rem !important}.mb-md-auto{margin-bottom:auto !important}.ms-md-0{margin-left:0 !important}.ms-md-1{margin-left:.25rem !important}.ms-md-2{margin-left:.5rem !important}.ms-md-3{margin-left:1rem !important}.ms-md-4{margin-left:1.5rem !important}.ms-md-5{margin-left:3rem !important}.ms-md-auto{margin-left:auto !important}.p-md-0{padding:0 !important}.p-md-1{padding:.25rem !important}.p-md-2{padding:.5rem !important}.p-md-3{padding:1rem 
!important}.p-md-4{padding:1.5rem !important}.p-md-5{padding:3rem !important}.px-md-0{padding-right:0 !important;padding-left:0 !important}.px-md-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-md-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-md-3{padding-right:1rem !important;padding-left:1rem !important}.px-md-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-md-5{padding-right:3rem !important;padding-left:3rem !important}.py-md-0{padding-top:0 !important;padding-bottom:0 !important}.py-md-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-md-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-md-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-md-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-md-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-md-0{padding-top:0 !important}.pt-md-1{padding-top:.25rem !important}.pt-md-2{padding-top:.5rem !important}.pt-md-3{padding-top:1rem !important}.pt-md-4{padding-top:1.5rem !important}.pt-md-5{padding-top:3rem !important}.pe-md-0{padding-right:0 !important}.pe-md-1{padding-right:.25rem !important}.pe-md-2{padding-right:.5rem !important}.pe-md-3{padding-right:1rem !important}.pe-md-4{padding-right:1.5rem !important}.pe-md-5{padding-right:3rem !important}.pb-md-0{padding-bottom:0 !important}.pb-md-1{padding-bottom:.25rem !important}.pb-md-2{padding-bottom:.5rem !important}.pb-md-3{padding-bottom:1rem !important}.pb-md-4{padding-bottom:1.5rem !important}.pb-md-5{padding-bottom:3rem !important}.ps-md-0{padding-left:0 !important}.ps-md-1{padding-left:.25rem !important}.ps-md-2{padding-left:.5rem !important}.ps-md-3{padding-left:1rem !important}.ps-md-4{padding-left:1.5rem !important}.ps-md-5{padding-left:3rem !important}.gap-md-0{gap:0 !important}.gap-md-1{gap:.25rem !important}.gap-md-2{gap:.5rem !important}.gap-md-3{gap:1rem !important}.gap-md-4{gap:1.5rem 
!important}.gap-md-5{gap:3rem !important}.row-gap-md-0{row-gap:0 !important}.row-gap-md-1{row-gap:.25rem !important}.row-gap-md-2{row-gap:.5rem !important}.row-gap-md-3{row-gap:1rem !important}.row-gap-md-4{row-gap:1.5rem !important}.row-gap-md-5{row-gap:3rem !important}.column-gap-md-0{column-gap:0 !important}.column-gap-md-1{column-gap:.25rem !important}.column-gap-md-2{column-gap:.5rem !important}.column-gap-md-3{column-gap:1rem !important}.column-gap-md-4{column-gap:1.5rem !important}.column-gap-md-5{column-gap:3rem !important}.text-md-start{text-align:left !important}.text-md-end{text-align:right !important}.text-md-center{text-align:center !important}}@media(min-width: 992px){.float-lg-start{float:left !important}.float-lg-end{float:right !important}.float-lg-none{float:none !important}.object-fit-lg-contain{object-fit:contain !important}.object-fit-lg-cover{object-fit:cover !important}.object-fit-lg-fill{object-fit:fill !important}.object-fit-lg-scale{object-fit:scale-down !important}.object-fit-lg-none{object-fit:none !important}.d-lg-inline{display:inline !important}.d-lg-inline-block{display:inline-block !important}.d-lg-block{display:block !important}.d-lg-grid{display:grid !important}.d-lg-inline-grid{display:inline-grid !important}.d-lg-table{display:table !important}.d-lg-table-row{display:table-row !important}.d-lg-table-cell{display:table-cell !important}.d-lg-flex{display:flex !important}.d-lg-inline-flex{display:inline-flex !important}.d-lg-none{display:none !important}.flex-lg-fill{flex:1 1 auto !important}.flex-lg-row{flex-direction:row !important}.flex-lg-column{flex-direction:column !important}.flex-lg-row-reverse{flex-direction:row-reverse !important}.flex-lg-column-reverse{flex-direction:column-reverse !important}.flex-lg-grow-0{flex-grow:0 !important}.flex-lg-grow-1{flex-grow:1 !important}.flex-lg-shrink-0{flex-shrink:0 !important}.flex-lg-shrink-1{flex-shrink:1 !important}.flex-lg-wrap{flex-wrap:wrap 
!important}.flex-lg-nowrap{flex-wrap:nowrap !important}.flex-lg-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-lg-start{justify-content:flex-start !important}.justify-content-lg-end{justify-content:flex-end !important}.justify-content-lg-center{justify-content:center !important}.justify-content-lg-between{justify-content:space-between !important}.justify-content-lg-around{justify-content:space-around !important}.justify-content-lg-evenly{justify-content:space-evenly !important}.align-items-lg-start{align-items:flex-start !important}.align-items-lg-end{align-items:flex-end !important}.align-items-lg-center{align-items:center !important}.align-items-lg-baseline{align-items:baseline !important}.align-items-lg-stretch{align-items:stretch !important}.align-content-lg-start{align-content:flex-start !important}.align-content-lg-end{align-content:flex-end !important}.align-content-lg-center{align-content:center !important}.align-content-lg-between{align-content:space-between !important}.align-content-lg-around{align-content:space-around !important}.align-content-lg-stretch{align-content:stretch !important}.align-self-lg-auto{align-self:auto !important}.align-self-lg-start{align-self:flex-start !important}.align-self-lg-end{align-self:flex-end !important}.align-self-lg-center{align-self:center !important}.align-self-lg-baseline{align-self:baseline !important}.align-self-lg-stretch{align-self:stretch !important}.order-lg-first{order:-1 !important}.order-lg-0{order:0 !important}.order-lg-1{order:1 !important}.order-lg-2{order:2 !important}.order-lg-3{order:3 !important}.order-lg-4{order:4 !important}.order-lg-5{order:5 !important}.order-lg-last{order:6 !important}.m-lg-0{margin:0 !important}.m-lg-1{margin:.25rem !important}.m-lg-2{margin:.5rem !important}.m-lg-3{margin:1rem !important}.m-lg-4{margin:1.5rem !important}.m-lg-5{margin:3rem !important}.m-lg-auto{margin:auto !important}.mx-lg-0{margin-right:0 !important;margin-left:0 
!important}.mx-lg-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-lg-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-lg-3{margin-right:1rem !important;margin-left:1rem !important}.mx-lg-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-lg-5{margin-right:3rem !important;margin-left:3rem !important}.mx-lg-auto{margin-right:auto !important;margin-left:auto !important}.my-lg-0{margin-top:0 !important;margin-bottom:0 !important}.my-lg-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-lg-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-lg-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-lg-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-lg-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-lg-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-lg-0{margin-top:0 !important}.mt-lg-1{margin-top:.25rem !important}.mt-lg-2{margin-top:.5rem !important}.mt-lg-3{margin-top:1rem !important}.mt-lg-4{margin-top:1.5rem !important}.mt-lg-5{margin-top:3rem !important}.mt-lg-auto{margin-top:auto !important}.me-lg-0{margin-right:0 !important}.me-lg-1{margin-right:.25rem !important}.me-lg-2{margin-right:.5rem !important}.me-lg-3{margin-right:1rem !important}.me-lg-4{margin-right:1.5rem !important}.me-lg-5{margin-right:3rem !important}.me-lg-auto{margin-right:auto !important}.mb-lg-0{margin-bottom:0 !important}.mb-lg-1{margin-bottom:.25rem !important}.mb-lg-2{margin-bottom:.5rem !important}.mb-lg-3{margin-bottom:1rem !important}.mb-lg-4{margin-bottom:1.5rem !important}.mb-lg-5{margin-bottom:3rem !important}.mb-lg-auto{margin-bottom:auto !important}.ms-lg-0{margin-left:0 !important}.ms-lg-1{margin-left:.25rem !important}.ms-lg-2{margin-left:.5rem !important}.ms-lg-3{margin-left:1rem !important}.ms-lg-4{margin-left:1.5rem !important}.ms-lg-5{margin-left:3rem !important}.ms-lg-auto{margin-left:auto !important}.p-lg-0{padding:0 
!important}.p-lg-1{padding:.25rem !important}.p-lg-2{padding:.5rem !important}.p-lg-3{padding:1rem !important}.p-lg-4{padding:1.5rem !important}.p-lg-5{padding:3rem !important}.px-lg-0{padding-right:0 !important;padding-left:0 !important}.px-lg-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-lg-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-lg-3{padding-right:1rem !important;padding-left:1rem !important}.px-lg-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-lg-5{padding-right:3rem !important;padding-left:3rem !important}.py-lg-0{padding-top:0 !important;padding-bottom:0 !important}.py-lg-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-lg-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-lg-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-lg-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-lg-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-lg-0{padding-top:0 !important}.pt-lg-1{padding-top:.25rem !important}.pt-lg-2{padding-top:.5rem !important}.pt-lg-3{padding-top:1rem !important}.pt-lg-4{padding-top:1.5rem !important}.pt-lg-5{padding-top:3rem !important}.pe-lg-0{padding-right:0 !important}.pe-lg-1{padding-right:.25rem !important}.pe-lg-2{padding-right:.5rem !important}.pe-lg-3{padding-right:1rem !important}.pe-lg-4{padding-right:1.5rem !important}.pe-lg-5{padding-right:3rem !important}.pb-lg-0{padding-bottom:0 !important}.pb-lg-1{padding-bottom:.25rem !important}.pb-lg-2{padding-bottom:.5rem !important}.pb-lg-3{padding-bottom:1rem !important}.pb-lg-4{padding-bottom:1.5rem !important}.pb-lg-5{padding-bottom:3rem !important}.ps-lg-0{padding-left:0 !important}.ps-lg-1{padding-left:.25rem !important}.ps-lg-2{padding-left:.5rem !important}.ps-lg-3{padding-left:1rem !important}.ps-lg-4{padding-left:1.5rem !important}.ps-lg-5{padding-left:3rem !important}.gap-lg-0{gap:0 !important}.gap-lg-1{gap:.25rem 
!important}.gap-lg-2{gap:.5rem !important}.gap-lg-3{gap:1rem !important}.gap-lg-4{gap:1.5rem !important}.gap-lg-5{gap:3rem !important}.row-gap-lg-0{row-gap:0 !important}.row-gap-lg-1{row-gap:.25rem !important}.row-gap-lg-2{row-gap:.5rem !important}.row-gap-lg-3{row-gap:1rem !important}.row-gap-lg-4{row-gap:1.5rem !important}.row-gap-lg-5{row-gap:3rem !important}.column-gap-lg-0{column-gap:0 !important}.column-gap-lg-1{column-gap:.25rem !important}.column-gap-lg-2{column-gap:.5rem !important}.column-gap-lg-3{column-gap:1rem !important}.column-gap-lg-4{column-gap:1.5rem !important}.column-gap-lg-5{column-gap:3rem !important}.text-lg-start{text-align:left !important}.text-lg-end{text-align:right !important}.text-lg-center{text-align:center !important}}@media(min-width: 1200px){.float-xl-start{float:left !important}.float-xl-end{float:right !important}.float-xl-none{float:none !important}.object-fit-xl-contain{object-fit:contain !important}.object-fit-xl-cover{object-fit:cover !important}.object-fit-xl-fill{object-fit:fill !important}.object-fit-xl-scale{object-fit:scale-down !important}.object-fit-xl-none{object-fit:none !important}.d-xl-inline{display:inline !important}.d-xl-inline-block{display:inline-block !important}.d-xl-block{display:block !important}.d-xl-grid{display:grid !important}.d-xl-inline-grid{display:inline-grid !important}.d-xl-table{display:table !important}.d-xl-table-row{display:table-row !important}.d-xl-table-cell{display:table-cell !important}.d-xl-flex{display:flex !important}.d-xl-inline-flex{display:inline-flex !important}.d-xl-none{display:none !important}.flex-xl-fill{flex:1 1 auto !important}.flex-xl-row{flex-direction:row !important}.flex-xl-column{flex-direction:column !important}.flex-xl-row-reverse{flex-direction:row-reverse !important}.flex-xl-column-reverse{flex-direction:column-reverse !important}.flex-xl-grow-0{flex-grow:0 !important}.flex-xl-grow-1{flex-grow:1 !important}.flex-xl-shrink-0{flex-shrink:0 
!important}.flex-xl-shrink-1{flex-shrink:1 !important}.flex-xl-wrap{flex-wrap:wrap !important}.flex-xl-nowrap{flex-wrap:nowrap !important}.flex-xl-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-xl-start{justify-content:flex-start !important}.justify-content-xl-end{justify-content:flex-end !important}.justify-content-xl-center{justify-content:center !important}.justify-content-xl-between{justify-content:space-between !important}.justify-content-xl-around{justify-content:space-around !important}.justify-content-xl-evenly{justify-content:space-evenly !important}.align-items-xl-start{align-items:flex-start !important}.align-items-xl-end{align-items:flex-end !important}.align-items-xl-center{align-items:center !important}.align-items-xl-baseline{align-items:baseline !important}.align-items-xl-stretch{align-items:stretch !important}.align-content-xl-start{align-content:flex-start !important}.align-content-xl-end{align-content:flex-end !important}.align-content-xl-center{align-content:center !important}.align-content-xl-between{align-content:space-between !important}.align-content-xl-around{align-content:space-around !important}.align-content-xl-stretch{align-content:stretch !important}.align-self-xl-auto{align-self:auto !important}.align-self-xl-start{align-self:flex-start !important}.align-self-xl-end{align-self:flex-end !important}.align-self-xl-center{align-self:center !important}.align-self-xl-baseline{align-self:baseline !important}.align-self-xl-stretch{align-self:stretch !important}.order-xl-first{order:-1 !important}.order-xl-0{order:0 !important}.order-xl-1{order:1 !important}.order-xl-2{order:2 !important}.order-xl-3{order:3 !important}.order-xl-4{order:4 !important}.order-xl-5{order:5 !important}.order-xl-last{order:6 !important}.m-xl-0{margin:0 !important}.m-xl-1{margin:.25rem !important}.m-xl-2{margin:.5rem !important}.m-xl-3{margin:1rem !important}.m-xl-4{margin:1.5rem !important}.m-xl-5{margin:3rem !important}.m-xl-auto{margin:auto 
!important}.mx-xl-0{margin-right:0 !important;margin-left:0 !important}.mx-xl-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-xl-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-xl-3{margin-right:1rem !important;margin-left:1rem !important}.mx-xl-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-xl-5{margin-right:3rem !important;margin-left:3rem !important}.mx-xl-auto{margin-right:auto !important;margin-left:auto !important}.my-xl-0{margin-top:0 !important;margin-bottom:0 !important}.my-xl-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-xl-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-xl-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-xl-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-xl-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-xl-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-xl-0{margin-top:0 !important}.mt-xl-1{margin-top:.25rem !important}.mt-xl-2{margin-top:.5rem !important}.mt-xl-3{margin-top:1rem !important}.mt-xl-4{margin-top:1.5rem !important}.mt-xl-5{margin-top:3rem !important}.mt-xl-auto{margin-top:auto !important}.me-xl-0{margin-right:0 !important}.me-xl-1{margin-right:.25rem !important}.me-xl-2{margin-right:.5rem !important}.me-xl-3{margin-right:1rem !important}.me-xl-4{margin-right:1.5rem !important}.me-xl-5{margin-right:3rem !important}.me-xl-auto{margin-right:auto !important}.mb-xl-0{margin-bottom:0 !important}.mb-xl-1{margin-bottom:.25rem !important}.mb-xl-2{margin-bottom:.5rem !important}.mb-xl-3{margin-bottom:1rem !important}.mb-xl-4{margin-bottom:1.5rem !important}.mb-xl-5{margin-bottom:3rem !important}.mb-xl-auto{margin-bottom:auto !important}.ms-xl-0{margin-left:0 !important}.ms-xl-1{margin-left:.25rem !important}.ms-xl-2{margin-left:.5rem !important}.ms-xl-3{margin-left:1rem !important}.ms-xl-4{margin-left:1.5rem !important}.ms-xl-5{margin-left:3rem 
!important}.ms-xl-auto{margin-left:auto !important}.p-xl-0{padding:0 !important}.p-xl-1{padding:.25rem !important}.p-xl-2{padding:.5rem !important}.p-xl-3{padding:1rem !important}.p-xl-4{padding:1.5rem !important}.p-xl-5{padding:3rem !important}.px-xl-0{padding-right:0 !important;padding-left:0 !important}.px-xl-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-xl-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-xl-3{padding-right:1rem !important;padding-left:1rem !important}.px-xl-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-xl-5{padding-right:3rem !important;padding-left:3rem !important}.py-xl-0{padding-top:0 !important;padding-bottom:0 !important}.py-xl-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-xl-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-xl-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-xl-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-xl-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-xl-0{padding-top:0 !important}.pt-xl-1{padding-top:.25rem !important}.pt-xl-2{padding-top:.5rem !important}.pt-xl-3{padding-top:1rem !important}.pt-xl-4{padding-top:1.5rem !important}.pt-xl-5{padding-top:3rem !important}.pe-xl-0{padding-right:0 !important}.pe-xl-1{padding-right:.25rem !important}.pe-xl-2{padding-right:.5rem !important}.pe-xl-3{padding-right:1rem !important}.pe-xl-4{padding-right:1.5rem !important}.pe-xl-5{padding-right:3rem !important}.pb-xl-0{padding-bottom:0 !important}.pb-xl-1{padding-bottom:.25rem !important}.pb-xl-2{padding-bottom:.5rem !important}.pb-xl-3{padding-bottom:1rem !important}.pb-xl-4{padding-bottom:1.5rem !important}.pb-xl-5{padding-bottom:3rem !important}.ps-xl-0{padding-left:0 !important}.ps-xl-1{padding-left:.25rem !important}.ps-xl-2{padding-left:.5rem !important}.ps-xl-3{padding-left:1rem !important}.ps-xl-4{padding-left:1.5rem !important}.ps-xl-5{padding-left:3rem 
!important}.gap-xl-0{gap:0 !important}.gap-xl-1{gap:.25rem !important}.gap-xl-2{gap:.5rem !important}.gap-xl-3{gap:1rem !important}.gap-xl-4{gap:1.5rem !important}.gap-xl-5{gap:3rem !important}.row-gap-xl-0{row-gap:0 !important}.row-gap-xl-1{row-gap:.25rem !important}.row-gap-xl-2{row-gap:.5rem !important}.row-gap-xl-3{row-gap:1rem !important}.row-gap-xl-4{row-gap:1.5rem !important}.row-gap-xl-5{row-gap:3rem !important}.column-gap-xl-0{column-gap:0 !important}.column-gap-xl-1{column-gap:.25rem !important}.column-gap-xl-2{column-gap:.5rem !important}.column-gap-xl-3{column-gap:1rem !important}.column-gap-xl-4{column-gap:1.5rem !important}.column-gap-xl-5{column-gap:3rem !important}.text-xl-start{text-align:left !important}.text-xl-end{text-align:right !important}.text-xl-center{text-align:center !important}}@media(min-width: 1400px){.float-xxl-start{float:left !important}.float-xxl-end{float:right !important}.float-xxl-none{float:none !important}.object-fit-xxl-contain{object-fit:contain !important}.object-fit-xxl-cover{object-fit:cover !important}.object-fit-xxl-fill{object-fit:fill !important}.object-fit-xxl-scale{object-fit:scale-down !important}.object-fit-xxl-none{object-fit:none !important}.d-xxl-inline{display:inline !important}.d-xxl-inline-block{display:inline-block !important}.d-xxl-block{display:block !important}.d-xxl-grid{display:grid !important}.d-xxl-inline-grid{display:inline-grid !important}.d-xxl-table{display:table !important}.d-xxl-table-row{display:table-row !important}.d-xxl-table-cell{display:table-cell !important}.d-xxl-flex{display:flex !important}.d-xxl-inline-flex{display:inline-flex !important}.d-xxl-none{display:none !important}.flex-xxl-fill{flex:1 1 auto !important}.flex-xxl-row{flex-direction:row !important}.flex-xxl-column{flex-direction:column !important}.flex-xxl-row-reverse{flex-direction:row-reverse !important}.flex-xxl-column-reverse{flex-direction:column-reverse !important}.flex-xxl-grow-0{flex-grow:0 
!important}.flex-xxl-grow-1{flex-grow:1 !important}.flex-xxl-shrink-0{flex-shrink:0 !important}.flex-xxl-shrink-1{flex-shrink:1 !important}.flex-xxl-wrap{flex-wrap:wrap !important}.flex-xxl-nowrap{flex-wrap:nowrap !important}.flex-xxl-wrap-reverse{flex-wrap:wrap-reverse !important}.justify-content-xxl-start{justify-content:flex-start !important}.justify-content-xxl-end{justify-content:flex-end !important}.justify-content-xxl-center{justify-content:center !important}.justify-content-xxl-between{justify-content:space-between !important}.justify-content-xxl-around{justify-content:space-around !important}.justify-content-xxl-evenly{justify-content:space-evenly !important}.align-items-xxl-start{align-items:flex-start !important}.align-items-xxl-end{align-items:flex-end !important}.align-items-xxl-center{align-items:center !important}.align-items-xxl-baseline{align-items:baseline !important}.align-items-xxl-stretch{align-items:stretch !important}.align-content-xxl-start{align-content:flex-start !important}.align-content-xxl-end{align-content:flex-end !important}.align-content-xxl-center{align-content:center !important}.align-content-xxl-between{align-content:space-between !important}.align-content-xxl-around{align-content:space-around !important}.align-content-xxl-stretch{align-content:stretch !important}.align-self-xxl-auto{align-self:auto !important}.align-self-xxl-start{align-self:flex-start !important}.align-self-xxl-end{align-self:flex-end !important}.align-self-xxl-center{align-self:center !important}.align-self-xxl-baseline{align-self:baseline !important}.align-self-xxl-stretch{align-self:stretch !important}.order-xxl-first{order:-1 !important}.order-xxl-0{order:0 !important}.order-xxl-1{order:1 !important}.order-xxl-2{order:2 !important}.order-xxl-3{order:3 !important}.order-xxl-4{order:4 !important}.order-xxl-5{order:5 !important}.order-xxl-last{order:6 !important}.m-xxl-0{margin:0 !important}.m-xxl-1{margin:.25rem !important}.m-xxl-2{margin:.5rem 
!important}.m-xxl-3{margin:1rem !important}.m-xxl-4{margin:1.5rem !important}.m-xxl-5{margin:3rem !important}.m-xxl-auto{margin:auto !important}.mx-xxl-0{margin-right:0 !important;margin-left:0 !important}.mx-xxl-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-xxl-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-xxl-3{margin-right:1rem !important;margin-left:1rem !important}.mx-xxl-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-xxl-5{margin-right:3rem !important;margin-left:3rem !important}.mx-xxl-auto{margin-right:auto !important;margin-left:auto !important}.my-xxl-0{margin-top:0 !important;margin-bottom:0 !important}.my-xxl-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-xxl-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-xxl-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-xxl-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-xxl-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-xxl-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-xxl-0{margin-top:0 !important}.mt-xxl-1{margin-top:.25rem !important}.mt-xxl-2{margin-top:.5rem !important}.mt-xxl-3{margin-top:1rem !important}.mt-xxl-4{margin-top:1.5rem !important}.mt-xxl-5{margin-top:3rem !important}.mt-xxl-auto{margin-top:auto !important}.me-xxl-0{margin-right:0 !important}.me-xxl-1{margin-right:.25rem !important}.me-xxl-2{margin-right:.5rem !important}.me-xxl-3{margin-right:1rem !important}.me-xxl-4{margin-right:1.5rem !important}.me-xxl-5{margin-right:3rem !important}.me-xxl-auto{margin-right:auto !important}.mb-xxl-0{margin-bottom:0 !important}.mb-xxl-1{margin-bottom:.25rem !important}.mb-xxl-2{margin-bottom:.5rem !important}.mb-xxl-3{margin-bottom:1rem !important}.mb-xxl-4{margin-bottom:1.5rem !important}.mb-xxl-5{margin-bottom:3rem !important}.mb-xxl-auto{margin-bottom:auto !important}.ms-xxl-0{margin-left:0 !important}.ms-xxl-1{margin-left:.25rem 
!important}.ms-xxl-2{margin-left:.5rem !important}.ms-xxl-3{margin-left:1rem !important}.ms-xxl-4{margin-left:1.5rem !important}.ms-xxl-5{margin-left:3rem !important}.ms-xxl-auto{margin-left:auto !important}.p-xxl-0{padding:0 !important}.p-xxl-1{padding:.25rem !important}.p-xxl-2{padding:.5rem !important}.p-xxl-3{padding:1rem !important}.p-xxl-4{padding:1.5rem !important}.p-xxl-5{padding:3rem !important}.px-xxl-0{padding-right:0 !important;padding-left:0 !important}.px-xxl-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-xxl-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-xxl-3{padding-right:1rem !important;padding-left:1rem !important}.px-xxl-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-xxl-5{padding-right:3rem !important;padding-left:3rem !important}.py-xxl-0{padding-top:0 !important;padding-bottom:0 !important}.py-xxl-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-xxl-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-xxl-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-xxl-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-xxl-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-xxl-0{padding-top:0 !important}.pt-xxl-1{padding-top:.25rem !important}.pt-xxl-2{padding-top:.5rem !important}.pt-xxl-3{padding-top:1rem !important}.pt-xxl-4{padding-top:1.5rem !important}.pt-xxl-5{padding-top:3rem !important}.pe-xxl-0{padding-right:0 !important}.pe-xxl-1{padding-right:.25rem !important}.pe-xxl-2{padding-right:.5rem !important}.pe-xxl-3{padding-right:1rem !important}.pe-xxl-4{padding-right:1.5rem !important}.pe-xxl-5{padding-right:3rem !important}.pb-xxl-0{padding-bottom:0 !important}.pb-xxl-1{padding-bottom:.25rem !important}.pb-xxl-2{padding-bottom:.5rem !important}.pb-xxl-3{padding-bottom:1rem !important}.pb-xxl-4{padding-bottom:1.5rem !important}.pb-xxl-5{padding-bottom:3rem !important}.ps-xxl-0{padding-left:0 
!important}.ps-xxl-1{padding-left:.25rem !important}.ps-xxl-2{padding-left:.5rem !important}.ps-xxl-3{padding-left:1rem !important}.ps-xxl-4{padding-left:1.5rem !important}.ps-xxl-5{padding-left:3rem !important}.gap-xxl-0{gap:0 !important}.gap-xxl-1{gap:.25rem !important}.gap-xxl-2{gap:.5rem !important}.gap-xxl-3{gap:1rem !important}.gap-xxl-4{gap:1.5rem !important}.gap-xxl-5{gap:3rem !important}.row-gap-xxl-0{row-gap:0 !important}.row-gap-xxl-1{row-gap:.25rem !important}.row-gap-xxl-2{row-gap:.5rem !important}.row-gap-xxl-3{row-gap:1rem !important}.row-gap-xxl-4{row-gap:1.5rem !important}.row-gap-xxl-5{row-gap:3rem !important}.column-gap-xxl-0{column-gap:0 !important}.column-gap-xxl-1{column-gap:.25rem !important}.column-gap-xxl-2{column-gap:.5rem !important}.column-gap-xxl-3{column-gap:1rem !important}.column-gap-xxl-4{column-gap:1.5rem !important}.column-gap-xxl-5{column-gap:3rem !important}.text-xxl-start{text-align:left !important}.text-xxl-end{text-align:right !important}.text-xxl-center{text-align:center !important}}.bg-default{color:#fff}.bg-primary{color:#fff}.bg-secondary{color:#fff}.bg-success{color:#fff}.bg-info{color:#fff}.bg-warning{color:#fff}.bg-danger{color:#fff}.bg-light{color:#fff}.bg-dark{color:#fff}@media(min-width: 1200px){.fs-1{font-size:2rem !important}.fs-2{font-size:1.65rem !important}.fs-3{font-size:1.45rem !important}}@media print{.d-print-inline{display:inline !important}.d-print-inline-block{display:inline-block !important}.d-print-block{display:block !important}.d-print-grid{display:grid !important}.d-print-inline-grid{display:inline-grid !important}.d-print-table{display:table !important}.d-print-table-row{display:table-row !important}.d-print-table-cell{display:table-cell !important}.d-print-flex{display:flex !important}.d-print-inline-flex{display:inline-flex !important}.d-print-none{display:none !important}}:root{--bslib-spacer: 1rem;--bslib-mb-spacer: var(--bslib-spacer, 
1rem)}.bslib-mb-spacing{margin-bottom:var(--bslib-mb-spacer)}.bslib-gap-spacing{gap:var(--bslib-mb-spacer)}.bslib-gap-spacing>.bslib-mb-spacing,.bslib-gap-spacing>.form-group,.bslib-gap-spacing>p,.bslib-gap-spacing>pre{margin-bottom:0}.html-fill-container>.html-fill-item.bslib-mb-spacing{margin-bottom:0}.tab-content>.tab-pane.html-fill-container{display:none}.tab-content>.active.html-fill-container{display:flex}.tab-content.html-fill-container{padding:0}.bg-blue{--bslib-color-bg: #2780e3;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-blue{--bslib-color-fg: #2780e3;color:var(--bslib-color-fg)}.bg-indigo{--bslib-color-bg: #6610f2;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-indigo{--bslib-color-fg: #6610f2;color:var(--bslib-color-fg)}.bg-purple{--bslib-color-bg: #613d7c;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-purple{--bslib-color-fg: #613d7c;color:var(--bslib-color-fg)}.bg-pink{--bslib-color-bg: #e83e8c;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-pink{--bslib-color-fg: #e83e8c;color:var(--bslib-color-fg)}.bg-red{--bslib-color-bg: #ff0039;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-red{--bslib-color-fg: #ff0039;color:var(--bslib-color-fg)}.bg-orange{--bslib-color-bg: #f0ad4e;--bslib-color-fg: #000;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-orange{--bslib-color-fg: #f0ad4e;color:var(--bslib-color-fg)}.bg-yellow{--bslib-color-bg: #ff7518;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-yellow{--bslib-color-fg: #ff7518;color:var(--bslib-color-fg)}.bg-green{--bslib-color-bg: #3fb618;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-green{--bslib-color-fg: 
#3fb618;color:var(--bslib-color-fg)}.bg-teal{--bslib-color-bg: #20c997;--bslib-color-fg: #000;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-teal{--bslib-color-fg: #20c997;color:var(--bslib-color-fg)}.bg-cyan{--bslib-color-bg: #9954bb;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-cyan{--bslib-color-fg: #9954bb;color:var(--bslib-color-fg)}.text-default{--bslib-color-fg: #343a40}.bg-default{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.text-primary{--bslib-color-fg: #2780e3}.bg-primary{--bslib-color-bg: #2780e3;--bslib-color-fg: #fff}.text-secondary{--bslib-color-fg: #343a40}.bg-secondary{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.text-success{--bslib-color-fg: #3fb618}.bg-success{--bslib-color-bg: #3fb618;--bslib-color-fg: #fff}.text-info{--bslib-color-fg: #9954bb}.bg-info{--bslib-color-bg: #9954bb;--bslib-color-fg: #fff}.text-warning{--bslib-color-fg: #ff7518}.bg-warning{--bslib-color-bg: #ff7518;--bslib-color-fg: #fff}.text-danger{--bslib-color-fg: #ff0039}.bg-danger{--bslib-color-bg: #ff0039;--bslib-color-fg: #fff}.text-light{--bslib-color-fg: #525252}.bg-light{--bslib-color-bg: #525252;--bslib-color-fg: #fff}.text-dark{--bslib-color-fg: #343a40}.bg-dark{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.bg-gradient-blue-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #4053e9;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #4053e9;color:#fff}.bg-gradient-blue-purple{--bslib-color-fg: #fff;--bslib-color-bg: #3e65ba;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #3e65ba;color:#fff}.bg-gradient-blue-pink{--bslib-color-fg: #fff;--bslib-color-bg: #7466c0;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) 
#7466c0;color:#fff}.bg-gradient-blue-red{--bslib-color-fg: #fff;--bslib-color-bg: #7d4d9f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #7d4d9f;color:#fff}.bg-gradient-blue-orange{--bslib-color-fg: #fff;--bslib-color-bg: #7792a7;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #7792a7;color:#fff}.bg-gradient-blue-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #7d7c92;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #7d7c92;color:#fff}.bg-gradient-blue-green{--bslib-color-fg: #fff;--bslib-color-bg: #319692;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #319692;color:#fff}.bg-gradient-blue-teal{--bslib-color-fg: #fff;--bslib-color-bg: #249dc5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #249dc5;color:#fff}.bg-gradient-blue-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #556ed3;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #556ed3;color:#fff}.bg-gradient-indigo-blue{--bslib-color-fg: #fff;--bslib-color-bg: #4d3dec;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #4d3dec;color:#fff}.bg-gradient-indigo-purple{--bslib-color-fg: #fff;--bslib-color-bg: #6422c3;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #6422c3;color:#fff}.bg-gradient-indigo-pink{--bslib-color-fg: #fff;--bslib-color-bg: #9a22c9;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 
var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #9a22c9;color:#fff}.bg-gradient-indigo-red{--bslib-color-fg: #fff;--bslib-color-bg: #a30aa8;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #a30aa8;color:#fff}.bg-gradient-indigo-orange{--bslib-color-fg: #fff;--bslib-color-bg: #9d4fb0;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #9d4fb0;color:#fff}.bg-gradient-indigo-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #a3389b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #a3389b;color:#fff}.bg-gradient-indigo-green{--bslib-color-fg: #fff;--bslib-color-bg: #56529b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #56529b;color:#fff}.bg-gradient-indigo-teal{--bslib-color-fg: #fff;--bslib-color-bg: #4a5ace;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #4a5ace;color:#fff}.bg-gradient-indigo-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #7a2bdc;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #7a2bdc;color:#fff}.bg-gradient-purple-blue{--bslib-color-fg: #fff;--bslib-color-bg: #4a58a5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #4a58a5;color:#fff}.bg-gradient-purple-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #632bab;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #632bab;color:#fff}.bg-gradient-purple-pink{--bslib-color-fg: #fff;--bslib-color-bg: 
#973d82;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #973d82;color:#fff}.bg-gradient-purple-red{--bslib-color-fg: #fff;--bslib-color-bg: #a02561;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #a02561;color:#fff}.bg-gradient-purple-orange{--bslib-color-fg: #fff;--bslib-color-bg: #9a6a6a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #9a6a6a;color:#fff}.bg-gradient-purple-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #a05354;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #a05354;color:#fff}.bg-gradient-purple-green{--bslib-color-fg: #fff;--bslib-color-bg: #536d54;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #536d54;color:#fff}.bg-gradient-purple-teal{--bslib-color-fg: #fff;--bslib-color-bg: #477587;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #477587;color:#fff}.bg-gradient-purple-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #774695;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #774695;color:#fff}.bg-gradient-pink-blue{--bslib-color-fg: #fff;--bslib-color-bg: #9b58af;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #9b58af;color:#fff}.bg-gradient-pink-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #b42cb5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) 
#b42cb5;color:#fff}.bg-gradient-pink-purple{--bslib-color-fg: #fff;--bslib-color-bg: #b23e86;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #b23e86;color:#fff}.bg-gradient-pink-red{--bslib-color-fg: #fff;--bslib-color-bg: #f1256b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #f1256b;color:#fff}.bg-gradient-pink-orange{--bslib-color-fg: #fff;--bslib-color-bg: #eb6a73;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #eb6a73;color:#fff}.bg-gradient-pink-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #f1545e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #f1545e;color:#fff}.bg-gradient-pink-green{--bslib-color-fg: #fff;--bslib-color-bg: #a46e5e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #a46e5e;color:#fff}.bg-gradient-pink-teal{--bslib-color-fg: #fff;--bslib-color-bg: #987690;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #987690;color:#fff}.bg-gradient-pink-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #c8479f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #c8479f;color:#fff}.bg-gradient-red-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a9337d;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a9337d;color:#fff}.bg-gradient-red-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #c20683;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 
var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #c20683;color:#fff}.bg-gradient-red-purple{--bslib-color-fg: #fff;--bslib-color-bg: #c01854;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #c01854;color:#fff}.bg-gradient-red-pink{--bslib-color-fg: #fff;--bslib-color-bg: #f6195a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #f6195a;color:#fff}.bg-gradient-red-orange{--bslib-color-fg: #fff;--bslib-color-bg: #f94541;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #f94541;color:#fff}.bg-gradient-red-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #ff2f2c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #ff2f2c;color:#fff}.bg-gradient-red-green{--bslib-color-fg: #fff;--bslib-color-bg: #b2492c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #b2492c;color:#fff}.bg-gradient-red-teal{--bslib-color-fg: #fff;--bslib-color-bg: #a6505f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #a6505f;color:#fff}.bg-gradient-red-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #d6226d;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #d6226d;color:#fff}.bg-gradient-orange-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a09b8a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a09b8a;color:#fff}.bg-gradient-orange-indigo{--bslib-color-fg: #fff;--bslib-color-bg: 
#b96e90;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #b96e90;color:#fff}.bg-gradient-orange-purple{--bslib-color-fg: #fff;--bslib-color-bg: #b78060;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #b78060;color:#fff}.bg-gradient-orange-pink{--bslib-color-fg: #fff;--bslib-color-bg: #ed8167;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #ed8167;color:#fff}.bg-gradient-orange-red{--bslib-color-fg: #fff;--bslib-color-bg: #f66846;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #f66846;color:#fff}.bg-gradient-orange-yellow{--bslib-color-fg: #000;--bslib-color-bg: #f69738;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #f69738;color:#000}.bg-gradient-orange-green{--bslib-color-fg: #000;--bslib-color-bg: #a9b138;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #a9b138;color:#000}.bg-gradient-orange-teal{--bslib-color-fg: #000;--bslib-color-bg: #9db86b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #9db86b;color:#000}.bg-gradient-orange-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #cd897a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #cd897a;color:#fff}.bg-gradient-yellow-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a97969;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) 
#a97969;color:#fff}.bg-gradient-yellow-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #c24d6f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #c24d6f;color:#fff}.bg-gradient-yellow-purple{--bslib-color-fg: #fff;--bslib-color-bg: #c05f40;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #c05f40;color:#fff}.bg-gradient-yellow-pink{--bslib-color-fg: #fff;--bslib-color-bg: #f65f46;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #f65f46;color:#fff}.bg-gradient-yellow-red{--bslib-color-fg: #fff;--bslib-color-bg: #ff4625;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #ff4625;color:#fff}.bg-gradient-yellow-orange{--bslib-color-fg: #000;--bslib-color-bg: #f98b2e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #f98b2e;color:#000}.bg-gradient-yellow-green{--bslib-color-fg: #fff;--bslib-color-bg: #b28f18;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #b28f18;color:#fff}.bg-gradient-yellow-teal{--bslib-color-fg: #fff;--bslib-color-bg: #a6974b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #a6974b;color:#fff}.bg-gradient-yellow-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #d66859;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #d66859;color:#fff}.bg-gradient-green-blue{--bslib-color-fg: #fff;--bslib-color-bg: #35a069;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 
var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #35a069;color:#fff}.bg-gradient-green-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #4f746f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #4f746f;color:#fff}.bg-gradient-green-purple{--bslib-color-fg: #fff;--bslib-color-bg: #4d8640;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #4d8640;color:#fff}.bg-gradient-green-pink{--bslib-color-fg: #fff;--bslib-color-bg: #838646;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #838646;color:#fff}.bg-gradient-green-red{--bslib-color-fg: #fff;--bslib-color-bg: #8c6d25;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #8c6d25;color:#fff}.bg-gradient-green-orange{--bslib-color-fg: #000;--bslib-color-bg: #86b22e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #86b22e;color:#000}.bg-gradient-green-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #8c9c18;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #8c9c18;color:#fff}.bg-gradient-green-teal{--bslib-color-fg: #000;--bslib-color-bg: #33be4b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #33be4b;color:#000}.bg-gradient-green-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #638f59;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #638f59;color:#fff}.bg-gradient-teal-blue{--bslib-color-fg: #fff;--bslib-color-bg: 
#23acb5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #23acb5;color:#fff}.bg-gradient-teal-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #3c7fbb;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #3c7fbb;color:#fff}.bg-gradient-teal-purple{--bslib-color-fg: #fff;--bslib-color-bg: #3a918c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #3a918c;color:#fff}.bg-gradient-teal-pink{--bslib-color-fg: #fff;--bslib-color-bg: #709193;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #709193;color:#fff}.bg-gradient-teal-red{--bslib-color-fg: #fff;--bslib-color-bg: #797971;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #797971;color:#fff}.bg-gradient-teal-orange{--bslib-color-fg: #000;--bslib-color-bg: #73be7a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #73be7a;color:#000}.bg-gradient-teal-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #79a764;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #79a764;color:#fff}.bg-gradient-teal-green{--bslib-color-fg: #000;--bslib-color-bg: #2cc164;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #2cc164;color:#000}.bg-gradient-teal-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #509aa5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) 
#509aa5;color:#fff}.bg-gradient-cyan-blue{--bslib-color-fg: #fff;--bslib-color-bg: #6b66cb;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #6b66cb;color:#fff}.bg-gradient-cyan-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #8539d1;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #8539d1;color:#fff}.bg-gradient-cyan-purple{--bslib-color-fg: #fff;--bslib-color-bg: #834ba2;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #834ba2;color:#fff}.bg-gradient-cyan-pink{--bslib-color-fg: #fff;--bslib-color-bg: #b94ba8;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #b94ba8;color:#fff}.bg-gradient-cyan-red{--bslib-color-fg: #fff;--bslib-color-bg: #c23287;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #c23287;color:#fff}.bg-gradient-cyan-orange{--bslib-color-fg: #fff;--bslib-color-bg: #bc788f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #bc788f;color:#fff}.bg-gradient-cyan-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #c2617a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #c2617a;color:#fff}.bg-gradient-cyan-green{--bslib-color-fg: #fff;--bslib-color-bg: #757b7a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #757b7a;color:#fff}.bg-gradient-cyan-teal{--bslib-color-fg: #fff;--bslib-color-bg: #6983ad;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb 
var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #6983ad;color:#fff}.bg-blue{--bslib-color-bg: #2780e3;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-blue{--bslib-color-fg: #2780e3;color:var(--bslib-color-fg)}.bg-indigo{--bslib-color-bg: #6610f2;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-indigo{--bslib-color-fg: #6610f2;color:var(--bslib-color-fg)}.bg-purple{--bslib-color-bg: #613d7c;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-purple{--bslib-color-fg: #613d7c;color:var(--bslib-color-fg)}.bg-pink{--bslib-color-bg: #e83e8c;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-pink{--bslib-color-fg: #e83e8c;color:var(--bslib-color-fg)}.bg-red{--bslib-color-bg: #ff0039;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-red{--bslib-color-fg: #ff0039;color:var(--bslib-color-fg)}.bg-orange{--bslib-color-bg: #f0ad4e;--bslib-color-fg: #000;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-orange{--bslib-color-fg: #f0ad4e;color:var(--bslib-color-fg)}.bg-yellow{--bslib-color-bg: #ff7518;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-yellow{--bslib-color-fg: #ff7518;color:var(--bslib-color-fg)}.bg-green{--bslib-color-bg: #3fb618;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-green{--bslib-color-fg: #3fb618;color:var(--bslib-color-fg)}.bg-teal{--bslib-color-bg: #20c997;--bslib-color-fg: #000;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-teal{--bslib-color-fg: #20c997;color:var(--bslib-color-fg)}.bg-cyan{--bslib-color-bg: #9954bb;--bslib-color-fg: #fff;background-color:var(--bslib-color-bg);color:var(--bslib-color-fg)}.text-cyan{--bslib-color-fg: 
#9954bb;color:var(--bslib-color-fg)}.text-default{--bslib-color-fg: #343a40}.bg-default{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.text-primary{--bslib-color-fg: #2780e3}.bg-primary{--bslib-color-bg: #2780e3;--bslib-color-fg: #fff}.text-secondary{--bslib-color-fg: #343a40}.bg-secondary{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.text-success{--bslib-color-fg: #3fb618}.bg-success{--bslib-color-bg: #3fb618;--bslib-color-fg: #fff}.text-info{--bslib-color-fg: #9954bb}.bg-info{--bslib-color-bg: #9954bb;--bslib-color-fg: #fff}.text-warning{--bslib-color-fg: #ff7518}.bg-warning{--bslib-color-bg: #ff7518;--bslib-color-fg: #fff}.text-danger{--bslib-color-fg: #ff0039}.bg-danger{--bslib-color-bg: #ff0039;--bslib-color-fg: #fff}.text-light{--bslib-color-fg: #525252}.bg-light{--bslib-color-bg: #525252;--bslib-color-fg: #fff}.text-dark{--bslib-color-fg: #343a40}.bg-dark{--bslib-color-bg: #343a40;--bslib-color-fg: #fff}.bg-gradient-blue-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #4053e9;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #4053e9;color:#fff}.bg-gradient-blue-purple{--bslib-color-fg: #fff;--bslib-color-bg: #3e65ba;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #3e65ba;color:#fff}.bg-gradient-blue-pink{--bslib-color-fg: #fff;--bslib-color-bg: #7466c0;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #7466c0;color:#fff}.bg-gradient-blue-red{--bslib-color-fg: #fff;--bslib-color-bg: #7d4d9f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #7d4d9f;color:#fff}.bg-gradient-blue-orange{--bslib-color-fg: #fff;--bslib-color-bg: #7792a7;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 
var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #7792a7;color:#fff}.bg-gradient-blue-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #7d7c92;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #7d7c92;color:#fff}.bg-gradient-blue-green{--bslib-color-fg: #fff;--bslib-color-bg: #319692;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #319692;color:#fff}.bg-gradient-blue-teal{--bslib-color-fg: #fff;--bslib-color-bg: #249dc5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #249dc5;color:#fff}.bg-gradient-blue-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #556ed3;background:linear-gradient(var(--bg-gradient-deg, 140deg), #2780e3 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #556ed3;color:#fff}.bg-gradient-indigo-blue{--bslib-color-fg: #fff;--bslib-color-bg: #4d3dec;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #4d3dec;color:#fff}.bg-gradient-indigo-purple{--bslib-color-fg: #fff;--bslib-color-bg: #6422c3;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #6422c3;color:#fff}.bg-gradient-indigo-pink{--bslib-color-fg: #fff;--bslib-color-bg: #9a22c9;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #9a22c9;color:#fff}.bg-gradient-indigo-red{--bslib-color-fg: #fff;--bslib-color-bg: #a30aa8;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #a30aa8;color:#fff}.bg-gradient-indigo-orange{--bslib-color-fg: #fff;--bslib-color-bg: 
#9d4fb0;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #9d4fb0;color:#fff}.bg-gradient-indigo-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #a3389b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #a3389b;color:#fff}.bg-gradient-indigo-green{--bslib-color-fg: #fff;--bslib-color-bg: #56529b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #56529b;color:#fff}.bg-gradient-indigo-teal{--bslib-color-fg: #fff;--bslib-color-bg: #4a5ace;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #4a5ace;color:#fff}.bg-gradient-indigo-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #7a2bdc;background:linear-gradient(var(--bg-gradient-deg, 140deg), #6610f2 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #7a2bdc;color:#fff}.bg-gradient-purple-blue{--bslib-color-fg: #fff;--bslib-color-bg: #4a58a5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #4a58a5;color:#fff}.bg-gradient-purple-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #632bab;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #632bab;color:#fff}.bg-gradient-purple-pink{--bslib-color-fg: #fff;--bslib-color-bg: #973d82;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #973d82;color:#fff}.bg-gradient-purple-red{--bslib-color-fg: #fff;--bslib-color-bg: #a02561;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) 
#a02561;color:#fff}.bg-gradient-purple-orange{--bslib-color-fg: #fff;--bslib-color-bg: #9a6a6a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #9a6a6a;color:#fff}.bg-gradient-purple-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #a05354;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #a05354;color:#fff}.bg-gradient-purple-green{--bslib-color-fg: #fff;--bslib-color-bg: #536d54;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #536d54;color:#fff}.bg-gradient-purple-teal{--bslib-color-fg: #fff;--bslib-color-bg: #477587;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #477587;color:#fff}.bg-gradient-purple-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #774695;background:linear-gradient(var(--bg-gradient-deg, 140deg), #613d7c var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #774695;color:#fff}.bg-gradient-pink-blue{--bslib-color-fg: #fff;--bslib-color-bg: #9b58af;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #9b58af;color:#fff}.bg-gradient-pink-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #b42cb5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #b42cb5;color:#fff}.bg-gradient-pink-purple{--bslib-color-fg: #fff;--bslib-color-bg: #b23e86;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #b23e86;color:#fff}.bg-gradient-pink-red{--bslib-color-fg: #fff;--bslib-color-bg: #f1256b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c 
var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #f1256b;color:#fff}.bg-gradient-pink-orange{--bslib-color-fg: #fff;--bslib-color-bg: #eb6a73;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #eb6a73;color:#fff}.bg-gradient-pink-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #f1545e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #f1545e;color:#fff}.bg-gradient-pink-green{--bslib-color-fg: #fff;--bslib-color-bg: #a46e5e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #a46e5e;color:#fff}.bg-gradient-pink-teal{--bslib-color-fg: #fff;--bslib-color-bg: #987690;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #987690;color:#fff}.bg-gradient-pink-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #c8479f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #e83e8c var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #c8479f;color:#fff}.bg-gradient-red-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a9337d;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a9337d;color:#fff}.bg-gradient-red-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #c20683;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #c20683;color:#fff}.bg-gradient-red-purple{--bslib-color-fg: #fff;--bslib-color-bg: #c01854;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #c01854;color:#fff}.bg-gradient-red-pink{--bslib-color-fg: #fff;--bslib-color-bg: 
#f6195a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #f6195a;color:#fff}.bg-gradient-red-orange{--bslib-color-fg: #fff;--bslib-color-bg: #f94541;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #f94541;color:#fff}.bg-gradient-red-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #ff2f2c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #ff2f2c;color:#fff}.bg-gradient-red-green{--bslib-color-fg: #fff;--bslib-color-bg: #b2492c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #b2492c;color:#fff}.bg-gradient-red-teal{--bslib-color-fg: #fff;--bslib-color-bg: #a6505f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #a6505f;color:#fff}.bg-gradient-red-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #d6226d;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff0039 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #d6226d;color:#fff}.bg-gradient-orange-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a09b8a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a09b8a;color:#fff}.bg-gradient-orange-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #b96e90;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #b96e90;color:#fff}.bg-gradient-orange-purple{--bslib-color-fg: #fff;--bslib-color-bg: #b78060;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) 
#b78060;color:#fff}.bg-gradient-orange-pink{--bslib-color-fg: #fff;--bslib-color-bg: #ed8167;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #ed8167;color:#fff}.bg-gradient-orange-red{--bslib-color-fg: #fff;--bslib-color-bg: #f66846;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #f66846;color:#fff}.bg-gradient-orange-yellow{--bslib-color-fg: #000;--bslib-color-bg: #f69738;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #f69738;color:#000}.bg-gradient-orange-green{--bslib-color-fg: #000;--bslib-color-bg: #a9b138;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #a9b138;color:#000}.bg-gradient-orange-teal{--bslib-color-fg: #000;--bslib-color-bg: #9db86b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #9db86b;color:#000}.bg-gradient-orange-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #cd897a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #f0ad4e var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #cd897a;color:#fff}.bg-gradient-yellow-blue{--bslib-color-fg: #fff;--bslib-color-bg: #a97969;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #a97969;color:#fff}.bg-gradient-yellow-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #c24d6f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #c24d6f;color:#fff}.bg-gradient-yellow-purple{--bslib-color-fg: #fff;--bslib-color-bg: #c05f40;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 
var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #c05f40;color:#fff}.bg-gradient-yellow-pink{--bslib-color-fg: #fff;--bslib-color-bg: #f65f46;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #f65f46;color:#fff}.bg-gradient-yellow-red{--bslib-color-fg: #fff;--bslib-color-bg: #ff4625;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #ff4625;color:#fff}.bg-gradient-yellow-orange{--bslib-color-fg: #000;--bslib-color-bg: #f98b2e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #f98b2e;color:#000}.bg-gradient-yellow-green{--bslib-color-fg: #fff;--bslib-color-bg: #b28f18;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #b28f18;color:#fff}.bg-gradient-yellow-teal{--bslib-color-fg: #fff;--bslib-color-bg: #a6974b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #a6974b;color:#fff}.bg-gradient-yellow-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #d66859;background:linear-gradient(var(--bg-gradient-deg, 140deg), #ff7518 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #d66859;color:#fff}.bg-gradient-green-blue{--bslib-color-fg: #fff;--bslib-color-bg: #35a069;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #35a069;color:#fff}.bg-gradient-green-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #4f746f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #4f746f;color:#fff}.bg-gradient-green-purple{--bslib-color-fg: #fff;--bslib-color-bg: 
#4d8640;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #4d8640;color:#fff}.bg-gradient-green-pink{--bslib-color-fg: #fff;--bslib-color-bg: #838646;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #838646;color:#fff}.bg-gradient-green-red{--bslib-color-fg: #fff;--bslib-color-bg: #8c6d25;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #8c6d25;color:#fff}.bg-gradient-green-orange{--bslib-color-fg: #000;--bslib-color-bg: #86b22e;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #86b22e;color:#000}.bg-gradient-green-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #8c9c18;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #8c9c18;color:#fff}.bg-gradient-green-teal{--bslib-color-fg: #000;--bslib-color-bg: #33be4b;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #33be4b;color:#000}.bg-gradient-green-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #638f59;background:linear-gradient(var(--bg-gradient-deg, 140deg), #3fb618 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #638f59;color:#fff}.bg-gradient-teal-blue{--bslib-color-fg: #fff;--bslib-color-bg: #23acb5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #23acb5;color:#fff}.bg-gradient-teal-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #3c7fbb;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) 
#3c7fbb;color:#fff}.bg-gradient-teal-purple{--bslib-color-fg: #fff;--bslib-color-bg: #3a918c;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #3a918c;color:#fff}.bg-gradient-teal-pink{--bslib-color-fg: #fff;--bslib-color-bg: #709193;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #709193;color:#fff}.bg-gradient-teal-red{--bslib-color-fg: #fff;--bslib-color-bg: #797971;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #797971;color:#fff}.bg-gradient-teal-orange{--bslib-color-fg: #000;--bslib-color-bg: #73be7a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #73be7a;color:#000}.bg-gradient-teal-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #79a764;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #79a764;color:#fff}.bg-gradient-teal-green{--bslib-color-fg: #000;--bslib-color-bg: #2cc164;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #2cc164;color:#000}.bg-gradient-teal-cyan{--bslib-color-fg: #fff;--bslib-color-bg: #509aa5;background:linear-gradient(var(--bg-gradient-deg, 140deg), #20c997 var(--bg-gradient-start, 36%), #9954bb var(--bg-gradient-end, 180%)) #509aa5;color:#fff}.bg-gradient-cyan-blue{--bslib-color-fg: #fff;--bslib-color-bg: #6b66cb;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #2780e3 var(--bg-gradient-end, 180%)) #6b66cb;color:#fff}.bg-gradient-cyan-indigo{--bslib-color-fg: #fff;--bslib-color-bg: #8539d1;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb 
var(--bg-gradient-start, 36%), #6610f2 var(--bg-gradient-end, 180%)) #8539d1;color:#fff}.bg-gradient-cyan-purple{--bslib-color-fg: #fff;--bslib-color-bg: #834ba2;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #613d7c var(--bg-gradient-end, 180%)) #834ba2;color:#fff}.bg-gradient-cyan-pink{--bslib-color-fg: #fff;--bslib-color-bg: #b94ba8;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #e83e8c var(--bg-gradient-end, 180%)) #b94ba8;color:#fff}.bg-gradient-cyan-red{--bslib-color-fg: #fff;--bslib-color-bg: #c23287;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #ff0039 var(--bg-gradient-end, 180%)) #c23287;color:#fff}.bg-gradient-cyan-orange{--bslib-color-fg: #fff;--bslib-color-bg: #bc788f;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #f0ad4e var(--bg-gradient-end, 180%)) #bc788f;color:#fff}.bg-gradient-cyan-yellow{--bslib-color-fg: #fff;--bslib-color-bg: #c2617a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #ff7518 var(--bg-gradient-end, 180%)) #c2617a;color:#fff}.bg-gradient-cyan-green{--bslib-color-fg: #fff;--bslib-color-bg: #757b7a;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #3fb618 var(--bg-gradient-end, 180%)) #757b7a;color:#fff}.bg-gradient-cyan-teal{--bslib-color-fg: #fff;--bslib-color-bg: #6983ad;background:linear-gradient(var(--bg-gradient-deg, 140deg), #9954bb var(--bg-gradient-start, 36%), #20c997 var(--bg-gradient-end, 180%)) #6983ad;color:#fff}.tab-content>.tab-pane.html-fill-container{display:none}.tab-content>.active.html-fill-container{display:flex}.tab-content.html-fill-container{padding:0}:root{--bslib-spacer: 1rem;--bslib-mb-spacer: var(--bslib-spacer, 
1rem)}.bslib-mb-spacing{margin-bottom:var(--bslib-mb-spacer)}.bslib-gap-spacing{gap:var(--bslib-mb-spacer)}.bslib-gap-spacing>.bslib-mb-spacing,.bslib-gap-spacing>.form-group,.bslib-gap-spacing>p,.bslib-gap-spacing>pre{margin-bottom:0}.html-fill-container>.html-fill-item.bslib-mb-spacing{margin-bottom:0}.bslib-grid{display:grid !important;gap:var(--bslib-spacer, 1rem);height:var(--bslib-grid-height)}.bslib-grid.grid{grid-template-columns:repeat(var(--bs-columns, 12), minmax(0, 1fr));grid-template-rows:unset;grid-auto-rows:var(--bslib-grid--row-heights);--bslib-grid--row-heights--xs: unset;--bslib-grid--row-heights--sm: unset;--bslib-grid--row-heights--md: unset;--bslib-grid--row-heights--lg: unset;--bslib-grid--row-heights--xl: unset;--bslib-grid--row-heights--xxl: unset}.bslib-grid.grid.bslib-grid--row-heights--xs{--bslib-grid--row-heights: var(--bslib-grid--row-heights--xs)}@media(min-width: 576px){.bslib-grid.grid.bslib-grid--row-heights--sm{--bslib-grid--row-heights: var(--bslib-grid--row-heights--sm)}}@media(min-width: 768px){.bslib-grid.grid.bslib-grid--row-heights--md{--bslib-grid--row-heights: var(--bslib-grid--row-heights--md)}}@media(min-width: 992px){.bslib-grid.grid.bslib-grid--row-heights--lg{--bslib-grid--row-heights: var(--bslib-grid--row-heights--lg)}}@media(min-width: 1200px){.bslib-grid.grid.bslib-grid--row-heights--xl{--bslib-grid--row-heights: var(--bslib-grid--row-heights--xl)}}@media(min-width: 1400px){.bslib-grid.grid.bslib-grid--row-heights--xxl{--bslib-grid--row-heights: var(--bslib-grid--row-heights--xxl)}}.bslib-grid>*>.shiny-input-container{width:100%}.bslib-grid-item{grid-column:auto/span 1}@media(max-width: 767.98px){.bslib-grid-item{grid-column:1/-1}}@media(max-width: 575.98px){.bslib-grid{grid-template-columns:1fr !important;height:var(--bslib-grid-height-mobile)}.bslib-grid.grid{height:unset !important;grid-auto-rows:var(--bslib-grid--row-heights--xs, auto)}}.bslib-card{overflow:auto}.bslib-card 
.card-body+.card-body{padding-top:0}.bslib-card .card-body{overflow:auto}.bslib-card .card-body p{margin-top:0}.bslib-card .card-body p:last-child{margin-bottom:0}.bslib-card .card-body{max-height:var(--bslib-card-body-max-height, none)}.bslib-card[data-full-screen=true]>.card-body{max-height:var(--bslib-card-body-max-height-full-screen, none)}.bslib-card .card-header .form-group{margin-bottom:0}.bslib-card .card-header .selectize-control{margin-bottom:0}.bslib-card .card-header .selectize-control .item{margin-right:1.15rem}.bslib-card .card-footer{margin-top:auto}.bslib-card .bslib-navs-card-title{display:flex;flex-wrap:wrap;justify-content:space-between;align-items:center}.bslib-card .bslib-navs-card-title .nav{margin-left:auto}.bslib-card .bslib-sidebar-layout:not([data-bslib-sidebar-border=true]){border:none}.bslib-card .bslib-sidebar-layout:not([data-bslib-sidebar-border-radius=true]){border-top-left-radius:0;border-top-right-radius:0}[data-full-screen=true]{position:fixed;inset:3.5rem 1rem 1rem;height:auto !important;max-height:none !important;width:auto !important;z-index:1070}.bslib-full-screen-enter{display:none;position:absolute;bottom:var(--bslib-full-screen-enter-bottom, 0.2rem);right:var(--bslib-full-screen-enter-right, 0);top:var(--bslib-full-screen-enter-top);left:var(--bslib-full-screen-enter-left);color:var(--bslib-color-fg, var(--bs-card-color));background-color:var(--bslib-color-bg, var(--bs-card-bg, var(--bs-body-bg)));border:var(--bs-card-border-width) solid var(--bslib-color-fg, var(--bs-card-border-color));box-shadow:0 2px 4px rgba(0,0,0,.15);margin:.2rem .4rem;padding:.55rem !important;font-size:.8rem;cursor:pointer;opacity:.7;z-index:1070}.bslib-full-screen-enter:hover{opacity:1}.card[data-full-screen=false]:hover>*>.bslib-full-screen-enter{display:block}.bslib-has-full-screen .card:hover>*>.bslib-full-screen-enter{display:none}@media(max-width: 575.98px){.bslib-full-screen-enter{display:none 
!important}}.bslib-full-screen-exit{position:relative;top:1.35rem;font-size:.9rem;cursor:pointer;text-decoration:none;display:flex;float:right;margin-right:2.15rem;align-items:center;color:rgba(var(--bs-body-bg-rgb), 0.8)}.bslib-full-screen-exit:hover{color:rgba(var(--bs-body-bg-rgb), 1)}.bslib-full-screen-exit svg{margin-left:.5rem;font-size:1.5rem}#bslib-full-screen-overlay{position:fixed;inset:0;background-color:rgba(var(--bs-body-color-rgb), 0.6);backdrop-filter:blur(2px);-webkit-backdrop-filter:blur(2px);z-index:1069;animation:bslib-full-screen-overlay-enter 400ms cubic-bezier(0.6, 0.02, 0.65, 1) forwards}@keyframes bslib-full-screen-overlay-enter{0%{opacity:0}100%{opacity:1}}:root{--bslib-value-box-shadow: none;--bslib-value-box-border-width-auto-yes: var(--bslib-value-box-border-width-baseline);--bslib-value-box-border-width-auto-no: 0;--bslib-value-box-border-width-baseline: 1px}.bslib-value-box{border-width:var(--bslib-value-box-border-width-auto-no, var(--bslib-value-box-border-width-baseline));container-name:bslib-value-box;container-type:inline-size}.bslib-value-box.card{box-shadow:var(--bslib-value-box-shadow)}.bslib-value-box.border-auto{border-width:var(--bslib-value-box-border-width-auto-yes, var(--bslib-value-box-border-width-baseline))}.bslib-value-box.default{--bslib-value-box-bg-default: var(--bs-card-bg, #181818);--bslib-value-box-border-color-default: var(--bs-card-border-color, rgba(0, 0, 0, 0.175));color:var(--bslib-value-box-color);background-color:var(--bslib-value-box-bg, var(--bslib-value-box-bg-default));border-color:var(--bslib-value-box-border-color, var(--bslib-value-box-border-color-default))}.bslib-value-box .value-box-grid{display:grid;grid-template-areas:"left right";align-items:center;overflow:hidden}.bslib-value-box .value-box-showcase{height:100%;max-height:var(---bslib-value-box-showcase-max-h, 100%)}.bslib-value-box .value-box-showcase,.bslib-value-box 
.value-box-showcase>.html-fill-item{width:100%}.bslib-value-box[data-full-screen=true] .value-box-showcase{max-height:var(---bslib-value-box-showcase-max-h-fs, 100%)}@media screen and (min-width: 575.98px){@container bslib-value-box (max-width: 300px){.bslib-value-box:not(.showcase-bottom) .value-box-grid{grid-template-columns:1fr !important;grid-template-rows:auto auto;grid-template-areas:"top" "bottom"}.bslib-value-box:not(.showcase-bottom) .value-box-grid .value-box-showcase{grid-area:top !important}.bslib-value-box:not(.showcase-bottom) .value-box-grid .value-box-area{grid-area:bottom !important;justify-content:end}}}.bslib-value-box .value-box-area{justify-content:center;padding:1.5rem 1rem;font-size:.9rem;font-weight:500}.bslib-value-box .value-box-area *{margin-bottom:0;margin-top:0}.bslib-value-box .value-box-title{font-size:1rem;margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2}.bslib-value-box .value-box-title:empty::after{content:" "}.bslib-value-box .value-box-value{font-size:calc(1.29rem + 0.48vw);margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2}@media(min-width: 1200px){.bslib-value-box .value-box-value{font-size:1.65rem}}.bslib-value-box .value-box-value:empty::after{content:" "}.bslib-value-box .value-box-showcase{align-items:center;justify-content:center;margin-top:auto;margin-bottom:auto;padding:1rem}.bslib-value-box .value-box-showcase .bi,.bslib-value-box .value-box-showcase .fa,.bslib-value-box .value-box-showcase .fab,.bslib-value-box .value-box-showcase .fas,.bslib-value-box .value-box-showcase .far{opacity:.85;min-width:50px;max-width:125%}.bslib-value-box .value-box-showcase .bi,.bslib-value-box .value-box-showcase .fa,.bslib-value-box .value-box-showcase .fab,.bslib-value-box .value-box-showcase .fas,.bslib-value-box .value-box-showcase .far{font-size:4rem}.bslib-value-box.showcase-top-right .value-box-grid{grid-template-columns:1fr var(---bslib-value-box-showcase-w, 50%)}.bslib-value-box.showcase-top-right 
.value-box-grid .value-box-showcase{grid-area:right;margin-left:auto;align-self:start;align-items:end;padding-left:0;padding-bottom:0}.bslib-value-box.showcase-top-right .value-box-grid .value-box-area{grid-area:left;align-self:end}.bslib-value-box.showcase-top-right[data-full-screen=true] .value-box-grid{grid-template-columns:auto var(---bslib-value-box-showcase-w-fs, 1fr)}.bslib-value-box.showcase-top-right[data-full-screen=true] .value-box-grid>div{align-self:center}.bslib-value-box.showcase-top-right:not([data-full-screen=true]) .value-box-showcase{margin-top:0}@container bslib-value-box (max-width: 300px){.bslib-value-box.showcase-top-right:not([data-full-screen=true]) .value-box-grid .value-box-showcase{padding-left:1rem}}.bslib-value-box.showcase-left-center .value-box-grid{grid-template-columns:var(---bslib-value-box-showcase-w, 30%) auto}.bslib-value-box.showcase-left-center[data-full-screen=true] .value-box-grid{grid-template-columns:var(---bslib-value-box-showcase-w-fs, 1fr) auto}.bslib-value-box.showcase-left-center:not([data-fill-screen=true]) .value-box-grid .value-box-showcase{grid-area:left}.bslib-value-box.showcase-left-center:not([data-fill-screen=true]) .value-box-grid .value-box-area{grid-area:right}.bslib-value-box.showcase-bottom .value-box-grid{grid-template-columns:1fr;grid-template-rows:1fr var(---bslib-value-box-showcase-h, auto);grid-template-areas:"top" "bottom";overflow:hidden}.bslib-value-box.showcase-bottom .value-box-grid .value-box-showcase{grid-area:bottom;padding:0;margin:0}.bslib-value-box.showcase-bottom .value-box-grid .value-box-area{grid-area:top}.bslib-value-box.showcase-bottom[data-full-screen=true] .value-box-grid{grid-template-rows:1fr var(---bslib-value-box-showcase-h-fs, 2fr)}.bslib-value-box.showcase-bottom[data-full-screen=true] .value-box-grid .value-box-showcase{padding:1rem}[data-bs-theme=dark] .bslib-value-box{--bslib-value-box-shadow: 0 0.5rem 1rem rgb(0 0 0 / 50%)}:root{--bslib-page-sidebar-title-bg: 
#303030;--bslib-page-sidebar-title-color: #fff}.bslib-page-title{background-color:var(--bslib-page-sidebar-title-bg);color:var(--bslib-page-sidebar-title-color);font-size:1.25rem;font-weight:300;padding:var(--bslib-spacer, 1rem);padding-left:1.5rem;margin-bottom:0;border-bottom:1px solid #dee2e6}html{height:100%}.bslib-page-fill{width:100%;height:100%;margin:0;padding:var(--bslib-spacer, 1rem);gap:var(--bslib-spacer, 1rem)}@media(max-width: 575.98px){.bslib-page-fill{height:var(--bslib-page-fill-mobile-height, auto)}}@media(min-width: 576px){.nav:not(.nav-hidden){display:flex !important;display:-webkit-flex !important}.nav:not(.nav-hidden):not(.nav-stacked):not(.flex-column){float:none !important}.nav:not(.nav-hidden):not(.nav-stacked):not(.flex-column)>.bslib-nav-spacer{margin-left:auto !important}.nav:not(.nav-hidden):not(.nav-stacked):not(.flex-column)>.form-inline{margin-top:auto;margin-bottom:auto}.nav:not(.nav-hidden).nav-stacked{flex-direction:column;-webkit-flex-direction:column;height:100%}.nav:not(.nav-hidden).nav-stacked>.bslib-nav-spacer{margin-top:auto !important}}.accordion .accordion-header{font-size:calc(1.29rem + 0.48vw);margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2;color:var(--bs-heading-color);margin-bottom:0}@media(min-width: 1200px){.accordion .accordion-header{font-size:1.65rem}}.accordion .accordion-icon:not(:empty){margin-right:.75rem;display:flex}.accordion .accordion-button:not(.collapsed){box-shadow:none}.accordion .accordion-button:not(.collapsed):focus{box-shadow:var(--bs-accordion-btn-focus-box-shadow)}.bslib-sidebar-layout{--bslib-sidebar-transition-duration: 500ms;--bslib-sidebar-transition-easing-x: cubic-bezier(0.8, 0.78, 0.22, 1.07);--bslib-sidebar-border: var(--bs-card-border-width, 1px) solid var(--bs-card-border-color, rgba(0, 0, 0, 0.175));--bslib-sidebar-border-radius: var(--bs-border-radius);--bslib-sidebar-vert-border: var(--bs-card-border-width, 1px) solid var(--bs-card-border-color, rgba(0, 0, 0, 
0.175));--bslib-sidebar-bg: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.05);--bslib-sidebar-fg: var(--bs-emphasis-color, black);--bslib-sidebar-main-fg: var(--bs-card-color, var(--bs-body-color));--bslib-sidebar-main-bg: var(--bs-card-bg, var(--bs-body-bg));--bslib-sidebar-toggle-bg: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.1);--bslib-sidebar-padding: calc(var(--bslib-spacer) * 1.5);--bslib-sidebar-icon-size: var(--bslib-spacer, 1rem);--bslib-sidebar-icon-button-size: calc(var(--bslib-sidebar-icon-size, 1rem) * 2);--bslib-sidebar-padding-icon: calc(var(--bslib-sidebar-icon-button-size, 2rem) * 1.5);--bslib-collapse-toggle-border-radius: var(--bs-border-radius, 0.25rem);--bslib-collapse-toggle-transform: 0deg;--bslib-sidebar-toggle-transition-easing: cubic-bezier(1, 0, 0, 1);--bslib-collapse-toggle-right-transform: 180deg;--bslib-sidebar-column-main: minmax(0, 1fr);display:grid !important;grid-template-columns:min(100% - var(--bslib-sidebar-icon-size),var(--bslib-sidebar-width, 250px)) var(--bslib-sidebar-column-main);position:relative;transition:grid-template-columns ease-in-out var(--bslib-sidebar-transition-duration);border:var(--bslib-sidebar-border);border-radius:var(--bslib-sidebar-border-radius)}@media(prefers-reduced-motion: reduce){.bslib-sidebar-layout{transition:none}}.bslib-sidebar-layout[data-bslib-sidebar-border=false]{border:none}.bslib-sidebar-layout[data-bslib-sidebar-border-radius=false]{border-radius:initial}.bslib-sidebar-layout>.main,.bslib-sidebar-layout>.sidebar{grid-row:1/2;border-radius:inherit;overflow:auto}.bslib-sidebar-layout>.main{grid-column:2/3;border-top-left-radius:0;border-bottom-left-radius:0;padding:var(--bslib-sidebar-padding);transition:padding var(--bslib-sidebar-transition-easing-x) 
var(--bslib-sidebar-transition-duration);color:var(--bslib-sidebar-main-fg);background-color:var(--bslib-sidebar-main-bg)}.bslib-sidebar-layout>.sidebar{grid-column:1/2;width:100%;height:100%;border-right:var(--bslib-sidebar-vert-border);border-top-right-radius:0;border-bottom-right-radius:0;color:var(--bslib-sidebar-fg);background-color:var(--bslib-sidebar-bg);backdrop-filter:blur(5px)}.bslib-sidebar-layout>.sidebar>.sidebar-content{display:flex;flex-direction:column;gap:var(--bslib-spacer, 1rem);padding:var(--bslib-sidebar-padding);padding-top:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout>.sidebar>.sidebar-content>:last-child:not(.sidebar-title){margin-bottom:0}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion{margin-left:calc(-1*var(--bslib-sidebar-padding));margin-right:calc(-1*var(--bslib-sidebar-padding))}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion:last-child{margin-bottom:calc(-1*var(--bslib-sidebar-padding))}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion:not(:last-child){margin-bottom:1rem}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion .accordion-body{display:flex;flex-direction:column}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion:not(:first-child) .accordion-item:first-child{border-top:var(--bs-accordion-border-width) solid var(--bs-accordion-border-color)}.bslib-sidebar-layout>.sidebar>.sidebar-content>.accordion:not(:last-child) .accordion-item:last-child{border-bottom:var(--bs-accordion-border-width) solid var(--bs-accordion-border-color)}.bslib-sidebar-layout>.sidebar>.sidebar-content.has-accordion>.sidebar-title{border-bottom:none;padding-bottom:0}.bslib-sidebar-layout>.sidebar 
.shiny-input-container{width:100%}.bslib-sidebar-layout[data-bslib-sidebar-open=always]>.sidebar>.sidebar-content{padding-top:var(--bslib-sidebar-padding)}.bslib-sidebar-layout>.collapse-toggle{grid-row:1/2;grid-column:1/2;display:inline-flex;align-items:center;position:absolute;right:calc(var(--bslib-sidebar-icon-size));top:calc(var(--bslib-sidebar-icon-size, 1rem)/2);border:none;border-radius:var(--bslib-collapse-toggle-border-radius);height:var(--bslib-sidebar-icon-button-size, 2rem);width:var(--bslib-sidebar-icon-button-size, 2rem);display:flex;align-items:center;justify-content:center;padding:0;color:var(--bslib-sidebar-fg);background-color:unset;transition:color var(--bslib-sidebar-transition-easing-x) var(--bslib-sidebar-transition-duration),top var(--bslib-sidebar-transition-easing-x) var(--bslib-sidebar-transition-duration),right var(--bslib-sidebar-transition-easing-x) var(--bslib-sidebar-transition-duration),left var(--bslib-sidebar-transition-easing-x) var(--bslib-sidebar-transition-duration)}.bslib-sidebar-layout>.collapse-toggle:hover{background-color:var(--bslib-sidebar-toggle-bg)}.bslib-sidebar-layout>.collapse-toggle>.collapse-icon{opacity:.8;width:var(--bslib-sidebar-icon-size);height:var(--bslib-sidebar-icon-size);transform:rotateY(var(--bslib-collapse-toggle-transform));transition:transform var(--bslib-sidebar-toggle-transition-easing) var(--bslib-sidebar-transition-duration)}.bslib-sidebar-layout>.collapse-toggle:hover>.collapse-icon{opacity:1}.bslib-sidebar-layout .sidebar-title{font-size:1.25rem;line-height:1.25;margin-top:0;margin-bottom:1rem;padding-bottom:1rem;border-bottom:var(--bslib-sidebar-border)}.bslib-sidebar-layout.sidebar-right{grid-template-columns:var(--bslib-sidebar-column-main) min(100% - var(--bslib-sidebar-icon-size),var(--bslib-sidebar-width, 
250px))}.bslib-sidebar-layout.sidebar-right>.main{grid-column:1/2;border-top-right-radius:0;border-bottom-right-radius:0;border-top-left-radius:inherit;border-bottom-left-radius:inherit}.bslib-sidebar-layout.sidebar-right>.sidebar{grid-column:2/3;border-right:none;border-left:var(--bslib-sidebar-vert-border);border-top-left-radius:0;border-bottom-left-radius:0}.bslib-sidebar-layout.sidebar-right>.collapse-toggle{grid-column:2/3;left:var(--bslib-sidebar-icon-size);right:unset;border:var(--bslib-collapse-toggle-border)}.bslib-sidebar-layout.sidebar-right>.collapse-toggle>.collapse-icon{transform:rotateY(var(--bslib-collapse-toggle-right-transform))}.bslib-sidebar-layout.sidebar-collapsed{--bslib-collapse-toggle-transform: 180deg;--bslib-collapse-toggle-right-transform: 0deg;--bslib-sidebar-vert-border: none;grid-template-columns:0 minmax(0, 1fr)}.bslib-sidebar-layout.sidebar-collapsed.sidebar-right{grid-template-columns:minmax(0, 1fr) 0}.bslib-sidebar-layout.sidebar-collapsed:not(.transitioning)>.sidebar>*{display:none}.bslib-sidebar-layout.sidebar-collapsed>.main{border-radius:inherit}.bslib-sidebar-layout.sidebar-collapsed:not(.sidebar-right)>.main{padding-left:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout.sidebar-collapsed.sidebar-right>.main{padding-right:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout.sidebar-collapsed>.collapse-toggle{color:var(--bslib-sidebar-main-fg);top:calc(var(--bslib-sidebar-overlap-counter, 0)*(var(--bslib-sidebar-icon-size) + var(--bslib-sidebar-padding)) + var(--bslib-sidebar-icon-size, 1rem)/2);right:calc(-2.5*var(--bslib-sidebar-icon-size) - var(--bs-card-border-width, 1px))}.bslib-sidebar-layout.sidebar-collapsed.sidebar-right>.collapse-toggle{left:calc(-2.5*var(--bslib-sidebar-icon-size) - var(--bs-card-border-width, 1px));right:unset}@media(min-width: 576px){.bslib-sidebar-layout.transitioning>.sidebar>.sidebar-content{display:none}}@media(max-width: 
575.98px){.bslib-sidebar-layout[data-bslib-sidebar-open=desktop]{--bslib-sidebar-js-init-collapsed: true}.bslib-sidebar-layout>.sidebar,.bslib-sidebar-layout.sidebar-right>.sidebar{border:none}.bslib-sidebar-layout>.main,.bslib-sidebar-layout.sidebar-right>.main{grid-column:1/3}.bslib-sidebar-layout[data-bslib-sidebar-open=always]{display:block !important}.bslib-sidebar-layout[data-bslib-sidebar-open=always]>.sidebar{max-height:var(--bslib-sidebar-max-height-mobile);overflow-y:auto;border-top:var(--bslib-sidebar-vert-border)}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]){grid-template-columns:100% 0}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]):not(.sidebar-collapsed)>.sidebar{z-index:1}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]):not(.sidebar-collapsed)>.collapse-toggle{z-index:1}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-right{grid-template-columns:0 100%}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-collapsed{grid-template-columns:0 100%}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-collapsed.sidebar-right{grid-template-columns:100% 0}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]):not(.sidebar-right)>.main{padding-left:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-right>.main{padding-right:var(--bslib-sidebar-padding-icon)}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always])>.main{opacity:0;transition:opacity var(--bslib-sidebar-transition-easing-x) 
var(--bslib-sidebar-transition-duration)}.bslib-sidebar-layout:not([data-bslib-sidebar-open=always]).sidebar-collapsed>.main{opacity:1}}.navbar+.container-fluid:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-sm:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-md:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-lg:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-xl:has(>.tab-content>.tab-pane.active.html-fill-container),.navbar+.container-xxl:has(>.tab-content>.tab-pane.active.html-fill-container){padding-left:0;padding-right:0}.navbar+.container-fluid>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-sm>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-md>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-lg>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-xl>.tab-content>.tab-pane.active.html-fill-container,.navbar+.container-xxl>.tab-content>.tab-pane.active.html-fill-container{padding:var(--bslib-spacer, 1rem);gap:var(--bslib-spacer, 
1rem)}.navbar+.container-fluid>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-sm>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-md>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-lg>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-xl>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child),.navbar+.container-xxl>.tab-content>.tab-pane.active.html-fill-container:has(>.bslib-sidebar-layout:only-child){padding:0}.navbar+.container-fluid>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-sm>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-md>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-lg>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-xl>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]),.navbar+.container-xxl>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border=true]){border-left:none;border-right:none;border-bottom:none}.navbar+.container-fluid>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-sm>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-md>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:o
nly-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-lg>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-xl>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]),.navbar+.container-xxl>.tab-content>.tab-pane.active.html-fill-container>.bslib-sidebar-layout:only-child:not([data-bslib-sidebar-border-radius=true]){border-radius:0}.navbar+div>.bslib-sidebar-layout{border-top:var(--bslib-sidebar-border)}.html-fill-container{display:flex;flex-direction:column;min-height:0;min-width:0}.html-fill-container>.html-fill-item{flex:1 1 auto;min-height:0;min-width:0}.html-fill-container>:not(.html-fill-item){flex:0 0 auto}.quarto-container{min-height:calc(100vh - 132px)}body.hypothesis-enabled #quarto-header{margin-right:16px}footer.footer .nav-footer,#quarto-header>nav{padding-left:1em;padding-right:1em}footer.footer div.nav-footer p:first-child{margin-top:0}footer.footer div.nav-footer p:last-child{margin-bottom:0}#quarto-content>*{padding-top:14px}#quarto-content>#quarto-sidebar-glass{padding-top:0px}@media(max-width: 991.98px){#quarto-content>*{padding-top:0}#quarto-content .subtitle{padding-top:14px}#quarto-content section:first-of-type h2:first-of-type,#quarto-content section:first-of-type .h2:first-of-type{margin-top:1rem}}.headroom-target,header.headroom{will-change:transform;transition:position 200ms linear;transition:all 200ms linear}header.headroom--pinned{transform:translateY(0%)}header.headroom--unpinned{transform:translateY(-100%)}.navbar-container{width:100%}.navbar-brand{overflow:hidden;text-overflow:ellipsis}.navbar-brand-container{max-width:calc(100% - 115px);min-width:0;display:flex;align-items:center}@media(min-width: 
992px){.navbar-brand-container{margin-right:1em}}.navbar-brand.navbar-brand-logo{margin-right:4px;display:inline-flex}.navbar-toggler{flex-basis:content;flex-shrink:0}.navbar .navbar-brand-container{order:2}.navbar .navbar-toggler{order:1}.navbar .navbar-container>.navbar-nav{order:20}.navbar .navbar-container>.navbar-brand-container{margin-left:0 !important;margin-right:0 !important}.navbar .navbar-collapse{order:20}.navbar #quarto-search{order:4;margin-left:auto}.navbar .navbar-toggler{margin-right:.5em}.navbar-collapse .quarto-navbar-tools{margin-left:.5em}.navbar-logo{max-height:24px;width:auto;padding-right:4px}nav .nav-item:not(.compact){padding-top:1px}nav .nav-link i,nav .dropdown-item i{padding-right:1px}.navbar-expand-lg .navbar-nav .nav-link{padding-left:.6rem;padding-right:.6rem}nav .nav-item.compact .nav-link{padding-left:.5rem;padding-right:.5rem;font-size:1.1rem}.navbar .quarto-navbar-tools{order:3}.navbar .quarto-navbar-tools div.dropdown{display:inline-block}.navbar .quarto-navbar-tools .quarto-navigation-tool{color:#bdbdbd}.navbar .quarto-navbar-tools .quarto-navigation-tool:hover{color:#aed1e4}.navbar-nav .dropdown-menu{min-width:220px;font-size:.9rem}.navbar .navbar-nav .nav-link.dropdown-toggle::after{opacity:.75;vertical-align:.175em}.navbar ul.dropdown-menu{padding-top:0;padding-bottom:0}.navbar .dropdown-header{text-transform:uppercase;font-size:.8rem;padding:0 .5rem}.navbar .dropdown-item{padding:.4rem .5rem}.navbar .dropdown-item>i.bi{margin-left:.1rem;margin-right:.25em}.sidebar #quarto-search{margin-top:-1px}.sidebar #quarto-search svg.aa-SubmitIcon{width:16px;height:16px}.sidebar-navigation a{color:inherit}.sidebar-title{margin-top:.25rem;padding-bottom:.5rem;font-size:1.3rem;line-height:1.6rem;visibility:visible}.sidebar-title>a{font-size:inherit;text-decoration:none}.sidebar-title .sidebar-tools-main{margin-top:-6px}@media(max-width: 991.98px){#quarto-sidebar div.sidebar-header{padding-top:.2em}}.sidebar-header-stacked 
.sidebar-title{margin-top:.6rem}.sidebar-logo{max-width:90%;padding-bottom:.5rem}.sidebar-logo-link{text-decoration:none}.sidebar-navigation li a{text-decoration:none}.sidebar-navigation .quarto-navigation-tool{opacity:.7;font-size:.875rem}#quarto-sidebar>nav>.sidebar-tools-main{margin-left:14px}.sidebar-tools-main{display:inline-flex;margin-left:0px;order:2}.sidebar-tools-main:not(.tools-wide){vertical-align:middle}.sidebar-navigation .quarto-navigation-tool.dropdown-toggle::after{display:none}.sidebar.sidebar-navigation>*{padding-top:1em}.sidebar-item{margin-bottom:.2em;line-height:1rem;margin-top:.4rem}.sidebar-section{padding-left:.5em;padding-bottom:.2em}.sidebar-item .sidebar-item-container{display:flex;justify-content:space-between;cursor:pointer}.sidebar-item-toggle:hover{cursor:pointer}.sidebar-item .sidebar-item-toggle .bi{font-size:.7rem;text-align:center}.sidebar-item .sidebar-item-toggle .bi-chevron-right::before{transition:transform 200ms ease}.sidebar-item .sidebar-item-toggle[aria-expanded=false] .bi-chevron-right::before{transform:none}.sidebar-item .sidebar-item-toggle[aria-expanded=true] .bi-chevron-right::before{transform:rotate(90deg)}.sidebar-item-text{width:100%}.sidebar-navigation .sidebar-divider{margin-left:0;margin-right:0;margin-top:.5rem;margin-bottom:.5rem}@media(max-width: 991.98px){.quarto-secondary-nav{display:block}.quarto-secondary-nav button.quarto-search-button{padding-right:0em;padding-left:2em}.quarto-secondary-nav button.quarto-btn-toggle{margin-left:-0.75rem;margin-right:.15rem}.quarto-secondary-nav nav.quarto-title-breadcrumbs{display:none}.quarto-secondary-nav nav.quarto-page-breadcrumbs{display:flex;align-items:center;padding-right:1em;margin-left:-0.25em}.quarto-secondary-nav nav.quarto-page-breadcrumbs a{text-decoration:none}.quarto-secondary-nav nav.quarto-page-breadcrumbs ol.breadcrumb{margin-bottom:0}}@media(min-width: 992px){.quarto-secondary-nav{display:none}}.quarto-title-breadcrumbs 
.breadcrumb{margin-bottom:.5em;font-size:.9rem}.quarto-title-breadcrumbs .breadcrumb li:last-of-type a{color:#6c757d}.quarto-secondary-nav .quarto-btn-toggle{color:#faf1e4}.quarto-secondary-nav[aria-expanded=false] .quarto-btn-toggle .bi-chevron-right::before{transform:none}.quarto-secondary-nav[aria-expanded=true] .quarto-btn-toggle .bi-chevron-right::before{transform:rotate(90deg)}.quarto-secondary-nav .quarto-btn-toggle .bi-chevron-right::before{transition:transform 200ms ease}.quarto-secondary-nav{cursor:pointer}.no-decor{text-decoration:none}.quarto-secondary-nav-title{margin-top:.3em;color:#faf1e4;padding-top:4px}.quarto-secondary-nav nav.quarto-page-breadcrumbs{color:#faf1e4}.quarto-secondary-nav nav.quarto-page-breadcrumbs a{color:#faf1e4}.quarto-secondary-nav nav.quarto-page-breadcrumbs a:hover{color:rgba(174,209,228,.8)}.quarto-secondary-nav nav.quarto-page-breadcrumbs .breadcrumb-item::before{color:#eac48e}.breadcrumb-item{line-height:1.2rem}div.sidebar-item-container{color:#faf1e4}div.sidebar-item-container:hover,div.sidebar-item-container:focus{color:rgba(174,209,228,.8)}div.sidebar-item-container.disabled{color:rgba(250,241,228,.75)}div.sidebar-item-container .active,div.sidebar-item-container .show>.nav-link,div.sidebar-item-container .sidebar-link>code{color:#aed1e4}div.sidebar.sidebar-navigation.rollup.quarto-sidebar-toggle-contents,nav.sidebar.sidebar-navigation:not(.rollup){background-color:#353535}@media(max-width: 991.98px){.sidebar-navigation .sidebar-item a,.nav-page .nav-page-text,.sidebar-navigation{font-size:1rem}.sidebar-navigation ul.sidebar-section.depth1 .sidebar-section-item{font-size:1.1rem}.sidebar-logo{display:none}.sidebar.sidebar-navigation{position:static;border-bottom:1px solid #dee2e6}.sidebar.sidebar-navigation.collapsing{position:fixed;z-index:1000}.sidebar.sidebar-navigation.show{position:fixed;z-index:1000}.sidebar.sidebar-navigation{min-height:100%}nav.quarto-secondary-nav{background-color:#353535;border-bottom:1px solid 
#dee2e6}.quarto-banner nav.quarto-secondary-nav{background-color:#303030;color:#bdbdbd;border-top:1px solid #dee2e6}.sidebar .sidebar-footer{visibility:visible;padding-top:1rem;position:inherit}.sidebar-tools-collapse{display:block}}#quarto-sidebar{transition:width .15s ease-in}#quarto-sidebar>*{padding-right:1em}@media(max-width: 991.98px){#quarto-sidebar .sidebar-menu-container{white-space:nowrap;min-width:225px}#quarto-sidebar.show{transition:width .15s ease-out}}@media(min-width: 992px){#quarto-sidebar{display:flex;flex-direction:column}.nav-page .nav-page-text,.sidebar-navigation .sidebar-section .sidebar-item{font-size:.875rem}.sidebar-navigation .sidebar-item{font-size:.925rem}.sidebar.sidebar-navigation{display:block;position:sticky}.sidebar-search{width:100%}.sidebar .sidebar-footer{visibility:visible}}@media(min-width: 992px){#quarto-sidebar-glass{display:none}}@media(max-width: 991.98px){#quarto-sidebar-glass{position:fixed;top:0;bottom:0;left:0;right:0;background-color:rgba(255,255,255,0);transition:background-color .15s ease-in;z-index:-1}#quarto-sidebar-glass.collapsing{z-index:1000}#quarto-sidebar-glass.show{transition:background-color .15s ease-out;background-color:rgba(102,102,102,.4);z-index:1000}}.sidebar .sidebar-footer{padding:.5rem 1rem;align-self:flex-end;color:#6c757d;width:100%}.quarto-page-breadcrumbs .breadcrumb-item+.breadcrumb-item,.quarto-page-breadcrumbs .breadcrumb-item{padding-right:.33em;padding-left:0}.quarto-page-breadcrumbs .breadcrumb-item::before{padding-right:.33em}.quarto-sidebar-footer{font-size:.875em}.sidebar-section .bi-chevron-right{vertical-align:middle}.sidebar-section .bi-chevron-right::before{font-size:.9em}.notransition{-webkit-transition:none !important;-moz-transition:none !important;-o-transition:none !important;transition:none !important}.btn:focus:not(:focus-visible){box-shadow:none}.page-navigation{display:flex;justify-content:space-between}.nav-page{padding-bottom:.75em}.nav-page 
.bi{font-size:1.8rem;vertical-align:middle}.nav-page .nav-page-text{padding-left:.25em;padding-right:.25em}.nav-page a{color:#6c757d;text-decoration:none;display:flex;align-items:center}.nav-page a:hover{color:#8ba7b6}.nav-footer .toc-actions{padding-bottom:.5em;padding-top:.5em}.nav-footer .toc-actions a,.nav-footer .toc-actions a:hover{text-decoration:none}.nav-footer .toc-actions ul{display:flex;list-style:none}.nav-footer .toc-actions ul :first-child{margin-left:auto}.nav-footer .toc-actions ul :last-child{margin-right:auto}.nav-footer .toc-actions ul li{padding-right:1.5em}.nav-footer .toc-actions ul li i.bi{padding-right:.4em}.nav-footer .toc-actions ul li:last-of-type{padding-right:0}.nav-footer{display:flex;flex-direction:row;flex-wrap:wrap;justify-content:space-between;align-items:baseline;text-align:center;padding-top:.5rem;padding-bottom:.5rem;background-color:#181818}body.nav-fixed{padding-top:64px}.nav-footer-contents{color:#6c757d;margin-top:.25rem}.nav-footer{min-height:3.5em;color:#828282}.nav-footer a{color:#828282}.nav-footer .nav-footer-left{font-size:.825em}.nav-footer .nav-footer-center{font-size:.825em}.nav-footer .nav-footer-right{font-size:.825em}.nav-footer-left .footer-items,.nav-footer-center .footer-items,.nav-footer-right .footer-items{display:inline-flex;padding-top:.3em;padding-bottom:.3em;margin-bottom:0em}.nav-footer-left .footer-items .nav-link,.nav-footer-center .footer-items .nav-link,.nav-footer-right .footer-items .nav-link{padding-left:.6em;padding-right:.6em}@media(min-width: 768px){.nav-footer-left{flex:1 1 0px;text-align:left}}@media(max-width: 575.98px){.nav-footer-left{margin-bottom:1em;flex:100%}}@media(min-width: 768px){.nav-footer-right{flex:1 1 0px;text-align:right}}@media(max-width: 575.98px){.nav-footer-right{margin-bottom:1em;flex:100%}}.nav-footer-center{text-align:center;min-height:3em}@media(min-width: 768px){.nav-footer-center{flex:1 1 0px}}.nav-footer-center 
.footer-items{justify-content:center}@media(max-width: 767.98px){.nav-footer-center{margin-bottom:1em;flex:100%}}@media(max-width: 767.98px){.nav-footer-center{margin-top:3em;order:10}}.navbar .quarto-reader-toggle.reader .quarto-reader-toggle-btn{background-color:#bdbdbd;border-radius:3px}@media(max-width: 991.98px){.quarto-reader-toggle{display:none}}.quarto-reader-toggle.reader.quarto-navigation-tool .quarto-reader-toggle-btn{background-color:#faf1e4;border-radius:3px}.quarto-reader-toggle .quarto-reader-toggle-btn{display:inline-flex;padding-left:.2em;padding-right:.2em;margin-left:-0.2em;margin-right:-0.2em;text-align:center}.navbar .quarto-reader-toggle:not(.reader) .bi::before{background-image:url('data:image/svg+xml,')}.navbar .quarto-reader-toggle.reader .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-reader-toggle:not(.reader) .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-reader-toggle.reader .bi::before{background-image:url('data:image/svg+xml,')}#quarto-back-to-top{display:none;position:fixed;bottom:50px;background-color:#181818;border-radius:.25rem;box-shadow:0 .2rem .5rem #6c757d,0 0 .05rem #6c757d;color:#6c757d;text-decoration:none;font-size:.9em;text-align:center;left:50%;padding:.4rem .8rem;transform:translate(-50%, 0)}#quarto-announcement{padding:.5em;display:flex;justify-content:space-between;margin-bottom:0;font-size:.9em}#quarto-announcement .quarto-announcement-content{margin-right:auto}#quarto-announcement .quarto-announcement-content p{margin-bottom:0}#quarto-announcement .quarto-announcement-icon{margin-right:.5em;font-size:1.2em;margin-top:-0.15em}#quarto-announcement .quarto-announcement-action{cursor:pointer}.aa-DetachedSearchButtonQuery{display:none}.aa-DetachedOverlay ul.aa-List,#quarto-search-results ul.aa-List{list-style:none;padding-left:0}.aa-DetachedOverlay .aa-Panel,#quarto-search-results 
.aa-Panel{background-color:#181818;position:absolute;z-index:2000}#quarto-search-results .aa-Panel{max-width:400px}#quarto-search input{font-size:.925rem}@media(min-width: 992px){.navbar #quarto-search{margin-left:.25rem;order:999}}.navbar.navbar-expand-sm #quarto-search,.navbar.navbar-expand-md #quarto-search{order:999}@media(min-width: 992px){.navbar .quarto-navbar-tools{order:900}}@media(min-width: 992px){.navbar .quarto-navbar-tools.tools-end{margin-left:auto !important}}@media(max-width: 991.98px){#quarto-sidebar .sidebar-search{display:none}}#quarto-sidebar .sidebar-search .aa-Autocomplete{width:100%}.navbar .aa-Autocomplete .aa-Form{width:180px}.navbar #quarto-search.type-overlay .aa-Autocomplete{width:40px}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form{background-color:inherit;border:none}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form:focus-within{box-shadow:none;outline:none}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-InputWrapper{display:none}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-InputWrapper:focus-within{display:inherit}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-Label svg,.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-LoadingIndicator svg{width:26px;height:26px;color:#bdbdbd;opacity:1}.navbar #quarto-search.type-overlay .aa-Autocomplete svg.aa-SubmitIcon{width:26px;height:26px;color:#bdbdbd;opacity:1}.aa-Autocomplete .aa-Form,.aa-DetachedFormContainer .aa-Form{align-items:center;background-color:#242424;border:1px solid #dee2e6;border-radius:.25rem;color:#faf1e4;display:flex;line-height:1em;margin:0;position:relative;width:100%}.aa-Autocomplete .aa-Form:focus-within,.aa-DetachedFormContainer .aa-Form:focus-within{box-shadow:rgba(39,128,227,.6) 0 0 0 1px;outline:currentColor none medium}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix,.aa-DetachedFormContainer .aa-Form 
.aa-InputWrapperPrefix{align-items:center;display:flex;flex-shrink:0;order:1}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-Label,.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-Label,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator{cursor:initial;flex-shrink:0;padding:0;text-align:left}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-Label svg,.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-Label svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator svg{color:#faf1e4;opacity:.5}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-SubmitButton,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-SubmitButton{appearance:none;background:none;border:0;margin:0}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator{align-items:center;display:flex;justify-content:center}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator[hidden]{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapper,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper{order:3;position:relative;width:100%}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input{appearance:none;background:none;border:0;color:#faf1e4;font:inherit;height:calc(1.5em + .1rem + 2px);padding:0;width:100%}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::placeholder,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::placeholder{color:#faf1e4;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper 
.aa-Input:focus{border-color:none;box-shadow:none;outline:none}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-decoration,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-cancel-button,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-button,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-decoration,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-decoration,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-cancel-button,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-button,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-decoration{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix{align-items:center;display:flex;order:4}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton{align-items:center;background:none;border:0;color:#faf1e4;opacity:.8;cursor:pointer;display:flex;margin:0;width:calc(1.5em + .1rem + 2px)}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:hover,.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:hover,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:focus{color:#faf1e4;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton[hidden]{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton svg{width:calc(1.5em + 0.75rem + calc(1px * 2))}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton,.aa-DetachedFormContainer .aa-Form 
.aa-InputWrapperSuffix .aa-CopyButton{border:none;align-items:center;background:none;color:#faf1e4;opacity:.4;font-size:.7rem;cursor:pointer;display:none;margin:0;width:calc(1em + .1rem + 2px)}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:hover,.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:hover,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:focus{color:#faf1e4;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton[hidden]{display:none}.aa-PanelLayout:empty{display:none}.quarto-search-no-results.no-query{display:none}.aa-Source:has(.no-query){display:none}#quarto-search-results .aa-Panel{border:solid #dee2e6 1px}#quarto-search-results .aa-SourceNoResults{width:398px}.aa-DetachedOverlay .aa-Panel,#quarto-search-results .aa-Panel{max-height:65vh;overflow-y:auto;font-size:.925rem}.aa-DetachedOverlay .aa-SourceNoResults,#quarto-search-results .aa-SourceNoResults{height:60px;display:flex;justify-content:center;align-items:center}.aa-DetachedOverlay .search-error,#quarto-search-results .search-error{padding-top:10px;padding-left:20px;padding-right:20px;cursor:default}.aa-DetachedOverlay .search-error .search-error-title,#quarto-search-results .search-error .search-error-title{font-size:1.1rem;margin-bottom:.5rem}.aa-DetachedOverlay .search-error .search-error-title .search-error-icon,#quarto-search-results .search-error .search-error-title .search-error-icon{margin-right:8px}.aa-DetachedOverlay .search-error .search-error-text,#quarto-search-results .search-error .search-error-text{font-weight:300}.aa-DetachedOverlay .search-result-text,#quarto-search-results 
.search-result-text{font-weight:300;overflow:hidden;text-overflow:ellipsis;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;line-height:1.2rem;max-height:2.4rem}.aa-DetachedOverlay .aa-SourceHeader .search-result-header,#quarto-search-results .aa-SourceHeader .search-result-header{font-size:.875rem;background-color:#252525;padding-left:14px;padding-bottom:4px;padding-top:4px}.aa-DetachedOverlay .aa-SourceHeader .search-result-header-no-results,#quarto-search-results .aa-SourceHeader .search-result-header-no-results{display:none}.aa-DetachedOverlay .aa-SourceFooter .algolia-search-logo,#quarto-search-results .aa-SourceFooter .algolia-search-logo{width:110px;opacity:.85;margin:8px;float:right}.aa-DetachedOverlay .search-result-section,#quarto-search-results .search-result-section{font-size:.925em}.aa-DetachedOverlay a.search-result-link,#quarto-search-results a.search-result-link{color:inherit;text-decoration:none}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item,#quarto-search-results li.aa-Item[aria-selected=true] .search-item{background-color:#2780e3}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item.search-result-more,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-section,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-text,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-title-container,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-text-container,#quarto-search-results li.aa-Item[aria-selected=true] .search-item.search-result-more,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-section,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-text,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-title-container,#quarto-search-results li.aa-Item[aria-selected=true] .search-item 
.search-result-text-container{color:#fff;background-color:#2780e3}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item mark.search-match,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-match.mark,#quarto-search-results li.aa-Item[aria-selected=true] .search-item mark.search-match,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-match.mark{color:#fff;background-color:#1a6cc7}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item,#quarto-search-results li.aa-Item[aria-selected=false] .search-item{background-color:#242424}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item.search-result-more,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-section,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-text,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-title-container,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-text-container,#quarto-search-results li.aa-Item[aria-selected=false] .search-item.search-result-more,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-section,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-text,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-title-container,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-text-container{color:#faf1e4}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item mark.search-match,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-match.mark,#quarto-search-results li.aa-Item[aria-selected=false] .search-item mark.search-match,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-match.mark{color:inherit;background-color:#06192e}.aa-DetachedOverlay .aa-Item .search-result-doc:not(.document-selectable) 
.search-result-title-container,#quarto-search-results .aa-Item .search-result-doc:not(.document-selectable) .search-result-title-container{background-color:#242424;color:#faf1e4}.aa-DetachedOverlay .aa-Item .search-result-doc:not(.document-selectable) .search-result-text-container,#quarto-search-results .aa-Item .search-result-doc:not(.document-selectable) .search-result-text-container{padding-top:0px}.aa-DetachedOverlay li.aa-Item .search-result-doc.document-selectable .search-result-text-container,#quarto-search-results li.aa-Item .search-result-doc.document-selectable .search-result-text-container{margin-top:-4px}.aa-DetachedOverlay .aa-Item,#quarto-search-results .aa-Item{cursor:pointer}.aa-DetachedOverlay .aa-Item .search-item,#quarto-search-results .aa-Item .search-item{border-left:none;border-right:none;border-top:none;background-color:#242424;border-color:#dee2e6;color:#faf1e4}.aa-DetachedOverlay .aa-Item .search-item p,#quarto-search-results .aa-Item .search-item p{margin-top:0;margin-bottom:0}.aa-DetachedOverlay .aa-Item .search-item i.bi,#quarto-search-results .aa-Item .search-item i.bi{padding-left:8px;padding-right:8px;font-size:1.3em}.aa-DetachedOverlay .aa-Item .search-item .search-result-title,#quarto-search-results .aa-Item .search-item .search-result-title{margin-top:.3em;margin-bottom:0em}.aa-DetachedOverlay .aa-Item .search-item .search-result-crumbs,#quarto-search-results .aa-Item .search-item .search-result-crumbs{white-space:nowrap;text-overflow:ellipsis;font-size:.8em;font-weight:300;margin-right:1em}.aa-DetachedOverlay .aa-Item .search-item .search-result-crumbs:not(.search-result-crumbs-wrap),#quarto-search-results .aa-Item .search-item .search-result-crumbs:not(.search-result-crumbs-wrap){max-width:30%;margin-left:auto;margin-top:.5em;margin-bottom:.1rem}.aa-DetachedOverlay .aa-Item .search-item .search-result-crumbs.search-result-crumbs-wrap,#quarto-search-results .aa-Item .search-item 
.search-result-crumbs.search-result-crumbs-wrap{flex-basis:100%;margin-top:0em;margin-bottom:.2em;margin-left:37px}.aa-DetachedOverlay .aa-Item .search-result-title-container,#quarto-search-results .aa-Item .search-result-title-container{font-size:1em;display:flex;flex-wrap:wrap;padding:6px 4px 6px 4px}.aa-DetachedOverlay .aa-Item .search-result-text-container,#quarto-search-results .aa-Item .search-result-text-container{padding-bottom:8px;padding-right:8px;margin-left:42px}.aa-DetachedOverlay .aa-Item .search-result-doc-section,.aa-DetachedOverlay .aa-Item .search-result-more,#quarto-search-results .aa-Item .search-result-doc-section,#quarto-search-results .aa-Item .search-result-more{padding-top:8px;padding-bottom:8px;padding-left:44px}.aa-DetachedOverlay .aa-Item .search-result-more,#quarto-search-results .aa-Item .search-result-more{font-size:.8em;font-weight:400}.aa-DetachedOverlay .aa-Item .search-result-doc,#quarto-search-results .aa-Item .search-result-doc{border-top:1px solid #dee2e6}.aa-DetachedSearchButton{background:none;border:none}.aa-DetachedSearchButton .aa-DetachedSearchButtonPlaceholder{display:none}.navbar .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon{color:#bdbdbd}.sidebar-tools-collapse #quarto-search,.sidebar-tools-main #quarto-search{display:inline}.sidebar-tools-collapse #quarto-search .aa-Autocomplete,.sidebar-tools-main #quarto-search .aa-Autocomplete{display:inline}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton{padding-left:4px;padding-right:4px}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon{color:#faf1e4}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon .aa-SubmitIcon,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon 
.aa-SubmitIcon{margin-top:-3px}.aa-DetachedContainer{background:rgba(24,24,24,.65);width:90%;bottom:0;box-shadow:rgba(222,226,230,.6) 0 0 0 1px;outline:currentColor none medium;display:flex;flex-direction:column;left:0;margin:0;overflow:hidden;padding:0;position:fixed;right:0;top:0;z-index:1101}.aa-DetachedContainer::after{height:32px}.aa-DetachedContainer .aa-SourceHeader{margin:var(--aa-spacing-half) 0 var(--aa-spacing-half) 2px}.aa-DetachedContainer .aa-Panel{background-color:#181818;border-radius:0;box-shadow:none;flex-grow:1;margin:0;padding:0;position:relative}.aa-DetachedContainer .aa-PanelLayout{bottom:0;box-shadow:none;left:0;margin:0;max-height:none;overflow-y:auto;position:absolute;right:0;top:0;width:100%}.aa-DetachedFormContainer{background-color:#181818;border-bottom:1px solid #dee2e6;display:flex;flex-direction:row;justify-content:space-between;margin:0;padding:.5em}.aa-DetachedCancelButton{background:none;font-size:.8em;border:0;border-radius:3px;color:#faf1e4;cursor:pointer;margin:0 0 0 .5em;padding:0 .5em}.aa-DetachedCancelButton:hover,.aa-DetachedCancelButton:focus{box-shadow:rgba(39,128,227,.6) 0 0 0 1px;outline:currentColor none medium}.aa-DetachedContainer--modal{bottom:inherit;height:auto;margin:0 auto;position:absolute;top:100px;border-radius:6px;max-width:850px}@media(max-width: 575.98px){.aa-DetachedContainer--modal{width:100%;top:0px;border-radius:0px;border:none}}.aa-DetachedContainer--modal .aa-PanelLayout{max-height:var(--aa-detached-modal-max-height);padding-bottom:var(--aa-spacing-half);position:static}.aa-Detached{height:100vh;overflow:hidden}.aa-DetachedOverlay{background-color:rgba(250,241,228,.4);position:fixed;left:0;right:0;top:0;margin:0;padding:0;height:100vh;z-index:1100}.quarto-dashboard.nav-fixed.dashboard-sidebar #quarto-content.quarto-dashboard-content{padding:0em}.quarto-dashboard #quarto-content.quarto-dashboard-content{padding:1em}.quarto-dashboard 
#quarto-content.quarto-dashboard-content>*{padding-top:0}@media(min-width: 576px){.quarto-dashboard{height:100%}}.quarto-dashboard .card.valuebox.bslib-card.bg-primary{background-color:#5397e9 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-secondary{background-color:#343a40 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-success{background-color:#3aa716 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-info{background-color:rgba(153,84,187,.7019607843) !important}.quarto-dashboard .card.valuebox.bslib-card.bg-warning{background-color:#fa6400 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-danger{background-color:rgba(255,0,57,.7019607843) !important}.quarto-dashboard .card.valuebox.bslib-card.bg-light{background-color:#525252 !important}.quarto-dashboard .card.valuebox.bslib-card.bg-dark{background-color:#343a40 !important}.quarto-dashboard.dashboard-fill{display:flex;flex-direction:column}.quarto-dashboard #quarto-appendix{display:none}.quarto-dashboard #quarto-header #quarto-dashboard-header{border-top:solid 1px #4a4a4a;border-bottom:solid 1px #4a4a4a}.quarto-dashboard #quarto-header #quarto-dashboard-header>nav{padding-left:1em;padding-right:1em}.quarto-dashboard #quarto-header #quarto-dashboard-header>nav .navbar-brand-container{padding-left:0}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-toggler{margin-right:0}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-toggler-icon{height:1em;width:1em;background-image:url('data:image/svg+xml,')}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-brand-container{padding-right:1em}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-title{font-size:1.1em}.quarto-dashboard #quarto-header #quarto-dashboard-header .navbar-nav{font-size:.9em}.quarto-dashboard #quarto-dashboard-header .navbar{padding:0}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-container{padding-left:1em}.quarto-dashboard 
#quarto-dashboard-header .navbar.slim .navbar-brand-container .nav-link,.quarto-dashboard #quarto-dashboard-header .navbar.slim .navbar-nav .nav-link{padding:.7em}.quarto-dashboard #quarto-dashboard-header .navbar .quarto-color-scheme-toggle{order:9}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-toggler{margin-left:.5em;order:10}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-nav .nav-link{padding:.5em;height:100%;display:flex;align-items:center}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-nav .active{background-color:#444}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-brand-container{padding:.5em .5em .5em 0;display:flex;flex-direction:row;margin-right:2em;align-items:center}@media(max-width: 767.98px){.quarto-dashboard #quarto-dashboard-header .navbar .navbar-brand-container{margin-right:auto}}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-collapse{align-self:stretch}@media(min-width: 768px){.quarto-dashboard #quarto-dashboard-header .navbar .navbar-collapse{order:8}}@media(max-width: 767.98px){.quarto-dashboard #quarto-dashboard-header .navbar .navbar-collapse{order:1000;padding-bottom:.5em}}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-collapse .navbar-nav{align-self:stretch}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-title{font-size:1.25em;line-height:1.1em;display:flex;flex-direction:row;flex-wrap:wrap;align-items:baseline}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-title .navbar-title-text{margin-right:.4em}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-title a{text-decoration:none;color:inherit}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-subtitle,.quarto-dashboard #quarto-dashboard-header .navbar .navbar-author{font-size:.9rem;margin-right:.5em}.quarto-dashboard #quarto-dashboard-header .navbar .navbar-author{margin-left:auto}.quarto-dashboard #quarto-dashboard-header .navbar 
.navbar-logo{max-height:48px;min-height:30px;object-fit:cover;margin-right:1em}.quarto-dashboard #quarto-dashboard-header .navbar .quarto-dashboard-links{order:9;padding-right:1em}.quarto-dashboard #quarto-dashboard-header .navbar .quarto-dashboard-link-text{margin-left:.25em}.quarto-dashboard #quarto-dashboard-header .navbar .quarto-dashboard-link{padding-right:0em;padding-left:.7em;text-decoration:none;color:#bdbdbd}.quarto-dashboard .page-layout-custom .tab-content{padding:0;border:none}.quarto-dashboard-img-contain{height:100%;width:100%;object-fit:contain}@media(max-width: 575.98px){.quarto-dashboard .bslib-grid{grid-template-rows:minmax(1em, max-content) !important}.quarto-dashboard .sidebar-content{height:inherit}.quarto-dashboard .page-layout-custom{min-height:100vh}}.quarto-dashboard.dashboard-toolbar>.page-layout-custom,.quarto-dashboard.dashboard-sidebar>.page-layout-custom{padding:0}.quarto-dashboard .quarto-dashboard-content.quarto-dashboard-pages{padding:0}.quarto-dashboard .callout{margin-bottom:0;margin-top:0}.quarto-dashboard .html-fill-container figure{overflow:hidden}.quarto-dashboard bslib-tooltip .rounded-pill{border:solid #6c757d 1px}.quarto-dashboard bslib-tooltip .rounded-pill .svg{fill:#faf1e4}.quarto-dashboard .tabset .dashboard-card-no-title .nav-tabs{margin-left:0;margin-right:auto}.quarto-dashboard .tabset .tab-content{border:none}.quarto-dashboard .tabset .card-header .nav-link[role=tab]{margin-top:-6px;padding-top:6px;padding-bottom:6px}.quarto-dashboard .card.valuebox,.quarto-dashboard .card.bslib-value-box{min-height:3rem}.quarto-dashboard .card.valuebox .card-body,.quarto-dashboard .card.bslib-value-box .card-body{padding:0}.quarto-dashboard .bslib-value-box .value-box-value{font-size:clamp(.1em,15cqw,5em)}.quarto-dashboard .bslib-value-box .value-box-showcase .bi{font-size:clamp(.1em,max(18cqw,5.2cqh),5em);text-align:center;height:1em}.quarto-dashboard .bslib-value-box .value-box-showcase 
.bi::before{vertical-align:1em}.quarto-dashboard .bslib-value-box .value-box-area{margin-top:auto;margin-bottom:auto}.quarto-dashboard .card figure.quarto-float{display:flex;flex-direction:column;align-items:center}.quarto-dashboard .dashboard-scrolling{padding:1em}.quarto-dashboard .full-height{height:100%}.quarto-dashboard .showcase-bottom .value-box-grid{display:grid;grid-template-columns:1fr;grid-template-rows:1fr auto;grid-template-areas:"top" "bottom"}.quarto-dashboard .showcase-bottom .value-box-grid .value-box-showcase{grid-area:bottom;padding:0;margin:0}.quarto-dashboard .showcase-bottom .value-box-grid .value-box-showcase i.bi{font-size:4rem}.quarto-dashboard .showcase-bottom .value-box-grid .value-box-area{grid-area:top}.quarto-dashboard .tab-content{margin-bottom:0}.quarto-dashboard .bslib-card .bslib-navs-card-title{justify-content:stretch;align-items:end}.quarto-dashboard .card-header{display:flex;flex-wrap:wrap;justify-content:space-between}.quarto-dashboard .card-header .card-title{display:flex;flex-direction:column;justify-content:center;margin-bottom:0}.quarto-dashboard .tabset .card-toolbar{margin-bottom:1em}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout{border:none;gap:var(--bslib-spacer, 1rem)}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout>.main{padding:0}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout>.sidebar{border-radius:.25rem;border:1px solid rgba(0,0,0,.175)}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout>.collapse-toggle{display:none}@media(max-width: 767.98px){.quarto-dashboard .bslib-grid>.bslib-sidebar-layout{grid-template-columns:1fr;grid-template-rows:max-content 1fr}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout>.main{grid-column:1;grid-row:2}.quarto-dashboard .bslib-grid>.bslib-sidebar-layout .sidebar{grid-column:1;grid-row:1}}.quarto-dashboard .sidebar-right .sidebar{padding-left:2.5em}.quarto-dashboard .sidebar-right .collapse-toggle{left:2px}.quarto-dashboard .quarto-dashboard .sidebar-right 
button.collapse-toggle:not(.transitioning){left:unset}.quarto-dashboard aside.sidebar{padding-left:1em;padding-right:1em;background-color:rgba(52,58,64,.25);color:#faf1e4}.quarto-dashboard .bslib-sidebar-layout>div.main{padding:.7em}.quarto-dashboard .bslib-sidebar-layout button.collapse-toggle{margin-top:.3em}.quarto-dashboard .bslib-sidebar-layout .collapse-toggle{top:0}.quarto-dashboard .bslib-sidebar-layout.sidebar-collapsed:not(.transitioning):not(.sidebar-right) .collapse-toggle{left:2px}.quarto-dashboard .sidebar>section>.h3:first-of-type{margin-top:0em}.quarto-dashboard .sidebar .h3,.quarto-dashboard .sidebar .h4,.quarto-dashboard .sidebar .h5,.quarto-dashboard .sidebar .h6{margin-top:.5em}.quarto-dashboard .sidebar form{flex-direction:column;align-items:start;margin-bottom:1em}.quarto-dashboard .sidebar form div[class*=oi-][class$=-input]{flex-direction:column}.quarto-dashboard .sidebar form[class*=oi-][class$=-toggle]{flex-direction:row-reverse;align-items:center;justify-content:start}.quarto-dashboard .sidebar form input[type=range]{margin-top:.5em;margin-right:.8em;margin-left:1em}.quarto-dashboard .sidebar label{width:fit-content}.quarto-dashboard .sidebar .card-body{margin-bottom:2em}.quarto-dashboard .sidebar .shiny-input-container{margin-bottom:1em}.quarto-dashboard .sidebar .shiny-options-group{margin-top:0}.quarto-dashboard .sidebar .control-label{margin-bottom:.3em}.quarto-dashboard .card .card-body .quarto-layout-row{align-items:stretch}.quarto-dashboard .toolbar{font-size:.9em;display:flex;flex-direction:row;border-top:solid 1px #bcbfc0;padding:1em;flex-wrap:wrap;background-color:rgba(52,58,64,.25)}.quarto-dashboard .toolbar .cell-output-display{display:flex}.quarto-dashboard .toolbar .shiny-input-container{padding-bottom:.5em;margin-bottom:.5em;width:inherit}.quarto-dashboard .toolbar .shiny-input-container>.checkbox:first-child{margin-top:6px}.quarto-dashboard .toolbar>*:last-child{margin-right:0}.quarto-dashboard 
.toolbar>*>*{margin-right:1em;align-items:baseline}.quarto-dashboard .toolbar>*>*>a{text-decoration:none;margin-top:auto;margin-bottom:auto}.quarto-dashboard .toolbar .shiny-input-container{padding-bottom:0;margin-bottom:0}.quarto-dashboard .toolbar .shiny-input-container>*{flex-shrink:0;flex-grow:0}.quarto-dashboard .toolbar .form-group.shiny-input-container:not([role=group])>label{margin-bottom:0}.quarto-dashboard .toolbar .shiny-input-container.no-baseline{align-items:start;padding-top:6px}.quarto-dashboard .toolbar .shiny-input-container{display:flex;align-items:baseline}.quarto-dashboard .toolbar .shiny-input-container label{padding-right:.4em}.quarto-dashboard .toolbar .shiny-input-container .bslib-input-switch{margin-top:6px}.quarto-dashboard .toolbar input[type=text]{line-height:1;width:inherit}.quarto-dashboard .toolbar .input-daterange{width:inherit}.quarto-dashboard .toolbar .input-daterange input[type=text]{height:2.4em;width:10em}.quarto-dashboard .toolbar .input-daterange .input-group-addon{height:auto;padding:0;margin-left:-5px !important;margin-right:-5px}.quarto-dashboard .toolbar .input-daterange .input-group-addon .input-group-text{padding-top:0;padding-bottom:0;height:100%}.quarto-dashboard .toolbar span.irs.irs--shiny{width:10em}.quarto-dashboard .toolbar span.irs.irs--shiny .irs-line{top:9px}.quarto-dashboard .toolbar span.irs.irs--shiny .irs-min,.quarto-dashboard .toolbar span.irs.irs--shiny .irs-max,.quarto-dashboard .toolbar span.irs.irs--shiny .irs-from,.quarto-dashboard .toolbar span.irs.irs--shiny .irs-to,.quarto-dashboard .toolbar span.irs.irs--shiny .irs-single{top:20px}.quarto-dashboard .toolbar span.irs.irs--shiny .irs-bar{top:8px}.quarto-dashboard .toolbar span.irs.irs--shiny .irs-handle{top:0px}.quarto-dashboard .toolbar .shiny-input-checkboxgroup>label{margin-top:6px}.quarto-dashboard .toolbar .shiny-input-checkboxgroup>.shiny-options-group{margin-top:0;align-items:baseline}.quarto-dashboard .toolbar 
.shiny-input-radiogroup>label{margin-top:6px}.quarto-dashboard .toolbar .shiny-input-radiogroup>.shiny-options-group{align-items:baseline;margin-top:0}.quarto-dashboard .toolbar .shiny-input-radiogroup>.shiny-options-group>.radio{margin-right:.3em}.quarto-dashboard .toolbar .form-select{padding-top:.2em;padding-bottom:.2em}.quarto-dashboard .toolbar .shiny-input-select{min-width:6em}.quarto-dashboard .toolbar div.checkbox{margin-bottom:0px}.quarto-dashboard .toolbar>.checkbox:first-child{margin-top:6px}.quarto-dashboard .toolbar form{width:fit-content}.quarto-dashboard .toolbar form label{padding-top:.2em;padding-bottom:.2em;width:fit-content}.quarto-dashboard .toolbar form input[type=date]{width:fit-content}.quarto-dashboard .toolbar form input[type=color]{width:3em}.quarto-dashboard .toolbar form button{padding:.4em}.quarto-dashboard .toolbar form select{width:fit-content}.quarto-dashboard .toolbar>*{font-size:.9em;flex-grow:0}.quarto-dashboard .toolbar .shiny-input-container label{margin-bottom:1px}.quarto-dashboard .toolbar-bottom{margin-top:1em;margin-bottom:0 !important;order:2}.quarto-dashboard .quarto-dashboard-content>.dashboard-toolbar-container>.toolbar-content>.tab-content>.tab-pane>*:not(.bslib-sidebar-layout){padding:1em}.quarto-dashboard .quarto-dashboard-content>.dashboard-toolbar-container>.toolbar-content>*:not(.tab-content){padding:1em}.quarto-dashboard .quarto-dashboard-content>.tab-content>.dashboard-page>.dashboard-toolbar-container>.toolbar-content,.quarto-dashboard .quarto-dashboard-content>.tab-content>.dashboard-page:not(.dashboard-sidebar-container)>*:not(.dashboard-toolbar-container){padding:1em}.quarto-dashboard .toolbar-content{padding:0}.quarto-dashboard .quarto-dashboard-content.quarto-dashboard-pages .tab-pane>.dashboard-toolbar-container .toolbar{border-radius:0;margin-bottom:0}.quarto-dashboard .dashboard-toolbar-container.toolbar-toplevel .toolbar{border-bottom:1px solid rgba(0,0,0,.175)}.quarto-dashboard 
.dashboard-toolbar-container.toolbar-toplevel .toolbar-bottom{margin-top:0}.quarto-dashboard .dashboard-toolbar-container:not(.toolbar-toplevel) .toolbar{margin-bottom:1em;border-top:none;border-radius:.25rem;border:1px solid rgba(0,0,0,.175)}.quarto-dashboard .vega-embed.has-actions details{width:1.7em;height:2em;position:absolute !important;top:0;right:0}.quarto-dashboard .dashboard-toolbar-container{padding:0}.quarto-dashboard .card .card-header p:last-child,.quarto-dashboard .card .card-footer p:last-child{margin-bottom:0}.quarto-dashboard .card .card-body>.h4:first-child{margin-top:0}.quarto-dashboard .card .card-body{z-index:4}@media(max-width: 767.98px){.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_length,.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_info,.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_paginate{text-align:initial}.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_filter{text-align:right}.quarto-dashboard .card .card-body .itables div.dataTables_wrapper div.dataTables_paginate ul.pagination{justify-content:initial}}.quarto-dashboard .card .card-body .itables .dataTables_wrapper{display:flex;flex-wrap:wrap;justify-content:space-between;align-items:center;padding-top:0}.quarto-dashboard .card .card-body .itables .dataTables_wrapper table{flex-shrink:0}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dt-buttons{margin-bottom:.5em;margin-left:auto;width:fit-content;float:right}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dt-buttons.btn-group{background:#181818;border:none}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dt-buttons .btn-secondary{background-color:#181818;background-image:none;border:solid #dee2e6 1px;padding:.2em .7em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dt-buttons .btn 
span{font-size:.8em;color:#faf1e4}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_info{margin-left:.5em;margin-bottom:.5em;padding-top:0}@media(min-width: 768px){.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_info{font-size:.875em}}@media(max-width: 767.98px){.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_info{font-size:.8em}}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_filter{margin-bottom:.5em;font-size:.875em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_filter input[type=search]{padding:1px 5px 1px 5px;font-size:.875em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_length{flex-basis:1 1 50%;margin-bottom:.5em;font-size:.875em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_length select{padding:.4em 3em .4em .5em;font-size:.875em;margin-left:.2em;margin-right:.2em}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_paginate{flex-shrink:0}@media(min-width: 768px){.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_paginate{margin-left:auto}}.quarto-dashboard .card .card-body .itables .dataTables_wrapper .dataTables_paginate ul.pagination .paginate_button .page-link{font-size:.8em}.quarto-dashboard .card .card-footer{font-size:.9em}.quarto-dashboard .card .card-toolbar{display:flex;flex-grow:1;flex-direction:row;width:100%;flex-wrap:wrap}.quarto-dashboard .card .card-toolbar>*{font-size:.8em;flex-grow:0}.quarto-dashboard .card .card-toolbar>.card-title{font-size:1em;flex-grow:1;align-self:flex-start;margin-top:.1em}.quarto-dashboard .card .card-toolbar .cell-output-display{display:flex}.quarto-dashboard .card .card-toolbar .shiny-input-container{padding-bottom:.5em;margin-bottom:.5em;width:inherit}.quarto-dashboard .card .card-toolbar .shiny-input-container>.checkbox:first-child{margin-top:6px}.quarto-dashboard 
.card .card-toolbar>*:last-child{margin-right:0}.quarto-dashboard .card .card-toolbar>*>*{margin-right:1em;align-items:baseline}.quarto-dashboard .card .card-toolbar>*>*>a{text-decoration:none;margin-top:auto;margin-bottom:auto}.quarto-dashboard .card .card-toolbar form{width:fit-content}.quarto-dashboard .card .card-toolbar form label{padding-top:.2em;padding-bottom:.2em;width:fit-content}.quarto-dashboard .card .card-toolbar form input[type=date]{width:fit-content}.quarto-dashboard .card .card-toolbar form input[type=color]{width:3em}.quarto-dashboard .card .card-toolbar form button{padding:.4em}.quarto-dashboard .card .card-toolbar form select{width:fit-content}.quarto-dashboard .card .card-toolbar .cell-output-display{display:flex}.quarto-dashboard .card .card-toolbar .shiny-input-container{padding-bottom:.5em;margin-bottom:.5em;width:inherit}.quarto-dashboard .card .card-toolbar .shiny-input-container>.checkbox:first-child{margin-top:6px}.quarto-dashboard .card .card-toolbar>*:last-child{margin-right:0}.quarto-dashboard .card .card-toolbar>*>*{margin-right:1em;align-items:baseline}.quarto-dashboard .card .card-toolbar>*>*>a{text-decoration:none;margin-top:auto;margin-bottom:auto}.quarto-dashboard .card .card-toolbar .shiny-input-container{padding-bottom:0;margin-bottom:0}.quarto-dashboard .card .card-toolbar .shiny-input-container>*{flex-shrink:0;flex-grow:0}.quarto-dashboard .card .card-toolbar .form-group.shiny-input-container:not([role=group])>label{margin-bottom:0}.quarto-dashboard .card .card-toolbar .shiny-input-container.no-baseline{align-items:start;padding-top:6px}.quarto-dashboard .card .card-toolbar .shiny-input-container{display:flex;align-items:baseline}.quarto-dashboard .card .card-toolbar .shiny-input-container label{padding-right:.4em}.quarto-dashboard .card .card-toolbar .shiny-input-container .bslib-input-switch{margin-top:6px}.quarto-dashboard .card .card-toolbar input[type=text]{line-height:1;width:inherit}.quarto-dashboard .card 
.card-toolbar .input-daterange{width:inherit}.quarto-dashboard .card .card-toolbar .input-daterange input[type=text]{height:2.4em;width:10em}.quarto-dashboard .card .card-toolbar .input-daterange .input-group-addon{height:auto;padding:0;margin-left:-5px !important;margin-right:-5px}.quarto-dashboard .card .card-toolbar .input-daterange .input-group-addon .input-group-text{padding-top:0;padding-bottom:0;height:100%}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny{width:10em}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-line{top:9px}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-min,.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-max,.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-from,.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-to,.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-single{top:20px}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-bar{top:8px}.quarto-dashboard .card .card-toolbar span.irs.irs--shiny .irs-handle{top:0px}.quarto-dashboard .card .card-toolbar .shiny-input-checkboxgroup>label{margin-top:6px}.quarto-dashboard .card .card-toolbar .shiny-input-checkboxgroup>.shiny-options-group{margin-top:0;align-items:baseline}.quarto-dashboard .card .card-toolbar .shiny-input-radiogroup>label{margin-top:6px}.quarto-dashboard .card .card-toolbar .shiny-input-radiogroup>.shiny-options-group{align-items:baseline;margin-top:0}.quarto-dashboard .card .card-toolbar .shiny-input-radiogroup>.shiny-options-group>.radio{margin-right:.3em}.quarto-dashboard .card .card-toolbar .form-select{padding-top:.2em;padding-bottom:.2em}.quarto-dashboard .card .card-toolbar .shiny-input-select{min-width:6em}.quarto-dashboard .card .card-toolbar div.checkbox{margin-bottom:0px}.quarto-dashboard .card .card-toolbar>.checkbox:first-child{margin-top:6px}.quarto-dashboard .card-body>table>thead{border-top:none}.quarto-dashboard 
.card-body>.table>:not(caption)>*>*{background-color:#181818}.tableFloatingHeaderOriginal{background-color:#181818;position:sticky !important;top:0 !important}.dashboard-data-table{margin-top:-1px}div.value-box-area span.observablehq--number{font-size:calc(clamp(.1em,15cqw,5em)*1.25);line-height:1.2;color:inherit;font-family:var(--bs-body-font-family)}.quarto-listing{padding-bottom:1em}.listing-pagination{padding-top:.5em}ul.pagination{float:right;padding-left:8px;padding-top:.5em}ul.pagination li{padding-right:.75em}ul.pagination li.disabled a,ul.pagination li.active a{color:#fff;text-decoration:none}ul.pagination li:last-of-type{padding-right:0}.listing-actions-group{display:flex}.quarto-listing-filter{margin-bottom:1em;width:200px;margin-left:auto}.quarto-listing-sort{margin-bottom:1em;margin-right:auto;width:auto}.quarto-listing-sort .input-group-text{font-size:.8em}.input-group-text{border-right:none}.quarto-listing-sort select.form-select{font-size:.8em}.listing-no-matching{text-align:center;padding-top:2em;padding-bottom:3em;font-size:1em}#quarto-margin-sidebar .quarto-listing-category{padding-top:0;font-size:1rem}#quarto-margin-sidebar .quarto-listing-category-title{cursor:pointer;font-weight:600;font-size:1rem}.quarto-listing-category .category{cursor:pointer}.quarto-listing-category .category.active{font-weight:600}.quarto-listing-category.category-cloud{display:flex;flex-wrap:wrap;align-items:baseline}.quarto-listing-category.category-cloud .category{padding-right:5px}.quarto-listing-category.category-cloud .category-cloud-1{font-size:.75em}.quarto-listing-category.category-cloud .category-cloud-2{font-size:.95em}.quarto-listing-category.category-cloud .category-cloud-3{font-size:1.15em}.quarto-listing-category.category-cloud .category-cloud-4{font-size:1.35em}.quarto-listing-category.category-cloud .category-cloud-5{font-size:1.55em}.quarto-listing-category.category-cloud .category-cloud-6{font-size:1.75em}.quarto-listing-category.category-cloud 
.category-cloud-7{font-size:1.95em}.quarto-listing-category.category-cloud .category-cloud-8{font-size:2.15em}.quarto-listing-category.category-cloud .category-cloud-9{font-size:2.35em}.quarto-listing-category.category-cloud .category-cloud-10{font-size:2.55em}.quarto-listing-cols-1{grid-template-columns:repeat(1, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-1{grid-template-columns:repeat(1, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-1{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-2{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-2{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-2{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-3{grid-template-columns:repeat(3, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-3{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-3{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-4{grid-template-columns:repeat(4, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-4{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-4{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-5{grid-template-columns:repeat(5, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-5{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-5{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-6{grid-template-columns:repeat(6, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-6{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-6{grid-template-columns:minmax(0, 
1fr);gap:1.5em}}.quarto-listing-cols-7{grid-template-columns:repeat(7, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-7{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-7{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-8{grid-template-columns:repeat(8, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-8{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-8{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-9{grid-template-columns:repeat(9, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-9{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-9{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-10{grid-template-columns:repeat(10, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-10{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-10{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-11{grid-template-columns:repeat(11, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-11{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-11{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-12{grid-template-columns:repeat(12, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-12{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-12{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-grid{gap:1.5em}.quarto-grid-item.borderless{border:none}.quarto-grid-item.borderless .listing-categories .listing-category:last-of-type,.quarto-grid-item.borderless .listing-categories 
.listing-category:first-of-type{padding-left:0}.quarto-grid-item.borderless .listing-categories .listing-category{border:0}.quarto-grid-link{text-decoration:none;color:inherit}.quarto-grid-link:hover{text-decoration:none;color:inherit}.quarto-grid-item h5.title,.quarto-grid-item .title.h5{margin-top:0;margin-bottom:0}.quarto-grid-item .card-footer{display:flex;justify-content:space-between;font-size:.8em}.quarto-grid-item .card-footer p{margin-bottom:0}.quarto-grid-item p.card-img-top{margin-bottom:0}.quarto-grid-item p.card-img-top>img{object-fit:cover}.quarto-grid-item .card-other-values{margin-top:.5em;font-size:.8em}.quarto-grid-item .card-other-values tr{margin-bottom:.5em}.quarto-grid-item .card-other-values tr>td:first-of-type{font-weight:600;padding-right:1em;padding-left:1em;vertical-align:top}.quarto-grid-item div.post-contents{display:flex;flex-direction:column;text-decoration:none;height:100%}.quarto-grid-item .listing-item-img-placeholder{background-color:rgba(52,58,64,.25);flex-shrink:0}.quarto-grid-item .card-attribution{padding-top:1em;display:flex;gap:1em;text-transform:uppercase;color:#6c757d;font-weight:500;flex-grow:10;align-items:flex-end}.quarto-grid-item .description{padding-bottom:1em}.quarto-grid-item .card-attribution .date{align-self:flex-end}.quarto-grid-item .card-attribution.justify{justify-content:space-between}.quarto-grid-item .card-attribution.start{justify-content:flex-start}.quarto-grid-item .card-attribution.end{justify-content:flex-end}.quarto-grid-item .card-title{margin-bottom:.1em}.quarto-grid-item .card-subtitle{padding-top:.25em}.quarto-grid-item .card-text{font-size:.9em}.quarto-grid-item .listing-reading-time{padding-bottom:.25em}.quarto-grid-item .card-text-small{font-size:.8em}.quarto-grid-item .card-subtitle.subtitle{font-size:.9em;font-weight:600;padding-bottom:.5em}.quarto-grid-item .listing-categories{display:flex;flex-wrap:wrap;padding-bottom:5px}.quarto-grid-item .listing-categories 
.listing-category{color:#6c757d;border:solid 1px #dee2e6;border-radius:.25rem;text-transform:uppercase;font-size:.65em;padding-left:.5em;padding-right:.5em;padding-top:.15em;padding-bottom:.15em;cursor:pointer;margin-right:4px;margin-bottom:4px}.quarto-grid-item.card-right{text-align:right}.quarto-grid-item.card-right .listing-categories{justify-content:flex-end}.quarto-grid-item.card-left{text-align:left}.quarto-grid-item.card-center{text-align:center}.quarto-grid-item.card-center .listing-description{text-align:justify}.quarto-grid-item.card-center .listing-categories{justify-content:center}table.quarto-listing-table td.image{padding:0px}table.quarto-listing-table td.image img{width:100%;max-width:50px;object-fit:contain}table.quarto-listing-table a{text-decoration:none;word-break:keep-all}table.quarto-listing-table th a{color:inherit}table.quarto-listing-table th a.asc:after{margin-bottom:-2px;margin-left:5px;display:inline-block;height:1rem;width:1rem;background-repeat:no-repeat;background-size:1rem 1rem;background-image:url('data:image/svg+xml,');content:""}table.quarto-listing-table th a.desc:after{margin-bottom:-2px;margin-left:5px;display:inline-block;height:1rem;width:1rem;background-repeat:no-repeat;background-size:1rem 1rem;background-image:url('data:image/svg+xml,');content:""}table.quarto-listing-table.table-hover td{cursor:pointer}.quarto-post.image-left{flex-direction:row}.quarto-post.image-right{flex-direction:row-reverse}@media(max-width: 767.98px){.quarto-post.image-right,.quarto-post.image-left{gap:0em;flex-direction:column}.quarto-post .metadata{padding-bottom:1em;order:2}.quarto-post .body{order:1}.quarto-post .thumbnail{order:3}}.list.quarto-listing-default div:last-of-type{border-bottom:none}@media(min-width: 992px){.quarto-listing-container-default{margin-right:2em}}div.quarto-post{display:flex;gap:2em;margin-bottom:1.5em;border-bottom:1px solid #dee2e6}@media(max-width: 767.98px){div.quarto-post{padding-bottom:1em}}div.quarto-post 
.metadata{flex-basis:20%;flex-grow:0;margin-top:.2em;flex-shrink:10}div.quarto-post .thumbnail{flex-basis:30%;flex-grow:0;flex-shrink:0}div.quarto-post .thumbnail img{margin-top:.4em;width:100%;object-fit:cover}div.quarto-post .body{flex-basis:45%;flex-grow:1;flex-shrink:0}div.quarto-post .body h3.listing-title,div.quarto-post .body .listing-title.h3{margin-top:0px;margin-bottom:0px;border-bottom:none}div.quarto-post .body .listing-subtitle{font-size:.875em;margin-bottom:.5em;margin-top:.2em}div.quarto-post .body .description{font-size:.9em}div.quarto-post .body pre code{white-space:pre-wrap}div.quarto-post a{color:#faf1e4;text-decoration:none}div.quarto-post .metadata{display:flex;flex-direction:column;font-size:.8em;font-family:"Source Sans Pro",-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol";flex-basis:33%}div.quarto-post .listing-categories{display:flex;flex-wrap:wrap;padding-bottom:5px}div.quarto-post .listing-categories .listing-category{color:#6c757d;border:solid 1px #dee2e6;border-radius:.25rem;text-transform:uppercase;font-size:.65em;padding-left:.5em;padding-right:.5em;padding-top:.15em;padding-bottom:.15em;cursor:pointer;margin-right:4px;margin-bottom:4px}div.quarto-post .listing-description{margin-bottom:.5em}div.quarto-about-jolla{display:flex !important;flex-direction:column;align-items:center;margin-top:10%;padding-bottom:1em}div.quarto-about-jolla .about-image{object-fit:cover;margin-left:auto;margin-right:auto;margin-bottom:1.5em}div.quarto-about-jolla img.round{border-radius:50%}div.quarto-about-jolla img.rounded{border-radius:10px}div.quarto-about-jolla .quarto-title h1.title,div.quarto-about-jolla .quarto-title .title.h1{text-align:center}div.quarto-about-jolla .quarto-title .description{text-align:center}div.quarto-about-jolla h2,div.quarto-about-jolla .h2{border-bottom:none}div.quarto-about-jolla .about-sep{width:60%}div.quarto-about-jolla 
main{text-align:center}div.quarto-about-jolla .about-links{display:flex}@media(min-width: 992px){div.quarto-about-jolla .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-jolla .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-jolla .about-link{color:#fff;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-jolla .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-jolla .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-jolla .about-link:hover{color:#aed1e4}div.quarto-about-jolla .about-link i.bi{margin-right:.15em}div.quarto-about-solana{display:flex !important;flex-direction:column;padding-top:3em !important;padding-bottom:1em}div.quarto-about-solana .about-entity{display:flex !important;align-items:start;justify-content:space-between}@media(min-width: 992px){div.quarto-about-solana .about-entity{flex-direction:row}}@media(max-width: 991.98px){div.quarto-about-solana .about-entity{flex-direction:column-reverse;align-items:center;text-align:center}}div.quarto-about-solana .about-entity .entity-contents{display:flex;flex-direction:column}@media(max-width: 767.98px){div.quarto-about-solana .about-entity .entity-contents{width:100%}}div.quarto-about-solana .about-entity .about-image{object-fit:cover}@media(max-width: 991.98px){div.quarto-about-solana .about-entity .about-image{margin-bottom:1.5em}}div.quarto-about-solana .about-entity img.round{border-radius:50%}div.quarto-about-solana .about-entity img.rounded{border-radius:10px}div.quarto-about-solana .about-entity .about-links{display:flex;justify-content:left;padding-bottom:1.2em}@media(min-width: 992px){div.quarto-about-solana .about-entity .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 
991.98px){div.quarto-about-solana .about-entity .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-solana .about-entity .about-link{color:#fff;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-solana .about-entity .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-solana .about-entity .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-solana .about-entity .about-link:hover{color:#aed1e4}div.quarto-about-solana .about-entity .about-link i.bi{margin-right:.15em}div.quarto-about-solana .about-contents{padding-right:1.5em;flex-basis:0;flex-grow:1}div.quarto-about-solana .about-contents main.content{margin-top:0}div.quarto-about-solana .about-contents h2,div.quarto-about-solana .about-contents .h2{border-bottom:none}div.quarto-about-trestles{display:flex !important;flex-direction:row;padding-top:3em !important;padding-bottom:1em}@media(max-width: 991.98px){div.quarto-about-trestles{flex-direction:column;padding-top:0em !important}}div.quarto-about-trestles .about-entity{display:flex !important;flex-direction:column;align-items:center;text-align:center;padding-right:1em}@media(min-width: 992px){div.quarto-about-trestles .about-entity{flex:0 0 42%}}div.quarto-about-trestles .about-entity .about-image{object-fit:cover;margin-bottom:1.5em}div.quarto-about-trestles .about-entity img.round{border-radius:50%}div.quarto-about-trestles .about-entity img.rounded{border-radius:10px}div.quarto-about-trestles .about-entity .about-links{display:flex;justify-content:center}@media(min-width: 992px){div.quarto-about-trestles .about-entity .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-trestles .about-entity .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-trestles .about-entity 
.about-link{color:#fff;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-trestles .about-entity .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-trestles .about-entity .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-trestles .about-entity .about-link:hover{color:#aed1e4}div.quarto-about-trestles .about-entity .about-link i.bi{margin-right:.15em}div.quarto-about-trestles .about-contents{flex-basis:0;flex-grow:1}div.quarto-about-trestles .about-contents h2,div.quarto-about-trestles .about-contents .h2{border-bottom:none}@media(min-width: 992px){div.quarto-about-trestles .about-contents{border-left:solid 1px #dee2e6;padding-left:1.5em}}div.quarto-about-trestles .about-contents main.content{margin-top:0}div.quarto-about-marquee{padding-bottom:1em}div.quarto-about-marquee .about-contents{display:flex;flex-direction:column}div.quarto-about-marquee .about-image{max-height:550px;margin-bottom:1.5em;object-fit:cover}div.quarto-about-marquee img.round{border-radius:50%}div.quarto-about-marquee img.rounded{border-radius:10px}div.quarto-about-marquee h2,div.quarto-about-marquee .h2{border-bottom:none}div.quarto-about-marquee .about-links{display:flex;justify-content:center;padding-top:1.5em}@media(min-width: 992px){div.quarto-about-marquee .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-marquee .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-marquee .about-link{color:#fff;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-marquee .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-marquee .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-marquee 
.about-link:hover{color:#aed1e4}div.quarto-about-marquee .about-link i.bi{margin-right:.15em}@media(min-width: 992px){div.quarto-about-marquee .about-link{border:none}}div.quarto-about-broadside{display:flex;flex-direction:column;padding-bottom:1em}div.quarto-about-broadside .about-main{display:flex !important;padding-top:0 !important}@media(min-width: 992px){div.quarto-about-broadside .about-main{flex-direction:row;align-items:flex-start}}@media(max-width: 991.98px){div.quarto-about-broadside .about-main{flex-direction:column}}@media(max-width: 991.98px){div.quarto-about-broadside .about-main .about-entity{flex-shrink:0;width:100%;height:450px;margin-bottom:1.5em;background-size:cover;background-repeat:no-repeat}}@media(min-width: 992px){div.quarto-about-broadside .about-main .about-entity{flex:0 10 50%;margin-right:1.5em;width:100%;height:100%;background-size:100%;background-repeat:no-repeat}}div.quarto-about-broadside .about-main .about-contents{padding-top:14px;flex:0 0 50%}div.quarto-about-broadside h2,div.quarto-about-broadside .h2{border-bottom:none}div.quarto-about-broadside .about-sep{margin-top:1.5em;width:60%;align-self:center}div.quarto-about-broadside .about-links{display:flex;justify-content:center;column-gap:20px;padding-top:1.5em}@media(min-width: 992px){div.quarto-about-broadside .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-broadside .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-broadside .about-link{color:#fff;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-broadside .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-broadside .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-broadside .about-link:hover{color:#aed1e4}div.quarto-about-broadside .about-link 
i.bi{margin-right:.15em}@media(min-width: 992px){div.quarto-about-broadside .about-link{border:none}}.tippy-box[data-theme~=quarto]{background-color:#181818;border:solid 1px #dee2e6;border-radius:.25rem;color:#faf1e4;font-size:.875rem}.tippy-box[data-theme~=quarto]>.tippy-backdrop{background-color:#181818}.tippy-box[data-theme~=quarto]>.tippy-arrow:after,.tippy-box[data-theme~=quarto]>.tippy-svg-arrow:after{content:"";position:absolute;z-index:-1}.tippy-box[data-theme~=quarto]>.tippy-arrow:after{border-color:rgba(0,0,0,0);border-style:solid}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-6px}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-6px}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-6px}.tippy-box[data-placement^=left]>.tippy-arrow:before{right:-6px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-arrow:before{border-top-color:#181818}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-arrow:after{border-top-color:#dee2e6;border-width:7px 7px 0;top:17px;left:1px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-svg-arrow>svg{top:16px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-svg-arrow:after{top:17px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-arrow:before{border-bottom-color:#181818;bottom:16px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-arrow:after{border-bottom-color:#dee2e6;border-width:0 7px 7px;bottom:17px;left:1px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-svg-arrow>svg{bottom:15px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-svg-arrow:after{bottom:17px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-arrow:before{border-left-color:#181818}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-arrow:after{border-left-color:#dee2e6;border-width:7px 0 7px 
7px;left:17px;top:1px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-svg-arrow>svg{left:11px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-svg-arrow:after{left:12px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-arrow:before{border-right-color:#181818;right:16px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-arrow:after{border-width:7px 7px 7px 0;right:17px;top:1px;border-right-color:#dee2e6}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-svg-arrow>svg{right:11px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-svg-arrow:after{right:12px}.tippy-box[data-theme~=quarto]>.tippy-svg-arrow{fill:#faf1e4}.tippy-box[data-theme~=quarto]>.tippy-svg-arrow:after{background-image:url(data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iNiIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMCA2czEuNzk2LS4wMTMgNC42Ny0zLjYxNUM1Ljg1MS45IDYuOTMuMDA2IDggMGMxLjA3LS4wMDYgMi4xNDguODg3IDMuMzQzIDIuMzg1QzE0LjIzMyA2LjAwNSAxNiA2IDE2IDZIMHoiIGZpbGw9InJnYmEoMCwgOCwgMTYsIDAuMikiLz48L3N2Zz4=);background-size:16px 6px;width:16px;height:6px}.top-right{position:absolute;top:1em;right:1em}.visually-hidden{border:0;clip:rect(0 0 0 0);height:auto;margin:0;overflow:hidden;padding:0;position:absolute;width:1px;white-space:nowrap}.hidden{display:none !important}.zindex-bottom{z-index:-1 !important}figure.figure{display:block}.quarto-layout-panel{margin-bottom:1em}.quarto-layout-panel>figure{width:100%}.quarto-layout-panel>figure>figcaption,.quarto-layout-panel>.panel-caption{margin-top:10pt}.quarto-layout-panel>.table-caption{margin-top:0px}.table-caption 
p{margin-bottom:.5em}.quarto-layout-row{display:flex;flex-direction:row;align-items:flex-start}.quarto-layout-valign-top{align-items:flex-start}.quarto-layout-valign-bottom{align-items:flex-end}.quarto-layout-valign-center{align-items:center}.quarto-layout-cell{position:relative;margin-right:20px}.quarto-layout-cell:last-child{margin-right:0}.quarto-layout-cell figure,.quarto-layout-cell>p{margin:.2em}.quarto-layout-cell img{max-width:100%}.quarto-layout-cell .html-widget{width:100% !important}.quarto-layout-cell div figure p{margin:0}.quarto-layout-cell figure{display:block;margin-inline-start:0;margin-inline-end:0}.quarto-layout-cell table{display:inline-table}.quarto-layout-cell-subref figcaption,figure .quarto-layout-row figure figcaption{text-align:center;font-style:italic}.quarto-figure{position:relative;margin-bottom:1em}.quarto-figure>figure{width:100%;margin-bottom:0}.quarto-figure-left>figure>p,.quarto-figure-left>figure>div{text-align:left}.quarto-figure-center>figure>p,.quarto-figure-center>figure>div{text-align:center}.quarto-figure-right>figure>p,.quarto-figure-right>figure>div{text-align:right}.quarto-figure>figure>div.cell-annotation,.quarto-figure>figure>div 
code{text-align:left}figure>p:empty{display:none}figure>p:first-child{margin-top:0;margin-bottom:0}figure>figcaption.quarto-float-caption-bottom{margin-bottom:.5em}figure>figcaption.quarto-float-caption-top{margin-top:.5em}div[id^=tbl-]{position:relative}.quarto-figure>.anchorjs-link{position:absolute;top:.6em;right:.5em}div[id^=tbl-]>.anchorjs-link{position:absolute;top:.7em;right:.3em}.quarto-figure:hover>.anchorjs-link,div[id^=tbl-]:hover>.anchorjs-link,h2:hover>.anchorjs-link,.h2:hover>.anchorjs-link,h3:hover>.anchorjs-link,.h3:hover>.anchorjs-link,h4:hover>.anchorjs-link,.h4:hover>.anchorjs-link,h5:hover>.anchorjs-link,.h5:hover>.anchorjs-link,h6:hover>.anchorjs-link,.h6:hover>.anchorjs-link,.reveal-anchorjs-link>.anchorjs-link{opacity:1}#title-block-header{margin-block-end:1rem;position:relative;margin-top:-1px}#title-block-header .abstract{margin-block-start:1rem}#title-block-header .abstract .abstract-title{font-weight:600}#title-block-header a{text-decoration:none}#title-block-header .author,#title-block-header .date,#title-block-header .doi{margin-block-end:.2rem}#title-block-header .quarto-title-block>div{display:flex}#title-block-header .quarto-title-block>div>h1,#title-block-header .quarto-title-block>div>.h1{flex-grow:1}#title-block-header .quarto-title-block>div>button{flex-shrink:0;height:2.25rem;margin-top:0}@media(min-width: 992px){#title-block-header .quarto-title-block>div>button{margin-top:5px}}tr.header>th>p:last-of-type{margin-bottom:0px}table,table.table{margin-top:.5rem;margin-bottom:.5rem}caption,.table-caption{padding-top:.5rem;padding-bottom:.5rem;text-align:center}figure.quarto-float-tbl figcaption.quarto-float-caption-top{margin-top:.5rem;margin-bottom:.25rem;text-align:center}figure.quarto-float-tbl 
figcaption.quarto-float-caption-bottom{padding-top:.25rem;margin-bottom:.5rem;text-align:center}.utterances{max-width:none;margin-left:-8px}iframe{margin-bottom:1em}/* details elements get a 1em bottom margin, dropped when a "show" attribute is present. NOTE(review): the standard HTML attribute for an expanded details element is "open"; confirm something actually sets "show", otherwise details[show] never matches. */details{margin-bottom:1em}details[show]{margin-bottom:0}details>summary{color:#6c757d}details>summary>p:only-child{display:inline}/* Code whitespace: code inside p/dd (unless .sourceCode) uses pre-wrap; other code uses white-space:pre on screen and pre-wrap in print; code.sourceCode directly inside pre uses pre-wrap, with .code-overflow-wrap / .code-overflow-scroll on the pre selecting pre-wrap / pre explicitly. */pre.sourceCode,code.sourceCode{position:relative}dd code:not(.sourceCode),p code:not(.sourceCode){white-space:pre-wrap}code{white-space:pre}@media print{code{white-space:pre-wrap}}pre>code{display:block}pre>code.sourceCode{white-space:pre-wrap}pre>code.sourceCode>span>a:first-child::before{text-decoration:none}pre.code-overflow-wrap>code.sourceCode{white-space:pre-wrap}pre.code-overflow-scroll>code.sourceCode{white-space:pre}/* Links inside code inherit the surrounding colour and are underlined only on hover. */code a:any-link{color:inherit;text-decoration:none}code a:hover{color:inherit;text-decoration:underline}ul.task-list{padding-left:1em}/* Tooltip content: footnote back-links are hidden inside .tippy-content, which scrolls horizontally on overflow. */[data-tippy-root]{display:inline-block}.tippy-content .footnote-back{display:none}.footnote-back{margin-left:.2em}.tippy-content{overflow-x:auto}.quarto-embedded-source-code{display:none}.quarto-unresolved-ref{font-weight:600}.quarto-cover-image{max-width:35%;float:right;margin-left:30px}.cell-output-display .widget-subarea{margin-bottom:1em}.cell-output-display:not(.no-overflow-x),.knitsql-table:not(.no-overflow-x){overflow-x:auto}/* Input panels: direct and second-level div children are laid out inline, top-aligned, with 12px right padding. */.panel-input{margin-bottom:1em}.panel-input>div,.panel-input>div>div{display:inline-block;vertical-align:top;padding-right:12px}.panel-input>p:last-child{margin-bottom:0}.layout-sidebar{margin-bottom:1em}.layout-sidebar .tab-content{border:none}.tab-content>.page-columns.active{display:grid}div.sourceCode>iframe{width:100%;height:300px;margin-bottom:-0.5em}a{text-underline-offset:3px}.callout pre.sourceCode{padding-left:0}/* Wrapper for ANSI-escaped output: rendered as a monospace block. */div.ansi-escaped-output{font-family:monospace;display:block}/*! 
+* +* ansi colors from IPython notebook's +* +* we also add `bright-[color]-` synonyms for the `-[color]-intense` classes since +* that seems to be what ansi_up emits +* +*/ /* ANSI palette: each colour has a normal pair (.ansi-[color]-fg / .ansi-[color]-bg) and an intense pair. The intense pair is selectable as .ansi-[color]-intense-fg / .ansi-[color]-intense-bg or through the .ansi-bright-[color]-fg / .ansi-bright-[color]-bg synonym. Every -fg rule sets only color and every -bg rule sets only background-color, so no single class gives an element the same foreground and background. */.ansi-black-fg{color:#3e424d}.ansi-black-bg{background-color:#3e424d}.ansi-black-intense-fg,.ansi-bright-black-fg{color:#282c36}.ansi-black-intense-bg,.ansi-bright-black-bg{background-color:#282c36}.ansi-red-fg{color:#e75c58}.ansi-red-bg{background-color:#e75c58}.ansi-red-intense-fg,.ansi-bright-red-fg{color:#b22b31}.ansi-red-intense-bg,.ansi-bright-red-bg{background-color:#b22b31}.ansi-green-fg{color:#00a250}.ansi-green-bg{background-color:#00a250}.ansi-green-intense-fg,.ansi-bright-green-fg{color:#007427}.ansi-green-intense-bg,.ansi-bright-green-bg{background-color:#007427}.ansi-yellow-fg{color:#ddb62b}.ansi-yellow-bg{background-color:#ddb62b}.ansi-yellow-intense-fg,.ansi-bright-yellow-fg{color:#b27d12}.ansi-yellow-intense-bg,.ansi-bright-yellow-bg{background-color:#b27d12}.ansi-blue-fg{color:#208ffb}.ansi-blue-bg{background-color:#208ffb}.ansi-blue-intense-fg,.ansi-bright-blue-fg{color:#0065ca}.ansi-blue-intense-bg,.ansi-bright-blue-bg{background-color:#0065ca}.ansi-magenta-fg{color:#d160c4}.ansi-magenta-bg{background-color:#d160c4}.ansi-magenta-intense-fg,.ansi-bright-magenta-fg{color:#a03196}.ansi-magenta-intense-bg,.ansi-bright-magenta-bg{background-color:#a03196}.ansi-cyan-fg{color:#60c6c8}.ansi-cyan-bg{background-color:#60c6c8}.ansi-cyan-intense-fg,.ansi-bright-cyan-fg{color:#258f8f}.ansi-cyan-intense-bg,.ansi-bright-cyan-bg{background-color:#258f8f}.ansi-white-fg{color:#c5c1b4}.ansi-white-bg{background-color:#c5c1b4}.ansi-white-intense-fg,.ansi-bright-white-fg{color:#a1a6b2}.ansi-white-intense-bg,.ansi-bright-white-bg{background-color:#a1a6b2}.ansi-default-inverse-fg{color:#fff}.ansi-default-inverse-bg{background-color:#000}.ansi-bold{font-weight:bold}.ansi-underline{text-decoration:underline}:root{--quarto-body-bg: 
#181818;--quarto-body-color: #FAF1E4;--quarto-text-muted: #6c757d;--quarto-border-color: #dee2e6;--quarto-border-width: 1px}table.gt_table{color:var(--quarto-body-color);font-size:1em;width:100%;background-color:rgba(0,0,0,0);border-top-width:inherit;border-bottom-width:inherit;border-color:var(--quarto-border-color)}table.gt_table th.gt_column_spanner_outer{color:var(--quarto-body-color);background-color:rgba(0,0,0,0);border-top-width:inherit;border-bottom-width:inherit;border-color:var(--quarto-border-color)}table.gt_table th.gt_col_heading{color:var(--quarto-body-color);font-weight:bold;background-color:rgba(0,0,0,0)}table.gt_table thead.gt_col_headings{border-bottom:1px solid currentColor;border-top-width:inherit;border-top-color:var(--quarto-border-color)}table.gt_table thead.gt_col_headings:not(:first-child){border-top-width:1px;border-top-color:var(--quarto-border-color)}table.gt_table td.gt_row{border-bottom-width:1px;border-bottom-color:var(--quarto-border-color);border-top-width:0px}table.gt_table tbody.gt_table_body{border-top-width:1px;border-bottom-width:1px;border-bottom-color:var(--quarto-border-color);border-top-color:currentColor}div.columns{display:initial;gap:initial}div.column{display:inline-block;overflow-x:initial;vertical-align:top;width:50%}.code-annotation-tip-content{word-wrap:break-word}.code-annotation-container-hidden{display:none !important}dl.code-annotation-container-grid{display:grid;grid-template-columns:min-content auto}dl.code-annotation-container-grid dt{grid-column:1}dl.code-annotation-container-grid dd{grid-column:2}pre.sourceCode.code-annotation-code{padding-right:0}code.sourceCode .code-annotation-anchor{z-index:100;position:relative;float:right;background-color:rgba(0,0,0,0)}input[type=checkbox]{margin-right:.5ch}:root{--mermaid-bg-color: #181818;--mermaid-edge-color: #343a40;--mermaid-node-fg-color: #FAF1E4;--mermaid-fg-color: #FAF1E4;--mermaid-fg-color--lighter: white;--mermaid-fg-color--lightest: 
white;--mermaid-font-family: Source Sans Pro, -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Helvetica Neue, Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol;--mermaid-label-bg-color: #181818;--mermaid-label-fg-color: #2780e3;--mermaid-node-bg-color: rgba(39, 128, 227, 0.1);--mermaid-node-fg-color: #FAF1E4}@media print{:root{font-size:11pt}#quarto-sidebar,#TOC,.nav-page{display:none}.page-columns .content{grid-column-start:page-start}.fixed-top{position:relative}.panel-caption,.figure-caption,figcaption{color:#666}}.code-copy-button{position:absolute;top:0;right:0;border:0;margin-top:5px;margin-right:5px;background-color:rgba(0,0,0,0);z-index:3}.code-copy-button:focus{outline:none}.code-copy-button-tooltip{font-size:.75em}.code-copy-button>.bi::before{display:inline-block;height:1rem;width:1rem;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:1rem 1rem}.code-copy-button-checked>.bi::before{background-image:url('data:image/svg+xml,')}.code-copy-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}.code-copy-button-checked:hover>.bi::before{background-image:url('data:image/svg+xml,')}main ol ol,main ul ul,main ol ul,main ul ol{margin-bottom:1em}ul>li:not(:has(>p))>ul,ol>li:not(:has(>p))>ul,ul>li:not(:has(>p))>ol,ol>li:not(:has(>p))>ol{margin-bottom:0}ul>li:not(:has(>p))>ul>li:has(>p),ol>li:not(:has(>p))>ul>li:has(>p),ul>li:not(:has(>p))>ol>li:has(>p),ol>li:not(:has(>p))>ol>li:has(>p){margin-top:1rem}body{margin:0}main.page-columns>header>h1.title,main.page-columns>header>.title.h1{margin-bottom:0}@media(min-width: 992px){body .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc(950px - 3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] minmax(90px, 174px) [page-end-inset] 
42px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.fullcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc(950px - 3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] 42px [page-end-inset page-end] 5fr [screen-end-inset] 1.5em}body.slimcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc(950px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 35px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.listing:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(950px - 3em)) [body-content-end] 3em [body-end] 50px [body-end-outset] minmax(0px, 300px) [page-end-inset] minmax(50px, 100px) [page-end] 1fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 175px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] 
minmax(0px, 175px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] minmax(25px, 50px) [page-start-inset] minmax(50px, 150px) [body-start-outset] minmax(25px, 50px) [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] minmax(30px, 60px) [body-end-outset] minmax(60px, 180px) [page-end-inset] minmax(30px, 60px) [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(1100px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(60px, 120px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(1100px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 50px [page-start-inset] minmax(50px, 150px) [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.slimcontent 
.page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(450px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc(1100px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(0px, 240px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 50px [page-start-inset] minmax(50px, 150px) [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(450px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(60px, 180px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] minmax(25px, 50px) [page-start-inset] minmax(50px, 150px) [body-start-outset] minmax(25px, 50px) [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] minmax(30px, 60px) [body-end-outset] minmax(60px, 180px) [page-end-inset] minmax(30px, 60px) [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}}@media(max-width: 991.98px){body .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 
3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] minmax(90px, 174px) [page-end-inset] 42px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.fullcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.slimcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] minmax(90px, 174px) [page-end-inset] 42px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.listing:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc(1350px - 3em)) [body-content-end body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 145px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 145px) [body-start-outset] 35px [body-start] 
1.5em [body-content-start] minmax(450px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1.5em [body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(90px, 180px) [page-end-inset] 30px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(30px, 60px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc(1100px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc(900px - 3em)) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 4fr [screen-end-inset] 1.5em [screen-end]}body.docked.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(30px, 60px) [page-end-inset] 60px [page-end] 5fr 
[screen-end-inset] 1.5em [screen-end]}body.docked.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(30px, 60px) [page-end-inset] 60px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 42px [body-end-outset] minmax(90px, 174px) [page-end-inset] 42px [page-end] 4fr [screen-end-inset] 1.5em [screen-end]}body.floating.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc(850px - 3em)) [body-content-end] 1.5em [body-end] 60px [body-end-outset] minmax(90px, 180px) [page-end-inset] 30px [page-end] 4fr [screen-end-inset] 1.5em [screen-end]}}@media(max-width: 767.98px){body .page-columns,body.fullcontent:not(.floating):not(.docked) .page-columns,body.slimcontent:not(.floating):not(.docked) .page-columns,body.docked .page-columns,body.docked.slimcontent .page-columns,body.docked.fullcontent .page-columns,body.floating .page-columns,body.floating.slimcontent .page-columns,body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em 
[screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}nav[role=doc-toc]{display:none}}body,.page-row-navigation{grid-template-rows:[page-top] max-content [contents-top] max-content [contents-bottom] max-content [page-bottom]}.page-rows-contents{grid-template-rows:[content-top] minmax(max-content, 1fr) [content-bottom] minmax(60px, max-content) [page-bottom]}.page-full{grid-column:screen-start/screen-end !important}.page-columns>*{grid-column:body-content-start/body-content-end}.page-columns.column-page>*{grid-column:page-start/page-end}.page-columns.column-page-left .page-columns.page-full>*,.page-columns.column-page-left>*{grid-column:page-start/body-content-end}.page-columns.column-page-right .page-columns.page-full>*,.page-columns.column-page-right>*{grid-column:body-content-start/page-end}.page-rows{grid-auto-rows:auto}.header{grid-column:screen-start/screen-end;grid-row:page-top/contents-top}#quarto-content{padding:0;grid-column:screen-start/screen-end;grid-row:contents-top/contents-bottom}body.floating .sidebar.sidebar-navigation{grid-column:page-start/body-start;grid-row:content-top/page-bottom}body.docked .sidebar.sidebar-navigation{grid-column:screen-start/body-start;grid-row:content-top/page-bottom}.sidebar.toc-left{grid-column:page-start/body-start;grid-row:content-top/page-bottom}.sidebar.margin-sidebar{grid-column:body-end/page-end;grid-row:content-top/page-bottom}.page-columns 
.content{grid-column:body-content-start/body-content-end;grid-row:content-top/content-bottom;align-content:flex-start}.page-columns .page-navigation{grid-column:body-content-start/body-content-end;grid-row:content-bottom/page-bottom}.page-columns .footer{grid-column:screen-start/screen-end;grid-row:contents-bottom/page-bottom}.page-columns .column-body{grid-column:body-content-start/body-content-end}.page-columns .column-body-fullbleed{grid-column:body-start/body-end}.page-columns .column-body-outset{grid-column:body-start-outset/body-end-outset;z-index:998;opacity:.999}.page-columns .column-body-outset table{background:#181818}.page-columns .column-body-outset-left{grid-column:body-start-outset/body-content-end;z-index:998;opacity:.999}.page-columns .column-body-outset-left table{background:#181818}.page-columns .column-body-outset-right{grid-column:body-content-start/body-end-outset;z-index:998;opacity:.999}.page-columns .column-body-outset-right table{background:#181818}.page-columns .column-page{grid-column:page-start/page-end;z-index:998;opacity:.999}.page-columns .column-page table{background:#181818}.page-columns .column-page-inset{grid-column:page-start-inset/page-end-inset;z-index:998;opacity:.999}.page-columns .column-page-inset table{background:#181818}.page-columns .column-page-inset-left{grid-column:page-start-inset/body-content-end;z-index:998;opacity:.999}.page-columns .column-page-inset-left table{background:#181818}.page-columns .column-page-inset-right{grid-column:body-content-start/page-end-inset;z-index:998;opacity:.999}.page-columns .column-page-inset-right figcaption table{background:#181818}.page-columns .column-page-left{grid-column:page-start/body-content-end;z-index:998;opacity:.999}.page-columns .column-page-left table{background:#181818}.page-columns .column-page-right{grid-column:body-content-start/page-end;z-index:998;opacity:.999}.page-columns .column-page-right figcaption table{background:#181818}#quarto-content.page-columns 
#quarto-margin-sidebar,#quarto-content.page-columns #quarto-sidebar{z-index:1}@media(max-width: 991.98px){#quarto-content.page-columns #quarto-margin-sidebar.collapse,#quarto-content.page-columns #quarto-sidebar.collapse,#quarto-content.page-columns #quarto-margin-sidebar.collapsing,#quarto-content.page-columns #quarto-sidebar.collapsing{z-index:1055}}#quarto-content.page-columns main.column-page,#quarto-content.page-columns main.column-page-right,#quarto-content.page-columns main.column-page-left{z-index:0}.page-columns .column-screen-inset{grid-column:screen-start-inset/screen-end-inset;z-index:998;opacity:.999}.page-columns .column-screen-inset table{background:#181818}.page-columns .column-screen-inset-left{grid-column:screen-start-inset/body-content-end;z-index:998;opacity:.999}.page-columns .column-screen-inset-left table{background:#181818}.page-columns .column-screen-inset-right{grid-column:body-content-start/screen-end-inset;z-index:998;opacity:.999}.page-columns .column-screen-inset-right table{background:#181818}.page-columns .column-screen{grid-column:screen-start/screen-end;z-index:998;opacity:.999}.page-columns .column-screen table{background:#181818}.page-columns .column-screen-left{grid-column:screen-start/body-content-end;z-index:998;opacity:.999}.page-columns .column-screen-left table{background:#181818}.page-columns .column-screen-right{grid-column:body-content-start/screen-end;z-index:998;opacity:.999}.page-columns .column-screen-right table{background:#181818}.page-columns 
.column-screen-inset-shaded{grid-column:screen-start/screen-end;padding:1em;background:#525252;z-index:998;opacity:.999;margin-bottom:1em}.zindex-content{z-index:998;opacity:.999}.zindex-modal{z-index:1055;opacity:.999}.zindex-over-content{z-index:999;opacity:.999}img.img-fluid.column-screen,img.img-fluid.column-screen-inset-shaded,img.img-fluid.column-screen-inset,img.img-fluid.column-screen-inset-left,img.img-fluid.column-screen-inset-right,img.img-fluid.column-screen-left,img.img-fluid.column-screen-right{width:100%}@media(min-width: 992px){.margin-caption,div.aside,aside:not(.footnotes):not(.sidebar),.column-margin{grid-column:body-end/page-end !important;z-index:998}.column-sidebar{grid-column:page-start/body-start !important;z-index:998}.column-leftmargin{grid-column:screen-start-inset/body-start !important;z-index:998}.no-row-height{height:1em;overflow:visible}}@media(max-width: 991.98px){.margin-caption,div.aside,aside:not(.footnotes):not(.sidebar),.column-margin{grid-column:body-end/page-end !important;z-index:998}.no-row-height{height:1em;overflow:visible}.page-columns.page-full{overflow:visible}.page-columns.toc-left .margin-caption,.page-columns.toc-left div.aside,.page-columns.toc-left aside:not(.footnotes):not(.sidebar),.page-columns.toc-left .column-margin{grid-column:body-content-start/body-content-end !important;z-index:998;opacity:.999}.page-columns.toc-left .no-row-height{height:initial;overflow:initial}}@media(max-width: 767.98px){.margin-caption,div.aside,aside:not(.footnotes):not(.sidebar),.column-margin{grid-column:body-content-start/body-content-end !important;z-index:998;opacity:.999}.no-row-height{height:initial;overflow:initial}#quarto-margin-sidebar{display:none}#quarto-sidebar-toc-left{display:none}.hidden-sm{display:none}}.panel-grid{display:grid;grid-template-rows:repeat(1, 1fr);grid-template-columns:repeat(24, 1fr);gap:1em}.panel-grid .g-col-1{grid-column:auto/span 1}.panel-grid .g-col-2{grid-column:auto/span 2}.panel-grid 
.g-col-3{grid-column:auto/span 3}.panel-grid .g-col-4{grid-column:auto/span 4}.panel-grid .g-col-5{grid-column:auto/span 5}.panel-grid .g-col-6{grid-column:auto/span 6}.panel-grid .g-col-7{grid-column:auto/span 7}.panel-grid .g-col-8{grid-column:auto/span 8}.panel-grid .g-col-9{grid-column:auto/span 9}.panel-grid .g-col-10{grid-column:auto/span 10}.panel-grid .g-col-11{grid-column:auto/span 11}.panel-grid .g-col-12{grid-column:auto/span 12}.panel-grid .g-col-13{grid-column:auto/span 13}.panel-grid .g-col-14{grid-column:auto/span 14}.panel-grid .g-col-15{grid-column:auto/span 15}.panel-grid .g-col-16{grid-column:auto/span 16}.panel-grid .g-col-17{grid-column:auto/span 17}.panel-grid .g-col-18{grid-column:auto/span 18}.panel-grid .g-col-19{grid-column:auto/span 19}.panel-grid .g-col-20{grid-column:auto/span 20}.panel-grid .g-col-21{grid-column:auto/span 21}.panel-grid .g-col-22{grid-column:auto/span 22}.panel-grid .g-col-23{grid-column:auto/span 23}.panel-grid .g-col-24{grid-column:auto/span 24}.panel-grid .g-start-1{grid-column-start:1}.panel-grid .g-start-2{grid-column-start:2}.panel-grid .g-start-3{grid-column-start:3}.panel-grid .g-start-4{grid-column-start:4}.panel-grid .g-start-5{grid-column-start:5}.panel-grid .g-start-6{grid-column-start:6}.panel-grid .g-start-7{grid-column-start:7}.panel-grid .g-start-8{grid-column-start:8}.panel-grid .g-start-9{grid-column-start:9}.panel-grid .g-start-10{grid-column-start:10}.panel-grid .g-start-11{grid-column-start:11}.panel-grid .g-start-12{grid-column-start:12}.panel-grid .g-start-13{grid-column-start:13}.panel-grid .g-start-14{grid-column-start:14}.panel-grid .g-start-15{grid-column-start:15}.panel-grid .g-start-16{grid-column-start:16}.panel-grid .g-start-17{grid-column-start:17}.panel-grid .g-start-18{grid-column-start:18}.panel-grid .g-start-19{grid-column-start:19}.panel-grid .g-start-20{grid-column-start:20}.panel-grid .g-start-21{grid-column-start:21}.panel-grid .g-start-22{grid-column-start:22}.panel-grid 
.g-start-23{grid-column-start:23}@media(min-width: 576px){.panel-grid .g-col-sm-1{grid-column:auto/span 1}.panel-grid .g-col-sm-2{grid-column:auto/span 2}.panel-grid .g-col-sm-3{grid-column:auto/span 3}.panel-grid .g-col-sm-4{grid-column:auto/span 4}.panel-grid .g-col-sm-5{grid-column:auto/span 5}.panel-grid .g-col-sm-6{grid-column:auto/span 6}.panel-grid .g-col-sm-7{grid-column:auto/span 7}.panel-grid .g-col-sm-8{grid-column:auto/span 8}.panel-grid .g-col-sm-9{grid-column:auto/span 9}.panel-grid .g-col-sm-10{grid-column:auto/span 10}.panel-grid .g-col-sm-11{grid-column:auto/span 11}.panel-grid .g-col-sm-12{grid-column:auto/span 12}.panel-grid .g-col-sm-13{grid-column:auto/span 13}.panel-grid .g-col-sm-14{grid-column:auto/span 14}.panel-grid .g-col-sm-15{grid-column:auto/span 15}.panel-grid .g-col-sm-16{grid-column:auto/span 16}.panel-grid .g-col-sm-17{grid-column:auto/span 17}.panel-grid .g-col-sm-18{grid-column:auto/span 18}.panel-grid .g-col-sm-19{grid-column:auto/span 19}.panel-grid .g-col-sm-20{grid-column:auto/span 20}.panel-grid .g-col-sm-21{grid-column:auto/span 21}.panel-grid .g-col-sm-22{grid-column:auto/span 22}.panel-grid .g-col-sm-23{grid-column:auto/span 23}.panel-grid .g-col-sm-24{grid-column:auto/span 24}.panel-grid .g-start-sm-1{grid-column-start:1}.panel-grid .g-start-sm-2{grid-column-start:2}.panel-grid .g-start-sm-3{grid-column-start:3}.panel-grid .g-start-sm-4{grid-column-start:4}.panel-grid .g-start-sm-5{grid-column-start:5}.panel-grid .g-start-sm-6{grid-column-start:6}.panel-grid .g-start-sm-7{grid-column-start:7}.panel-grid .g-start-sm-8{grid-column-start:8}.panel-grid .g-start-sm-9{grid-column-start:9}.panel-grid .g-start-sm-10{grid-column-start:10}.panel-grid .g-start-sm-11{grid-column-start:11}.panel-grid .g-start-sm-12{grid-column-start:12}.panel-grid .g-start-sm-13{grid-column-start:13}.panel-grid .g-start-sm-14{grid-column-start:14}.panel-grid .g-start-sm-15{grid-column-start:15}.panel-grid 
.g-start-sm-16{grid-column-start:16}.panel-grid .g-start-sm-17{grid-column-start:17}.panel-grid .g-start-sm-18{grid-column-start:18}.panel-grid .g-start-sm-19{grid-column-start:19}.panel-grid .g-start-sm-20{grid-column-start:20}.panel-grid .g-start-sm-21{grid-column-start:21}.panel-grid .g-start-sm-22{grid-column-start:22}.panel-grid .g-start-sm-23{grid-column-start:23}}@media(min-width: 768px){.panel-grid .g-col-md-1{grid-column:auto/span 1}.panel-grid .g-col-md-2{grid-column:auto/span 2}.panel-grid .g-col-md-3{grid-column:auto/span 3}.panel-grid .g-col-md-4{grid-column:auto/span 4}.panel-grid .g-col-md-5{grid-column:auto/span 5}.panel-grid .g-col-md-6{grid-column:auto/span 6}.panel-grid .g-col-md-7{grid-column:auto/span 7}.panel-grid .g-col-md-8{grid-column:auto/span 8}.panel-grid .g-col-md-9{grid-column:auto/span 9}.panel-grid .g-col-md-10{grid-column:auto/span 10}.panel-grid .g-col-md-11{grid-column:auto/span 11}.panel-grid .g-col-md-12{grid-column:auto/span 12}.panel-grid .g-col-md-13{grid-column:auto/span 13}.panel-grid .g-col-md-14{grid-column:auto/span 14}.panel-grid .g-col-md-15{grid-column:auto/span 15}.panel-grid .g-col-md-16{grid-column:auto/span 16}.panel-grid .g-col-md-17{grid-column:auto/span 17}.panel-grid .g-col-md-18{grid-column:auto/span 18}.panel-grid .g-col-md-19{grid-column:auto/span 19}.panel-grid .g-col-md-20{grid-column:auto/span 20}.panel-grid .g-col-md-21{grid-column:auto/span 21}.panel-grid .g-col-md-22{grid-column:auto/span 22}.panel-grid .g-col-md-23{grid-column:auto/span 23}.panel-grid .g-col-md-24{grid-column:auto/span 24}.panel-grid .g-start-md-1{grid-column-start:1}.panel-grid .g-start-md-2{grid-column-start:2}.panel-grid .g-start-md-3{grid-column-start:3}.panel-grid .g-start-md-4{grid-column-start:4}.panel-grid .g-start-md-5{grid-column-start:5}.panel-grid .g-start-md-6{grid-column-start:6}.panel-grid .g-start-md-7{grid-column-start:7}.panel-grid .g-start-md-8{grid-column-start:8}.panel-grid 
.g-start-md-9{grid-column-start:9}.panel-grid .g-start-md-10{grid-column-start:10}.panel-grid .g-start-md-11{grid-column-start:11}.panel-grid .g-start-md-12{grid-column-start:12}.panel-grid .g-start-md-13{grid-column-start:13}.panel-grid .g-start-md-14{grid-column-start:14}.panel-grid .g-start-md-15{grid-column-start:15}.panel-grid .g-start-md-16{grid-column-start:16}.panel-grid .g-start-md-17{grid-column-start:17}.panel-grid .g-start-md-18{grid-column-start:18}.panel-grid .g-start-md-19{grid-column-start:19}.panel-grid .g-start-md-20{grid-column-start:20}.panel-grid .g-start-md-21{grid-column-start:21}.panel-grid .g-start-md-22{grid-column-start:22}.panel-grid .g-start-md-23{grid-column-start:23}}@media(min-width: 992px){.panel-grid .g-col-lg-1{grid-column:auto/span 1}.panel-grid .g-col-lg-2{grid-column:auto/span 2}.panel-grid .g-col-lg-3{grid-column:auto/span 3}.panel-grid .g-col-lg-4{grid-column:auto/span 4}.panel-grid .g-col-lg-5{grid-column:auto/span 5}.panel-grid .g-col-lg-6{grid-column:auto/span 6}.panel-grid .g-col-lg-7{grid-column:auto/span 7}.panel-grid .g-col-lg-8{grid-column:auto/span 8}.panel-grid .g-col-lg-9{grid-column:auto/span 9}.panel-grid .g-col-lg-10{grid-column:auto/span 10}.panel-grid .g-col-lg-11{grid-column:auto/span 11}.panel-grid .g-col-lg-12{grid-column:auto/span 12}.panel-grid .g-col-lg-13{grid-column:auto/span 13}.panel-grid .g-col-lg-14{grid-column:auto/span 14}.panel-grid .g-col-lg-15{grid-column:auto/span 15}.panel-grid .g-col-lg-16{grid-column:auto/span 16}.panel-grid .g-col-lg-17{grid-column:auto/span 17}.panel-grid .g-col-lg-18{grid-column:auto/span 18}.panel-grid .g-col-lg-19{grid-column:auto/span 19}.panel-grid .g-col-lg-20{grid-column:auto/span 20}.panel-grid .g-col-lg-21{grid-column:auto/span 21}.panel-grid .g-col-lg-22{grid-column:auto/span 22}.panel-grid .g-col-lg-23{grid-column:auto/span 23}.panel-grid .g-col-lg-24{grid-column:auto/span 24}.panel-grid .g-start-lg-1{grid-column-start:1}.panel-grid 
.g-start-lg-2{grid-column-start:2}.panel-grid .g-start-lg-3{grid-column-start:3}.panel-grid .g-start-lg-4{grid-column-start:4}.panel-grid .g-start-lg-5{grid-column-start:5}.panel-grid .g-start-lg-6{grid-column-start:6}.panel-grid .g-start-lg-7{grid-column-start:7}.panel-grid .g-start-lg-8{grid-column-start:8}.panel-grid .g-start-lg-9{grid-column-start:9}.panel-grid .g-start-lg-10{grid-column-start:10}.panel-grid .g-start-lg-11{grid-column-start:11}.panel-grid .g-start-lg-12{grid-column-start:12}.panel-grid .g-start-lg-13{grid-column-start:13}.panel-grid .g-start-lg-14{grid-column-start:14}.panel-grid .g-start-lg-15{grid-column-start:15}.panel-grid .g-start-lg-16{grid-column-start:16}.panel-grid .g-start-lg-17{grid-column-start:17}.panel-grid .g-start-lg-18{grid-column-start:18}.panel-grid .g-start-lg-19{grid-column-start:19}.panel-grid .g-start-lg-20{grid-column-start:20}.panel-grid .g-start-lg-21{grid-column-start:21}.panel-grid .g-start-lg-22{grid-column-start:22}.panel-grid .g-start-lg-23{grid-column-start:23}}@media(min-width: 1200px){.panel-grid .g-col-xl-1{grid-column:auto/span 1}.panel-grid .g-col-xl-2{grid-column:auto/span 2}.panel-grid .g-col-xl-3{grid-column:auto/span 3}.panel-grid .g-col-xl-4{grid-column:auto/span 4}.panel-grid .g-col-xl-5{grid-column:auto/span 5}.panel-grid .g-col-xl-6{grid-column:auto/span 6}.panel-grid .g-col-xl-7{grid-column:auto/span 7}.panel-grid .g-col-xl-8{grid-column:auto/span 8}.panel-grid .g-col-xl-9{grid-column:auto/span 9}.panel-grid .g-col-xl-10{grid-column:auto/span 10}.panel-grid .g-col-xl-11{grid-column:auto/span 11}.panel-grid .g-col-xl-12{grid-column:auto/span 12}.panel-grid .g-col-xl-13{grid-column:auto/span 13}.panel-grid .g-col-xl-14{grid-column:auto/span 14}.panel-grid .g-col-xl-15{grid-column:auto/span 15}.panel-grid .g-col-xl-16{grid-column:auto/span 16}.panel-grid .g-col-xl-17{grid-column:auto/span 17}.panel-grid .g-col-xl-18{grid-column:auto/span 18}.panel-grid .g-col-xl-19{grid-column:auto/span 19}.panel-grid 
.g-col-xl-20{grid-column:auto/span 20}.panel-grid .g-col-xl-21{grid-column:auto/span 21}.panel-grid .g-col-xl-22{grid-column:auto/span 22}.panel-grid .g-col-xl-23{grid-column:auto/span 23}.panel-grid .g-col-xl-24{grid-column:auto/span 24}.panel-grid .g-start-xl-1{grid-column-start:1}.panel-grid .g-start-xl-2{grid-column-start:2}.panel-grid .g-start-xl-3{grid-column-start:3}.panel-grid .g-start-xl-4{grid-column-start:4}.panel-grid .g-start-xl-5{grid-column-start:5}.panel-grid .g-start-xl-6{grid-column-start:6}.panel-grid .g-start-xl-7{grid-column-start:7}.panel-grid .g-start-xl-8{grid-column-start:8}.panel-grid .g-start-xl-9{grid-column-start:9}.panel-grid .g-start-xl-10{grid-column-start:10}.panel-grid .g-start-xl-11{grid-column-start:11}.panel-grid .g-start-xl-12{grid-column-start:12}.panel-grid .g-start-xl-13{grid-column-start:13}.panel-grid .g-start-xl-14{grid-column-start:14}.panel-grid .g-start-xl-15{grid-column-start:15}.panel-grid .g-start-xl-16{grid-column-start:16}.panel-grid .g-start-xl-17{grid-column-start:17}.panel-grid .g-start-xl-18{grid-column-start:18}.panel-grid .g-start-xl-19{grid-column-start:19}.panel-grid .g-start-xl-20{grid-column-start:20}.panel-grid .g-start-xl-21{grid-column-start:21}.panel-grid .g-start-xl-22{grid-column-start:22}.panel-grid .g-start-xl-23{grid-column-start:23}}@media(min-width: 1400px){.panel-grid .g-col-xxl-1{grid-column:auto/span 1}.panel-grid .g-col-xxl-2{grid-column:auto/span 2}.panel-grid .g-col-xxl-3{grid-column:auto/span 3}.panel-grid .g-col-xxl-4{grid-column:auto/span 4}.panel-grid .g-col-xxl-5{grid-column:auto/span 5}.panel-grid .g-col-xxl-6{grid-column:auto/span 6}.panel-grid .g-col-xxl-7{grid-column:auto/span 7}.panel-grid .g-col-xxl-8{grid-column:auto/span 8}.panel-grid .g-col-xxl-9{grid-column:auto/span 9}.panel-grid .g-col-xxl-10{grid-column:auto/span 10}.panel-grid .g-col-xxl-11{grid-column:auto/span 11}.panel-grid .g-col-xxl-12{grid-column:auto/span 12}.panel-grid .g-col-xxl-13{grid-column:auto/span 
13}.panel-grid .g-col-xxl-14{grid-column:auto/span 14}.panel-grid .g-col-xxl-15{grid-column:auto/span 15}.panel-grid .g-col-xxl-16{grid-column:auto/span 16}.panel-grid .g-col-xxl-17{grid-column:auto/span 17}.panel-grid .g-col-xxl-18{grid-column:auto/span 18}.panel-grid .g-col-xxl-19{grid-column:auto/span 19}.panel-grid .g-col-xxl-20{grid-column:auto/span 20}.panel-grid .g-col-xxl-21{grid-column:auto/span 21}.panel-grid .g-col-xxl-22{grid-column:auto/span 22}.panel-grid .g-col-xxl-23{grid-column:auto/span 23}.panel-grid .g-col-xxl-24{grid-column:auto/span 24}.panel-grid .g-start-xxl-1{grid-column-start:1}.panel-grid .g-start-xxl-2{grid-column-start:2}.panel-grid .g-start-xxl-3{grid-column-start:3}.panel-grid .g-start-xxl-4{grid-column-start:4}.panel-grid .g-start-xxl-5{grid-column-start:5}.panel-grid .g-start-xxl-6{grid-column-start:6}.panel-grid .g-start-xxl-7{grid-column-start:7}.panel-grid .g-start-xxl-8{grid-column-start:8}.panel-grid .g-start-xxl-9{grid-column-start:9}.panel-grid .g-start-xxl-10{grid-column-start:10}.panel-grid .g-start-xxl-11{grid-column-start:11}.panel-grid .g-start-xxl-12{grid-column-start:12}.panel-grid .g-start-xxl-13{grid-column-start:13}.panel-grid .g-start-xxl-14{grid-column-start:14}.panel-grid .g-start-xxl-15{grid-column-start:15}.panel-grid .g-start-xxl-16{grid-column-start:16}.panel-grid .g-start-xxl-17{grid-column-start:17}.panel-grid .g-start-xxl-18{grid-column-start:18}.panel-grid .g-start-xxl-19{grid-column-start:19}.panel-grid .g-start-xxl-20{grid-column-start:20}.panel-grid .g-start-xxl-21{grid-column-start:21}.panel-grid .g-start-xxl-22{grid-column-start:22}.panel-grid .g-start-xxl-23{grid-column-start:23}}main{margin-top:1em;margin-bottom:1em}h1,.h1,h2,.h2{color:inherit;margin-top:2rem;margin-bottom:1rem;font-weight:600}h1.title,.title.h1{margin-top:0}main.content>section:first-of-type>h2:first-child,main.content>section:first-of-type>.h2:first-child{margin-top:0}h2,.h2{border-bottom:1px solid 
#dee2e6;padding-bottom:.5rem}h3,.h3{font-weight:600}h3,.h3,h4,.h4{opacity:.9;margin-top:1.5rem}h5,.h5,h6,.h6{opacity:.9}.header-section-number{color:#e6b978}.nav-link.active .header-section-number{color:inherit}mark,.mark{padding:0em}.panel-caption,.figure-caption,.subfigure-caption,.table-caption,figcaption,caption{font-size:.9rem;color:#e6b978}.quarto-layout-cell[data-ref-parent] caption{color:#e6b978}.column-margin figcaption,.margin-caption,div.aside,aside,.column-margin{color:#e6b978;font-size:.825rem}.panel-caption.margin-caption{text-align:inherit}.column-margin.column-container p{margin-bottom:0}.column-margin.column-container>*:not(.collapse):first-child{padding-bottom:.5em;display:block}.column-margin.column-container>*:not(.collapse):not(:first-child){padding-top:.5em;padding-bottom:.5em;display:block}.column-margin.column-container>*.collapse:not(.show){display:none}@media(min-width: 768px){.column-margin.column-container .callout-margin-content:first-child{margin-top:4.5em}.column-margin.column-container .callout-margin-content-simple:first-child{margin-top:3.5em}}.margin-caption>*{padding-top:.5em;padding-bottom:.5em}@media(max-width: 767.98px){.quarto-layout-row{flex-direction:column}}.nav-tabs .nav-item{margin-top:1px;cursor:pointer}.tab-content{margin-top:0px;border-left:#dee2e6 1px solid;border-right:#dee2e6 1px solid;border-bottom:#dee2e6 1px solid;margin-left:0;padding:1em;margin-bottom:1em}@media(max-width: 767.98px){.layout-sidebar{margin-left:0;margin-right:0}}.panel-sidebar,.panel-sidebar .form-control,.panel-input,.panel-input .form-control,.selectize-dropdown{font-size:.9rem}.panel-sidebar .form-control,.panel-input .form-control{padding-top:.1rem}.tab-pane div.sourceCode{margin-top:0px}.tab-pane>p{padding-top:0}.tab-pane>p:nth-child(1){padding-top:0}.tab-pane>p:last-child{margin-bottom:0}.tab-pane>pre:last-child{margin-bottom:0}.tab-content>.tab-pane:not(.active){display:none 
!important}div.sourceCode{background-color:rgba(233,236,239,.2);border:1px solid rgba(233,236,239,.2)}pre.sourceCode{background-color:rgba(0,0,0,0)}pre.sourceCode{border:none;font-size:.875em;overflow:visible !important;padding:.4em}div.sourceCode{overflow-y:hidden}.callout div.sourceCode{margin-left:initial}.blockquote{font-size:inherit;padding-left:1rem;padding-right:1.5rem;color:#e6b978}.blockquote h1:first-child,.blockquote .h1:first-child,.blockquote h2:first-child,.blockquote .h2:first-child,.blockquote h3:first-child,.blockquote .h3:first-child,.blockquote h4:first-child,.blockquote .h4:first-child,.blockquote h5:first-child,.blockquote .h5:first-child{margin-top:0}pre{background-color:initial;padding:initial;border:initial}p pre code:not(.sourceCode),li pre code:not(.sourceCode),pre code:not(.sourceCode){background-color:initial}p code:not(.sourceCode),li code:not(.sourceCode),td code:not(.sourceCode){background-color:#f8f9fa;padding:.2em}nav p code:not(.sourceCode),nav li code:not(.sourceCode),nav td code:not(.sourceCode){background-color:rgba(0,0,0,0);padding:0}td code:not(.sourceCode){white-space:pre-wrap}#quarto-embedded-source-code-modal>.modal-dialog{max-width:1000px;padding-left:1.75rem;padding-right:1.75rem}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-body{padding:0}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-body div.sourceCode{margin:0;padding:.2rem .2rem;border-radius:0px;border:none}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-header{padding:.7rem}.code-tools-button{font-size:1rem;padding:.15rem .15rem;margin-left:5px;color:#6c757d;background-color:rgba(0,0,0,0);transition:initial;cursor:pointer}.code-tools-button>.bi::before{display:inline-block;height:1rem;width:1rem;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:1rem 
1rem}.code-tools-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}#quarto-embedded-source-code-modal .code-copy-button>.bi::before{background-image:url('data:image/svg+xml,')}#quarto-embedded-source-code-modal .code-copy-button-checked>.bi::before{background-image:url('data:image/svg+xml,')}.sidebar{will-change:top;transition:top 200ms linear;position:sticky;overflow-y:auto;padding-top:1.2em;max-height:100vh}.sidebar.toc-left,.sidebar.margin-sidebar{top:0px;padding-top:1em}.sidebar.quarto-banner-title-block-sidebar>*{padding-top:1.65em}figure .quarto-notebook-link{margin-top:.5em}.quarto-notebook-link{font-size:.75em;color:#6c757d;margin-bottom:1em;text-decoration:none;display:block}.quarto-notebook-link:hover{text-decoration:underline;color:#aed1e4}.quarto-notebook-link::before{display:inline-block;height:.75rem;width:.75rem;margin-bottom:0em;margin-right:.25em;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:.75rem .75rem}.toc-actions i.bi,.quarto-code-links i.bi,.quarto-other-links i.bi,.quarto-alternate-notebooks i.bi,.quarto-alternate-formats i.bi{margin-right:.4em;font-size:.8rem}.quarto-other-links-text-target .quarto-code-links i.bi,.quarto-other-links-text-target .quarto-other-links i.bi{margin-right:.2em}.quarto-other-formats-text-target .quarto-alternate-formats i.bi{margin-right:.1em}.toc-actions i.bi.empty,.quarto-code-links i.bi.empty,.quarto-other-links i.bi.empty,.quarto-alternate-notebooks i.bi.empty,.quarto-alternate-formats i.bi.empty{padding-left:1em}.quarto-notebook h2,.quarto-notebook .h2{border-bottom:none}.quarto-notebook .cell-container{display:flex}.quarto-notebook .cell-container .cell{flex-grow:4}.quarto-notebook .cell-container .cell-decorator{padding-top:1.5em;padding-right:1em;text-align:right}.quarto-notebook .cell-container.code-fold .cell-decorator{padding-top:3em}.quarto-notebook .cell-code code{white-space:pre-wrap}.quarto-notebook .cell 
.cell-output-stderr pre code,.quarto-notebook .cell .cell-output-stdout pre code{white-space:pre-wrap;overflow-wrap:anywhere}.toc-actions,.quarto-alternate-formats,.quarto-other-links,.quarto-code-links,.quarto-alternate-notebooks{padding-left:0em}.sidebar .toc-actions a,.sidebar .quarto-alternate-formats a,.sidebar .quarto-other-links a,.sidebar .quarto-code-links a,.sidebar .quarto-alternate-notebooks a,.sidebar nav[role=doc-toc] a{text-decoration:none}.sidebar .toc-actions a:hover,.sidebar .quarto-other-links a:hover,.sidebar .quarto-code-links a:hover,.sidebar .quarto-alternate-formats a:hover,.sidebar .quarto-alternate-notebooks a:hover{color:#aed1e4}.sidebar .toc-actions h2,.sidebar .toc-actions .h2,.sidebar .quarto-code-links h2,.sidebar .quarto-code-links .h2,.sidebar .quarto-other-links h2,.sidebar .quarto-other-links .h2,.sidebar .quarto-alternate-notebooks h2,.sidebar .quarto-alternate-notebooks .h2,.sidebar .quarto-alternate-formats h2,.sidebar .quarto-alternate-formats .h2,.sidebar nav[role=doc-toc]>h2,.sidebar nav[role=doc-toc]>.h2{font-weight:500;margin-bottom:.2rem;margin-top:.3rem;font-family:inherit;border-bottom:0;padding-bottom:0;padding-top:0px}.sidebar .toc-actions>h2,.sidebar .toc-actions>.h2,.sidebar .quarto-code-links>h2,.sidebar .quarto-code-links>.h2,.sidebar .quarto-other-links>h2,.sidebar .quarto-other-links>.h2,.sidebar .quarto-alternate-notebooks>h2,.sidebar .quarto-alternate-notebooks>.h2,.sidebar .quarto-alternate-formats>h2,.sidebar .quarto-alternate-formats>.h2{font-size:.8rem}.sidebar nav[role=doc-toc]>h2,.sidebar nav[role=doc-toc]>.h2{font-size:.875rem}.sidebar nav[role=doc-toc]>ul a{border-left:1px solid #e9ecef;padding-left:.6rem}.sidebar .toc-actions h2>ul a,.sidebar .toc-actions .h2>ul a,.sidebar .quarto-code-links h2>ul a,.sidebar .quarto-code-links .h2>ul a,.sidebar .quarto-other-links h2>ul a,.sidebar .quarto-other-links .h2>ul a,.sidebar .quarto-alternate-notebooks h2>ul a,.sidebar .quarto-alternate-notebooks .h2>ul 
a,.sidebar .quarto-alternate-formats h2>ul a,.sidebar .quarto-alternate-formats .h2>ul a{border-left:none;padding-left:.6rem}.sidebar .toc-actions ul a:empty,.sidebar .quarto-code-links ul a:empty,.sidebar .quarto-other-links ul a:empty,.sidebar .quarto-alternate-notebooks ul a:empty,.sidebar .quarto-alternate-formats ul a:empty,.sidebar nav[role=doc-toc]>ul a:empty{display:none}.sidebar .toc-actions ul,.sidebar .quarto-code-links ul,.sidebar .quarto-other-links ul,.sidebar .quarto-alternate-notebooks ul,.sidebar .quarto-alternate-formats ul{padding-left:0;list-style:none}.sidebar nav[role=doc-toc] ul{list-style:none;padding-left:0;list-style:none}.sidebar nav[role=doc-toc]>ul{margin-left:.45em}.quarto-margin-sidebar nav[role=doc-toc]{padding-left:.5em}.sidebar .toc-actions>ul,.sidebar .quarto-code-links>ul,.sidebar .quarto-other-links>ul,.sidebar .quarto-alternate-notebooks>ul,.sidebar .quarto-alternate-formats>ul{font-size:.8rem}.sidebar nav[role=doc-toc]>ul{font-size:.875rem}.sidebar .toc-actions ul li a,.sidebar .quarto-code-links ul li a,.sidebar .quarto-other-links ul li a,.sidebar .quarto-alternate-notebooks ul li a,.sidebar .quarto-alternate-formats ul li a,.sidebar nav[role=doc-toc]>ul li a{line-height:1.1rem;padding-bottom:.2rem;padding-top:.2rem;color:inherit}.sidebar nav[role=doc-toc] ul>li>ul>li>a{padding-left:1.2em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>a{padding-left:2.4em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>a{padding-left:3.6em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>ul>li>a{padding-left:4.8em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>ul>li>ul>li>a{padding-left:6em}.sidebar nav[role=doc-toc] ul>li>a.active,.sidebar nav[role=doc-toc] ul>li>ul>li>a.active{border-left:1px solid #aed1e4;color:#aed1e4 !important}.sidebar nav[role=doc-toc] ul>li>a:hover,.sidebar nav[role=doc-toc] ul>li>ul>li>a:hover{color:#aed1e4 !important}kbd,.kbd{color:#faf1e4;background-color:#4a4b4b;border:1px 
solid;border-radius:5px;border-color:#dee2e6}.quarto-appendix-contents div.hanging-indent{margin-left:0em}.quarto-appendix-contents div.hanging-indent div.csl-entry{margin-left:1em;text-indent:-1em}.citation a,.footnote-ref{text-decoration:none}.footnotes ol{padding-left:1em}.tippy-content>*{margin-bottom:.7em}.tippy-content>*:last-child{margin-bottom:0}.callout{margin-top:1.25rem;margin-bottom:1.25rem;border-radius:.25rem;overflow-wrap:break-word}.callout .callout-title-container{overflow-wrap:anywhere}.callout.callout-style-simple{padding:.4em .7em;border-left:5px solid;border-right:1px solid #dee2e6;border-top:1px solid #dee2e6;border-bottom:1px solid #dee2e6}.callout.callout-style-default{border-left:5px solid;border-right:1px solid #dee2e6;border-top:1px solid #dee2e6;border-bottom:1px solid #dee2e6}.callout .callout-body-container{flex-grow:1}.callout.callout-style-simple .callout-body{font-size:.9rem;font-weight:400}.callout.callout-style-default .callout-body{font-size:.9rem;font-weight:400}.callout:not(.no-icon).callout-titled.callout-style-simple .callout-body{padding-left:1.6em}.callout.callout-titled>.callout-header{padding-top:.2em;margin-bottom:-0.2em}.callout.callout-style-simple>div.callout-header{border-bottom:none;font-size:.9rem;font-weight:600;opacity:75%}.callout.callout-style-default>div.callout-header{border-bottom:none;font-weight:600;opacity:85%;font-size:.9rem;padding-left:.5em;padding-right:.5em}.callout.callout-style-default .callout-body{padding-left:.5em;padding-right:.5em}.callout.callout-style-default .callout-body>:first-child{padding-top:.5rem;margin-top:0}.callout>div.callout-header[data-bs-toggle=collapse]{cursor:pointer}.callout.callout-style-default .callout-header[aria-expanded=false],.callout.callout-style-default .callout-header[aria-expanded=true]{padding-top:0px;margin-bottom:0px;align-items:center}.callout.callout-titled .callout-body>:last-child:not(.sourceCode),.callout.callout-titled 
.callout-body>div>:last-child:not(.sourceCode){padding-bottom:.5rem;margin-bottom:0}.callout:not(.callout-titled) .callout-body>:first-child,.callout:not(.callout-titled) .callout-body>div>:first-child{margin-top:.25rem}.callout:not(.callout-titled) .callout-body>:last-child,.callout:not(.callout-titled) .callout-body>div>:last-child{margin-bottom:.2rem}.callout.callout-style-simple .callout-icon::before,.callout.callout-style-simple .callout-toggle::before{height:1rem;width:1rem;display:inline-block;content:"";background-repeat:no-repeat;background-size:1rem 1rem}.callout.callout-style-default .callout-icon::before,.callout.callout-style-default .callout-toggle::before{height:.9rem;width:.9rem;display:inline-block;content:"";background-repeat:no-repeat;background-size:.9rem .9rem}.callout.callout-style-default .callout-toggle::before{margin-top:5px}.callout .callout-btn-toggle .callout-toggle::before{transition:transform .2s linear}.callout .callout-header[aria-expanded=false] .callout-toggle::before{transform:rotate(-90deg)}.callout .callout-header[aria-expanded=true] .callout-toggle::before{transform:none}.callout.callout-style-simple:not(.no-icon) div.callout-icon-container{padding-top:.2em;padding-right:.55em}.callout.callout-style-default:not(.no-icon) div.callout-icon-container{padding-top:.1em;padding-right:.35em}.callout.callout-style-default:not(.no-icon) div.callout-title-container{margin-top:-1px}.callout.callout-style-default.callout-caution:not(.no-icon) div.callout-icon-container{padding-top:.3em;padding-right:.35em}.callout>.callout-body>.callout-icon-container>.no-icon,.callout>.callout-header>.callout-icon-container>.no-icon{display:none}div.callout.callout{border-left-color:#6c757d}div.callout.callout-style-default>.callout-header{background-color:#6c757d}div.callout-note.callout{border-left-color:#2780e3}div.callout-note.callout-style-default>.callout-header{background-color:#0c2644}div.callout-note:not(.callout-titled) 
.callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-note.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-note .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-tip.callout{border-left-color:#3fb618}div.callout-tip.callout-style-default>.callout-header{background-color:#133707}div.callout-tip:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-tip.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-tip .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-warning.callout{border-left-color:#ff7518}div.callout-warning.callout-style-default>.callout-header{background-color:#4d2307}div.callout-warning:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-warning.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-warning .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-caution.callout{border-left-color:#f0ad4e}div.callout-caution.callout-style-default>.callout-header{background-color:#483417}div.callout-caution:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-caution.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-caution .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-important.callout{border-left-color:#ff0039}div.callout-important.callout-style-default>.callout-header{background-color:#4d0011}div.callout-important:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-important.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-important 
.callout-toggle::before{background-image:url('data:image/svg+xml,')}.quarto-toggle-container{display:flex;align-items:center}.quarto-reader-toggle .bi::before,.quarto-color-scheme-toggle .bi::before{display:inline-block;height:1rem;width:1rem;content:"";background-repeat:no-repeat;background-size:1rem 1rem}.sidebar-navigation{padding-left:20px}.navbar{background-color:#303030;color:#bdbdbd}.navbar .quarto-color-scheme-toggle:not(.alternate) .bi::before{background-image:url('data:image/svg+xml,')}.navbar .quarto-color-scheme-toggle.alternate .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-color-scheme-toggle:not(.alternate) .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-color-scheme-toggle.alternate .bi::before{background-image:url('data:image/svg+xml,')}.quarto-sidebar-toggle{border-color:#dee2e6;border-bottom-left-radius:.25rem;border-bottom-right-radius:.25rem;border-style:solid;border-width:1px;overflow:hidden;border-top-width:0px;padding-top:0px !important}.quarto-sidebar-toggle-title{cursor:pointer;padding-bottom:2px;margin-left:.25em;text-align:center;font-weight:400;font-size:.775em}#quarto-content .quarto-sidebar-toggle{background:#1d1d1d}#quarto-content .quarto-sidebar-toggle-title{color:#faf1e4}.quarto-sidebar-toggle-icon{color:#dee2e6;margin-right:.5em;float:right;transition:transform .2s ease}.quarto-sidebar-toggle-icon::before{padding-top:5px}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-icon{transform:rotate(-180deg)}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-title{border-bottom:solid #dee2e6 1px}.quarto-sidebar-toggle-contents{background-color:#181818;padding-right:10px;padding-left:10px;margin-top:0px !important;transition:max-height .5s ease}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-contents{padding-top:1em;padding-bottom:10px}@media(max-width: 767.98px){.sidebar-menu-container{padding-bottom:5em}}.quarto-sidebar-toggle:not(.expanded) 
.quarto-sidebar-toggle-contents{padding-top:0px !important;padding-bottom:0px}nav[role=doc-toc]{z-index:1020}#quarto-sidebar>*,nav[role=doc-toc]>*{transition:opacity .1s ease,border .1s ease}#quarto-sidebar.slow>*,nav[role=doc-toc].slow>*{transition:opacity .4s ease,border .4s ease}.quarto-color-scheme-toggle:not(.alternate).top-right .bi::before{background-image:url('data:image/svg+xml,')}.quarto-color-scheme-toggle.alternate.top-right .bi::before{background-image:url('data:image/svg+xml,')}#quarto-appendix.default{border-top:1px solid #dee2e6}#quarto-appendix.default{background-color:#181818;padding-top:1.5em;margin-top:2em;z-index:998}#quarto-appendix.default .quarto-appendix-heading{margin-top:0;line-height:1.4em;font-weight:600;opacity:.9;border-bottom:none;margin-bottom:0}#quarto-appendix.default .footnotes ol,#quarto-appendix.default .footnotes ol li>p:last-of-type,#quarto-appendix.default .quarto-appendix-contents>p:last-of-type{margin-bottom:0}#quarto-appendix.default .footnotes ol{margin-left:.5em}#quarto-appendix.default .quarto-appendix-secondary-label{margin-bottom:.4em}#quarto-appendix.default .quarto-appendix-bibtex{font-size:.7em;padding:1em;border:solid 1px #dee2e6;margin-bottom:1em}#quarto-appendix.default .quarto-appendix-bibtex code.sourceCode{white-space:pre-wrap}#quarto-appendix.default .quarto-appendix-citeas{font-size:.9em;padding:1em;border:solid 1px #dee2e6;margin-bottom:1em}#quarto-appendix.default .quarto-appendix-heading{font-size:1em !important}#quarto-appendix.default *[role=doc-endnotes]>ol,#quarto-appendix.default .quarto-appendix-contents>*:not(h2):not(.h2){font-size:.9em}#quarto-appendix.default section{padding-bottom:1.5em}#quarto-appendix.default section *[role=doc-endnotes],#quarto-appendix.default section>*:not(a){opacity:.9;word-wrap:break-word}.btn.btn-quarto,div.cell-output-display .btn-quarto{--bs-btn-color: #cacccd;--bs-btn-bg: #343a40;--bs-btn-border-color: #343a40;--bs-btn-hover-color: #cacccd;--bs-btn-hover-bg: 
#52585d;--bs-btn-hover-border-color: #484e53;--bs-btn-focus-shadow-rgb: 75, 80, 85;--bs-btn-active-color: #fff;--bs-btn-active-bg: #5d6166;--bs-btn-active-border-color: #484e53;--bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);--bs-btn-disabled-color: #fff;--bs-btn-disabled-bg: #343a40;--bs-btn-disabled-border-color: #343a40}nav.quarto-secondary-nav.color-navbar{background-color:#303030;color:#bdbdbd}nav.quarto-secondary-nav.color-navbar h1,nav.quarto-secondary-nav.color-navbar .h1,nav.quarto-secondary-nav.color-navbar .quarto-btn-toggle{color:#bdbdbd}@media(max-width: 991.98px){body.nav-sidebar .quarto-title-banner{margin-bottom:0;padding-bottom:1em}body.nav-sidebar #title-block-header{margin-block-end:0}}p.subtitle{margin-top:.25em;margin-bottom:.5em}code a:any-link{color:inherit;text-decoration-color:#6c757d}/*! dark */div.observablehq table thead tr th{background-color:var(--bs-body-bg)}input,button,select,optgroup,textarea{background-color:var(--bs-body-bg)}.code-annotated .code-copy-button{margin-right:1.25em;margin-top:0;padding-bottom:0;padding-top:3px}.code-annotation-gutter-bg{background-color:#181818}.code-annotation-gutter{background-color:rgba(233,236,239,.2)}.code-annotation-gutter,.code-annotation-gutter-bg{height:100%;width:calc(20px + .5em);position:absolute;top:0;right:0}dl.code-annotation-container-grid dt{margin-right:1em;margin-top:.25rem}dl.code-annotation-container-grid dt{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;color:#f2dbb9;border:solid #f2dbb9 1px;border-radius:50%;height:22px;width:22px;line-height:22px;font-size:11px;text-align:center;vertical-align:middle;text-decoration:none}dl.code-annotation-container-grid dt[data-target-cell]{cursor:pointer}dl.code-annotation-container-grid dt[data-target-cell].code-annotation-active{color:#181818;border:solid #aaa 1px;background-color:#aaa}pre.code-annotation-code{padding-top:0;padding-bottom:0}pre.code-annotation-code 
code{z-index:3}#code-annotation-line-highlight-gutter{width:100%;border-top:solid rgba(170,170,170,.2666666667) 1px;border-bottom:solid rgba(170,170,170,.2666666667) 1px;z-index:2;background-color:rgba(170,170,170,.1333333333)}#code-annotation-line-highlight{margin-left:-4em;width:calc(100% + 4em);border-top:solid rgba(170,170,170,.2666666667) 1px;border-bottom:solid rgba(170,170,170,.2666666667) 1px;z-index:2;background-color:rgba(170,170,170,.1333333333)}code.sourceCode .code-annotation-anchor.code-annotation-active{background-color:var(--quarto-hl-normal-color, #aaaaaa);border:solid var(--quarto-hl-normal-color, #aaaaaa) 1px;color:#e9ecef;font-weight:bolder}code.sourceCode .code-annotation-anchor{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;color:var(--quarto-hl-co-color);border:solid var(--quarto-hl-co-color) 1px;border-radius:50%;height:18px;width:18px;font-size:9px;margin-top:2px}code.sourceCode button.code-annotation-anchor{padding:2px;user-select:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none}code.sourceCode a.code-annotation-anchor{line-height:18px;text-align:center;vertical-align:middle;cursor:default;text-decoration:none}@media print{.page-columns .column-screen-inset{grid-column:page-start-inset/page-end-inset;z-index:998;opacity:.999}.page-columns .column-screen-inset table{background:#181818}.page-columns .column-screen-inset-left{grid-column:page-start-inset/body-content-end;z-index:998;opacity:.999}.page-columns .column-screen-inset-left table{background:#181818}.page-columns .column-screen-inset-right{grid-column:body-content-start/page-end-inset;z-index:998;opacity:.999}.page-columns .column-screen-inset-right table{background:#181818}.page-columns .column-screen{grid-column:page-start/page-end;z-index:998;opacity:.999}.page-columns .column-screen table{background:#181818}.page-columns 
.column-screen-left{grid-column:page-start/body-content-end;z-index:998;opacity:.999}.page-columns .column-screen-left table{background:#181818}.page-columns .column-screen-right{grid-column:body-content-start/page-end;z-index:998;opacity:.999}.page-columns .column-screen-right table{background:#181818}.page-columns .column-screen-inset-shaded{grid-column:page-start-inset/page-end-inset;padding:1em;background:#525252;z-index:998;opacity:.999;margin-bottom:1em}}.quarto-video{margin-bottom:1em}.table{border-top:1px solid #454341;border-bottom:1px solid #454341}.table>thead{border-top-width:0;border-bottom:1px solid #89857e}.table a{word-break:break-word}.table>:not(caption)>*>*{background-color:unset;color:unset}#quarto-document-content .crosstalk-input .checkbox input[type=checkbox],#quarto-document-content .crosstalk-input .checkbox-inline input[type=checkbox]{position:unset;margin-top:unset;margin-left:unset}#quarto-document-content .row{margin-left:unset;margin-right:unset}.quarto-xref{white-space:nowrap}#quarto-draft-alert{margin-top:0px;margin-bottom:0px;padding:.3em;text-align:center;font-size:.9em}#quarto-draft-alert i{margin-right:.3em}#quarto-back-to-top{z-index:1000}pre{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:0.875em;font-weight:400}pre code{font-family:inherit;font-size:inherit;font-weight:inherit}code{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:0.875em;font-weight:400}a{background-color:rgba(0,0,0,0);font-weight:400;text-decoration:underline}a.external:after{content:"";background-image:url('data:image/svg+xml,');background-size:contain;background-repeat:no-repeat;background-position:center center;margin-left:.2em;padding-right:.75em}div.sourceCode code a.external:after{content:none}a.external:after:hover{cursor:pointer}.quarto-ext-icon{display:inline-block;font-size:.75em;padding-left:.3em}.code-with-filename 
.code-with-filename-file{margin-bottom:0;padding-bottom:2px;padding-top:2px;padding-left:.7em;border:var(--quarto-border-width) solid var(--quarto-border-color);border-radius:var(--quarto-border-radius);border-bottom:0;border-bottom-left-radius:0%;border-bottom-right-radius:0%}.code-with-filename div.sourceCode,.reveal .code-with-filename div.sourceCode{margin-top:0;border-top-left-radius:0%;border-top-right-radius:0%}.code-with-filename .code-with-filename-file pre{margin-bottom:0}.code-with-filename .code-with-filename-file{background-color:rgba(219,219,219,.8)}.quarto-dark .code-with-filename .code-with-filename-file{background-color:#555}.code-with-filename .code-with-filename-file strong{font-weight:400}.quarto-title-banner{margin-bottom:1em;color:#bdbdbd;background:#303030}.quarto-title-banner a{color:#bdbdbd}.quarto-title-banner h1,.quarto-title-banner .h1,.quarto-title-banner h2,.quarto-title-banner .h2{color:#bdbdbd}.quarto-title-banner .code-tools-button{color:#8a8a8a}.quarto-title-banner .code-tools-button:hover{color:#bdbdbd}.quarto-title-banner .code-tools-button>.bi::before{background-image:url('data:image/svg+xml,')}.quarto-title-banner .code-tools-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}.quarto-title-banner .quarto-title .title{font-weight:600}.quarto-title-banner .quarto-categories{margin-top:.75em}@media(min-width: 992px){.quarto-title-banner{padding-top:2.5em;padding-bottom:2.5em}}@media(max-width: 991.98px){.quarto-title-banner{padding-top:1em;padding-bottom:1em}}@media(max-width: 767.98px){body.hypothesis-enabled #title-block-header>*{padding-right:20px}}main.quarto-banner-title-block>section:first-child>h2,main.quarto-banner-title-block>section:first-child>.h2,main.quarto-banner-title-block>section:first-child>h3,main.quarto-banner-title-block>section:first-child>.h3,main.quarto-banner-title-block>section:first-child>h4,main.quarto-banner-title-block>section:first-child>.h4{margin-top:0}.quarto-title 
.quarto-categories{display:flex;flex-wrap:wrap;row-gap:.5em;column-gap:.4em;padding-bottom:.5em;margin-top:.75em}.quarto-title .quarto-categories .quarto-category{padding:.25em .75em;font-size:.65em;text-transform:uppercase;border:solid 1px;border-radius:.25rem;opacity:.6}.quarto-title .quarto-categories .quarto-category a{color:inherit}.quarto-title-meta-container{display:grid;grid-template-columns:1fr auto}.quarto-title-meta-column-end{display:flex;flex-direction:column;padding-left:1em}.quarto-title-meta-column-end a .bi{margin-right:.3em}#title-block-header.quarto-title-block.default .quarto-title-meta{display:grid;grid-template-columns:repeat(2, 1fr);grid-column-gap:1em}#title-block-header.quarto-title-block.default .quarto-title .title{margin-bottom:0}#title-block-header.quarto-title-block.default .quarto-title-author-orcid img{margin-top:-0.2em;height:.8em;width:.8em}#title-block-header.quarto-title-block.default .quarto-title-author-email{opacity:.7}#title-block-header.quarto-title-block.default .quarto-description p:last-of-type{margin-bottom:0}#title-block-header.quarto-title-block.default .quarto-title-meta-contents p,#title-block-header.quarto-title-block.default .quarto-title-authors p,#title-block-header.quarto-title-block.default .quarto-title-affiliations p{margin-bottom:.1em}#title-block-header.quarto-title-block.default .quarto-title-meta-heading{text-transform:uppercase;margin-top:1em;font-size:.8em;opacity:.8;font-weight:400}#title-block-header.quarto-title-block.default .quarto-title-meta-contents{font-size:.9em}#title-block-header.quarto-title-block.default .quarto-title-meta-contents p.affiliation:last-of-type{margin-bottom:.1em}#title-block-header.quarto-title-block.default p.affiliation{margin-bottom:.1em}#title-block-header.quarto-title-block.default .keywords,#title-block-header.quarto-title-block.default .description,#title-block-header.quarto-title-block.default .abstract{margin-top:0}#title-block-header.quarto-title-block.default 
.keywords>p,#title-block-header.quarto-title-block.default .description>p,#title-block-header.quarto-title-block.default .abstract>p{font-size:.9em}#title-block-header.quarto-title-block.default .keywords>p:last-of-type,#title-block-header.quarto-title-block.default .description>p:last-of-type,#title-block-header.quarto-title-block.default .abstract>p:last-of-type{margin-bottom:0}#title-block-header.quarto-title-block.default .keywords .block-title,#title-block-header.quarto-title-block.default .description .block-title,#title-block-header.quarto-title-block.default .abstract .block-title{margin-top:1em;text-transform:uppercase;font-size:.8em;opacity:.8;font-weight:400}#title-block-header.quarto-title-block.default .quarto-title-meta-author{display:grid;grid-template-columns:minmax(max-content, 1fr) 1fr;grid-column-gap:1em}.quarto-title-tools-only{display:flex;justify-content:right}body{-webkit-font-smoothing:antialiased}.badge.bg-light{color:#343a40}.progress .progress-bar{font-size:8px;line-height:8px}:root{--quarto-scss-export-gray-300: #dee2e6;--quarto-scss-export-gray-500: #adb5bd;--quarto-scss-export-gray-600: #6c757d;--quarto-scss-export-gray-800: #343a40;--quarto-scss-export-card-cap-bg: rgba(52, 58, 64, 0.25);--quarto-scss-export-border-color: #dee2e6;--quarto-scss-export-text-muted: #6c757d;--quarto-scss-export-body-bg: #181818;--quarto-scss-export-link-color: #AED1E4;--quarto-scss-export-light: #525252;--quarto-scss-export-footer-bg: #181818;--quarto-scss-export-navbar-bg: #303030;--quarto-scss-export-popover-bg: #242424;--quarto-scss-export-input-bg: #242424;--quarto-scss-export-dropdown-bg: #181818;--quarto-scss-export-quarto-body-bg: #181818;--quarto-scss-export-bs-body-bg: #181818;--quarto-scss-export-sidebar-bg: #353535;--quarto-scss-export-white: #fff;--quarto-scss-export-gray-100: #f8f9fa;--quarto-scss-export-gray-200: #e9ecef;--quarto-scss-export-gray-400: #ced4da;--quarto-scss-export-gray-700: #495057;--quarto-scss-export-gray-900: 
#212529;--quarto-scss-export-black: #000;--quarto-scss-export-blue: #2780e3;--quarto-scss-export-indigo: #6610f2;--quarto-scss-export-purple: #613d7c;--quarto-scss-export-pink: #e83e8c;--quarto-scss-export-red: #ff0039;--quarto-scss-export-orange: #f0ad4e;--quarto-scss-export-yellow: #ff7518;--quarto-scss-export-green: #3fb618;--quarto-scss-export-teal: #20c997;--quarto-scss-export-cyan: #9954bb;--quarto-scss-export-primary: #2780e3;--quarto-scss-export-secondary: #343a40;--quarto-scss-export-success: #3fb618;--quarto-scss-export-info: #9954bb;--quarto-scss-export-warning: #ff7518;--quarto-scss-export-danger: #ff0039;--quarto-scss-export-dark: #343a40;--quarto-scss-export-title-banner-color: ;--quarto-scss-export-title-banner-bg: ;--quarto-scss-export-btn-code-copy-color: #616e88;--quarto-scss-export-btn-code-copy-color-active: #88c0d0;--quarto-scss-export-link-color-bg: transparent;--quarto-scss-export-code-color: #7d12ba;--quarto-scss-export-code-bg: #f8f9fa;--quarto-scss-export-toc-color: #AED1E4;--quarto-scss-export-toc-active-border: #AED1E4;--quarto-scss-export-toc-inactive-border: #e9ecef;--quarto-scss-export-navbar-default: #2780e3;--quarto-scss-export-navbar-hl-override: #aed1e4;--quarto-scss-export-btn-bg: #343a40;--quarto-scss-export-btn-fg: #cacccd;--quarto-scss-export-body-contrast-bg: #181818;--quarto-scss-export-body-contrast-color: #FAF1E4;--quarto-scss-export-navbar-fg: #bdbdbd;--quarto-scss-export-navbar-hl: #aed1e4;--quarto-scss-export-navbar-brand: #bdbdbd;--quarto-scss-export-navbar-brand-hl: #aed1e4;--quarto-scss-export-navbar-toggler-border-color: rgba(189, 189, 189, 0);--quarto-scss-export-navbar-hover-color: rgba(174, 209, 228, 0.8);--quarto-scss-export-navbar-disabled-color: rgba(189, 189, 189, 0.75);--quarto-scss-export-title-block-color: #FAF1E4;--quarto-scss-export-title-block-contast-color: #181818;--quarto-scss-export-footer-fg: #828282;--quarto-scss-export-input-border-color: 
#dee2e6;--quarto-scss-export-code-annotation-higlight-color: rgba(170, 170, 170, 0.2666666667);--quarto-scss-export-code-annotation-higlight-bg: rgba(170, 170, 170, 0.1333333333);--quarto-scss-export-table-group-separator-color: #89857e;--quarto-scss-export-table-group-separator-color-lighter: #454341;--quarto-scss-export-link-decoration: underline;--quarto-scss-export-table-border-color: #dee2e6;--quarto-scss-export-sidebar-glass-bg: rgba(102, 102, 102, 0.4);--quarto-scss-export-color-contrast-dark: #000;--quarto-scss-export-color-contrast-light: #fff;--quarto-scss-export-blue-100: #d4e6f9;--quarto-scss-export-blue-200: #a9ccf4;--quarto-scss-export-blue-300: #7db3ee;--quarto-scss-export-blue-400: #5299e9;--quarto-scss-export-blue-500: #2780e3;--quarto-scss-export-blue-600: #1f66b6;--quarto-scss-export-blue-700: #174d88;--quarto-scss-export-blue-800: #10335b;--quarto-scss-export-blue-900: #081a2d;--quarto-scss-export-indigo-100: #e0cffc;--quarto-scss-export-indigo-200: #c29ffa;--quarto-scss-export-indigo-300: #a370f7;--quarto-scss-export-indigo-400: #8540f5;--quarto-scss-export-indigo-500: #6610f2;--quarto-scss-export-indigo-600: #520dc2;--quarto-scss-export-indigo-700: #3d0a91;--quarto-scss-export-indigo-800: #290661;--quarto-scss-export-indigo-900: #140330;--quarto-scss-export-purple-100: #dfd8e5;--quarto-scss-export-purple-200: #c0b1cb;--quarto-scss-export-purple-300: #a08bb0;--quarto-scss-export-purple-400: #816496;--quarto-scss-export-purple-500: #613d7c;--quarto-scss-export-purple-600: #4e3163;--quarto-scss-export-purple-700: #3a254a;--quarto-scss-export-purple-800: #271832;--quarto-scss-export-purple-900: #130c19;--quarto-scss-export-pink-100: #fad8e8;--quarto-scss-export-pink-200: #f6b2d1;--quarto-scss-export-pink-300: #f18bba;--quarto-scss-export-pink-400: #ed65a3;--quarto-scss-export-pink-500: #e83e8c;--quarto-scss-export-pink-600: #ba3270;--quarto-scss-export-pink-700: #8b2554;--quarto-scss-export-pink-800: #5d1938;--quarto-scss-export-pink-900: 
#2e0c1c;--quarto-scss-export-red-100: #ffccd7;--quarto-scss-export-red-200: #ff99b0;--quarto-scss-export-red-300: #ff6688;--quarto-scss-export-red-400: #ff3361;--quarto-scss-export-red-500: #ff0039;--quarto-scss-export-red-600: #cc002e;--quarto-scss-export-red-700: #990022;--quarto-scss-export-red-800: #660017;--quarto-scss-export-red-900: #33000b;--quarto-scss-export-orange-100: #fcefdc;--quarto-scss-export-orange-200: #f9deb8;--quarto-scss-export-orange-300: #f6ce95;--quarto-scss-export-orange-400: #f3bd71;--quarto-scss-export-orange-500: #f0ad4e;--quarto-scss-export-orange-600: #c08a3e;--quarto-scss-export-orange-700: #90682f;--quarto-scss-export-orange-800: #60451f;--quarto-scss-export-orange-900: #302310;--quarto-scss-export-yellow-100: #ffe3d1;--quarto-scss-export-yellow-200: #ffc8a3;--quarto-scss-export-yellow-300: #ffac74;--quarto-scss-export-yellow-400: #ff9146;--quarto-scss-export-yellow-500: #ff7518;--quarto-scss-export-yellow-600: #cc5e13;--quarto-scss-export-yellow-700: #99460e;--quarto-scss-export-yellow-800: #662f0a;--quarto-scss-export-yellow-900: #331705;--quarto-scss-export-green-100: #d9f0d1;--quarto-scss-export-green-200: #b2e2a3;--quarto-scss-export-green-300: #8cd374;--quarto-scss-export-green-400: #65c546;--quarto-scss-export-green-500: #3fb618;--quarto-scss-export-green-600: #329213;--quarto-scss-export-green-700: #266d0e;--quarto-scss-export-green-800: #19490a;--quarto-scss-export-green-900: #0d2405;--quarto-scss-export-teal-100: #d2f4ea;--quarto-scss-export-teal-200: #a6e9d5;--quarto-scss-export-teal-300: #79dfc1;--quarto-scss-export-teal-400: #4dd4ac;--quarto-scss-export-teal-500: #20c997;--quarto-scss-export-teal-600: #1aa179;--quarto-scss-export-teal-700: #13795b;--quarto-scss-export-teal-800: #0d503c;--quarto-scss-export-teal-900: #06281e;--quarto-scss-export-cyan-100: #ebddf1;--quarto-scss-export-cyan-200: #d6bbe4;--quarto-scss-export-cyan-300: #c298d6;--quarto-scss-export-cyan-400: #ad76c9;--quarto-scss-export-cyan-500: 
#9954bb;--quarto-scss-export-cyan-600: #7a4396;--quarto-scss-export-cyan-700: #5c3270;--quarto-scss-export-cyan-800: #3d224b;--quarto-scss-export-cyan-900: #1f1125;--quarto-scss-export-default: #343a40;--quarto-scss-export-primary-text-emphasis: #10335b;--quarto-scss-export-secondary-text-emphasis: #15171a;--quarto-scss-export-success-text-emphasis: #19490a;--quarto-scss-export-info-text-emphasis: #3d224b;--quarto-scss-export-warning-text-emphasis: #662f0a;--quarto-scss-export-danger-text-emphasis: #660017;--quarto-scss-export-light-text-emphasis: #495057;--quarto-scss-export-dark-text-emphasis: #495057;--quarto-scss-export-primary-bg-subtle: #d4e6f9;--quarto-scss-export-secondary-bg-subtle: #d6d8d9;--quarto-scss-export-success-bg-subtle: #d9f0d1;--quarto-scss-export-info-bg-subtle: #ebddf1;--quarto-scss-export-warning-bg-subtle: #ffe3d1;--quarto-scss-export-danger-bg-subtle: #ffccd7;--quarto-scss-export-light-bg-subtle: #fcfcfd;--quarto-scss-export-dark-bg-subtle: #ced4da;--quarto-scss-export-primary-border-subtle: #a9ccf4;--quarto-scss-export-secondary-border-subtle: #aeb0b3;--quarto-scss-export-success-border-subtle: #b2e2a3;--quarto-scss-export-info-border-subtle: #d6bbe4;--quarto-scss-export-warning-border-subtle: #ffc8a3;--quarto-scss-export-danger-border-subtle: #ff99b0;--quarto-scss-export-light-border-subtle: #e9ecef;--quarto-scss-export-dark-border-subtle: #adb5bd;--quarto-scss-export-body-text-align: ;--quarto-scss-export-body-secondary-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-body-secondary-bg: #e9ecef;--quarto-scss-export-body-tertiary-color: rgba(250, 241, 228, 0.5);--quarto-scss-export-body-tertiary-bg: #f8f9fa;--quarto-scss-export-body-emphasis-color: #000;--quarto-scss-export-link-hover-color: #8ba7b6;--quarto-scss-export-link-hover-decoration: ;--quarto-scss-export-border-color-translucent: rgba(0, 0, 0, 0.175);--quarto-scss-export-component-active-bg: #2780e3;--quarto-scss-export-component-active-color: 
#fff;--quarto-scss-export-focus-ring-color: rgba(39, 128, 227, 0.25);--quarto-scss-export-headings-font-family: ;--quarto-scss-export-headings-font-style: ;--quarto-scss-export-display-font-family: ;--quarto-scss-export-display-font-style: ;--quarto-scss-export-blockquote-footer-color: #6c757d;--quarto-scss-export-blockquote-border-color: #e9ecef;--quarto-scss-export-hr-bg-color: ;--quarto-scss-export-hr-height: ;--quarto-scss-export-hr-border-color: ;--quarto-scss-export-legend-font-weight: ;--quarto-scss-export-mark-bg: #ffe3d1;--quarto-scss-export-table-bg: #181818;--quarto-scss-export-table-accent-bg: transparent;--quarto-scss-export-table-th-font-weight: ;--quarto-scss-export-table-striped-bg: rgba(0, 0, 0, 0.05);--quarto-scss-export-table-active-bg: rgba(0, 0, 0, 0.1);--quarto-scss-export-table-hover-bg: rgba(0, 0, 0, 0.075);--quarto-scss-export-table-caption-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-input-btn-font-family: ;--quarto-scss-export-input-btn-focus-color: rgba(39, 128, 227, 0.25);--quarto-scss-export-btn-font-family: ;--quarto-scss-export-btn-white-space: ;--quarto-scss-export-btn-link-color: #AED1E4;--quarto-scss-export-btn-link-hover-color: #8ba7b6;--quarto-scss-export-btn-link-disabled-color: #6c757d;--quarto-scss-export-form-text-font-style: ;--quarto-scss-export-form-text-font-weight: ;--quarto-scss-export-form-text-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-form-label-font-size: ;--quarto-scss-export-form-label-font-style: ;--quarto-scss-export-form-label-font-weight: ;--quarto-scss-export-form-label-color: ;--quarto-scss-export-input-font-family: ;--quarto-scss-export-input-disabled-color: ;--quarto-scss-export-input-disabled-bg: #e9ecef;--quarto-scss-export-input-disabled-border-color: ;--quarto-scss-export-input-focus-bg: #242424;--quarto-scss-export-input-focus-border-color: #93c0f1;--quarto-scss-export-input-placeholder-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-form-check-label-color: 
;--quarto-scss-export-form-check-transition: ;--quarto-scss-export-form-check-input-bg: #242424;--quarto-scss-export-form-check-input-focus-border: #93c0f1;--quarto-scss-export-form-check-input-checked-color: #fff;--quarto-scss-export-form-check-input-checked-bg-color: #2780e3;--quarto-scss-export-form-check-input-checked-border-color: #2780e3;--quarto-scss-export-form-check-input-indeterminate-color: #fff;--quarto-scss-export-form-check-input-indeterminate-bg-color: #2780e3;--quarto-scss-export-form-check-input-indeterminate-border-color: #2780e3;--quarto-scss-export-form-switch-color: rgba(0, 0, 0, 0.25);--quarto-scss-export-form-switch-focus-color: #93c0f1;--quarto-scss-export-form-switch-checked-color: #fff;--quarto-scss-export-input-group-addon-bg: #f8f9fa;--quarto-scss-export-input-group-addon-border-color: #dee2e6;--quarto-scss-export-form-select-font-family: ;--quarto-scss-export-form-select-bg: #242424;--quarto-scss-export-form-select-disabled-color: ;--quarto-scss-export-form-select-disabled-bg: #e9ecef;--quarto-scss-export-form-select-disabled-border-color: ;--quarto-scss-export-form-select-indicator-color: #343a40;--quarto-scss-export-form-select-border-color: #dee2e6;--quarto-scss-export-form-select-focus-border-color: #93c0f1;--quarto-scss-export-form-range-track-bg: #f8f9fa;--quarto-scss-export-form-range-thumb-bg: #2780e3;--quarto-scss-export-form-range-thumb-active-bg: #bed9f7;--quarto-scss-export-form-range-thumb-disabled-bg: rgba(250, 241, 228, 0.75);--quarto-scss-export-form-file-button-bg: #f8f9fa;--quarto-scss-export-form-file-button-hover-bg: #e9ecef;--quarto-scss-export-form-floating-label-disabled-color: #6c757d;--quarto-scss-export-form-feedback-font-style: ;--quarto-scss-export-form-feedback-valid-color: #3fb618;--quarto-scss-export-form-feedback-invalid-color: #ff0039;--quarto-scss-export-form-feedback-icon-valid-color: #3fb618;--quarto-scss-export-form-feedback-icon-invalid-color: #ff0039;--quarto-scss-export-form-valid-color: 
#3fb618;--quarto-scss-export-form-valid-border-color: #3fb618;--quarto-scss-export-form-invalid-color: #ff0039;--quarto-scss-export-form-invalid-border-color: #ff0039;--quarto-scss-export-nav-link-font-size: ;--quarto-scss-export-nav-link-font-weight: ;--quarto-scss-export-nav-link-color: #AED1E4;--quarto-scss-export-nav-link-hover-color: #8ba7b6;--quarto-scss-export-nav-link-disabled-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-nav-tabs-border-color: #dee2e6;--quarto-scss-export-nav-tabs-link-hover-border-color: #e9ecef #e9ecef #dee2e6;--quarto-scss-export-nav-tabs-link-active-color: #000;--quarto-scss-export-nav-tabs-link-active-bg: #181818;--quarto-scss-export-nav-pills-link-active-bg: #2780e3;--quarto-scss-export-nav-pills-link-active-color: #fff;--quarto-scss-export-nav-underline-link-active-color: #000;--quarto-scss-export-navbar-padding-x: ;--quarto-scss-export-navbar-light-contrast: #fff;--quarto-scss-export-navbar-dark-contrast: #fff;--quarto-scss-export-navbar-light-icon-color: rgba(255, 255, 255, 0.75);--quarto-scss-export-navbar-dark-icon-color: rgba(255, 255, 255, 0.75);--quarto-scss-export-dropdown-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-dropdown-divider-bg: rgba(0, 0, 0, 0.175);--quarto-scss-export-dropdown-link-active-bg: #2780e3;--quarto-scss-export-dropdown-link-active-color: #fff;--quarto-scss-export-dropdown-link-disabled-color: rgba(250, 241, 228, 0.5);--quarto-scss-export-dropdown-header-color: #6c757d;--quarto-scss-export-dropdown-dark-color: #dee2e6;--quarto-scss-export-dropdown-dark-bg: #343a40;--quarto-scss-export-dropdown-dark-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-dropdown-dark-divider-bg: rgba(0, 0, 0, 0.175);--quarto-scss-export-dropdown-dark-box-shadow: ;--quarto-scss-export-dropdown-dark-link-color: #dee2e6;--quarto-scss-export-dropdown-dark-link-hover-color: #fff;--quarto-scss-export-dropdown-dark-link-hover-bg: rgba(255, 255, 255, 0.15);--quarto-scss-export-dropdown-dark-link-active-color: 
#fff;--quarto-scss-export-dropdown-dark-link-active-bg: #2780e3;--quarto-scss-export-dropdown-dark-link-disabled-color: #adb5bd;--quarto-scss-export-dropdown-dark-header-color: #adb5bd;--quarto-scss-export-pagination-color: #AED1E4;--quarto-scss-export-pagination-bg: #181818;--quarto-scss-export-pagination-border-color: #dee2e6;--quarto-scss-export-pagination-focus-color: #8ba7b6;--quarto-scss-export-pagination-focus-bg: #e9ecef;--quarto-scss-export-pagination-hover-color: #8ba7b6;--quarto-scss-export-pagination-hover-bg: #f8f9fa;--quarto-scss-export-pagination-hover-border-color: #dee2e6;--quarto-scss-export-pagination-active-color: #fff;--quarto-scss-export-pagination-active-bg: #2780e3;--quarto-scss-export-pagination-active-border-color: #2780e3;--quarto-scss-export-pagination-disabled-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-pagination-disabled-bg: #e9ecef;--quarto-scss-export-pagination-disabled-border-color: #dee2e6;--quarto-scss-export-card-title-color: ;--quarto-scss-export-card-subtitle-color: ;--quarto-scss-export-card-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-card-box-shadow: ;--quarto-scss-export-card-cap-color: ;--quarto-scss-export-card-height: ;--quarto-scss-export-card-color: ;--quarto-scss-export-card-bg: #181818;--quarto-scss-export-accordion-bg: #181818;--quarto-scss-export-accordion-border-color: #dee2e6;--quarto-scss-export-accordion-button-bg: #181818;--quarto-scss-export-accordion-button-active-bg: #d4e6f9;--quarto-scss-export-accordion-button-active-color: #10335b;--quarto-scss-export-accordion-button-focus-border-color: #93c0f1;--quarto-scss-export-accordion-icon-active-color: #10335b;--quarto-scss-export-tooltip-color: #181818;--quarto-scss-export-tooltip-bg: #000;--quarto-scss-export-tooltip-margin: ;--quarto-scss-export-tooltip-arrow-color: ;--quarto-scss-export-form-feedback-tooltip-line-height: ;--quarto-scss-export-popover-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-popover-header-bg: 
#e9ecef;--quarto-scss-export-popover-arrow-color: #242424;--quarto-scss-export-popover-arrow-outer-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-toast-color: ;--quarto-scss-export-toast-background-color: rgba(24, 24, 24, 0.85);--quarto-scss-export-toast-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-toast-header-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-toast-header-background-color: rgba(24, 24, 24, 0.85);--quarto-scss-export-toast-header-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-badge-color: #fff;--quarto-scss-export-modal-content-color: ;--quarto-scss-export-modal-content-bg: #181818;--quarto-scss-export-modal-content-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-modal-backdrop-bg: #000;--quarto-scss-export-modal-header-border-color: #dee2e6;--quarto-scss-export-modal-footer-bg: ;--quarto-scss-export-modal-footer-border-color: #dee2e6;--quarto-scss-export-progress-bg: #e9ecef;--quarto-scss-export-progress-bar-color: #fff;--quarto-scss-export-progress-bar-bg: #2780e3;--quarto-scss-export-list-group-bg: #181818;--quarto-scss-export-list-group-border-color: #dee2e6;--quarto-scss-export-list-group-hover-bg: #f8f9fa;--quarto-scss-export-list-group-active-bg: #2780e3;--quarto-scss-export-list-group-active-color: #fff;--quarto-scss-export-list-group-active-border-color: #2780e3;--quarto-scss-export-list-group-disabled-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-list-group-disabled-bg: #181818;--quarto-scss-export-list-group-action-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-list-group-action-hover-color: #000;--quarto-scss-export-list-group-action-active-bg: #e9ecef;--quarto-scss-export-thumbnail-bg: #181818;--quarto-scss-export-thumbnail-border-color: #dee2e6;--quarto-scss-export-figure-caption-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-breadcrumb-font-size: ;--quarto-scss-export-breadcrumb-bg: ;--quarto-scss-export-breadcrumb-divider-color: rgba(250, 241, 228, 
0.75);--quarto-scss-export-breadcrumb-active-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-breadcrumb-border-radius: ;--quarto-scss-export-carousel-control-color: #fff;--quarto-scss-export-carousel-indicator-active-bg: #fff;--quarto-scss-export-carousel-caption-color: #fff;--quarto-scss-export-carousel-dark-indicator-active-bg: #000;--quarto-scss-export-carousel-dark-caption-color: #000;--quarto-scss-export-btn-close-color: #000;--quarto-scss-export-offcanvas-border-color: rgba(0, 0, 0, 0.175);--quarto-scss-export-offcanvas-bg-color: #181818;--quarto-scss-export-offcanvas-backdrop-bg: #000;--quarto-scss-export-code-color-dark: white;--quarto-scss-export-kbd-color: #181818;--quarto-scss-export-nested-kbd-font-weight: ;--quarto-scss-export-pre-bg: #f8f9fa;--quarto-scss-export-pre-color: #000;--quarto-scss-export-bslib-page-sidebar-title-bg: #303030;--quarto-scss-export-bslib-page-sidebar-title-color: #fff;--quarto-scss-export-bslib-sidebar-bg: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.05);--quarto-scss-export-bslib-sidebar-toggle-bg: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.1);--quarto-scss-export-sidebar-color: #faf1e4;--quarto-scss-export-sidebar-hover-color: rgba(174, 209, 228, 0.8);--quarto-scss-export-sidebar-disabled-color: rgba(250, 241, 228, 0.75);--quarto-scss-export-valuebox-bg-primary: #5397e9;--quarto-scss-export-valuebox-bg-secondary: #343a40;--quarto-scss-export-valuebox-bg-success: #3aa716;--quarto-scss-export-valuebox-bg-info: rgba(153, 84, 187, 0.7019607843);--quarto-scss-export-valuebox-bg-warning: #fa6400;--quarto-scss-export-valuebox-bg-danger: rgba(255, 0, 57, 0.7019607843);--quarto-scss-export-valuebox-bg-light: #525252;--quarto-scss-export-valuebox-bg-dark: #343a40;--quarto-scss-export-mermaid-bg-color: #181818;--quarto-scss-export-mermaid-edge-color: #343a40;--quarto-scss-export-mermaid-fg-color--lighter: white;--quarto-scss-export-mermaid-fg-color--lightest: white;--quarto-scss-export-mermaid-label-bg-color: 
#181818;--quarto-scss-export-mermaid-label-fg-color: #2780e3;--quarto-scss-export-mermaid-node-bg-color: rgba(39, 128, 227, 0.1);--quarto-scss-export-code-block-border-left-color: #dee2e6;--quarto-scss-export-callout-color-note: #2780e3;--quarto-scss-export-callout-color-tip: #3fb618;--quarto-scss-export-callout-color-important: #ff0039;--quarto-scss-export-callout-color-caution: #f0ad4e;--quarto-scss-export-callout-color-warning: #ff7518} \ No newline at end of file diff --git a/docs/2_39/site_libs/bootstrap/bootstrap-icons.css b/docs/2_39/site_libs/bootstrap/bootstrap-icons.css new file mode 100644 index 000000000..285e4448f --- /dev/null +++ b/docs/2_39/site_libs/bootstrap/bootstrap-icons.css @@ -0,0 +1,2078 @@ +/*! + * Bootstrap Icons v1.11.1 (https://icons.getbootstrap.com/) + * Copyright 2019-2023 The Bootstrap Authors + * Licensed under MIT (https://github.com/twbs/icons/blob/main/LICENSE) + */ + +@font-face { + font-display: block; + font-family: "bootstrap-icons"; + src: +url("./bootstrap-icons.woff?2820a3852bdb9a5832199cc61cec4e65") format("woff"); +} + +.bi::before, +[class^="bi-"]::before, +[class*=" bi-"]::before { + display: inline-block; + font-family: bootstrap-icons !important; + font-style: normal; + font-weight: normal !important; + font-variant: normal; + text-transform: none; + line-height: 1; + vertical-align: -.125em; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +.bi-123::before { content: "\f67f"; } +.bi-alarm-fill::before { content: "\f101"; } +.bi-alarm::before { content: "\f102"; } +.bi-align-bottom::before { content: "\f103"; } +.bi-align-center::before { content: "\f104"; } +.bi-align-end::before { content: "\f105"; } +.bi-align-middle::before { content: "\f106"; } +.bi-align-start::before { content: "\f107"; } +.bi-align-top::before { content: "\f108"; } +.bi-alt::before { content: "\f109"; } +.bi-app-indicator::before { content: "\f10a"; } +.bi-app::before { content: "\f10b"; } 
+.bi-archive-fill::before { content: "\f10c"; } +.bi-archive::before { content: "\f10d"; } +.bi-arrow-90deg-down::before { content: "\f10e"; } +.bi-arrow-90deg-left::before { content: "\f10f"; } +.bi-arrow-90deg-right::before { content: "\f110"; } +.bi-arrow-90deg-up::before { content: "\f111"; } +.bi-arrow-bar-down::before { content: "\f112"; } +.bi-arrow-bar-left::before { content: "\f113"; } +.bi-arrow-bar-right::before { content: "\f114"; } +.bi-arrow-bar-up::before { content: "\f115"; } +.bi-arrow-clockwise::before { content: "\f116"; } +.bi-arrow-counterclockwise::before { content: "\f117"; } +.bi-arrow-down-circle-fill::before { content: "\f118"; } +.bi-arrow-down-circle::before { content: "\f119"; } +.bi-arrow-down-left-circle-fill::before { content: "\f11a"; } +.bi-arrow-down-left-circle::before { content: "\f11b"; } +.bi-arrow-down-left-square-fill::before { content: "\f11c"; } +.bi-arrow-down-left-square::before { content: "\f11d"; } +.bi-arrow-down-left::before { content: "\f11e"; } +.bi-arrow-down-right-circle-fill::before { content: "\f11f"; } +.bi-arrow-down-right-circle::before { content: "\f120"; } +.bi-arrow-down-right-square-fill::before { content: "\f121"; } +.bi-arrow-down-right-square::before { content: "\f122"; } +.bi-arrow-down-right::before { content: "\f123"; } +.bi-arrow-down-short::before { content: "\f124"; } +.bi-arrow-down-square-fill::before { content: "\f125"; } +.bi-arrow-down-square::before { content: "\f126"; } +.bi-arrow-down-up::before { content: "\f127"; } +.bi-arrow-down::before { content: "\f128"; } +.bi-arrow-left-circle-fill::before { content: "\f129"; } +.bi-arrow-left-circle::before { content: "\f12a"; } +.bi-arrow-left-right::before { content: "\f12b"; } +.bi-arrow-left-short::before { content: "\f12c"; } +.bi-arrow-left-square-fill::before { content: "\f12d"; } +.bi-arrow-left-square::before { content: "\f12e"; } +.bi-arrow-left::before { content: "\f12f"; } +.bi-arrow-repeat::before { content: "\f130"; } 
+.bi-arrow-return-left::before { content: "\f131"; } +.bi-arrow-return-right::before { content: "\f132"; } +.bi-arrow-right-circle-fill::before { content: "\f133"; } +.bi-arrow-right-circle::before { content: "\f134"; } +.bi-arrow-right-short::before { content: "\f135"; } +.bi-arrow-right-square-fill::before { content: "\f136"; } +.bi-arrow-right-square::before { content: "\f137"; } +.bi-arrow-right::before { content: "\f138"; } +.bi-arrow-up-circle-fill::before { content: "\f139"; } +.bi-arrow-up-circle::before { content: "\f13a"; } +.bi-arrow-up-left-circle-fill::before { content: "\f13b"; } +.bi-arrow-up-left-circle::before { content: "\f13c"; } +.bi-arrow-up-left-square-fill::before { content: "\f13d"; } +.bi-arrow-up-left-square::before { content: "\f13e"; } +.bi-arrow-up-left::before { content: "\f13f"; } +.bi-arrow-up-right-circle-fill::before { content: "\f140"; } +.bi-arrow-up-right-circle::before { content: "\f141"; } +.bi-arrow-up-right-square-fill::before { content: "\f142"; } +.bi-arrow-up-right-square::before { content: "\f143"; } +.bi-arrow-up-right::before { content: "\f144"; } +.bi-arrow-up-short::before { content: "\f145"; } +.bi-arrow-up-square-fill::before { content: "\f146"; } +.bi-arrow-up-square::before { content: "\f147"; } +.bi-arrow-up::before { content: "\f148"; } +.bi-arrows-angle-contract::before { content: "\f149"; } +.bi-arrows-angle-expand::before { content: "\f14a"; } +.bi-arrows-collapse::before { content: "\f14b"; } +.bi-arrows-expand::before { content: "\f14c"; } +.bi-arrows-fullscreen::before { content: "\f14d"; } +.bi-arrows-move::before { content: "\f14e"; } +.bi-aspect-ratio-fill::before { content: "\f14f"; } +.bi-aspect-ratio::before { content: "\f150"; } +.bi-asterisk::before { content: "\f151"; } +.bi-at::before { content: "\f152"; } +.bi-award-fill::before { content: "\f153"; } +.bi-award::before { content: "\f154"; } +.bi-back::before { content: "\f155"; } +.bi-backspace-fill::before { content: "\f156"; } 
+.bi-backspace-reverse-fill::before { content: "\f157"; } +.bi-backspace-reverse::before { content: "\f158"; } +.bi-backspace::before { content: "\f159"; } +.bi-badge-3d-fill::before { content: "\f15a"; } +.bi-badge-3d::before { content: "\f15b"; } +.bi-badge-4k-fill::before { content: "\f15c"; } +.bi-badge-4k::before { content: "\f15d"; } +.bi-badge-8k-fill::before { content: "\f15e"; } +.bi-badge-8k::before { content: "\f15f"; } +.bi-badge-ad-fill::before { content: "\f160"; } +.bi-badge-ad::before { content: "\f161"; } +.bi-badge-ar-fill::before { content: "\f162"; } +.bi-badge-ar::before { content: "\f163"; } +.bi-badge-cc-fill::before { content: "\f164"; } +.bi-badge-cc::before { content: "\f165"; } +.bi-badge-hd-fill::before { content: "\f166"; } +.bi-badge-hd::before { content: "\f167"; } +.bi-badge-tm-fill::before { content: "\f168"; } +.bi-badge-tm::before { content: "\f169"; } +.bi-badge-vo-fill::before { content: "\f16a"; } +.bi-badge-vo::before { content: "\f16b"; } +.bi-badge-vr-fill::before { content: "\f16c"; } +.bi-badge-vr::before { content: "\f16d"; } +.bi-badge-wc-fill::before { content: "\f16e"; } +.bi-badge-wc::before { content: "\f16f"; } +.bi-bag-check-fill::before { content: "\f170"; } +.bi-bag-check::before { content: "\f171"; } +.bi-bag-dash-fill::before { content: "\f172"; } +.bi-bag-dash::before { content: "\f173"; } +.bi-bag-fill::before { content: "\f174"; } +.bi-bag-plus-fill::before { content: "\f175"; } +.bi-bag-plus::before { content: "\f176"; } +.bi-bag-x-fill::before { content: "\f177"; } +.bi-bag-x::before { content: "\f178"; } +.bi-bag::before { content: "\f179"; } +.bi-bar-chart-fill::before { content: "\f17a"; } +.bi-bar-chart-line-fill::before { content: "\f17b"; } +.bi-bar-chart-line::before { content: "\f17c"; } +.bi-bar-chart-steps::before { content: "\f17d"; } +.bi-bar-chart::before { content: "\f17e"; } +.bi-basket-fill::before { content: "\f17f"; } +.bi-basket::before { content: "\f180"; } +.bi-basket2-fill::before { 
content: "\f181"; } +.bi-basket2::before { content: "\f182"; } +.bi-basket3-fill::before { content: "\f183"; } +.bi-basket3::before { content: "\f184"; } +.bi-battery-charging::before { content: "\f185"; } +.bi-battery-full::before { content: "\f186"; } +.bi-battery-half::before { content: "\f187"; } +.bi-battery::before { content: "\f188"; } +.bi-bell-fill::before { content: "\f189"; } +.bi-bell::before { content: "\f18a"; } +.bi-bezier::before { content: "\f18b"; } +.bi-bezier2::before { content: "\f18c"; } +.bi-bicycle::before { content: "\f18d"; } +.bi-binoculars-fill::before { content: "\f18e"; } +.bi-binoculars::before { content: "\f18f"; } +.bi-blockquote-left::before { content: "\f190"; } +.bi-blockquote-right::before { content: "\f191"; } +.bi-book-fill::before { content: "\f192"; } +.bi-book-half::before { content: "\f193"; } +.bi-book::before { content: "\f194"; } +.bi-bookmark-check-fill::before { content: "\f195"; } +.bi-bookmark-check::before { content: "\f196"; } +.bi-bookmark-dash-fill::before { content: "\f197"; } +.bi-bookmark-dash::before { content: "\f198"; } +.bi-bookmark-fill::before { content: "\f199"; } +.bi-bookmark-heart-fill::before { content: "\f19a"; } +.bi-bookmark-heart::before { content: "\f19b"; } +.bi-bookmark-plus-fill::before { content: "\f19c"; } +.bi-bookmark-plus::before { content: "\f19d"; } +.bi-bookmark-star-fill::before { content: "\f19e"; } +.bi-bookmark-star::before { content: "\f19f"; } +.bi-bookmark-x-fill::before { content: "\f1a0"; } +.bi-bookmark-x::before { content: "\f1a1"; } +.bi-bookmark::before { content: "\f1a2"; } +.bi-bookmarks-fill::before { content: "\f1a3"; } +.bi-bookmarks::before { content: "\f1a4"; } +.bi-bookshelf::before { content: "\f1a5"; } +.bi-bootstrap-fill::before { content: "\f1a6"; } +.bi-bootstrap-reboot::before { content: "\f1a7"; } +.bi-bootstrap::before { content: "\f1a8"; } +.bi-border-all::before { content: "\f1a9"; } +.bi-border-bottom::before { content: "\f1aa"; } 
+.bi-border-center::before { content: "\f1ab"; } +.bi-border-inner::before { content: "\f1ac"; } +.bi-border-left::before { content: "\f1ad"; } +.bi-border-middle::before { content: "\f1ae"; } +.bi-border-outer::before { content: "\f1af"; } +.bi-border-right::before { content: "\f1b0"; } +.bi-border-style::before { content: "\f1b1"; } +.bi-border-top::before { content: "\f1b2"; } +.bi-border-width::before { content: "\f1b3"; } +.bi-border::before { content: "\f1b4"; } +.bi-bounding-box-circles::before { content: "\f1b5"; } +.bi-bounding-box::before { content: "\f1b6"; } +.bi-box-arrow-down-left::before { content: "\f1b7"; } +.bi-box-arrow-down-right::before { content: "\f1b8"; } +.bi-box-arrow-down::before { content: "\f1b9"; } +.bi-box-arrow-in-down-left::before { content: "\f1ba"; } +.bi-box-arrow-in-down-right::before { content: "\f1bb"; } +.bi-box-arrow-in-down::before { content: "\f1bc"; } +.bi-box-arrow-in-left::before { content: "\f1bd"; } +.bi-box-arrow-in-right::before { content: "\f1be"; } +.bi-box-arrow-in-up-left::before { content: "\f1bf"; } +.bi-box-arrow-in-up-right::before { content: "\f1c0"; } +.bi-box-arrow-in-up::before { content: "\f1c1"; } +.bi-box-arrow-left::before { content: "\f1c2"; } +.bi-box-arrow-right::before { content: "\f1c3"; } +.bi-box-arrow-up-left::before { content: "\f1c4"; } +.bi-box-arrow-up-right::before { content: "\f1c5"; } +.bi-box-arrow-up::before { content: "\f1c6"; } +.bi-box-seam::before { content: "\f1c7"; } +.bi-box::before { content: "\f1c8"; } +.bi-braces::before { content: "\f1c9"; } +.bi-bricks::before { content: "\f1ca"; } +.bi-briefcase-fill::before { content: "\f1cb"; } +.bi-briefcase::before { content: "\f1cc"; } +.bi-brightness-alt-high-fill::before { content: "\f1cd"; } +.bi-brightness-alt-high::before { content: "\f1ce"; } +.bi-brightness-alt-low-fill::before { content: "\f1cf"; } +.bi-brightness-alt-low::before { content: "\f1d0"; } +.bi-brightness-high-fill::before { content: "\f1d1"; } 
+.bi-brightness-high::before { content: "\f1d2"; } +.bi-brightness-low-fill::before { content: "\f1d3"; } +.bi-brightness-low::before { content: "\f1d4"; } +.bi-broadcast-pin::before { content: "\f1d5"; } +.bi-broadcast::before { content: "\f1d6"; } +.bi-brush-fill::before { content: "\f1d7"; } +.bi-brush::before { content: "\f1d8"; } +.bi-bucket-fill::before { content: "\f1d9"; } +.bi-bucket::before { content: "\f1da"; } +.bi-bug-fill::before { content: "\f1db"; } +.bi-bug::before { content: "\f1dc"; } +.bi-building::before { content: "\f1dd"; } +.bi-bullseye::before { content: "\f1de"; } +.bi-calculator-fill::before { content: "\f1df"; } +.bi-calculator::before { content: "\f1e0"; } +.bi-calendar-check-fill::before { content: "\f1e1"; } +.bi-calendar-check::before { content: "\f1e2"; } +.bi-calendar-date-fill::before { content: "\f1e3"; } +.bi-calendar-date::before { content: "\f1e4"; } +.bi-calendar-day-fill::before { content: "\f1e5"; } +.bi-calendar-day::before { content: "\f1e6"; } +.bi-calendar-event-fill::before { content: "\f1e7"; } +.bi-calendar-event::before { content: "\f1e8"; } +.bi-calendar-fill::before { content: "\f1e9"; } +.bi-calendar-minus-fill::before { content: "\f1ea"; } +.bi-calendar-minus::before { content: "\f1eb"; } +.bi-calendar-month-fill::before { content: "\f1ec"; } +.bi-calendar-month::before { content: "\f1ed"; } +.bi-calendar-plus-fill::before { content: "\f1ee"; } +.bi-calendar-plus::before { content: "\f1ef"; } +.bi-calendar-range-fill::before { content: "\f1f0"; } +.bi-calendar-range::before { content: "\f1f1"; } +.bi-calendar-week-fill::before { content: "\f1f2"; } +.bi-calendar-week::before { content: "\f1f3"; } +.bi-calendar-x-fill::before { content: "\f1f4"; } +.bi-calendar-x::before { content: "\f1f5"; } +.bi-calendar::before { content: "\f1f6"; } +.bi-calendar2-check-fill::before { content: "\f1f7"; } +.bi-calendar2-check::before { content: "\f1f8"; } +.bi-calendar2-date-fill::before { content: "\f1f9"; } 
+.bi-calendar2-date::before { content: "\f1fa"; } +.bi-calendar2-day-fill::before { content: "\f1fb"; } +.bi-calendar2-day::before { content: "\f1fc"; } +.bi-calendar2-event-fill::before { content: "\f1fd"; } +.bi-calendar2-event::before { content: "\f1fe"; } +.bi-calendar2-fill::before { content: "\f1ff"; } +.bi-calendar2-minus-fill::before { content: "\f200"; } +.bi-calendar2-minus::before { content: "\f201"; } +.bi-calendar2-month-fill::before { content: "\f202"; } +.bi-calendar2-month::before { content: "\f203"; } +.bi-calendar2-plus-fill::before { content: "\f204"; } +.bi-calendar2-plus::before { content: "\f205"; } +.bi-calendar2-range-fill::before { content: "\f206"; } +.bi-calendar2-range::before { content: "\f207"; } +.bi-calendar2-week-fill::before { content: "\f208"; } +.bi-calendar2-week::before { content: "\f209"; } +.bi-calendar2-x-fill::before { content: "\f20a"; } +.bi-calendar2-x::before { content: "\f20b"; } +.bi-calendar2::before { content: "\f20c"; } +.bi-calendar3-event-fill::before { content: "\f20d"; } +.bi-calendar3-event::before { content: "\f20e"; } +.bi-calendar3-fill::before { content: "\f20f"; } +.bi-calendar3-range-fill::before { content: "\f210"; } +.bi-calendar3-range::before { content: "\f211"; } +.bi-calendar3-week-fill::before { content: "\f212"; } +.bi-calendar3-week::before { content: "\f213"; } +.bi-calendar3::before { content: "\f214"; } +.bi-calendar4-event::before { content: "\f215"; } +.bi-calendar4-range::before { content: "\f216"; } +.bi-calendar4-week::before { content: "\f217"; } +.bi-calendar4::before { content: "\f218"; } +.bi-camera-fill::before { content: "\f219"; } +.bi-camera-reels-fill::before { content: "\f21a"; } +.bi-camera-reels::before { content: "\f21b"; } +.bi-camera-video-fill::before { content: "\f21c"; } +.bi-camera-video-off-fill::before { content: "\f21d"; } +.bi-camera-video-off::before { content: "\f21e"; } +.bi-camera-video::before { content: "\f21f"; } +.bi-camera::before { content: "\f220"; } 
+.bi-camera2::before { content: "\f221"; } +.bi-capslock-fill::before { content: "\f222"; } +.bi-capslock::before { content: "\f223"; } +.bi-card-checklist::before { content: "\f224"; } +.bi-card-heading::before { content: "\f225"; } +.bi-card-image::before { content: "\f226"; } +.bi-card-list::before { content: "\f227"; } +.bi-card-text::before { content: "\f228"; } +.bi-caret-down-fill::before { content: "\f229"; } +.bi-caret-down-square-fill::before { content: "\f22a"; } +.bi-caret-down-square::before { content: "\f22b"; } +.bi-caret-down::before { content: "\f22c"; } +.bi-caret-left-fill::before { content: "\f22d"; } +.bi-caret-left-square-fill::before { content: "\f22e"; } +.bi-caret-left-square::before { content: "\f22f"; } +.bi-caret-left::before { content: "\f230"; } +.bi-caret-right-fill::before { content: "\f231"; } +.bi-caret-right-square-fill::before { content: "\f232"; } +.bi-caret-right-square::before { content: "\f233"; } +.bi-caret-right::before { content: "\f234"; } +.bi-caret-up-fill::before { content: "\f235"; } +.bi-caret-up-square-fill::before { content: "\f236"; } +.bi-caret-up-square::before { content: "\f237"; } +.bi-caret-up::before { content: "\f238"; } +.bi-cart-check-fill::before { content: "\f239"; } +.bi-cart-check::before { content: "\f23a"; } +.bi-cart-dash-fill::before { content: "\f23b"; } +.bi-cart-dash::before { content: "\f23c"; } +.bi-cart-fill::before { content: "\f23d"; } +.bi-cart-plus-fill::before { content: "\f23e"; } +.bi-cart-plus::before { content: "\f23f"; } +.bi-cart-x-fill::before { content: "\f240"; } +.bi-cart-x::before { content: "\f241"; } +.bi-cart::before { content: "\f242"; } +.bi-cart2::before { content: "\f243"; } +.bi-cart3::before { content: "\f244"; } +.bi-cart4::before { content: "\f245"; } +.bi-cash-stack::before { content: "\f246"; } +.bi-cash::before { content: "\f247"; } +.bi-cast::before { content: "\f248"; } +.bi-chat-dots-fill::before { content: "\f249"; } +.bi-chat-dots::before { content: 
"\f24a"; } +.bi-chat-fill::before { content: "\f24b"; } +.bi-chat-left-dots-fill::before { content: "\f24c"; } +.bi-chat-left-dots::before { content: "\f24d"; } +.bi-chat-left-fill::before { content: "\f24e"; } +.bi-chat-left-quote-fill::before { content: "\f24f"; } +.bi-chat-left-quote::before { content: "\f250"; } +.bi-chat-left-text-fill::before { content: "\f251"; } +.bi-chat-left-text::before { content: "\f252"; } +.bi-chat-left::before { content: "\f253"; } +.bi-chat-quote-fill::before { content: "\f254"; } +.bi-chat-quote::before { content: "\f255"; } +.bi-chat-right-dots-fill::before { content: "\f256"; } +.bi-chat-right-dots::before { content: "\f257"; } +.bi-chat-right-fill::before { content: "\f258"; } +.bi-chat-right-quote-fill::before { content: "\f259"; } +.bi-chat-right-quote::before { content: "\f25a"; } +.bi-chat-right-text-fill::before { content: "\f25b"; } +.bi-chat-right-text::before { content: "\f25c"; } +.bi-chat-right::before { content: "\f25d"; } +.bi-chat-square-dots-fill::before { content: "\f25e"; } +.bi-chat-square-dots::before { content: "\f25f"; } +.bi-chat-square-fill::before { content: "\f260"; } +.bi-chat-square-quote-fill::before { content: "\f261"; } +.bi-chat-square-quote::before { content: "\f262"; } +.bi-chat-square-text-fill::before { content: "\f263"; } +.bi-chat-square-text::before { content: "\f264"; } +.bi-chat-square::before { content: "\f265"; } +.bi-chat-text-fill::before { content: "\f266"; } +.bi-chat-text::before { content: "\f267"; } +.bi-chat::before { content: "\f268"; } +.bi-check-all::before { content: "\f269"; } +.bi-check-circle-fill::before { content: "\f26a"; } +.bi-check-circle::before { content: "\f26b"; } +.bi-check-square-fill::before { content: "\f26c"; } +.bi-check-square::before { content: "\f26d"; } +.bi-check::before { content: "\f26e"; } +.bi-check2-all::before { content: "\f26f"; } +.bi-check2-circle::before { content: "\f270"; } +.bi-check2-square::before { content: "\f271"; } +.bi-check2::before 
{ content: "\f272"; } +.bi-chevron-bar-contract::before { content: "\f273"; } +.bi-chevron-bar-down::before { content: "\f274"; } +.bi-chevron-bar-expand::before { content: "\f275"; } +.bi-chevron-bar-left::before { content: "\f276"; } +.bi-chevron-bar-right::before { content: "\f277"; } +.bi-chevron-bar-up::before { content: "\f278"; } +.bi-chevron-compact-down::before { content: "\f279"; } +.bi-chevron-compact-left::before { content: "\f27a"; } +.bi-chevron-compact-right::before { content: "\f27b"; } +.bi-chevron-compact-up::before { content: "\f27c"; } +.bi-chevron-contract::before { content: "\f27d"; } +.bi-chevron-double-down::before { content: "\f27e"; } +.bi-chevron-double-left::before { content: "\f27f"; } +.bi-chevron-double-right::before { content: "\f280"; } +.bi-chevron-double-up::before { content: "\f281"; } +.bi-chevron-down::before { content: "\f282"; } +.bi-chevron-expand::before { content: "\f283"; } +.bi-chevron-left::before { content: "\f284"; } +.bi-chevron-right::before { content: "\f285"; } +.bi-chevron-up::before { content: "\f286"; } +.bi-circle-fill::before { content: "\f287"; } +.bi-circle-half::before { content: "\f288"; } +.bi-circle-square::before { content: "\f289"; } +.bi-circle::before { content: "\f28a"; } +.bi-clipboard-check::before { content: "\f28b"; } +.bi-clipboard-data::before { content: "\f28c"; } +.bi-clipboard-minus::before { content: "\f28d"; } +.bi-clipboard-plus::before { content: "\f28e"; } +.bi-clipboard-x::before { content: "\f28f"; } +.bi-clipboard::before { content: "\f290"; } +.bi-clock-fill::before { content: "\f291"; } +.bi-clock-history::before { content: "\f292"; } +.bi-clock::before { content: "\f293"; } +.bi-cloud-arrow-down-fill::before { content: "\f294"; } +.bi-cloud-arrow-down::before { content: "\f295"; } +.bi-cloud-arrow-up-fill::before { content: "\f296"; } +.bi-cloud-arrow-up::before { content: "\f297"; } +.bi-cloud-check-fill::before { content: "\f298"; } +.bi-cloud-check::before { content: "\f299"; 
} +.bi-cloud-download-fill::before { content: "\f29a"; } +.bi-cloud-download::before { content: "\f29b"; } +.bi-cloud-drizzle-fill::before { content: "\f29c"; } +.bi-cloud-drizzle::before { content: "\f29d"; } +.bi-cloud-fill::before { content: "\f29e"; } +.bi-cloud-fog-fill::before { content: "\f29f"; } +.bi-cloud-fog::before { content: "\f2a0"; } +.bi-cloud-fog2-fill::before { content: "\f2a1"; } +.bi-cloud-fog2::before { content: "\f2a2"; } +.bi-cloud-hail-fill::before { content: "\f2a3"; } +.bi-cloud-hail::before { content: "\f2a4"; } +.bi-cloud-haze-fill::before { content: "\f2a6"; } +.bi-cloud-haze::before { content: "\f2a7"; } +.bi-cloud-haze2-fill::before { content: "\f2a8"; } +.bi-cloud-lightning-fill::before { content: "\f2a9"; } +.bi-cloud-lightning-rain-fill::before { content: "\f2aa"; } +.bi-cloud-lightning-rain::before { content: "\f2ab"; } +.bi-cloud-lightning::before { content: "\f2ac"; } +.bi-cloud-minus-fill::before { content: "\f2ad"; } +.bi-cloud-minus::before { content: "\f2ae"; } +.bi-cloud-moon-fill::before { content: "\f2af"; } +.bi-cloud-moon::before { content: "\f2b0"; } +.bi-cloud-plus-fill::before { content: "\f2b1"; } +.bi-cloud-plus::before { content: "\f2b2"; } +.bi-cloud-rain-fill::before { content: "\f2b3"; } +.bi-cloud-rain-heavy-fill::before { content: "\f2b4"; } +.bi-cloud-rain-heavy::before { content: "\f2b5"; } +.bi-cloud-rain::before { content: "\f2b6"; } +.bi-cloud-slash-fill::before { content: "\f2b7"; } +.bi-cloud-slash::before { content: "\f2b8"; } +.bi-cloud-sleet-fill::before { content: "\f2b9"; } +.bi-cloud-sleet::before { content: "\f2ba"; } +.bi-cloud-snow-fill::before { content: "\f2bb"; } +.bi-cloud-snow::before { content: "\f2bc"; } +.bi-cloud-sun-fill::before { content: "\f2bd"; } +.bi-cloud-sun::before { content: "\f2be"; } +.bi-cloud-upload-fill::before { content: "\f2bf"; } +.bi-cloud-upload::before { content: "\f2c0"; } +.bi-cloud::before { content: "\f2c1"; } +.bi-clouds-fill::before { content: "\f2c2"; } 
+.bi-clouds::before { content: "\f2c3"; } +.bi-cloudy-fill::before { content: "\f2c4"; } +.bi-cloudy::before { content: "\f2c5"; } +.bi-code-slash::before { content: "\f2c6"; } +.bi-code-square::before { content: "\f2c7"; } +.bi-code::before { content: "\f2c8"; } +.bi-collection-fill::before { content: "\f2c9"; } +.bi-collection-play-fill::before { content: "\f2ca"; } +.bi-collection-play::before { content: "\f2cb"; } +.bi-collection::before { content: "\f2cc"; } +.bi-columns-gap::before { content: "\f2cd"; } +.bi-columns::before { content: "\f2ce"; } +.bi-command::before { content: "\f2cf"; } +.bi-compass-fill::before { content: "\f2d0"; } +.bi-compass::before { content: "\f2d1"; } +.bi-cone-striped::before { content: "\f2d2"; } +.bi-cone::before { content: "\f2d3"; } +.bi-controller::before { content: "\f2d4"; } +.bi-cpu-fill::before { content: "\f2d5"; } +.bi-cpu::before { content: "\f2d6"; } +.bi-credit-card-2-back-fill::before { content: "\f2d7"; } +.bi-credit-card-2-back::before { content: "\f2d8"; } +.bi-credit-card-2-front-fill::before { content: "\f2d9"; } +.bi-credit-card-2-front::before { content: "\f2da"; } +.bi-credit-card-fill::before { content: "\f2db"; } +.bi-credit-card::before { content: "\f2dc"; } +.bi-crop::before { content: "\f2dd"; } +.bi-cup-fill::before { content: "\f2de"; } +.bi-cup-straw::before { content: "\f2df"; } +.bi-cup::before { content: "\f2e0"; } +.bi-cursor-fill::before { content: "\f2e1"; } +.bi-cursor-text::before { content: "\f2e2"; } +.bi-cursor::before { content: "\f2e3"; } +.bi-dash-circle-dotted::before { content: "\f2e4"; } +.bi-dash-circle-fill::before { content: "\f2e5"; } +.bi-dash-circle::before { content: "\f2e6"; } +.bi-dash-square-dotted::before { content: "\f2e7"; } +.bi-dash-square-fill::before { content: "\f2e8"; } +.bi-dash-square::before { content: "\f2e9"; } +.bi-dash::before { content: "\f2ea"; } +.bi-diagram-2-fill::before { content: "\f2eb"; } +.bi-diagram-2::before { content: "\f2ec"; } 
+.bi-diagram-3-fill::before { content: "\f2ed"; } +.bi-diagram-3::before { content: "\f2ee"; } +.bi-diamond-fill::before { content: "\f2ef"; } +.bi-diamond-half::before { content: "\f2f0"; } +.bi-diamond::before { content: "\f2f1"; } +.bi-dice-1-fill::before { content: "\f2f2"; } +.bi-dice-1::before { content: "\f2f3"; } +.bi-dice-2-fill::before { content: "\f2f4"; } +.bi-dice-2::before { content: "\f2f5"; } +.bi-dice-3-fill::before { content: "\f2f6"; } +.bi-dice-3::before { content: "\f2f7"; } +.bi-dice-4-fill::before { content: "\f2f8"; } +.bi-dice-4::before { content: "\f2f9"; } +.bi-dice-5-fill::before { content: "\f2fa"; } +.bi-dice-5::before { content: "\f2fb"; } +.bi-dice-6-fill::before { content: "\f2fc"; } +.bi-dice-6::before { content: "\f2fd"; } +.bi-disc-fill::before { content: "\f2fe"; } +.bi-disc::before { content: "\f2ff"; } +.bi-discord::before { content: "\f300"; } +.bi-display-fill::before { content: "\f301"; } +.bi-display::before { content: "\f302"; } +.bi-distribute-horizontal::before { content: "\f303"; } +.bi-distribute-vertical::before { content: "\f304"; } +.bi-door-closed-fill::before { content: "\f305"; } +.bi-door-closed::before { content: "\f306"; } +.bi-door-open-fill::before { content: "\f307"; } +.bi-door-open::before { content: "\f308"; } +.bi-dot::before { content: "\f309"; } +.bi-download::before { content: "\f30a"; } +.bi-droplet-fill::before { content: "\f30b"; } +.bi-droplet-half::before { content: "\f30c"; } +.bi-droplet::before { content: "\f30d"; } +.bi-earbuds::before { content: "\f30e"; } +.bi-easel-fill::before { content: "\f30f"; } +.bi-easel::before { content: "\f310"; } +.bi-egg-fill::before { content: "\f311"; } +.bi-egg-fried::before { content: "\f312"; } +.bi-egg::before { content: "\f313"; } +.bi-eject-fill::before { content: "\f314"; } +.bi-eject::before { content: "\f315"; } +.bi-emoji-angry-fill::before { content: "\f316"; } +.bi-emoji-angry::before { content: "\f317"; } +.bi-emoji-dizzy-fill::before { content: 
"\f318"; } +.bi-emoji-dizzy::before { content: "\f319"; } +.bi-emoji-expressionless-fill::before { content: "\f31a"; } +.bi-emoji-expressionless::before { content: "\f31b"; } +.bi-emoji-frown-fill::before { content: "\f31c"; } +.bi-emoji-frown::before { content: "\f31d"; } +.bi-emoji-heart-eyes-fill::before { content: "\f31e"; } +.bi-emoji-heart-eyes::before { content: "\f31f"; } +.bi-emoji-laughing-fill::before { content: "\f320"; } +.bi-emoji-laughing::before { content: "\f321"; } +.bi-emoji-neutral-fill::before { content: "\f322"; } +.bi-emoji-neutral::before { content: "\f323"; } +.bi-emoji-smile-fill::before { content: "\f324"; } +.bi-emoji-smile-upside-down-fill::before { content: "\f325"; } +.bi-emoji-smile-upside-down::before { content: "\f326"; } +.bi-emoji-smile::before { content: "\f327"; } +.bi-emoji-sunglasses-fill::before { content: "\f328"; } +.bi-emoji-sunglasses::before { content: "\f329"; } +.bi-emoji-wink-fill::before { content: "\f32a"; } +.bi-emoji-wink::before { content: "\f32b"; } +.bi-envelope-fill::before { content: "\f32c"; } +.bi-envelope-open-fill::before { content: "\f32d"; } +.bi-envelope-open::before { content: "\f32e"; } +.bi-envelope::before { content: "\f32f"; } +.bi-eraser-fill::before { content: "\f330"; } +.bi-eraser::before { content: "\f331"; } +.bi-exclamation-circle-fill::before { content: "\f332"; } +.bi-exclamation-circle::before { content: "\f333"; } +.bi-exclamation-diamond-fill::before { content: "\f334"; } +.bi-exclamation-diamond::before { content: "\f335"; } +.bi-exclamation-octagon-fill::before { content: "\f336"; } +.bi-exclamation-octagon::before { content: "\f337"; } +.bi-exclamation-square-fill::before { content: "\f338"; } +.bi-exclamation-square::before { content: "\f339"; } +.bi-exclamation-triangle-fill::before { content: "\f33a"; } +.bi-exclamation-triangle::before { content: "\f33b"; } +.bi-exclamation::before { content: "\f33c"; } +.bi-exclude::before { content: "\f33d"; } +.bi-eye-fill::before { content: 
"\f33e"; } +.bi-eye-slash-fill::before { content: "\f33f"; } +.bi-eye-slash::before { content: "\f340"; } +.bi-eye::before { content: "\f341"; } +.bi-eyedropper::before { content: "\f342"; } +.bi-eyeglasses::before { content: "\f343"; } +.bi-facebook::before { content: "\f344"; } +.bi-file-arrow-down-fill::before { content: "\f345"; } +.bi-file-arrow-down::before { content: "\f346"; } +.bi-file-arrow-up-fill::before { content: "\f347"; } +.bi-file-arrow-up::before { content: "\f348"; } +.bi-file-bar-graph-fill::before { content: "\f349"; } +.bi-file-bar-graph::before { content: "\f34a"; } +.bi-file-binary-fill::before { content: "\f34b"; } +.bi-file-binary::before { content: "\f34c"; } +.bi-file-break-fill::before { content: "\f34d"; } +.bi-file-break::before { content: "\f34e"; } +.bi-file-check-fill::before { content: "\f34f"; } +.bi-file-check::before { content: "\f350"; } +.bi-file-code-fill::before { content: "\f351"; } +.bi-file-code::before { content: "\f352"; } +.bi-file-diff-fill::before { content: "\f353"; } +.bi-file-diff::before { content: "\f354"; } +.bi-file-earmark-arrow-down-fill::before { content: "\f355"; } +.bi-file-earmark-arrow-down::before { content: "\f356"; } +.bi-file-earmark-arrow-up-fill::before { content: "\f357"; } +.bi-file-earmark-arrow-up::before { content: "\f358"; } +.bi-file-earmark-bar-graph-fill::before { content: "\f359"; } +.bi-file-earmark-bar-graph::before { content: "\f35a"; } +.bi-file-earmark-binary-fill::before { content: "\f35b"; } +.bi-file-earmark-binary::before { content: "\f35c"; } +.bi-file-earmark-break-fill::before { content: "\f35d"; } +.bi-file-earmark-break::before { content: "\f35e"; } +.bi-file-earmark-check-fill::before { content: "\f35f"; } +.bi-file-earmark-check::before { content: "\f360"; } +.bi-file-earmark-code-fill::before { content: "\f361"; } +.bi-file-earmark-code::before { content: "\f362"; } +.bi-file-earmark-diff-fill::before { content: "\f363"; } +.bi-file-earmark-diff::before { content: 
"\f364"; } +.bi-file-earmark-easel-fill::before { content: "\f365"; } +.bi-file-earmark-easel::before { content: "\f366"; } +.bi-file-earmark-excel-fill::before { content: "\f367"; } +.bi-file-earmark-excel::before { content: "\f368"; } +.bi-file-earmark-fill::before { content: "\f369"; } +.bi-file-earmark-font-fill::before { content: "\f36a"; } +.bi-file-earmark-font::before { content: "\f36b"; } +.bi-file-earmark-image-fill::before { content: "\f36c"; } +.bi-file-earmark-image::before { content: "\f36d"; } +.bi-file-earmark-lock-fill::before { content: "\f36e"; } +.bi-file-earmark-lock::before { content: "\f36f"; } +.bi-file-earmark-lock2-fill::before { content: "\f370"; } +.bi-file-earmark-lock2::before { content: "\f371"; } +.bi-file-earmark-medical-fill::before { content: "\f372"; } +.bi-file-earmark-medical::before { content: "\f373"; } +.bi-file-earmark-minus-fill::before { content: "\f374"; } +.bi-file-earmark-minus::before { content: "\f375"; } +.bi-file-earmark-music-fill::before { content: "\f376"; } +.bi-file-earmark-music::before { content: "\f377"; } +.bi-file-earmark-person-fill::before { content: "\f378"; } +.bi-file-earmark-person::before { content: "\f379"; } +.bi-file-earmark-play-fill::before { content: "\f37a"; } +.bi-file-earmark-play::before { content: "\f37b"; } +.bi-file-earmark-plus-fill::before { content: "\f37c"; } +.bi-file-earmark-plus::before { content: "\f37d"; } +.bi-file-earmark-post-fill::before { content: "\f37e"; } +.bi-file-earmark-post::before { content: "\f37f"; } +.bi-file-earmark-ppt-fill::before { content: "\f380"; } +.bi-file-earmark-ppt::before { content: "\f381"; } +.bi-file-earmark-richtext-fill::before { content: "\f382"; } +.bi-file-earmark-richtext::before { content: "\f383"; } +.bi-file-earmark-ruled-fill::before { content: "\f384"; } +.bi-file-earmark-ruled::before { content: "\f385"; } +.bi-file-earmark-slides-fill::before { content: "\f386"; } +.bi-file-earmark-slides::before { content: "\f387"; } 
+.bi-file-earmark-spreadsheet-fill::before { content: "\f388"; } +.bi-file-earmark-spreadsheet::before { content: "\f389"; } +.bi-file-earmark-text-fill::before { content: "\f38a"; } +.bi-file-earmark-text::before { content: "\f38b"; } +.bi-file-earmark-word-fill::before { content: "\f38c"; } +.bi-file-earmark-word::before { content: "\f38d"; } +.bi-file-earmark-x-fill::before { content: "\f38e"; } +.bi-file-earmark-x::before { content: "\f38f"; } +.bi-file-earmark-zip-fill::before { content: "\f390"; } +.bi-file-earmark-zip::before { content: "\f391"; } +.bi-file-earmark::before { content: "\f392"; } +.bi-file-easel-fill::before { content: "\f393"; } +.bi-file-easel::before { content: "\f394"; } +.bi-file-excel-fill::before { content: "\f395"; } +.bi-file-excel::before { content: "\f396"; } +.bi-file-fill::before { content: "\f397"; } +.bi-file-font-fill::before { content: "\f398"; } +.bi-file-font::before { content: "\f399"; } +.bi-file-image-fill::before { content: "\f39a"; } +.bi-file-image::before { content: "\f39b"; } +.bi-file-lock-fill::before { content: "\f39c"; } +.bi-file-lock::before { content: "\f39d"; } +.bi-file-lock2-fill::before { content: "\f39e"; } +.bi-file-lock2::before { content: "\f39f"; } +.bi-file-medical-fill::before { content: "\f3a0"; } +.bi-file-medical::before { content: "\f3a1"; } +.bi-file-minus-fill::before { content: "\f3a2"; } +.bi-file-minus::before { content: "\f3a3"; } +.bi-file-music-fill::before { content: "\f3a4"; } +.bi-file-music::before { content: "\f3a5"; } +.bi-file-person-fill::before { content: "\f3a6"; } +.bi-file-person::before { content: "\f3a7"; } +.bi-file-play-fill::before { content: "\f3a8"; } +.bi-file-play::before { content: "\f3a9"; } +.bi-file-plus-fill::before { content: "\f3aa"; } +.bi-file-plus::before { content: "\f3ab"; } +.bi-file-post-fill::before { content: "\f3ac"; } +.bi-file-post::before { content: "\f3ad"; } +.bi-file-ppt-fill::before { content: "\f3ae"; } +.bi-file-ppt::before { content: 
"\f3af"; } +.bi-file-richtext-fill::before { content: "\f3b0"; } +.bi-file-richtext::before { content: "\f3b1"; } +.bi-file-ruled-fill::before { content: "\f3b2"; } +.bi-file-ruled::before { content: "\f3b3"; } +.bi-file-slides-fill::before { content: "\f3b4"; } +.bi-file-slides::before { content: "\f3b5"; } +.bi-file-spreadsheet-fill::before { content: "\f3b6"; } +.bi-file-spreadsheet::before { content: "\f3b7"; } +.bi-file-text-fill::before { content: "\f3b8"; } +.bi-file-text::before { content: "\f3b9"; } +.bi-file-word-fill::before { content: "\f3ba"; } +.bi-file-word::before { content: "\f3bb"; } +.bi-file-x-fill::before { content: "\f3bc"; } +.bi-file-x::before { content: "\f3bd"; } +.bi-file-zip-fill::before { content: "\f3be"; } +.bi-file-zip::before { content: "\f3bf"; } +.bi-file::before { content: "\f3c0"; } +.bi-files-alt::before { content: "\f3c1"; } +.bi-files::before { content: "\f3c2"; } +.bi-film::before { content: "\f3c3"; } +.bi-filter-circle-fill::before { content: "\f3c4"; } +.bi-filter-circle::before { content: "\f3c5"; } +.bi-filter-left::before { content: "\f3c6"; } +.bi-filter-right::before { content: "\f3c7"; } +.bi-filter-square-fill::before { content: "\f3c8"; } +.bi-filter-square::before { content: "\f3c9"; } +.bi-filter::before { content: "\f3ca"; } +.bi-flag-fill::before { content: "\f3cb"; } +.bi-flag::before { content: "\f3cc"; } +.bi-flower1::before { content: "\f3cd"; } +.bi-flower2::before { content: "\f3ce"; } +.bi-flower3::before { content: "\f3cf"; } +.bi-folder-check::before { content: "\f3d0"; } +.bi-folder-fill::before { content: "\f3d1"; } +.bi-folder-minus::before { content: "\f3d2"; } +.bi-folder-plus::before { content: "\f3d3"; } +.bi-folder-symlink-fill::before { content: "\f3d4"; } +.bi-folder-symlink::before { content: "\f3d5"; } +.bi-folder-x::before { content: "\f3d6"; } +.bi-folder::before { content: "\f3d7"; } +.bi-folder2-open::before { content: "\f3d8"; } +.bi-folder2::before { content: "\f3d9"; } 
+.bi-fonts::before { content: "\f3da"; } +.bi-forward-fill::before { content: "\f3db"; } +.bi-forward::before { content: "\f3dc"; } +.bi-front::before { content: "\f3dd"; } +.bi-fullscreen-exit::before { content: "\f3de"; } +.bi-fullscreen::before { content: "\f3df"; } +.bi-funnel-fill::before { content: "\f3e0"; } +.bi-funnel::before { content: "\f3e1"; } +.bi-gear-fill::before { content: "\f3e2"; } +.bi-gear-wide-connected::before { content: "\f3e3"; } +.bi-gear-wide::before { content: "\f3e4"; } +.bi-gear::before { content: "\f3e5"; } +.bi-gem::before { content: "\f3e6"; } +.bi-geo-alt-fill::before { content: "\f3e7"; } +.bi-geo-alt::before { content: "\f3e8"; } +.bi-geo-fill::before { content: "\f3e9"; } +.bi-geo::before { content: "\f3ea"; } +.bi-gift-fill::before { content: "\f3eb"; } +.bi-gift::before { content: "\f3ec"; } +.bi-github::before { content: "\f3ed"; } +.bi-globe::before { content: "\f3ee"; } +.bi-globe2::before { content: "\f3ef"; } +.bi-google::before { content: "\f3f0"; } +.bi-graph-down::before { content: "\f3f1"; } +.bi-graph-up::before { content: "\f3f2"; } +.bi-grid-1x2-fill::before { content: "\f3f3"; } +.bi-grid-1x2::before { content: "\f3f4"; } +.bi-grid-3x2-gap-fill::before { content: "\f3f5"; } +.bi-grid-3x2-gap::before { content: "\f3f6"; } +.bi-grid-3x2::before { content: "\f3f7"; } +.bi-grid-3x3-gap-fill::before { content: "\f3f8"; } +.bi-grid-3x3-gap::before { content: "\f3f9"; } +.bi-grid-3x3::before { content: "\f3fa"; } +.bi-grid-fill::before { content: "\f3fb"; } +.bi-grid::before { content: "\f3fc"; } +.bi-grip-horizontal::before { content: "\f3fd"; } +.bi-grip-vertical::before { content: "\f3fe"; } +.bi-hammer::before { content: "\f3ff"; } +.bi-hand-index-fill::before { content: "\f400"; } +.bi-hand-index-thumb-fill::before { content: "\f401"; } +.bi-hand-index-thumb::before { content: "\f402"; } +.bi-hand-index::before { content: "\f403"; } +.bi-hand-thumbs-down-fill::before { content: "\f404"; } 
+.bi-hand-thumbs-down::before { content: "\f405"; } +.bi-hand-thumbs-up-fill::before { content: "\f406"; } +.bi-hand-thumbs-up::before { content: "\f407"; } +.bi-handbag-fill::before { content: "\f408"; } +.bi-handbag::before { content: "\f409"; } +.bi-hash::before { content: "\f40a"; } +.bi-hdd-fill::before { content: "\f40b"; } +.bi-hdd-network-fill::before { content: "\f40c"; } +.bi-hdd-network::before { content: "\f40d"; } +.bi-hdd-rack-fill::before { content: "\f40e"; } +.bi-hdd-rack::before { content: "\f40f"; } +.bi-hdd-stack-fill::before { content: "\f410"; } +.bi-hdd-stack::before { content: "\f411"; } +.bi-hdd::before { content: "\f412"; } +.bi-headphones::before { content: "\f413"; } +.bi-headset::before { content: "\f414"; } +.bi-heart-fill::before { content: "\f415"; } +.bi-heart-half::before { content: "\f416"; } +.bi-heart::before { content: "\f417"; } +.bi-heptagon-fill::before { content: "\f418"; } +.bi-heptagon-half::before { content: "\f419"; } +.bi-heptagon::before { content: "\f41a"; } +.bi-hexagon-fill::before { content: "\f41b"; } +.bi-hexagon-half::before { content: "\f41c"; } +.bi-hexagon::before { content: "\f41d"; } +.bi-hourglass-bottom::before { content: "\f41e"; } +.bi-hourglass-split::before { content: "\f41f"; } +.bi-hourglass-top::before { content: "\f420"; } +.bi-hourglass::before { content: "\f421"; } +.bi-house-door-fill::before { content: "\f422"; } +.bi-house-door::before { content: "\f423"; } +.bi-house-fill::before { content: "\f424"; } +.bi-house::before { content: "\f425"; } +.bi-hr::before { content: "\f426"; } +.bi-hurricane::before { content: "\f427"; } +.bi-image-alt::before { content: "\f428"; } +.bi-image-fill::before { content: "\f429"; } +.bi-image::before { content: "\f42a"; } +.bi-images::before { content: "\f42b"; } +.bi-inbox-fill::before { content: "\f42c"; } +.bi-inbox::before { content: "\f42d"; } +.bi-inboxes-fill::before { content: "\f42e"; } +.bi-inboxes::before { content: "\f42f"; } 
+.bi-info-circle-fill::before { content: "\f430"; } +.bi-info-circle::before { content: "\f431"; } +.bi-info-square-fill::before { content: "\f432"; } +.bi-info-square::before { content: "\f433"; } +.bi-info::before { content: "\f434"; } +.bi-input-cursor-text::before { content: "\f435"; } +.bi-input-cursor::before { content: "\f436"; } +.bi-instagram::before { content: "\f437"; } +.bi-intersect::before { content: "\f438"; } +.bi-journal-album::before { content: "\f439"; } +.bi-journal-arrow-down::before { content: "\f43a"; } +.bi-journal-arrow-up::before { content: "\f43b"; } +.bi-journal-bookmark-fill::before { content: "\f43c"; } +.bi-journal-bookmark::before { content: "\f43d"; } +.bi-journal-check::before { content: "\f43e"; } +.bi-journal-code::before { content: "\f43f"; } +.bi-journal-medical::before { content: "\f440"; } +.bi-journal-minus::before { content: "\f441"; } +.bi-journal-plus::before { content: "\f442"; } +.bi-journal-richtext::before { content: "\f443"; } +.bi-journal-text::before { content: "\f444"; } +.bi-journal-x::before { content: "\f445"; } +.bi-journal::before { content: "\f446"; } +.bi-journals::before { content: "\f447"; } +.bi-joystick::before { content: "\f448"; } +.bi-justify-left::before { content: "\f449"; } +.bi-justify-right::before { content: "\f44a"; } +.bi-justify::before { content: "\f44b"; } +.bi-kanban-fill::before { content: "\f44c"; } +.bi-kanban::before { content: "\f44d"; } +.bi-key-fill::before { content: "\f44e"; } +.bi-key::before { content: "\f44f"; } +.bi-keyboard-fill::before { content: "\f450"; } +.bi-keyboard::before { content: "\f451"; } +.bi-ladder::before { content: "\f452"; } +.bi-lamp-fill::before { content: "\f453"; } +.bi-lamp::before { content: "\f454"; } +.bi-laptop-fill::before { content: "\f455"; } +.bi-laptop::before { content: "\f456"; } +.bi-layer-backward::before { content: "\f457"; } +.bi-layer-forward::before { content: "\f458"; } +.bi-layers-fill::before { content: "\f459"; } 
+.bi-layers-half::before { content: "\f45a"; } +.bi-layers::before { content: "\f45b"; } +.bi-layout-sidebar-inset-reverse::before { content: "\f45c"; } +.bi-layout-sidebar-inset::before { content: "\f45d"; } +.bi-layout-sidebar-reverse::before { content: "\f45e"; } +.bi-layout-sidebar::before { content: "\f45f"; } +.bi-layout-split::before { content: "\f460"; } +.bi-layout-text-sidebar-reverse::before { content: "\f461"; } +.bi-layout-text-sidebar::before { content: "\f462"; } +.bi-layout-text-window-reverse::before { content: "\f463"; } +.bi-layout-text-window::before { content: "\f464"; } +.bi-layout-three-columns::before { content: "\f465"; } +.bi-layout-wtf::before { content: "\f466"; } +.bi-life-preserver::before { content: "\f467"; } +.bi-lightbulb-fill::before { content: "\f468"; } +.bi-lightbulb-off-fill::before { content: "\f469"; } +.bi-lightbulb-off::before { content: "\f46a"; } +.bi-lightbulb::before { content: "\f46b"; } +.bi-lightning-charge-fill::before { content: "\f46c"; } +.bi-lightning-charge::before { content: "\f46d"; } +.bi-lightning-fill::before { content: "\f46e"; } +.bi-lightning::before { content: "\f46f"; } +.bi-link-45deg::before { content: "\f470"; } +.bi-link::before { content: "\f471"; } +.bi-linkedin::before { content: "\f472"; } +.bi-list-check::before { content: "\f473"; } +.bi-list-nested::before { content: "\f474"; } +.bi-list-ol::before { content: "\f475"; } +.bi-list-stars::before { content: "\f476"; } +.bi-list-task::before { content: "\f477"; } +.bi-list-ul::before { content: "\f478"; } +.bi-list::before { content: "\f479"; } +.bi-lock-fill::before { content: "\f47a"; } +.bi-lock::before { content: "\f47b"; } +.bi-mailbox::before { content: "\f47c"; } +.bi-mailbox2::before { content: "\f47d"; } +.bi-map-fill::before { content: "\f47e"; } +.bi-map::before { content: "\f47f"; } +.bi-markdown-fill::before { content: "\f480"; } +.bi-markdown::before { content: "\f481"; } +.bi-mask::before { content: "\f482"; } 
+.bi-megaphone-fill::before { content: "\f483"; } +.bi-megaphone::before { content: "\f484"; } +.bi-menu-app-fill::before { content: "\f485"; } +.bi-menu-app::before { content: "\f486"; } +.bi-menu-button-fill::before { content: "\f487"; } +.bi-menu-button-wide-fill::before { content: "\f488"; } +.bi-menu-button-wide::before { content: "\f489"; } +.bi-menu-button::before { content: "\f48a"; } +.bi-menu-down::before { content: "\f48b"; } +.bi-menu-up::before { content: "\f48c"; } +.bi-mic-fill::before { content: "\f48d"; } +.bi-mic-mute-fill::before { content: "\f48e"; } +.bi-mic-mute::before { content: "\f48f"; } +.bi-mic::before { content: "\f490"; } +.bi-minecart-loaded::before { content: "\f491"; } +.bi-minecart::before { content: "\f492"; } +.bi-moisture::before { content: "\f493"; } +.bi-moon-fill::before { content: "\f494"; } +.bi-moon-stars-fill::before { content: "\f495"; } +.bi-moon-stars::before { content: "\f496"; } +.bi-moon::before { content: "\f497"; } +.bi-mouse-fill::before { content: "\f498"; } +.bi-mouse::before { content: "\f499"; } +.bi-mouse2-fill::before { content: "\f49a"; } +.bi-mouse2::before { content: "\f49b"; } +.bi-mouse3-fill::before { content: "\f49c"; } +.bi-mouse3::before { content: "\f49d"; } +.bi-music-note-beamed::before { content: "\f49e"; } +.bi-music-note-list::before { content: "\f49f"; } +.bi-music-note::before { content: "\f4a0"; } +.bi-music-player-fill::before { content: "\f4a1"; } +.bi-music-player::before { content: "\f4a2"; } +.bi-newspaper::before { content: "\f4a3"; } +.bi-node-minus-fill::before { content: "\f4a4"; } +.bi-node-minus::before { content: "\f4a5"; } +.bi-node-plus-fill::before { content: "\f4a6"; } +.bi-node-plus::before { content: "\f4a7"; } +.bi-nut-fill::before { content: "\f4a8"; } +.bi-nut::before { content: "\f4a9"; } +.bi-octagon-fill::before { content: "\f4aa"; } +.bi-octagon-half::before { content: "\f4ab"; } +.bi-octagon::before { content: "\f4ac"; } +.bi-option::before { content: "\f4ad"; } 
+.bi-outlet::before { content: "\f4ae"; } +.bi-paint-bucket::before { content: "\f4af"; } +.bi-palette-fill::before { content: "\f4b0"; } +.bi-palette::before { content: "\f4b1"; } +.bi-palette2::before { content: "\f4b2"; } +.bi-paperclip::before { content: "\f4b3"; } +.bi-paragraph::before { content: "\f4b4"; } +.bi-patch-check-fill::before { content: "\f4b5"; } +.bi-patch-check::before { content: "\f4b6"; } +.bi-patch-exclamation-fill::before { content: "\f4b7"; } +.bi-patch-exclamation::before { content: "\f4b8"; } +.bi-patch-minus-fill::before { content: "\f4b9"; } +.bi-patch-minus::before { content: "\f4ba"; } +.bi-patch-plus-fill::before { content: "\f4bb"; } +.bi-patch-plus::before { content: "\f4bc"; } +.bi-patch-question-fill::before { content: "\f4bd"; } +.bi-patch-question::before { content: "\f4be"; } +.bi-pause-btn-fill::before { content: "\f4bf"; } +.bi-pause-btn::before { content: "\f4c0"; } +.bi-pause-circle-fill::before { content: "\f4c1"; } +.bi-pause-circle::before { content: "\f4c2"; } +.bi-pause-fill::before { content: "\f4c3"; } +.bi-pause::before { content: "\f4c4"; } +.bi-peace-fill::before { content: "\f4c5"; } +.bi-peace::before { content: "\f4c6"; } +.bi-pen-fill::before { content: "\f4c7"; } +.bi-pen::before { content: "\f4c8"; } +.bi-pencil-fill::before { content: "\f4c9"; } +.bi-pencil-square::before { content: "\f4ca"; } +.bi-pencil::before { content: "\f4cb"; } +.bi-pentagon-fill::before { content: "\f4cc"; } +.bi-pentagon-half::before { content: "\f4cd"; } +.bi-pentagon::before { content: "\f4ce"; } +.bi-people-fill::before { content: "\f4cf"; } +.bi-people::before { content: "\f4d0"; } +.bi-percent::before { content: "\f4d1"; } +.bi-person-badge-fill::before { content: "\f4d2"; } +.bi-person-badge::before { content: "\f4d3"; } +.bi-person-bounding-box::before { content: "\f4d4"; } +.bi-person-check-fill::before { content: "\f4d5"; } +.bi-person-check::before { content: "\f4d6"; } +.bi-person-circle::before { content: "\f4d7"; } 
+.bi-person-dash-fill::before { content: "\f4d8"; } +.bi-person-dash::before { content: "\f4d9"; } +.bi-person-fill::before { content: "\f4da"; } +.bi-person-lines-fill::before { content: "\f4db"; } +.bi-person-plus-fill::before { content: "\f4dc"; } +.bi-person-plus::before { content: "\f4dd"; } +.bi-person-square::before { content: "\f4de"; } +.bi-person-x-fill::before { content: "\f4df"; } +.bi-person-x::before { content: "\f4e0"; } +.bi-person::before { content: "\f4e1"; } +.bi-phone-fill::before { content: "\f4e2"; } +.bi-phone-landscape-fill::before { content: "\f4e3"; } +.bi-phone-landscape::before { content: "\f4e4"; } +.bi-phone-vibrate-fill::before { content: "\f4e5"; } +.bi-phone-vibrate::before { content: "\f4e6"; } +.bi-phone::before { content: "\f4e7"; } +.bi-pie-chart-fill::before { content: "\f4e8"; } +.bi-pie-chart::before { content: "\f4e9"; } +.bi-pin-angle-fill::before { content: "\f4ea"; } +.bi-pin-angle::before { content: "\f4eb"; } +.bi-pin-fill::before { content: "\f4ec"; } +.bi-pin::before { content: "\f4ed"; } +.bi-pip-fill::before { content: "\f4ee"; } +.bi-pip::before { content: "\f4ef"; } +.bi-play-btn-fill::before { content: "\f4f0"; } +.bi-play-btn::before { content: "\f4f1"; } +.bi-play-circle-fill::before { content: "\f4f2"; } +.bi-play-circle::before { content: "\f4f3"; } +.bi-play-fill::before { content: "\f4f4"; } +.bi-play::before { content: "\f4f5"; } +.bi-plug-fill::before { content: "\f4f6"; } +.bi-plug::before { content: "\f4f7"; } +.bi-plus-circle-dotted::before { content: "\f4f8"; } +.bi-plus-circle-fill::before { content: "\f4f9"; } +.bi-plus-circle::before { content: "\f4fa"; } +.bi-plus-square-dotted::before { content: "\f4fb"; } +.bi-plus-square-fill::before { content: "\f4fc"; } +.bi-plus-square::before { content: "\f4fd"; } +.bi-plus::before { content: "\f4fe"; } +.bi-power::before { content: "\f4ff"; } +.bi-printer-fill::before { content: "\f500"; } +.bi-printer::before { content: "\f501"; } +.bi-puzzle-fill::before 
{ content: "\f502"; } +.bi-puzzle::before { content: "\f503"; } +.bi-question-circle-fill::before { content: "\f504"; } +.bi-question-circle::before { content: "\f505"; } +.bi-question-diamond-fill::before { content: "\f506"; } +.bi-question-diamond::before { content: "\f507"; } +.bi-question-octagon-fill::before { content: "\f508"; } +.bi-question-octagon::before { content: "\f509"; } +.bi-question-square-fill::before { content: "\f50a"; } +.bi-question-square::before { content: "\f50b"; } +.bi-question::before { content: "\f50c"; } +.bi-rainbow::before { content: "\f50d"; } +.bi-receipt-cutoff::before { content: "\f50e"; } +.bi-receipt::before { content: "\f50f"; } +.bi-reception-0::before { content: "\f510"; } +.bi-reception-1::before { content: "\f511"; } +.bi-reception-2::before { content: "\f512"; } +.bi-reception-3::before { content: "\f513"; } +.bi-reception-4::before { content: "\f514"; } +.bi-record-btn-fill::before { content: "\f515"; } +.bi-record-btn::before { content: "\f516"; } +.bi-record-circle-fill::before { content: "\f517"; } +.bi-record-circle::before { content: "\f518"; } +.bi-record-fill::before { content: "\f519"; } +.bi-record::before { content: "\f51a"; } +.bi-record2-fill::before { content: "\f51b"; } +.bi-record2::before { content: "\f51c"; } +.bi-reply-all-fill::before { content: "\f51d"; } +.bi-reply-all::before { content: "\f51e"; } +.bi-reply-fill::before { content: "\f51f"; } +.bi-reply::before { content: "\f520"; } +.bi-rss-fill::before { content: "\f521"; } +.bi-rss::before { content: "\f522"; } +.bi-rulers::before { content: "\f523"; } +.bi-save-fill::before { content: "\f524"; } +.bi-save::before { content: "\f525"; } +.bi-save2-fill::before { content: "\f526"; } +.bi-save2::before { content: "\f527"; } +.bi-scissors::before { content: "\f528"; } +.bi-screwdriver::before { content: "\f529"; } +.bi-search::before { content: "\f52a"; } +.bi-segmented-nav::before { content: "\f52b"; } +.bi-server::before { content: "\f52c"; } 
+.bi-share-fill::before { content: "\f52d"; } +.bi-share::before { content: "\f52e"; } +.bi-shield-check::before { content: "\f52f"; } +.bi-shield-exclamation::before { content: "\f530"; } +.bi-shield-fill-check::before { content: "\f531"; } +.bi-shield-fill-exclamation::before { content: "\f532"; } +.bi-shield-fill-minus::before { content: "\f533"; } +.bi-shield-fill-plus::before { content: "\f534"; } +.bi-shield-fill-x::before { content: "\f535"; } +.bi-shield-fill::before { content: "\f536"; } +.bi-shield-lock-fill::before { content: "\f537"; } +.bi-shield-lock::before { content: "\f538"; } +.bi-shield-minus::before { content: "\f539"; } +.bi-shield-plus::before { content: "\f53a"; } +.bi-shield-shaded::before { content: "\f53b"; } +.bi-shield-slash-fill::before { content: "\f53c"; } +.bi-shield-slash::before { content: "\f53d"; } +.bi-shield-x::before { content: "\f53e"; } +.bi-shield::before { content: "\f53f"; } +.bi-shift-fill::before { content: "\f540"; } +.bi-shift::before { content: "\f541"; } +.bi-shop-window::before { content: "\f542"; } +.bi-shop::before { content: "\f543"; } +.bi-shuffle::before { content: "\f544"; } +.bi-signpost-2-fill::before { content: "\f545"; } +.bi-signpost-2::before { content: "\f546"; } +.bi-signpost-fill::before { content: "\f547"; } +.bi-signpost-split-fill::before { content: "\f548"; } +.bi-signpost-split::before { content: "\f549"; } +.bi-signpost::before { content: "\f54a"; } +.bi-sim-fill::before { content: "\f54b"; } +.bi-sim::before { content: "\f54c"; } +.bi-skip-backward-btn-fill::before { content: "\f54d"; } +.bi-skip-backward-btn::before { content: "\f54e"; } +.bi-skip-backward-circle-fill::before { content: "\f54f"; } +.bi-skip-backward-circle::before { content: "\f550"; } +.bi-skip-backward-fill::before { content: "\f551"; } +.bi-skip-backward::before { content: "\f552"; } +.bi-skip-end-btn-fill::before { content: "\f553"; } +.bi-skip-end-btn::before { content: "\f554"; } +.bi-skip-end-circle-fill::before { 
content: "\f555"; } +.bi-skip-end-circle::before { content: "\f556"; } +.bi-skip-end-fill::before { content: "\f557"; } +.bi-skip-end::before { content: "\f558"; } +.bi-skip-forward-btn-fill::before { content: "\f559"; } +.bi-skip-forward-btn::before { content: "\f55a"; } +.bi-skip-forward-circle-fill::before { content: "\f55b"; } +.bi-skip-forward-circle::before { content: "\f55c"; } +.bi-skip-forward-fill::before { content: "\f55d"; } +.bi-skip-forward::before { content: "\f55e"; } +.bi-skip-start-btn-fill::before { content: "\f55f"; } +.bi-skip-start-btn::before { content: "\f560"; } +.bi-skip-start-circle-fill::before { content: "\f561"; } +.bi-skip-start-circle::before { content: "\f562"; } +.bi-skip-start-fill::before { content: "\f563"; } +.bi-skip-start::before { content: "\f564"; } +.bi-slack::before { content: "\f565"; } +.bi-slash-circle-fill::before { content: "\f566"; } +.bi-slash-circle::before { content: "\f567"; } +.bi-slash-square-fill::before { content: "\f568"; } +.bi-slash-square::before { content: "\f569"; } +.bi-slash::before { content: "\f56a"; } +.bi-sliders::before { content: "\f56b"; } +.bi-smartwatch::before { content: "\f56c"; } +.bi-snow::before { content: "\f56d"; } +.bi-snow2::before { content: "\f56e"; } +.bi-snow3::before { content: "\f56f"; } +.bi-sort-alpha-down-alt::before { content: "\f570"; } +.bi-sort-alpha-down::before { content: "\f571"; } +.bi-sort-alpha-up-alt::before { content: "\f572"; } +.bi-sort-alpha-up::before { content: "\f573"; } +.bi-sort-down-alt::before { content: "\f574"; } +.bi-sort-down::before { content: "\f575"; } +.bi-sort-numeric-down-alt::before { content: "\f576"; } +.bi-sort-numeric-down::before { content: "\f577"; } +.bi-sort-numeric-up-alt::before { content: "\f578"; } +.bi-sort-numeric-up::before { content: "\f579"; } +.bi-sort-up-alt::before { content: "\f57a"; } +.bi-sort-up::before { content: "\f57b"; } +.bi-soundwave::before { content: "\f57c"; } +.bi-speaker-fill::before { content: "\f57d"; } 
+.bi-speaker::before { content: "\f57e"; } +.bi-speedometer::before { content: "\f57f"; } +.bi-speedometer2::before { content: "\f580"; } +.bi-spellcheck::before { content: "\f581"; } +.bi-square-fill::before { content: "\f582"; } +.bi-square-half::before { content: "\f583"; } +.bi-square::before { content: "\f584"; } +.bi-stack::before { content: "\f585"; } +.bi-star-fill::before { content: "\f586"; } +.bi-star-half::before { content: "\f587"; } +.bi-star::before { content: "\f588"; } +.bi-stars::before { content: "\f589"; } +.bi-stickies-fill::before { content: "\f58a"; } +.bi-stickies::before { content: "\f58b"; } +.bi-sticky-fill::before { content: "\f58c"; } +.bi-sticky::before { content: "\f58d"; } +.bi-stop-btn-fill::before { content: "\f58e"; } +.bi-stop-btn::before { content: "\f58f"; } +.bi-stop-circle-fill::before { content: "\f590"; } +.bi-stop-circle::before { content: "\f591"; } +.bi-stop-fill::before { content: "\f592"; } +.bi-stop::before { content: "\f593"; } +.bi-stoplights-fill::before { content: "\f594"; } +.bi-stoplights::before { content: "\f595"; } +.bi-stopwatch-fill::before { content: "\f596"; } +.bi-stopwatch::before { content: "\f597"; } +.bi-subtract::before { content: "\f598"; } +.bi-suit-club-fill::before { content: "\f599"; } +.bi-suit-club::before { content: "\f59a"; } +.bi-suit-diamond-fill::before { content: "\f59b"; } +.bi-suit-diamond::before { content: "\f59c"; } +.bi-suit-heart-fill::before { content: "\f59d"; } +.bi-suit-heart::before { content: "\f59e"; } +.bi-suit-spade-fill::before { content: "\f59f"; } +.bi-suit-spade::before { content: "\f5a0"; } +.bi-sun-fill::before { content: "\f5a1"; } +.bi-sun::before { content: "\f5a2"; } +.bi-sunglasses::before { content: "\f5a3"; } +.bi-sunrise-fill::before { content: "\f5a4"; } +.bi-sunrise::before { content: "\f5a5"; } +.bi-sunset-fill::before { content: "\f5a6"; } +.bi-sunset::before { content: "\f5a7"; } +.bi-symmetry-horizontal::before { content: "\f5a8"; } 
+.bi-symmetry-vertical::before { content: "\f5a9"; } +.bi-table::before { content: "\f5aa"; } +.bi-tablet-fill::before { content: "\f5ab"; } +.bi-tablet-landscape-fill::before { content: "\f5ac"; } +.bi-tablet-landscape::before { content: "\f5ad"; } +.bi-tablet::before { content: "\f5ae"; } +.bi-tag-fill::before { content: "\f5af"; } +.bi-tag::before { content: "\f5b0"; } +.bi-tags-fill::before { content: "\f5b1"; } +.bi-tags::before { content: "\f5b2"; } +.bi-telegram::before { content: "\f5b3"; } +.bi-telephone-fill::before { content: "\f5b4"; } +.bi-telephone-forward-fill::before { content: "\f5b5"; } +.bi-telephone-forward::before { content: "\f5b6"; } +.bi-telephone-inbound-fill::before { content: "\f5b7"; } +.bi-telephone-inbound::before { content: "\f5b8"; } +.bi-telephone-minus-fill::before { content: "\f5b9"; } +.bi-telephone-minus::before { content: "\f5ba"; } +.bi-telephone-outbound-fill::before { content: "\f5bb"; } +.bi-telephone-outbound::before { content: "\f5bc"; } +.bi-telephone-plus-fill::before { content: "\f5bd"; } +.bi-telephone-plus::before { content: "\f5be"; } +.bi-telephone-x-fill::before { content: "\f5bf"; } +.bi-telephone-x::before { content: "\f5c0"; } +.bi-telephone::before { content: "\f5c1"; } +.bi-terminal-fill::before { content: "\f5c2"; } +.bi-terminal::before { content: "\f5c3"; } +.bi-text-center::before { content: "\f5c4"; } +.bi-text-indent-left::before { content: "\f5c5"; } +.bi-text-indent-right::before { content: "\f5c6"; } +.bi-text-left::before { content: "\f5c7"; } +.bi-text-paragraph::before { content: "\f5c8"; } +.bi-text-right::before { content: "\f5c9"; } +.bi-textarea-resize::before { content: "\f5ca"; } +.bi-textarea-t::before { content: "\f5cb"; } +.bi-textarea::before { content: "\f5cc"; } +.bi-thermometer-half::before { content: "\f5cd"; } +.bi-thermometer-high::before { content: "\f5ce"; } +.bi-thermometer-low::before { content: "\f5cf"; } +.bi-thermometer-snow::before { content: "\f5d0"; } 
+.bi-thermometer-sun::before { content: "\f5d1"; } +.bi-thermometer::before { content: "\f5d2"; } +.bi-three-dots-vertical::before { content: "\f5d3"; } +.bi-three-dots::before { content: "\f5d4"; } +.bi-toggle-off::before { content: "\f5d5"; } +.bi-toggle-on::before { content: "\f5d6"; } +.bi-toggle2-off::before { content: "\f5d7"; } +.bi-toggle2-on::before { content: "\f5d8"; } +.bi-toggles::before { content: "\f5d9"; } +.bi-toggles2::before { content: "\f5da"; } +.bi-tools::before { content: "\f5db"; } +.bi-tornado::before { content: "\f5dc"; } +.bi-trash-fill::before { content: "\f5dd"; } +.bi-trash::before { content: "\f5de"; } +.bi-trash2-fill::before { content: "\f5df"; } +.bi-trash2::before { content: "\f5e0"; } +.bi-tree-fill::before { content: "\f5e1"; } +.bi-tree::before { content: "\f5e2"; } +.bi-triangle-fill::before { content: "\f5e3"; } +.bi-triangle-half::before { content: "\f5e4"; } +.bi-triangle::before { content: "\f5e5"; } +.bi-trophy-fill::before { content: "\f5e6"; } +.bi-trophy::before { content: "\f5e7"; } +.bi-tropical-storm::before { content: "\f5e8"; } +.bi-truck-flatbed::before { content: "\f5e9"; } +.bi-truck::before { content: "\f5ea"; } +.bi-tsunami::before { content: "\f5eb"; } +.bi-tv-fill::before { content: "\f5ec"; } +.bi-tv::before { content: "\f5ed"; } +.bi-twitch::before { content: "\f5ee"; } +.bi-twitter::before { content: "\f5ef"; } +.bi-type-bold::before { content: "\f5f0"; } +.bi-type-h1::before { content: "\f5f1"; } +.bi-type-h2::before { content: "\f5f2"; } +.bi-type-h3::before { content: "\f5f3"; } +.bi-type-italic::before { content: "\f5f4"; } +.bi-type-strikethrough::before { content: "\f5f5"; } +.bi-type-underline::before { content: "\f5f6"; } +.bi-type::before { content: "\f5f7"; } +.bi-ui-checks-grid::before { content: "\f5f8"; } +.bi-ui-checks::before { content: "\f5f9"; } +.bi-ui-radios-grid::before { content: "\f5fa"; } +.bi-ui-radios::before { content: "\f5fb"; } +.bi-umbrella-fill::before { content: "\f5fc"; } 
+.bi-umbrella::before { content: "\f5fd"; } +.bi-union::before { content: "\f5fe"; } +.bi-unlock-fill::before { content: "\f5ff"; } +.bi-unlock::before { content: "\f600"; } +.bi-upc-scan::before { content: "\f601"; } +.bi-upc::before { content: "\f602"; } +.bi-upload::before { content: "\f603"; } +.bi-vector-pen::before { content: "\f604"; } +.bi-view-list::before { content: "\f605"; } +.bi-view-stacked::before { content: "\f606"; } +.bi-vinyl-fill::before { content: "\f607"; } +.bi-vinyl::before { content: "\f608"; } +.bi-voicemail::before { content: "\f609"; } +.bi-volume-down-fill::before { content: "\f60a"; } +.bi-volume-down::before { content: "\f60b"; } +.bi-volume-mute-fill::before { content: "\f60c"; } +.bi-volume-mute::before { content: "\f60d"; } +.bi-volume-off-fill::before { content: "\f60e"; } +.bi-volume-off::before { content: "\f60f"; } +.bi-volume-up-fill::before { content: "\f610"; } +.bi-volume-up::before { content: "\f611"; } +.bi-vr::before { content: "\f612"; } +.bi-wallet-fill::before { content: "\f613"; } +.bi-wallet::before { content: "\f614"; } +.bi-wallet2::before { content: "\f615"; } +.bi-watch::before { content: "\f616"; } +.bi-water::before { content: "\f617"; } +.bi-whatsapp::before { content: "\f618"; } +.bi-wifi-1::before { content: "\f619"; } +.bi-wifi-2::before { content: "\f61a"; } +.bi-wifi-off::before { content: "\f61b"; } +.bi-wifi::before { content: "\f61c"; } +.bi-wind::before { content: "\f61d"; } +.bi-window-dock::before { content: "\f61e"; } +.bi-window-sidebar::before { content: "\f61f"; } +.bi-window::before { content: "\f620"; } +.bi-wrench::before { content: "\f621"; } +.bi-x-circle-fill::before { content: "\f622"; } +.bi-x-circle::before { content: "\f623"; } +.bi-x-diamond-fill::before { content: "\f624"; } +.bi-x-diamond::before { content: "\f625"; } +.bi-x-octagon-fill::before { content: "\f626"; } +.bi-x-octagon::before { content: "\f627"; } +.bi-x-square-fill::before { content: "\f628"; } +.bi-x-square::before 
{ content: "\f629"; } +.bi-x::before { content: "\f62a"; } +.bi-youtube::before { content: "\f62b"; } +.bi-zoom-in::before { content: "\f62c"; } +.bi-zoom-out::before { content: "\f62d"; } +.bi-bank::before { content: "\f62e"; } +.bi-bank2::before { content: "\f62f"; } +.bi-bell-slash-fill::before { content: "\f630"; } +.bi-bell-slash::before { content: "\f631"; } +.bi-cash-coin::before { content: "\f632"; } +.bi-check-lg::before { content: "\f633"; } +.bi-coin::before { content: "\f634"; } +.bi-currency-bitcoin::before { content: "\f635"; } +.bi-currency-dollar::before { content: "\f636"; } +.bi-currency-euro::before { content: "\f637"; } +.bi-currency-exchange::before { content: "\f638"; } +.bi-currency-pound::before { content: "\f639"; } +.bi-currency-yen::before { content: "\f63a"; } +.bi-dash-lg::before { content: "\f63b"; } +.bi-exclamation-lg::before { content: "\f63c"; } +.bi-file-earmark-pdf-fill::before { content: "\f63d"; } +.bi-file-earmark-pdf::before { content: "\f63e"; } +.bi-file-pdf-fill::before { content: "\f63f"; } +.bi-file-pdf::before { content: "\f640"; } +.bi-gender-ambiguous::before { content: "\f641"; } +.bi-gender-female::before { content: "\f642"; } +.bi-gender-male::before { content: "\f643"; } +.bi-gender-trans::before { content: "\f644"; } +.bi-headset-vr::before { content: "\f645"; } +.bi-info-lg::before { content: "\f646"; } +.bi-mastodon::before { content: "\f647"; } +.bi-messenger::before { content: "\f648"; } +.bi-piggy-bank-fill::before { content: "\f649"; } +.bi-piggy-bank::before { content: "\f64a"; } +.bi-pin-map-fill::before { content: "\f64b"; } +.bi-pin-map::before { content: "\f64c"; } +.bi-plus-lg::before { content: "\f64d"; } +.bi-question-lg::before { content: "\f64e"; } +.bi-recycle::before { content: "\f64f"; } +.bi-reddit::before { content: "\f650"; } +.bi-safe-fill::before { content: "\f651"; } +.bi-safe2-fill::before { content: "\f652"; } +.bi-safe2::before { content: "\f653"; } +.bi-sd-card-fill::before { content: 
"\f654"; } +.bi-sd-card::before { content: "\f655"; } +.bi-skype::before { content: "\f656"; } +.bi-slash-lg::before { content: "\f657"; } +.bi-translate::before { content: "\f658"; } +.bi-x-lg::before { content: "\f659"; } +.bi-safe::before { content: "\f65a"; } +.bi-apple::before { content: "\f65b"; } +.bi-microsoft::before { content: "\f65d"; } +.bi-windows::before { content: "\f65e"; } +.bi-behance::before { content: "\f65c"; } +.bi-dribbble::before { content: "\f65f"; } +.bi-line::before { content: "\f660"; } +.bi-medium::before { content: "\f661"; } +.bi-paypal::before { content: "\f662"; } +.bi-pinterest::before { content: "\f663"; } +.bi-signal::before { content: "\f664"; } +.bi-snapchat::before { content: "\f665"; } +.bi-spotify::before { content: "\f666"; } +.bi-stack-overflow::before { content: "\f667"; } +.bi-strava::before { content: "\f668"; } +.bi-wordpress::before { content: "\f669"; } +.bi-vimeo::before { content: "\f66a"; } +.bi-activity::before { content: "\f66b"; } +.bi-easel2-fill::before { content: "\f66c"; } +.bi-easel2::before { content: "\f66d"; } +.bi-easel3-fill::before { content: "\f66e"; } +.bi-easel3::before { content: "\f66f"; } +.bi-fan::before { content: "\f670"; } +.bi-fingerprint::before { content: "\f671"; } +.bi-graph-down-arrow::before { content: "\f672"; } +.bi-graph-up-arrow::before { content: "\f673"; } +.bi-hypnotize::before { content: "\f674"; } +.bi-magic::before { content: "\f675"; } +.bi-person-rolodex::before { content: "\f676"; } +.bi-person-video::before { content: "\f677"; } +.bi-person-video2::before { content: "\f678"; } +.bi-person-video3::before { content: "\f679"; } +.bi-person-workspace::before { content: "\f67a"; } +.bi-radioactive::before { content: "\f67b"; } +.bi-webcam-fill::before { content: "\f67c"; } +.bi-webcam::before { content: "\f67d"; } +.bi-yin-yang::before { content: "\f67e"; } +.bi-bandaid-fill::before { content: "\f680"; } +.bi-bandaid::before { content: "\f681"; } +.bi-bluetooth::before { 
content: "\f682"; } +.bi-body-text::before { content: "\f683"; } +.bi-boombox::before { content: "\f684"; } +.bi-boxes::before { content: "\f685"; } +.bi-dpad-fill::before { content: "\f686"; } +.bi-dpad::before { content: "\f687"; } +.bi-ear-fill::before { content: "\f688"; } +.bi-ear::before { content: "\f689"; } +.bi-envelope-check-fill::before { content: "\f68b"; } +.bi-envelope-check::before { content: "\f68c"; } +.bi-envelope-dash-fill::before { content: "\f68e"; } +.bi-envelope-dash::before { content: "\f68f"; } +.bi-envelope-exclamation-fill::before { content: "\f691"; } +.bi-envelope-exclamation::before { content: "\f692"; } +.bi-envelope-plus-fill::before { content: "\f693"; } +.bi-envelope-plus::before { content: "\f694"; } +.bi-envelope-slash-fill::before { content: "\f696"; } +.bi-envelope-slash::before { content: "\f697"; } +.bi-envelope-x-fill::before { content: "\f699"; } +.bi-envelope-x::before { content: "\f69a"; } +.bi-explicit-fill::before { content: "\f69b"; } +.bi-explicit::before { content: "\f69c"; } +.bi-git::before { content: "\f69d"; } +.bi-infinity::before { content: "\f69e"; } +.bi-list-columns-reverse::before { content: "\f69f"; } +.bi-list-columns::before { content: "\f6a0"; } +.bi-meta::before { content: "\f6a1"; } +.bi-nintendo-switch::before { content: "\f6a4"; } +.bi-pc-display-horizontal::before { content: "\f6a5"; } +.bi-pc-display::before { content: "\f6a6"; } +.bi-pc-horizontal::before { content: "\f6a7"; } +.bi-pc::before { content: "\f6a8"; } +.bi-playstation::before { content: "\f6a9"; } +.bi-plus-slash-minus::before { content: "\f6aa"; } +.bi-projector-fill::before { content: "\f6ab"; } +.bi-projector::before { content: "\f6ac"; } +.bi-qr-code-scan::before { content: "\f6ad"; } +.bi-qr-code::before { content: "\f6ae"; } +.bi-quora::before { content: "\f6af"; } +.bi-quote::before { content: "\f6b0"; } +.bi-robot::before { content: "\f6b1"; } +.bi-send-check-fill::before { content: "\f6b2"; } +.bi-send-check::before { 
content: "\f6b3"; } +.bi-send-dash-fill::before { content: "\f6b4"; } +.bi-send-dash::before { content: "\f6b5"; } +.bi-send-exclamation-fill::before { content: "\f6b7"; } +.bi-send-exclamation::before { content: "\f6b8"; } +.bi-send-fill::before { content: "\f6b9"; } +.bi-send-plus-fill::before { content: "\f6ba"; } +.bi-send-plus::before { content: "\f6bb"; } +.bi-send-slash-fill::before { content: "\f6bc"; } +.bi-send-slash::before { content: "\f6bd"; } +.bi-send-x-fill::before { content: "\f6be"; } +.bi-send-x::before { content: "\f6bf"; } +.bi-send::before { content: "\f6c0"; } +.bi-steam::before { content: "\f6c1"; } +.bi-terminal-dash::before { content: "\f6c3"; } +.bi-terminal-plus::before { content: "\f6c4"; } +.bi-terminal-split::before { content: "\f6c5"; } +.bi-ticket-detailed-fill::before { content: "\f6c6"; } +.bi-ticket-detailed::before { content: "\f6c7"; } +.bi-ticket-fill::before { content: "\f6c8"; } +.bi-ticket-perforated-fill::before { content: "\f6c9"; } +.bi-ticket-perforated::before { content: "\f6ca"; } +.bi-ticket::before { content: "\f6cb"; } +.bi-tiktok::before { content: "\f6cc"; } +.bi-window-dash::before { content: "\f6cd"; } +.bi-window-desktop::before { content: "\f6ce"; } +.bi-window-fullscreen::before { content: "\f6cf"; } +.bi-window-plus::before { content: "\f6d0"; } +.bi-window-split::before { content: "\f6d1"; } +.bi-window-stack::before { content: "\f6d2"; } +.bi-window-x::before { content: "\f6d3"; } +.bi-xbox::before { content: "\f6d4"; } +.bi-ethernet::before { content: "\f6d5"; } +.bi-hdmi-fill::before { content: "\f6d6"; } +.bi-hdmi::before { content: "\f6d7"; } +.bi-usb-c-fill::before { content: "\f6d8"; } +.bi-usb-c::before { content: "\f6d9"; } +.bi-usb-fill::before { content: "\f6da"; } +.bi-usb-plug-fill::before { content: "\f6db"; } +.bi-usb-plug::before { content: "\f6dc"; } +.bi-usb-symbol::before { content: "\f6dd"; } +.bi-usb::before { content: "\f6de"; } +.bi-boombox-fill::before { content: "\f6df"; } 
+.bi-displayport::before { content: "\f6e1"; } +.bi-gpu-card::before { content: "\f6e2"; } +.bi-memory::before { content: "\f6e3"; } +.bi-modem-fill::before { content: "\f6e4"; } +.bi-modem::before { content: "\f6e5"; } +.bi-motherboard-fill::before { content: "\f6e6"; } +.bi-motherboard::before { content: "\f6e7"; } +.bi-optical-audio-fill::before { content: "\f6e8"; } +.bi-optical-audio::before { content: "\f6e9"; } +.bi-pci-card::before { content: "\f6ea"; } +.bi-router-fill::before { content: "\f6eb"; } +.bi-router::before { content: "\f6ec"; } +.bi-thunderbolt-fill::before { content: "\f6ef"; } +.bi-thunderbolt::before { content: "\f6f0"; } +.bi-usb-drive-fill::before { content: "\f6f1"; } +.bi-usb-drive::before { content: "\f6f2"; } +.bi-usb-micro-fill::before { content: "\f6f3"; } +.bi-usb-micro::before { content: "\f6f4"; } +.bi-usb-mini-fill::before { content: "\f6f5"; } +.bi-usb-mini::before { content: "\f6f6"; } +.bi-cloud-haze2::before { content: "\f6f7"; } +.bi-device-hdd-fill::before { content: "\f6f8"; } +.bi-device-hdd::before { content: "\f6f9"; } +.bi-device-ssd-fill::before { content: "\f6fa"; } +.bi-device-ssd::before { content: "\f6fb"; } +.bi-displayport-fill::before { content: "\f6fc"; } +.bi-mortarboard-fill::before { content: "\f6fd"; } +.bi-mortarboard::before { content: "\f6fe"; } +.bi-terminal-x::before { content: "\f6ff"; } +.bi-arrow-through-heart-fill::before { content: "\f700"; } +.bi-arrow-through-heart::before { content: "\f701"; } +.bi-badge-sd-fill::before { content: "\f702"; } +.bi-badge-sd::before { content: "\f703"; } +.bi-bag-heart-fill::before { content: "\f704"; } +.bi-bag-heart::before { content: "\f705"; } +.bi-balloon-fill::before { content: "\f706"; } +.bi-balloon-heart-fill::before { content: "\f707"; } +.bi-balloon-heart::before { content: "\f708"; } +.bi-balloon::before { content: "\f709"; } +.bi-box2-fill::before { content: "\f70a"; } +.bi-box2-heart-fill::before { content: "\f70b"; } +.bi-box2-heart::before { 
content: "\f70c"; } +.bi-box2::before { content: "\f70d"; } +.bi-braces-asterisk::before { content: "\f70e"; } +.bi-calendar-heart-fill::before { content: "\f70f"; } +.bi-calendar-heart::before { content: "\f710"; } +.bi-calendar2-heart-fill::before { content: "\f711"; } +.bi-calendar2-heart::before { content: "\f712"; } +.bi-chat-heart-fill::before { content: "\f713"; } +.bi-chat-heart::before { content: "\f714"; } +.bi-chat-left-heart-fill::before { content: "\f715"; } +.bi-chat-left-heart::before { content: "\f716"; } +.bi-chat-right-heart-fill::before { content: "\f717"; } +.bi-chat-right-heart::before { content: "\f718"; } +.bi-chat-square-heart-fill::before { content: "\f719"; } +.bi-chat-square-heart::before { content: "\f71a"; } +.bi-clipboard-check-fill::before { content: "\f71b"; } +.bi-clipboard-data-fill::before { content: "\f71c"; } +.bi-clipboard-fill::before { content: "\f71d"; } +.bi-clipboard-heart-fill::before { content: "\f71e"; } +.bi-clipboard-heart::before { content: "\f71f"; } +.bi-clipboard-minus-fill::before { content: "\f720"; } +.bi-clipboard-plus-fill::before { content: "\f721"; } +.bi-clipboard-pulse::before { content: "\f722"; } +.bi-clipboard-x-fill::before { content: "\f723"; } +.bi-clipboard2-check-fill::before { content: "\f724"; } +.bi-clipboard2-check::before { content: "\f725"; } +.bi-clipboard2-data-fill::before { content: "\f726"; } +.bi-clipboard2-data::before { content: "\f727"; } +.bi-clipboard2-fill::before { content: "\f728"; } +.bi-clipboard2-heart-fill::before { content: "\f729"; } +.bi-clipboard2-heart::before { content: "\f72a"; } +.bi-clipboard2-minus-fill::before { content: "\f72b"; } +.bi-clipboard2-minus::before { content: "\f72c"; } +.bi-clipboard2-plus-fill::before { content: "\f72d"; } +.bi-clipboard2-plus::before { content: "\f72e"; } +.bi-clipboard2-pulse-fill::before { content: "\f72f"; } +.bi-clipboard2-pulse::before { content: "\f730"; } +.bi-clipboard2-x-fill::before { content: "\f731"; } 
+.bi-clipboard2-x::before { content: "\f732"; } +.bi-clipboard2::before { content: "\f733"; } +.bi-emoji-kiss-fill::before { content: "\f734"; } +.bi-emoji-kiss::before { content: "\f735"; } +.bi-envelope-heart-fill::before { content: "\f736"; } +.bi-envelope-heart::before { content: "\f737"; } +.bi-envelope-open-heart-fill::before { content: "\f738"; } +.bi-envelope-open-heart::before { content: "\f739"; } +.bi-envelope-paper-fill::before { content: "\f73a"; } +.bi-envelope-paper-heart-fill::before { content: "\f73b"; } +.bi-envelope-paper-heart::before { content: "\f73c"; } +.bi-envelope-paper::before { content: "\f73d"; } +.bi-filetype-aac::before { content: "\f73e"; } +.bi-filetype-ai::before { content: "\f73f"; } +.bi-filetype-bmp::before { content: "\f740"; } +.bi-filetype-cs::before { content: "\f741"; } +.bi-filetype-css::before { content: "\f742"; } +.bi-filetype-csv::before { content: "\f743"; } +.bi-filetype-doc::before { content: "\f744"; } +.bi-filetype-docx::before { content: "\f745"; } +.bi-filetype-exe::before { content: "\f746"; } +.bi-filetype-gif::before { content: "\f747"; } +.bi-filetype-heic::before { content: "\f748"; } +.bi-filetype-html::before { content: "\f749"; } +.bi-filetype-java::before { content: "\f74a"; } +.bi-filetype-jpg::before { content: "\f74b"; } +.bi-filetype-js::before { content: "\f74c"; } +.bi-filetype-jsx::before { content: "\f74d"; } +.bi-filetype-key::before { content: "\f74e"; } +.bi-filetype-m4p::before { content: "\f74f"; } +.bi-filetype-md::before { content: "\f750"; } +.bi-filetype-mdx::before { content: "\f751"; } +.bi-filetype-mov::before { content: "\f752"; } +.bi-filetype-mp3::before { content: "\f753"; } +.bi-filetype-mp4::before { content: "\f754"; } +.bi-filetype-otf::before { content: "\f755"; } +.bi-filetype-pdf::before { content: "\f756"; } +.bi-filetype-php::before { content: "\f757"; } +.bi-filetype-png::before { content: "\f758"; } +.bi-filetype-ppt::before { content: "\f75a"; } 
+.bi-filetype-psd::before { content: "\f75b"; } +.bi-filetype-py::before { content: "\f75c"; } +.bi-filetype-raw::before { content: "\f75d"; } +.bi-filetype-rb::before { content: "\f75e"; } +.bi-filetype-sass::before { content: "\f75f"; } +.bi-filetype-scss::before { content: "\f760"; } +.bi-filetype-sh::before { content: "\f761"; } +.bi-filetype-svg::before { content: "\f762"; } +.bi-filetype-tiff::before { content: "\f763"; } +.bi-filetype-tsx::before { content: "\f764"; } +.bi-filetype-ttf::before { content: "\f765"; } +.bi-filetype-txt::before { content: "\f766"; } +.bi-filetype-wav::before { content: "\f767"; } +.bi-filetype-woff::before { content: "\f768"; } +.bi-filetype-xls::before { content: "\f76a"; } +.bi-filetype-xml::before { content: "\f76b"; } +.bi-filetype-yml::before { content: "\f76c"; } +.bi-heart-arrow::before { content: "\f76d"; } +.bi-heart-pulse-fill::before { content: "\f76e"; } +.bi-heart-pulse::before { content: "\f76f"; } +.bi-heartbreak-fill::before { content: "\f770"; } +.bi-heartbreak::before { content: "\f771"; } +.bi-hearts::before { content: "\f772"; } +.bi-hospital-fill::before { content: "\f773"; } +.bi-hospital::before { content: "\f774"; } +.bi-house-heart-fill::before { content: "\f775"; } +.bi-house-heart::before { content: "\f776"; } +.bi-incognito::before { content: "\f777"; } +.bi-magnet-fill::before { content: "\f778"; } +.bi-magnet::before { content: "\f779"; } +.bi-person-heart::before { content: "\f77a"; } +.bi-person-hearts::before { content: "\f77b"; } +.bi-phone-flip::before { content: "\f77c"; } +.bi-plugin::before { content: "\f77d"; } +.bi-postage-fill::before { content: "\f77e"; } +.bi-postage-heart-fill::before { content: "\f77f"; } +.bi-postage-heart::before { content: "\f780"; } +.bi-postage::before { content: "\f781"; } +.bi-postcard-fill::before { content: "\f782"; } +.bi-postcard-heart-fill::before { content: "\f783"; } +.bi-postcard-heart::before { content: "\f784"; } +.bi-postcard::before { content: 
"\f785"; } +.bi-search-heart-fill::before { content: "\f786"; } +.bi-search-heart::before { content: "\f787"; } +.bi-sliders2-vertical::before { content: "\f788"; } +.bi-sliders2::before { content: "\f789"; } +.bi-trash3-fill::before { content: "\f78a"; } +.bi-trash3::before { content: "\f78b"; } +.bi-valentine::before { content: "\f78c"; } +.bi-valentine2::before { content: "\f78d"; } +.bi-wrench-adjustable-circle-fill::before { content: "\f78e"; } +.bi-wrench-adjustable-circle::before { content: "\f78f"; } +.bi-wrench-adjustable::before { content: "\f790"; } +.bi-filetype-json::before { content: "\f791"; } +.bi-filetype-pptx::before { content: "\f792"; } +.bi-filetype-xlsx::before { content: "\f793"; } +.bi-1-circle-fill::before { content: "\f796"; } +.bi-1-circle::before { content: "\f797"; } +.bi-1-square-fill::before { content: "\f798"; } +.bi-1-square::before { content: "\f799"; } +.bi-2-circle-fill::before { content: "\f79c"; } +.bi-2-circle::before { content: "\f79d"; } +.bi-2-square-fill::before { content: "\f79e"; } +.bi-2-square::before { content: "\f79f"; } +.bi-3-circle-fill::before { content: "\f7a2"; } +.bi-3-circle::before { content: "\f7a3"; } +.bi-3-square-fill::before { content: "\f7a4"; } +.bi-3-square::before { content: "\f7a5"; } +.bi-4-circle-fill::before { content: "\f7a8"; } +.bi-4-circle::before { content: "\f7a9"; } +.bi-4-square-fill::before { content: "\f7aa"; } +.bi-4-square::before { content: "\f7ab"; } +.bi-5-circle-fill::before { content: "\f7ae"; } +.bi-5-circle::before { content: "\f7af"; } +.bi-5-square-fill::before { content: "\f7b0"; } +.bi-5-square::before { content: "\f7b1"; } +.bi-6-circle-fill::before { content: "\f7b4"; } +.bi-6-circle::before { content: "\f7b5"; } +.bi-6-square-fill::before { content: "\f7b6"; } +.bi-6-square::before { content: "\f7b7"; } +.bi-7-circle-fill::before { content: "\f7ba"; } +.bi-7-circle::before { content: "\f7bb"; } +.bi-7-square-fill::before { content: "\f7bc"; } +.bi-7-square::before { 
content: "\f7bd"; } +.bi-8-circle-fill::before { content: "\f7c0"; } +.bi-8-circle::before { content: "\f7c1"; } +.bi-8-square-fill::before { content: "\f7c2"; } +.bi-8-square::before { content: "\f7c3"; } +.bi-9-circle-fill::before { content: "\f7c6"; } +.bi-9-circle::before { content: "\f7c7"; } +.bi-9-square-fill::before { content: "\f7c8"; } +.bi-9-square::before { content: "\f7c9"; } +.bi-airplane-engines-fill::before { content: "\f7ca"; } +.bi-airplane-engines::before { content: "\f7cb"; } +.bi-airplane-fill::before { content: "\f7cc"; } +.bi-airplane::before { content: "\f7cd"; } +.bi-alexa::before { content: "\f7ce"; } +.bi-alipay::before { content: "\f7cf"; } +.bi-android::before { content: "\f7d0"; } +.bi-android2::before { content: "\f7d1"; } +.bi-box-fill::before { content: "\f7d2"; } +.bi-box-seam-fill::before { content: "\f7d3"; } +.bi-browser-chrome::before { content: "\f7d4"; } +.bi-browser-edge::before { content: "\f7d5"; } +.bi-browser-firefox::before { content: "\f7d6"; } +.bi-browser-safari::before { content: "\f7d7"; } +.bi-c-circle-fill::before { content: "\f7da"; } +.bi-c-circle::before { content: "\f7db"; } +.bi-c-square-fill::before { content: "\f7dc"; } +.bi-c-square::before { content: "\f7dd"; } +.bi-capsule-pill::before { content: "\f7de"; } +.bi-capsule::before { content: "\f7df"; } +.bi-car-front-fill::before { content: "\f7e0"; } +.bi-car-front::before { content: "\f7e1"; } +.bi-cassette-fill::before { content: "\f7e2"; } +.bi-cassette::before { content: "\f7e3"; } +.bi-cc-circle-fill::before { content: "\f7e6"; } +.bi-cc-circle::before { content: "\f7e7"; } +.bi-cc-square-fill::before { content: "\f7e8"; } +.bi-cc-square::before { content: "\f7e9"; } +.bi-cup-hot-fill::before { content: "\f7ea"; } +.bi-cup-hot::before { content: "\f7eb"; } +.bi-currency-rupee::before { content: "\f7ec"; } +.bi-dropbox::before { content: "\f7ed"; } +.bi-escape::before { content: "\f7ee"; } +.bi-fast-forward-btn-fill::before { content: "\f7ef"; } 
+.bi-fast-forward-btn::before { content: "\f7f0"; } +.bi-fast-forward-circle-fill::before { content: "\f7f1"; } +.bi-fast-forward-circle::before { content: "\f7f2"; } +.bi-fast-forward-fill::before { content: "\f7f3"; } +.bi-fast-forward::before { content: "\f7f4"; } +.bi-filetype-sql::before { content: "\f7f5"; } +.bi-fire::before { content: "\f7f6"; } +.bi-google-play::before { content: "\f7f7"; } +.bi-h-circle-fill::before { content: "\f7fa"; } +.bi-h-circle::before { content: "\f7fb"; } +.bi-h-square-fill::before { content: "\f7fc"; } +.bi-h-square::before { content: "\f7fd"; } +.bi-indent::before { content: "\f7fe"; } +.bi-lungs-fill::before { content: "\f7ff"; } +.bi-lungs::before { content: "\f800"; } +.bi-microsoft-teams::before { content: "\f801"; } +.bi-p-circle-fill::before { content: "\f804"; } +.bi-p-circle::before { content: "\f805"; } +.bi-p-square-fill::before { content: "\f806"; } +.bi-p-square::before { content: "\f807"; } +.bi-pass-fill::before { content: "\f808"; } +.bi-pass::before { content: "\f809"; } +.bi-prescription::before { content: "\f80a"; } +.bi-prescription2::before { content: "\f80b"; } +.bi-r-circle-fill::before { content: "\f80e"; } +.bi-r-circle::before { content: "\f80f"; } +.bi-r-square-fill::before { content: "\f810"; } +.bi-r-square::before { content: "\f811"; } +.bi-repeat-1::before { content: "\f812"; } +.bi-repeat::before { content: "\f813"; } +.bi-rewind-btn-fill::before { content: "\f814"; } +.bi-rewind-btn::before { content: "\f815"; } +.bi-rewind-circle-fill::before { content: "\f816"; } +.bi-rewind-circle::before { content: "\f817"; } +.bi-rewind-fill::before { content: "\f818"; } +.bi-rewind::before { content: "\f819"; } +.bi-train-freight-front-fill::before { content: "\f81a"; } +.bi-train-freight-front::before { content: "\f81b"; } +.bi-train-front-fill::before { content: "\f81c"; } +.bi-train-front::before { content: "\f81d"; } +.bi-train-lightrail-front-fill::before { content: "\f81e"; } 
+.bi-train-lightrail-front::before { content: "\f81f"; } +.bi-truck-front-fill::before { content: "\f820"; } +.bi-truck-front::before { content: "\f821"; } +.bi-ubuntu::before { content: "\f822"; } +.bi-unindent::before { content: "\f823"; } +.bi-unity::before { content: "\f824"; } +.bi-universal-access-circle::before { content: "\f825"; } +.bi-universal-access::before { content: "\f826"; } +.bi-virus::before { content: "\f827"; } +.bi-virus2::before { content: "\f828"; } +.bi-wechat::before { content: "\f829"; } +.bi-yelp::before { content: "\f82a"; } +.bi-sign-stop-fill::before { content: "\f82b"; } +.bi-sign-stop-lights-fill::before { content: "\f82c"; } +.bi-sign-stop-lights::before { content: "\f82d"; } +.bi-sign-stop::before { content: "\f82e"; } +.bi-sign-turn-left-fill::before { content: "\f82f"; } +.bi-sign-turn-left::before { content: "\f830"; } +.bi-sign-turn-right-fill::before { content: "\f831"; } +.bi-sign-turn-right::before { content: "\f832"; } +.bi-sign-turn-slight-left-fill::before { content: "\f833"; } +.bi-sign-turn-slight-left::before { content: "\f834"; } +.bi-sign-turn-slight-right-fill::before { content: "\f835"; } +.bi-sign-turn-slight-right::before { content: "\f836"; } +.bi-sign-yield-fill::before { content: "\f837"; } +.bi-sign-yield::before { content: "\f838"; } +.bi-ev-station-fill::before { content: "\f839"; } +.bi-ev-station::before { content: "\f83a"; } +.bi-fuel-pump-diesel-fill::before { content: "\f83b"; } +.bi-fuel-pump-diesel::before { content: "\f83c"; } +.bi-fuel-pump-fill::before { content: "\f83d"; } +.bi-fuel-pump::before { content: "\f83e"; } +.bi-0-circle-fill::before { content: "\f83f"; } +.bi-0-circle::before { content: "\f840"; } +.bi-0-square-fill::before { content: "\f841"; } +.bi-0-square::before { content: "\f842"; } +.bi-rocket-fill::before { content: "\f843"; } +.bi-rocket-takeoff-fill::before { content: "\f844"; } +.bi-rocket-takeoff::before { content: "\f845"; } +.bi-rocket::before { content: "\f846"; } 
+.bi-stripe::before { content: "\f847"; } +.bi-subscript::before { content: "\f848"; } +.bi-superscript::before { content: "\f849"; } +.bi-trello::before { content: "\f84a"; } +.bi-envelope-at-fill::before { content: "\f84b"; } +.bi-envelope-at::before { content: "\f84c"; } +.bi-regex::before { content: "\f84d"; } +.bi-text-wrap::before { content: "\f84e"; } +.bi-sign-dead-end-fill::before { content: "\f84f"; } +.bi-sign-dead-end::before { content: "\f850"; } +.bi-sign-do-not-enter-fill::before { content: "\f851"; } +.bi-sign-do-not-enter::before { content: "\f852"; } +.bi-sign-intersection-fill::before { content: "\f853"; } +.bi-sign-intersection-side-fill::before { content: "\f854"; } +.bi-sign-intersection-side::before { content: "\f855"; } +.bi-sign-intersection-t-fill::before { content: "\f856"; } +.bi-sign-intersection-t::before { content: "\f857"; } +.bi-sign-intersection-y-fill::before { content: "\f858"; } +.bi-sign-intersection-y::before { content: "\f859"; } +.bi-sign-intersection::before { content: "\f85a"; } +.bi-sign-merge-left-fill::before { content: "\f85b"; } +.bi-sign-merge-left::before { content: "\f85c"; } +.bi-sign-merge-right-fill::before { content: "\f85d"; } +.bi-sign-merge-right::before { content: "\f85e"; } +.bi-sign-no-left-turn-fill::before { content: "\f85f"; } +.bi-sign-no-left-turn::before { content: "\f860"; } +.bi-sign-no-parking-fill::before { content: "\f861"; } +.bi-sign-no-parking::before { content: "\f862"; } +.bi-sign-no-right-turn-fill::before { content: "\f863"; } +.bi-sign-no-right-turn::before { content: "\f864"; } +.bi-sign-railroad-fill::before { content: "\f865"; } +.bi-sign-railroad::before { content: "\f866"; } +.bi-building-add::before { content: "\f867"; } +.bi-building-check::before { content: "\f868"; } +.bi-building-dash::before { content: "\f869"; } +.bi-building-down::before { content: "\f86a"; } +.bi-building-exclamation::before { content: "\f86b"; } +.bi-building-fill-add::before { content: "\f86c"; } 
+.bi-building-fill-check::before { content: "\f86d"; } +.bi-building-fill-dash::before { content: "\f86e"; } +.bi-building-fill-down::before { content: "\f86f"; } +.bi-building-fill-exclamation::before { content: "\f870"; } +.bi-building-fill-gear::before { content: "\f871"; } +.bi-building-fill-lock::before { content: "\f872"; } +.bi-building-fill-slash::before { content: "\f873"; } +.bi-building-fill-up::before { content: "\f874"; } +.bi-building-fill-x::before { content: "\f875"; } +.bi-building-fill::before { content: "\f876"; } +.bi-building-gear::before { content: "\f877"; } +.bi-building-lock::before { content: "\f878"; } +.bi-building-slash::before { content: "\f879"; } +.bi-building-up::before { content: "\f87a"; } +.bi-building-x::before { content: "\f87b"; } +.bi-buildings-fill::before { content: "\f87c"; } +.bi-buildings::before { content: "\f87d"; } +.bi-bus-front-fill::before { content: "\f87e"; } +.bi-bus-front::before { content: "\f87f"; } +.bi-ev-front-fill::before { content: "\f880"; } +.bi-ev-front::before { content: "\f881"; } +.bi-globe-americas::before { content: "\f882"; } +.bi-globe-asia-australia::before { content: "\f883"; } +.bi-globe-central-south-asia::before { content: "\f884"; } +.bi-globe-europe-africa::before { content: "\f885"; } +.bi-house-add-fill::before { content: "\f886"; } +.bi-house-add::before { content: "\f887"; } +.bi-house-check-fill::before { content: "\f888"; } +.bi-house-check::before { content: "\f889"; } +.bi-house-dash-fill::before { content: "\f88a"; } +.bi-house-dash::before { content: "\f88b"; } +.bi-house-down-fill::before { content: "\f88c"; } +.bi-house-down::before { content: "\f88d"; } +.bi-house-exclamation-fill::before { content: "\f88e"; } +.bi-house-exclamation::before { content: "\f88f"; } +.bi-house-gear-fill::before { content: "\f890"; } +.bi-house-gear::before { content: "\f891"; } +.bi-house-lock-fill::before { content: "\f892"; } +.bi-house-lock::before { content: "\f893"; } 
+.bi-house-slash-fill::before { content: "\f894"; } +.bi-house-slash::before { content: "\f895"; } +.bi-house-up-fill::before { content: "\f896"; } +.bi-house-up::before { content: "\f897"; } +.bi-house-x-fill::before { content: "\f898"; } +.bi-house-x::before { content: "\f899"; } +.bi-person-add::before { content: "\f89a"; } +.bi-person-down::before { content: "\f89b"; } +.bi-person-exclamation::before { content: "\f89c"; } +.bi-person-fill-add::before { content: "\f89d"; } +.bi-person-fill-check::before { content: "\f89e"; } +.bi-person-fill-dash::before { content: "\f89f"; } +.bi-person-fill-down::before { content: "\f8a0"; } +.bi-person-fill-exclamation::before { content: "\f8a1"; } +.bi-person-fill-gear::before { content: "\f8a2"; } +.bi-person-fill-lock::before { content: "\f8a3"; } +.bi-person-fill-slash::before { content: "\f8a4"; } +.bi-person-fill-up::before { content: "\f8a5"; } +.bi-person-fill-x::before { content: "\f8a6"; } +.bi-person-gear::before { content: "\f8a7"; } +.bi-person-lock::before { content: "\f8a8"; } +.bi-person-slash::before { content: "\f8a9"; } +.bi-person-up::before { content: "\f8aa"; } +.bi-scooter::before { content: "\f8ab"; } +.bi-taxi-front-fill::before { content: "\f8ac"; } +.bi-taxi-front::before { content: "\f8ad"; } +.bi-amd::before { content: "\f8ae"; } +.bi-database-add::before { content: "\f8af"; } +.bi-database-check::before { content: "\f8b0"; } +.bi-database-dash::before { content: "\f8b1"; } +.bi-database-down::before { content: "\f8b2"; } +.bi-database-exclamation::before { content: "\f8b3"; } +.bi-database-fill-add::before { content: "\f8b4"; } +.bi-database-fill-check::before { content: "\f8b5"; } +.bi-database-fill-dash::before { content: "\f8b6"; } +.bi-database-fill-down::before { content: "\f8b7"; } +.bi-database-fill-exclamation::before { content: "\f8b8"; } +.bi-database-fill-gear::before { content: "\f8b9"; } +.bi-database-fill-lock::before { content: "\f8ba"; } +.bi-database-fill-slash::before { content: 
"\f8bb"; } +.bi-database-fill-up::before { content: "\f8bc"; } +.bi-database-fill-x::before { content: "\f8bd"; } +.bi-database-fill::before { content: "\f8be"; } +.bi-database-gear::before { content: "\f8bf"; } +.bi-database-lock::before { content: "\f8c0"; } +.bi-database-slash::before { content: "\f8c1"; } +.bi-database-up::before { content: "\f8c2"; } +.bi-database-x::before { content: "\f8c3"; } +.bi-database::before { content: "\f8c4"; } +.bi-houses-fill::before { content: "\f8c5"; } +.bi-houses::before { content: "\f8c6"; } +.bi-nvidia::before { content: "\f8c7"; } +.bi-person-vcard-fill::before { content: "\f8c8"; } +.bi-person-vcard::before { content: "\f8c9"; } +.bi-sina-weibo::before { content: "\f8ca"; } +.bi-tencent-qq::before { content: "\f8cb"; } +.bi-wikipedia::before { content: "\f8cc"; } +.bi-alphabet-uppercase::before { content: "\f2a5"; } +.bi-alphabet::before { content: "\f68a"; } +.bi-amazon::before { content: "\f68d"; } +.bi-arrows-collapse-vertical::before { content: "\f690"; } +.bi-arrows-expand-vertical::before { content: "\f695"; } +.bi-arrows-vertical::before { content: "\f698"; } +.bi-arrows::before { content: "\f6a2"; } +.bi-ban-fill::before { content: "\f6a3"; } +.bi-ban::before { content: "\f6b6"; } +.bi-bing::before { content: "\f6c2"; } +.bi-cake::before { content: "\f6e0"; } +.bi-cake2::before { content: "\f6ed"; } +.bi-cookie::before { content: "\f6ee"; } +.bi-copy::before { content: "\f759"; } +.bi-crosshair::before { content: "\f769"; } +.bi-crosshair2::before { content: "\f794"; } +.bi-emoji-astonished-fill::before { content: "\f795"; } +.bi-emoji-astonished::before { content: "\f79a"; } +.bi-emoji-grimace-fill::before { content: "\f79b"; } +.bi-emoji-grimace::before { content: "\f7a0"; } +.bi-emoji-grin-fill::before { content: "\f7a1"; } +.bi-emoji-grin::before { content: "\f7a6"; } +.bi-emoji-surprise-fill::before { content: "\f7a7"; } +.bi-emoji-surprise::before { content: "\f7ac"; } +.bi-emoji-tear-fill::before { content: 
"\f7ad"; } +.bi-emoji-tear::before { content: "\f7b2"; } +.bi-envelope-arrow-down-fill::before { content: "\f7b3"; } +.bi-envelope-arrow-down::before { content: "\f7b8"; } +.bi-envelope-arrow-up-fill::before { content: "\f7b9"; } +.bi-envelope-arrow-up::before { content: "\f7be"; } +.bi-feather::before { content: "\f7bf"; } +.bi-feather2::before { content: "\f7c4"; } +.bi-floppy-fill::before { content: "\f7c5"; } +.bi-floppy::before { content: "\f7d8"; } +.bi-floppy2-fill::before { content: "\f7d9"; } +.bi-floppy2::before { content: "\f7e4"; } +.bi-gitlab::before { content: "\f7e5"; } +.bi-highlighter::before { content: "\f7f8"; } +.bi-marker-tip::before { content: "\f802"; } +.bi-nvme-fill::before { content: "\f803"; } +.bi-nvme::before { content: "\f80c"; } +.bi-opencollective::before { content: "\f80d"; } +.bi-pci-card-network::before { content: "\f8cd"; } +.bi-pci-card-sound::before { content: "\f8ce"; } +.bi-radar::before { content: "\f8cf"; } +.bi-send-arrow-down-fill::before { content: "\f8d0"; } +.bi-send-arrow-down::before { content: "\f8d1"; } +.bi-send-arrow-up-fill::before { content: "\f8d2"; } +.bi-send-arrow-up::before { content: "\f8d3"; } +.bi-sim-slash-fill::before { content: "\f8d4"; } +.bi-sim-slash::before { content: "\f8d5"; } +.bi-sourceforge::before { content: "\f8d6"; } +.bi-substack::before { content: "\f8d7"; } +.bi-threads-fill::before { content: "\f8d8"; } +.bi-threads::before { content: "\f8d9"; } +.bi-transparency::before { content: "\f8da"; } +.bi-twitter-x::before { content: "\f8db"; } +.bi-type-h4::before { content: "\f8dc"; } +.bi-type-h5::before { content: "\f8dd"; } +.bi-type-h6::before { content: "\f8de"; } +.bi-backpack-fill::before { content: "\f8df"; } +.bi-backpack::before { content: "\f8e0"; } +.bi-backpack2-fill::before { content: "\f8e1"; } +.bi-backpack2::before { content: "\f8e2"; } +.bi-backpack3-fill::before { content: "\f8e3"; } +.bi-backpack3::before { content: "\f8e4"; } +.bi-backpack4-fill::before { content: 
"\f8e5"; } +.bi-backpack4::before { content: "\f8e6"; } +.bi-brilliance::before { content: "\f8e7"; } +.bi-cake-fill::before { content: "\f8e8"; } +.bi-cake2-fill::before { content: "\f8e9"; } +.bi-duffle-fill::before { content: "\f8ea"; } +.bi-duffle::before { content: "\f8eb"; } +.bi-exposure::before { content: "\f8ec"; } +.bi-gender-neuter::before { content: "\f8ed"; } +.bi-highlights::before { content: "\f8ee"; } +.bi-luggage-fill::before { content: "\f8ef"; } +.bi-luggage::before { content: "\f8f0"; } +.bi-mailbox-flag::before { content: "\f8f1"; } +.bi-mailbox2-flag::before { content: "\f8f2"; } +.bi-noise-reduction::before { content: "\f8f3"; } +.bi-passport-fill::before { content: "\f8f4"; } +.bi-passport::before { content: "\f8f5"; } +.bi-person-arms-up::before { content: "\f8f6"; } +.bi-person-raised-hand::before { content: "\f8f7"; } +.bi-person-standing-dress::before { content: "\f8f8"; } +.bi-person-standing::before { content: "\f8f9"; } +.bi-person-walking::before { content: "\f8fa"; } +.bi-person-wheelchair::before { content: "\f8fb"; } +.bi-shadows::before { content: "\f8fc"; } +.bi-suitcase-fill::before { content: "\f8fd"; } +.bi-suitcase-lg-fill::before { content: "\f8fe"; } +.bi-suitcase-lg::before { content: "\f8ff"; } +.bi-suitcase::before { content: "\f900"; } +.bi-suitcase2-fill::before { content: "\f901"; } +.bi-suitcase2::before { content: "\f902"; } +.bi-vignette::before { content: "\f903"; } diff --git a/docs/2_39/site_libs/bootstrap/bootstrap-icons.woff b/docs/2_39/site_libs/bootstrap/bootstrap-icons.woff new file mode 100644 index 000000000..dbeeb0556 Binary files /dev/null and b/docs/2_39/site_libs/bootstrap/bootstrap-icons.woff differ diff --git a/docs/2_39/site_libs/bootstrap/bootstrap.min.js b/docs/2_39/site_libs/bootstrap/bootstrap.min.js new file mode 100644 index 000000000..e8f21f703 --- /dev/null +++ b/docs/2_39/site_libs/bootstrap/bootstrap.min.js @@ -0,0 +1,7 @@ +/*! 
+ * Bootstrap v5.3.1 (https://getbootstrap.com/) + * Copyright 2011-2023 The Bootstrap Authors (https://github.com/twbs/bootstrap/graphs/contributors) + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) + */ +!function(t,e){"object"==typeof exports&&"undefined"!=typeof module?module.exports=e():"function"==typeof define&&define.amd?define(e):(t="undefined"!=typeof globalThis?globalThis:t||self).bootstrap=e()}(this,(function(){"use strict";const t=new Map,e={set(e,i,n){t.has(e)||t.set(e,new Map);const s=t.get(e);s.has(i)||0===s.size?s.set(i,n):console.error(`Bootstrap doesn't allow more than one instance per element. Bound instance: ${Array.from(s.keys())[0]}.`)},get:(e,i)=>t.has(e)&&t.get(e).get(i)||null,remove(e,i){if(!t.has(e))return;const n=t.get(e);n.delete(i),0===n.size&&t.delete(e)}},i="transitionend",n=t=>(t&&window.CSS&&window.CSS.escape&&(t=t.replace(/#([^\s"#']+)/g,((t,e)=>`#${CSS.escape(e)}`))),t),s=t=>{t.dispatchEvent(new Event(i))},o=t=>!(!t||"object"!=typeof t)&&(void 0!==t.jquery&&(t=t[0]),void 0!==t.nodeType),r=t=>o(t)?t.jquery?t[0]:t:"string"==typeof t&&t.length>0?document.querySelector(n(t)):null,a=t=>{if(!o(t)||0===t.getClientRects().length)return!1;const e="visible"===getComputedStyle(t).getPropertyValue("visibility"),i=t.closest("details:not([open])");if(!i)return e;if(i!==t){const e=t.closest("summary");if(e&&e.parentNode!==i)return!1;if(null===e)return!1}return e},l=t=>!t||t.nodeType!==Node.ELEMENT_NODE||!!t.classList.contains("disabled")||(void 0!==t.disabled?t.disabled:t.hasAttribute("disabled")&&"false"!==t.getAttribute("disabled")),c=t=>{if(!document.documentElement.attachShadow)return null;if("function"==typeof t.getRootNode){const e=t.getRootNode();return e instanceof ShadowRoot?e:null}return t instanceof 
ShadowRoot?t:t.parentNode?c(t.parentNode):null},h=()=>{},d=t=>{t.offsetHeight},u=()=>window.jQuery&&!document.body.hasAttribute("data-bs-no-jquery")?window.jQuery:null,f=[],p=()=>"rtl"===document.documentElement.dir,m=t=>{var e;e=()=>{const e=u();if(e){const i=t.NAME,n=e.fn[i];e.fn[i]=t.jQueryInterface,e.fn[i].Constructor=t,e.fn[i].noConflict=()=>(e.fn[i]=n,t.jQueryInterface)}},"loading"===document.readyState?(f.length||document.addEventListener("DOMContentLoaded",(()=>{for(const t of f)t()})),f.push(e)):e()},g=(t,e=[],i=t)=>"function"==typeof t?t(...e):i,_=(t,e,n=!0)=>{if(!n)return void g(t);const o=(t=>{if(!t)return 0;let{transitionDuration:e,transitionDelay:i}=window.getComputedStyle(t);const n=Number.parseFloat(e),s=Number.parseFloat(i);return n||s?(e=e.split(",")[0],i=i.split(",")[0],1e3*(Number.parseFloat(e)+Number.parseFloat(i))):0})(e)+5;let r=!1;const a=({target:n})=>{n===e&&(r=!0,e.removeEventListener(i,a),g(t))};e.addEventListener(i,a),setTimeout((()=>{r||s(e)}),o)},b=(t,e,i,n)=>{const s=t.length;let o=t.indexOf(e);return-1===o?!i&&n?t[s-1]:t[0]:(o+=i?1:-1,n&&(o=(o+s)%s),t[Math.max(0,Math.min(o,s-1))])},v=/[^.]*(?=\..*)\.|.*/,y=/\..*/,w=/::\d+$/,A={};let E=1;const T={mouseenter:"mouseover",mouseleave:"mouseout"},C=new Set(["click","dblclick","mouseup","mousedown","contextmenu","mousewheel","DOMMouseScroll","mouseover","mouseout","mousemove","selectstart","selectend","keydown","keypress","keyup","orientationchange","touchstart","touchmove","touchend","touchcancel","pointerdown","pointermove","pointerup","pointerleave","pointercancel","gesturestart","gesturechange","gestureend","focus","blur","change","reset","select","submit","focusin","focusout","load","unload","beforeunload","resize","move","DOMContentLoaded","readystatechange","error","abort","scroll"]);function O(t,e){return e&&`${e}::${E++}`||t.uidEvent||E++}function x(t){const e=O(t);return t.uidEvent=e,A[e]=A[e]||{},A[e]}function k(t,e,i=null){return 
Object.values(t).find((t=>t.callable===e&&t.delegationSelector===i))}function L(t,e,i){const n="string"==typeof e,s=n?i:e||i;let o=I(t);return C.has(o)||(o=t),[n,s,o]}function S(t,e,i,n,s){if("string"!=typeof e||!t)return;let[o,r,a]=L(e,i,n);if(e in T){const t=t=>function(e){if(!e.relatedTarget||e.relatedTarget!==e.delegateTarget&&!e.delegateTarget.contains(e.relatedTarget))return t.call(this,e)};r=t(r)}const l=x(t),c=l[a]||(l[a]={}),h=k(c,r,o?i:null);if(h)return void(h.oneOff=h.oneOff&&s);const d=O(r,e.replace(v,"")),u=o?function(t,e,i){return function n(s){const o=t.querySelectorAll(e);for(let{target:r}=s;r&&r!==this;r=r.parentNode)for(const a of o)if(a===r)return P(s,{delegateTarget:r}),n.oneOff&&N.off(t,s.type,e,i),i.apply(r,[s])}}(t,i,r):function(t,e){return function i(n){return P(n,{delegateTarget:t}),i.oneOff&&N.off(t,n.type,e),e.apply(t,[n])}}(t,r);u.delegationSelector=o?i:null,u.callable=r,u.oneOff=s,u.uidEvent=d,c[d]=u,t.addEventListener(a,u,o)}function D(t,e,i,n,s){const o=k(e[i],n,s);o&&(t.removeEventListener(i,o,Boolean(s)),delete e[i][o.uidEvent])}function $(t,e,i,n){const s=e[i]||{};for(const[o,r]of Object.entries(s))o.includes(n)&&D(t,e,i,r.callable,r.delegationSelector)}function I(t){return t=t.replace(y,""),T[t]||t}const N={on(t,e,i,n){S(t,e,i,n,!1)},one(t,e,i,n){S(t,e,i,n,!0)},off(t,e,i,n){if("string"!=typeof e||!t)return;const[s,o,r]=L(e,i,n),a=r!==e,l=x(t),c=l[r]||{},h=e.startsWith(".");if(void 0===o){if(h)for(const i of Object.keys(l))$(t,l,i,e.slice(1));for(const[i,n]of Object.entries(c)){const s=i.replace(w,"");a&&!e.includes(s)||D(t,l,r,n.callable,n.delegationSelector)}}else{if(!Object.keys(c).length)return;D(t,l,r,o,s?i:null)}},trigger(t,e,i){if("string"!=typeof e||!t)return null;const n=u();let s=null,o=!0,r=!0,a=!1;e!==I(e)&&n&&(s=n.Event(e,i),n(t).trigger(s),o=!s.isPropagationStopped(),r=!s.isImmediatePropagationStopped(),a=s.isDefaultPrevented());const l=P(new Event(e,{bubbles:o,cancelable:!0}),i);return 
a&&l.preventDefault(),r&&t.dispatchEvent(l),l.defaultPrevented&&s&&s.preventDefault(),l}};function P(t,e={}){for(const[i,n]of Object.entries(e))try{t[i]=n}catch(e){Object.defineProperty(t,i,{configurable:!0,get:()=>n})}return t}function M(t){if("true"===t)return!0;if("false"===t)return!1;if(t===Number(t).toString())return Number(t);if(""===t||"null"===t)return null;if("string"!=typeof t)return t;try{return JSON.parse(decodeURIComponent(t))}catch(e){return t}}function j(t){return t.replace(/[A-Z]/g,(t=>`-${t.toLowerCase()}`))}const F={setDataAttribute(t,e,i){t.setAttribute(`data-bs-${j(e)}`,i)},removeDataAttribute(t,e){t.removeAttribute(`data-bs-${j(e)}`)},getDataAttributes(t){if(!t)return{};const e={},i=Object.keys(t.dataset).filter((t=>t.startsWith("bs")&&!t.startsWith("bsConfig")));for(const n of i){let i=n.replace(/^bs/,"");i=i.charAt(0).toLowerCase()+i.slice(1,i.length),e[i]=M(t.dataset[n])}return e},getDataAttribute:(t,e)=>M(t.getAttribute(`data-bs-${j(e)}`))};class H{static get Default(){return{}}static get DefaultType(){return{}}static get NAME(){throw new Error('You have to implement the static method "NAME", for each component!')}_getConfig(t){return t=this._mergeConfigObj(t),t=this._configAfterMerge(t),this._typeCheckConfig(t),t}_configAfterMerge(t){return t}_mergeConfigObj(t,e){const i=o(e)?F.getDataAttribute(e,"config"):{};return{...this.constructor.Default,..."object"==typeof i?i:{},...o(e)?F.getDataAttributes(e):{},..."object"==typeof t?t:{}}}_typeCheckConfig(t,e=this.constructor.DefaultType){for(const[n,s]of Object.entries(e)){const e=t[n],r=o(e)?"element":null==(i=e)?`${i}`:Object.prototype.toString.call(i).match(/\s([a-z]+)/i)[1].toLowerCase();if(!new RegExp(s).test(r))throw new TypeError(`${this.constructor.NAME.toUpperCase()}: Option "${n}" provided type "${r}" but expected type "${s}".`)}var i}}class W extends 
H{constructor(t,i){super(),(t=r(t))&&(this._element=t,this._config=this._getConfig(i),e.set(this._element,this.constructor.DATA_KEY,this))}dispose(){e.remove(this._element,this.constructor.DATA_KEY),N.off(this._element,this.constructor.EVENT_KEY);for(const t of Object.getOwnPropertyNames(this))this[t]=null}_queueCallback(t,e,i=!0){_(t,e,i)}_getConfig(t){return t=this._mergeConfigObj(t,this._element),t=this._configAfterMerge(t),this._typeCheckConfig(t),t}static getInstance(t){return e.get(r(t),this.DATA_KEY)}static getOrCreateInstance(t,e={}){return this.getInstance(t)||new this(t,"object"==typeof e?e:null)}static get VERSION(){return"5.3.1"}static get DATA_KEY(){return`bs.${this.NAME}`}static get EVENT_KEY(){return`.${this.DATA_KEY}`}static eventName(t){return`${t}${this.EVENT_KEY}`}}const B=t=>{let e=t.getAttribute("data-bs-target");if(!e||"#"===e){let i=t.getAttribute("href");if(!i||!i.includes("#")&&!i.startsWith("."))return null;i.includes("#")&&!i.startsWith("#")&&(i=`#${i.split("#")[1]}`),e=i&&"#"!==i?i.trim():null}return n(e)},z={find:(t,e=document.documentElement)=>[].concat(...Element.prototype.querySelectorAll.call(e,t)),findOne:(t,e=document.documentElement)=>Element.prototype.querySelector.call(e,t),children:(t,e)=>[].concat(...t.children).filter((t=>t.matches(e))),parents(t,e){const i=[];let n=t.parentNode.closest(e);for(;n;)i.push(n),n=n.parentNode.closest(e);return i},prev(t,e){let i=t.previousElementSibling;for(;i;){if(i.matches(e))return[i];i=i.previousElementSibling}return[]},next(t,e){let i=t.nextElementSibling;for(;i;){if(i.matches(e))return[i];i=i.nextElementSibling}return[]},focusableChildren(t){const e=["a","button","input","textarea","select","details","[tabindex]",'[contenteditable="true"]'].map((t=>`${t}:not([tabindex^="-"])`)).join(",");return this.find(e,t).filter((t=>!l(t)&&a(t)))},getSelectorFromElement(t){const e=B(t);return e&&z.findOne(e)?e:null},getElementFromSelector(t){const e=B(t);return 
e?z.findOne(e):null},getMultipleElementsFromSelector(t){const e=B(t);return e?z.find(e):[]}},R=(t,e="hide")=>{const i=`click.dismiss${t.EVENT_KEY}`,n=t.NAME;N.on(document,i,`[data-bs-dismiss="${n}"]`,(function(i){if(["A","AREA"].includes(this.tagName)&&i.preventDefault(),l(this))return;const s=z.getElementFromSelector(this)||this.closest(`.${n}`);t.getOrCreateInstance(s)[e]()}))},q=".bs.alert",V=`close${q}`,K=`closed${q}`;class Q extends W{static get NAME(){return"alert"}close(){if(N.trigger(this._element,V).defaultPrevented)return;this._element.classList.remove("show");const t=this._element.classList.contains("fade");this._queueCallback((()=>this._destroyElement()),this._element,t)}_destroyElement(){this._element.remove(),N.trigger(this._element,K),this.dispose()}static jQueryInterface(t){return this.each((function(){const e=Q.getOrCreateInstance(this);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t](this)}}))}}R(Q,"close"),m(Q);const X='[data-bs-toggle="button"]';class Y extends W{static get NAME(){return"button"}toggle(){this._element.setAttribute("aria-pressed",this._element.classList.toggle("active"))}static jQueryInterface(t){return this.each((function(){const e=Y.getOrCreateInstance(this);"toggle"===t&&e[t]()}))}}N.on(document,"click.bs.button.data-api",X,(t=>{t.preventDefault();const e=t.target.closest(X);Y.getOrCreateInstance(e).toggle()})),m(Y);const U=".bs.swipe",G=`touchstart${U}`,J=`touchmove${U}`,Z=`touchend${U}`,tt=`pointerdown${U}`,et=`pointerup${U}`,it={endCallback:null,leftCallback:null,rightCallback:null},nt={endCallback:"(function|null)",leftCallback:"(function|null)",rightCallback:"(function|null)"};class st extends H{constructor(t,e){super(),this._element=t,t&&st.isSupported()&&(this._config=this._getConfig(e),this._deltaX=0,this._supportPointerEvents=Boolean(window.PointerEvent),this._initEvents())}static get Default(){return it}static get DefaultType(){return 
nt}static get NAME(){return"swipe"}dispose(){N.off(this._element,U)}_start(t){this._supportPointerEvents?this._eventIsPointerPenTouch(t)&&(this._deltaX=t.clientX):this._deltaX=t.touches[0].clientX}_end(t){this._eventIsPointerPenTouch(t)&&(this._deltaX=t.clientX-this._deltaX),this._handleSwipe(),g(this._config.endCallback)}_move(t){this._deltaX=t.touches&&t.touches.length>1?0:t.touches[0].clientX-this._deltaX}_handleSwipe(){const t=Math.abs(this._deltaX);if(t<=40)return;const e=t/this._deltaX;this._deltaX=0,e&&g(e>0?this._config.rightCallback:this._config.leftCallback)}_initEvents(){this._supportPointerEvents?(N.on(this._element,tt,(t=>this._start(t))),N.on(this._element,et,(t=>this._end(t))),this._element.classList.add("pointer-event")):(N.on(this._element,G,(t=>this._start(t))),N.on(this._element,J,(t=>this._move(t))),N.on(this._element,Z,(t=>this._end(t))))}_eventIsPointerPenTouch(t){return this._supportPointerEvents&&("pen"===t.pointerType||"touch"===t.pointerType)}static isSupported(){return"ontouchstart"in document.documentElement||navigator.maxTouchPoints>0}}const ot=".bs.carousel",rt=".data-api",at="next",lt="prev",ct="left",ht="right",dt=`slide${ot}`,ut=`slid${ot}`,ft=`keydown${ot}`,pt=`mouseenter${ot}`,mt=`mouseleave${ot}`,gt=`dragstart${ot}`,_t=`load${ot}${rt}`,bt=`click${ot}${rt}`,vt="carousel",yt="active",wt=".active",At=".carousel-item",Et=wt+At,Tt={ArrowLeft:ht,ArrowRight:ct},Ct={interval:5e3,keyboard:!0,pause:"hover",ride:!1,touch:!0,wrap:!0},Ot={interval:"(number|boolean)",keyboard:"boolean",pause:"(string|boolean)",ride:"(boolean|string)",touch:"boolean",wrap:"boolean"};class xt extends W{constructor(t,e){super(t,e),this._interval=null,this._activeElement=null,this._isSliding=!1,this.touchTimeout=null,this._swipeHelper=null,this._indicatorsElement=z.findOne(".carousel-indicators",this._element),this._addEventListeners(),this._config.ride===vt&&this.cycle()}static get Default(){return Ct}static get DefaultType(){return Ot}static get 
NAME(){return"carousel"}next(){this._slide(at)}nextWhenVisible(){!document.hidden&&a(this._element)&&this.next()}prev(){this._slide(lt)}pause(){this._isSliding&&s(this._element),this._clearInterval()}cycle(){this._clearInterval(),this._updateInterval(),this._interval=setInterval((()=>this.nextWhenVisible()),this._config.interval)}_maybeEnableCycle(){this._config.ride&&(this._isSliding?N.one(this._element,ut,(()=>this.cycle())):this.cycle())}to(t){const e=this._getItems();if(t>e.length-1||t<0)return;if(this._isSliding)return void N.one(this._element,ut,(()=>this.to(t)));const i=this._getItemIndex(this._getActive());if(i===t)return;const n=t>i?at:lt;this._slide(n,e[t])}dispose(){this._swipeHelper&&this._swipeHelper.dispose(),super.dispose()}_configAfterMerge(t){return t.defaultInterval=t.interval,t}_addEventListeners(){this._config.keyboard&&N.on(this._element,ft,(t=>this._keydown(t))),"hover"===this._config.pause&&(N.on(this._element,pt,(()=>this.pause())),N.on(this._element,mt,(()=>this._maybeEnableCycle()))),this._config.touch&&st.isSupported()&&this._addTouchEventListeners()}_addTouchEventListeners(){for(const t of z.find(".carousel-item img",this._element))N.on(t,gt,(t=>t.preventDefault()));const t={leftCallback:()=>this._slide(this._directionToOrder(ct)),rightCallback:()=>this._slide(this._directionToOrder(ht)),endCallback:()=>{"hover"===this._config.pause&&(this.pause(),this.touchTimeout&&clearTimeout(this.touchTimeout),this.touchTimeout=setTimeout((()=>this._maybeEnableCycle()),500+this._config.interval))}};this._swipeHelper=new st(this._element,t)}_keydown(t){if(/input|textarea/i.test(t.target.tagName))return;const e=Tt[t.key];e&&(t.preventDefault(),this._slide(this._directionToOrder(e)))}_getItemIndex(t){return this._getItems().indexOf(t)}_setActiveIndicatorElement(t){if(!this._indicatorsElement)return;const e=z.findOne(wt,this._indicatorsElement);e.classList.remove(yt),e.removeAttribute("aria-current");const 
i=z.findOne(`[data-bs-slide-to="${t}"]`,this._indicatorsElement);i&&(i.classList.add(yt),i.setAttribute("aria-current","true"))}_updateInterval(){const t=this._activeElement||this._getActive();if(!t)return;const e=Number.parseInt(t.getAttribute("data-bs-interval"),10);this._config.interval=e||this._config.defaultInterval}_slide(t,e=null){if(this._isSliding)return;const i=this._getActive(),n=t===at,s=e||b(this._getItems(),i,n,this._config.wrap);if(s===i)return;const o=this._getItemIndex(s),r=e=>N.trigger(this._element,e,{relatedTarget:s,direction:this._orderToDirection(t),from:this._getItemIndex(i),to:o});if(r(dt).defaultPrevented)return;if(!i||!s)return;const a=Boolean(this._interval);this.pause(),this._isSliding=!0,this._setActiveIndicatorElement(o),this._activeElement=s;const l=n?"carousel-item-start":"carousel-item-end",c=n?"carousel-item-next":"carousel-item-prev";s.classList.add(c),d(s),i.classList.add(l),s.classList.add(l),this._queueCallback((()=>{s.classList.remove(l,c),s.classList.add(yt),i.classList.remove(yt,c,l),this._isSliding=!1,r(ut)}),i,this._isAnimated()),a&&this.cycle()}_isAnimated(){return this._element.classList.contains("slide")}_getActive(){return z.findOne(Et,this._element)}_getItems(){return z.find(At,this._element)}_clearInterval(){this._interval&&(clearInterval(this._interval),this._interval=null)}_directionToOrder(t){return p()?t===ct?lt:at:t===ct?at:lt}_orderToDirection(t){return p()?t===lt?ct:ht:t===lt?ht:ct}static jQueryInterface(t){return this.each((function(){const e=xt.getOrCreateInstance(this,t);if("number"!=typeof t){if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t]()}}else e.to(t)}))}}N.on(document,bt,"[data-bs-slide], [data-bs-slide-to]",(function(t){const e=z.getElementFromSelector(this);if(!e||!e.classList.contains(vt))return;t.preventDefault();const i=xt.getOrCreateInstance(e),n=this.getAttribute("data-bs-slide-to");return n?(i.to(n),void 
i._maybeEnableCycle()):"next"===F.getDataAttribute(this,"slide")?(i.next(),void i._maybeEnableCycle()):(i.prev(),void i._maybeEnableCycle())})),N.on(window,_t,(()=>{const t=z.find('[data-bs-ride="carousel"]');for(const e of t)xt.getOrCreateInstance(e)})),m(xt);const kt=".bs.collapse",Lt=`show${kt}`,St=`shown${kt}`,Dt=`hide${kt}`,$t=`hidden${kt}`,It=`click${kt}.data-api`,Nt="show",Pt="collapse",Mt="collapsing",jt=`:scope .${Pt} .${Pt}`,Ft='[data-bs-toggle="collapse"]',Ht={parent:null,toggle:!0},Wt={parent:"(null|element)",toggle:"boolean"};class Bt extends W{constructor(t,e){super(t,e),this._isTransitioning=!1,this._triggerArray=[];const i=z.find(Ft);for(const t of i){const e=z.getSelectorFromElement(t),i=z.find(e).filter((t=>t===this._element));null!==e&&i.length&&this._triggerArray.push(t)}this._initializeChildren(),this._config.parent||this._addAriaAndCollapsedClass(this._triggerArray,this._isShown()),this._config.toggle&&this.toggle()}static get Default(){return Ht}static get DefaultType(){return Wt}static get NAME(){return"collapse"}toggle(){this._isShown()?this.hide():this.show()}show(){if(this._isTransitioning||this._isShown())return;let t=[];if(this._config.parent&&(t=this._getFirstLevelChildren(".collapse.show, .collapse.collapsing").filter((t=>t!==this._element)).map((t=>Bt.getOrCreateInstance(t,{toggle:!1})))),t.length&&t[0]._isTransitioning)return;if(N.trigger(this._element,Lt).defaultPrevented)return;for(const e of t)e.hide();const e=this._getDimension();this._element.classList.remove(Pt),this._element.classList.add(Mt),this._element.style[e]=0,this._addAriaAndCollapsedClass(this._triggerArray,!0),this._isTransitioning=!0;const 
i=`scroll${e[0].toUpperCase()+e.slice(1)}`;this._queueCallback((()=>{this._isTransitioning=!1,this._element.classList.remove(Mt),this._element.classList.add(Pt,Nt),this._element.style[e]="",N.trigger(this._element,St)}),this._element,!0),this._element.style[e]=`${this._element[i]}px`}hide(){if(this._isTransitioning||!this._isShown())return;if(N.trigger(this._element,Dt).defaultPrevented)return;const t=this._getDimension();this._element.style[t]=`${this._element.getBoundingClientRect()[t]}px`,d(this._element),this._element.classList.add(Mt),this._element.classList.remove(Pt,Nt);for(const t of this._triggerArray){const e=z.getElementFromSelector(t);e&&!this._isShown(e)&&this._addAriaAndCollapsedClass([t],!1)}this._isTransitioning=!0,this._element.style[t]="",this._queueCallback((()=>{this._isTransitioning=!1,this._element.classList.remove(Mt),this._element.classList.add(Pt),N.trigger(this._element,$t)}),this._element,!0)}_isShown(t=this._element){return t.classList.contains(Nt)}_configAfterMerge(t){return t.toggle=Boolean(t.toggle),t.parent=r(t.parent),t}_getDimension(){return this._element.classList.contains("collapse-horizontal")?"width":"height"}_initializeChildren(){if(!this._config.parent)return;const t=this._getFirstLevelChildren(Ft);for(const e of t){const t=z.getElementFromSelector(e);t&&this._addAriaAndCollapsedClass([e],this._isShown(t))}}_getFirstLevelChildren(t){const e=z.find(jt,this._config.parent);return z.find(t,this._config.parent).filter((t=>!e.includes(t)))}_addAriaAndCollapsedClass(t,e){if(t.length)for(const i of t)i.classList.toggle("collapsed",!e),i.setAttribute("aria-expanded",e)}static jQueryInterface(t){const e={};return"string"==typeof t&&/show|hide/.test(t)&&(e.toggle=!1),this.each((function(){const i=Bt.getOrCreateInstance(this,e);if("string"==typeof t){if(void 0===i[t])throw new TypeError(`No method named 
"${t}"`);i[t]()}}))}}N.on(document,It,Ft,(function(t){("A"===t.target.tagName||t.delegateTarget&&"A"===t.delegateTarget.tagName)&&t.preventDefault();for(const t of z.getMultipleElementsFromSelector(this))Bt.getOrCreateInstance(t,{toggle:!1}).toggle()})),m(Bt);var zt="top",Rt="bottom",qt="right",Vt="left",Kt="auto",Qt=[zt,Rt,qt,Vt],Xt="start",Yt="end",Ut="clippingParents",Gt="viewport",Jt="popper",Zt="reference",te=Qt.reduce((function(t,e){return t.concat([e+"-"+Xt,e+"-"+Yt])}),[]),ee=[].concat(Qt,[Kt]).reduce((function(t,e){return t.concat([e,e+"-"+Xt,e+"-"+Yt])}),[]),ie="beforeRead",ne="read",se="afterRead",oe="beforeMain",re="main",ae="afterMain",le="beforeWrite",ce="write",he="afterWrite",de=[ie,ne,se,oe,re,ae,le,ce,he];function ue(t){return t?(t.nodeName||"").toLowerCase():null}function fe(t){if(null==t)return window;if("[object Window]"!==t.toString()){var e=t.ownerDocument;return e&&e.defaultView||window}return t}function pe(t){return t instanceof fe(t).Element||t instanceof Element}function me(t){return t instanceof fe(t).HTMLElement||t instanceof HTMLElement}function ge(t){return"undefined"!=typeof ShadowRoot&&(t instanceof fe(t).ShadowRoot||t instanceof ShadowRoot)}const _e={name:"applyStyles",enabled:!0,phase:"write",fn:function(t){var e=t.state;Object.keys(e.elements).forEach((function(t){var i=e.styles[t]||{},n=e.attributes[t]||{},s=e.elements[t];me(s)&&ue(s)&&(Object.assign(s.style,i),Object.keys(n).forEach((function(t){var e=n[t];!1===e?s.removeAttribute(t):s.setAttribute(t,!0===e?"":e)})))}))},effect:function(t){var e=t.state,i={popper:{position:e.options.strategy,left:"0",top:"0",margin:"0"},arrow:{position:"absolute"},reference:{}};return Object.assign(e.elements.popper.style,i.popper),e.styles=i,e.elements.arrow&&Object.assign(e.elements.arrow.style,i.arrow),function(){Object.keys(e.elements).forEach((function(t){var n=e.elements[t],s=e.attributes[t]||{},o=Object.keys(e.styles.hasOwnProperty(t)?e.styles[t]:i[t]).reduce((function(t,e){return 
t[e]="",t}),{});me(n)&&ue(n)&&(Object.assign(n.style,o),Object.keys(s).forEach((function(t){n.removeAttribute(t)})))}))}},requires:["computeStyles"]};function be(t){return t.split("-")[0]}var ve=Math.max,ye=Math.min,we=Math.round;function Ae(){var t=navigator.userAgentData;return null!=t&&t.brands&&Array.isArray(t.brands)?t.brands.map((function(t){return t.brand+"/"+t.version})).join(" "):navigator.userAgent}function Ee(){return!/^((?!chrome|android).)*safari/i.test(Ae())}function Te(t,e,i){void 0===e&&(e=!1),void 0===i&&(i=!1);var n=t.getBoundingClientRect(),s=1,o=1;e&&me(t)&&(s=t.offsetWidth>0&&we(n.width)/t.offsetWidth||1,o=t.offsetHeight>0&&we(n.height)/t.offsetHeight||1);var r=(pe(t)?fe(t):window).visualViewport,a=!Ee()&&i,l=(n.left+(a&&r?r.offsetLeft:0))/s,c=(n.top+(a&&r?r.offsetTop:0))/o,h=n.width/s,d=n.height/o;return{width:h,height:d,top:c,right:l+h,bottom:c+d,left:l,x:l,y:c}}function Ce(t){var e=Te(t),i=t.offsetWidth,n=t.offsetHeight;return Math.abs(e.width-i)<=1&&(i=e.width),Math.abs(e.height-n)<=1&&(n=e.height),{x:t.offsetLeft,y:t.offsetTop,width:i,height:n}}function Oe(t,e){var i=e.getRootNode&&e.getRootNode();if(t.contains(e))return!0;if(i&&ge(i)){var n=e;do{if(n&&t.isSameNode(n))return!0;n=n.parentNode||n.host}while(n)}return!1}function xe(t){return fe(t).getComputedStyle(t)}function ke(t){return["table","td","th"].indexOf(ue(t))>=0}function Le(t){return((pe(t)?t.ownerDocument:t.document)||window.document).documentElement}function Se(t){return"html"===ue(t)?t:t.assignedSlot||t.parentNode||(ge(t)?t.host:null)||Le(t)}function De(t){return me(t)&&"fixed"!==xe(t).position?t.offsetParent:null}function $e(t){for(var e=fe(t),i=De(t);i&&ke(i)&&"static"===xe(i).position;)i=De(i);return i&&("html"===ue(i)||"body"===ue(i)&&"static"===xe(i).position)?e:i||function(t){var e=/firefox/i.test(Ae());if(/Trident/i.test(Ae())&&me(t)&&"fixed"===xe(t).position)return null;var i=Se(t);for(ge(i)&&(i=i.host);me(i)&&["html","body"].indexOf(ue(i))<0;){var 
n=xe(i);if("none"!==n.transform||"none"!==n.perspective||"paint"===n.contain||-1!==["transform","perspective"].indexOf(n.willChange)||e&&"filter"===n.willChange||e&&n.filter&&"none"!==n.filter)return i;i=i.parentNode}return null}(t)||e}function Ie(t){return["top","bottom"].indexOf(t)>=0?"x":"y"}function Ne(t,e,i){return ve(t,ye(e,i))}function Pe(t){return Object.assign({},{top:0,right:0,bottom:0,left:0},t)}function Me(t,e){return e.reduce((function(e,i){return e[i]=t,e}),{})}const je={name:"arrow",enabled:!0,phase:"main",fn:function(t){var e,i=t.state,n=t.name,s=t.options,o=i.elements.arrow,r=i.modifiersData.popperOffsets,a=be(i.placement),l=Ie(a),c=[Vt,qt].indexOf(a)>=0?"height":"width";if(o&&r){var h=function(t,e){return Pe("number"!=typeof(t="function"==typeof t?t(Object.assign({},e.rects,{placement:e.placement})):t)?t:Me(t,Qt))}(s.padding,i),d=Ce(o),u="y"===l?zt:Vt,f="y"===l?Rt:qt,p=i.rects.reference[c]+i.rects.reference[l]-r[l]-i.rects.popper[c],m=r[l]-i.rects.reference[l],g=$e(o),_=g?"y"===l?g.clientHeight||0:g.clientWidth||0:0,b=p/2-m/2,v=h[u],y=_-d[c]-h[f],w=_/2-d[c]/2+b,A=Ne(v,w,y),E=l;i.modifiersData[n]=((e={})[E]=A,e.centerOffset=A-w,e)}},effect:function(t){var e=t.state,i=t.options.element,n=void 0===i?"[data-popper-arrow]":i;null!=n&&("string"!=typeof n||(n=e.elements.popper.querySelector(n)))&&Oe(e.elements.popper,n)&&(e.elements.arrow=n)},requires:["popperOffsets"],requiresIfExists:["preventOverflow"]};function Fe(t){return t.split("-")[1]}var He={top:"auto",right:"auto",bottom:"auto",left:"auto"};function We(t){var e,i=t.popper,n=t.popperRect,s=t.placement,o=t.variation,r=t.offsets,a=t.position,l=t.gpuAcceleration,c=t.adaptive,h=t.roundOffsets,d=t.isFixed,u=r.x,f=void 0===u?0:u,p=r.y,m=void 0===p?0:p,g="function"==typeof h?h({x:f,y:m}):{x:f,y:m};f=g.x,m=g.y;var _=r.hasOwnProperty("x"),b=r.hasOwnProperty("y"),v=Vt,y=zt,w=window;if(c){var 
A=$e(i),E="clientHeight",T="clientWidth";A===fe(i)&&"static"!==xe(A=Le(i)).position&&"absolute"===a&&(E="scrollHeight",T="scrollWidth"),(s===zt||(s===Vt||s===qt)&&o===Yt)&&(y=Rt,m-=(d&&A===w&&w.visualViewport?w.visualViewport.height:A[E])-n.height,m*=l?1:-1),s!==Vt&&(s!==zt&&s!==Rt||o!==Yt)||(v=qt,f-=(d&&A===w&&w.visualViewport?w.visualViewport.width:A[T])-n.width,f*=l?1:-1)}var C,O=Object.assign({position:a},c&&He),x=!0===h?function(t,e){var i=t.x,n=t.y,s=e.devicePixelRatio||1;return{x:we(i*s)/s||0,y:we(n*s)/s||0}}({x:f,y:m},fe(i)):{x:f,y:m};return f=x.x,m=x.y,l?Object.assign({},O,((C={})[y]=b?"0":"",C[v]=_?"0":"",C.transform=(w.devicePixelRatio||1)<=1?"translate("+f+"px, "+m+"px)":"translate3d("+f+"px, "+m+"px, 0)",C)):Object.assign({},O,((e={})[y]=b?m+"px":"",e[v]=_?f+"px":"",e.transform="",e))}const Be={name:"computeStyles",enabled:!0,phase:"beforeWrite",fn:function(t){var e=t.state,i=t.options,n=i.gpuAcceleration,s=void 0===n||n,o=i.adaptive,r=void 0===o||o,a=i.roundOffsets,l=void 0===a||a,c={placement:be(e.placement),variation:Fe(e.placement),popper:e.elements.popper,popperRect:e.rects.popper,gpuAcceleration:s,isFixed:"fixed"===e.options.strategy};null!=e.modifiersData.popperOffsets&&(e.styles.popper=Object.assign({},e.styles.popper,We(Object.assign({},c,{offsets:e.modifiersData.popperOffsets,position:e.options.strategy,adaptive:r,roundOffsets:l})))),null!=e.modifiersData.arrow&&(e.styles.arrow=Object.assign({},e.styles.arrow,We(Object.assign({},c,{offsets:e.modifiersData.arrow,position:"absolute",adaptive:!1,roundOffsets:l})))),e.attributes.popper=Object.assign({},e.attributes.popper,{"data-popper-placement":e.placement})},data:{}};var ze={passive:!0};const Re={name:"eventListeners",enabled:!0,phase:"write",fn:function(){},effect:function(t){var e=t.state,i=t.instance,n=t.options,s=n.scroll,o=void 0===s||s,r=n.resize,a=void 0===r||r,l=fe(e.elements.popper),c=[].concat(e.scrollParents.reference,e.scrollParents.popper);return 
o&&c.forEach((function(t){t.addEventListener("scroll",i.update,ze)})),a&&l.addEventListener("resize",i.update,ze),function(){o&&c.forEach((function(t){t.removeEventListener("scroll",i.update,ze)})),a&&l.removeEventListener("resize",i.update,ze)}},data:{}};var qe={left:"right",right:"left",bottom:"top",top:"bottom"};function Ve(t){return t.replace(/left|right|bottom|top/g,(function(t){return qe[t]}))}var Ke={start:"end",end:"start"};function Qe(t){return t.replace(/start|end/g,(function(t){return Ke[t]}))}function Xe(t){var e=fe(t);return{scrollLeft:e.pageXOffset,scrollTop:e.pageYOffset}}function Ye(t){return Te(Le(t)).left+Xe(t).scrollLeft}function Ue(t){var e=xe(t),i=e.overflow,n=e.overflowX,s=e.overflowY;return/auto|scroll|overlay|hidden/.test(i+s+n)}function Ge(t){return["html","body","#document"].indexOf(ue(t))>=0?t.ownerDocument.body:me(t)&&Ue(t)?t:Ge(Se(t))}function Je(t,e){var i;void 0===e&&(e=[]);var n=Ge(t),s=n===(null==(i=t.ownerDocument)?void 0:i.body),o=fe(n),r=s?[o].concat(o.visualViewport||[],Ue(n)?n:[]):n,a=e.concat(r);return s?a:a.concat(Je(Se(r)))}function Ze(t){return Object.assign({},t,{left:t.x,top:t.y,right:t.x+t.width,bottom:t.y+t.height})}function ti(t,e,i){return e===Gt?Ze(function(t,e){var i=fe(t),n=Le(t),s=i.visualViewport,o=n.clientWidth,r=n.clientHeight,a=0,l=0;if(s){o=s.width,r=s.height;var c=Ee();(c||!c&&"fixed"===e)&&(a=s.offsetLeft,l=s.offsetTop)}return{width:o,height:r,x:a+Ye(t),y:l}}(t,i)):pe(e)?function(t,e){var i=Te(t,!1,"fixed"===e);return i.top=i.top+t.clientTop,i.left=i.left+t.clientLeft,i.bottom=i.top+t.clientHeight,i.right=i.left+t.clientWidth,i.width=t.clientWidth,i.height=t.clientHeight,i.x=i.left,i.y=i.top,i}(e,i):Ze(function(t){var e,i=Le(t),n=Xe(t),s=null==(e=t.ownerDocument)?void 
0:e.body,o=ve(i.scrollWidth,i.clientWidth,s?s.scrollWidth:0,s?s.clientWidth:0),r=ve(i.scrollHeight,i.clientHeight,s?s.scrollHeight:0,s?s.clientHeight:0),a=-n.scrollLeft+Ye(t),l=-n.scrollTop;return"rtl"===xe(s||i).direction&&(a+=ve(i.clientWidth,s?s.clientWidth:0)-o),{width:o,height:r,x:a,y:l}}(Le(t)))}function ei(t){var e,i=t.reference,n=t.element,s=t.placement,o=s?be(s):null,r=s?Fe(s):null,a=i.x+i.width/2-n.width/2,l=i.y+i.height/2-n.height/2;switch(o){case zt:e={x:a,y:i.y-n.height};break;case Rt:e={x:a,y:i.y+i.height};break;case qt:e={x:i.x+i.width,y:l};break;case Vt:e={x:i.x-n.width,y:l};break;default:e={x:i.x,y:i.y}}var c=o?Ie(o):null;if(null!=c){var h="y"===c?"height":"width";switch(r){case Xt:e[c]=e[c]-(i[h]/2-n[h]/2);break;case Yt:e[c]=e[c]+(i[h]/2-n[h]/2)}}return e}function ii(t,e){void 0===e&&(e={});var i=e,n=i.placement,s=void 0===n?t.placement:n,o=i.strategy,r=void 0===o?t.strategy:o,a=i.boundary,l=void 0===a?Ut:a,c=i.rootBoundary,h=void 0===c?Gt:c,d=i.elementContext,u=void 0===d?Jt:d,f=i.altBoundary,p=void 0!==f&&f,m=i.padding,g=void 0===m?0:m,_=Pe("number"!=typeof g?g:Me(g,Qt)),b=u===Jt?Zt:Jt,v=t.rects.popper,y=t.elements[p?b:u],w=function(t,e,i,n){var s="clippingParents"===e?function(t){var e=Je(Se(t)),i=["absolute","fixed"].indexOf(xe(t).position)>=0&&me(t)?$e(t):t;return pe(i)?e.filter((function(t){return pe(t)&&Oe(t,i)&&"body"!==ue(t)})):[]}(t):[].concat(e),o=[].concat(s,[i]),r=o[0],a=o.reduce((function(e,i){var s=ti(t,i,n);return e.top=ve(s.top,e.top),e.right=ye(s.right,e.right),e.bottom=ye(s.bottom,e.bottom),e.left=ve(s.left,e.left),e}),ti(t,r,n));return 
a.width=a.right-a.left,a.height=a.bottom-a.top,a.x=a.left,a.y=a.top,a}(pe(y)?y:y.contextElement||Le(t.elements.popper),l,h,r),A=Te(t.elements.reference),E=ei({reference:A,element:v,strategy:"absolute",placement:s}),T=Ze(Object.assign({},v,E)),C=u===Jt?T:A,O={top:w.top-C.top+_.top,bottom:C.bottom-w.bottom+_.bottom,left:w.left-C.left+_.left,right:C.right-w.right+_.right},x=t.modifiersData.offset;if(u===Jt&&x){var k=x[s];Object.keys(O).forEach((function(t){var e=[qt,Rt].indexOf(t)>=0?1:-1,i=[zt,Rt].indexOf(t)>=0?"y":"x";O[t]+=k[i]*e}))}return O}function ni(t,e){void 0===e&&(e={});var i=e,n=i.placement,s=i.boundary,o=i.rootBoundary,r=i.padding,a=i.flipVariations,l=i.allowedAutoPlacements,c=void 0===l?ee:l,h=Fe(n),d=h?a?te:te.filter((function(t){return Fe(t)===h})):Qt,u=d.filter((function(t){return c.indexOf(t)>=0}));0===u.length&&(u=d);var f=u.reduce((function(e,i){return e[i]=ii(t,{placement:i,boundary:s,rootBoundary:o,padding:r})[be(i)],e}),{});return Object.keys(f).sort((function(t,e){return f[t]-f[e]}))}const si={name:"flip",enabled:!0,phase:"main",fn:function(t){var e=t.state,i=t.options,n=t.name;if(!e.modifiersData[n]._skip){for(var s=i.mainAxis,o=void 0===s||s,r=i.altAxis,a=void 0===r||r,l=i.fallbackPlacements,c=i.padding,h=i.boundary,d=i.rootBoundary,u=i.altBoundary,f=i.flipVariations,p=void 0===f||f,m=i.allowedAutoPlacements,g=e.options.placement,_=be(g),b=l||(_!==g&&p?function(t){if(be(t)===Kt)return[];var e=Ve(t);return[Qe(t),e,Qe(e)]}(g):[Ve(g)]),v=[g].concat(b).reduce((function(t,i){return t.concat(be(i)===Kt?ni(e,{placement:i,boundary:h,rootBoundary:d,padding:c,flipVariations:p,allowedAutoPlacements:m}):i)}),[]),y=e.rects.reference,w=e.rects.popper,A=new Map,E=!0,T=v[0],C=0;C=0,S=L?"width":"height",D=ii(e,{placement:O,boundary:h,rootBoundary:d,altBoundary:u,padding:c}),$=L?k?qt:Vt:k?Rt:zt;y[S]>w[S]&&($=Ve($));var I=Ve($),N=[];if(o&&N.push(D[x]<=0),a&&N.push(D[$]<=0,D[I]<=0),N.every((function(t){return t}))){T=O,E=!1;break}A.set(O,N)}if(E)for(var 
P=function(t){var e=v.find((function(e){var i=A.get(e);if(i)return i.slice(0,t).every((function(t){return t}))}));if(e)return T=e,"break"},M=p?3:1;M>0&&"break"!==P(M);M--);e.placement!==T&&(e.modifiersData[n]._skip=!0,e.placement=T,e.reset=!0)}},requiresIfExists:["offset"],data:{_skip:!1}};function oi(t,e,i){return void 0===i&&(i={x:0,y:0}),{top:t.top-e.height-i.y,right:t.right-e.width+i.x,bottom:t.bottom-e.height+i.y,left:t.left-e.width-i.x}}function ri(t){return[zt,qt,Rt,Vt].some((function(e){return t[e]>=0}))}const ai={name:"hide",enabled:!0,phase:"main",requiresIfExists:["preventOverflow"],fn:function(t){var e=t.state,i=t.name,n=e.rects.reference,s=e.rects.popper,o=e.modifiersData.preventOverflow,r=ii(e,{elementContext:"reference"}),a=ii(e,{altBoundary:!0}),l=oi(r,n),c=oi(a,s,o),h=ri(l),d=ri(c);e.modifiersData[i]={referenceClippingOffsets:l,popperEscapeOffsets:c,isReferenceHidden:h,hasPopperEscaped:d},e.attributes.popper=Object.assign({},e.attributes.popper,{"data-popper-reference-hidden":h,"data-popper-escaped":d})}},li={name:"offset",enabled:!0,phase:"main",requires:["popperOffsets"],fn:function(t){var e=t.state,i=t.options,n=t.name,s=i.offset,o=void 0===s?[0,0]:s,r=ee.reduce((function(t,i){return t[i]=function(t,e,i){var n=be(t),s=[Vt,zt].indexOf(n)>=0?-1:1,o="function"==typeof i?i(Object.assign({},e,{placement:t})):i,r=o[0],a=o[1];return r=r||0,a=(a||0)*s,[Vt,qt].indexOf(n)>=0?{x:a,y:r}:{x:r,y:a}}(i,e.rects,o),t}),{}),a=r[e.placement],l=a.x,c=a.y;null!=e.modifiersData.popperOffsets&&(e.modifiersData.popperOffsets.x+=l,e.modifiersData.popperOffsets.y+=c),e.modifiersData[n]=r}},ci={name:"popperOffsets",enabled:!0,phase:"read",fn:function(t){var e=t.state,i=t.name;e.modifiersData[i]=ei({reference:e.rects.reference,element:e.rects.popper,strategy:"absolute",placement:e.placement})},data:{}},hi={name:"preventOverflow",enabled:!0,phase:"main",fn:function(t){var e=t.state,i=t.options,n=t.name,s=i.mainAxis,o=void 0===s||s,r=i.altAxis,a=void 
0!==r&&r,l=i.boundary,c=i.rootBoundary,h=i.altBoundary,d=i.padding,u=i.tether,f=void 0===u||u,p=i.tetherOffset,m=void 0===p?0:p,g=ii(e,{boundary:l,rootBoundary:c,padding:d,altBoundary:h}),_=be(e.placement),b=Fe(e.placement),v=!b,y=Ie(_),w="x"===y?"y":"x",A=e.modifiersData.popperOffsets,E=e.rects.reference,T=e.rects.popper,C="function"==typeof m?m(Object.assign({},e.rects,{placement:e.placement})):m,O="number"==typeof C?{mainAxis:C,altAxis:C}:Object.assign({mainAxis:0,altAxis:0},C),x=e.modifiersData.offset?e.modifiersData.offset[e.placement]:null,k={x:0,y:0};if(A){if(o){var L,S="y"===y?zt:Vt,D="y"===y?Rt:qt,$="y"===y?"height":"width",I=A[y],N=I+g[S],P=I-g[D],M=f?-T[$]/2:0,j=b===Xt?E[$]:T[$],F=b===Xt?-T[$]:-E[$],H=e.elements.arrow,W=f&&H?Ce(H):{width:0,height:0},B=e.modifiersData["arrow#persistent"]?e.modifiersData["arrow#persistent"].padding:{top:0,right:0,bottom:0,left:0},z=B[S],R=B[D],q=Ne(0,E[$],W[$]),V=v?E[$]/2-M-q-z-O.mainAxis:j-q-z-O.mainAxis,K=v?-E[$]/2+M+q+R+O.mainAxis:F+q+R+O.mainAxis,Q=e.elements.arrow&&$e(e.elements.arrow),X=Q?"y"===y?Q.clientTop||0:Q.clientLeft||0:0,Y=null!=(L=null==x?void 0:x[y])?L:0,U=I+K-Y,G=Ne(f?ye(N,I+V-Y-X):N,I,f?ve(P,U):P);A[y]=G,k[y]=G-I}if(a){var J,Z="x"===y?zt:Vt,tt="x"===y?Rt:qt,et=A[w],it="y"===w?"height":"width",nt=et+g[Z],st=et-g[tt],ot=-1!==[zt,Vt].indexOf(_),rt=null!=(J=null==x?void 0:x[w])?J:0,at=ot?nt:et-E[it]-T[it]-rt+O.altAxis,lt=ot?et+E[it]+T[it]-rt-O.altAxis:st,ct=f&&ot?function(t,e,i){var n=Ne(t,e,i);return n>i?i:n}(at,et,lt):Ne(f?at:nt,et,f?lt:st);A[w]=ct,k[w]=ct-et}e.modifiersData[n]=k}},requiresIfExists:["offset"]};function di(t,e,i){void 0===i&&(i=!1);var n,s,o=me(e),r=me(e)&&function(t){var e=t.getBoundingClientRect(),i=we(e.width)/t.offsetWidth||1,n=we(e.height)/t.offsetHeight||1;return 
1!==i||1!==n}(e),a=Le(e),l=Te(t,r,i),c={scrollLeft:0,scrollTop:0},h={x:0,y:0};return(o||!o&&!i)&&(("body"!==ue(e)||Ue(a))&&(c=(n=e)!==fe(n)&&me(n)?{scrollLeft:(s=n).scrollLeft,scrollTop:s.scrollTop}:Xe(n)),me(e)?((h=Te(e,!0)).x+=e.clientLeft,h.y+=e.clientTop):a&&(h.x=Ye(a))),{x:l.left+c.scrollLeft-h.x,y:l.top+c.scrollTop-h.y,width:l.width,height:l.height}}function ui(t){var e=new Map,i=new Set,n=[];function s(t){i.add(t.name),[].concat(t.requires||[],t.requiresIfExists||[]).forEach((function(t){if(!i.has(t)){var n=e.get(t);n&&s(n)}})),n.push(t)}return t.forEach((function(t){e.set(t.name,t)})),t.forEach((function(t){i.has(t.name)||s(t)})),n}var fi={placement:"bottom",modifiers:[],strategy:"absolute"};function pi(){for(var t=arguments.length,e=new Array(t),i=0;iNumber.parseInt(t,10))):"function"==typeof t?e=>t(e,this._element):t}_getPopperConfig(){const t={placement:this._getPlacement(),modifiers:[{name:"preventOverflow",options:{boundary:this._config.boundary}},{name:"offset",options:{offset:this._getOffset()}}]};return(this._inNavbar||"static"===this._config.display)&&(F.setDataAttribute(this._menu,"popper","static"),t.modifiers=[{name:"applyStyles",enabled:!1}]),{...t,...g(this._config.popperConfig,[t])}}_selectMenuItem({key:t,target:e}){const i=z.find(".dropdown-menu .dropdown-item:not(.disabled):not(:disabled)",this._menu).filter((t=>a(t)));i.length&&b(i,e,t===Ti,!i.includes(e)).focus()}static jQueryInterface(t){return this.each((function(){const e=qi.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}static clearMenus(t){if(2===t.button||"keyup"===t.type&&"Tab"!==t.key)return;const e=z.find(Ni);for(const i of e){const e=qi.getInstance(i);if(!e||!1===e._config.autoClose)continue;const 
n=t.composedPath(),s=n.includes(e._menu);if(n.includes(e._element)||"inside"===e._config.autoClose&&!s||"outside"===e._config.autoClose&&s)continue;if(e._menu.contains(t.target)&&("keyup"===t.type&&"Tab"===t.key||/input|select|option|textarea|form/i.test(t.target.tagName)))continue;const o={relatedTarget:e._element};"click"===t.type&&(o.clickEvent=t),e._completeHide(o)}}static dataApiKeydownHandler(t){const e=/input|textarea/i.test(t.target.tagName),i="Escape"===t.key,n=[Ei,Ti].includes(t.key);if(!n&&!i)return;if(e&&!i)return;t.preventDefault();const s=this.matches(Ii)?this:z.prev(this,Ii)[0]||z.next(this,Ii)[0]||z.findOne(Ii,t.delegateTarget.parentNode),o=qi.getOrCreateInstance(s);if(n)return t.stopPropagation(),o.show(),void o._selectMenuItem(t);o._isShown()&&(t.stopPropagation(),o.hide(),s.focus())}}N.on(document,Si,Ii,qi.dataApiKeydownHandler),N.on(document,Si,Pi,qi.dataApiKeydownHandler),N.on(document,Li,qi.clearMenus),N.on(document,Di,qi.clearMenus),N.on(document,Li,Ii,(function(t){t.preventDefault(),qi.getOrCreateInstance(this).toggle()})),m(qi);const Vi="backdrop",Ki="show",Qi=`mousedown.bs.${Vi}`,Xi={className:"modal-backdrop",clickCallback:null,isAnimated:!1,isVisible:!0,rootElement:"body"},Yi={className:"string",clickCallback:"(function|null)",isAnimated:"boolean",isVisible:"boolean",rootElement:"(element|string)"};class Ui extends H{constructor(t){super(),this._config=this._getConfig(t),this._isAppended=!1,this._element=null}static get Default(){return Xi}static get DefaultType(){return Yi}static get NAME(){return Vi}show(t){if(!this._config.isVisible)return void g(t);this._append();const 
e=this._getElement();this._config.isAnimated&&d(e),e.classList.add(Ki),this._emulateAnimation((()=>{g(t)}))}hide(t){this._config.isVisible?(this._getElement().classList.remove(Ki),this._emulateAnimation((()=>{this.dispose(),g(t)}))):g(t)}dispose(){this._isAppended&&(N.off(this._element,Qi),this._element.remove(),this._isAppended=!1)}_getElement(){if(!this._element){const t=document.createElement("div");t.className=this._config.className,this._config.isAnimated&&t.classList.add("fade"),this._element=t}return this._element}_configAfterMerge(t){return t.rootElement=r(t.rootElement),t}_append(){if(this._isAppended)return;const t=this._getElement();this._config.rootElement.append(t),N.on(t,Qi,(()=>{g(this._config.clickCallback)})),this._isAppended=!0}_emulateAnimation(t){_(t,this._getElement(),this._config.isAnimated)}}const Gi=".bs.focustrap",Ji=`focusin${Gi}`,Zi=`keydown.tab${Gi}`,tn="backward",en={autofocus:!0,trapElement:null},nn={autofocus:"boolean",trapElement:"element"};class sn extends H{constructor(t){super(),this._config=this._getConfig(t),this._isActive=!1,this._lastTabNavDirection=null}static get Default(){return en}static get DefaultType(){return nn}static get NAME(){return"focustrap"}activate(){this._isActive||(this._config.autofocus&&this._config.trapElement.focus(),N.off(document,Gi),N.on(document,Ji,(t=>this._handleFocusin(t))),N.on(document,Zi,(t=>this._handleKeydown(t))),this._isActive=!0)}deactivate(){this._isActive&&(this._isActive=!1,N.off(document,Gi))}_handleFocusin(t){const{trapElement:e}=this._config;if(t.target===document||t.target===e||e.contains(t.target))return;const i=z.focusableChildren(e);0===i.length?e.focus():this._lastTabNavDirection===tn?i[i.length-1].focus():i[0].focus()}_handleKeydown(t){"Tab"===t.key&&(this._lastTabNavDirection=t.shiftKey?tn:"forward")}}const on=".fixed-top, .fixed-bottom, .is-fixed, .sticky-top",rn=".sticky-top",an="padding-right",ln="margin-right";class 
cn{constructor(){this._element=document.body}getWidth(){const t=document.documentElement.clientWidth;return Math.abs(window.innerWidth-t)}hide(){const t=this.getWidth();this._disableOverFlow(),this._setElementAttributes(this._element,an,(e=>e+t)),this._setElementAttributes(on,an,(e=>e+t)),this._setElementAttributes(rn,ln,(e=>e-t))}reset(){this._resetElementAttributes(this._element,"overflow"),this._resetElementAttributes(this._element,an),this._resetElementAttributes(on,an),this._resetElementAttributes(rn,ln)}isOverflowing(){return this.getWidth()>0}_disableOverFlow(){this._saveInitialAttribute(this._element,"overflow"),this._element.style.overflow="hidden"}_setElementAttributes(t,e,i){const n=this.getWidth();this._applyManipulationCallback(t,(t=>{if(t!==this._element&&window.innerWidth>t.clientWidth+n)return;this._saveInitialAttribute(t,e);const s=window.getComputedStyle(t).getPropertyValue(e);t.style.setProperty(e,`${i(Number.parseFloat(s))}px`)}))}_saveInitialAttribute(t,e){const i=t.style.getPropertyValue(e);i&&F.setDataAttribute(t,e,i)}_resetElementAttributes(t,e){this._applyManipulationCallback(t,(t=>{const i=F.getDataAttribute(t,e);null!==i?(F.removeDataAttribute(t,e),t.style.setProperty(e,i)):t.style.removeProperty(e)}))}_applyManipulationCallback(t,e){if(o(t))e(t);else for(const i of z.find(t,this._element))e(i)}}const hn=".bs.modal",dn=`hide${hn}`,un=`hidePrevented${hn}`,fn=`hidden${hn}`,pn=`show${hn}`,mn=`shown${hn}`,gn=`resize${hn}`,_n=`click.dismiss${hn}`,bn=`mousedown.dismiss${hn}`,vn=`keydown.dismiss${hn}`,yn=`click${hn}.data-api`,wn="modal-open",An="show",En="modal-static",Tn={backdrop:!0,focus:!0,keyboard:!0},Cn={backdrop:"(boolean|string)",focus:"boolean",keyboard:"boolean"};class On extends W{constructor(t,e){super(t,e),this._dialog=z.findOne(".modal-dialog",this._element),this._backdrop=this._initializeBackDrop(),this._focustrap=this._initializeFocusTrap(),this._isShown=!1,this._isTransitioning=!1,this._scrollBar=new 
cn,this._addEventListeners()}static get Default(){return Tn}static get DefaultType(){return Cn}static get NAME(){return"modal"}toggle(t){return this._isShown?this.hide():this.show(t)}show(t){this._isShown||this._isTransitioning||N.trigger(this._element,pn,{relatedTarget:t}).defaultPrevented||(this._isShown=!0,this._isTransitioning=!0,this._scrollBar.hide(),document.body.classList.add(wn),this._adjustDialog(),this._backdrop.show((()=>this._showElement(t))))}hide(){this._isShown&&!this._isTransitioning&&(N.trigger(this._element,dn).defaultPrevented||(this._isShown=!1,this._isTransitioning=!0,this._focustrap.deactivate(),this._element.classList.remove(An),this._queueCallback((()=>this._hideModal()),this._element,this._isAnimated())))}dispose(){N.off(window,hn),N.off(this._dialog,hn),this._backdrop.dispose(),this._focustrap.deactivate(),super.dispose()}handleUpdate(){this._adjustDialog()}_initializeBackDrop(){return new Ui({isVisible:Boolean(this._config.backdrop),isAnimated:this._isAnimated()})}_initializeFocusTrap(){return new sn({trapElement:this._element})}_showElement(t){document.body.contains(this._element)||document.body.append(this._element),this._element.style.display="block",this._element.removeAttribute("aria-hidden"),this._element.setAttribute("aria-modal",!0),this._element.setAttribute("role","dialog"),this._element.scrollTop=0;const 
e=z.findOne(".modal-body",this._dialog);e&&(e.scrollTop=0),d(this._element),this._element.classList.add(An),this._queueCallback((()=>{this._config.focus&&this._focustrap.activate(),this._isTransitioning=!1,N.trigger(this._element,mn,{relatedTarget:t})}),this._dialog,this._isAnimated())}_addEventListeners(){N.on(this._element,vn,(t=>{"Escape"===t.key&&(this._config.keyboard?this.hide():this._triggerBackdropTransition())})),N.on(window,gn,(()=>{this._isShown&&!this._isTransitioning&&this._adjustDialog()})),N.on(this._element,bn,(t=>{N.one(this._element,_n,(e=>{this._element===t.target&&this._element===e.target&&("static"!==this._config.backdrop?this._config.backdrop&&this.hide():this._triggerBackdropTransition())}))}))}_hideModal(){this._element.style.display="none",this._element.setAttribute("aria-hidden",!0),this._element.removeAttribute("aria-modal"),this._element.removeAttribute("role"),this._isTransitioning=!1,this._backdrop.hide((()=>{document.body.classList.remove(wn),this._resetAdjustments(),this._scrollBar.reset(),N.trigger(this._element,fn)}))}_isAnimated(){return this._element.classList.contains("fade")}_triggerBackdropTransition(){if(N.trigger(this._element,un).defaultPrevented)return;const t=this._element.scrollHeight>document.documentElement.clientHeight,e=this._element.style.overflowY;"hidden"===e||this._element.classList.contains(En)||(t||(this._element.style.overflowY="hidden"),this._element.classList.add(En),this._queueCallback((()=>{this._element.classList.remove(En),this._queueCallback((()=>{this._element.style.overflowY=e}),this._dialog)}),this._dialog),this._element.focus())}_adjustDialog(){const t=this._element.scrollHeight>document.documentElement.clientHeight,e=this._scrollBar.getWidth(),i=e>0;if(i&&!t){const t=p()?"paddingLeft":"paddingRight";this._element.style[t]=`${e}px`}if(!i&&t){const 
t=p()?"paddingRight":"paddingLeft";this._element.style[t]=`${e}px`}}_resetAdjustments(){this._element.style.paddingLeft="",this._element.style.paddingRight=""}static jQueryInterface(t,e){return this.each((function(){const i=On.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===i[t])throw new TypeError(`No method named "${t}"`);i[t](e)}}))}}N.on(document,yn,'[data-bs-toggle="modal"]',(function(t){const e=z.getElementFromSelector(this);["A","AREA"].includes(this.tagName)&&t.preventDefault(),N.one(e,pn,(t=>{t.defaultPrevented||N.one(e,fn,(()=>{a(this)&&this.focus()}))}));const i=z.findOne(".modal.show");i&&On.getInstance(i).hide(),On.getOrCreateInstance(e).toggle(this)})),R(On),m(On);const xn=".bs.offcanvas",kn=".data-api",Ln=`load${xn}${kn}`,Sn="show",Dn="showing",$n="hiding",In=".offcanvas.show",Nn=`show${xn}`,Pn=`shown${xn}`,Mn=`hide${xn}`,jn=`hidePrevented${xn}`,Fn=`hidden${xn}`,Hn=`resize${xn}`,Wn=`click${xn}${kn}`,Bn=`keydown.dismiss${xn}`,zn={backdrop:!0,keyboard:!0,scroll:!1},Rn={backdrop:"(boolean|string)",keyboard:"boolean",scroll:"boolean"};class qn extends W{constructor(t,e){super(t,e),this._isShown=!1,this._backdrop=this._initializeBackDrop(),this._focustrap=this._initializeFocusTrap(),this._addEventListeners()}static get Default(){return zn}static get DefaultType(){return Rn}static get NAME(){return"offcanvas"}toggle(t){return this._isShown?this.hide():this.show(t)}show(t){this._isShown||N.trigger(this._element,Nn,{relatedTarget:t}).defaultPrevented||(this._isShown=!0,this._backdrop.show(),this._config.scroll||(new 
cn).hide(),this._element.setAttribute("aria-modal",!0),this._element.setAttribute("role","dialog"),this._element.classList.add(Dn),this._queueCallback((()=>{this._config.scroll&&!this._config.backdrop||this._focustrap.activate(),this._element.classList.add(Sn),this._element.classList.remove(Dn),N.trigger(this._element,Pn,{relatedTarget:t})}),this._element,!0))}hide(){this._isShown&&(N.trigger(this._element,Mn).defaultPrevented||(this._focustrap.deactivate(),this._element.blur(),this._isShown=!1,this._element.classList.add($n),this._backdrop.hide(),this._queueCallback((()=>{this._element.classList.remove(Sn,$n),this._element.removeAttribute("aria-modal"),this._element.removeAttribute("role"),this._config.scroll||(new cn).reset(),N.trigger(this._element,Fn)}),this._element,!0)))}dispose(){this._backdrop.dispose(),this._focustrap.deactivate(),super.dispose()}_initializeBackDrop(){const t=Boolean(this._config.backdrop);return new Ui({className:"offcanvas-backdrop",isVisible:t,isAnimated:!0,rootElement:this._element.parentNode,clickCallback:t?()=>{"static"!==this._config.backdrop?this.hide():N.trigger(this._element,jn)}:null})}_initializeFocusTrap(){return new sn({trapElement:this._element})}_addEventListeners(){N.on(this._element,Bn,(t=>{"Escape"===t.key&&(this._config.keyboard?this.hide():N.trigger(this._element,jn))}))}static jQueryInterface(t){return this.each((function(){const e=qn.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t](this)}}))}}N.on(document,Wn,'[data-bs-toggle="offcanvas"]',(function(t){const e=z.getElementFromSelector(this);if(["A","AREA"].includes(this.tagName)&&t.preventDefault(),l(this))return;N.one(e,Fn,(()=>{a(this)&&this.focus()}));const i=z.findOne(In);i&&i!==e&&qn.getInstance(i).hide(),qn.getOrCreateInstance(e).toggle(this)})),N.on(window,Ln,(()=>{for(const t of 
z.find(In))qn.getOrCreateInstance(t).show()})),N.on(window,Hn,(()=>{for(const t of z.find("[aria-modal][class*=show][class*=offcanvas-]"))"fixed"!==getComputedStyle(t).position&&qn.getOrCreateInstance(t).hide()})),R(qn),m(qn);const Vn={"*":["class","dir","id","lang","role",/^aria-[\w-]*$/i],a:["target","href","title","rel"],area:[],b:[],br:[],col:[],code:[],div:[],em:[],hr:[],h1:[],h2:[],h3:[],h4:[],h5:[],h6:[],i:[],img:["src","srcset","alt","title","width","height"],li:[],ol:[],p:[],pre:[],s:[],small:[],span:[],sub:[],sup:[],strong:[],u:[],ul:[]},Kn=new Set(["background","cite","href","itemtype","longdesc","poster","src","xlink:href"]),Qn=/^(?!javascript:)(?:[a-z0-9+.-]+:|[^&:/?#]*(?:[/?#]|$))/i,Xn=(t,e)=>{const i=t.nodeName.toLowerCase();return e.includes(i)?!Kn.has(i)||Boolean(Qn.test(t.nodeValue)):e.filter((t=>t instanceof RegExp)).some((t=>t.test(i)))},Yn={allowList:Vn,content:{},extraClass:"",html:!1,sanitize:!0,sanitizeFn:null,template:"
"},Un={allowList:"object",content:"object",extraClass:"(string|function)",html:"boolean",sanitize:"boolean",sanitizeFn:"(null|function)",template:"string"},Gn={entry:"(string|element|function|null)",selector:"(string|element)"};class Jn extends H{constructor(t){super(),this._config=this._getConfig(t)}static get Default(){return Yn}static get DefaultType(){return Un}static get NAME(){return"TemplateFactory"}getContent(){return Object.values(this._config.content).map((t=>this._resolvePossibleFunction(t))).filter(Boolean)}hasContent(){return this.getContent().length>0}changeContent(t){return this._checkContent(t),this._config.content={...this._config.content,...t},this}toHtml(){const t=document.createElement("div");t.innerHTML=this._maybeSanitize(this._config.template);for(const[e,i]of Object.entries(this._config.content))this._setContent(t,i,e);const e=t.children[0],i=this._resolvePossibleFunction(this._config.extraClass);return i&&e.classList.add(...i.split(" ")),e}_typeCheckConfig(t){super._typeCheckConfig(t),this._checkContent(t.content)}_checkContent(t){for(const[e,i]of Object.entries(t))super._typeCheckConfig({selector:e,entry:i},Gn)}_setContent(t,e,i){const n=z.findOne(i,t);n&&((e=this._resolvePossibleFunction(e))?o(e)?this._putElementInTemplate(r(e),n):this._config.html?n.innerHTML=this._maybeSanitize(e):n.textContent=e:n.remove())}_maybeSanitize(t){return this._config.sanitize?function(t,e,i){if(!t.length)return t;if(i&&"function"==typeof i)return i(t);const n=(new window.DOMParser).parseFromString(t,"text/html"),s=[].concat(...n.body.querySelectorAll("*"));for(const t of s){const i=t.nodeName.toLowerCase();if(!Object.keys(e).includes(i)){t.remove();continue}const n=[].concat(...t.attributes),s=[].concat(e["*"]||[],e[i]||[]);for(const e of n)Xn(e,s)||t.removeAttribute(e.nodeName)}return n.body.innerHTML}(t,this._config.allowList,this._config.sanitizeFn):t}_resolvePossibleFunction(t){return g(t,[this])}_putElementInTemplate(t,e){if(this._config.html)return 
e.innerHTML="",void e.append(t);e.textContent=t.textContent}}const Zn=new Set(["sanitize","allowList","sanitizeFn"]),ts="fade",es="show",is=".modal",ns="hide.bs.modal",ss="hover",os="focus",rs={AUTO:"auto",TOP:"top",RIGHT:p()?"left":"right",BOTTOM:"bottom",LEFT:p()?"right":"left"},as={allowList:Vn,animation:!0,boundary:"clippingParents",container:!1,customClass:"",delay:0,fallbackPlacements:["top","right","bottom","left"],html:!1,offset:[0,6],placement:"top",popperConfig:null,sanitize:!0,sanitizeFn:null,selector:!1,template:'',title:"",trigger:"hover focus"},ls={allowList:"object",animation:"boolean",boundary:"(string|element)",container:"(string|element|boolean)",customClass:"(string|function)",delay:"(number|object)",fallbackPlacements:"array",html:"boolean",offset:"(array|string|function)",placement:"(string|function)",popperConfig:"(null|object|function)",sanitize:"boolean",sanitizeFn:"(null|function)",selector:"(string|boolean)",template:"string",title:"(string|element|function)",trigger:"string"};class cs extends W{constructor(t,e){if(void 0===vi)throw new TypeError("Bootstrap's tooltips require Popper (https://popper.js.org)");super(t,e),this._isEnabled=!0,this._timeout=0,this._isHovered=null,this._activeTrigger={},this._popper=null,this._templateFactory=null,this._newContent=null,this.tip=null,this._setListeners(),this._config.selector||this._fixTitle()}static get Default(){return as}static get DefaultType(){return ls}static get 
NAME(){return"tooltip"}enable(){this._isEnabled=!0}disable(){this._isEnabled=!1}toggleEnabled(){this._isEnabled=!this._isEnabled}toggle(){this._isEnabled&&(this._activeTrigger.click=!this._activeTrigger.click,this._isShown()?this._leave():this._enter())}dispose(){clearTimeout(this._timeout),N.off(this._element.closest(is),ns,this._hideModalHandler),this._element.getAttribute("data-bs-original-title")&&this._element.setAttribute("title",this._element.getAttribute("data-bs-original-title")),this._disposePopper(),super.dispose()}show(){if("none"===this._element.style.display)throw new Error("Please use show on visible elements");if(!this._isWithContent()||!this._isEnabled)return;const t=N.trigger(this._element,this.constructor.eventName("show")),e=(c(this._element)||this._element.ownerDocument.documentElement).contains(this._element);if(t.defaultPrevented||!e)return;this._disposePopper();const i=this._getTipElement();this._element.setAttribute("aria-describedby",i.getAttribute("id"));const{container:n}=this._config;if(this._element.ownerDocument.documentElement.contains(this.tip)||(n.append(i),N.trigger(this._element,this.constructor.eventName("inserted"))),this._popper=this._createPopper(i),i.classList.add(es),"ontouchstart"in document.documentElement)for(const t of[].concat(...document.body.children))N.on(t,"mouseover",h);this._queueCallback((()=>{N.trigger(this._element,this.constructor.eventName("shown")),!1===this._isHovered&&this._leave(),this._isHovered=!1}),this.tip,this._isAnimated())}hide(){if(this._isShown()&&!N.trigger(this._element,this.constructor.eventName("hide")).defaultPrevented){if(this._getTipElement().classList.remove(es),"ontouchstart"in document.documentElement)for(const t 
of[].concat(...document.body.children))N.off(t,"mouseover",h);this._activeTrigger.click=!1,this._activeTrigger[os]=!1,this._activeTrigger[ss]=!1,this._isHovered=null,this._queueCallback((()=>{this._isWithActiveTrigger()||(this._isHovered||this._disposePopper(),this._element.removeAttribute("aria-describedby"),N.trigger(this._element,this.constructor.eventName("hidden")))}),this.tip,this._isAnimated())}}update(){this._popper&&this._popper.update()}_isWithContent(){return Boolean(this._getTitle())}_getTipElement(){return this.tip||(this.tip=this._createTipElement(this._newContent||this._getContentForTemplate())),this.tip}_createTipElement(t){const e=this._getTemplateFactory(t).toHtml();if(!e)return null;e.classList.remove(ts,es),e.classList.add(`bs-${this.constructor.NAME}-auto`);const i=(t=>{do{t+=Math.floor(1e6*Math.random())}while(document.getElementById(t));return t})(this.constructor.NAME).toString();return e.setAttribute("id",i),this._isAnimated()&&e.classList.add(ts),e}setContent(t){this._newContent=t,this._isShown()&&(this._disposePopper(),this.show())}_getTemplateFactory(t){return this._templateFactory?this._templateFactory.changeContent(t):this._templateFactory=new Jn({...this._config,content:t,extraClass:this._resolvePossibleFunction(this._config.customClass)}),this._templateFactory}_getContentForTemplate(){return{".tooltip-inner":this._getTitle()}}_getTitle(){return this._resolvePossibleFunction(this._config.title)||this._element.getAttribute("data-bs-original-title")}_initializeOnDelegatedTarget(t){return this.constructor.getOrCreateInstance(t.delegateTarget,this._getDelegateConfig())}_isAnimated(){return this._config.animation||this.tip&&this.tip.classList.contains(ts)}_isShown(){return this.tip&&this.tip.classList.contains(es)}_createPopper(t){const e=g(this._config.placement,[this,t,this._element]),i=rs[e.toUpperCase()];return bi(this._element,t,this._getPopperConfig(i))}_getOffset(){const{offset:t}=this._config;return"string"==typeof 
t?t.split(",").map((t=>Number.parseInt(t,10))):"function"==typeof t?e=>t(e,this._element):t}_resolvePossibleFunction(t){return g(t,[this._element])}_getPopperConfig(t){const e={placement:t,modifiers:[{name:"flip",options:{fallbackPlacements:this._config.fallbackPlacements}},{name:"offset",options:{offset:this._getOffset()}},{name:"preventOverflow",options:{boundary:this._config.boundary}},{name:"arrow",options:{element:`.${this.constructor.NAME}-arrow`}},{name:"preSetPlacement",enabled:!0,phase:"beforeMain",fn:t=>{this._getTipElement().setAttribute("data-popper-placement",t.state.placement)}}]};return{...e,...g(this._config.popperConfig,[e])}}_setListeners(){const t=this._config.trigger.split(" ");for(const e of t)if("click"===e)N.on(this._element,this.constructor.eventName("click"),this._config.selector,(t=>{this._initializeOnDelegatedTarget(t).toggle()}));else if("manual"!==e){const t=e===ss?this.constructor.eventName("mouseenter"):this.constructor.eventName("focusin"),i=e===ss?this.constructor.eventName("mouseleave"):this.constructor.eventName("focusout");N.on(this._element,t,this._config.selector,(t=>{const e=this._initializeOnDelegatedTarget(t);e._activeTrigger["focusin"===t.type?os:ss]=!0,e._enter()})),N.on(this._element,i,this._config.selector,(t=>{const e=this._initializeOnDelegatedTarget(t);e._activeTrigger["focusout"===t.type?os:ss]=e._element.contains(t.relatedTarget),e._leave()}))}this._hideModalHandler=()=>{this._element&&this.hide()},N.on(this._element.closest(is),ns,this._hideModalHandler)}_fixTitle(){const 
t=this._element.getAttribute("title");t&&(this._element.getAttribute("aria-label")||this._element.textContent.trim()||this._element.setAttribute("aria-label",t),this._element.setAttribute("data-bs-original-title",t),this._element.removeAttribute("title"))}_enter(){this._isShown()||this._isHovered?this._isHovered=!0:(this._isHovered=!0,this._setTimeout((()=>{this._isHovered&&this.show()}),this._config.delay.show))}_leave(){this._isWithActiveTrigger()||(this._isHovered=!1,this._setTimeout((()=>{this._isHovered||this.hide()}),this._config.delay.hide))}_setTimeout(t,e){clearTimeout(this._timeout),this._timeout=setTimeout(t,e)}_isWithActiveTrigger(){return Object.values(this._activeTrigger).includes(!0)}_getConfig(t){const e=F.getDataAttributes(this._element);for(const t of Object.keys(e))Zn.has(t)&&delete e[t];return t={...e,..."object"==typeof t&&t?t:{}},t=this._mergeConfigObj(t),t=this._configAfterMerge(t),this._typeCheckConfig(t),t}_configAfterMerge(t){return t.container=!1===t.container?document.body:r(t.container),"number"==typeof t.delay&&(t.delay={show:t.delay,hide:t.delay}),"number"==typeof t.title&&(t.title=t.title.toString()),"number"==typeof t.content&&(t.content=t.content.toString()),t}_getDelegateConfig(){const t={};for(const[e,i]of Object.entries(this._config))this.constructor.Default[e]!==i&&(t[e]=i);return t.selector=!1,t.trigger="manual",t}_disposePopper(){this._popper&&(this._popper.destroy(),this._popper=null),this.tip&&(this.tip.remove(),this.tip=null)}static jQueryInterface(t){return this.each((function(){const e=cs.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}}m(cs);const hs={...cs.Default,content:"",offset:[0,8],placement:"right",template:'',trigger:"click"},ds={...cs.DefaultType,content:"(null|string|element|function)"};class us extends cs{static get Default(){return hs}static get DefaultType(){return ds}static get NAME(){return"popover"}_isWithContent(){return 
this._getTitle()||this._getContent()}_getContentForTemplate(){return{".popover-header":this._getTitle(),".popover-body":this._getContent()}}_getContent(){return this._resolvePossibleFunction(this._config.content)}static jQueryInterface(t){return this.each((function(){const e=us.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}}m(us);const fs=".bs.scrollspy",ps=`activate${fs}`,ms=`click${fs}`,gs=`load${fs}.data-api`,_s="active",bs="[href]",vs=".nav-link",ys=`${vs}, .nav-item > ${vs}, .list-group-item`,ws={offset:null,rootMargin:"0px 0px -25%",smoothScroll:!1,target:null,threshold:[.1,.5,1]},As={offset:"(number|null)",rootMargin:"string",smoothScroll:"boolean",target:"element",threshold:"array"};class Es extends W{constructor(t,e){super(t,e),this._targetLinks=new Map,this._observableSections=new Map,this._rootElement="visible"===getComputedStyle(this._element).overflowY?null:this._element,this._activeTarget=null,this._observer=null,this._previousScrollData={visibleEntryTop:0,parentScrollTop:0},this.refresh()}static get Default(){return ws}static get DefaultType(){return As}static get NAME(){return"scrollspy"}refresh(){this._initializeTargetsAndObservables(),this._maybeEnableSmoothScroll(),this._observer?this._observer.disconnect():this._observer=this._getNewObserver();for(const t of this._observableSections.values())this._observer.observe(t)}dispose(){this._observer.disconnect(),super.dispose()}_configAfterMerge(t){return t.target=r(t.target)||document.body,t.rootMargin=t.offset?`${t.offset}px 0px -30%`:t.rootMargin,"string"==typeof t.threshold&&(t.threshold=t.threshold.split(",").map((t=>Number.parseFloat(t)))),t}_maybeEnableSmoothScroll(){this._config.smoothScroll&&(N.off(this._config.target,ms),N.on(this._config.target,ms,bs,(t=>{const e=this._observableSections.get(t.target.hash);if(e){t.preventDefault();const 
i=this._rootElement||window,n=e.offsetTop-this._element.offsetTop;if(i.scrollTo)return void i.scrollTo({top:n,behavior:"smooth"});i.scrollTop=n}})))}_getNewObserver(){const t={root:this._rootElement,threshold:this._config.threshold,rootMargin:this._config.rootMargin};return new IntersectionObserver((t=>this._observerCallback(t)),t)}_observerCallback(t){const e=t=>this._targetLinks.get(`#${t.target.id}`),i=t=>{this._previousScrollData.visibleEntryTop=t.target.offsetTop,this._process(e(t))},n=(this._rootElement||document.documentElement).scrollTop,s=n>=this._previousScrollData.parentScrollTop;this._previousScrollData.parentScrollTop=n;for(const o of t){if(!o.isIntersecting){this._activeTarget=null,this._clearActiveClass(e(o));continue}const t=o.target.offsetTop>=this._previousScrollData.visibleEntryTop;if(s&&t){if(i(o),!n)return}else s||t||i(o)}}_initializeTargetsAndObservables(){this._targetLinks=new Map,this._observableSections=new Map;const t=z.find(bs,this._config.target);for(const e of t){if(!e.hash||l(e))continue;const t=z.findOne(decodeURI(e.hash),this._element);a(t)&&(this._targetLinks.set(decodeURI(e.hash),e),this._observableSections.set(e.hash,t))}}_process(t){this._activeTarget!==t&&(this._clearActiveClass(this._config.target),this._activeTarget=t,t.classList.add(_s),this._activateParents(t),N.trigger(this._element,ps,{relatedTarget:t}))}_activateParents(t){if(t.classList.contains("dropdown-item"))z.findOne(".dropdown-toggle",t.closest(".dropdown")).classList.add(_s);else for(const e of z.parents(t,".nav, .list-group"))for(const t of z.prev(e,ys))t.classList.add(_s)}_clearActiveClass(t){t.classList.remove(_s);const e=z.find(`${bs}.${_s}`,t);for(const t of e)t.classList.remove(_s)}static jQueryInterface(t){return this.each((function(){const e=Es.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t]()}}))}}N.on(window,gs,(()=>{for(const t of 
z.find('[data-bs-spy="scroll"]'))Es.getOrCreateInstance(t)})),m(Es);const Ts=".bs.tab",Cs=`hide${Ts}`,Os=`hidden${Ts}`,xs=`show${Ts}`,ks=`shown${Ts}`,Ls=`click${Ts}`,Ss=`keydown${Ts}`,Ds=`load${Ts}`,$s="ArrowLeft",Is="ArrowRight",Ns="ArrowUp",Ps="ArrowDown",Ms="Home",js="End",Fs="active",Hs="fade",Ws="show",Bs=":not(.dropdown-toggle)",zs='[data-bs-toggle="tab"], [data-bs-toggle="pill"], [data-bs-toggle="list"]',Rs=`.nav-link${Bs}, .list-group-item${Bs}, [role="tab"]${Bs}, ${zs}`,qs=`.${Fs}[data-bs-toggle="tab"], .${Fs}[data-bs-toggle="pill"], .${Fs}[data-bs-toggle="list"]`;class Vs extends W{constructor(t){super(t),this._parent=this._element.closest('.list-group, .nav, [role="tablist"]'),this._parent&&(this._setInitialAttributes(this._parent,this._getChildren()),N.on(this._element,Ss,(t=>this._keydown(t))))}static get NAME(){return"tab"}show(){const t=this._element;if(this._elemIsActive(t))return;const e=this._getActiveElem(),i=e?N.trigger(e,Cs,{relatedTarget:t}):null;N.trigger(t,xs,{relatedTarget:e}).defaultPrevented||i&&i.defaultPrevented||(this._deactivate(e,t),this._activate(t,e))}_activate(t,e){t&&(t.classList.add(Fs),this._activate(z.getElementFromSelector(t)),this._queueCallback((()=>{"tab"===t.getAttribute("role")?(t.removeAttribute("tabindex"),t.setAttribute("aria-selected",!0),this._toggleDropDown(t,!0),N.trigger(t,ks,{relatedTarget:e})):t.classList.add(Ws)}),t,t.classList.contains(Hs)))}_deactivate(t,e){t&&(t.classList.remove(Fs),t.blur(),this._deactivate(z.getElementFromSelector(t)),this._queueCallback((()=>{"tab"===t.getAttribute("role")?(t.setAttribute("aria-selected",!1),t.setAttribute("tabindex","-1"),this._toggleDropDown(t,!1),N.trigger(t,Os,{relatedTarget:e})):t.classList.remove(Ws)}),t,t.classList.contains(Hs)))}_keydown(t){if(![$s,Is,Ns,Ps,Ms,js].includes(t.key))return;t.stopPropagation(),t.preventDefault();const e=this._getChildren().filter((t=>!l(t)));let i;if([Ms,js].includes(t.key))i=e[t.key===Ms?0:e.length-1];else{const 
n=[Is,Ps].includes(t.key);i=b(e,t.target,n,!0)}i&&(i.focus({preventScroll:!0}),Vs.getOrCreateInstance(i).show())}_getChildren(){return z.find(Rs,this._parent)}_getActiveElem(){return this._getChildren().find((t=>this._elemIsActive(t)))||null}_setInitialAttributes(t,e){this._setAttributeIfNotExists(t,"role","tablist");for(const t of e)this._setInitialAttributesOnChild(t)}_setInitialAttributesOnChild(t){t=this._getInnerElement(t);const e=this._elemIsActive(t),i=this._getOuterElement(t);t.setAttribute("aria-selected",e),i!==t&&this._setAttributeIfNotExists(i,"role","presentation"),e||t.setAttribute("tabindex","-1"),this._setAttributeIfNotExists(t,"role","tab"),this._setInitialAttributesOnTargetPanel(t)}_setInitialAttributesOnTargetPanel(t){const e=z.getElementFromSelector(t);e&&(this._setAttributeIfNotExists(e,"role","tabpanel"),t.id&&this._setAttributeIfNotExists(e,"aria-labelledby",`${t.id}`))}_toggleDropDown(t,e){const i=this._getOuterElement(t);if(!i.classList.contains("dropdown"))return;const n=(t,n)=>{const s=z.findOne(t,i);s&&s.classList.toggle(n,e)};n(".dropdown-toggle",Fs),n(".dropdown-menu",Ws),i.setAttribute("aria-expanded",e)}_setAttributeIfNotExists(t,e,i){t.hasAttribute(e)||t.setAttribute(e,i)}_elemIsActive(t){return t.classList.contains(Fs)}_getInnerElement(t){return t.matches(Rs)?t:z.findOne(Rs,t)}_getOuterElement(t){return t.closest(".nav-item, .list-group-item")||t}static jQueryInterface(t){return this.each((function(){const e=Vs.getOrCreateInstance(this);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t]()}}))}}N.on(document,Ls,zs,(function(t){["A","AREA"].includes(this.tagName)&&t.preventDefault(),l(this)||Vs.getOrCreateInstance(this).show()})),N.on(window,Ds,(()=>{for(const t of z.find(qs))Vs.getOrCreateInstance(t)})),m(Vs);const 
Ks=".bs.toast",Qs=`mouseover${Ks}`,Xs=`mouseout${Ks}`,Ys=`focusin${Ks}`,Us=`focusout${Ks}`,Gs=`hide${Ks}`,Js=`hidden${Ks}`,Zs=`show${Ks}`,to=`shown${Ks}`,eo="hide",io="show",no="showing",so={animation:"boolean",autohide:"boolean",delay:"number"},oo={animation:!0,autohide:!0,delay:5e3};class ro extends W{constructor(t,e){super(t,e),this._timeout=null,this._hasMouseInteraction=!1,this._hasKeyboardInteraction=!1,this._setListeners()}static get Default(){return oo}static get DefaultType(){return so}static get NAME(){return"toast"}show(){N.trigger(this._element,Zs).defaultPrevented||(this._clearTimeout(),this._config.animation&&this._element.classList.add("fade"),this._element.classList.remove(eo),d(this._element),this._element.classList.add(io,no),this._queueCallback((()=>{this._element.classList.remove(no),N.trigger(this._element,to),this._maybeScheduleHide()}),this._element,this._config.animation))}hide(){this.isShown()&&(N.trigger(this._element,Gs).defaultPrevented||(this._element.classList.add(no),this._queueCallback((()=>{this._element.classList.add(eo),this._element.classList.remove(no,io),N.trigger(this._element,Js)}),this._element,this._config.animation)))}dispose(){this._clearTimeout(),this.isShown()&&this._element.classList.remove(io),super.dispose()}isShown(){return this._element.classList.contains(io)}_maybeScheduleHide(){this._config.autohide&&(this._hasMouseInteraction||this._hasKeyboardInteraction||(this._timeout=setTimeout((()=>{this.hide()}),this._config.delay)))}_onInteraction(t,e){switch(t.type){case"mouseover":case"mouseout":this._hasMouseInteraction=e;break;case"focusin":case"focusout":this._hasKeyboardInteraction=e}if(e)return void this._clearTimeout();const 
i=t.relatedTarget;this._element===i||this._element.contains(i)||this._maybeScheduleHide()}_setListeners(){N.on(this._element,Qs,(t=>this._onInteraction(t,!0))),N.on(this._element,Xs,(t=>this._onInteraction(t,!1))),N.on(this._element,Ys,(t=>this._onInteraction(t,!0))),N.on(this._element,Us,(t=>this._onInteraction(t,!1)))}_clearTimeout(){clearTimeout(this._timeout),this._timeout=null}static jQueryInterface(t){return this.each((function(){const e=ro.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t](this)}}))}}return R(ro),m(ro),{Alert:Q,Button:Y,Carousel:xt,Collapse:Bt,Dropdown:qi,Modal:On,Offcanvas:qn,Popover:us,ScrollSpy:Es,Tab:Vs,Toast:ro,Tooltip:cs}})); +//# sourceMappingURL=bootstrap.bundle.min.js.map \ No newline at end of file diff --git a/docs/2_39/site_libs/clipboard/clipboard.min.js b/docs/2_39/site_libs/clipboard/clipboard.min.js new file mode 100644 index 000000000..1103f811e --- /dev/null +++ b/docs/2_39/site_libs/clipboard/clipboard.min.js @@ -0,0 +1,7 @@ +/*! 
+ * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=11&&void 0!==arguments[1]?arguments[1]:null,i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null,n=e[s]=e[s]||[],l={all:n,evt:null,found:null};return t&&i&&P(n)>0&&o(n,(function(e,n){if(e.eventName==t&&e.fn.toString()==i.toString())return l.found=!0,l.evt=n,!1})),l}function a(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},i=t.onElement,n=t.withCallback,s=t.avoidDuplicate,l=void 0===s||s,a=t.once,h=void 0!==a&&a,d=t.useCapture,c=void 0!==d&&d,u=arguments.length>2?arguments[2]:void 0,g=i||[];function v(e){T(n)&&n.call(u,e,this),h&&v.destroy()}return C(g)&&(g=document.querySelectorAll(g)),v.destroy=function(){o(g,(function(t){var i=r(t,e,v);i.found&&i.all.splice(i.evt,1),t.removeEventListener&&t.removeEventListener(e,v,c)}))},o(g,(function(t){var i=r(t,e,v);(t.addEventListener&&l&&!i.found||!l)&&(t.addEventListener(e,v,c),i.all.push({eventName:e,fn:v}))})),v}function h(e,t){o(t.split(" 
"),(function(t){return e.classList.add(t)}))}function d(e,t){o(t.split(" "),(function(t){return e.classList.remove(t)}))}function c(e,t){return e.classList.contains(t)}function u(e,t){for(;e!==document.body;){if(!(e=e.parentElement))return!1;if("function"==typeof e.matches?e.matches(t):e.msMatchesSelector(t))return e}}function g(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"",i=arguments.length>2&&void 0!==arguments[2]&&arguments[2];if(!e||""===t)return!1;if("none"===t)return T(i)&&i(),!1;var n=x(),s=t.split(" ");o(s,(function(t){h(e,"g"+t)})),a(n,{onElement:e,avoidDuplicate:!1,once:!0,withCallback:function(e,t){o(s,(function(e){d(t,"g"+e)})),T(i)&&i()}})}function v(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"";if(""===t)return e.style.webkitTransform="",e.style.MozTransform="",e.style.msTransform="",e.style.OTransform="",e.style.transform="",!1;e.style.webkitTransform=t,e.style.MozTransform=t,e.style.msTransform=t,e.style.OTransform=t,e.style.transform=t}function f(e){e.style.display="block"}function p(e){e.style.display="none"}function m(e){var t=document.createDocumentFragment(),i=document.createElement("div");for(i.innerHTML=e;i.firstChild;)t.appendChild(i.firstChild);return t}function y(){return{width:window.innerWidth||document.documentElement.clientWidth||document.body.clientWidth,height:window.innerHeight||document.documentElement.clientHeight||document.body.clientHeight}}function x(){var e,t=document.createElement("fakeelement"),i={animation:"animationend",OAnimation:"oAnimationEnd",MozAnimation:"animationend",WebkitAnimation:"webkitAnimationEnd"};for(e in i)if(void 0!==t.style[e])return i[e]}function b(e,t,i,n){if(e())t();else{var s;i||(i=100);var l=setInterval((function(){e()&&(clearInterval(l),s&&clearTimeout(s),t())}),i);n&&(s=setTimeout((function(){clearInterval(l)}),n))}}function S(e,t,i){if(I(e))console.error("Inject assets error");else if(T(t)&&(i=t,t=!1),C(t)&&t in window)T(i)&&i();else{var 
n;if(-1!==e.indexOf(".css")){if((n=document.querySelectorAll('link[href="'+e+'"]'))&&n.length>0)return void(T(i)&&i());var s=document.getElementsByTagName("head")[0],l=s.querySelectorAll('link[rel="stylesheet"]'),o=document.createElement("link");return o.rel="stylesheet",o.type="text/css",o.href=e,o.media="all",l?s.insertBefore(o,l[0]):s.appendChild(o),void(T(i)&&i())}if((n=document.querySelectorAll('script[src="'+e+'"]'))&&n.length>0){if(T(i)){if(C(t))return b((function(){return void 0!==window[t]}),(function(){i()})),!1;i()}}else{var r=document.createElement("script");r.type="text/javascript",r.src=e,r.onload=function(){if(T(i)){if(C(t))return b((function(){return void 0!==window[t]}),(function(){i()})),!1;i()}},document.body.appendChild(r)}}}function w(){return"navigator"in window&&window.navigator.userAgent.match(/(iPad)|(iPhone)|(iPod)|(Android)|(PlayBook)|(BB10)|(BlackBerry)|(Opera Mini)|(IEMobile)|(webOS)|(MeeGo)/i)}function T(e){return"function"==typeof e}function C(e){return"string"==typeof e}function k(e){return!(!e||!e.nodeType||1!=e.nodeType)}function E(e){return Array.isArray(e)}function A(e){return e&&e.length&&isFinite(e.length)}function L(t){return"object"===e(t)&&null!=t&&!T(t)&&!E(t)}function I(e){return null==e}function O(e,t){return null!==e&&hasOwnProperty.call(e,t)}function P(e){if(L(e)){if(e.keys)return e.keys().length;var t=0;for(var i in e)O(e,i)&&t++;return t}return e.length}function M(e){return!isNaN(parseFloat(e))&&isFinite(e)}function z(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:-1,t=document.querySelectorAll(".gbtn[data-taborder]:not(.disabled)");if(!t.length)return!1;if(1==t.length)return t[0];"string"==typeof e&&(e=parseInt(e));var i=[];o(t,(function(e){i.push(e.getAttribute("data-taborder"))}));var n=Math.max.apply(Math,i.map((function(e){return parseInt(e)}))),s=e<0?1:e+1;s>n&&(s="1");var l=i.filter((function(e){return e>=parseInt(s)})),r=l.sort()[0];return 
document.querySelector('.gbtn[data-taborder="'.concat(r,'"]'))}function X(e){if(e.events.hasOwnProperty("keyboard"))return!1;e.events.keyboard=a("keydown",{onElement:window,withCallback:function(t,i){var n=(t=t||window.event).keyCode;if(9==n){var s=document.querySelector(".gbtn.focused");if(!s){var l=!(!document.activeElement||!document.activeElement.nodeName)&&document.activeElement.nodeName.toLocaleLowerCase();if("input"==l||"textarea"==l||"button"==l)return}t.preventDefault();var o=document.querySelectorAll(".gbtn[data-taborder]");if(!o||o.length<=0)return;if(!s){var r=z();return void(r&&(r.focus(),h(r,"focused")))}var a=z(s.getAttribute("data-taborder"));d(s,"focused"),a&&(a.focus(),h(a,"focused"))}39==n&&e.nextSlide(),37==n&&e.prevSlide(),27==n&&e.close()}})}function Y(e){return Math.sqrt(e.x*e.x+e.y*e.y)}function q(e,t){var i=function(e,t){var i=Y(e)*Y(t);if(0===i)return 0;var n=function(e,t){return e.x*t.x+e.y*t.y}(e,t)/i;return n>1&&(n=1),Math.acos(n)}(e,t);return function(e,t){return e.x*t.y-t.x*e.y}(e,t)>0&&(i*=-1),180*i/Math.PI}var N=function(){function e(i){t(this,e),this.handlers=[],this.el=i}return n(e,[{key:"add",value:function(e){this.handlers.push(e)}},{key:"del",value:function(e){e||(this.handlers=[]);for(var t=this.handlers.length;t>=0;t--)this.handlers[t]===e&&this.handlers.splice(t,1)}},{key:"dispatch",value:function(){for(var e=0,t=this.handlers.length;e=0)console.log("ignore drag for this touched element",e.target.nodeName.toLowerCase());else{this.now=Date.now(),this.x1=e.touches[0].pageX,this.y1=e.touches[0].pageY,this.delta=this.now-(this.last||this.now),this.touchStart.dispatch(e,this.element),null!==this.preTapPosition.x&&(this.isDoubleTap=this.delta>0&&this.delta<=250&&Math.abs(this.preTapPosition.x-this.x1)<30&&Math.abs(this.preTapPosition.y-this.y1)<30,this.isDoubleTap&&clearTimeout(this.singleTapTimeout)),this.preTapPosition.x=this.x1,this.preTapPosition.y=this.y1,this.last=this.now;var 
t=this.preV;if(e.touches.length>1){this._cancelLongTap(),this._cancelSingleTap();var i={x:e.touches[1].pageX-this.x1,y:e.touches[1].pageY-this.y1};t.x=i.x,t.y=i.y,this.pinchStartLen=Y(t),this.multipointStart.dispatch(e,this.element)}this._preventTap=!1,this.longTapTimeout=setTimeout(function(){this.longTap.dispatch(e,this.element),this._preventTap=!0}.bind(this),750)}}}},{key:"move",value:function(e){if(e.touches){var t=this.preV,i=e.touches.length,n=e.touches[0].pageX,s=e.touches[0].pageY;if(this.isDoubleTap=!1,i>1){var l=e.touches[1].pageX,o=e.touches[1].pageY,r={x:e.touches[1].pageX-n,y:e.touches[1].pageY-s};null!==t.x&&(this.pinchStartLen>0&&(e.zoom=Y(r)/this.pinchStartLen,this.pinch.dispatch(e,this.element)),e.angle=q(r,t),this.rotate.dispatch(e,this.element)),t.x=r.x,t.y=r.y,null!==this.x2&&null!==this.sx2?(e.deltaX=(n-this.x2+l-this.sx2)/2,e.deltaY=(s-this.y2+o-this.sy2)/2):(e.deltaX=0,e.deltaY=0),this.twoFingerPressMove.dispatch(e,this.element),this.sx2=l,this.sy2=o}else{if(null!==this.x2){e.deltaX=n-this.x2,e.deltaY=s-this.y2;var a=Math.abs(this.x1-this.x2),h=Math.abs(this.y1-this.y2);(a>10||h>10)&&(this._preventTap=!0)}else e.deltaX=0,e.deltaY=0;this.pressMove.dispatch(e,this.element)}this.touchMove.dispatch(e,this.element),this._cancelLongTap(),this.x2=n,this.y2=s,i>1&&e.preventDefault()}}},{key:"end",value:function(e){if(e.changedTouches){this._cancelLongTap();var 
t=this;e.touches.length<2&&(this.multipointEnd.dispatch(e,this.element),this.sx2=this.sy2=null),this.x2&&Math.abs(this.x1-this.x2)>30||this.y2&&Math.abs(this.y1-this.y2)>30?(e.direction=this._swipeDirection(this.x1,this.x2,this.y1,this.y2),this.swipeTimeout=setTimeout((function(){t.swipe.dispatch(e,t.element)}),0)):(this.tapTimeout=setTimeout((function(){t._preventTap||t.tap.dispatch(e,t.element),t.isDoubleTap&&(t.doubleTap.dispatch(e,t.element),t.isDoubleTap=!1)}),0),t.isDoubleTap||(t.singleTapTimeout=setTimeout((function(){t.singleTap.dispatch(e,t.element)}),250))),this.touchEnd.dispatch(e,this.element),this.preV.x=0,this.preV.y=0,this.zoom=1,this.pinchStartLen=null,this.x1=this.x2=this.y1=this.y2=null}}},{key:"cancelAll",value:function(){this._preventTap=!0,clearTimeout(this.singleTapTimeout),clearTimeout(this.tapTimeout),clearTimeout(this.longTapTimeout),clearTimeout(this.swipeTimeout)}},{key:"cancel",value:function(e){this.cancelAll(),this.touchCancel.dispatch(e,this.element)}},{key:"_cancelLongTap",value:function(){clearTimeout(this.longTapTimeout)}},{key:"_cancelSingleTap",value:function(){clearTimeout(this.singleTapTimeout)}},{key:"_swipeDirection",value:function(e,t,i,n){return Math.abs(e-t)>=Math.abs(i-n)?e-t>0?"Left":"Right":i-n>0?"Up":"Down"}},{key:"on",value:function(e,t){this[e]&&this[e].add(t)}},{key:"off",value:function(e,t){this[e]&&this[e].del(t)}},{key:"destroy",value:function(){return 
this.singleTapTimeout&&clearTimeout(this.singleTapTimeout),this.tapTimeout&&clearTimeout(this.tapTimeout),this.longTapTimeout&&clearTimeout(this.longTapTimeout),this.swipeTimeout&&clearTimeout(this.swipeTimeout),this.element.removeEventListener("touchstart",this.start),this.element.removeEventListener("touchmove",this.move),this.element.removeEventListener("touchend",this.end),this.element.removeEventListener("touchcancel",this.cancel),this.rotate.del(),this.touchStart.del(),this.multipointStart.del(),this.multipointEnd.del(),this.pinch.del(),this.swipe.del(),this.tap.del(),this.doubleTap.del(),this.longTap.del(),this.singleTap.del(),this.pressMove.del(),this.twoFingerPressMove.del(),this.touchMove.del(),this.touchEnd.del(),this.touchCancel.del(),this.preV=this.pinchStartLen=this.zoom=this.isDoubleTap=this.delta=this.last=this.now=this.tapTimeout=this.singleTapTimeout=this.longTapTimeout=this.swipeTimeout=this.x1=this.x2=this.y1=this.y2=this.preTapPosition=this.rotate=this.touchStart=this.multipointStart=this.multipointEnd=this.pinch=this.swipe=this.tap=this.doubleTap=this.longTap=this.singleTap=this.pressMove=this.touchMove=this.touchEnd=this.touchCancel=this.twoFingerPressMove=null,window.removeEventListener("scroll",this._cancelAllHandler),null}}]),e}();function W(e){var t=function(){var e,t=document.createElement("fakeelement"),i={transition:"transitionend",OTransition:"oTransitionEnd",MozTransition:"transitionend",WebkitTransition:"webkitTransitionEnd"};for(e in i)if(void 0!==t.style[e])return i[e]}(),i=window.innerWidth||document.documentElement.clientWidth||document.body.clientWidth,n=c(e,"gslide-media")?e:e.querySelector(".gslide-media"),s=u(n,".ginner-container"),l=e.querySelector(".gslide-description");i>769&&(n=s),h(n,"greset"),v(n,"translate3d(0, 0, 0)"),a(t,{onElement:n,once:!0,withCallback:function(e,t){d(n,"greset")}}),n.style.opacity="",l&&(l.style.opacity="")}function B(e){if(e.events.hasOwnProperty("touch"))return!1;var 
t,i,n,s=y(),l=s.width,o=s.height,r=!1,a=null,g=null,f=null,p=!1,m=1,x=1,b=!1,S=!1,w=null,T=null,C=null,k=null,E=0,A=0,L=!1,I=!1,O={},P={},M=0,z=0,X=document.getElementById("glightbox-slider"),Y=document.querySelector(".goverlay"),q=new _(X,{touchStart:function(t){if(r=!0,(c(t.targetTouches[0].target,"ginner-container")||u(t.targetTouches[0].target,".gslide-desc")||"a"==t.targetTouches[0].target.nodeName.toLowerCase())&&(r=!1),u(t.targetTouches[0].target,".gslide-inline")&&!c(t.targetTouches[0].target.parentNode,"gslide-inline")&&(r=!1),r){if(P=t.targetTouches[0],O.pageX=t.targetTouches[0].pageX,O.pageY=t.targetTouches[0].pageY,M=t.targetTouches[0].clientX,z=t.targetTouches[0].clientY,a=e.activeSlide,g=a.querySelector(".gslide-media"),n=a.querySelector(".gslide-inline"),f=null,c(g,"gslide-image")&&(f=g.querySelector("img")),(window.innerWidth||document.documentElement.clientWidth||document.body.clientWidth)>769&&(g=a.querySelector(".ginner-container")),d(Y,"greset"),t.pageX>20&&t.pageXo){var a=O.pageX-P.pageX;if(Math.abs(a)<=13)return!1}p=!0;var h,d=s.targetTouches[0].clientX,c=s.targetTouches[0].clientY,u=M-d,m=z-c;if(Math.abs(u)>Math.abs(m)?(L=!1,I=!0):(I=!1,L=!0),t=P.pageX-O.pageX,E=100*t/l,i=P.pageY-O.pageY,A=100*i/o,L&&f&&(h=1-Math.abs(i)/o,Y.style.opacity=h,e.settings.touchFollowAxis&&(E=0)),I&&(h=1-Math.abs(t)/l,g.style.opacity=h,e.settings.touchFollowAxis&&(A=0)),!f)return v(g,"translate3d(".concat(E,"%, 0, 0)"));v(g,"translate3d(".concat(E,"%, ").concat(A,"%, 0)"))}},touchEnd:function(){if(r){if(p=!1,S||b)return C=w,void(k=T);var t=Math.abs(parseInt(A)),i=Math.abs(parseInt(E));if(!(t>29&&f))return t<29&&i<25?(h(Y,"greset"),Y.style.opacity=1,W(g)):void 0;e.close()}},multipointEnd:function(){setTimeout((function(){b=!1}),50)},multipointStart:function(){b=!0,m=x||1},pinch:function(e){if(!f||p)return!1;b=!0,f.scaleX=f.scaleY=m*e.zoom;var t=m*e.zoom;if(S=!0,t<=1)return S=!1,t=1,k=null,C=null,w=null,T=null,void 
f.setAttribute("style","");t>4.5&&(t=4.5),f.style.transform="scale3d(".concat(t,", ").concat(t,", 1)"),x=t},pressMove:function(e){if(S&&!b){var t=P.pageX-O.pageX,i=P.pageY-O.pageY;C&&(t+=C),k&&(i+=k),w=t,T=i;var n="translate3d(".concat(t,"px, ").concat(i,"px, 0)");x&&(n+=" scale3d(".concat(x,", ").concat(x,", 1)")),v(f,n)}},swipe:function(t){if(!S)if(b)b=!1;else{if("Left"==t.direction){if(e.index==e.elements.length-1)return W(g);e.nextSlide()}if("Right"==t.direction){if(0==e.index)return W(g);e.prevSlide()}}}});e.events.touch=q}var H=function(){function e(i,n){var s=this,l=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null;if(t(this,e),this.img=i,this.slide=n,this.onclose=l,this.img.setZoomEvents)return!1;this.active=!1,this.zoomedIn=!1,this.dragging=!1,this.currentX=null,this.currentY=null,this.initialX=null,this.initialY=null,this.xOffset=0,this.yOffset=0,this.img.addEventListener("mousedown",(function(e){return s.dragStart(e)}),!1),this.img.addEventListener("mouseup",(function(e){return s.dragEnd(e)}),!1),this.img.addEventListener("mousemove",(function(e){return s.drag(e)}),!1),this.img.addEventListener("click",(function(e){return s.slide.classList.contains("dragging-nav")?(s.zoomOut(),!1):s.zoomedIn?void(s.zoomedIn&&!s.dragging&&s.zoomOut()):s.zoomIn()}),!1),this.img.setZoomEvents=!0}return n(e,[{key:"zoomIn",value:function(){var e=this.widowWidth();if(!(this.zoomedIn||e<=768)){var t=this.img;if(t.setAttribute("data-style",t.getAttribute("style")),t.style.maxWidth=t.naturalWidth+"px",t.style.maxHeight=t.naturalHeight+"px",t.naturalWidth>e){var 
i=e/2-t.naturalWidth/2;this.setTranslate(this.img.parentNode,i,0)}this.slide.classList.add("zoomed"),this.zoomedIn=!0}}},{key:"zoomOut",value:function(){this.img.parentNode.setAttribute("style",""),this.img.setAttribute("style",this.img.getAttribute("data-style")),this.slide.classList.remove("zoomed"),this.zoomedIn=!1,this.currentX=null,this.currentY=null,this.initialX=null,this.initialY=null,this.xOffset=0,this.yOffset=0,this.onclose&&"function"==typeof this.onclose&&this.onclose()}},{key:"dragStart",value:function(e){e.preventDefault(),this.zoomedIn?("touchstart"===e.type?(this.initialX=e.touches[0].clientX-this.xOffset,this.initialY=e.touches[0].clientY-this.yOffset):(this.initialX=e.clientX-this.xOffset,this.initialY=e.clientY-this.yOffset),e.target===this.img&&(this.active=!0,this.img.classList.add("dragging"))):this.active=!1}},{key:"dragEnd",value:function(e){var t=this;e.preventDefault(),this.initialX=this.currentX,this.initialY=this.currentY,this.active=!1,setTimeout((function(){t.dragging=!1,t.img.isDragging=!1,t.img.classList.remove("dragging")}),100)}},{key:"drag",value:function(e){this.active&&(e.preventDefault(),"touchmove"===e.type?(this.currentX=e.touches[0].clientX-this.initialX,this.currentY=e.touches[0].clientY-this.initialY):(this.currentX=e.clientX-this.initialX,this.currentY=e.clientY-this.initialY),this.xOffset=this.currentX,this.yOffset=this.currentY,this.img.isDragging=!0,this.dragging=!0,this.setTranslate(this.img,this.currentX,this.currentY))}},{key:"onMove",value:function(e){if(this.zoomedIn){var t=e.clientX-this.img.naturalWidth/2,i=e.clientY-this.img.naturalHeight/2;this.setTranslate(this.img,t,i)}}},{key:"setTranslate",value:function(e,t,i){e.style.transform="translate3d("+t+"px, "+i+"px, 0)"}},{key:"widowWidth",value:function(){return window.innerWidth||document.documentElement.clientWidth||document.body.clientWidth}}]),e}(),V=function(){function e(){var i=this,n=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};t(this,e);var 
s=n.dragEl,l=n.toleranceX,o=void 0===l?40:l,r=n.toleranceY,a=void 0===r?65:r,h=n.slide,d=void 0===h?null:h,c=n.instance,u=void 0===c?null:c;this.el=s,this.active=!1,this.dragging=!1,this.currentX=null,this.currentY=null,this.initialX=null,this.initialY=null,this.xOffset=0,this.yOffset=0,this.direction=null,this.lastDirection=null,this.toleranceX=o,this.toleranceY=a,this.toleranceReached=!1,this.dragContainer=this.el,this.slide=d,this.instance=u,this.el.addEventListener("mousedown",(function(e){return i.dragStart(e)}),!1),this.el.addEventListener("mouseup",(function(e){return i.dragEnd(e)}),!1),this.el.addEventListener("mousemove",(function(e){return i.drag(e)}),!1)}return n(e,[{key:"dragStart",value:function(e){if(this.slide.classList.contains("zoomed"))this.active=!1;else{"touchstart"===e.type?(this.initialX=e.touches[0].clientX-this.xOffset,this.initialY=e.touches[0].clientY-this.yOffset):(this.initialX=e.clientX-this.xOffset,this.initialY=e.clientY-this.yOffset);var t=e.target.nodeName.toLowerCase();e.target.classList.contains("nodrag")||u(e.target,".nodrag")||-1!==["input","select","textarea","button","a"].indexOf(t)?this.active=!1:(e.preventDefault(),(e.target===this.el||"img"!==t&&u(e.target,".gslide-inline"))&&(this.active=!0,this.el.classList.add("dragging"),this.dragContainer=u(e.target,".ginner-container")))}}},{key:"dragEnd",value:function(e){var 
t=this;e&&e.preventDefault(),this.initialX=0,this.initialY=0,this.currentX=null,this.currentY=null,this.initialX=null,this.initialY=null,this.xOffset=0,this.yOffset=0,this.active=!1,this.doSlideChange&&(this.instance.preventOutsideClick=!0,"right"==this.doSlideChange&&this.instance.prevSlide(),"left"==this.doSlideChange&&this.instance.nextSlide()),this.doSlideClose&&this.instance.close(),this.toleranceReached||this.setTranslate(this.dragContainer,0,0,!0),setTimeout((function(){t.instance.preventOutsideClick=!1,t.toleranceReached=!1,t.lastDirection=null,t.dragging=!1,t.el.isDragging=!1,t.el.classList.remove("dragging"),t.slide.classList.remove("dragging-nav"),t.dragContainer.style.transform="",t.dragContainer.style.transition=""}),100)}},{key:"drag",value:function(e){if(this.active){e.preventDefault(),this.slide.classList.add("dragging-nav"),"touchmove"===e.type?(this.currentX=e.touches[0].clientX-this.initialX,this.currentY=e.touches[0].clientY-this.initialY):(this.currentX=e.clientX-this.initialX,this.currentY=e.clientY-this.initialY),this.xOffset=this.currentX,this.yOffset=this.currentY,this.el.isDragging=!0,this.dragging=!0,this.doSlideChange=!1,this.doSlideClose=!1;var t=Math.abs(this.currentX),i=Math.abs(this.currentY);if(t>0&&t>=Math.abs(this.currentY)&&(!this.lastDirection||"x"==this.lastDirection)){this.yOffset=0,this.lastDirection="x",this.setTranslate(this.dragContainer,this.currentX,0);var n=this.shouldChange();if(!this.instance.settings.dragAutoSnap&&n&&(this.doSlideChange=n),this.instance.settings.dragAutoSnap&&n)return this.instance.preventOutsideClick=!0,this.toleranceReached=!0,this.active=!1,this.instance.preventOutsideClick=!0,this.dragEnd(null),"right"==n&&this.instance.prevSlide(),void("left"==n&&this.instance.nextSlide())}if(this.toleranceY>0&&i>0&&i>=t&&(!this.lastDirection||"y"==this.lastDirection)){this.xOffset=0,this.lastDirection="y",this.setTranslate(this.dragContainer,0,this.currentY);var 
s=this.shouldClose();return!this.instance.settings.dragAutoSnap&&s&&(this.doSlideClose=!0),void(this.instance.settings.dragAutoSnap&&s&&this.instance.close())}}}},{key:"shouldChange",value:function(){var e=!1;if(Math.abs(this.currentX)>=this.toleranceX){var t=this.currentX>0?"right":"left";("left"==t&&this.slide!==this.slide.parentNode.lastChild||"right"==t&&this.slide!==this.slide.parentNode.firstChild)&&(e=t)}return e}},{key:"shouldClose",value:function(){var e=!1;return Math.abs(this.currentY)>=this.toleranceY&&(e=!0),e}},{key:"setTranslate",value:function(e,t,i){var n=arguments.length>3&&void 0!==arguments[3]&&arguments[3];e.style.transition=n?"all .2s ease":"",e.style.transform="translate3d(".concat(t,"px, ").concat(i,"px, 0)")}}]),e}();function j(e,t,i,n){var s=e.querySelector(".gslide-media"),l=new Image,o="gSlideTitle_"+i,r="gSlideDesc_"+i;l.addEventListener("load",(function(){T(n)&&n()}),!1),l.src=t.href,""!=t.sizes&&""!=t.srcset&&(l.sizes=t.sizes,l.srcset=t.srcset),l.alt="",I(t.alt)||""===t.alt||(l.alt=t.alt),""!==t.title&&l.setAttribute("aria-labelledby",o),""!==t.description&&l.setAttribute("aria-describedby",r),t.hasOwnProperty("_hasCustomWidth")&&t._hasCustomWidth&&(l.style.width=t.width),t.hasOwnProperty("_hasCustomHeight")&&t._hasCustomHeight&&(l.style.height=t.height),s.insertBefore(l,s.firstChild)}function F(e,t,i,n){var s=this,l=e.querySelector(".ginner-container"),o="gvideo"+i,r=e.querySelector(".gslide-media"),a=this.getAllPlayers();h(l,"gvideo-container"),r.insertBefore(m('
'),r.firstChild);var d=e.querySelector(".gvideo-wrapper");S(this.settings.plyr.css,"Plyr");var c=t.href,u=null==t?void 0:t.videoProvider,g=!1;r.style.maxWidth=t.width,S(this.settings.plyr.js,"Plyr",(function(){if(!u&&c.match(/vimeo\.com\/([0-9]*)/)&&(u="vimeo"),!u&&(c.match(/(youtube\.com|youtube-nocookie\.com)\/watch\?v=([a-zA-Z0-9\-_]+)/)||c.match(/youtu\.be\/([a-zA-Z0-9\-_]+)/)||c.match(/(youtube\.com|youtube-nocookie\.com)\/embed\/([a-zA-Z0-9\-_]+)/))&&(u="youtube"),"local"===u||!u){u="local";var l='")}var r=g||m('
'));h(d,"".concat(u,"-video gvideo")),d.appendChild(r),d.setAttribute("data-id",o),d.setAttribute("data-index",i);var v=O(s.settings.plyr,"config")?s.settings.plyr.config:{},f=new Plyr("#"+o,v);f.on("ready",(function(e){a[o]=e.detail.plyr,T(n)&&n()})),b((function(){return e.querySelector("iframe")&&"true"==e.querySelector("iframe").dataset.ready}),(function(){s.resize(e)})),f.on("enterfullscreen",R),f.on("exitfullscreen",R)}))}function R(e){var t=u(e.target,".gslide-media");"enterfullscreen"===e.type&&h(t,"fullscreen"),"exitfullscreen"===e.type&&d(t,"fullscreen")}function G(e,t,i,n){var s,l=this,o=e.querySelector(".gslide-media"),r=!(!O(t,"href")||!t.href)&&t.href.split("#").pop().trim(),d=!(!O(t,"content")||!t.content)&&t.content;if(d&&(C(d)&&(s=m('
'.concat(d,"
"))),k(d))){"none"==d.style.display&&(d.style.display="block");var c=document.createElement("div");c.className="ginlined-content",c.appendChild(d),s=c}if(r){var u=document.getElementById(r);if(!u)return!1;var g=u.cloneNode(!0);g.style.height=t.height,g.style.maxWidth=t.width,h(g,"ginlined-content"),s=g}if(!s)return console.error("Unable to append inline slide content",t),!1;o.style.height=t.height,o.style.width=t.width,o.appendChild(s),this.events["inlineclose"+r]=a("click",{onElement:o.querySelectorAll(".gtrigger-close"),withCallback:function(e){e.preventDefault(),l.close()}}),T(n)&&n()}function Z(e,t,i,n){var s=e.querySelector(".gslide-media"),l=function(e){var t=e.url,i=e.allow,n=e.callback,s=e.appendTo,l=document.createElement("iframe");return l.className="vimeo-video gvideo",l.src=t,l.style.width="100%",l.style.height="100%",i&&l.setAttribute("allow",i),l.onload=function(){l.onload=null,h(l,"node-ready"),T(n)&&n()},s&&s.appendChild(l),l}({url:t.href,callback:n});s.parentNode.style.maxWidth=t.width,s.parentNode.style.height=t.height,s.appendChild(l)}var U=function(){function e(){var i=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};t(this,e),this.defaults={href:"",sizes:"",srcset:"",title:"",type:"",videoProvider:"",description:"",alt:"",descPosition:"bottom",effect:"",width:"",height:"",content:!1,zoomable:!0,draggable:!0},L(i)&&(this.defaults=l(this.defaults,i))}return n(e,[{key:"sourceType",value:function(e){var 
t=e;if(null!==(e=e.toLowerCase()).match(/\.(jpeg|jpg|jpe|gif|png|apn|webp|avif|svg)/))return"image";if(e.match(/(youtube\.com|youtube-nocookie\.com)\/watch\?v=([a-zA-Z0-9\-_]+)/)||e.match(/youtu\.be\/([a-zA-Z0-9\-_]+)/)||e.match(/(youtube\.com|youtube-nocookie\.com)\/embed\/([a-zA-Z0-9\-_]+)/))return"video";if(e.match(/vimeo\.com\/([0-9]*)/))return"video";if(null!==e.match(/\.(mp4|ogg|webm|mov)/))return"video";if(null!==e.match(/\.(mp3|wav|wma|aac|ogg)/))return"audio";if(e.indexOf("#")>-1&&""!==t.split("#").pop().trim())return"inline";return e.indexOf("goajax=true")>-1?"ajax":"external"}},{key:"parseConfig",value:function(e,t){var i=this,n=l({descPosition:t.descPosition},this.defaults);if(L(e)&&!k(e)){O(e,"type")||(O(e,"content")&&e.content?e.type="inline":O(e,"href")&&(e.type=this.sourceType(e.href)));var s=l(n,e);return this.setSize(s,t),s}var r="",a=e.getAttribute("data-glightbox"),h=e.nodeName.toLowerCase();if("a"===h&&(r=e.href),"img"===h&&(r=e.src,n.alt=e.alt),n.href=r,o(n,(function(s,l){O(t,l)&&"width"!==l&&(n[l]=t[l]);var o=e.dataset[l];I(o)||(n[l]=i.sanitizeValue(o))})),n.content&&(n.type="inline"),!n.type&&r&&(n.type=this.sourceType(r)),I(a)){if(!n.title&&"a"==h){var d=e.title;I(d)||""===d||(n.title=d)}if(!n.title&&"img"==h){var c=e.alt;I(c)||""===c||(n.title=c)}}else{var u=[];o(n,(function(e,t){u.push(";\\s?"+t)})),u=u.join("\\s?:|"),""!==a.trim()&&o(n,(function(e,t){var s=a,l=new RegExp("s?"+t+"s?:s?(.*?)("+u+"s?:|$)"),o=s.match(l);if(o&&o.length&&o[1]){var r=o[1].trim().replace(/;\s*$/,"");n[t]=i.sanitizeValue(r)}}))}if(n.description&&"."===n.description.substring(0,1)){var g;try{g=document.querySelector(n.description).innerHTML}catch(e){if(!(e instanceof DOMException))throw e}g&&(n.description=g)}if(!n.description){var v=e.querySelector(".glightbox-desc");v&&(n.description=v.innerHTML)}return this.setSize(n,t,e),this.slideConfig=n,n}},{key:"setSize",value:function(e,t){var i=arguments.length>2&&void 
0!==arguments[2]?arguments[2]:null,n="video"==e.type?this.checkSize(t.videosWidth):this.checkSize(t.width),s=this.checkSize(t.height);return e.width=O(e,"width")&&""!==e.width?this.checkSize(e.width):n,e.height=O(e,"height")&&""!==e.height?this.checkSize(e.height):s,i&&"image"==e.type&&(e._hasCustomWidth=!!i.dataset.width,e._hasCustomHeight=!!i.dataset.height),e}},{key:"checkSize",value:function(e){return M(e)?"".concat(e,"px"):e}},{key:"sanitizeValue",value:function(e){return"true"!==e&&"false"!==e?e:"true"===e}}]),e}(),$=function(){function e(i,n,s){t(this,e),this.element=i,this.instance=n,this.index=s}return n(e,[{key:"setContent",value:function(){var e=this,t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:null,i=arguments.length>1&&void 0!==arguments[1]&&arguments[1];if(c(t,"loaded"))return!1;var n=this.instance.settings,s=this.slideConfig,l=w();T(n.beforeSlideLoad)&&n.beforeSlideLoad({index:this.index,slide:t,player:!1});var o=s.type,r=s.descPosition,a=t.querySelector(".gslide-media"),d=t.querySelector(".gslide-title"),u=t.querySelector(".gslide-desc"),g=t.querySelector(".gdesc-inner"),v=i,f="gSlideTitle_"+this.index,p="gSlideDesc_"+this.index;if(T(n.afterSlideLoad)&&(v=function(){T(i)&&i(),n.afterSlideLoad({index:e.index,slide:t,player:e.instance.getSlidePlayerInstance(e.index)})}),""==s.title&&""==s.description?g&&g.parentNode.parentNode.removeChild(g.parentNode):(d&&""!==s.title?(d.id=f,d.innerHTML=s.title):d.parentNode.removeChild(d),u&&""!==s.description?(u.id=p,l&&n.moreLength>0?(s.smallDescription=this.slideShortDesc(s.description,n.moreLength,n.moreText),u.innerHTML=s.smallDescription,this.descriptionEvents(u,s)):u.innerHTML=s.description):u.parentNode.removeChild(u),h(a.parentNode,"desc-".concat(r)),h(g.parentNode,"description-".concat(r))),h(a,"gslide-".concat(o)),h(t,"loaded"),"video"!==o){if("external"!==o)return"inline"===o?(G.apply(this.instance,[t,s,this.index,v]),void(s.draggable&&new 
V({dragEl:t.querySelector(".gslide-inline"),toleranceX:n.dragToleranceX,toleranceY:n.dragToleranceY,slide:t,instance:this.instance}))):void("image"!==o?T(v)&&v():j(t,s,this.index,(function(){var i=t.querySelector("img");s.draggable&&new V({dragEl:i,toleranceX:n.dragToleranceX,toleranceY:n.dragToleranceY,slide:t,instance:e.instance}),s.zoomable&&i.naturalWidth>i.offsetWidth&&(h(i,"zoomable"),new H(i,t,(function(){e.instance.resize()}))),T(v)&&v()})));Z.apply(this,[t,s,this.index,v])}else F.apply(this.instance,[t,s,this.index,v])}},{key:"slideShortDesc",value:function(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:50,i=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=document.createElement("div");n.innerHTML=e;var s=n.innerText,l=i;if((e=s.trim()).length<=t)return e;var o=e.substr(0,t-1);return l?(n=null,o+'... '+i+""):o}},{key:"descriptionEvents",value:function(e,t){var i=this,n=e.querySelector(".desc-more");if(!n)return!1;a("click",{onElement:n,withCallback:function(e,n){e.preventDefault();var s=document.body,l=u(n,".gslide-desc");if(!l)return!1;l.innerHTML=t.description,h(s,"gdesc-open");var o=a("click",{onElement:[s,u(l,".gslide-description")],withCallback:function(e,n){"a"!==e.target.nodeName.toLowerCase()&&(d(s,"gdesc-open"),h(s,"gdesc-closed"),l.innerHTML=t.smallDescription,i.descriptionEvents(l,t),setTimeout((function(){d(s,"gdesc-closed")}),400),o.destroy())}})}})}},{key:"create",value:function(){return m(this.instance.settings.slideHTML)}},{key:"getConfig",value:function(){k(this.element)||this.element.hasOwnProperty("draggable")||(this.element.draggable=this.instance.settings.draggable);var e=new U(this.instance.settings.slideExtraAttributes);return this.slideConfig=e.parseConfig(this.element,this.instance.settings),this.slideConfig}}]),e}(),J=w(),K=null!==w()||void 0!==document.createTouch||"ontouchstart"in window||"onmsgesturechange"in 
window||navigator.msMaxTouchPoints,Q=document.getElementsByTagName("html")[0],ee={selector:".glightbox",elements:null,skin:"clean",theme:"clean",closeButton:!0,startAt:null,autoplayVideos:!0,autofocusVideos:!0,descPosition:"bottom",width:"900px",height:"506px",videosWidth:"960px",beforeSlideChange:null,afterSlideChange:null,beforeSlideLoad:null,afterSlideLoad:null,slideInserted:null,slideRemoved:null,slideExtraAttributes:null,onOpen:null,onClose:null,loop:!1,zoomable:!0,draggable:!0,dragAutoSnap:!1,dragToleranceX:40,dragToleranceY:65,preload:!0,oneSlidePerOpen:!1,touchNavigation:!0,touchFollowAxis:!0,keyboardNavigation:!0,closeOnOutsideClick:!0,plugins:!1,plyr:{css:"https://cdn.plyr.io/3.6.12/plyr.css",js:"https://cdn.plyr.io/3.6.12/plyr.js",config:{ratio:"16:9",fullscreen:{enabled:!0,iosNative:!0},youtube:{noCookie:!0,rel:0,showinfo:0,iv_load_policy:3},vimeo:{byline:!1,portrait:!1,title:!1,transparent:!1}}},openEffect:"zoom",closeEffect:"zoom",slideEffect:"slide",moreText:"See more",moreLength:60,cssEfects:{fade:{in:"fadeIn",out:"fadeOut"},zoom:{in:"zoomIn",out:"zoomOut"},slide:{in:"slideInRight",out:"slideOutLeft"},slideBack:{in:"slideInLeft",out:"slideOutRight"},none:{in:"none",out:"none"}},svg:{close:'',next:' ',prev:''},slideHTML:'
\n
\n
\n
\n
\n
\n
\n

\n
\n
\n
\n
\n
\n
',lightboxHTML:''},te=function(){function e(){var i=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};t(this,e),this.customOptions=i,this.settings=l(ee,i),this.effectsClasses=this.getAnimationClasses(),this.videoPlayers={},this.apiEvents=[],this.fullElementsList=!1}return n(e,[{key:"init",value:function(){var e=this,t=this.getSelector();t&&(this.baseEvents=a("click",{onElement:t,withCallback:function(t,i){t.preventDefault(),e.open(i)}})),this.elements=this.getElements()}},{key:"open",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:null,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null;if(0===this.elements.length)return!1;this.activeSlide=null,this.prevActiveSlideIndex=null,this.prevActiveSlide=null;var i=M(t)?t:this.settings.startAt;if(k(e)){var n=e.getAttribute("data-gallery");n&&(this.fullElementsList=this.elements,this.elements=this.getGalleryElements(this.elements,n)),I(i)&&(i=this.getElementIndex(e))<0&&(i=0)}M(i)||(i=0),this.build(),g(this.overlay,"none"===this.settings.openEffect?"none":this.settings.cssEfects.fade.in);var s=document.body,l=window.innerWidth-document.documentElement.clientWidth;if(l>0){var o=document.createElement("style");o.type="text/css",o.className="gcss-styles",o.innerText=".gscrollbar-fixer {margin-right: ".concat(l,"px}"),document.head.appendChild(o),h(s,"gscrollbar-fixer")}h(s,"glightbox-open"),h(Q,"glightbox-open"),J&&(h(document.body,"glightbox-mobile"),this.settings.slideEffect="slide"),this.showSlide(i,!0),1===this.elements.length?(h(this.prevButton,"glightbox-button-hidden"),h(this.nextButton,"glightbox-button-hidden")):(d(this.prevButton,"glightbox-button-hidden"),d(this.nextButton,"glightbox-button-hidden")),this.lightboxOpen=!0,this.trigger("open"),T(this.settings.onOpen)&&this.settings.onOpen(),K&&this.settings.touchNavigation&&B(this),this.settings.keyboardNavigation&&X(this)}},{key:"openAt",value:function(){var e=arguments.length>0&&void 
0!==arguments[0]?arguments[0]:0;this.open(null,e)}},{key:"showSlide",value:function(){var e=this,t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:0,i=arguments.length>1&&void 0!==arguments[1]&&arguments[1];f(this.loader),this.index=parseInt(t);var n=this.slidesContainer.querySelector(".current");n&&d(n,"current"),this.slideAnimateOut();var s=this.slidesContainer.querySelectorAll(".gslide")[t];if(c(s,"loaded"))this.slideAnimateIn(s,i),p(this.loader);else{f(this.loader);var l=this.elements[t],o={index:this.index,slide:s,slideNode:s,slideConfig:l.slideConfig,slideIndex:this.index,trigger:l.node,player:null};this.trigger("slide_before_load",o),l.instance.setContent(s,(function(){p(e.loader),e.resize(),e.slideAnimateIn(s,i),e.trigger("slide_after_load",o)}))}this.slideDescription=s.querySelector(".gslide-description"),this.slideDescriptionContained=this.slideDescription&&c(this.slideDescription.parentNode,"gslide-media"),this.settings.preload&&(this.preloadSlide(t+1),this.preloadSlide(t-1)),this.updateNavigationClasses(),this.activeSlide=s}},{key:"preloadSlide",value:function(e){var t=this;if(e<0||e>this.elements.length-1)return!1;if(I(this.elements[e]))return!1;var i=this.slidesContainer.querySelectorAll(".gslide")[e];if(c(i,"loaded"))return!1;var n=this.elements[e],s=n.type,l={index:e,slide:i,slideNode:i,slideConfig:n.slideConfig,slideIndex:e,trigger:n.node,player:null};this.trigger("slide_before_load",l),"video"===s||"external"===s?setTimeout((function(){n.instance.setContent(i,(function(){t.trigger("slide_after_load",l)}))}),200):n.instance.setContent(i,(function(){t.trigger("slide_after_load",l)}))}},{key:"prevSlide",value:function(){this.goToSlide(this.index-1)}},{key:"nextSlide",value:function(){this.goToSlide(this.index+1)}},{key:"goToSlide",value:function(){var e=arguments.length>0&&void 
0!==arguments[0]&&arguments[0];if(this.prevActiveSlide=this.activeSlide,this.prevActiveSlideIndex=this.index,!this.loop()&&(e<0||e>this.elements.length-1))return!1;e<0?e=this.elements.length-1:e>=this.elements.length&&(e=0),this.showSlide(e)}},{key:"insertSlide",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:-1;t<0&&(t=this.elements.length);var i=new $(e,this,t),n=i.getConfig(),s=l({},n),o=i.create(),r=this.elements.length-1;s.index=t,s.node=!1,s.instance=i,s.slideConfig=n,this.elements.splice(t,0,s);var a=null,h=null;if(this.slidesContainer){if(t>r)this.slidesContainer.appendChild(o);else{var d=this.slidesContainer.querySelectorAll(".gslide")[t];this.slidesContainer.insertBefore(o,d)}(this.settings.preload&&0==this.index&&0==t||this.index-1==t||this.index+1==t)&&this.preloadSlide(t),0===this.index&&0===t&&(this.index=1),this.updateNavigationClasses(),a=this.slidesContainer.querySelectorAll(".gslide")[t],h=this.getSlidePlayerInstance(t),s.slideNode=a}this.trigger("slide_inserted",{index:t,slide:a,slideNode:a,slideConfig:n,slideIndex:t,trigger:null,player:h}),T(this.settings.slideInserted)&&this.settings.slideInserted({index:t,slide:a,player:h})}},{key:"removeSlide",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:-1;if(e<0||e>this.elements.length-1)return!1;var t=this.slidesContainer&&this.slidesContainer.querySelectorAll(".gslide")[e];t&&(this.getActiveSlideIndex()==e&&(e==this.elements.length-1?this.prevSlide():this.nextSlide()),t.parentNode.removeChild(t)),this.elements.splice(e,1),this.trigger("slide_removed",e),T(this.settings.slideRemoved)&&this.settings.slideRemoved(e)}},{key:"slideAnimateIn",value:function(e,t){var 
i=this,n=e.querySelector(".gslide-media"),s=e.querySelector(".gslide-description"),l={index:this.prevActiveSlideIndex,slide:this.prevActiveSlide,slideNode:this.prevActiveSlide,slideIndex:this.prevActiveSlide,slideConfig:I(this.prevActiveSlideIndex)?null:this.elements[this.prevActiveSlideIndex].slideConfig,trigger:I(this.prevActiveSlideIndex)?null:this.elements[this.prevActiveSlideIndex].node,player:this.getSlidePlayerInstance(this.prevActiveSlideIndex)},o={index:this.index,slide:this.activeSlide,slideNode:this.activeSlide,slideConfig:this.elements[this.index].slideConfig,slideIndex:this.index,trigger:this.elements[this.index].node,player:this.getSlidePlayerInstance(this.index)};if(n.offsetWidth>0&&s&&(p(s),s.style.display=""),d(e,this.effectsClasses),t)g(e,this.settings.cssEfects[this.settings.openEffect].in,(function(){i.settings.autoplayVideos&&i.slidePlayerPlay(e),i.trigger("slide_changed",{prev:l,current:o}),T(i.settings.afterSlideChange)&&i.settings.afterSlideChange.apply(i,[l,o])}));else{var r=this.settings.slideEffect,a="none"!==r?this.settings.cssEfects[r].in:r;this.prevActiveSlideIndex>this.index&&"slide"==this.settings.slideEffect&&(a=this.settings.cssEfects.slideBack.in),g(e,a,(function(){i.settings.autoplayVideos&&i.slidePlayerPlay(e),i.trigger("slide_changed",{prev:l,current:o}),T(i.settings.afterSlideChange)&&i.settings.afterSlideChange.apply(i,[l,o])}))}setTimeout((function(){i.resize(e)}),100),h(e,"current")}},{key:"slideAnimateOut",value:function(){if(!this.prevActiveSlide)return!1;var e=this.prevActiveSlide;d(e,this.effectsClasses),h(e,"prev");var 
t=this.settings.slideEffect,i="none"!==t?this.settings.cssEfects[t].out:t;this.slidePlayerPause(e),this.trigger("slide_before_change",{prev:{index:this.prevActiveSlideIndex,slide:this.prevActiveSlide,slideNode:this.prevActiveSlide,slideIndex:this.prevActiveSlideIndex,slideConfig:I(this.prevActiveSlideIndex)?null:this.elements[this.prevActiveSlideIndex].slideConfig,trigger:I(this.prevActiveSlideIndex)?null:this.elements[this.prevActiveSlideIndex].node,player:this.getSlidePlayerInstance(this.prevActiveSlideIndex)},current:{index:this.index,slide:this.activeSlide,slideNode:this.activeSlide,slideIndex:this.index,slideConfig:this.elements[this.index].slideConfig,trigger:this.elements[this.index].node,player:this.getSlidePlayerInstance(this.index)}}),T(this.settings.beforeSlideChange)&&this.settings.beforeSlideChange.apply(this,[{index:this.prevActiveSlideIndex,slide:this.prevActiveSlide,player:this.getSlidePlayerInstance(this.prevActiveSlideIndex)},{index:this.index,slide:this.activeSlide,player:this.getSlidePlayerInstance(this.index)}]),this.prevActiveSlideIndex>this.index&&"slide"==this.settings.slideEffect&&(i=this.settings.cssEfects.slideBack.out),g(e,i,(function(){var t=e.querySelector(".ginner-container"),i=e.querySelector(".gslide-media"),n=e.querySelector(".gslide-description");t.style.transform="",i.style.transform="",d(i,"greset"),i.style.opacity="",n&&(n.style.opacity=""),d(e,"prev")}))}},{key:"getAllPlayers",value:function(){return this.videoPlayers}},{key:"getSlidePlayerInstance",value:function(e){var t="gvideo"+e,i=this.getAllPlayers();return!(!O(i,t)||!i[t])&&i[t]}},{key:"stopSlideVideo",value:function(e){if(k(e)){var t=e.querySelector(".gvideo-wrapper");t&&(e=t.getAttribute("data-index"))}console.log("stopSlideVideo is deprecated, use slidePlayerPause");var i=this.getSlidePlayerInstance(e);i&&i.playing&&i.pause()}},{key:"slidePlayerPause",value:function(e){if(k(e)){var t=e.querySelector(".gvideo-wrapper");t&&(e=t.getAttribute("data-index"))}var 
i=this.getSlidePlayerInstance(e);i&&i.playing&&i.pause()}},{key:"playSlideVideo",value:function(e){if(k(e)){var t=e.querySelector(".gvideo-wrapper");t&&(e=t.getAttribute("data-index"))}console.log("playSlideVideo is deprecated, use slidePlayerPlay");var i=this.getSlidePlayerInstance(e);i&&!i.playing&&i.play()}},{key:"slidePlayerPlay",value:function(e){var t;if(!J||null!==(t=this.settings.plyr.config)&&void 0!==t&&t.muted){if(k(e)){var i=e.querySelector(".gvideo-wrapper");i&&(e=i.getAttribute("data-index"))}var n=this.getSlidePlayerInstance(e);n&&!n.playing&&(n.play(),this.settings.autofocusVideos&&n.elements.container.focus())}}},{key:"setElements",value:function(e){var t=this;this.settings.elements=!1;var i=[];e&&e.length&&o(e,(function(e,n){var s=new $(e,t,n),o=s.getConfig(),r=l({},o);r.slideConfig=o,r.instance=s,r.index=n,i.push(r)})),this.elements=i,this.lightboxOpen&&(this.slidesContainer.innerHTML="",this.elements.length&&(o(this.elements,(function(){var e=m(t.settings.slideHTML);t.slidesContainer.appendChild(e)})),this.showSlide(0,!0)))}},{key:"getElementIndex",value:function(e){var t=!1;return o(this.elements,(function(i,n){if(O(i,"node")&&i.node==e)return t=n,!0})),t}},{key:"getElements",value:function(){var e=this,t=[];this.elements=this.elements?this.elements:[],!I(this.settings.elements)&&E(this.settings.elements)&&this.settings.elements.length&&o(this.settings.elements,(function(i,n){var s=new $(i,e,n),o=s.getConfig(),r=l({},o);r.node=!1,r.index=n,r.instance=s,r.slideConfig=o,t.push(r)}));var i=!1;return this.getSelector()&&(i=document.querySelectorAll(this.getSelector())),i?(o(i,(function(i,n){var s=new $(i,e,n),o=s.getConfig(),r=l({},o);r.node=i,r.index=n,r.instance=s,r.slideConfig=o,r.gallery=i.getAttribute("data-gallery"),t.push(r)})),t):t}},{key:"getGalleryElements",value:function(e,t){return e.filter((function(e){return 
e.gallery==t}))}},{key:"getSelector",value:function(){return!this.settings.elements&&(this.settings.selector&&"data-"==this.settings.selector.substring(0,5)?"*[".concat(this.settings.selector,"]"):this.settings.selector)}},{key:"getActiveSlide",value:function(){return this.slidesContainer.querySelectorAll(".gslide")[this.index]}},{key:"getActiveSlideIndex",value:function(){return this.index}},{key:"getAnimationClasses",value:function(){var e=[];for(var t in this.settings.cssEfects)if(this.settings.cssEfects.hasOwnProperty(t)){var i=this.settings.cssEfects[t];e.push("g".concat(i.in)),e.push("g".concat(i.out))}return e.join(" ")}},{key:"build",value:function(){var e=this;if(this.built)return!1;var t=document.body.childNodes,i=[];o(t,(function(e){e.parentNode==document.body&&"#"!==e.nodeName.charAt(0)&&e.hasAttribute&&!e.hasAttribute("aria-hidden")&&(i.push(e),e.setAttribute("aria-hidden","true"))}));var n=O(this.settings.svg,"next")?this.settings.svg.next:"",s=O(this.settings.svg,"prev")?this.settings.svg.prev:"",l=O(this.settings.svg,"close")?this.settings.svg.close:"",r=this.settings.lightboxHTML;r=m(r=(r=(r=r.replace(/{nextSVG}/g,n)).replace(/{prevSVG}/g,s)).replace(/{closeSVG}/g,l)),document.body.appendChild(r);var d=document.getElementById("glightbox-body");this.modal=d;var 
g=d.querySelector(".gclose");this.prevButton=d.querySelector(".gprev"),this.nextButton=d.querySelector(".gnext"),this.overlay=d.querySelector(".goverlay"),this.loader=d.querySelector(".gloader"),this.slidesContainer=document.getElementById("glightbox-slider"),this.bodyHiddenChildElms=i,this.events={},h(this.modal,"glightbox-"+this.settings.skin),this.settings.closeButton&&g&&(this.events.close=a("click",{onElement:g,withCallback:function(t,i){t.preventDefault(),e.close()}})),g&&!this.settings.closeButton&&g.parentNode.removeChild(g),this.nextButton&&(this.events.next=a("click",{onElement:this.nextButton,withCallback:function(t,i){t.preventDefault(),e.nextSlide()}})),this.prevButton&&(this.events.prev=a("click",{onElement:this.prevButton,withCallback:function(t,i){t.preventDefault(),e.prevSlide()}})),this.settings.closeOnOutsideClick&&(this.events.outClose=a("click",{onElement:d,withCallback:function(t,i){e.preventOutsideClick||c(document.body,"glightbox-mobile")||u(t.target,".ginner-container")||u(t.target,".gbtn")||c(t.target,"gnext")||c(t.target,"gprev")||e.close()}})),o(this.elements,(function(t,i){e.slidesContainer.appendChild(t.instance.create()),t.slideNode=e.slidesContainer.querySelectorAll(".gslide")[i]})),K&&h(document.body,"glightbox-touch"),this.events.resize=a("resize",{onElement:window,withCallback:function(){e.resize()}}),this.built=!0}},{key:"resize",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:null;if((e=e||this.activeSlide)&&!c(e,"zoomed")){var t=y(),i=e.querySelector(".gvideo-wrapper"),n=e.querySelector(".gslide-image"),s=this.slideDescription,l=t.width,o=t.height;if(l<=768?h(document.body,"glightbox-mobile"):d(document.body,"glightbox-mobile"),i||n){var r=!1;if(s&&(c(s,"description-bottom")||c(s,"description-top"))&&!c(s,"gabsolute")&&(r=!0),n)if(l<=768)n.querySelector("img");else if(r){var a=s.offsetHeight,u=n.querySelector("img");u.setAttribute("style","max-height: calc(100vh - 
".concat(a,"px)")),s.setAttribute("style","max-width: ".concat(u.offsetWidth,"px;"))}if(i){var g=O(this.settings.plyr.config,"ratio")?this.settings.plyr.config.ratio:"";if(!g){var v=i.clientWidth,f=i.clientHeight,p=v/f;g="".concat(v/p,":").concat(f/p)}var m=g.split(":"),x=this.settings.videosWidth,b=this.settings.videosWidth,S=(b=M(x)||-1!==x.indexOf("px")?parseInt(x):-1!==x.indexOf("vw")?l*parseInt(x)/100:-1!==x.indexOf("vh")?o*parseInt(x)/100:-1!==x.indexOf("%")?l*parseInt(x)/100:parseInt(i.clientWidth))/(parseInt(m[0])/parseInt(m[1]));if(S=Math.floor(S),r&&(o-=s.offsetHeight),b>l||S>o||ob){var w=i.offsetWidth,T=i.offsetHeight,C=o/T,k={width:w*C,height:T*C};i.parentNode.setAttribute("style","max-width: ".concat(k.width,"px")),r&&s.setAttribute("style","max-width: ".concat(k.width,"px;"))}else i.parentNode.style.maxWidth="".concat(x),r&&s.setAttribute("style","max-width: ".concat(x,";"))}}}}},{key:"reload",value:function(){this.init()}},{key:"updateNavigationClasses",value:function(){var e=this.loop();d(this.nextButton,"disabled"),d(this.prevButton,"disabled"),0==this.index&&this.elements.length-1==0?(h(this.prevButton,"disabled"),h(this.nextButton,"disabled")):0!==this.index||e?this.index!==this.elements.length-1||e||h(this.nextButton,"disabled"):h(this.prevButton,"disabled")}},{key:"loop",value:function(){var e=O(this.settings,"loopAtEnd")?this.settings.loopAtEnd:null;return e=O(this.settings,"loop")?this.settings.loop:e,e}},{key:"close",value:function(){var e=this;if(!this.lightboxOpen){if(this.events){for(var t in 
this.events)this.events.hasOwnProperty(t)&&this.events[t].destroy();this.events=null}return!1}if(this.closing)return!1;this.closing=!0,this.slidePlayerPause(this.activeSlide),this.fullElementsList&&(this.elements=this.fullElementsList),this.bodyHiddenChildElms.length&&o(this.bodyHiddenChildElms,(function(e){e.removeAttribute("aria-hidden")})),h(this.modal,"glightbox-closing"),g(this.overlay,"none"==this.settings.openEffect?"none":this.settings.cssEfects.fade.out),g(this.activeSlide,this.settings.cssEfects[this.settings.closeEffect].out,(function(){if(e.activeSlide=null,e.prevActiveSlideIndex=null,e.prevActiveSlide=null,e.built=!1,e.events){for(var t in e.events)e.events.hasOwnProperty(t)&&e.events[t].destroy();e.events=null}var i=document.body;d(Q,"glightbox-open"),d(i,"glightbox-open touching gdesc-open glightbox-touch glightbox-mobile gscrollbar-fixer"),e.modal.parentNode.removeChild(e.modal),e.trigger("close"),T(e.settings.onClose)&&e.settings.onClose();var n=document.querySelector(".gcss-styles");n&&n.parentNode.removeChild(n),e.lightboxOpen=!1,e.closing=null}))}},{key:"destroy",value:function(){this.close(),this.clearAllEvents(),this.baseEvents&&this.baseEvents.destroy()}},{key:"on",value:function(e,t){var i=arguments.length>2&&void 0!==arguments[2]&&arguments[2];if(!e||!T(t))throw new TypeError("Event name and callback must be defined");this.apiEvents.push({evt:e,once:i,callback:t})}},{key:"once",value:function(e,t){this.on(e,t,!0)}},{key:"trigger",value:function(e){var t=this,i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null,n=[];o(this.apiEvents,(function(t,s){var l=t.evt,o=t.once,r=t.callback;l==e&&(r(i),o&&n.push(s))})),n.length&&o(n,(function(e){return t.apiEvents.splice(e,1)}))}},{key:"clearAllEvents",value:function(){this.apiEvents.splice(0,this.apiEvents.length)}},{key:"version",value:function(){return"3.1.0"}}]),e}();return function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},t=new te(e);return t.init(),t}})); \ No 
newline at end of file diff --git a/docs/2_39/site_libs/quarto-contrib/glightbox/lightbox.css b/docs/2_39/site_libs/quarto-contrib/glightbox/lightbox.css new file mode 100644 index 000000000..46432d990 --- /dev/null +++ b/docs/2_39/site_libs/quarto-contrib/glightbox/lightbox.css @@ -0,0 +1,26 @@ +body:not(.glightbox-mobile) div.gslide div.gslide-description, +body:not(.glightbox-mobile) div.gslide-description .gslide-title, +body:not(.glightbox-mobile) div.gslide-description .gslide-desc { + color: var(--quarto-body-color); + background-color: var(--quarto-body-bg); +} + +body:not(.glightbox-mobile) div.gslide-media { + background-color: var(--quarto-body-bg); +} + +.goverlay { + background: rgba(0, 0, 0, 0.7); +} + +div.gslide-description .gslide-title { + margin-top: 0.25em; + margin-bottom: 0.25em; + font-weight: 500; + font-family: inherit; +} + +div.gslide-description .gslide-desc { + padding-bottom: 0.5em; + font-family: inherit; +} diff --git a/docs/2_39/site_libs/quarto-html/anchor.min.js b/docs/2_39/site_libs/quarto-html/anchor.min.js new file mode 100644 index 000000000..5ac814d1e --- /dev/null +++ b/docs/2_39/site_libs/quarto-html/anchor.min.js @@ -0,0 +1,9 @@ +// @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat +// +// AnchorJS - v5.0.0 - 2023-01-18 +// https://www.bryanbraun.com/anchorjs/ +// Copyright (c) 2023 Bryan Braun; Licensed MIT +// +// @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat +!function(A,e){"use strict";"function"==typeof define&&define.amd?define([],e):"object"==typeof module&&module.exports?module.exports=e():(A.AnchorJS=e(),A.anchors=new A.AnchorJS)}(globalThis,function(){"use strict";return function(A){function 
u(A){A.icon=Object.prototype.hasOwnProperty.call(A,"icon")?A.icon:"",A.visible=Object.prototype.hasOwnProperty.call(A,"visible")?A.visible:"hover",A.placement=Object.prototype.hasOwnProperty.call(A,"placement")?A.placement:"right",A.ariaLabel=Object.prototype.hasOwnProperty.call(A,"ariaLabel")?A.ariaLabel:"Anchor",A.class=Object.prototype.hasOwnProperty.call(A,"class")?A.class:"",A.base=Object.prototype.hasOwnProperty.call(A,"base")?A.base:"",A.truncate=Object.prototype.hasOwnProperty.call(A,"truncate")?Math.floor(A.truncate):64,A.titleText=Object.prototype.hasOwnProperty.call(A,"titleText")?A.titleText:""}function d(A){var e;if("string"==typeof A||A instanceof String)e=[].slice.call(document.querySelectorAll(A));else{if(!(Array.isArray(A)||A instanceof NodeList))throw new TypeError("The selector provided to AnchorJS was invalid.");e=[].slice.call(A)}return e}this.options=A||{},this.elements=[],u(this.options),this.add=function(A){var e,t,o,i,n,s,a,r,l,c,h,p=[];if(u(this.options),0!==(e=d(A=A||"h2, h3, h4, h5, h6")).length){for(null===document.head.querySelector("style.anchorjs")&&((A=document.createElement("style")).className="anchorjs",A.appendChild(document.createTextNode("")),void 
0===(h=document.head.querySelector('[rel="stylesheet"],style'))?document.head.appendChild(A):document.head.insertBefore(A,h),A.sheet.insertRule(".anchorjs-link{opacity:0;text-decoration:none;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}",A.sheet.cssRules.length),A.sheet.insertRule(":hover>.anchorjs-link,.anchorjs-link:focus{opacity:1}",A.sheet.cssRules.length),A.sheet.insertRule("[data-anchorjs-icon]::after{content:attr(data-anchorjs-icon)}",A.sheet.cssRules.length),A.sheet.insertRule('@font-face{font-family:anchorjs-icons;src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) 
format("truetype")}',A.sheet.cssRules.length)),h=document.querySelectorAll("[id]"),t=[].map.call(h,function(A){return A.id}),i=0;i\]./()*\\\n\t\b\v\u00A0]/g,"-").replace(/-{2,}/g,"-").substring(0,this.options.truncate).replace(/^-+|-+$/gm,"").toLowerCase()},this.hasAnchorJSLink=function(A){var e=A.firstChild&&-1<(" "+A.firstChild.className+" ").indexOf(" anchorjs-link "),A=A.lastChild&&-1<(" "+A.lastChild.className+" ").indexOf(" anchorjs-link ");return e||A||!1}}}); +// @license-end \ No newline at end of file diff --git a/docs/2_39/site_libs/quarto-html/popper.min.js b/docs/2_39/site_libs/quarto-html/popper.min.js new file mode 100644 index 000000000..e3726d728 --- /dev/null +++ b/docs/2_39/site_libs/quarto-html/popper.min.js @@ -0,0 +1,6 @@ +/** + * @popperjs/core v2.11.7 - MIT License + */ + +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).Popper={})}(this,(function(e){"use strict";function t(e){if(null==e)return window;if("[object Window]"!==e.toString()){var t=e.ownerDocument;return t&&t.defaultView||window}return e}function n(e){return e instanceof t(e).Element||e instanceof Element}function r(e){return e instanceof t(e).HTMLElement||e instanceof HTMLElement}function o(e){return"undefined"!=typeof ShadowRoot&&(e instanceof t(e).ShadowRoot||e instanceof ShadowRoot)}var i=Math.max,a=Math.min,s=Math.round;function f(){var e=navigator.userAgentData;return null!=e&&e.brands&&Array.isArray(e.brands)?e.brands.map((function(e){return e.brand+"/"+e.version})).join(" "):navigator.userAgent}function c(){return!/^((?!chrome|android).)*safari/i.test(f())}function p(e,o,i){void 0===o&&(o=!1),void 0===i&&(i=!1);var a=e.getBoundingClientRect(),f=1,p=1;o&&r(e)&&(f=e.offsetWidth>0&&s(a.width)/e.offsetWidth||1,p=e.offsetHeight>0&&s(a.height)/e.offsetHeight||1);var 
u=(n(e)?t(e):window).visualViewport,l=!c()&&i,d=(a.left+(l&&u?u.offsetLeft:0))/f,h=(a.top+(l&&u?u.offsetTop:0))/p,m=a.width/f,v=a.height/p;return{width:m,height:v,top:h,right:d+m,bottom:h+v,left:d,x:d,y:h}}function u(e){var n=t(e);return{scrollLeft:n.pageXOffset,scrollTop:n.pageYOffset}}function l(e){return e?(e.nodeName||"").toLowerCase():null}function d(e){return((n(e)?e.ownerDocument:e.document)||window.document).documentElement}function h(e){return p(d(e)).left+u(e).scrollLeft}function m(e){return t(e).getComputedStyle(e)}function v(e){var t=m(e),n=t.overflow,r=t.overflowX,o=t.overflowY;return/auto|scroll|overlay|hidden/.test(n+o+r)}function y(e,n,o){void 0===o&&(o=!1);var i,a,f=r(n),c=r(n)&&function(e){var t=e.getBoundingClientRect(),n=s(t.width)/e.offsetWidth||1,r=s(t.height)/e.offsetHeight||1;return 1!==n||1!==r}(n),m=d(n),y=p(e,c,o),g={scrollLeft:0,scrollTop:0},b={x:0,y:0};return(f||!f&&!o)&&(("body"!==l(n)||v(m))&&(g=(i=n)!==t(i)&&r(i)?{scrollLeft:(a=i).scrollLeft,scrollTop:a.scrollTop}:u(i)),r(n)?((b=p(n,!0)).x+=n.clientLeft,b.y+=n.clientTop):m&&(b.x=h(m))),{x:y.left+g.scrollLeft-b.x,y:y.top+g.scrollTop-b.y,width:y.width,height:y.height}}function g(e){var t=p(e),n=e.offsetWidth,r=e.offsetHeight;return Math.abs(t.width-n)<=1&&(n=t.width),Math.abs(t.height-r)<=1&&(r=t.height),{x:e.offsetLeft,y:e.offsetTop,width:n,height:r}}function b(e){return"html"===l(e)?e:e.assignedSlot||e.parentNode||(o(e)?e.host:null)||d(e)}function x(e){return["html","body","#document"].indexOf(l(e))>=0?e.ownerDocument.body:r(e)&&v(e)?e:x(b(e))}function w(e,n){var r;void 0===n&&(n=[]);var o=x(e),i=o===(null==(r=e.ownerDocument)?void 0:r.body),a=t(o),s=i?[a].concat(a.visualViewport||[],v(o)?o:[]):o,f=n.concat(s);return i?f:f.concat(w(b(s)))}function O(e){return["table","td","th"].indexOf(l(e))>=0}function j(e){return r(e)&&"fixed"!==m(e).position?e.offsetParent:null}function E(e){for(var n=t(e),i=j(e);i&&O(i)&&"static"===m(i).position;)i=j(i);return 
i&&("html"===l(i)||"body"===l(i)&&"static"===m(i).position)?n:i||function(e){var t=/firefox/i.test(f());if(/Trident/i.test(f())&&r(e)&&"fixed"===m(e).position)return null;var n=b(e);for(o(n)&&(n=n.host);r(n)&&["html","body"].indexOf(l(n))<0;){var i=m(n);if("none"!==i.transform||"none"!==i.perspective||"paint"===i.contain||-1!==["transform","perspective"].indexOf(i.willChange)||t&&"filter"===i.willChange||t&&i.filter&&"none"!==i.filter)return n;n=n.parentNode}return null}(e)||n}var D="top",A="bottom",L="right",P="left",M="auto",k=[D,A,L,P],W="start",B="end",H="viewport",T="popper",R=k.reduce((function(e,t){return e.concat([t+"-"+W,t+"-"+B])}),[]),S=[].concat(k,[M]).reduce((function(e,t){return e.concat([t,t+"-"+W,t+"-"+B])}),[]),V=["beforeRead","read","afterRead","beforeMain","main","afterMain","beforeWrite","write","afterWrite"];function q(e){var t=new Map,n=new Set,r=[];function o(e){n.add(e.name),[].concat(e.requires||[],e.requiresIfExists||[]).forEach((function(e){if(!n.has(e)){var r=t.get(e);r&&o(r)}})),r.push(e)}return e.forEach((function(e){t.set(e.name,e)})),e.forEach((function(e){n.has(e.name)||o(e)})),r}function C(e){return e.split("-")[0]}function N(e,t){var n=t.getRootNode&&t.getRootNode();if(e.contains(t))return!0;if(n&&o(n)){var r=t;do{if(r&&e.isSameNode(r))return!0;r=r.parentNode||r.host}while(r)}return!1}function I(e){return Object.assign({},e,{left:e.x,top:e.y,right:e.x+e.width,bottom:e.y+e.height})}function _(e,r,o){return r===H?I(function(e,n){var r=t(e),o=d(e),i=r.visualViewport,a=o.clientWidth,s=o.clientHeight,f=0,p=0;if(i){a=i.width,s=i.height;var u=c();(u||!u&&"fixed"===n)&&(f=i.offsetLeft,p=i.offsetTop)}return{width:a,height:s,x:f+h(e),y:p}}(e,o)):n(r)?function(e,t){var n=p(e,!1,"fixed"===t);return n.top=n.top+e.clientTop,n.left=n.left+e.clientLeft,n.bottom=n.top+e.clientHeight,n.right=n.left+e.clientWidth,n.width=e.clientWidth,n.height=e.clientHeight,n.x=n.left,n.y=n.top,n}(r,o):I(function(e){var 
t,n=d(e),r=u(e),o=null==(t=e.ownerDocument)?void 0:t.body,a=i(n.scrollWidth,n.clientWidth,o?o.scrollWidth:0,o?o.clientWidth:0),s=i(n.scrollHeight,n.clientHeight,o?o.scrollHeight:0,o?o.clientHeight:0),f=-r.scrollLeft+h(e),c=-r.scrollTop;return"rtl"===m(o||n).direction&&(f+=i(n.clientWidth,o?o.clientWidth:0)-a),{width:a,height:s,x:f,y:c}}(d(e)))}function F(e,t,o,s){var f="clippingParents"===t?function(e){var t=w(b(e)),o=["absolute","fixed"].indexOf(m(e).position)>=0&&r(e)?E(e):e;return n(o)?t.filter((function(e){return n(e)&&N(e,o)&&"body"!==l(e)})):[]}(e):[].concat(t),c=[].concat(f,[o]),p=c[0],u=c.reduce((function(t,n){var r=_(e,n,s);return t.top=i(r.top,t.top),t.right=a(r.right,t.right),t.bottom=a(r.bottom,t.bottom),t.left=i(r.left,t.left),t}),_(e,p,s));return u.width=u.right-u.left,u.height=u.bottom-u.top,u.x=u.left,u.y=u.top,u}function U(e){return e.split("-")[1]}function z(e){return["top","bottom"].indexOf(e)>=0?"x":"y"}function X(e){var t,n=e.reference,r=e.element,o=e.placement,i=o?C(o):null,a=o?U(o):null,s=n.x+n.width/2-r.width/2,f=n.y+n.height/2-r.height/2;switch(i){case D:t={x:s,y:n.y-r.height};break;case A:t={x:s,y:n.y+n.height};break;case L:t={x:n.x+n.width,y:f};break;case P:t={x:n.x-r.width,y:f};break;default:t={x:n.x,y:n.y}}var c=i?z(i):null;if(null!=c){var p="y"===c?"height":"width";switch(a){case W:t[c]=t[c]-(n[p]/2-r[p]/2);break;case B:t[c]=t[c]+(n[p]/2-r[p]/2)}}return t}function Y(e){return Object.assign({},{top:0,right:0,bottom:0,left:0},e)}function G(e,t){return t.reduce((function(t,n){return t[n]=e,t}),{})}function J(e,t){void 0===t&&(t={});var r=t,o=r.placement,i=void 0===o?e.placement:o,a=r.strategy,s=void 0===a?e.strategy:a,f=r.boundary,c=void 0===f?"clippingParents":f,u=r.rootBoundary,l=void 0===u?H:u,h=r.elementContext,m=void 0===h?T:h,v=r.altBoundary,y=void 0!==v&&v,g=r.padding,b=void 0===g?0:g,x=Y("number"!=typeof 
b?b:G(b,k)),w=m===T?"reference":T,O=e.rects.popper,j=e.elements[y?w:m],E=F(n(j)?j:j.contextElement||d(e.elements.popper),c,l,s),P=p(e.elements.reference),M=X({reference:P,element:O,strategy:"absolute",placement:i}),W=I(Object.assign({},O,M)),B=m===T?W:P,R={top:E.top-B.top+x.top,bottom:B.bottom-E.bottom+x.bottom,left:E.left-B.left+x.left,right:B.right-E.right+x.right},S=e.modifiersData.offset;if(m===T&&S){var V=S[i];Object.keys(R).forEach((function(e){var t=[L,A].indexOf(e)>=0?1:-1,n=[D,A].indexOf(e)>=0?"y":"x";R[e]+=V[n]*t}))}return R}var K={placement:"bottom",modifiers:[],strategy:"absolute"};function Q(){for(var e=arguments.length,t=new Array(e),n=0;n=0?-1:1,i="function"==typeof n?n(Object.assign({},t,{placement:e})):n,a=i[0],s=i[1];return a=a||0,s=(s||0)*o,[P,L].indexOf(r)>=0?{x:s,y:a}:{x:a,y:s}}(n,t.rects,i),e}),{}),s=a[t.placement],f=s.x,c=s.y;null!=t.modifiersData.popperOffsets&&(t.modifiersData.popperOffsets.x+=f,t.modifiersData.popperOffsets.y+=c),t.modifiersData[r]=a}},se={left:"right",right:"left",bottom:"top",top:"bottom"};function fe(e){return e.replace(/left|right|bottom|top/g,(function(e){return se[e]}))}var ce={start:"end",end:"start"};function pe(e){return e.replace(/start|end/g,(function(e){return ce[e]}))}function ue(e,t){void 0===t&&(t={});var n=t,r=n.placement,o=n.boundary,i=n.rootBoundary,a=n.padding,s=n.flipVariations,f=n.allowedAutoPlacements,c=void 0===f?S:f,p=U(r),u=p?s?R:R.filter((function(e){return U(e)===p})):k,l=u.filter((function(e){return c.indexOf(e)>=0}));0===l.length&&(l=u);var d=l.reduce((function(t,n){return t[n]=J(e,{placement:n,boundary:o,rootBoundary:i,padding:a})[C(n)],t}),{});return Object.keys(d).sort((function(e,t){return d[e]-d[t]}))}var le={name:"flip",enabled:!0,phase:"main",fn:function(e){var t=e.state,n=e.options,r=e.name;if(!t.modifiersData[r]._skip){for(var o=n.mainAxis,i=void 0===o||o,a=n.altAxis,s=void 
0===a||a,f=n.fallbackPlacements,c=n.padding,p=n.boundary,u=n.rootBoundary,l=n.altBoundary,d=n.flipVariations,h=void 0===d||d,m=n.allowedAutoPlacements,v=t.options.placement,y=C(v),g=f||(y===v||!h?[fe(v)]:function(e){if(C(e)===M)return[];var t=fe(e);return[pe(e),t,pe(t)]}(v)),b=[v].concat(g).reduce((function(e,n){return e.concat(C(n)===M?ue(t,{placement:n,boundary:p,rootBoundary:u,padding:c,flipVariations:h,allowedAutoPlacements:m}):n)}),[]),x=t.rects.reference,w=t.rects.popper,O=new Map,j=!0,E=b[0],k=0;k=0,S=R?"width":"height",V=J(t,{placement:B,boundary:p,rootBoundary:u,altBoundary:l,padding:c}),q=R?T?L:P:T?A:D;x[S]>w[S]&&(q=fe(q));var N=fe(q),I=[];if(i&&I.push(V[H]<=0),s&&I.push(V[q]<=0,V[N]<=0),I.every((function(e){return e}))){E=B,j=!1;break}O.set(B,I)}if(j)for(var _=function(e){var t=b.find((function(t){var n=O.get(t);if(n)return n.slice(0,e).every((function(e){return e}))}));if(t)return E=t,"break"},F=h?3:1;F>0;F--){if("break"===_(F))break}t.placement!==E&&(t.modifiersData[r]._skip=!0,t.placement=E,t.reset=!0)}},requiresIfExists:["offset"],data:{_skip:!1}};function de(e,t,n){return i(e,a(t,n))}var he={name:"preventOverflow",enabled:!0,phase:"main",fn:function(e){var t=e.state,n=e.options,r=e.name,o=n.mainAxis,s=void 0===o||o,f=n.altAxis,c=void 0!==f&&f,p=n.boundary,u=n.rootBoundary,l=n.altBoundary,d=n.padding,h=n.tether,m=void 0===h||h,v=n.tetherOffset,y=void 0===v?0:v,b=J(t,{boundary:p,rootBoundary:u,padding:d,altBoundary:l}),x=C(t.placement),w=U(t.placement),O=!w,j=z(x),M="x"===j?"y":"x",k=t.modifiersData.popperOffsets,B=t.rects.reference,H=t.rects.popper,T="function"==typeof y?y(Object.assign({},t.rects,{placement:t.placement})):y,R="number"==typeof T?{mainAxis:T,altAxis:T}:Object.assign({mainAxis:0,altAxis:0},T),S=t.modifiersData.offset?t.modifiersData.offset[t.placement]:null,V={x:0,y:0};if(k){if(s){var 
q,N="y"===j?D:P,I="y"===j?A:L,_="y"===j?"height":"width",F=k[j],X=F+b[N],Y=F-b[I],G=m?-H[_]/2:0,K=w===W?B[_]:H[_],Q=w===W?-H[_]:-B[_],Z=t.elements.arrow,$=m&&Z?g(Z):{width:0,height:0},ee=t.modifiersData["arrow#persistent"]?t.modifiersData["arrow#persistent"].padding:{top:0,right:0,bottom:0,left:0},te=ee[N],ne=ee[I],re=de(0,B[_],$[_]),oe=O?B[_]/2-G-re-te-R.mainAxis:K-re-te-R.mainAxis,ie=O?-B[_]/2+G+re+ne+R.mainAxis:Q+re+ne+R.mainAxis,ae=t.elements.arrow&&E(t.elements.arrow),se=ae?"y"===j?ae.clientTop||0:ae.clientLeft||0:0,fe=null!=(q=null==S?void 0:S[j])?q:0,ce=F+ie-fe,pe=de(m?a(X,F+oe-fe-se):X,F,m?i(Y,ce):Y);k[j]=pe,V[j]=pe-F}if(c){var ue,le="x"===j?D:P,he="x"===j?A:L,me=k[M],ve="y"===M?"height":"width",ye=me+b[le],ge=me-b[he],be=-1!==[D,P].indexOf(x),xe=null!=(ue=null==S?void 0:S[M])?ue:0,we=be?ye:me-B[ve]-H[ve]-xe+R.altAxis,Oe=be?me+B[ve]+H[ve]-xe-R.altAxis:ge,je=m&&be?function(e,t,n){var r=de(e,t,n);return r>n?n:r}(we,me,Oe):de(m?we:ye,me,m?Oe:ge);k[M]=je,V[M]=je-me}t.modifiersData[r]=V}},requiresIfExists:["offset"]};var me={name:"arrow",enabled:!0,phase:"main",fn:function(e){var t,n=e.state,r=e.name,o=e.options,i=n.elements.arrow,a=n.modifiersData.popperOffsets,s=C(n.placement),f=z(s),c=[P,L].indexOf(s)>=0?"height":"width";if(i&&a){var p=function(e,t){return Y("number"!=typeof(e="function"==typeof e?e(Object.assign({},t.rects,{placement:t.placement})):e)?e:G(e,k))}(o.padding,n),u=g(i),l="y"===f?D:P,d="y"===f?A:L,h=n.rects.reference[c]+n.rects.reference[f]-a[f]-n.rects.popper[c],m=a[f]-n.rects.reference[f],v=E(i),y=v?"y"===f?v.clientHeight||0:v.clientWidth||0:0,b=h/2-m/2,x=p[l],w=y-u[c]-p[d],O=y/2-u[c]/2+b,j=de(x,O,w),M=f;n.modifiersData[r]=((t={})[M]=j,t.centerOffset=j-O,t)}},effect:function(e){var t=e.state,n=e.options.element,r=void 0===n?"[data-popper-arrow]":n;null!=r&&("string"!=typeof r||(r=t.elements.popper.querySelector(r)))&&N(t.elements.popper,r)&&(t.elements.arrow=r)},requires:["popperOffsets"],requiresIfExists:["preventOverflow"]};function 
ve(e,t,n){return void 0===n&&(n={x:0,y:0}),{top:e.top-t.height-n.y,right:e.right-t.width+n.x,bottom:e.bottom-t.height+n.y,left:e.left-t.width-n.x}}function ye(e){return[D,L,A,P].some((function(t){return e[t]>=0}))}var ge={name:"hide",enabled:!0,phase:"main",requiresIfExists:["preventOverflow"],fn:function(e){var t=e.state,n=e.name,r=t.rects.reference,o=t.rects.popper,i=t.modifiersData.preventOverflow,a=J(t,{elementContext:"reference"}),s=J(t,{altBoundary:!0}),f=ve(a,r),c=ve(s,o,i),p=ye(f),u=ye(c);t.modifiersData[n]={referenceClippingOffsets:f,popperEscapeOffsets:c,isReferenceHidden:p,hasPopperEscaped:u},t.attributes.popper=Object.assign({},t.attributes.popper,{"data-popper-reference-hidden":p,"data-popper-escaped":u})}},be=Z({defaultModifiers:[ee,te,oe,ie]}),xe=[ee,te,oe,ie,ae,le,he,me,ge],we=Z({defaultModifiers:xe});e.applyStyles=ie,e.arrow=me,e.computeStyles=oe,e.createPopper=we,e.createPopperLite=be,e.defaultModifiers=xe,e.detectOverflow=J,e.eventListeners=ee,e.flip=le,e.hide=ge,e.offset=ae,e.popperGenerator=Z,e.popperOffsets=te,e.preventOverflow=he,Object.defineProperty(e,"__esModule",{value:!0})})); + diff --git a/docs/2_39/site_libs/quarto-html/quarto-syntax-highlighting-05fe91a66cf75bbbb8c9664867fe5124.css b/docs/2_39/site_libs/quarto-html/quarto-syntax-highlighting-05fe91a66cf75bbbb8c9664867fe5124.css new file mode 100644 index 000000000..05b0f7099 --- /dev/null +++ b/docs/2_39/site_libs/quarto-html/quarto-syntax-highlighting-05fe91a66cf75bbbb8c9664867fe5124.css @@ -0,0 +1,191 @@ +/* quarto syntax highlight colors */ +:root { + --quarto-hl-al-color: #ef2929; + --quarto-hl-an-color: #8f5902; + --quarto-hl-at-color: #204a87; + --quarto-hl-bn-color: #0000cf; + --quarto-hl-ch-color: #4e9a06; + --quarto-hl-co-color: #8f5902; + --quarto-hl-cv-color: #8f5902; + --quarto-hl-cn-color: #8f5902; + --quarto-hl-cf-color: #204a87; + --quarto-hl-dt-color: #204a87; + --quarto-hl-dv-color: #0000cf; + --quarto-hl-do-color: #8f5902; + --quarto-hl-er-color: #a40000; + 
--quarto-hl-ex-color: inherit; + --quarto-hl-fl-color: #0000cf; + --quarto-hl-fu-color: #204a87; + --quarto-hl-im-color: inherit; + --quarto-hl-in-color: #8f5902; + --quarto-hl-kw-color: #204a87; + --quarto-hl-op-color: #ce5c00; + --quarto-hl-ot-color: #8f5902; + --quarto-hl-pp-color: #8f5902; + --quarto-hl-sc-color: #ce5c00; + --quarto-hl-ss-color: #4e9a06; + --quarto-hl-st-color: #4e9a06; + --quarto-hl-va-color: #000000; + --quarto-hl-vs-color: #4e9a06; + --quarto-hl-wa-color: #8f5902; +} + +/* other quarto variables */ +:root { + --quarto-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; +} + +code span.al { + color: #ef2929; + font-style: inherit; +} + +code span.an { + color: #8f5902; + font-weight: bold; + font-style: italic; +} + +code span.at { + color: #204a87; + font-style: inherit; +} + +code span.bn { + color: #0000cf; + font-style: inherit; +} + +code span.ch { + color: #4e9a06; + font-style: inherit; +} + +code span.co { + color: #8f5902; + font-style: italic; +} + +code span.cv { + color: #8f5902; + font-weight: bold; + font-style: italic; +} + +code span.cn { + color: #8f5902; + font-style: inherit; +} + +code span.cf { + color: #204a87; + font-weight: bold; + font-style: inherit; +} + +code span.dt { + color: #204a87; + font-style: inherit; +} + +code span.dv { + color: #0000cf; + font-style: inherit; +} + +code span.do { + color: #8f5902; + font-weight: bold; + font-style: italic; +} + +code span.er { + color: #a40000; + font-weight: bold; + font-style: inherit; +} + +code span.ex { + font-style: inherit; +} + +code span.fl { + color: #0000cf; + font-style: inherit; +} + +code span.fu { + color: #204a87; + font-weight: bold; + font-style: inherit; +} + +code span.im { + font-style: inherit; +} + +code span.in { + color: #8f5902; + font-weight: bold; + font-style: italic; +} + +code span.kw { + color: #204a87; + font-weight: bold; + font-style: inherit; +} + +code span.op { + color: #ce5c00; + 
font-weight: bold; + font-style: inherit; +} + +code span.ot { + color: #8f5902; + font-style: inherit; +} + +code span.pp { + color: #8f5902; + font-style: italic; +} + +code span.sc { + color: #ce5c00; + font-weight: bold; + font-style: inherit; +} + +code span.ss { + color: #4e9a06; + font-style: inherit; +} + +code span.st { + color: #4e9a06; + font-style: inherit; +} + +code span.va { + color: #000000; + font-style: inherit; +} + +code span.vs { + color: #4e9a06; + font-style: inherit; +} + +code span.wa { + color: #8f5902; + font-weight: bold; + font-style: italic; +} + +.prevent-inlining { + content: " code.sourceCode > span { + color: #d8dee9; +} + +code span { + color: #d8dee9; +} + +code.sourceCode > span { + color: #d8dee9; +} + +div.sourceCode, +div.sourceCode pre.sourceCode { + color: #d8dee9; +} + +code span.op { + color: #81a1c1; +} + +code span.pp { + color: #5e81ac; +} + +code span.re { + background-color: #3b4252; + color: #88c0d0; +} + +code span.sc { + color: #ebcb8b; +} + +code span.ss { + color: #d08770; +} + +code span.st { + color: #a3be8c; +} + +code span.va { + color: #5e81ac; +} + +code span.vs { + color: #a3be8c; +} + +code span.wa { + color: #bf616a; +} + +.prevent-inlining { + content: " { + // Find any conflicting margin elements and add margins to the + // top to prevent overlap + const marginChildren = window.document.querySelectorAll( + ".column-margin.column-container > *, .margin-caption, .aside" + ); + + let lastBottom = 0; + for (const marginChild of marginChildren) { + if (marginChild.offsetParent !== null) { + // clear the top margin so we recompute it + marginChild.style.marginTop = null; + const top = marginChild.getBoundingClientRect().top + window.scrollY; + if (top < lastBottom) { + const marginChildStyle = window.getComputedStyle(marginChild); + const marginBottom = parseFloat(marginChildStyle["marginBottom"]); + const margin = lastBottom - top + marginBottom; + marginChild.style.marginTop = `${margin}px`; + } + const 
styles = window.getComputedStyle(marginChild); + const marginTop = parseFloat(styles["marginTop"]); + lastBottom = top + marginChild.getBoundingClientRect().height + marginTop; + } + } +}; + +window.document.addEventListener("DOMContentLoaded", function (_event) { + // Recompute the position of margin elements anytime the body size changes + if (window.ResizeObserver) { + const resizeObserver = new window.ResizeObserver( + throttle(() => { + layoutMarginEls(); + if ( + window.document.body.getBoundingClientRect().width < 990 && + isReaderMode() + ) { + quartoToggleReader(); + } + }, 50) + ); + resizeObserver.observe(window.document.body); + } + + const tocEl = window.document.querySelector('nav.toc-active[role="doc-toc"]'); + const sidebarEl = window.document.getElementById("quarto-sidebar"); + const leftTocEl = window.document.getElementById("quarto-sidebar-toc-left"); + const marginSidebarEl = window.document.getElementById( + "quarto-margin-sidebar" + ); + // function to determine whether the element has a previous sibling that is active + const prevSiblingIsActiveLink = (el) => { + const sibling = el.previousElementSibling; + if (sibling && sibling.tagName === "A") { + return sibling.classList.contains("active"); + } else { + return false; + } + }; + + // fire slideEnter for bootstrap tab activations (for htmlwidget resize behavior) + function fireSlideEnter(e) { + const event = window.document.createEvent("Event"); + event.initEvent("slideenter", true, true); + window.document.dispatchEvent(event); + } + const tabs = window.document.querySelectorAll('a[data-bs-toggle="tab"]'); + tabs.forEach((tab) => { + tab.addEventListener("shown.bs.tab", fireSlideEnter); + }); + + // fire slideEnter for tabby tab activations (for htmlwidget resize behavior) + document.addEventListener("tabby", fireSlideEnter, false); + + // Track scrolling and mark TOC links as active + // get table of contents and sidebar (bail if we don't have at least one) + const tocLinks = tocEl + ? 
[...tocEl.querySelectorAll("a[data-scroll-target]")] + : []; + const makeActive = (link) => tocLinks[link].classList.add("active"); + const removeActive = (link) => tocLinks[link].classList.remove("active"); + const removeAllActive = () => + [...Array(tocLinks.length).keys()].forEach((link) => removeActive(link)); + + // activate the anchor for a section associated with this TOC entry + tocLinks.forEach((link) => { + link.addEventListener("click", () => { + if (link.href.indexOf("#") !== -1) { + const anchor = link.href.split("#")[1]; + const heading = window.document.querySelector( + `[data-anchor-id="${anchor}"]` + ); + if (heading) { + // Add the class + heading.classList.add("reveal-anchorjs-link"); + + // function to show the anchor + const handleMouseout = () => { + heading.classList.remove("reveal-anchorjs-link"); + heading.removeEventListener("mouseout", handleMouseout); + }; + + // add a function to clear the anchor when the user mouses out of it + heading.addEventListener("mouseout", handleMouseout); + } + } + }); + }); + + const sections = tocLinks.map((link) => { + const target = link.getAttribute("data-scroll-target"); + if (target.startsWith("#")) { + return window.document.getElementById(decodeURI(`${target.slice(1)}`)); + } else { + return window.document.querySelector(decodeURI(`${target}`)); + } + }); + + const sectionMargin = 200; + let currentActive = 0; + // track whether we've initialized state the first time + let init = false; + + const updateActiveLink = () => { + // The index from bottom to top (e.g. 
reversed list) + let sectionIndex = -1; + if ( + window.innerHeight + window.pageYOffset >= + window.document.body.offsetHeight + ) { + // This is the no-scroll case where last section should be the active one + sectionIndex = 0; + } else { + // This finds the last section visible on screen that should be made active + sectionIndex = [...sections].reverse().findIndex((section) => { + if (section) { + return window.pageYOffset >= section.offsetTop - sectionMargin; + } else { + return false; + } + }); + } + if (sectionIndex > -1) { + const current = sections.length - sectionIndex - 1; + if (current !== currentActive) { + removeAllActive(); + currentActive = current; + makeActive(current); + if (init) { + window.dispatchEvent(sectionChanged); + } + init = true; + } + } + }; + + const inHiddenRegion = (top, bottom, hiddenRegions) => { + for (const region of hiddenRegions) { + if (top <= region.bottom && bottom >= region.top) { + return true; + } + } + return false; + }; + + const categorySelector = "header.quarto-title-block .quarto-category"; + const activateCategories = (href) => { + // Find any categories + // Surround them with a link pointing back to: + // #category=Authoring + try { + const categoryEls = window.document.querySelectorAll(categorySelector); + for (const categoryEl of categoryEls) { + const categoryText = categoryEl.textContent; + if (categoryText) { + const link = `${href}#category=${encodeURIComponent(categoryText)}`; + const linkEl = window.document.createElement("a"); + linkEl.setAttribute("href", link); + for (const child of categoryEl.childNodes) { + linkEl.append(child); + } + categoryEl.appendChild(linkEl); + } + } + } catch { + // Ignore errors + } + }; + function hasTitleCategories() { + return window.document.querySelector(categorySelector) !== null; + } + + function offsetRelativeUrl(url) { + const offset = getMeta("quarto:offset"); + return offset ? 
offset + url : url; + } + + function offsetAbsoluteUrl(url) { + const offset = getMeta("quarto:offset"); + const baseUrl = new URL(offset, window.location); + + const projRelativeUrl = url.replace(baseUrl, ""); + if (projRelativeUrl.startsWith("/")) { + return projRelativeUrl; + } else { + return "/" + projRelativeUrl; + } + } + + // read a meta tag value + function getMeta(metaName) { + const metas = window.document.getElementsByTagName("meta"); + for (let i = 0; i < metas.length; i++) { + if (metas[i].getAttribute("name") === metaName) { + return metas[i].getAttribute("content"); + } + } + return ""; + } + + async function findAndActivateCategories() { + // Categories search with listing only use path without query + const currentPagePath = offsetAbsoluteUrl( + window.location.origin + window.location.pathname + ); + const response = await fetch(offsetRelativeUrl("listings.json")); + if (response.status == 200) { + return response.json().then(function (listingPaths) { + const listingHrefs = []; + for (const listingPath of listingPaths) { + const pathWithoutLeadingSlash = listingPath.listing.substring(1); + for (const item of listingPath.items) { + if ( + item === currentPagePath || + item === currentPagePath + "index.html" + ) { + // Resolve this path against the offset to be sure + // we already are using the correct path to the listing + // (this adjusts the listing urls to be rooted against + // whatever root the page is actually running against) + const relative = offsetRelativeUrl(pathWithoutLeadingSlash); + const baseUrl = window.location; + const resolvedPath = new URL(relative, baseUrl); + listingHrefs.push(resolvedPath.pathname); + break; + } + } + } + + // Look up the tree for a nearby linting and use that if we find one + const nearestListing = findNearestParentListing( + offsetAbsoluteUrl(window.location.pathname), + listingHrefs + ); + if (nearestListing) { + activateCategories(nearestListing); + } else { + // See if the referrer is a listing page 
for this item + const referredRelativePath = offsetAbsoluteUrl(document.referrer); + const referrerListing = listingHrefs.find((listingHref) => { + const isListingReferrer = + listingHref === referredRelativePath || + listingHref === referredRelativePath + "index.html"; + return isListingReferrer; + }); + + if (referrerListing) { + // Try to use the referrer if possible + activateCategories(referrerListing); + } else if (listingHrefs.length > 0) { + // Otherwise, just fall back to the first listing + activateCategories(listingHrefs[0]); + } + } + }); + } + } + if (hasTitleCategories()) { + findAndActivateCategories(); + } + + const findNearestParentListing = (href, listingHrefs) => { + if (!href || !listingHrefs) { + return undefined; + } + // Look up the tree for a nearby linting and use that if we find one + const relativeParts = href.substring(1).split("/"); + while (relativeParts.length > 0) { + const path = relativeParts.join("/"); + for (const listingHref of listingHrefs) { + if (listingHref.startsWith(path)) { + return listingHref; + } + } + relativeParts.pop(); + } + + return undefined; + }; + + const manageSidebarVisiblity = (el, placeholderDescriptor) => { + let isVisible = true; + let elRect; + + return (hiddenRegions) => { + if (el === null) { + return; + } + + // Find the last element of the TOC + const lastChildEl = el.lastElementChild; + + if (lastChildEl) { + // Converts the sidebar to a menu + const convertToMenu = () => { + for (const child of el.children) { + child.style.opacity = 0; + child.style.overflow = "hidden"; + child.style.pointerEvents = "none"; + } + + nexttick(() => { + const toggleContainer = window.document.createElement("div"); + toggleContainer.style.width = "100%"; + toggleContainer.classList.add("zindex-over-content"); + toggleContainer.classList.add("quarto-sidebar-toggle"); + toggleContainer.classList.add("headroom-target"); // Marks this to be managed by headeroom + toggleContainer.id = placeholderDescriptor.id; + 
toggleContainer.style.position = "fixed"; + + const toggleIcon = window.document.createElement("i"); + toggleIcon.classList.add("quarto-sidebar-toggle-icon"); + toggleIcon.classList.add("bi"); + toggleIcon.classList.add("bi-caret-down-fill"); + + const toggleTitle = window.document.createElement("div"); + const titleEl = window.document.body.querySelector( + placeholderDescriptor.titleSelector + ); + if (titleEl) { + toggleTitle.append( + titleEl.textContent || titleEl.innerText, + toggleIcon + ); + } + toggleTitle.classList.add("zindex-over-content"); + toggleTitle.classList.add("quarto-sidebar-toggle-title"); + toggleContainer.append(toggleTitle); + + const toggleContents = window.document.createElement("div"); + toggleContents.classList = el.classList; + toggleContents.classList.add("zindex-over-content"); + toggleContents.classList.add("quarto-sidebar-toggle-contents"); + for (const child of el.children) { + if (child.id === "toc-title") { + continue; + } + + const clone = child.cloneNode(true); + clone.style.opacity = 1; + clone.style.pointerEvents = null; + clone.style.display = null; + toggleContents.append(clone); + } + toggleContents.style.height = "0px"; + const positionToggle = () => { + // position the element (top left of parent, same width as parent) + if (!elRect) { + elRect = el.getBoundingClientRect(); + } + toggleContainer.style.left = `${elRect.left}px`; + toggleContainer.style.top = `${elRect.top}px`; + toggleContainer.style.width = `${elRect.width}px`; + }; + positionToggle(); + + toggleContainer.append(toggleContents); + el.parentElement.prepend(toggleContainer); + + // Process clicks + let tocShowing = false; + // Allow the caller to control whether this is dismissed + // when it is clicked (e.g. sidebar navigation supports + // opening and closing the nav tree, so don't dismiss on click) + const clickEl = placeholderDescriptor.dismissOnClick + ? 
toggleContainer + : toggleTitle; + + const closeToggle = () => { + if (tocShowing) { + toggleContainer.classList.remove("expanded"); + toggleContents.style.height = "0px"; + tocShowing = false; + } + }; + + // Get rid of any expanded toggle if the user scrolls + window.document.addEventListener( + "scroll", + throttle(() => { + closeToggle(); + }, 50) + ); + + // Handle positioning of the toggle + window.addEventListener( + "resize", + throttle(() => { + elRect = undefined; + positionToggle(); + }, 50) + ); + + window.addEventListener("quarto-hrChanged", () => { + elRect = undefined; + }); + + // Process the click + clickEl.onclick = () => { + if (!tocShowing) { + toggleContainer.classList.add("expanded"); + toggleContents.style.height = null; + tocShowing = true; + } else { + closeToggle(); + } + }; + }); + }; + + // Converts a sidebar from a menu back to a sidebar + const convertToSidebar = () => { + for (const child of el.children) { + child.style.opacity = 1; + child.style.overflow = null; + child.style.pointerEvents = null; + } + + const placeholderEl = window.document.getElementById( + placeholderDescriptor.id + ); + if (placeholderEl) { + placeholderEl.remove(); + } + + el.classList.remove("rollup"); + }; + + if (isReaderMode()) { + convertToMenu(); + isVisible = false; + } else { + // Find the top and bottom o the element that is being managed + const elTop = el.offsetTop; + const elBottom = + elTop + lastChildEl.offsetTop + lastChildEl.offsetHeight; + + if (!isVisible) { + // If the element is current not visible reveal if there are + // no conflicts with overlay regions + if (!inHiddenRegion(elTop, elBottom, hiddenRegions)) { + convertToSidebar(); + isVisible = true; + } + } else { + // If the element is visible, hide it if it conflicts with overlay regions + // and insert a placeholder toggle (or if we're in reader mode) + if (inHiddenRegion(elTop, elBottom, hiddenRegions)) { + convertToMenu(); + isVisible = false; + } + } + } + } + }; + }; + + const 
tabEls = document.querySelectorAll('a[data-bs-toggle="tab"]'); + for (const tabEl of tabEls) { + const id = tabEl.getAttribute("data-bs-target"); + if (id) { + const columnEl = document.querySelector( + `${id} .column-margin, .tabset-margin-content` + ); + if (columnEl) + tabEl.addEventListener("shown.bs.tab", function (event) { + const el = event.srcElement; + if (el) { + const visibleCls = `${el.id}-margin-content`; + // walk up until we find a parent tabset + let panelTabsetEl = el.parentElement; + while (panelTabsetEl) { + if (panelTabsetEl.classList.contains("panel-tabset")) { + break; + } + panelTabsetEl = panelTabsetEl.parentElement; + } + + if (panelTabsetEl) { + const prevSib = panelTabsetEl.previousElementSibling; + if ( + prevSib && + prevSib.classList.contains("tabset-margin-container") + ) { + const childNodes = prevSib.querySelectorAll( + ".tabset-margin-content" + ); + for (const childEl of childNodes) { + if (childEl.classList.contains(visibleCls)) { + childEl.classList.remove("collapse"); + } else { + childEl.classList.add("collapse"); + } + } + } + } + } + + layoutMarginEls(); + }); + } + } + + // Manage the visibility of the toc and the sidebar + const marginScrollVisibility = manageSidebarVisiblity(marginSidebarEl, { + id: "quarto-toc-toggle", + titleSelector: "#toc-title", + dismissOnClick: true, + }); + const sidebarScrollVisiblity = manageSidebarVisiblity(sidebarEl, { + id: "quarto-sidebarnav-toggle", + titleSelector: ".title", + dismissOnClick: false, + }); + let tocLeftScrollVisibility; + if (leftTocEl) { + tocLeftScrollVisibility = manageSidebarVisiblity(leftTocEl, { + id: "quarto-lefttoc-toggle", + titleSelector: "#toc-title", + dismissOnClick: true, + }); + } + + // Find the first element that uses formatting in special columns + const conflictingEls = window.document.body.querySelectorAll( + '[class^="column-"], [class*=" column-"], aside, [class*="margin-caption"], [class*=" margin-caption"], [class*="margin-ref"], [class*=" 
margin-ref"]' + ); + + // Filter all the possibly conflicting elements into ones + // the do conflict on the left or ride side + const arrConflictingEls = Array.from(conflictingEls); + const leftSideConflictEls = arrConflictingEls.filter((el) => { + if (el.tagName === "ASIDE") { + return false; + } + return Array.from(el.classList).find((className) => { + return ( + className !== "column-body" && + className.startsWith("column-") && + !className.endsWith("right") && + !className.endsWith("container") && + className !== "column-margin" + ); + }); + }); + const rightSideConflictEls = arrConflictingEls.filter((el) => { + if (el.tagName === "ASIDE") { + return true; + } + + const hasMarginCaption = Array.from(el.classList).find((className) => { + return className == "margin-caption"; + }); + if (hasMarginCaption) { + return true; + } + + return Array.from(el.classList).find((className) => { + return ( + className !== "column-body" && + !className.endsWith("container") && + className.startsWith("column-") && + !className.endsWith("left") + ); + }); + }); + + const kOverlapPaddingSize = 10; + function toRegions(els) { + return els.map((el) => { + const boundRect = el.getBoundingClientRect(); + const top = + boundRect.top + + document.documentElement.scrollTop - + kOverlapPaddingSize; + return { + top, + bottom: top + el.scrollHeight + 2 * kOverlapPaddingSize, + }; + }); + } + + let hasObserved = false; + const visibleItemObserver = (els) => { + let visibleElements = [...els]; + const intersectionObserver = new IntersectionObserver( + (entries, _observer) => { + entries.forEach((entry) => { + if (entry.isIntersecting) { + if (visibleElements.indexOf(entry.target) === -1) { + visibleElements.push(entry.target); + } + } else { + visibleElements = visibleElements.filter((visibleEntry) => { + return visibleEntry !== entry; + }); + } + }); + + if (!hasObserved) { + hideOverlappedSidebars(); + } + hasObserved = true; + }, + {} + ); + els.forEach((el) => { + 
intersectionObserver.observe(el); + }); + + return { + getVisibleEntries: () => { + return visibleElements; + }, + }; + }; + + const rightElementObserver = visibleItemObserver(rightSideConflictEls); + const leftElementObserver = visibleItemObserver(leftSideConflictEls); + + const hideOverlappedSidebars = () => { + marginScrollVisibility(toRegions(rightElementObserver.getVisibleEntries())); + sidebarScrollVisiblity(toRegions(leftElementObserver.getVisibleEntries())); + if (tocLeftScrollVisibility) { + tocLeftScrollVisibility( + toRegions(leftElementObserver.getVisibleEntries()) + ); + } + }; + + window.quartoToggleReader = () => { + // Applies a slow class (or removes it) + // to update the transition speed + const slowTransition = (slow) => { + const manageTransition = (id, slow) => { + const el = document.getElementById(id); + if (el) { + if (slow) { + el.classList.add("slow"); + } else { + el.classList.remove("slow"); + } + } + }; + + manageTransition("TOC", slow); + manageTransition("quarto-sidebar", slow); + }; + const readerMode = !isReaderMode(); + setReaderModeValue(readerMode); + + // If we're entering reader mode, slow the transition + if (readerMode) { + slowTransition(readerMode); + } + highlightReaderToggle(readerMode); + hideOverlappedSidebars(); + + // If we're exiting reader mode, restore the non-slow transition + if (!readerMode) { + slowTransition(!readerMode); + } + }; + + const highlightReaderToggle = (readerMode) => { + const els = document.querySelectorAll(".quarto-reader-toggle"); + if (els) { + els.forEach((el) => { + if (readerMode) { + el.classList.add("reader"); + } else { + el.classList.remove("reader"); + } + }); + } + }; + + const setReaderModeValue = (val) => { + if (window.location.protocol !== "file:") { + window.localStorage.setItem("quarto-reader-mode", val); + } else { + localReaderMode = val; + } + }; + + const isReaderMode = () => { + if (window.location.protocol !== "file:") { + return 
window.localStorage.getItem("quarto-reader-mode") === "true"; + } else { + return localReaderMode; + } + }; + let localReaderMode = null; + + const tocOpenDepthStr = tocEl?.getAttribute("data-toc-expanded"); + const tocOpenDepth = tocOpenDepthStr ? Number(tocOpenDepthStr) : 1; + + // Walk the TOC and collapse/expand nodes + // Nodes are expanded if: + // - they are top level + // - they have children that are 'active' links + // - they are directly below an link that is 'active' + const walk = (el, depth) => { + // Tick depth when we enter a UL + if (el.tagName === "UL") { + depth = depth + 1; + } + + // It this is active link + let isActiveNode = false; + if (el.tagName === "A" && el.classList.contains("active")) { + isActiveNode = true; + } + + // See if there is an active child to this element + let hasActiveChild = false; + for (child of el.children) { + hasActiveChild = walk(child, depth) || hasActiveChild; + } + + // Process the collapse state if this is an UL + if (el.tagName === "UL") { + if (tocOpenDepth === -1 && depth > 1) { + // toc-expand: false + el.classList.add("collapse"); + } else if ( + depth <= tocOpenDepth || + hasActiveChild || + prevSiblingIsActiveLink(el) + ) { + el.classList.remove("collapse"); + } else { + el.classList.add("collapse"); + } + + // untick depth when we leave a UL + depth = depth - 1; + } + return hasActiveChild || isActiveNode; + }; + + // walk the TOC and expand / collapse any items that should be shown + if (tocEl) { + updateActiveLink(); + walk(tocEl, 0); + } + + // Throttle the scroll event and walk peridiocally + window.document.addEventListener( + "scroll", + throttle(() => { + if (tocEl) { + updateActiveLink(); + walk(tocEl, 0); + } + if (!isReaderMode()) { + hideOverlappedSidebars(); + } + }, 5) + ); + window.addEventListener( + "resize", + throttle(() => { + if (tocEl) { + updateActiveLink(); + walk(tocEl, 0); + } + if (!isReaderMode()) { + hideOverlappedSidebars(); + } + }, 10) + ); + hideOverlappedSidebars(); + 
highlightReaderToggle(isReaderMode()); +}); + +// grouped tabsets +window.addEventListener("pageshow", (_event) => { + function getTabSettings() { + const data = localStorage.getItem("quarto-persistent-tabsets-data"); + if (!data) { + localStorage.setItem("quarto-persistent-tabsets-data", "{}"); + return {}; + } + if (data) { + return JSON.parse(data); + } + } + + function setTabSettings(data) { + localStorage.setItem( + "quarto-persistent-tabsets-data", + JSON.stringify(data) + ); + } + + function setTabState(groupName, groupValue) { + const data = getTabSettings(); + data[groupName] = groupValue; + setTabSettings(data); + } + + function toggleTab(tab, active) { + const tabPanelId = tab.getAttribute("aria-controls"); + const tabPanel = document.getElementById(tabPanelId); + if (active) { + tab.classList.add("active"); + tabPanel.classList.add("active"); + } else { + tab.classList.remove("active"); + tabPanel.classList.remove("active"); + } + } + + function toggleAll(selectedGroup, selectorsToSync) { + for (const [thisGroup, tabs] of Object.entries(selectorsToSync)) { + const active = selectedGroup === thisGroup; + for (const tab of tabs) { + toggleTab(tab, active); + } + } + } + + function findSelectorsToSyncByLanguage() { + const result = {}; + const tabs = Array.from( + document.querySelectorAll(`div[data-group] a[id^='tabset-']`) + ); + for (const item of tabs) { + const div = item.parentElement.parentElement.parentElement; + const group = div.getAttribute("data-group"); + if (!result[group]) { + result[group] = {}; + } + const selectorsToSync = result[group]; + const value = item.innerHTML; + if (!selectorsToSync[value]) { + selectorsToSync[value] = []; + } + selectorsToSync[value].push(item); + } + return result; + } + + function setupSelectorSync() { + const selectorsToSync = findSelectorsToSyncByLanguage(); + Object.entries(selectorsToSync).forEach(([group, tabSetsByValue]) => { + Object.entries(tabSetsByValue).forEach(([value, items]) => { + 
items.forEach((item) => { + item.addEventListener("click", (_event) => { + setTabState(group, value); + toggleAll(value, selectorsToSync[group]); + }); + }); + }); + }); + return selectorsToSync; + } + + const selectorsToSync = setupSelectorSync(); + for (const [group, selectedName] of Object.entries(getTabSettings())) { + const selectors = selectorsToSync[group]; + // it's possible that stale state gives us empty selections, so we explicitly check here. + if (selectors) { + toggleAll(selectedName, selectors); + } + } +}); + +function throttle(func, wait) { + let waiting = false; + return function () { + if (!waiting) { + func.apply(this, arguments); + waiting = true; + setTimeout(function () { + waiting = false; + }, wait); + } + }; +} + +function nexttick(func) { + return setTimeout(func, 0); +} diff --git a/docs/2_39/site_libs/quarto-html/tippy.css b/docs/2_39/site_libs/quarto-html/tippy.css new file mode 100644 index 000000000..e6ae635cb --- /dev/null +++ b/docs/2_39/site_libs/quarto-html/tippy.css @@ -0,0 +1 @@ +.tippy-box[data-animation=fade][data-state=hidden]{opacity:0}[data-tippy-root]{max-width:calc(100vw - 10px)}.tippy-box{position:relative;background-color:#333;color:#fff;border-radius:4px;font-size:14px;line-height:1.4;white-space:normal;outline:0;transition-property:transform,visibility,opacity}.tippy-box[data-placement^=top]>.tippy-arrow{bottom:0}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-7px;left:0;border-width:8px 8px 0;border-top-color:initial;transform-origin:center top}.tippy-box[data-placement^=bottom]>.tippy-arrow{top:0}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-7px;left:0;border-width:0 8px 8px;border-bottom-color:initial;transform-origin:center bottom}.tippy-box[data-placement^=left]>.tippy-arrow{right:0}.tippy-box[data-placement^=left]>.tippy-arrow:before{border-width:8px 0 8px 8px;border-left-color:initial;right:-7px;transform-origin:center 
left}.tippy-box[data-placement^=right]>.tippy-arrow{left:0}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-7px;border-width:8px 8px 8px 0;border-right-color:initial;transform-origin:center right}.tippy-box[data-inertia][data-state=visible]{transition-timing-function:cubic-bezier(.54,1.5,.38,1.11)}.tippy-arrow{width:16px;height:16px;color:#333}.tippy-arrow:before{content:"";position:absolute;border-color:transparent;border-style:solid}.tippy-content{position:relative;padding:5px 9px;z-index:1} \ No newline at end of file diff --git a/docs/2_39/site_libs/quarto-html/tippy.umd.min.js b/docs/2_39/site_libs/quarto-html/tippy.umd.min.js new file mode 100644 index 000000000..ca292be32 --- /dev/null +++ b/docs/2_39/site_libs/quarto-html/tippy.umd.min.js @@ -0,0 +1,2 @@ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t(require("@popperjs/core")):"function"==typeof define&&define.amd?define(["@popperjs/core"],t):(e=e||self).tippy=t(e.Popper)}(this,(function(e){"use strict";var t={passive:!0,capture:!0},n=function(){return document.body};function r(e,t,n){if(Array.isArray(e)){var r=e[t];return null==r?Array.isArray(n)?n[t]:n:r}return e}function o(e,t){var n={}.toString.call(e);return 0===n.indexOf("[object")&&n.indexOf(t+"]")>-1}function i(e,t){return"function"==typeof e?e.apply(void 0,t):e}function a(e,t){return 0===t?e:function(r){clearTimeout(n),n=setTimeout((function(){e(r)}),t)};var n}function s(e,t){var n=Object.assign({},e);return t.forEach((function(e){delete n[e]})),n}function u(e){return[].concat(e)}function c(e,t){-1===e.indexOf(t)&&e.push(t)}function p(e){return e.split("-")[0]}function f(e){return[].slice.call(e)}function l(e){return Object.keys(e).reduce((function(t,n){return void 0!==e[n]&&(t[n]=e[n]),t}),{})}function d(){return document.createElement("div")}function v(e){return["Element","Fragment"].some((function(t){return o(e,t)}))}function m(e){return o(e,"MouseEvent")}function 
g(e){return!(!e||!e._tippy||e._tippy.reference!==e)}function h(e){return v(e)?[e]:function(e){return o(e,"NodeList")}(e)?f(e):Array.isArray(e)?e:f(document.querySelectorAll(e))}function b(e,t){e.forEach((function(e){e&&(e.style.transitionDuration=t+"ms")}))}function y(e,t){e.forEach((function(e){e&&e.setAttribute("data-state",t)}))}function w(e){var t,n=u(e)[0];return null!=n&&null!=(t=n.ownerDocument)&&t.body?n.ownerDocument:document}function E(e,t,n){var r=t+"EventListener";["transitionend","webkitTransitionEnd"].forEach((function(t){e[r](t,n)}))}function O(e,t){for(var n=t;n;){var r;if(e.contains(n))return!0;n=null==n.getRootNode||null==(r=n.getRootNode())?void 0:r.host}return!1}var x={isTouch:!1},C=0;function T(){x.isTouch||(x.isTouch=!0,window.performance&&document.addEventListener("mousemove",A))}function A(){var e=performance.now();e-C<20&&(x.isTouch=!1,document.removeEventListener("mousemove",A)),C=e}function L(){var e=document.activeElement;if(g(e)){var t=e._tippy;e.blur&&!t.state.isVisible&&e.blur()}}var D=!!("undefined"!=typeof window&&"undefined"!=typeof document)&&!!window.msCrypto,R=Object.assign({appendTo:n,aria:{content:"auto",expanded:"auto"},delay:0,duration:[300,250],getReferenceClientRect:null,hideOnClick:!0,ignoreAttributes:!1,interactive:!1,interactiveBorder:2,interactiveDebounce:0,moveTransition:"",offset:[0,10],onAfterUpdate:function(){},onBeforeUpdate:function(){},onCreate:function(){},onDestroy:function(){},onHidden:function(){},onHide:function(){},onMount:function(){},onShow:function(){},onShown:function(){},onTrigger:function(){},onUntrigger:function(){},onClickOutside:function(){},placement:"top",plugins:[],popperOptions:{},render:null,showOnCreate:!1,touch:!0,trigger:"mouseenter focus",triggerTarget:null},{animateFill:!1,followCursor:!1,inlinePositioning:!1,sticky:!1},{allowHTML:!1,animation:"fade",arrow:!0,content:"",inertia:!1,maxWidth:350,role:"tooltip",theme:"",zIndex:9999}),k=Object.keys(R);function P(e){var 
t=(e.plugins||[]).reduce((function(t,n){var r,o=n.name,i=n.defaultValue;o&&(t[o]=void 0!==e[o]?e[o]:null!=(r=R[o])?r:i);return t}),{});return Object.assign({},e,t)}function j(e,t){var n=Object.assign({},t,{content:i(t.content,[e])},t.ignoreAttributes?{}:function(e,t){return(t?Object.keys(P(Object.assign({},R,{plugins:t}))):k).reduce((function(t,n){var r=(e.getAttribute("data-tippy-"+n)||"").trim();if(!r)return t;if("content"===n)t[n]=r;else try{t[n]=JSON.parse(r)}catch(e){t[n]=r}return t}),{})}(e,t.plugins));return n.aria=Object.assign({},R.aria,n.aria),n.aria={expanded:"auto"===n.aria.expanded?t.interactive:n.aria.expanded,content:"auto"===n.aria.content?t.interactive?null:"describedby":n.aria.content},n}function M(e,t){e.innerHTML=t}function V(e){var t=d();return!0===e?t.className="tippy-arrow":(t.className="tippy-svg-arrow",v(e)?t.appendChild(e):M(t,e)),t}function I(e,t){v(t.content)?(M(e,""),e.appendChild(t.content)):"function"!=typeof t.content&&(t.allowHTML?M(e,t.content):e.textContent=t.content)}function S(e){var t=e.firstElementChild,n=f(t.children);return{box:t,content:n.find((function(e){return e.classList.contains("tippy-content")})),arrow:n.find((function(e){return e.classList.contains("tippy-arrow")||e.classList.contains("tippy-svg-arrow")})),backdrop:n.find((function(e){return e.classList.contains("tippy-backdrop")}))}}function N(e){var t=d(),n=d();n.className="tippy-box",n.setAttribute("data-state","hidden"),n.setAttribute("tabindex","-1");var r=d();function o(n,r){var o=S(t),i=o.box,a=o.content,s=o.arrow;r.theme?i.setAttribute("data-theme",r.theme):i.removeAttribute("data-theme"),"string"==typeof r.animation?i.setAttribute("data-animation",r.animation):i.removeAttribute("data-animation"),r.inertia?i.setAttribute("data-inertia",""):i.removeAttribute("data-inertia"),i.style.maxWidth="number"==typeof 
r.maxWidth?r.maxWidth+"px":r.maxWidth,r.role?i.setAttribute("role",r.role):i.removeAttribute("role"),n.content===r.content&&n.allowHTML===r.allowHTML||I(a,e.props),r.arrow?s?n.arrow!==r.arrow&&(i.removeChild(s),i.appendChild(V(r.arrow))):i.appendChild(V(r.arrow)):s&&i.removeChild(s)}return r.className="tippy-content",r.setAttribute("data-state","hidden"),I(r,e.props),t.appendChild(n),n.appendChild(r),o(e.props,e.props),{popper:t,onUpdate:o}}N.$$tippy=!0;var B=1,H=[],U=[];function _(o,s){var v,g,h,C,T,A,L,k,M=j(o,Object.assign({},R,P(l(s)))),V=!1,I=!1,N=!1,_=!1,F=[],W=a(we,M.interactiveDebounce),X=B++,Y=(k=M.plugins).filter((function(e,t){return k.indexOf(e)===t})),$={id:X,reference:o,popper:d(),popperInstance:null,props:M,state:{isEnabled:!0,isVisible:!1,isDestroyed:!1,isMounted:!1,isShown:!1},plugins:Y,clearDelayTimeouts:function(){clearTimeout(v),clearTimeout(g),cancelAnimationFrame(h)},setProps:function(e){if($.state.isDestroyed)return;ae("onBeforeUpdate",[$,e]),be();var t=$.props,n=j(o,Object.assign({},t,l(e),{ignoreAttributes:!0}));$.props=n,he(),t.interactiveDebounce!==n.interactiveDebounce&&(ce(),W=a(we,n.interactiveDebounce));t.triggerTarget&&!n.triggerTarget?u(t.triggerTarget).forEach((function(e){e.removeAttribute("aria-expanded")})):n.triggerTarget&&o.removeAttribute("aria-expanded");ue(),ie(),J&&J(t,n);$.popperInstance&&(Ce(),Ae().forEach((function(e){requestAnimationFrame(e._tippy.popperInstance.forceUpdate)})));ae("onAfterUpdate",[$,e])},setContent:function(e){$.setProps({content:e})},show:function(){var e=$.state.isVisible,t=$.state.isDestroyed,o=!$.state.isEnabled,a=x.isTouch&&!$.props.touch,s=r($.props.duration,0,R.duration);if(e||t||o||a)return;if(te().hasAttribute("disabled"))return;if(ae("onShow",[$],!1),!1===$.props.onShow($))return;$.state.isVisible=!0,ee()&&(z.style.visibility="visible");ie(),de(),$.state.isMounted||(z.style.transition="none");if(ee()){var u=re(),p=u.box,f=u.content;b([p,f],0)}A=function(){var 
e;if($.state.isVisible&&!_){if(_=!0,z.offsetHeight,z.style.transition=$.props.moveTransition,ee()&&$.props.animation){var t=re(),n=t.box,r=t.content;b([n,r],s),y([n,r],"visible")}se(),ue(),c(U,$),null==(e=$.popperInstance)||e.forceUpdate(),ae("onMount",[$]),$.props.animation&&ee()&&function(e,t){me(e,t)}(s,(function(){$.state.isShown=!0,ae("onShown",[$])}))}},function(){var e,t=$.props.appendTo,r=te();e=$.props.interactive&&t===n||"parent"===t?r.parentNode:i(t,[r]);e.contains(z)||e.appendChild(z);$.state.isMounted=!0,Ce()}()},hide:function(){var e=!$.state.isVisible,t=$.state.isDestroyed,n=!$.state.isEnabled,o=r($.props.duration,1,R.duration);if(e||t||n)return;if(ae("onHide",[$],!1),!1===$.props.onHide($))return;$.state.isVisible=!1,$.state.isShown=!1,_=!1,V=!1,ee()&&(z.style.visibility="hidden");if(ce(),ve(),ie(!0),ee()){var i=re(),a=i.box,s=i.content;$.props.animation&&(b([a,s],o),y([a,s],"hidden"))}se(),ue(),$.props.animation?ee()&&function(e,t){me(e,(function(){!$.state.isVisible&&z.parentNode&&z.parentNode.contains(z)&&t()}))}(o,$.unmount):$.unmount()},hideWithInteractivity:function(e){ne().addEventListener("mousemove",W),c(H,W),W(e)},enable:function(){$.state.isEnabled=!0},disable:function(){$.hide(),$.state.isEnabled=!1},unmount:function(){$.state.isVisible&&$.hide();if(!$.state.isMounted)return;Te(),Ae().forEach((function(e){e._tippy.unmount()})),z.parentNode&&z.parentNode.removeChild(z);U=U.filter((function(e){return e!==$})),$.state.isMounted=!1,ae("onHidden",[$])},destroy:function(){if($.state.isDestroyed)return;$.clearDelayTimeouts(),$.unmount(),be(),delete o._tippy,$.state.isDestroyed=!0,ae("onDestroy",[$])}};if(!M.render)return $;var q=M.render($),z=q.popper,J=q.onUpdate;z.setAttribute("data-tippy-root",""),z.id="tippy-"+$.id,$.popper=z,o._tippy=$,z._tippy=$;var G=Y.map((function(e){return e.fn($)})),K=o.hasAttribute("aria-expanded");return 
he(),ue(),ie(),ae("onCreate",[$]),M.showOnCreate&&Le(),z.addEventListener("mouseenter",(function(){$.props.interactive&&$.state.isVisible&&$.clearDelayTimeouts()})),z.addEventListener("mouseleave",(function(){$.props.interactive&&$.props.trigger.indexOf("mouseenter")>=0&&ne().addEventListener("mousemove",W)})),$;function Q(){var e=$.props.touch;return Array.isArray(e)?e:[e,0]}function Z(){return"hold"===Q()[0]}function ee(){var e;return!(null==(e=$.props.render)||!e.$$tippy)}function te(){return L||o}function ne(){var e=te().parentNode;return e?w(e):document}function re(){return S(z)}function oe(e){return $.state.isMounted&&!$.state.isVisible||x.isTouch||C&&"focus"===C.type?0:r($.props.delay,e?0:1,R.delay)}function ie(e){void 0===e&&(e=!1),z.style.pointerEvents=$.props.interactive&&!e?"":"none",z.style.zIndex=""+$.props.zIndex}function ae(e,t,n){var r;(void 0===n&&(n=!0),G.forEach((function(n){n[e]&&n[e].apply(n,t)})),n)&&(r=$.props)[e].apply(r,t)}function se(){var e=$.props.aria;if(e.content){var t="aria-"+e.content,n=z.id;u($.props.triggerTarget||o).forEach((function(e){var r=e.getAttribute(t);if($.state.isVisible)e.setAttribute(t,r?r+" "+n:n);else{var o=r&&r.replace(n,"").trim();o?e.setAttribute(t,o):e.removeAttribute(t)}}))}}function ue(){!K&&$.props.aria.expanded&&u($.props.triggerTarget||o).forEach((function(e){$.props.interactive?e.setAttribute("aria-expanded",$.state.isVisible&&e===te()?"true":"false"):e.removeAttribute("aria-expanded")}))}function ce(){ne().removeEventListener("mousemove",W),H=H.filter((function(e){return e!==W}))}function pe(e){if(!x.isTouch||!N&&"mousedown"!==e.type){var t=e.composedPath&&e.composedPath()[0]||e.target;if(!$.props.interactive||!O(z,t)){if(u($.props.triggerTarget||o).some((function(e){return O(e,t)}))){if(x.isTouch)return;if($.state.isVisible&&$.props.trigger.indexOf("click")>=0)return}else 
ae("onClickOutside",[$,e]);!0===$.props.hideOnClick&&($.clearDelayTimeouts(),$.hide(),I=!0,setTimeout((function(){I=!1})),$.state.isMounted||ve())}}}function fe(){N=!0}function le(){N=!1}function de(){var e=ne();e.addEventListener("mousedown",pe,!0),e.addEventListener("touchend",pe,t),e.addEventListener("touchstart",le,t),e.addEventListener("touchmove",fe,t)}function ve(){var e=ne();e.removeEventListener("mousedown",pe,!0),e.removeEventListener("touchend",pe,t),e.removeEventListener("touchstart",le,t),e.removeEventListener("touchmove",fe,t)}function me(e,t){var n=re().box;function r(e){e.target===n&&(E(n,"remove",r),t())}if(0===e)return t();E(n,"remove",T),E(n,"add",r),T=r}function ge(e,t,n){void 0===n&&(n=!1),u($.props.triggerTarget||o).forEach((function(r){r.addEventListener(e,t,n),F.push({node:r,eventType:e,handler:t,options:n})}))}function he(){var e;Z()&&(ge("touchstart",ye,{passive:!0}),ge("touchend",Ee,{passive:!0})),(e=$.props.trigger,e.split(/\s+/).filter(Boolean)).forEach((function(e){if("manual"!==e)switch(ge(e,ye),e){case"mouseenter":ge("mouseleave",Ee);break;case"focus":ge(D?"focusout":"blur",Oe);break;case"focusin":ge("focusout",Oe)}}))}function be(){F.forEach((function(e){var t=e.node,n=e.eventType,r=e.handler,o=e.options;t.removeEventListener(n,r,o)})),F=[]}function ye(e){var t,n=!1;if($.state.isEnabled&&!xe(e)&&!I){var r="focus"===(null==(t=C)?void 0:t.type);C=e,L=e.currentTarget,ue(),!$.state.isVisible&&m(e)&&H.forEach((function(t){return t(e)})),"click"===e.type&&($.props.trigger.indexOf("mouseenter")<0||V)&&!1!==$.props.hideOnClick&&$.state.isVisible?n=!0:Le(e),"click"===e.type&&(V=!n),n&&!r&&De(e)}}function we(e){var t=e.target,n=te().contains(t)||z.contains(t);"mousemove"===e.type&&n||function(e,t){var n=t.clientX,r=t.clientY;return e.every((function(e){var t=e.popperRect,o=e.popperState,i=e.props.interactiveBorder,a=p(o.placement),s=o.modifiersData.offset;if(!s)return!0;var 
u="bottom"===a?s.top.y:0,c="top"===a?s.bottom.y:0,f="right"===a?s.left.x:0,l="left"===a?s.right.x:0,d=t.top-r+u>i,v=r-t.bottom-c>i,m=t.left-n+f>i,g=n-t.right-l>i;return d||v||m||g}))}(Ae().concat(z).map((function(e){var t,n=null==(t=e._tippy.popperInstance)?void 0:t.state;return n?{popperRect:e.getBoundingClientRect(),popperState:n,props:M}:null})).filter(Boolean),e)&&(ce(),De(e))}function Ee(e){xe(e)||$.props.trigger.indexOf("click")>=0&&V||($.props.interactive?$.hideWithInteractivity(e):De(e))}function Oe(e){$.props.trigger.indexOf("focusin")<0&&e.target!==te()||$.props.interactive&&e.relatedTarget&&z.contains(e.relatedTarget)||De(e)}function xe(e){return!!x.isTouch&&Z()!==e.type.indexOf("touch")>=0}function Ce(){Te();var t=$.props,n=t.popperOptions,r=t.placement,i=t.offset,a=t.getReferenceClientRect,s=t.moveTransition,u=ee()?S(z).arrow:null,c=a?{getBoundingClientRect:a,contextElement:a.contextElement||te()}:o,p=[{name:"offset",options:{offset:i}},{name:"preventOverflow",options:{padding:{top:2,bottom:2,left:5,right:5}}},{name:"flip",options:{padding:5}},{name:"computeStyles",options:{adaptive:!s}},{name:"$$tippy",enabled:!0,phase:"beforeWrite",requires:["computeStyles"],fn:function(e){var t=e.state;if(ee()){var n=re().box;["placement","reference-hidden","escaped"].forEach((function(e){"placement"===e?n.setAttribute("data-placement",t.placement):t.attributes.popper["data-popper-"+e]?n.setAttribute("data-"+e,""):n.removeAttribute("data-"+e)})),t.attributes.popper={}}}}];ee()&&u&&p.push({name:"arrow",options:{element:u,padding:3}}),p.push.apply(p,(null==n?void 0:n.modifiers)||[]),$.popperInstance=e.createPopper(c,z,Object.assign({},n,{placement:r,onFirstUpdate:A,modifiers:p}))}function Te(){$.popperInstance&&($.popperInstance.destroy(),$.popperInstance=null)}function Ae(){return f(z.querySelectorAll("[data-tippy-root]"))}function Le(e){$.clearDelayTimeouts(),e&&ae("onTrigger",[$,e]),de();var 
t=oe(!0),n=Q(),r=n[0],o=n[1];x.isTouch&&"hold"===r&&o&&(t=o),t?v=setTimeout((function(){$.show()}),t):$.show()}function De(e){if($.clearDelayTimeouts(),ae("onUntrigger",[$,e]),$.state.isVisible){if(!($.props.trigger.indexOf("mouseenter")>=0&&$.props.trigger.indexOf("click")>=0&&["mouseleave","mousemove"].indexOf(e.type)>=0&&V)){var t=oe(!1);t?g=setTimeout((function(){$.state.isVisible&&$.hide()}),t):h=requestAnimationFrame((function(){$.hide()}))}}else ve()}}function F(e,n){void 0===n&&(n={});var r=R.plugins.concat(n.plugins||[]);document.addEventListener("touchstart",T,t),window.addEventListener("blur",L);var o=Object.assign({},n,{plugins:r}),i=h(e).reduce((function(e,t){var n=t&&_(t,o);return n&&e.push(n),e}),[]);return v(e)?i[0]:i}F.defaultProps=R,F.setDefaultProps=function(e){Object.keys(e).forEach((function(t){R[t]=e[t]}))},F.currentInput=x;var W=Object.assign({},e.applyStyles,{effect:function(e){var t=e.state,n={popper:{position:t.options.strategy,left:"0",top:"0",margin:"0"},arrow:{position:"absolute"},reference:{}};Object.assign(t.elements.popper.style,n.popper),t.styles=n,t.elements.arrow&&Object.assign(t.elements.arrow.style,n.arrow)}}),X={mouseover:"mouseenter",focusin:"focus",click:"click"};var Y={name:"animateFill",defaultValue:!1,fn:function(e){var t;if(null==(t=e.props.render)||!t.$$tippy)return{};var n=S(e.popper),r=n.box,o=n.content,i=e.props.animateFill?function(){var e=d();return e.className="tippy-backdrop",y([e],"hidden"),e}():null;return{onCreate:function(){i&&(r.insertBefore(i,r.firstElementChild),r.setAttribute("data-animatefill",""),r.style.overflow="hidden",e.setProps({arrow:!1,animation:"shift-away"}))},onMount:function(){if(i){var e=r.style.transitionDuration,t=Number(e.replace("ms",""));o.style.transitionDelay=Math.round(t/10)+"ms",i.style.transitionDuration=e,y([i],"visible")}},onShow:function(){i&&(i.style.transitionDuration="0ms")},onHide:function(){i&&y([i],"hidden")}}}};var $={clientX:0,clientY:0},q=[];function z(e){var 
t=e.clientX,n=e.clientY;$={clientX:t,clientY:n}}var J={name:"followCursor",defaultValue:!1,fn:function(e){var t=e.reference,n=w(e.props.triggerTarget||t),r=!1,o=!1,i=!0,a=e.props;function s(){return"initial"===e.props.followCursor&&e.state.isVisible}function u(){n.addEventListener("mousemove",f)}function c(){n.removeEventListener("mousemove",f)}function p(){r=!0,e.setProps({getReferenceClientRect:null}),r=!1}function f(n){var r=!n.target||t.contains(n.target),o=e.props.followCursor,i=n.clientX,a=n.clientY,s=t.getBoundingClientRect(),u=i-s.left,c=a-s.top;!r&&e.props.interactive||e.setProps({getReferenceClientRect:function(){var e=t.getBoundingClientRect(),n=i,r=a;"initial"===o&&(n=e.left+u,r=e.top+c);var s="horizontal"===o?e.top:r,p="vertical"===o?e.right:n,f="horizontal"===o?e.bottom:r,l="vertical"===o?e.left:n;return{width:p-l,height:f-s,top:s,right:p,bottom:f,left:l}}})}function l(){e.props.followCursor&&(q.push({instance:e,doc:n}),function(e){e.addEventListener("mousemove",z)}(n))}function d(){0===(q=q.filter((function(t){return t.instance!==e}))).filter((function(e){return e.doc===n})).length&&function(e){e.removeEventListener("mousemove",z)}(n)}return{onCreate:l,onDestroy:d,onBeforeUpdate:function(){a=e.props},onAfterUpdate:function(t,n){var i=n.followCursor;r||void 0!==i&&a.followCursor!==i&&(d(),i?(l(),!e.state.isMounted||o||s()||u()):(c(),p()))},onMount:function(){e.props.followCursor&&!o&&(i&&(f($),i=!1),s()||u())},onTrigger:function(e,t){m(t)&&($={clientX:t.clientX,clientY:t.clientY}),o="focus"===t.type},onHidden:function(){e.props.followCursor&&(p(),c(),i=!0)}}}};var G={name:"inlinePositioning",defaultValue:!1,fn:function(e){var t,n=e.reference;var r=-1,o=!1,i=[],a={name:"tippyInlinePositioning",enabled:!0,phase:"afterWrite",fn:function(o){var a=o.state;e.props.inlinePositioning&&(-1!==i.indexOf(a.placement)&&(i=[]),t!==a.placement&&-1===i.indexOf(a.placement)&&(i.push(a.placement),e.setProps({getReferenceClientRect:function(){return function(e){return 
function(e,t,n,r){if(n.length<2||null===e)return t;if(2===n.length&&r>=0&&n[0].left>n[1].right)return n[r]||t;switch(e){case"top":case"bottom":var o=n[0],i=n[n.length-1],a="top"===e,s=o.top,u=i.bottom,c=a?o.left:i.left,p=a?o.right:i.right;return{top:s,bottom:u,left:c,right:p,width:p-c,height:u-s};case"left":case"right":var f=Math.min.apply(Math,n.map((function(e){return e.left}))),l=Math.max.apply(Math,n.map((function(e){return e.right}))),d=n.filter((function(t){return"left"===e?t.left===f:t.right===l})),v=d[0].top,m=d[d.length-1].bottom;return{top:v,bottom:m,left:f,right:l,width:l-f,height:m-v};default:return t}}(p(e),n.getBoundingClientRect(),f(n.getClientRects()),r)}(a.placement)}})),t=a.placement)}};function s(){var t;o||(t=function(e,t){var n;return{popperOptions:Object.assign({},e.popperOptions,{modifiers:[].concat(((null==(n=e.popperOptions)?void 0:n.modifiers)||[]).filter((function(e){return e.name!==t.name})),[t])})}}(e.props,a),o=!0,e.setProps(t),o=!1)}return{onCreate:s,onAfterUpdate:s,onTrigger:function(t,n){if(m(n)){var o=f(e.reference.getClientRects()),i=o.find((function(e){return e.left-2<=n.clientX&&e.right+2>=n.clientX&&e.top-2<=n.clientY&&e.bottom+2>=n.clientY})),a=o.indexOf(i);r=a>-1?a:r}},onHidden:function(){r=-1}}}};var K={name:"sticky",defaultValue:!1,fn:function(e){var t=e.reference,n=e.popper;function r(t){return!0===e.props.sticky||e.props.sticky===t}var o=null,i=null;function a(){var s=r("reference")?(e.popperInstance?e.popperInstance.state.elements.reference:t).getBoundingClientRect():null,u=r("popper")?n.getBoundingClientRect():null;(s&&Q(o,s)||u&&Q(i,u))&&e.popperInstance&&e.popperInstance.update(),o=s,i=u,e.state.isMounted&&requestAnimationFrame(a)}return{onMount:function(){e.props.sticky&&a()}}}};function Q(e,t){return!e||!t||(e.top!==t.top||e.right!==t.right||e.bottom!==t.bottom||e.left!==t.left)}return F.setDefaultProps({plugins:[Y,J,G,K],render:N}),F.createSingleton=function(e,t){var n;void 0===t&&(t={});var 
r,o=e,i=[],a=[],c=t.overrides,p=[],f=!1;function l(){a=o.map((function(e){return u(e.props.triggerTarget||e.reference)})).reduce((function(e,t){return e.concat(t)}),[])}function v(){i=o.map((function(e){return e.reference}))}function m(e){o.forEach((function(t){e?t.enable():t.disable()}))}function g(e){return o.map((function(t){var n=t.setProps;return t.setProps=function(o){n(o),t.reference===r&&e.setProps(o)},function(){t.setProps=n}}))}function h(e,t){var n=a.indexOf(t);if(t!==r){r=t;var s=(c||[]).concat("content").reduce((function(e,t){return e[t]=o[n].props[t],e}),{});e.setProps(Object.assign({},s,{getReferenceClientRect:"function"==typeof s.getReferenceClientRect?s.getReferenceClientRect:function(){var e;return null==(e=i[n])?void 0:e.getBoundingClientRect()}}))}}m(!1),v(),l();var b={fn:function(){return{onDestroy:function(){m(!0)},onHidden:function(){r=null},onClickOutside:function(e){e.props.showOnCreate&&!f&&(f=!0,r=null)},onShow:function(e){e.props.showOnCreate&&!f&&(f=!0,h(e,i[0]))},onTrigger:function(e,t){h(e,t.currentTarget)}}}},y=F(d(),Object.assign({},s(t,["overrides"]),{plugins:[b].concat(t.plugins||[]),triggerTarget:a,popperOptions:Object.assign({},t.popperOptions,{modifiers:[].concat((null==(n=t.popperOptions)?void 0:n.modifiers)||[],[W])})})),w=y.show;y.show=function(e){if(w(),!r&&null==e)return h(y,i[0]);if(!r||null!=e){if("number"==typeof e)return i[e]&&h(y,i[e]);if(o.indexOf(e)>=0){var t=e.reference;return h(y,t)}return i.indexOf(e)>=0?h(y,e):void 0}},y.showNext=function(){var e=i[0];if(!r)return y.show(0);var t=i.indexOf(r);y.show(i[t+1]||e)},y.showPrevious=function(){var e=i[i.length-1];if(!r)return y.show(e);var t=i.indexOf(r),n=i[t-1]||e;y.show(n)};var E=y.setProps;return y.setProps=function(e){c=e.overrides||c,E(e)},y.setInstances=function(e){m(!0),p.forEach((function(e){return e()})),o=e,m(!1),v(),l(),p=g(y),y.setProps({triggerTarget:a})},p=g(y),y},F.delegate=function(e,n){var 
r=[],o=[],i=!1,a=n.target,c=s(n,["target"]),p=Object.assign({},c,{trigger:"manual",touch:!1}),f=Object.assign({touch:R.touch},c,{showOnCreate:!0}),l=F(e,p);function d(e){if(e.target&&!i){var t=e.target.closest(a);if(t){var r=t.getAttribute("data-tippy-trigger")||n.trigger||R.trigger;if(!t._tippy&&!("touchstart"===e.type&&"boolean"==typeof f.touch||"touchstart"!==e.type&&r.indexOf(X[e.type])<0)){var s=F(t,f);s&&(o=o.concat(s))}}}}function v(e,t,n,o){void 0===o&&(o=!1),e.addEventListener(t,n,o),r.push({node:e,eventType:t,handler:n,options:o})}return u(l).forEach((function(e){var n=e.destroy,a=e.enable,s=e.disable;e.destroy=function(e){void 0===e&&(e=!0),e&&o.forEach((function(e){e.destroy()})),o=[],r.forEach((function(e){var t=e.node,n=e.eventType,r=e.handler,o=e.options;t.removeEventListener(n,r,o)})),r=[],n()},e.enable=function(){a(),o.forEach((function(e){return e.enable()})),i=!1},e.disable=function(){s(),o.forEach((function(e){return e.disable()})),i=!0},function(e){var n=e.reference;v(n,"touchstart",d,t),v(n,"mouseover",d),v(n,"focusin",d),v(n,"click",d)}(e)})),l},F.hideAll=function(e){var t=void 0===e?{}:e,n=t.exclude,r=t.duration;U.forEach((function(e){var t=!1;if(n&&(t=g(n)?e.reference===n:e.popper===n.popper),!t){var o=e.props.duration;e.setProps({duration:r}),e.hide(),e.state.isDestroyed||e.setProps({duration:o})}}))},F.roundArrow='',F})); + diff --git a/docs/2_39/site_libs/quarto-nav/headroom.min.js b/docs/2_39/site_libs/quarto-nav/headroom.min.js new file mode 100644 index 000000000..b08f1dffb --- /dev/null +++ b/docs/2_39/site_libs/quarto-nav/headroom.min.js @@ -0,0 +1,7 @@ +/*! + * headroom.js v0.12.0 - Give your page some headroom. 
Hide your header until you need it + * Copyright (c) 2020 Nick Williams - http://wicky.nillia.ms/headroom.js + * License: MIT + */ + +!function(t,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(t=t||self).Headroom=n()}(this,function(){"use strict";function t(){return"undefined"!=typeof window}function d(t){return function(t){return t&&t.document&&function(t){return 9===t.nodeType}(t.document)}(t)?function(t){var n=t.document,o=n.body,s=n.documentElement;return{scrollHeight:function(){return Math.max(o.scrollHeight,s.scrollHeight,o.offsetHeight,s.offsetHeight,o.clientHeight,s.clientHeight)},height:function(){return t.innerHeight||s.clientHeight||o.clientHeight},scrollY:function(){return void 0!==t.pageYOffset?t.pageYOffset:(s||o.parentNode||o).scrollTop}}}(t):function(t){return{scrollHeight:function(){return Math.max(t.scrollHeight,t.offsetHeight,t.clientHeight)},height:function(){return Math.max(t.offsetHeight,t.clientHeight)},scrollY:function(){return t.scrollTop}}}(t)}function n(t,s,e){var n,o=function(){var n=!1;try{var t={get passive(){n=!0}};window.addEventListener("test",t,t),window.removeEventListener("test",t,t)}catch(t){n=!1}return n}(),i=!1,r=d(t),l=r.scrollY(),a={};function c(){var t=Math.round(r.scrollY()),n=r.height(),o=r.scrollHeight();a.scrollY=t,a.lastScrollY=l,a.direction=ls.tolerance[a.direction],e(a),l=t,i=!1}function h(){i||(i=!0,n=requestAnimationFrame(c))}var u=!!o&&{passive:!0,capture:!1};return t.addEventListener("scroll",h,u),c(),{destroy:function(){cancelAnimationFrame(n),t.removeEventListener("scroll",h,u)}}}function o(t){return t===Object(t)?t:{down:t,up:t}}function s(t,n){n=n||{},Object.assign(this,s.options,n),this.classes=Object.assign({},s.options.classes,n.classes),this.elem=t,this.tolerance=o(this.tolerance),this.offset=o(this.offset),this.initialised=!1,this.frozen=!1}return s.prototype={constructor:s,init:function(){return 
s.cutsTheMustard&&!this.initialised&&(this.addClass("initial"),this.initialised=!0,setTimeout(function(t){t.scrollTracker=n(t.scroller,{offset:t.offset,tolerance:t.tolerance},t.update.bind(t))},100,this)),this},destroy:function(){this.initialised=!1,Object.keys(this.classes).forEach(this.removeClass,this),this.scrollTracker.destroy()},unpin:function(){!this.hasClass("pinned")&&this.hasClass("unpinned")||(this.addClass("unpinned"),this.removeClass("pinned"),this.onUnpin&&this.onUnpin.call(this))},pin:function(){this.hasClass("unpinned")&&(this.addClass("pinned"),this.removeClass("unpinned"),this.onPin&&this.onPin.call(this))},freeze:function(){this.frozen=!0,this.addClass("frozen")},unfreeze:function(){this.frozen=!1,this.removeClass("frozen")},top:function(){this.hasClass("top")||(this.addClass("top"),this.removeClass("notTop"),this.onTop&&this.onTop.call(this))},notTop:function(){this.hasClass("notTop")||(this.addClass("notTop"),this.removeClass("top"),this.onNotTop&&this.onNotTop.call(this))},bottom:function(){this.hasClass("bottom")||(this.addClass("bottom"),this.removeClass("notBottom"),this.onBottom&&this.onBottom.call(this))},notBottom:function(){this.hasClass("notBottom")||(this.addClass("notBottom"),this.removeClass("bottom"),this.onNotBottom&&this.onNotBottom.call(this))},shouldUnpin:function(t){return"down"===t.direction&&!t.top&&t.toleranceExceeded},shouldPin:function(t){return"up"===t.direction&&t.toleranceExceeded||t.top},addClass:function(t){this.elem.classList.add.apply(this.elem.classList,this.classes[t].split(" "))},removeClass:function(t){this.elem.classList.remove.apply(this.elem.classList,this.classes[t].split(" "))},hasClass:function(t){return this.classes[t].split(" ").every(function(t){return 
this.classList.contains(t)},this.elem)},update:function(t){t.isOutOfBounds||!0!==this.frozen&&(t.top?this.top():this.notTop(),t.bottom?this.bottom():this.notBottom(),this.shouldUnpin(t)?this.unpin():this.shouldPin(t)&&this.pin())}},s.options={tolerance:{up:0,down:0},offset:0,scroller:t()?window:null,classes:{frozen:"headroom--frozen",pinned:"headroom--pinned",unpinned:"headroom--unpinned",top:"headroom--top",notTop:"headroom--not-top",bottom:"headroom--bottom",notBottom:"headroom--not-bottom",initial:"headroom"}},s.cutsTheMustard=!!(t()&&function(){}.bind&&"classList"in document.documentElement&&Object.assign&&Object.keys&&requestAnimationFrame),s}); diff --git a/docs/2_39/site_libs/quarto-nav/quarto-nav.js b/docs/2_39/site_libs/quarto-nav/quarto-nav.js new file mode 100644 index 000000000..38cc43057 --- /dev/null +++ b/docs/2_39/site_libs/quarto-nav/quarto-nav.js @@ -0,0 +1,325 @@ +const headroomChanged = new CustomEvent("quarto-hrChanged", { + detail: {}, + bubbles: true, + cancelable: false, + composed: false, +}); + +const announceDismiss = () => { + const annEl = window.document.getElementById("quarto-announcement"); + if (annEl) { + annEl.remove(); + + const annId = annEl.getAttribute("data-announcement-id"); + window.localStorage.setItem(`quarto-announce-${annId}`, "true"); + } +}; + +const announceRegister = () => { + const annEl = window.document.getElementById("quarto-announcement"); + if (annEl) { + const annId = annEl.getAttribute("data-announcement-id"); + const isDismissed = + window.localStorage.getItem(`quarto-announce-${annId}`) || false; + if (isDismissed) { + announceDismiss(); + return; + } else { + annEl.classList.remove("hidden"); + } + + const actionEl = annEl.querySelector(".quarto-announcement-action"); + if (actionEl) { + actionEl.addEventListener("click", function (e) { + e.preventDefault(); + // Hide the bar immediately + announceDismiss(); + }); + } + } +}; + +window.document.addEventListener("DOMContentLoaded", function () { + let init 
= false; + + announceRegister(); + + // Manage the back to top button, if one is present. + let lastScrollTop = window.pageYOffset || document.documentElement.scrollTop; + const scrollDownBuffer = 5; + const scrollUpBuffer = 35; + const btn = document.getElementById("quarto-back-to-top"); + const hideBackToTop = () => { + btn.style.display = "none"; + }; + const showBackToTop = () => { + btn.style.display = "inline-block"; + }; + if (btn) { + window.document.addEventListener( + "scroll", + function () { + const currentScrollTop = + window.pageYOffset || document.documentElement.scrollTop; + + // Shows and hides the button 'intelligently' as the user scrolls + if (currentScrollTop - scrollDownBuffer > lastScrollTop) { + hideBackToTop(); + lastScrollTop = currentScrollTop <= 0 ? 0 : currentScrollTop; + } else if (currentScrollTop < lastScrollTop - scrollUpBuffer) { + showBackToTop(); + lastScrollTop = currentScrollTop <= 0 ? 0 : currentScrollTop; + } + + // Show the button at the bottom, hides it at the top + if (currentScrollTop <= 0) { + hideBackToTop(); + } else if ( + window.innerHeight + currentScrollTop >= + document.body.offsetHeight + ) { + showBackToTop(); + } + }, + false + ); + } + + function throttle(func, wait) { + var timeout; + return function () { + const context = this; + const args = arguments; + const later = function () { + clearTimeout(timeout); + timeout = null; + func.apply(context, args); + }; + + if (!timeout) { + timeout = setTimeout(later, wait); + } + }; + } + + function headerOffset() { + // Set an offset if there is are fixed top navbar + const headerEl = window.document.querySelector("header.fixed-top"); + if (headerEl) { + return headerEl.clientHeight; + } else { + return 0; + } + } + + function footerOffset() { + const footerEl = window.document.querySelector("footer.footer"); + if (footerEl) { + return footerEl.clientHeight; + } else { + return 0; + } + } + + function dashboardOffset() { + const dashboardNavEl = 
window.document.getElementById( + "quarto-dashboard-header" + ); + if (dashboardNavEl !== null) { + return dashboardNavEl.clientHeight; + } else { + return 0; + } + } + + function updateDocumentOffsetWithoutAnimation() { + updateDocumentOffset(false); + } + + function updateDocumentOffset(animated) { + // set body offset + const topOffset = headerOffset(); + const bodyOffset = topOffset + footerOffset() + dashboardOffset(); + const bodyEl = window.document.body; + bodyEl.setAttribute("data-bs-offset", topOffset); + bodyEl.style.paddingTop = topOffset + "px"; + + // deal with sidebar offsets + const sidebars = window.document.querySelectorAll( + ".sidebar, .headroom-target" + ); + sidebars.forEach((sidebar) => { + if (!animated) { + sidebar.classList.add("notransition"); + // Remove the no transition class after the animation has time to complete + setTimeout(function () { + sidebar.classList.remove("notransition"); + }, 201); + } + + if (window.Headroom && sidebar.classList.contains("sidebar-unpinned")) { + sidebar.style.top = "0"; + sidebar.style.maxHeight = "100vh"; + } else { + sidebar.style.top = topOffset + "px"; + sidebar.style.maxHeight = "calc(100vh - " + topOffset + "px)"; + } + }); + + // allow space for footer + const mainContainer = window.document.querySelector(".quarto-container"); + if (mainContainer) { + mainContainer.style.minHeight = "calc(100vh - " + bodyOffset + "px)"; + } + + // link offset + let linkStyle = window.document.querySelector("#quarto-target-style"); + if (!linkStyle) { + linkStyle = window.document.createElement("style"); + linkStyle.setAttribute("id", "quarto-target-style"); + window.document.head.appendChild(linkStyle); + } + while (linkStyle.firstChild) { + linkStyle.removeChild(linkStyle.firstChild); + } + if (topOffset > 0) { + linkStyle.appendChild( + window.document.createTextNode(` + section:target::before { + content: ""; + display: block; + height: ${topOffset}px; + margin: -${topOffset}px 0 0; + }`) + ); + } + if (init) 
{ + window.dispatchEvent(headroomChanged); + } + init = true; + } + + // initialize headroom + var header = window.document.querySelector("#quarto-header"); + if (header && window.Headroom) { + const headroom = new window.Headroom(header, { + tolerance: 5, + onPin: function () { + const sidebars = window.document.querySelectorAll( + ".sidebar, .headroom-target" + ); + sidebars.forEach((sidebar) => { + sidebar.classList.remove("sidebar-unpinned"); + }); + updateDocumentOffset(); + }, + onUnpin: function () { + const sidebars = window.document.querySelectorAll( + ".sidebar, .headroom-target" + ); + sidebars.forEach((sidebar) => { + sidebar.classList.add("sidebar-unpinned"); + }); + updateDocumentOffset(); + }, + }); + headroom.init(); + + let frozen = false; + window.quartoToggleHeadroom = function () { + if (frozen) { + headroom.unfreeze(); + frozen = false; + } else { + headroom.freeze(); + frozen = true; + } + }; + } + + window.addEventListener( + "hashchange", + function (e) { + if ( + getComputedStyle(document.documentElement).scrollBehavior !== "smooth" + ) { + window.scrollTo(0, window.pageYOffset - headerOffset()); + } + }, + false + ); + + // Observe size changed for the header + const headerEl = window.document.querySelector("header.fixed-top"); + if (headerEl && window.ResizeObserver) { + const observer = new window.ResizeObserver(() => { + setTimeout(updateDocumentOffsetWithoutAnimation, 0); + }); + observer.observe(headerEl, { + attributes: true, + childList: true, + characterData: true, + }); + } else { + window.addEventListener( + "resize", + throttle(updateDocumentOffsetWithoutAnimation, 50) + ); + } + setTimeout(updateDocumentOffsetWithoutAnimation, 250); + + // fixup index.html links if we aren't on the filesystem + if (window.location.protocol !== "file:") { + const links = window.document.querySelectorAll("a"); + for (let i = 0; i < links.length; i++) { + if (links[i].href) { + links[i].dataset.originalHref = links[i].href; + links[i].href = 
links[i].href.replace(/\/index\.html/, "/"); + } + } + + // Fixup any sharing links that require urls + // Append url to any sharing urls + const sharingLinks = window.document.querySelectorAll( + "a.sidebar-tools-main-item, a.quarto-navigation-tool, a.quarto-navbar-tools, a.quarto-navbar-tools-item" + ); + for (let i = 0; i < sharingLinks.length; i++) { + const sharingLink = sharingLinks[i]; + const href = sharingLink.getAttribute("href"); + if (href) { + sharingLink.setAttribute( + "href", + href.replace("|url|", window.location.href) + ); + } + } + + // Scroll the active navigation item into view, if necessary + const navSidebar = window.document.querySelector("nav#quarto-sidebar"); + if (navSidebar) { + // Find the active item + const activeItem = navSidebar.querySelector("li.sidebar-item a.active"); + if (activeItem) { + // Wait for the scroll height and height to resolve by observing size changes on the + // nav element that is scrollable + const resizeObserver = new ResizeObserver((_entries) => { + // The bottom of the element + const elBottom = activeItem.offsetTop; + const viewBottom = navSidebar.scrollTop + navSidebar.clientHeight; + + // The element height and scroll height are the same, then we are still loading + if (viewBottom !== navSidebar.scrollHeight) { + // Determine if the item isn't visible and scroll to it + if (elBottom >= viewBottom) { + navSidebar.scrollTop = elBottom; + } + + // stop observing now since we've completed the scroll + resizeObserver.unobserve(navSidebar); + } + }); + resizeObserver.observe(navSidebar); + } + } + } +}); diff --git a/docs/2_39/site_libs/quarto-search/autocomplete.umd.js b/docs/2_39/site_libs/quarto-search/autocomplete.umd.js new file mode 100644 index 000000000..ae0063aa9 --- /dev/null +++ b/docs/2_39/site_libs/quarto-search/autocomplete.umd.js @@ -0,0 +1,3 @@ +/*! @algolia/autocomplete-js 1.11.1 | MIT License | © Algolia, Inc. 
and contributors | https://github.com/algolia/autocomplete */ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self)["@algolia/autocomplete-js"]={})}(this,(function(e){"use strict";function t(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function n(e){for(var n=1;n=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}function a(e,t){return function(e){if(Array.isArray(e))return e}(e)||function(e,t){var n=null==e?null:"undefined"!=typeof Symbol&&e[Symbol.iterator]||e["@@iterator"];if(null!=n){var r,o,i,u,a=[],l=!0,c=!1;try{if(i=(n=n.call(e)).next,0===t){if(Object(n)!==n)return;l=!1}else for(;!(l=(r=i.call(n)).done)&&(a.push(r.value),a.length!==t);l=!0);}catch(e){c=!0,o=e}finally{try{if(!l&&null!=n.return&&(u=n.return(),Object(u)!==u))return}finally{if(c)throw o}}return a}}(e,t)||c(e,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function l(e){return function(e){if(Array.isArray(e))return s(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||c(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function c(e,t){if(e){if("string"==typeof e)return s(e,t);var 
n=Object.prototype.toString.call(e).slice(8,-1);return"Object"===n&&e.constructor&&(n=e.constructor.name),"Map"===n||"Set"===n?Array.from(e):"Arguments"===n||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)?s(e,t):void 0}}function s(e,t){(null==t||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);ne.length)&&(t=e.length);for(var n=0,r=new Array(t);ne.length)&&(t=e.length);for(var n=0,r=new Array(t);n=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}function x(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function N(e){for(var t=1;t1&&void 0!==arguments[1]?arguments[1]:20,n=[],r=0;r=3||2===n&&r>=4||1===n&&r>=10);function i(t,n,r){if(o&&void 0!==r){var i=r[0].__autocomplete_algoliaCredentials,u={"X-Algolia-Application-Id":i.appId,"X-Algolia-API-Key":i.apiKey};e.apply(void 0,[t].concat(D(n),[{headers:u}]))}else e.apply(void 0,[t].concat(D(n)))}return{init:function(t,n){e("init",{appId:t,apiKey:n})},setUserToken:function(t){e("setUserToken",t)},clickedObjectIDsAfterSearch:function(){for(var e=arguments.length,t=new Array(e),n=0;n0&&i("clickedObjectIDsAfterSearch",B(t),t[0].items)},clickedObjectIDs:function(){for(var e=arguments.length,t=new Array(e),n=0;n0&&i("clickedObjectIDs",B(t),t[0].items)},clickedFilters:function(){for(var t=arguments.length,n=new Array(t),r=0;r0&&e.apply(void 0,["clickedFilters"].concat(n))},convertedObjectIDsAfterSearch:function(){for(var e=arguments.length,t=new Array(e),n=0;n0&&i("convertedObjectIDsAfterSearch",B(t),t[0].items)},convertedObjectIDs:function(){for(var e=arguments.length,t=new Array(e),n=0;n0&&i("convertedObjectIDs",B(t),t[0].items)},convertedFilters:function(){for(var t=arguments.length,n=new Array(t),r=0;r0&&e.apply(void 
0,["convertedFilters"].concat(n))},viewedObjectIDs:function(){for(var e=arguments.length,t=new Array(e),n=0;n0&&t.reduce((function(e,t){var n=t.items,r=k(t,A);return[].concat(D(e),D(q(N(N({},r),{},{objectIDs:(null==n?void 0:n.map((function(e){return e.objectID})))||r.objectIDs})).map((function(e){return{items:n,payload:e}}))))}),[]).forEach((function(e){var t=e.items;return i("viewedObjectIDs",[e.payload],t)}))},viewedFilters:function(){for(var t=arguments.length,n=new Array(t),r=0;r0&&e.apply(void 0,["viewedFilters"].concat(n))}}}function F(e){var t=e.items.reduce((function(e,t){var n;return e[t.__autocomplete_indexName]=(null!==(n=e[t.__autocomplete_indexName])&&void 0!==n?n:[]).concat(t),e}),{});return Object.keys(t).map((function(e){return{index:e,items:t[e],algoliaSource:["autocomplete"]}}))}function L(e){return e.objectID&&e.__autocomplete_indexName&&e.__autocomplete_queryID}function U(e){return U="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},U(e)}function M(e){return function(e){if(Array.isArray(e))return H(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||function(e,t){if(!e)return;if("string"==typeof e)return H(e,t);var n=Object.prototype.toString.call(e).slice(8,-1);"Object"===n&&e.constructor&&(n=e.constructor.name);if("Map"===n||"Set"===n)return Array.from(e);if("Arguments"===n||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n))return H(e,t)}(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function H(e,t){(null==t||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n0&&z({onItemsChange:r,items:n,insights:a,state:t}))}}),0);return{name:"aa.algoliaInsightsPlugin",subscribe:function(e){var 
t=e.setContext,n=e.onSelect,r=e.onActive;function l(e){t({algoliaInsightsPlugin:{__algoliaSearchParameters:W({clickAnalytics:!0},e?{userToken:e}:{}),insights:a}})}u("addAlgoliaAgent","insights-plugin"),l(),u("onUserTokenChange",l),u("getUserToken",null,(function(e,t){l(t)})),n((function(e){var t=e.item,n=e.state,r=e.event,i=e.source;L(t)&&o({state:n,event:r,insights:a,item:t,insightsEvents:[W({eventName:"Item Selected"},j({item:t,items:i.getItems().filter(L)}))]})})),r((function(e){var t=e.item,n=e.source,r=e.state,o=e.event;L(t)&&i({state:r,event:o,insights:a,item:t,insightsEvents:[W({eventName:"Item Active"},j({item:t,items:n.getItems().filter(L)}))]})}))},onStateChange:function(e){var t=e.state;c({state:t})},__autocomplete_pluginOptions:e}}function J(e,t){var n=t;return{then:function(t,r){return J(e.then(Y(t,n,e),Y(r,n,e)),n)},catch:function(t){return J(e.catch(Y(t,n,e)),n)},finally:function(t){return t&&n.onCancelList.push(t),J(e.finally(Y(t&&function(){return n.onCancelList=[],t()},n,e)),n)},cancel:function(){n.isCanceled=!0;var e=n.onCancelList;n.onCancelList=[],e.forEach((function(e){e()}))},isCanceled:function(){return!0===n.isCanceled}}}function X(e){return J(e,{isCanceled:!1,onCancelList:[]})}function Y(e,t,n){return e?function(n){return t.isCanceled?n:e(n)}:n}function Z(e,t,n,r){if(!n)return null;if(e<0&&(null===t||null!==r&&0===t))return n+e;var o=(null===t?-1:t)+e;return o<=-1||o>=n?null===r?null:0:o}function ee(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function te(e){for(var t=1;te.length)&&(t=e.length);for(var n=0,r=new Array(t);n0},reshape:function(e){return e.sources}},e),{},{id:null!==(n=e.id)&&void 0!==n?n:d(),plugins:o,initialState:he({activeItemId:null,query:"",completion:null,collections:[],isOpen:!1,status:"idle",context:{}},e.initialState),onStateChange:function(t){var 
n;null===(n=e.onStateChange)||void 0===n||n.call(e,t),o.forEach((function(e){var n;return null===(n=e.onStateChange)||void 0===n?void 0:n.call(e,t)}))},onSubmit:function(t){var n;null===(n=e.onSubmit)||void 0===n||n.call(e,t),o.forEach((function(e){var n;return null===(n=e.onSubmit)||void 0===n?void 0:n.call(e,t)}))},onReset:function(t){var n;null===(n=e.onReset)||void 0===n||n.call(e,t),o.forEach((function(e){var n;return null===(n=e.onReset)||void 0===n?void 0:n.call(e,t)}))},getSources:function(n){return Promise.all([].concat(ye(o.map((function(e){return e.getSources}))),[e.getSources]).filter(Boolean).map((function(e){return function(e,t){var n=[];return Promise.resolve(e(t)).then((function(e){return Promise.all(e.filter((function(e){return Boolean(e)})).map((function(e){if(e.sourceId,n.includes(e.sourceId))throw new Error("[Autocomplete] The `sourceId` ".concat(JSON.stringify(e.sourceId)," is not unique."));n.push(e.sourceId);var t={getItemInputValue:function(e){return e.state.query},getItemUrl:function(){},onSelect:function(e){(0,e.setIsOpen)(!1)},onActive:O,onResolve:O};Object.keys(t).forEach((function(e){t[e].__default=!0}));var r=te(te({},t),e);return Promise.resolve(r)})))}))}(e,n)}))).then((function(e){return m(e)})).then((function(e){return e.map((function(e){return he(he({},e),{},{onSelect:function(n){e.onSelect(n),t.forEach((function(e){var t;return null===(t=e.onSelect)||void 0===t?void 0:t.call(e,n)}))},onActive:function(n){e.onActive(n),t.forEach((function(e){var t;return null===(t=e.onActive)||void 0===t?void 0:t.call(e,n)}))},onResolve:function(n){e.onResolve(n),t.forEach((function(e){var t;return null===(t=e.onResolve)||void 0===t?void 0:t.call(e,n)}))}})}))}))},navigator:he({navigate:function(e){var t=e.itemUrl;r.location.assign(t)},navigateNewTab:function(e){var t=e.itemUrl,n=r.open(t,"_blank","noopener");null==n||n.focus()},navigateNewWindow:function(e){var t=e.itemUrl;r.open(t,"_blank","noopener")}},e.navigator)})}function Se(e){return 
Se="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},Se(e)}function je(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function Pe(e){for(var t=1;te.length)&&(t=e.length);for(var n=0,r=new Array(t);n=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var He,Ve,We,Ke=null,Qe=(He=-1,Ve=-1,We=void 0,function(e){var t=++He;return Promise.resolve(e).then((function(e){return We&&t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}function et(e){return et="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},et(e)}var tt=["props","refresh","store"],nt=["inputElement","formElement","panelElement"],rt=["inputElement"],ot=["inputElement","maxLength"],it=["source"],ut=["item","source"];function at(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function lt(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}function ft(e){var t=e.props,n=e.refresh,r=e.store,o=st(e,tt);return{getEnvironmentProps:function(e){var 
n=e.inputElement,o=e.formElement,i=e.panelElement;function u(e){!r.getState().isOpen&&r.pendingRequests.isEmpty()||e.target===n||!1===[o,i].some((function(t){return n=t,r=e.target,n===r||n.contains(r);var n,r}))&&(r.dispatch("blur",null),t.debug||r.pendingRequests.cancelAll())}return lt({onTouchStart:u,onMouseDown:u,onTouchMove:function(e){!1!==r.getState().isOpen&&n===t.environment.document.activeElement&&e.target!==n&&n.blur()}},st(e,nt))},getRootProps:function(e){return lt({role:"combobox","aria-expanded":r.getState().isOpen,"aria-haspopup":"listbox","aria-owns":r.getState().isOpen?r.getState().collections.map((function(e){var n=e.source;return ie(t.id,"list",n)})).join(" "):void 0,"aria-labelledby":ie(t.id,"label")},e)},getFormProps:function(e){return e.inputElement,lt({action:"",noValidate:!0,role:"search",onSubmit:function(i){var u;i.preventDefault(),t.onSubmit(lt({event:i,refresh:n,state:r.getState()},o)),r.dispatch("submit",null),null===(u=e.inputElement)||void 0===u||u.blur()},onReset:function(i){var u;i.preventDefault(),t.onReset(lt({event:i,refresh:n,state:r.getState()},o)),r.dispatch("reset",null),null===(u=e.inputElement)||void 0===u||u.focus()}},st(e,rt))},getLabelProps:function(e){return lt({htmlFor:ie(t.id,"input"),id:ie(t.id,"label")},e)},getInputProps:function(e){var i;function u(e){(t.openOnFocus||Boolean(r.getState().query))&&$e(lt({event:e,props:t,query:r.getState().completion||r.getState().query,refresh:n,store:r},o)),r.dispatch("focus",null)}var a=e||{};a.inputElement;var l=a.maxLength,c=void 0===l?512:l,s=st(a,ot),f=oe(r.getState()),p=function(e){return Boolean(e&&e.match(ue))}((null===(i=t.environment.navigator)||void 0===i?void 0:i.userAgent)||""),m=t.enterKeyHint||(null!=f&&f.itemUrl&&!p?"go":"search");return lt({"aria-autocomplete":"both","aria-activedescendant":r.getState().isOpen&&null!==r.getState().activeItemId?ie(t.id,"item-".concat(r.getState().activeItemId),null==f?void 0:f.source):void 
0,"aria-controls":r.getState().isOpen?r.getState().collections.map((function(e){var n=e.source;return ie(t.id,"list",n)})).join(" "):void 0,"aria-labelledby":ie(t.id,"label"),value:r.getState().completion||r.getState().query,id:ie(t.id,"input"),autoComplete:"off",autoCorrect:"off",autoCapitalize:"off",enterKeyHint:m,spellCheck:"false",autoFocus:t.autoFocus,placeholder:t.placeholder,maxLength:c,type:"search",onChange:function(e){$e(lt({event:e,props:t,query:e.currentTarget.value.slice(0,c),refresh:n,store:r},o))},onKeyDown:function(e){!function(e){var t=e.event,n=e.props,r=e.refresh,o=e.store,i=Ze(e,Ge);if("ArrowUp"===t.key||"ArrowDown"===t.key){var u=function(){var e=oe(o.getState()),t=n.environment.document.getElementById(ie(n.id,"item-".concat(o.getState().activeItemId),null==e?void 0:e.source));t&&(t.scrollIntoViewIfNeeded?t.scrollIntoViewIfNeeded(!1):t.scrollIntoView(!1))},a=function(){var e=oe(o.getState());if(null!==o.getState().activeItemId&&e){var n=e.item,u=e.itemInputValue,a=e.itemUrl,l=e.source;l.onActive(Xe({event:t,item:n,itemInputValue:u,itemUrl:a,refresh:r,source:l,state:o.getState()},i))}};t.preventDefault(),!1===o.getState().isOpen&&(n.openOnFocus||Boolean(o.getState().query))?$e(Xe({event:t,props:n,query:o.getState().query,refresh:r,store:o},i)).then((function(){o.dispatch(t.key,{nextActiveItemId:n.defaultActiveItemId}),a(),setTimeout(u,0)})):(o.dispatch(t.key,{}),a(),u())}else if("Escape"===t.key)t.preventDefault(),o.dispatch(t.key,null),o.pendingRequests.cancelAll();else if("Tab"===t.key)o.dispatch("blur",null),o.pendingRequests.cancelAll();else if("Enter"===t.key){if(null===o.getState().activeItemId||o.getState().collections.every((function(e){return 0===e.items.length})))return void(n.debug||o.pendingRequests.cancelAll());t.preventDefault();var l=oe(o.getState()),c=l.item,s=l.itemInputValue,f=l.itemUrl,p=l.source;if(t.metaKey||t.ctrlKey)void 
0!==f&&(p.onSelect(Xe({event:t,item:c,itemInputValue:s,itemUrl:f,refresh:r,source:p,state:o.getState()},i)),n.navigator.navigateNewTab({itemUrl:f,item:c,state:o.getState()}));else if(t.shiftKey)void 0!==f&&(p.onSelect(Xe({event:t,item:c,itemInputValue:s,itemUrl:f,refresh:r,source:p,state:o.getState()},i)),n.navigator.navigateNewWindow({itemUrl:f,item:c,state:o.getState()}));else if(t.altKey);else{if(void 0!==f)return p.onSelect(Xe({event:t,item:c,itemInputValue:s,itemUrl:f,refresh:r,source:p,state:o.getState()},i)),void n.navigator.navigate({itemUrl:f,item:c,state:o.getState()});$e(Xe({event:t,nextState:{isOpen:!1},props:n,query:s,refresh:r,store:o},i)).then((function(){p.onSelect(Xe({event:t,item:c,itemInputValue:s,itemUrl:f,refresh:r,source:p,state:o.getState()},i))}))}}}(lt({event:e,props:t,refresh:n,store:r},o))},onFocus:u,onBlur:O,onClick:function(n){e.inputElement!==t.environment.document.activeElement||r.getState().isOpen||u(n)}},s)},getPanelProps:function(e){return lt({onMouseDown:function(e){e.preventDefault()},onMouseLeave:function(){r.dispatch("mouseleave",null)}},e)},getListProps:function(e){var n=e||{},r=n.source,o=st(n,it);return lt({role:"listbox","aria-labelledby":ie(t.id,"label"),id:ie(t.id,"list",r)},o)},getItemProps:function(e){var i=e.item,u=e.source,a=st(e,ut);return lt({id:ie(t.id,"item-".concat(i.__autocomplete_id),u),role:"option","aria-selected":r.getState().activeItemId===i.__autocomplete_id,onMouseMove:function(e){if(i.__autocomplete_id!==r.getState().activeItemId){r.dispatch("mousemove",i.__autocomplete_id);var t=oe(r.getState());if(null!==r.getState().activeItemId&&t){var u=t.item,a=t.itemInputValue,l=t.itemUrl,c=t.source;c.onActive(lt({event:e,item:u,itemInputValue:a,itemUrl:l,refresh:n,source:c,state:r.getState()},o))}}},onMouseDown:function(e){e.preventDefault()},onClick:function(e){var 
a=u.getItemInputValue({item:i,state:r.getState()}),l=u.getItemUrl({item:i,state:r.getState()});(l?Promise.resolve():$e(lt({event:e,nextState:{isOpen:!1},props:t,query:a,refresh:n,store:r},o))).then((function(){u.onSelect(lt({event:e,item:i,itemInputValue:a,itemUrl:l,refresh:n,source:u,state:r.getState()},o))}))}},a)}}}function pt(e){return pt="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},pt(e)}function mt(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function vt(e){for(var t=1;t=5&&((o||!e&&5===r)&&(u.push(r,0,o,n),r=6),e&&(u.push(r,e,0,n),r=6)),o=""},l=0;l"===t?(r=1,o=""):o=t+o[0]:i?t===i?i="":o+=t:'"'===t||"'"===t?i=t:">"===t?(a(),r=1):r&&("="===t?(r=5,n=o,o=""):"/"===t&&(r<5||">"===e[l][c+1])?(a(),3===r&&(u=u[0]),r=u,(u=u[0]).push(2,0,r),r=0):" "===t||"\t"===t||"\n"===t||"\r"===t?(a(),r=2):o+=t),3===r&&"!--"===o&&(r=4,u=u[0])}return a(),u}(e)),t),arguments,[])).length>1?t:t[0]}var kt=function(e){var t=e.environment,n=t.document.createElementNS("http://www.w3.org/2000/svg","svg");n.setAttribute("class","aa-ClearIcon"),n.setAttribute("viewBox","0 0 24 24"),n.setAttribute("width","18"),n.setAttribute("height","18"),n.setAttribute("fill","currentColor");var r=t.document.createElementNS("http://www.w3.org/2000/svg","path");return r.setAttribute("d","M5.293 6.707l5.293 5.293-5.293 5.293c-0.391 0.391-0.391 1.024 0 1.414s1.024 0.391 1.414 0l5.293-5.293 5.293 5.293c0.391 0.391 1.024 0.391 1.414 0s0.391-1.024 0-1.414l-5.293-5.293 5.293-5.293c0.391-0.391 0.391-1.024 0-1.414s-1.024-0.391-1.414 0l-5.293 5.293-5.293-5.293c-0.391-0.391-1.024-0.391-1.414 0s-0.391 1.024 0 1.414z"),n.appendChild(r),n};function xt(e,t){if("string"==typeof t){var 
n=e.document.querySelector(t);return"The element ".concat(JSON.stringify(t)," is not in the document."),n}return t}function Nt(){for(var e=arguments.length,t=new Array(e),n=0;n2&&(u.children=arguments.length>3?Jt.call(arguments,2):n),"function"==typeof e&&null!=e.defaultProps)for(i in e.defaultProps)void 0===u[i]&&(u[i]=e.defaultProps[i]);return sn(e,u,r,o,null)}function sn(e,t,n,r,o){var i={type:e,props:t,key:n,ref:r,__k:null,__:null,__b:0,__e:null,__d:void 0,__c:null,__h:null,constructor:void 0,__v:null==o?++Yt:o};return null==o&&null!=Xt.vnode&&Xt.vnode(i),i}function fn(e){return e.children}function pn(e,t){this.props=e,this.context=t}function mn(e,t){if(null==t)return e.__?mn(e.__,e.__.__k.indexOf(e)+1):null;for(var n;tt&&Zt.sort(nn));yn.__r=0}function bn(e,t,n,r,o,i,u,a,l,c){var s,f,p,m,v,d,y,b=r&&r.__k||on,g=b.length;for(n.__k=[],s=0;s0?sn(m.type,m.props,m.key,m.ref?m.ref:null,m.__v):m)){if(m.__=n,m.__b=n.__b+1,null===(p=b[s])||p&&m.key==p.key&&m.type===p.type)b[s]=void 0;else for(f=0;f=0;t--)if((n=e.__k[t])&&(r=On(n)))return r;return null}function _n(e,t,n){"-"===t[0]?e.setProperty(t,null==n?"":n):e[t]=null==n?"":"number"!=typeof n||un.test(t)?n:n+"px"}function Sn(e,t,n,r,o){var i;e:if("style"===t)if("string"==typeof n)e.style.cssText=n;else{if("string"==typeof r&&(e.style.cssText=r=""),r)for(t in r)n&&t in n||_n(e.style,t,"");if(n)for(t in n)r&&n[t]===r[t]||_n(e.style,t,n[t])}else if("o"===t[0]&&"n"===t[1])i=t!==(t=t.replace(/Capture$/,"")),t=t.toLowerCase()in e?t.toLowerCase().slice(2):t.slice(2),e.l||(e.l={}),e.l[t+i]=n,n?r||e.addEventListener(t,i?Pn:jn,i):e.removeEventListener(t,i?Pn:jn,i);else if("dangerouslySetInnerHTML"!==t){if(o)t=t.replace(/xlink(H|:h)/,"h").replace(/sName$/,"s");else if("width"!==t&&"height"!==t&&"href"!==t&&"list"!==t&&"form"!==t&&"tabIndex"!==t&&"download"!==t&&t in e)try{e[t]=null==n?"":n;break e}catch(e){}"function"==typeof n||(null==n||!1===n&&"-"!==t[4]?e.removeAttribute(t):e.setAttribute(t,n))}}function jn(e){return 
this.l[e.type+!1](Xt.event?Xt.event(e):e)}function Pn(e){return this.l[e.type+!0](Xt.event?Xt.event(e):e)}function wn(e,t,n,r,o,i,u,a,l){var c,s,f,p,m,v,d,y,b,g,h,O,_,S,j,P=t.type;if(void 0!==t.constructor)return null;null!=n.__h&&(l=n.__h,a=t.__e=n.__e,t.__h=null,i=[a]),(c=Xt.__b)&&c(t);try{e:if("function"==typeof P){if(y=t.props,b=(c=P.contextType)&&r[c.__c],g=c?b?b.props.value:c.__:r,n.__c?d=(s=t.__c=n.__c).__=s.__E:("prototype"in P&&P.prototype.render?t.__c=s=new P(y,g):(t.__c=s=new pn(y,g),s.constructor=P,s.render=Cn),b&&b.sub(s),s.props=y,s.state||(s.state={}),s.context=g,s.__n=r,f=s.__d=!0,s.__h=[],s._sb=[]),null==s.__s&&(s.__s=s.state),null!=P.getDerivedStateFromProps&&(s.__s==s.state&&(s.__s=an({},s.__s)),an(s.__s,P.getDerivedStateFromProps(y,s.__s))),p=s.props,m=s.state,s.__v=t,f)null==P.getDerivedStateFromProps&&null!=s.componentWillMount&&s.componentWillMount(),null!=s.componentDidMount&&s.__h.push(s.componentDidMount);else{if(null==P.getDerivedStateFromProps&&y!==p&&null!=s.componentWillReceiveProps&&s.componentWillReceiveProps(y,g),!s.__e&&null!=s.shouldComponentUpdate&&!1===s.shouldComponentUpdate(y,s.__s,g)||t.__v===n.__v){for(t.__v!==n.__v&&(s.props=y,s.state=s.__s,s.__d=!1),s.__e=!1,t.__e=n.__e,t.__k=n.__k,t.__k.forEach((function(e){e&&(e.__=t)})),h=0;h0&&void 0!==arguments[0]?arguments[0]:[];return{get:function(){return e},add:function(t){var n=e[e.length-1];(null==n?void 0:n.isHighlighted)===t.isHighlighted?e[e.length-1]={value:n.value+t.value,isHighlighted:n.isHighlighted}:e.push(t)}}}(n?[{value:n,isHighlighted:!1}]:[]);return t.forEach((function(e){var t=e.split(xn);r.add({value:t[0],isHighlighted:!0}),""!==t[1]&&r.add({value:t[1],isHighlighted:!1})})),r.get()}function Tn(e){return function(e){if(Array.isArray(e))return qn(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||function(e,t){if(!e)return;if("string"==typeof e)return qn(e,t);var 
n=Object.prototype.toString.call(e).slice(8,-1);"Object"===n&&e.constructor&&(n=e.constructor.name);if("Map"===n||"Set"===n)return Array.from(e);if("Arguments"===n||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n))return qn(e,t)}(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function qn(e,t){(null==t||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n",""":'"',"'":"'"},Fn=new RegExp(/\w/i),Ln=/&(amp|quot|lt|gt|#39);/g,Un=RegExp(Ln.source);function Mn(e,t){var n,r,o,i=e[t],u=(null===(n=e[t+1])||void 0===n?void 0:n.isHighlighted)||!0,a=(null===(r=e[t-1])||void 0===r?void 0:r.isHighlighted)||!0;return Fn.test((o=i.value)&&Un.test(o)?o.replace(Ln,(function(e){return Rn[e]})):o)||a!==u?i.isHighlighted:a}function Hn(e){return Hn="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},Hn(e)}function Vn(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function Wn(e){for(var t=1;te.length)&&(t=e.length);for(var n=0,r=new Array(t);n=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}function ur(e){return function(e){if(Array.isArray(e))return ar(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||function(e,t){if(!e)return;if("string"==typeof e)return ar(e,t);var n=Object.prototype.toString.call(e).slice(8,-1);"Object"===n&&e.constructor&&(n=e.constructor.name);if("Map"===n||"Set"===n)return 
Array.from(e);if("Arguments"===n||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n))return ar(e,t)}(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function ar(e,t){(null==t||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n0;if(!O.value.core.openOnFocus&&!t.query)return n;var r=Boolean(y.current||O.value.renderer.renderNoResults);return!n&&r||n},__autocomplete_metadata:{userAgents:br,options:e}}))})),j=f(n({collections:[],completion:null,context:{},isOpen:!1,query:"",activeItemId:null,status:"idle"},O.value.core.initialState)),P={getEnvironmentProps:O.value.renderer.getEnvironmentProps,getFormProps:O.value.renderer.getFormProps,getInputProps:O.value.renderer.getInputProps,getItemProps:O.value.renderer.getItemProps,getLabelProps:O.value.renderer.getLabelProps,getListProps:O.value.renderer.getListProps,getPanelProps:O.value.renderer.getPanelProps,getRootProps:O.value.renderer.getRootProps},w={setActiveItemId:S.value.setActiveItemId,setQuery:S.value.setQuery,setCollections:S.value.setCollections,setIsOpen:S.value.setIsOpen,setStatus:S.value.setStatus,setContext:S.value.setContext,refresh:S.value.refresh,navigator:S.value.navigator},I=m((function(){return Ct.bind(O.value.renderer.renderer.createElement)})),A=m((function(){return Gt({autocomplete:S.value,autocompleteScopeApi:w,classNames:O.value.renderer.classNames,environment:O.value.core.environment,isDetached:_.value,placeholder:O.value.core.placeholder,propGetters:P,setIsModalOpen:k,state:j.current,translations:O.value.renderer.translations})}));function E(){Ht(A.value.panel,{style:_.value?{}:yr({panelPlacement:O.value.renderer.panelPlacement,container:A.value.root,form:A.value.form,environment:O.value.core.environment})})}function D(e){j.current=e;var 
t={autocomplete:S.value,autocompleteScopeApi:w,classNames:O.value.renderer.classNames,components:O.value.renderer.components,container:O.value.renderer.container,html:I.value,dom:A.value,panelContainer:_.value?A.value.detachedContainer:O.value.renderer.panelContainer,propGetters:P,state:j.current,renderer:O.value.renderer.renderer},r=!b(e)&&!y.current&&O.value.renderer.renderNoResults||O.value.renderer.render;!function(e){var t=e.autocomplete,r=e.autocompleteScopeApi,o=e.dom,i=e.propGetters,u=e.state;Vt(o.root,i.getRootProps(n({state:u,props:t.getRootProps({})},r))),Vt(o.input,i.getInputProps(n({state:u,props:t.getInputProps({inputElement:o.input}),inputElement:o.input},r))),Ht(o.label,{hidden:"stalled"===u.status}),Ht(o.loadingIndicator,{hidden:"stalled"!==u.status}),Ht(o.clearButton,{hidden:!u.query}),Ht(o.detachedSearchButtonQuery,{textContent:u.query}),Ht(o.detachedSearchButtonPlaceholder,{hidden:Boolean(u.query)})}(t),function(e,t){var r=t.autocomplete,o=t.autocompleteScopeApi,u=t.classNames,a=t.html,l=t.dom,c=t.panelContainer,s=t.propGetters,f=t.state,p=t.components,m=t.renderer;if(f.isOpen){c.contains(l.panel)||"loading"===f.status||c.appendChild(l.panel),l.panel.classList.toggle("aa-Panel--stalled","stalled"===f.status);var v=f.collections.filter((function(e){var t=e.source,n=e.items;return t.templates.noResults||n.length>0})).map((function(e,t){var l=e.source,c=e.items;return 
m.createElement("section",{key:t,className:u.source,"data-autocomplete-source-id":l.sourceId},l.templates.header&&m.createElement("div",{className:u.sourceHeader},l.templates.header({components:p,createElement:m.createElement,Fragment:m.Fragment,items:c,source:l,state:f,html:a})),l.templates.noResults&&0===c.length?m.createElement("div",{className:u.sourceNoResults},l.templates.noResults({components:p,createElement:m.createElement,Fragment:m.Fragment,source:l,state:f,html:a})):m.createElement("ul",i({className:u.list},s.getListProps(n({state:f,props:r.getListProps({source:l})},o))),c.map((function(e){var t=r.getItemProps({item:e,source:l});return m.createElement("li",i({key:t.id,className:u.item},s.getItemProps(n({state:f,props:t},o))),l.templates.item({components:p,createElement:m.createElement,Fragment:m.Fragment,item:e,state:f,html:a}))}))),l.templates.footer&&m.createElement("div",{className:u.sourceFooter},l.templates.footer({components:p,createElement:m.createElement,Fragment:m.Fragment,items:c,source:l,state:f,html:a})))})),d=m.createElement(m.Fragment,null,m.createElement("div",{className:u.panelLayout},v),m.createElement("div",{className:"aa-GradientBottom"})),y=v.reduce((function(e,t){return e[t.props["data-autocomplete-source-id"]]=t,e}),{});e(n(n({children:d,state:f,sections:v,elements:y},m),{},{components:p,html:a},o),l.panel)}else c.contains(l.panel)&&c.removeChild(l.panel)}(r,t)}function C(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};l();var t=O.value.renderer,n=t.components,r=u(t,gr);g.current=qt(r,O.value.core,{components:Bt(n,(function(e){return!e.value.hasOwnProperty("__autocomplete_componentName")})),initialState:j.current},e),v(),c(),S.value.refresh().then((function(){D(j.current)}))}function k(e){requestAnimationFrame((function(){var 
t=O.value.core.environment.document.body.contains(A.value.detachedOverlay);e!==t&&(e?(O.value.core.environment.document.body.appendChild(A.value.detachedOverlay),O.value.core.environment.document.body.classList.add("aa-Detached"),A.value.input.focus()):(O.value.core.environment.document.body.removeChild(A.value.detachedOverlay),O.value.core.environment.document.body.classList.remove("aa-Detached")))}))}return a((function(){var e=S.value.getEnvironmentProps({formElement:A.value.form,panelElement:A.value.panel,inputElement:A.value.input});return Ht(O.value.core.environment,e),function(){Ht(O.value.core.environment,Object.keys(e).reduce((function(e,t){return n(n({},e),{},o({},t,void 0))}),{}))}})),a((function(){var e=_.value?O.value.core.environment.document.body:O.value.renderer.panelContainer,t=_.value?A.value.detachedOverlay:A.value.panel;return _.value&&j.current.isOpen&&k(!0),D(j.current),function(){e.contains(t)&&e.removeChild(t)}})),a((function(){var e=O.value.renderer.container;return e.appendChild(A.value.root),function(){e.removeChild(A.value.root)}})),a((function(){var e=p((function(e){D(e.state)}),0);return h.current=function(t){var n=t.state,r=t.prevState;(_.value&&r.isOpen!==n.isOpen&&k(n.isOpen),_.value||!n.isOpen||r.isOpen||E(),n.query!==r.query)&&O.value.core.environment.document.querySelectorAll(".aa-Panel--scrollable").forEach((function(e){0!==e.scrollTop&&(e.scrollTop=0)}));e({state:n})},function(){h.current=void 0}})),a((function(){var e=p((function(){var e=_.value;_.value=O.value.core.environment.matchMedia(O.value.renderer.detachedMediaQuery).matches,e!==_.value?C({}):requestAnimationFrame(E)}),20);return O.value.core.environment.addEventListener("resize",e),function(){O.value.core.environment.removeEventListener("resize",e)}})),a((function(){if(!_.value)return function(){};function e(e){A.value.detachedContainer.classList.toggle("aa-DetachedContainer--modal",e)}function t(t){e(t.matches)}var 
n=O.value.core.environment.matchMedia(getComputedStyle(O.value.core.environment.document.documentElement).getPropertyValue("--aa-detached-modal-media-query"));e(n.matches);var r=Boolean(n.addEventListener);return r?n.addEventListener("change",t):n.addListener(t),function(){r?n.removeEventListener("change",t):n.removeListener(t)}})),a((function(){return requestAnimationFrame(E),function(){}})),n(n({},w),{},{update:C,destroy:function(){l()}})},e.getAlgoliaFacets=function(e){var t=hr({transformResponse:function(e){return e.facetHits}}),r=e.queries.map((function(e){return n(n({},e),{},{type:"facet"})}));return t(n(n({},e),{},{queries:r}))},e.getAlgoliaResults=Or,Object.defineProperty(e,"__esModule",{value:!0})})); + diff --git a/docs/2_39/site_libs/quarto-search/fuse.min.js b/docs/2_39/site_libs/quarto-search/fuse.min.js new file mode 100644 index 000000000..adc28356e --- /dev/null +++ b/docs/2_39/site_libs/quarto-search/fuse.min.js @@ -0,0 +1,9 @@ +/** + * Fuse.js v6.6.2 - Lightweight fuzzy-search (http://fusejs.io) + * + * Copyright (c) 2022 Kiro Risk (http://kiro.me) + * All Rights Reserved. 
Apache Software License 2.0 + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ +var e,t;e=this,t=function(){"use strict";function e(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function t(t){for(var n=1;ne.length)&&(t=e.length);for(var n=0,r=new Array(t);n0&&void 0!==arguments[0]?arguments[0]:1,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:3,n=new Map,r=Math.pow(10,t);return{get:function(t){var i=t.match(C).length;if(n.has(i))return n.get(i);var o=1/Math.pow(i,.5*e),c=parseFloat(Math.round(o*r)/r);return n.set(i,c),c},clear:function(){n.clear()}}}var $=function(){function e(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},n=t.getFn,i=void 0===n?I.getFn:n,o=t.fieldNormWeight,c=void 0===o?I.fieldNormWeight:o;r(this,e),this.norm=E(c,3),this.getFn=i,this.isCreated=!1,this.setIndexRecords()}return o(e,[{key:"setSources",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.docs=e}},{key:"setIndexRecords",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.records=e}},{key:"setKeys",value:function(){var e=this,t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.keys=t,this._keysMap={},t.forEach((function(t,n){e._keysMap[t.id]=n}))}},{key:"create",value:function(){var e=this;!this.isCreated&&this.docs.length&&(this.isCreated=!0,g(this.docs[0])?this.docs.forEach((function(t,n){e._addString(t,n)})):this.docs.forEach((function(t,n){e._addObject(t,n)})),this.norm.clear())}},{key:"add",value:function(e){var t=this.size();g(e)?this._addString(e,t):this._addObject(e,t)}},{key:"removeAt",value:function(e){this.records.splice(e,1);for(var t=e,n=this.size();t2&&void 0!==arguments[2]?arguments[2]:{},r=n.getFn,i=void 0===r?I.getFn:r,o=n.fieldNormWeight,c=void 0===o?I.fieldNormWeight:o,a=new 
$({getFn:i,fieldNormWeight:c});return a.setKeys(e.map(_)),a.setSources(t),a.create(),a}function R(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},n=t.errors,r=void 0===n?0:n,i=t.currentLocation,o=void 0===i?0:i,c=t.expectedLocation,a=void 0===c?0:c,s=t.distance,u=void 0===s?I.distance:s,h=t.ignoreLocation,l=void 0===h?I.ignoreLocation:h,f=r/e.length;if(l)return f;var d=Math.abs(a-o);return u?f+d/u:d?1:f}function N(){for(var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[],t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:I.minMatchCharLength,n=[],r=-1,i=-1,o=0,c=e.length;o=t&&n.push([r,i]),r=-1)}return e[o-1]&&o-r>=t&&n.push([r,o-1]),n}var P=32;function W(e){for(var t={},n=0,r=e.length;n1&&void 0!==arguments[1]?arguments[1]:{},o=i.location,c=void 0===o?I.location:o,a=i.threshold,s=void 0===a?I.threshold:a,u=i.distance,h=void 0===u?I.distance:u,l=i.includeMatches,f=void 0===l?I.includeMatches:l,d=i.findAllMatches,v=void 0===d?I.findAllMatches:d,g=i.minMatchCharLength,y=void 0===g?I.minMatchCharLength:g,p=i.isCaseSensitive,m=void 0===p?I.isCaseSensitive:p,k=i.ignoreLocation,M=void 0===k?I.ignoreLocation:k;if(r(this,e),this.options={location:c,threshold:s,distance:h,includeMatches:f,findAllMatches:v,minMatchCharLength:y,isCaseSensitive:m,ignoreLocation:M},this.pattern=m?t:t.toLowerCase(),this.chunks=[],this.pattern.length){var b=function(e,t){n.chunks.push({pattern:e,alphabet:W(e),startIndex:t})},x=this.pattern.length;if(x>P){for(var w=0,L=x%P,S=x-L;w3&&void 0!==arguments[3]?arguments[3]:{},i=r.location,o=void 0===i?I.location:i,c=r.distance,a=void 0===c?I.distance:c,s=r.threshold,u=void 0===s?I.threshold:s,h=r.findAllMatches,l=void 0===h?I.findAllMatches:h,f=r.minMatchCharLength,d=void 0===f?I.minMatchCharLength:f,v=r.includeMatches,g=void 0===v?I.includeMatches:v,y=r.ignoreLocation,p=void 0===y?I.ignoreLocation:y;if(t.length>P)throw new Error(w(P));for(var 
m,k=t.length,M=e.length,b=Math.max(0,Math.min(o,M)),x=u,L=b,S=d>1||g,_=S?Array(M):[];(m=e.indexOf(t,L))>-1;){var O=R(t,{currentLocation:m,expectedLocation:b,distance:a,ignoreLocation:p});if(x=Math.min(O,x),L=m+k,S)for(var j=0;j=z;q-=1){var B=q-1,J=n[e.charAt(B)];if(S&&(_[B]=+!!J),K[q]=(K[q+1]<<1|1)&J,F&&(K[q]|=(A[q+1]|A[q])<<1|1|A[q+1]),K[q]&$&&(C=R(t,{errors:F,currentLocation:B,expectedLocation:b,distance:a,ignoreLocation:p}))<=x){if(x=C,(L=B)<=b)break;z=Math.max(1,2*b-L)}}if(R(t,{errors:F+1,currentLocation:b,expectedLocation:b,distance:a,ignoreLocation:p})>x)break;A=K}var U={isMatch:L>=0,score:Math.max(.001,C)};if(S){var V=N(_,d);V.length?g&&(U.indices=V):U.isMatch=!1}return U}(e,n,i,{location:c+o,distance:a,threshold:s,findAllMatches:u,minMatchCharLength:h,includeMatches:r,ignoreLocation:l}),p=y.isMatch,m=y.score,k=y.indices;p&&(g=!0),v+=m,p&&k&&(d=[].concat(f(d),f(k)))}));var y={isMatch:g,score:g?v/this.chunks.length:1};return g&&r&&(y.indices=d),y}}]),e}(),z=function(){function e(t){r(this,e),this.pattern=t}return o(e,[{key:"search",value:function(){}}],[{key:"isMultiMatch",value:function(e){return D(e,this.multiRegex)}},{key:"isSingleMatch",value:function(e){return D(e,this.singleRegex)}}]),e}();function D(e,t){var n=e.match(t);return n?n[1]:null}var K=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=e===this.pattern;return{isMatch:t,score:t?0:1,indices:[0,this.pattern.length-1]}}}],[{key:"type",get:function(){return"exact"}},{key:"multiRegex",get:function(){return/^="(.*)"$/}},{key:"singleRegex",get:function(){return/^=(.*)$/}}]),n}(z),q=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var 
t=-1===e.indexOf(this.pattern);return{isMatch:t,score:t?0:1,indices:[0,e.length-1]}}}],[{key:"type",get:function(){return"inverse-exact"}},{key:"multiRegex",get:function(){return/^!"(.*)"$/}},{key:"singleRegex",get:function(){return/^!(.*)$/}}]),n}(z),B=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=e.startsWith(this.pattern);return{isMatch:t,score:t?0:1,indices:[0,this.pattern.length-1]}}}],[{key:"type",get:function(){return"prefix-exact"}},{key:"multiRegex",get:function(){return/^\^"(.*)"$/}},{key:"singleRegex",get:function(){return/^\^(.*)$/}}]),n}(z),J=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=!e.startsWith(this.pattern);return{isMatch:t,score:t?0:1,indices:[0,e.length-1]}}}],[{key:"type",get:function(){return"inverse-prefix-exact"}},{key:"multiRegex",get:function(){return/^!\^"(.*)"$/}},{key:"singleRegex",get:function(){return/^!\^(.*)$/}}]),n}(z),U=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=e.endsWith(this.pattern);return{isMatch:t,score:t?0:1,indices:[e.length-this.pattern.length,e.length-1]}}}],[{key:"type",get:function(){return"suffix-exact"}},{key:"multiRegex",get:function(){return/^"(.*)"\$$/}},{key:"singleRegex",get:function(){return/^(.*)\$$/}}]),n}(z),V=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=!e.endsWith(this.pattern);return{isMatch:t,score:t?0:1,indices:[0,e.length-1]}}}],[{key:"type",get:function(){return"inverse-suffix-exact"}},{key:"multiRegex",get:function(){return/^!"(.*)"\$$/}},{key:"singleRegex",get:function(){return/^!(.*)\$$/}}]),n}(z),G=function(e){a(n,e);var t=l(n);function n(e){var i,o=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},c=o.location,a=void 0===c?I.location:c,s=o.threshold,u=void 
0===s?I.threshold:s,h=o.distance,l=void 0===h?I.distance:h,f=o.includeMatches,d=void 0===f?I.includeMatches:f,v=o.findAllMatches,g=void 0===v?I.findAllMatches:v,y=o.minMatchCharLength,p=void 0===y?I.minMatchCharLength:y,m=o.isCaseSensitive,k=void 0===m?I.isCaseSensitive:m,M=o.ignoreLocation,b=void 0===M?I.ignoreLocation:M;return r(this,n),(i=t.call(this,e))._bitapSearch=new T(e,{location:a,threshold:u,distance:l,includeMatches:d,findAllMatches:g,minMatchCharLength:p,isCaseSensitive:k,ignoreLocation:b}),i}return o(n,[{key:"search",value:function(e){return this._bitapSearch.searchIn(e)}}],[{key:"type",get:function(){return"fuzzy"}},{key:"multiRegex",get:function(){return/^"(.*)"$/}},{key:"singleRegex",get:function(){return/^(.*)$/}}]),n}(z),H=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){for(var t,n=0,r=[],i=this.pattern.length;(t=e.indexOf(this.pattern,n))>-1;)n=t+i,r.push([t,n-1]);var o=!!r.length;return{isMatch:o,score:o?0:1,indices:r}}}],[{key:"type",get:function(){return"include"}},{key:"multiRegex",get:function(){return/^'"(.*)"$/}},{key:"singleRegex",get:function(){return/^'(.*)$/}}]),n}(z),Q=[K,H,B,J,V,U,q,G],X=Q.length,Y=/ +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/;function Z(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};return e.split("|").map((function(e){for(var n=e.trim().split(Y).filter((function(e){return e&&!!e.trim()})),r=[],i=0,o=n.length;i1&&void 0!==arguments[1]?arguments[1]:{},i=n.isCaseSensitive,o=void 0===i?I.isCaseSensitive:i,c=n.includeMatches,a=void 0===c?I.includeMatches:c,s=n.minMatchCharLength,u=void 0===s?I.minMatchCharLength:s,h=n.ignoreLocation,l=void 0===h?I.ignoreLocation:h,f=n.findAllMatches,d=void 0===f?I.findAllMatches:f,v=n.location,g=void 0===v?I.location:v,y=n.threshold,p=void 0===y?I.threshold:y,m=n.distance,k=void 
0===m?I.distance:m;r(this,e),this.query=null,this.options={isCaseSensitive:o,includeMatches:a,minMatchCharLength:u,findAllMatches:d,ignoreLocation:l,location:g,threshold:p,distance:k},this.pattern=o?t:t.toLowerCase(),this.query=Z(this.pattern,this.options)}return o(e,[{key:"searchIn",value:function(e){var t=this.query;if(!t)return{isMatch:!1,score:1};var n=this.options,r=n.includeMatches;e=n.isCaseSensitive?e:e.toLowerCase();for(var i=0,o=[],c=0,a=0,s=t.length;a-1&&(n.refIndex=e.idx),t.matches.push(n)}}))}function ve(e,t){t.score=e.score}function ge(e,t){var n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{},r=n.includeMatches,i=void 0===r?I.includeMatches:r,o=n.includeScore,c=void 0===o?I.includeScore:o,a=[];return i&&a.push(de),c&&a.push(ve),e.map((function(e){var n=e.idx,r={item:t[n],refIndex:n};return a.length&&a.forEach((function(t){t(e,r)})),r}))}var ye=function(){function e(n){var i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},o=arguments.length>2?arguments[2]:void 0;r(this,e),this.options=t(t({},I),i),this.options.useExtendedSearch,this._keyStore=new S(this.options.keys),this.setCollection(n,o)}return o(e,[{key:"setCollection",value:function(e,t){if(this._docs=e,t&&!(t instanceof $))throw new Error("Incorrect 'index' type");this._myIndex=t||F(this.options.keys,this._docs,{getFn:this.options.getFn,fieldNormWeight:this.options.fieldNormWeight})}},{key:"add",value:function(e){k(e)&&(this._docs.push(e),this._myIndex.add(e))}},{key:"remove",value:function(){for(var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:function(){return!1},t=[],n=0,r=this._docs.length;n1&&void 0!==arguments[1]?arguments[1]:{},n=t.limit,r=void 0===n?-1:n,i=this.options,o=i.includeMatches,c=i.includeScore,a=i.shouldSort,s=i.sortFn,u=i.ignoreFieldNorm,h=g(e)?g(this._docs[0])?this._searchStringList(e):this._searchObjectList(e):this._searchLogical(e);return 
fe(h,{ignoreFieldNorm:u}),a&&h.sort(s),y(r)&&r>-1&&(h=h.slice(0,r)),ge(h,this._docs,{includeMatches:o,includeScore:c})}},{key:"_searchStringList",value:function(e){var t=re(e,this.options),n=this._myIndex.records,r=[];return n.forEach((function(e){var n=e.v,i=e.i,o=e.n;if(k(n)){var c=t.searchIn(n),a=c.isMatch,s=c.score,u=c.indices;a&&r.push({item:n,idx:i,matches:[{score:s,value:n,norm:o,indices:u}]})}})),r}},{key:"_searchLogical",value:function(e){var t=this,n=function(e,t){var n=(arguments.length>2&&void 0!==arguments[2]?arguments[2]:{}).auto,r=void 0===n||n,i=function e(n){var i=Object.keys(n),o=ue(n);if(!o&&i.length>1&&!se(n))return e(le(n));if(he(n)){var c=o?n[ce]:i[0],a=o?n[ae]:n[c];if(!g(a))throw new Error(x(c));var s={keyId:j(c),pattern:a};return r&&(s.searcher=re(a,t)),s}var u={children:[],operator:i[0]};return i.forEach((function(t){var r=n[t];v(r)&&r.forEach((function(t){u.children.push(e(t))}))})),u};return se(e)||(e=le(e)),i(e)}(e,this.options),r=function e(n,r,i){if(!n.children){var o=n.keyId,c=n.searcher,a=t._findMatches({key:t._keyStore.get(o),value:t._myIndex.getValueForItemAtKeyId(r,o),searcher:c});return a&&a.length?[{idx:i,item:r,matches:a}]:[]}for(var s=[],u=0,h=n.children.length;u1&&void 0!==arguments[1]?arguments[1]:{},n=t.getFn,r=void 0===n?I.getFn:n,i=t.fieldNormWeight,o=void 0===i?I.fieldNormWeight:i,c=e.keys,a=e.records,s=new $({getFn:r,fieldNormWeight:o});return s.setKeys(c),s.setIndexRecords(a),s},ye.config=I,function(){ne.push.apply(ne,arguments)}(te),ye},"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e="undefined"!=typeof globalThis?globalThis:e||self).Fuse=t(); \ No newline at end of file diff --git a/docs/2_39/site_libs/quarto-search/quarto-search.js b/docs/2_39/site_libs/quarto-search/quarto-search.js new file mode 100644 index 000000000..d788a9581 --- /dev/null +++ b/docs/2_39/site_libs/quarto-search/quarto-search.js @@ -0,0 +1,1290 @@ +const kQueryArg = "q"; 
+const kResultsArg = "show-results"; + +// If items don't provide a URL, then both the navigator and the onSelect +// function aren't called (and therefore, the default implementation is used) +// +// We're using this sentinel URL to signal to those handlers that this +// item is a more item (along with the type) and can be handled appropriately +const kItemTypeMoreHref = "0767FDFD-0422-4E5A-BC8A-3BE11E5BBA05"; + +window.document.addEventListener("DOMContentLoaded", function (_event) { + // Ensure that search is available on this page. If it isn't, + // should return early and not do anything + var searchEl = window.document.getElementById("quarto-search"); + if (!searchEl) return; + + const { autocomplete } = window["@algolia/autocomplete-js"]; + + let quartoSearchOptions = {}; + let language = {}; + const searchOptionEl = window.document.getElementById( + "quarto-search-options" + ); + if (searchOptionEl) { + const jsonStr = searchOptionEl.textContent; + quartoSearchOptions = JSON.parse(jsonStr); + language = quartoSearchOptions.language; + } + + // note the search mode + if (quartoSearchOptions.type === "overlay") { + searchEl.classList.add("type-overlay"); + } else { + searchEl.classList.add("type-textbox"); + } + + // Used to determine highlighting behavior for this page + // A `q` query param is expected when the user follows a search + // to this page + const currentUrl = new URL(window.location); + const query = currentUrl.searchParams.get(kQueryArg); + const showSearchResults = currentUrl.searchParams.get(kResultsArg); + const mainEl = window.document.querySelector("main"); + + // highlight matches on the page + if (query && mainEl) { + // perform any highlighting + highlight(escapeRegExp(query), mainEl); + + // fix up the URL to remove the q query param + const replacementUrl = new URL(window.location); + replacementUrl.searchParams.delete(kQueryArg); + window.history.replaceState({}, "", replacementUrl); + } + + // function to clear highlighting on the 
page when the search query changes + // (e.g. if the user edits the query or clears it) + let highlighting = true; + const resetHighlighting = (searchTerm) => { + if (mainEl && highlighting && query && searchTerm !== query) { + clearHighlight(query, mainEl); + highlighting = false; + } + }; + + // Clear search highlighting when the user scrolls sufficiently + const resetFn = () => { + resetHighlighting(""); + window.removeEventListener("quarto-hrChanged", resetFn); + window.removeEventListener("quarto-sectionChanged", resetFn); + }; + + // Register this event after the initial scrolling and settling of events + // on the page + window.addEventListener("quarto-hrChanged", resetFn); + window.addEventListener("quarto-sectionChanged", resetFn); + + // Responsively switch to overlay mode if the search is present on the navbar + // Note that switching the sidebar to overlay mode requires more coordinate (not just + // the media query since we generate different HTML for sidebar overlays than we do + // for sidebar input UI) + const detachedMediaQuery = + quartoSearchOptions.type === "overlay" ? 
"all" : "(max-width: 991px)"; + + // If configured, include the analytics client to send insights + const plugins = configurePlugins(quartoSearchOptions); + + let lastState = null; + const { setIsOpen, setQuery, setCollections } = autocomplete({ + container: searchEl, + detachedMediaQuery: detachedMediaQuery, + defaultActiveItemId: 0, + panelContainer: "#quarto-search-results", + panelPlacement: quartoSearchOptions["panel-placement"], + debug: false, + openOnFocus: true, + plugins, + classNames: { + form: "d-flex", + }, + placeholder: language["search-text-placeholder"], + translations: { + clearButtonTitle: language["search-clear-button-title"], + detachedCancelButtonText: language["search-detached-cancel-button-title"], + submitButtonTitle: language["search-submit-button-title"], + }, + initialState: { + query, + }, + getItemUrl({ item }) { + return item.href; + }, + onStateChange({ state }) { + // If this is a file URL, note that + + // Perhaps reset highlighting + resetHighlighting(state.query); + + // If the panel just opened, ensure the panel is positioned properly + if (state.isOpen) { + if (lastState && !lastState.isOpen) { + setTimeout(() => { + positionPanel(quartoSearchOptions["panel-placement"]); + }, 150); + } + } + + // Perhaps show the copy link + showCopyLink(state.query, quartoSearchOptions); + + lastState = state; + }, + reshape({ sources, state }) { + return sources.map((source) => { + try { + const items = source.getItems(); + + // Validate the items + validateItems(items); + + // group the items by document + const groupedItems = new Map(); + items.forEach((item) => { + const hrefParts = item.href.split("#"); + const baseHref = hrefParts[0]; + const isDocumentItem = hrefParts.length === 1; + + const items = groupedItems.get(baseHref); + if (!items) { + groupedItems.set(baseHref, [item]); + } else { + // If the href for this item matches the document + // exactly, place this item first as it is the item that represents + // the document itself + 
if (isDocumentItem) { + items.unshift(item); + } else { + items.push(item); + } + groupedItems.set(baseHref, items); + } + }); + + const reshapedItems = []; + let count = 1; + for (const [_key, value] of groupedItems) { + const firstItem = value[0]; + reshapedItems.push({ + ...firstItem, + type: kItemTypeDoc, + }); + + const collapseMatches = quartoSearchOptions["collapse-after"]; + const collapseCount = + typeof collapseMatches === "number" ? collapseMatches : 1; + + if (value.length > 1) { + const target = `search-more-${count}`; + const isExpanded = + state.context.expanded && + state.context.expanded.includes(target); + + const remainingCount = value.length - collapseCount; + + for (let i = 1; i < value.length; i++) { + if (collapseMatches && i === collapseCount) { + reshapedItems.push({ + target, + title: isExpanded + ? language["search-hide-matches-text"] + : remainingCount === 1 + ? `${remainingCount} ${language["search-more-match-text"]}` + : `${remainingCount} ${language["search-more-matches-text"]}`, + type: kItemTypeMore, + href: kItemTypeMoreHref, + }); + } + + if (isExpanded || !collapseMatches || i < collapseCount) { + reshapedItems.push({ + ...value[i], + type: kItemTypeItem, + target, + }); + } + } + } + count += 1; + } + + return { + ...source, + getItems() { + return reshapedItems; + }, + }; + } catch (error) { + // Some form of error occurred + return { + ...source, + getItems() { + return [ + { + title: error.name || "An Error Occurred While Searching", + text: + error.message || + "An unknown error occurred while attempting to perform the requested search.", + type: kItemTypeError, + }, + ]; + }, + }; + } + }); + }, + navigator: { + navigate({ itemUrl }) { + if (itemUrl !== offsetURL(kItemTypeMoreHref)) { + window.location.assign(itemUrl); + } + }, + navigateNewTab({ itemUrl }) { + if (itemUrl !== offsetURL(kItemTypeMoreHref)) { + const windowReference = window.open(itemUrl, "_blank", "noopener"); + if (windowReference) { + 
windowReference.focus(); + } + } + }, + navigateNewWindow({ itemUrl }) { + if (itemUrl !== offsetURL(kItemTypeMoreHref)) { + window.open(itemUrl, "_blank", "noopener"); + } + }, + }, + getSources({ state, setContext, setActiveItemId, refresh }) { + return [ + { + sourceId: "documents", + getItemUrl({ item }) { + if (item.href) { + return offsetURL(item.href); + } else { + return undefined; + } + }, + onSelect({ + item, + state, + setContext, + setIsOpen, + setActiveItemId, + refresh, + }) { + if (item.type === kItemTypeMore) { + toggleExpanded(item, state, setContext, setActiveItemId, refresh); + + // Toggle more + setIsOpen(true); + } + }, + getItems({ query }) { + if (query === null || query === "") { + return []; + } + + const limit = quartoSearchOptions.limit; + if (quartoSearchOptions.algolia) { + return algoliaSearch(query, limit, quartoSearchOptions.algolia); + } else { + // Fuse search options + const fuseSearchOptions = { + isCaseSensitive: false, + shouldSort: true, + minMatchCharLength: 2, + limit: limit, + }; + + return readSearchData().then(function (fuse) { + return fuseSearch(query, fuse, fuseSearchOptions); + }); + } + }, + templates: { + noResults({ createElement }) { + const hasQuery = lastState.query; + + return createElement( + "div", + { + class: `quarto-search-no-results${ + hasQuery ? 
"" : " no-query" + }`, + }, + language["search-no-results-text"] + ); + }, + header({ items, createElement }) { + // count the documents + const count = items.filter((item) => { + return item.type === kItemTypeDoc; + }).length; + + if (count > 0) { + return createElement( + "div", + { class: "search-result-header" }, + `${count} ${language["search-matching-documents-text"]}` + ); + } else { + return createElement( + "div", + { class: "search-result-header-no-results" }, + `` + ); + } + }, + footer({ _items, createElement }) { + if ( + quartoSearchOptions.algolia && + quartoSearchOptions.algolia["show-logo"] + ) { + const libDir = quartoSearchOptions.algolia["libDir"]; + const logo = createElement("img", { + src: offsetURL( + `${libDir}/quarto-search/search-by-algolia.svg` + ), + class: "algolia-search-logo", + }); + return createElement( + "a", + { href: "http://www.algolia.com/" }, + logo + ); + } + }, + + item({ item, createElement }) { + return renderItem( + item, + createElement, + state, + setActiveItemId, + setContext, + refresh, + quartoSearchOptions + ); + }, + }, + }, + ]; + }, + }); + + window.quartoOpenSearch = () => { + setIsOpen(false); + setIsOpen(true); + focusSearchInput(); + }; + + document.addEventListener("keyup", (event) => { + const { key } = event; + const kbds = quartoSearchOptions["keyboard-shortcut"]; + const focusedEl = document.activeElement; + + const isFormElFocused = [ + "input", + "select", + "textarea", + "button", + "option", + ].find((tag) => { + return focusedEl.tagName.toLowerCase() === tag; + }); + + if ( + kbds && + kbds.includes(key) && + !isFormElFocused && + !document.activeElement.isContentEditable + ) { + event.preventDefault(); + window.quartoOpenSearch(); + } + }); + + // Remove the labeleledby attribute since it is pointing + // to a non-existent label + if (quartoSearchOptions.type === "overlay") { + const inputEl = window.document.querySelector( + "#quarto-search .aa-Autocomplete" + ); + if (inputEl) { + 
inputEl.removeAttribute("aria-labelledby"); + } + } + + function throttle(func, wait) { + let waiting = false; + return function () { + if (!waiting) { + func.apply(this, arguments); + waiting = true; + setTimeout(function () { + waiting = false; + }, wait); + } + }; + } + + // If the main document scrolls dismiss the search results + // (otherwise, since they're floating in the document they can scroll with the document) + window.document.body.onscroll = throttle(() => { + // Only do this if we're not detached + // Bug #7117 + // This will happen when the keyboard is shown on ios (resulting in a scroll) + // which then closed the search UI + if (!window.matchMedia(detachedMediaQuery).matches) { + setIsOpen(false); + } + }, 50); + + if (showSearchResults) { + setIsOpen(true); + focusSearchInput(); + } +}); + +function configurePlugins(quartoSearchOptions) { + const autocompletePlugins = []; + const algoliaOptions = quartoSearchOptions.algolia; + if ( + algoliaOptions && + algoliaOptions["analytics-events"] && + algoliaOptions["search-only-api-key"] && + algoliaOptions["application-id"] + ) { + const apiKey = algoliaOptions["search-only-api-key"]; + const appId = algoliaOptions["application-id"]; + + // Aloglia insights may not be loaded because they require cookie consent + // Use deferred loading so events will start being recorded when/if consent + // is granted. 
+ const algoliaInsightsDeferredPlugin = deferredLoadPlugin(() => { + if ( + window.aa && + window["@algolia/autocomplete-plugin-algolia-insights"] + ) { + window.aa("init", { + appId, + apiKey, + useCookie: true, + }); + + const { createAlgoliaInsightsPlugin } = + window["@algolia/autocomplete-plugin-algolia-insights"]; + // Register the insights client + const algoliaInsightsPlugin = createAlgoliaInsightsPlugin({ + insightsClient: window.aa, + onItemsChange({ insights, insightsEvents }) { + const events = insightsEvents.flatMap((event) => { + // This API limits the number of items per event to 20 + const chunkSize = 20; + const itemChunks = []; + const eventItems = event.items; + for (let i = 0; i < eventItems.length; i += chunkSize) { + itemChunks.push(eventItems.slice(i, i + chunkSize)); + } + // Split the items into multiple events that can be sent + const events = itemChunks.map((items) => { + return { + ...event, + items, + }; + }); + return events; + }); + + for (const event of events) { + insights.viewedObjectIDs(event); + } + }, + }); + return algoliaInsightsPlugin; + } + }); + + // Add the plugin + autocompletePlugins.push(algoliaInsightsDeferredPlugin); + return autocompletePlugins; + } +} + +// For plugins that may not load immediately, create a wrapper +// plugin and forward events and plugin data once the plugin +// is initialized. This is useful for cases like cookie consent +// which may prevent the analytics insights event plugin from initializing +// immediately. 
+function deferredLoadPlugin(createPlugin) { + let plugin = undefined; + let subscribeObj = undefined; + const wrappedPlugin = () => { + if (!plugin && subscribeObj) { + plugin = createPlugin(); + if (plugin && plugin.subscribe) { + plugin.subscribe(subscribeObj); + } + } + return plugin; + }; + + return { + subscribe: (obj) => { + subscribeObj = obj; + }, + onStateChange: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.onStateChange) { + plugin.onStateChange(obj); + } + }, + onSubmit: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.onSubmit) { + plugin.onSubmit(obj); + } + }, + onReset: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.onReset) { + plugin.onReset(obj); + } + }, + getSources: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.getSources) { + return plugin.getSources(obj); + } else { + return Promise.resolve([]); + } + }, + data: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.data) { + plugin.data(obj); + } + }, + }; +} + +function validateItems(items) { + // Validate the first item + if (items.length > 0) { + const item = items[0]; + const missingFields = []; + if (item.href == undefined) { + missingFields.push("href"); + } + if (!item.title == undefined) { + missingFields.push("title"); + } + if (!item.text == undefined) { + missingFields.push("text"); + } + + if (missingFields.length === 1) { + throw { + name: `Error: Search index is missing the ${missingFields[0]} field.`, + message: `The items being returned for this search do not include all the required fields. 
Please ensure that your index items include the ${missingFields[0]} field or use index-fields in your _quarto.yml file to specify the field names.`, + }; + } else if (missingFields.length > 1) { + const missingFieldList = missingFields + .map((field) => { + return `${field}`; + }) + .join(", "); + + throw { + name: `Error: Search index is missing the following fields: ${missingFieldList}.`, + message: `The items being returned for this search do not include all the required fields. Please ensure that your index items includes the following fields: ${missingFieldList}, or use index-fields in your _quarto.yml file to specify the field names.`, + }; + } + } +} + +let lastQuery = null; +function showCopyLink(query, options) { + const language = options.language; + lastQuery = query; + // Insert share icon + const inputSuffixEl = window.document.body.querySelector( + ".aa-Form .aa-InputWrapperSuffix" + ); + + if (inputSuffixEl) { + let copyButtonEl = window.document.body.querySelector( + ".aa-Form .aa-InputWrapperSuffix .aa-CopyButton" + ); + + if (copyButtonEl === null) { + copyButtonEl = window.document.createElement("button"); + copyButtonEl.setAttribute("class", "aa-CopyButton"); + copyButtonEl.setAttribute("type", "button"); + copyButtonEl.setAttribute("title", language["search-copy-link-title"]); + copyButtonEl.onmousedown = (e) => { + e.preventDefault(); + e.stopPropagation(); + }; + + const linkIcon = "bi-clipboard"; + const checkIcon = "bi-check2"; + + const shareIconEl = window.document.createElement("i"); + shareIconEl.setAttribute("class", `bi ${linkIcon}`); + copyButtonEl.appendChild(shareIconEl); + inputSuffixEl.prepend(copyButtonEl); + + const clipboard = new window.ClipboardJS(".aa-CopyButton", { + text: function (_trigger) { + const copyUrl = new URL(window.location); + copyUrl.searchParams.set(kQueryArg, lastQuery); + copyUrl.searchParams.set(kResultsArg, "1"); + return copyUrl.toString(); + }, + }); + clipboard.on("success", function (e) { + // Focus 
the input + + // button target + const button = e.trigger; + const icon = button.querySelector("i.bi"); + + // flash "checked" + icon.classList.add(checkIcon); + icon.classList.remove(linkIcon); + setTimeout(function () { + icon.classList.remove(checkIcon); + icon.classList.add(linkIcon); + }, 1000); + }); + } + + // If there is a query, show the link icon + if (copyButtonEl) { + if (lastQuery && options["copy-button"]) { + copyButtonEl.style.display = "flex"; + } else { + copyButtonEl.style.display = "none"; + } + } + } +} + +/* Search Index Handling */ +// create the index +var fuseIndex = undefined; +var shownWarning = false; + +// fuse index options +const kFuseIndexOptions = { + keys: [ + { name: "title", weight: 20 }, + { name: "section", weight: 20 }, + { name: "text", weight: 10 }, + ], + ignoreLocation: true, + threshold: 0.1, +}; + +async function readSearchData() { + // Initialize the search index on demand + if (fuseIndex === undefined) { + if (window.location.protocol === "file:" && !shownWarning) { + window.alert( + "Search requires JavaScript features disabled when running in file://... URLs. In order to use search, please run this document in a web server." 
+ ); + shownWarning = true; + return; + } + const fuse = new window.Fuse([], kFuseIndexOptions); + + // fetch the main search.json + const response = await fetch(offsetURL("search.json")); + if (response.status == 200) { + return response.json().then(function (searchDocs) { + searchDocs.forEach(function (searchDoc) { + fuse.add(searchDoc); + }); + fuseIndex = fuse; + return fuseIndex; + }); + } else { + return Promise.reject( + new Error( + "Unexpected status from search index request: " + response.status + ) + ); + } + } + + return fuseIndex; +} + +function inputElement() { + return window.document.body.querySelector(".aa-Form .aa-Input"); +} + +function focusSearchInput() { + setTimeout(() => { + const inputEl = inputElement(); + if (inputEl) { + inputEl.focus(); + } + }, 50); +} + +/* Panels */ +const kItemTypeDoc = "document"; +const kItemTypeMore = "document-more"; +const kItemTypeItem = "document-item"; +const kItemTypeError = "error"; + +function renderItem( + item, + createElement, + state, + setActiveItemId, + setContext, + refresh, + quartoSearchOptions +) { + switch (item.type) { + case kItemTypeDoc: + return createDocumentCard( + createElement, + "file-richtext", + item.title, + item.section, + item.text, + item.href, + item.crumbs, + quartoSearchOptions + ); + case kItemTypeMore: + return createMoreCard( + createElement, + item, + state, + setActiveItemId, + setContext, + refresh + ); + case kItemTypeItem: + return createSectionCard( + createElement, + item.section, + item.text, + item.href + ); + case kItemTypeError: + return createErrorCard(createElement, item.title, item.text); + default: + return undefined; + } +} + +function createDocumentCard( + createElement, + icon, + title, + section, + text, + href, + crumbs, + quartoSearchOptions +) { + const iconEl = createElement("i", { + class: `bi bi-${icon} search-result-icon`, + }); + const titleEl = createElement("p", { class: "search-result-title" }, title); + const titleContents = [iconEl, titleEl]; 
+ const showParent = quartoSearchOptions["show-item-context"]; + if (crumbs && showParent) { + let crumbsOut = undefined; + const crumbClz = ["search-result-crumbs"]; + if (showParent === "root") { + crumbsOut = crumbs.length > 1 ? crumbs[0] : undefined; + } else if (showParent === "parent") { + crumbsOut = crumbs.length > 1 ? crumbs[crumbs.length - 2] : undefined; + } else { + crumbsOut = crumbs.length > 1 ? crumbs.join(" > ") : undefined; + crumbClz.push("search-result-crumbs-wrap"); + } + + const crumbEl = createElement( + "p", + { class: crumbClz.join(" ") }, + crumbsOut + ); + titleContents.push(crumbEl); + } + + const titleContainerEl = createElement( + "div", + { class: "search-result-title-container" }, + titleContents + ); + + const textEls = []; + if (section) { + const sectionEl = createElement( + "p", + { class: "search-result-section" }, + section + ); + textEls.push(sectionEl); + } + const descEl = createElement("p", { + class: "search-result-text", + dangerouslySetInnerHTML: { + __html: text, + }, + }); + textEls.push(descEl); + + const textContainerEl = createElement( + "div", + { class: "search-result-text-container" }, + textEls + ); + + const containerEl = createElement( + "div", + { + class: "search-result-container", + }, + [titleContainerEl, textContainerEl] + ); + + const linkEl = createElement( + "a", + { + href: offsetURL(href), + class: "search-result-link", + }, + containerEl + ); + + const classes = ["search-result-doc", "search-item"]; + if (!section) { + classes.push("document-selectable"); + } + + return createElement( + "div", + { + class: classes.join(" "), + }, + linkEl + ); +} + +function createMoreCard( + createElement, + item, + state, + setActiveItemId, + setContext, + refresh +) { + const moreCardEl = createElement( + "div", + { + class: "search-result-more search-item", + onClick: (e) => { + // Handle expanding the sections by adding the expanded + // section to the list of expanded sections + toggleExpanded(item, state, 
setContext, setActiveItemId, refresh); + e.stopPropagation(); + }, + }, + item.title + ); + + return moreCardEl; +} + +function toggleExpanded(item, state, setContext, setActiveItemId, refresh) { + const expanded = state.context.expanded || []; + if (expanded.includes(item.target)) { + setContext({ + expanded: expanded.filter((target) => target !== item.target), + }); + } else { + setContext({ expanded: [...expanded, item.target] }); + } + + refresh(); + setActiveItemId(item.__autocomplete_id); +} + +function createSectionCard(createElement, section, text, href) { + const sectionEl = createSection(createElement, section, text, href); + return createElement( + "div", + { + class: "search-result-doc-section search-item", + }, + sectionEl + ); +} + +function createSection(createElement, title, text, href) { + const descEl = createElement("p", { + class: "search-result-text", + dangerouslySetInnerHTML: { + __html: text, + }, + }); + + const titleEl = createElement("p", { class: "search-result-section" }, title); + const linkEl = createElement( + "a", + { + href: offsetURL(href), + class: "search-result-link", + }, + [titleEl, descEl] + ); + return linkEl; +} + +function createErrorCard(createElement, title, text) { + const descEl = createElement("p", { + class: "search-error-text", + dangerouslySetInnerHTML: { + __html: text, + }, + }); + + const titleEl = createElement("p", { + class: "search-error-title", + dangerouslySetInnerHTML: { + __html: ` ${title}`, + }, + }); + const errorEl = createElement("div", { class: "search-error" }, [ + titleEl, + descEl, + ]); + return errorEl; +} + +function positionPanel(pos) { + const panelEl = window.document.querySelector( + "#quarto-search-results .aa-Panel" + ); + const inputEl = window.document.querySelector( + "#quarto-search .aa-Autocomplete" + ); + + if (panelEl && inputEl) { + panelEl.style.top = `${Math.round(panelEl.offsetTop)}px`; + if (pos === "start") { + panelEl.style.left = `${Math.round(inputEl.left)}px`; + } else 
{ + panelEl.style.right = `${Math.round(inputEl.offsetRight)}px`; + } + } +} + +/* Highlighting */ +// highlighting functions +function highlightMatch(query, text) { + if (text) { + const start = text.toLowerCase().indexOf(query.toLowerCase()); + if (start !== -1) { + const startMark = ""; + const endMark = ""; + + const end = start + query.length; + text = + text.slice(0, start) + + startMark + + text.slice(start, end) + + endMark + + text.slice(end); + const startInfo = clipStart(text, start); + const endInfo = clipEnd( + text, + startInfo.position + startMark.length + endMark.length + ); + text = + startInfo.prefix + + text.slice(startInfo.position, endInfo.position) + + endInfo.suffix; + + return text; + } else { + return text; + } + } else { + return text; + } +} + +function clipStart(text, pos) { + const clipStart = pos - 50; + if (clipStart < 0) { + // This will just return the start of the string + return { + position: 0, + prefix: "", + }; + } else { + // We're clipping before the start of the string, walk backwards to the first space. + const spacePos = findSpace(text, pos, -1); + return { + position: spacePos.position, + prefix: "", + }; + } +} + +function clipEnd(text, pos) { + const clipEnd = pos + 200; + if (clipEnd > text.length) { + return { + position: text.length, + suffix: "", + }; + } else { + const spacePos = findSpace(text, clipEnd, 1); + return { + position: spacePos.position, + suffix: spacePos.clipped ? "…" : "", + }; + } +} + +function findSpace(text, start, step) { + let stepPos = start; + while (stepPos > -1 && stepPos < text.length) { + const char = text[stepPos]; + if (char === " " || char === "," || char === ":") { + return { + position: step === 1 ? 
stepPos : stepPos - step, + clipped: stepPos > 1 && stepPos < text.length, + }; + } + stepPos = stepPos + step; + } + + return { + position: stepPos - step, + clipped: false, + }; +} + +// removes highlighting as implemented by the mark tag +function clearHighlight(searchterm, el) { + const childNodes = el.childNodes; + for (let i = childNodes.length - 1; i >= 0; i--) { + const node = childNodes[i]; + if (node.nodeType === Node.ELEMENT_NODE) { + if ( + node.tagName === "MARK" && + node.innerText.toLowerCase() === searchterm.toLowerCase() + ) { + el.replaceChild(document.createTextNode(node.innerText), node); + } else { + clearHighlight(searchterm, node); + } + } + } +} + +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string +} + +// highlight matches +function highlight(term, el) { + const termRegex = new RegExp(term, "ig"); + const childNodes = el.childNodes; + + // walk back to front avoid mutating elements in front of us + for (let i = childNodes.length - 1; i >= 0; i--) { + const node = childNodes[i]; + + if (node.nodeType === Node.TEXT_NODE) { + // Search text nodes for text to highlight + const text = node.nodeValue; + + let startIndex = 0; + let matchIndex = text.search(termRegex); + if (matchIndex > -1) { + const markFragment = document.createDocumentFragment(); + while (matchIndex > -1) { + const prefix = text.slice(startIndex, matchIndex); + markFragment.appendChild(document.createTextNode(prefix)); + + const mark = document.createElement("mark"); + mark.appendChild( + document.createTextNode( + text.slice(matchIndex, matchIndex + term.length) + ) + ); + markFragment.appendChild(mark); + + startIndex = matchIndex + term.length; + matchIndex = text.slice(startIndex).search(new RegExp(term, "ig")); + if (matchIndex > -1) { + matchIndex = startIndex + matchIndex; + } + } + if (startIndex < text.length) { + markFragment.appendChild( + document.createTextNode(text.slice(startIndex, 
text.length)) + ); + } + + el.replaceChild(markFragment, node); + } + } else if (node.nodeType === Node.ELEMENT_NODE) { + // recurse through elements + highlight(term, node); + } + } +} + +/* Link Handling */ +// get the offset from this page for a given site root relative url +function offsetURL(url) { + var offset = getMeta("quarto:offset"); + return offset ? offset + url : url; +} + +// read a meta tag value +function getMeta(metaName) { + var metas = window.document.getElementsByTagName("meta"); + for (let i = 0; i < metas.length; i++) { + if (metas[i].getAttribute("name") === metaName) { + return metas[i].getAttribute("content"); + } + } + return ""; +} + +function algoliaSearch(query, limit, algoliaOptions) { + const { getAlgoliaResults } = window["@algolia/autocomplete-preset-algolia"]; + + const applicationId = algoliaOptions["application-id"]; + const searchOnlyApiKey = algoliaOptions["search-only-api-key"]; + const indexName = algoliaOptions["index-name"]; + const indexFields = algoliaOptions["index-fields"]; + const searchClient = window.algoliasearch(applicationId, searchOnlyApiKey); + const searchParams = algoliaOptions["params"]; + const searchAnalytics = !!algoliaOptions["analytics-events"]; + + return getAlgoliaResults({ + searchClient, + queries: [ + { + indexName: indexName, + query, + params: { + hitsPerPage: limit, + clickAnalytics: searchAnalytics, + ...searchParams, + }, + }, + ], + transformResponse: (response) => { + if (!indexFields) { + return response.hits.map((hit) => { + return hit.map((item) => { + return { + ...item, + text: highlightMatch(query, item.text), + }; + }); + }); + } else { + const remappedHits = response.hits.map((hit) => { + return hit.map((item) => { + const newItem = { ...item }; + ["href", "section", "title", "text", "crumbs"].forEach( + (keyName) => { + const mappedName = indexFields[keyName]; + if ( + mappedName && + item[mappedName] !== undefined && + mappedName !== keyName + ) { + newItem[keyName] = 
item[mappedName]; + delete newItem[mappedName]; + } + } + ); + newItem.text = highlightMatch(query, newItem.text); + return newItem; + }); + }); + return remappedHits; + } + }, + }); +} + +let subSearchTerm = undefined; +let subSearchFuse = undefined; +const kFuseMaxWait = 125; + +async function fuseSearch(query, fuse, fuseOptions) { + let index = fuse; + // Fuse.js using the Bitap algorithm for text matching which runs in + // O(nm) time (no matter the structure of the text). In our case this + // means that long search terms mixed with large index gets very slow + // + // This injects a subIndex that will be used once the terms get long enough + // Usually making this subindex is cheap since there will typically be + // a subset of results matching the existing query + if (subSearchFuse !== undefined && query.startsWith(subSearchTerm)) { + // Use the existing subSearchFuse + index = subSearchFuse; + } else if (subSearchFuse !== undefined) { + // The term changed, discard the existing fuse + subSearchFuse = undefined; + subSearchTerm = undefined; + } + + // Search using the active fuse + const then = performance.now(); + const resultsRaw = await index.search(query, fuseOptions); + const now = performance.now(); + + const results = resultsRaw.map((result) => { + const addParam = (url, name, value) => { + const anchorParts = url.split("#"); + const baseUrl = anchorParts[0]; + const sep = baseUrl.search("\\?") > 0 ? 
"&" : "?"; + anchorParts[0] = baseUrl + sep + name + "=" + value; + return anchorParts.join("#"); + }; + + return { + title: result.item.title, + section: result.item.section, + href: addParam(result.item.href, kQueryArg, query), + text: highlightMatch(query, result.item.text), + crumbs: result.item.crumbs, + }; + }); + + // If we don't have a subfuse and the query is long enough, go ahead + // and create a subfuse to use for subsequent queries + if ( + now - then > kFuseMaxWait && + subSearchFuse === undefined && + resultsRaw.length < fuseOptions.limit + ) { + subSearchTerm = query; + subSearchFuse = new window.Fuse([], kFuseIndexOptions); + resultsRaw.forEach((rr) => { + subSearchFuse.add(rr.item); + }); + } + return results; +} diff --git a/docs/2_39/stan-users-guide-2_39.pdf b/docs/2_39/stan-users-guide-2_39.pdf new file mode 100644 index 000000000..011fadac0 Binary files /dev/null and b/docs/2_39/stan-users-guide-2_39.pdf differ diff --git a/docs/2_39/stan-users-guide/algebraic-equations.html b/docs/2_39/stan-users-guide/algebraic-equations.html new file mode 100644 index 000000000..171d090df --- /dev/null +++ b/docs/2_39/stan-users-guide/algebraic-equations.html @@ -0,0 +1,1342 @@ + + + + + + + + + +Solving Algebraic Equations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Solving Algebraic Equations

+

Stan provides a built-in mechanism for specifying systems of algebraic equations. These systems can be solved either with the Newton method, as implemented in the Kinsol package (Hindmarsh et al. 2005), or with the Powell hybrid method (Powell 1970). The function signatures for Stan’s algebraic solvers are fully described in the algebraic solver section of the reference manual.

+

Solving any system of algebraic equations can be translated into a root-finding problem, that is, given a function \(f\), we wish to find \(y\) such that \(f(y) = 0\).

+
+

Example: system of nonlinear algebraic equations

+

For systems of linear algebraic equations, we recommend solving the system using matrix division. The algebraic solver becomes handy when we want to solve nonlinear equations.

+

As an illustrative example, we consider the following nonlinear system of two equations with two unknowns: \[\begin{align*} +z_1 &= y_1 - \theta_1 \\ +z_2 &= y_1 y_2 - \theta_2 +\end{align*}\]

+

Our goal is to simultaneously solve all equations for \(y_1\) and \(y_2\), such that the vector \(z\) goes to 0.

+
+
+

Coding an algebraic system

+

A system of algebraic equations is coded directly in Stan as a function with a strictly specified signature. For example, the nonlinear system given above can be coded using the following function in Stan (see the user-defined functions section for more information on coding user-defined functions).

+
vector system(vector y,              // unknowns
+              vector theta,          // parameters
+              data array[] real x_r, // data (real), unused in this example
+              array[] int x_i) {     // data (integer), unused in this example
+  vector[2] z;                       // residuals; the solver seeks z = 0
+  z[1] = y[1] - theta[1];            // z_1 = y_1 - theta_1
+  z[2] = y[1] * y[2] + theta[2];     // z_2 = y_1 * y_2 + theta_2
+  return z;
+}
+

The function takes the unknowns we wish to solve for in y (a vector), the system parameters in theta (a vector), the real data in x_r (a real array) and the integer data in x_i (an integer array). The system function returns the value of the function (a vector), for which we want to compute the roots. Our example does not use real or integer data. Nevertheless, these unused arguments must be included in the system function with exactly the signature above.

+

The body of the system function here could also be coded using a row vector constructor and transposition,

+
return [ y[1] - theta[1],            // z_1
+         y[1] * y[2] + theta[2] ]';  // z_2
+

As systems get more complicated, naming the intermediate expressions goes a long way toward readability.

+
+

Strict signature

+

The function defining the system must have exactly these argument types and return type. This may require passing in zero-length arrays for data or a zero-length vector for parameters if the system does not involve data or parameters.

+
+
+
+

Calling the algebraic solver

+

Let’s suppose \(\theta = (3, 6)\). To call the algebraic solver, we need to provide an initial guess. This varies on a case-by-case basis, but in general a good guess will speed up the solver and, in pathological cases, even determine whether the solver converges or not. If the solver does not converge, the Metropolis proposal gets rejected and a warning message, stating no acceptable solution was found, is issued.

+

The solver has three tuning parameters to determine convergence: the relative tolerance, the function tolerance, and the maximum number of steps. Their behavior is explained in the section about algebraic solvers with control parameters.

+

The following code returns the solution to our nonlinear algebraic system:

+
transformed data {
+  // zero-length placeholders: the example system uses no data
+  array[0] real x_r;
+  array[0] int x_i;
+  vector[2] y_guess = [1, 1]';  // initial guess for the unknowns
+}
+
+transformed parameters {
+  vector[2] theta = [3, 6]';
+  // solve system(y, theta, x_r, x_i) = 0 for y with the Newton solver
+  vector[2] y = solve_newton(system, y_guess, theta, x_r, x_i);
+}
+

which returns \(y = (3, -2)\).

+
+

Data versus parameters

+

The arguments for the real data x_r and the integer data x_i must be expressions that only involve data or transformed data variables. theta, on the other hand, must only involve parameters. Note there are no restrictions on the initial guess, y_guess, which may be a data or a parameter vector.

+
+
+

Length of the algebraic function and of the vector of unknowns

+

The Jacobian of the solution with respect to the parameters is computed using the implicit function theorem, which imposes certain restrictions. In particular, the Jacobian of the algebraic function \(f\) with respect to the unknowns \(y\) must be invertible. This requires the Jacobian to be square, meaning \(f(y)\) and \(y\) have the same length or, in other words, the number of equations in the system is the same as the number of unknowns.

+
+
+

Pathological solutions

+

Certain systems may be degenerate, meaning they have multiple solutions. The algebraic solver will not report these cases, as the algorithm stops once it has found an acceptable solution. The initial guess will often determine which solution gets found first. The degeneracy may be broken by putting additional constraints on the solution. For instance, it might make “physical sense” for a solution to be positive or negative.

+

On the other hand, a system may not have a solution (for a given point in the parameter space). In that case, the solver will not converge to a solution. When the solver fails to do so, the current Metropolis proposal gets rejected.

+
+
+
+

Control parameters for the algebraic solver

+

The call to the algebraic solver shown previously uses the default control settings. The _tol variant of the solver function allows three additional parameters, all of which must be supplied before the variadic arguments.

+
y = solve_newton_tol(system, y_guess,
+                     scaling_step, f_tol, max_steps,  // control arguments
+                     theta, x_r, x_i);                // variadic arguments
+

For the Newton solver the three control arguments are scaling step, function tolerance, and maximum number of steps. For Powell’s hybrid method the three control arguments are relative tolerance, function tolerance, and maximum number of steps. If a Newton step is smaller than the scaling step tolerance, the code breaks, assuming the solver is no longer making significant progress. If set to 0, this constraint is ignored. For Powell’s hybrid method the relative tolerance is the estimated relative error of the solver and serves to test if a satisfactory solution has been found. After convergence of either solver, the proposed solution is plugged into the algebraic system and its norm is compared to the function tolerance. If the norm is below the function tolerance, the solution is deemed acceptable. If the solver reaches the maximum number of steps, it stops and returns an error message. If one of the criteria is not met, the Metropolis proposal gets rejected with a warning message explaining which criterion was not satisfied.

+

The default values for the control arguments are respectively scaling_step = 1e-3 (\(10^{-3}\)), rel_tol = 1e-10 (\(10^{-10}\)), f_tol = 1e-6 (\(10^{-6}\)), and max_steps = 200 (\(200\)).

+
+

Tolerance

+

The relative and function tolerances control the accuracy of the solution generated by the solver. Relative tolerances are relative to the solution value. The function tolerance is the norm of the algebraic function, once we plug in the proposed solution. This norm should go to 0 (equivalently, all elements of the vector function are 0). It helps to think about this geometrically. Ideally the output of the algebraic function is at the origin; the norm measures deviations from this ideal. As the length of the return vector increases, a certain function tolerance becomes an increasingly difficult criterion to meet, given each individual element of the vector contributes to the norm.

+

Smaller relative tolerances produce more accurate solutions but require more computational time.

+
+

Sensitivity analysis

+

The tolerances should be set low enough that setting them lower does not change the statistical properties of the posterior sample generated by the Stan program. The sensitivity can be analysed using importance sampling without the need to re-run MCMC with different tolerances, as shown by Timonen et al. (2023).

+
+
+
+

Maximum number of steps

+

The maximum number of steps can be used to stop a runaway simulation. This can arise in MCMC when a bad jump is taken, particularly during warmup. If the limit is hit, the current Metropolis proposal gets rejected. Users will see a warning message stating the maximum number of steps has been exceeded.

+ + + +
+
+
+ + Back to top

References

+
+Hindmarsh, Alan C, Peter N Brown, Keith E Grant, Steven L Lee, Radu Serban, Dan E Shumaker, and Carol S Woodward. 2005. “SUNDIALS: Suite of Nonlinear and Differential/Algebraic Equation Solvers.” ACM Transactions on Mathematical Software (TOMS) 31 (3): 363–96. +
+
+Powell, Michael J. D. 1970. “A Hybrid Method for Nonlinear Equations.” In Numerical Methods for Nonlinear Algebraic Equations, edited by P. Rabinowitz. Gordon and Breach. +
+
+Timonen, Juho, Nikolas Siccha, Ben Bales, Harri Lähdesmäki, and Aki Vehtari. 2023. “An Importance Sampling Approach for Reliable and Efficient Inference in Bayesian Ordinary Differential Equation Models.” Stat 12 (1): e614. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/bootstrap.html b/docs/2_39/stan-users-guide/bootstrap.html new file mode 100644 index 000000000..041801f23 --- /dev/null +++ b/docs/2_39/stan-users-guide/bootstrap.html @@ -0,0 +1,1363 @@ + + + + + + + + + +The Bootstrap and Bagging + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

The Bootstrap and Bagging

+

The bootstrap is a technique for approximately sampling from the error distribution for an estimator. Thus it can be used as a Monte Carlo method to estimate standard errors and confidence intervals for point estimates (Efron and Tibshirani 1986; 1994). It works by subsampling the original data and computing sample estimates from the subsample. Like other Monte Carlo methods, the bootstrap is plug-and-play, allowing great flexibility in both model choice and estimator.

+

Bagging is a technique for combining bootstrapped estimators for model criticism and more robust inference (Breiman 1996; Huggins and Miller 2019).

+
+

The bootstrap

+
+

Estimators

+

An estimator is nothing more than a function mapping a data set to one or more numbers, which are called “estimates”. For example, the mean function maps a data set \(y_{1,\ldots, N}\) to a number by \[ +\textrm{mean}(y) = \frac{1}{N} \sum_{n=1}^N y_n, +\] and hence meets the definition of an estimator. Given the likelihood function \[ +p(y \mid \mu) = \prod_{n=1}^N \textrm{normal}(y_n \mid \mu, 1), +\] the mean is the maximum likelihood estimator,

+

\[ +\textrm{mean}(y) = \textrm{arg max}_{\mu} \ p(y \mid \mu). +\] A Bayesian approach to point estimation would be to add a prior and use the posterior mean or median as an estimator. Alternatively, a penalty function could be added to the likelihood so that optimization produces a penalized maximum likelihood estimate. With any of these approaches, the estimator is just a function from data to a number.

+

In analyzing estimators, the data set is being modeled as a random variable. It is assumed that the observed data is just one of many possible random samples of data that may have been produced. If the data is modeled as a random variable, then the estimator applied to the data is also a random variable. The simulations being done for the bootstrap are attempts to randomly sample replicated data sets and compute the random properties of the estimators using standard Monte Carlo methods.

+
+
+

The bootstrap in pseudocode

+

The bootstrap works by applying an estimator to replicated data sets. These replicates are created by subsampling the original data with replacement. The sample quantiles may then be used to estimate standard errors and confidence intervals.

+

The following pseudocode estimates 95% confidence intervals and standard errors for a generic estimate \(\hat{\theta}\) that is a function of data \(y\).

+
for (m in 1:M) {
+  y_rep[m] = sample_uniform(y)              // resample y with replacement
+  theta_hat[m] = estimate_theta(y_rep[m])   // estimate on the replicate
+}
+std_error = sd(theta_hat)
+conf_95pct = [ quantile(theta_hat, 0.025),
+               quantile(theta_hat, 0.975) ]
+

The sample_uniform function works by independently assigning each element of y_rep an element of y drawn uniformly at random. This produces a sample with replacement. That is, some elements of y may show up more than once in y_rep and some may not appear at all.

+
+
+
+

Coding the bootstrap in Stan

+

The bootstrap procedure can be coded quite generally in Stan models. The following code illustrates a Stan model coding the likelihood for a simple linear regression. There is a parallel vector x of predictors in addition to outcomes y. To allow a single program to fit both the original data and random subsamples, the variable resample is set to 1 to resample and 0 to use the original data.

+
data {
+  int<lower=0> N;                  // number of observations
+  vector[N] x;                     // predictors
+  vector[N] y;                     // outcomes
+  int<lower=0, upper=1> resample;  // 1 = fit a bootstrap resample, 0 = fit the original data
+}
+transformed data {
+  // equal probability 1/N for each observation index
+  simplex[N] uniform = rep_vector(1.0 / N, N);
+  // indexes of the observations that enter the likelihood
+  array[N] int<lower=1, upper=N> boot_idxs;
+  for (n in 1:N) {
+    if (resample) {
+      boot_idxs[n] = categorical_rng(uniform);  // draw with replacement
+    } else {
+      boot_idxs[n] = n;                         // identity: original order
+    }
+  }
+}
+parameters {
+  real alpha;           // intercept
+  real beta;            // slope
+  real<lower=0> sigma;  // error scale
+}
+model {
+  // multi-indexing keeps each x[i], y[i] pair aligned
+  y[boot_idxs] ~ normal(alpha + beta * x[boot_idxs], sigma);
+}
+

The model accepts data in the usual form for a linear regression as a number of observations \(N\) with a size \(N\) vector \(x\) of predictors and a size \(N\) vector of outcomes. The transformed data block generates a set of indexes into the data that is the same size as the data. This is done by independently sampling each entry of boot_idxs from 1:N, using a discrete uniform distribution coded as a categorical random number generator with an equal chance for each outcome. If resampling is not done, the array boot_idxs is defined to be the sequence 1:N, because x == x[1:N] and y == y[1:N].

+

For example, when resample == 1, if \(N = 4,\) the value of boot_idxs might be {2, 1, 1, 3}, resulting in a bootstrap sample {y[2], y[1], y[1], y[3]} with the first element repeated twice and the fourth element not sampled at all.

+

The parameters are the usual regression coefficients for the intercept alpha, slope beta, and error scale sigma. The model uses the bootstrap index variable boot_idxs to index the predictors as x[boot_idxs] and outcomes as y[boot_idxs]. This generates a new size-\(N\) vector whose entries are defined by x[boot_idxs][n] = x[boot_idxs[n]] and similarly for y. For example, if \(N = 4\) and boot_idxs = {2, 1, 1, 3}, then x[boot_idxs] = [x[2], x[1], x[1], x[3]]' and y[boot_idxs] = [y[2], y[1], y[1], y[3]]'. The predictor and outcome vectors remain aligned, with both elements of the pair x[1] and y[1] repeated twice.

+

With the model defined this way, if resample is 1, the model is fit to a bootstrap subsample of the data. If resample is 0, the model is fit to the original data as given. By running the bootstrap fit multiple times, confidence intervals can be generated from quantiles of the results.

+
+
+

Error statistics from the bootstrap

+

Running the model multiple times produces a Monte Carlo sample of estimates from multiple alternative data sets subsampled from the original data set. The error distribution is just the distribution of the bootstrap estimates minus the estimate for the original data set.

+

To estimate standard errors and confidence intervals for maximum likelihood estimates the Stan program is executed multiple times using optimization (which turns off Jacobian adjustments for constraints and finds maximum likelihood estimates). On the order of one hundred replicates is typically enough to get a good sense of standard error; more will be needed to accurately estimate the boundaries of a 95% confidence interval. On the other hand, given that there is inherent variance due to sampling the original data \(y\), it is usually not worth calculating bootstrap estimates to high precision.

+
+

Standard errors

+

Here’s the result of calculating standard errors for the linear regression model above with \(N = 50\) data points, \(\alpha = 1.2, \beta += -0.5,\) and \(\sigma = 1.5.\) With a total of \(M = 100\) bootstrap samples, there are 100 estimates of \(\alpha\), 100 of \(\beta\), and 100 of \(\sigma\). These are then treated like Monte Carlo draws. For example, the sample standard deviation of the draws for \(\alpha\) provides the bootstrap estimate of the standard error in the estimate for \(\alpha\). Here’s what it looks like for the above model with \(M = +100\)

+
 parameter   estimate    std err
+ ---------   --------    -------
+     alpha      1.359      0.218
+      beta     -0.610      0.204
+     sigma      1.537      0.142
+

With the data set fixed, these estimates of standard error will display some Monte Carlo error. For example, here are the standard error estimates from five more runs holding the data the same, but allowing the subsampling to vary within Stan:

+
 parameter   estimate    std err
+ ---------   --------    -------
+     alpha      1.359      0.206
+     alpha      1.359      0.240
+     alpha      1.359      0.234
+     alpha      1.359      0.249
+     alpha      1.359      0.227
+

Increasing \(M\) will reduce Monte Carlo error, but this is not usually worth the extra computation time as there is so much other uncertainty due to the original data sample \(y\).

+
+
+

Confidence intervals

+

As usual with Monte Carlo methods, confidence intervals are estimated using quantiles of the draws. That is, if there are \(M = 1000\) estimates of \(\hat{\alpha}\) in different subsamples, the 2.5% quantile and 97.5% quantile pick out the boundaries of the 95% confidence interval around the estimate for the actual data set \(y\). To get accurate 97.5% quantile estimates requires a much larger number of Monte Carlo simulations (roughly twenty times as large as needed for the median).

+
+
+
+

Bagging

+

When bootstrapping is carried through inference it is known as bootstrap aggregation, or bagging, in the machine-learning literature (Breiman 1996). In the simplest case, this involves bootstrapping the original data, fitting a model to each bootstrapped data set, then averaging the predictions. For instance, rather than using an estimate \(\hat{\sigma}\) from the original data set, bootstrapped data sets \(y^{\textrm{boot}(1)}, \ldots, +y^{\textrm{boot}(N)}\) are generated. Each is used to generate an estimate \(\hat{\sigma}^{\textrm{boot}(n)}.\) The final estimate is \[ +\hat{\sigma} = \frac{1}{N} \sum_{n = 1}^N \hat{\sigma}^{\textrm{boot}(n)}. +\] The same would be done to estimate a predictive quantity \(\tilde{y}\) for as yet unseen data. \[ +\hat{\tilde{y}} = \frac{1}{N} \sum_{n = 1}^N +\hat{\tilde{y}}^{\textrm{boot}(n)}. +\] For discrete parameters, voting is used to select the outcome.

+

One way of viewing bagging is as a classical attempt to get something like averaging over parameter estimation uncertainty.

+
+
+

Bayesian bootstrap and bagging

+

A Bayesian estimator may be analyzed with the bootstrap in exactly the same way as a (penalized) maximum likelihood estimate. For example, the posterior mean and posterior median are two different Bayesian estimators. The bootstrap may be used to estimate standard errors and confidence intervals, just as for any other estimator.

+

Huggins and Miller (2019) use the bootstrap to assess model calibration and fitting in a Bayesian framework and further suggest using bagged estimators as a guard against model misspecification. Bagged posteriors will typically have wider posterior intervals than those fit with just the original data, showing that the method is not a pure Bayesian approach to updating, and indicating it would not be calibrated if the model were well specified. The hope is that it can guard against over-certainty in a poorly specified model.

+ + + +
+
+ + Back to top

References

+
+Breiman, Leo. 1996. “Bagging Predictors.” Machine Learning 24 (2): 123–40. +
+
+Efron, Bradley, and Robert Tibshirani. 1986. “Bootstrap Methods for Standard Errors, Confidence Intervals, and Other Measures of Statistical Accuracy.” Statistical Science 1 (1): 54–75. +
+
+Efron, Bradley, and Robert J Tibshirani. 1994. An Introduction to the Bootstrap. Chapman & Hall/CRC. +
+
+Huggins, Jonathan H, and Jeffrey W Miller. 2019. “Using Bagged Posteriors for Robust Inference and Model Criticism.” arXiv, no. 1912.07104. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/clustering.html b/docs/2_39/stan-users-guide/clustering.html new file mode 100644 index 000000000..c69c45ec7 --- /dev/null +++ b/docs/2_39/stan-users-guide/clustering.html @@ -0,0 +1,1655 @@ + + + + + + + + + +Clustering Models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Clustering Models

+

Unsupervised methods for organizing data into groups are collectively referred to as clustering. This chapter describes the implementation in Stan of two widely used statistical clustering models, soft \(K\)-means and latent Dirichlet allocation (LDA). In addition, this chapter includes naive Bayesian classification, which can be viewed as a form of clustering which may be supervised. These models are typically expressed using discrete parameters for cluster assignments. Nevertheless, they can be implemented in Stan like any other mixture model by marginalizing out the discrete parameters (see the mixture modeling chapter).

+
+

Relation to finite mixture models

+

As mentioned in the clustering section, clustering models and finite mixture models are really just two sides of the same coin. The “soft” \(K\)-means model described in the next section is a normal mixture model (with varying assumptions about covariance in higher dimensions leading to variants of \(K\)-means). Latent Dirichlet allocation is a mixed-membership multinomial mixture.

+
+
+

Soft K-means

+

\(K\)-means clustering is a method of clustering data represented as \(D\)-dimensional vectors. Specifically, there will be \(N\) items to be clustered, each represented as a vector \(y_n \in \mathbb{R}^D\). In the “soft” version of \(K\)-means, the assignments to clusters will be probabilistic.

+
+

Geometric hard K-means clustering

+

\(K\)-means clustering is typically described geometrically in terms of the following algorithm, which assumes the number of clusters \(K\) and data vectors \(y\) as input.

+
    +
  1. For each \(n\) in \(\{1,\dotsc,N\}\), randomly assign vector \(y_n\) to a cluster in \(\{1,\dotsc,K\}\);
  2. +
  3. Repeat +
      +
    1. For each cluster \(k\) in \(\{1,\dotsc,K\}\), compute the cluster centroid \(\mu_k\) by averaging the vectors assigned to that cluster;
    2. +
    3. For each \(n\) in \(\{1,\dotsc,N\}\), reassign \(y_n\) to the cluster \(k\) for which the (Euclidean) distance from \(y_n\) to \(\mu_k\) is smallest;
    4. +
    5. If no vectors changed cluster, return the cluster assignments.
    6. +
  4. +
+

This algorithm is guaranteed to terminate.

+
+
+

Soft K-means clustering

+

Soft \(K\)-means clustering treats the cluster assignments as probability distributions over the clusters. Because of the connection between Euclidean distance and multivariate normal models with a fixed covariance, soft \(K\)-means can be expressed (and coded in Stan) as a multivariate normal mixture model.

+

In the full generative model, each data point \(n\) in \(\{1,\dotsc,N\}\) is assigned a cluster \(z_n \in \{1,\dotsc,K\}\) with symmetric uniform probability, \[ +z_n \sim \textsf{categorical}(1/K), +\] where \(1\) is the unit vector of \(K\) dimensions, so that \(1/K\) is the symmetric \(K\)-simplex. Thus the model assumes that each data point is drawn from a hard decision about cluster membership. The softness arises only from the uncertainty about which cluster generated a data point.

+

The data points themselves are generated from a multivariate normal distribution whose parameters are determined by the cluster assignment \(z_n\), \[ +y_n \sim \textsf{normal}(\mu_{z[n]},\Sigma_{z[n]}) +\]

+

The sample implementation in this section assumes a fixed unit covariance matrix shared by all clusters \(k\), \[ +\Sigma_k = \mathrm{diag\_matrix}({\bf 1}), +\] so that the log multivariate normal can be implemented directly up to a proportion by \[ +\mathrm{normal}\left( y_n \mid \mu_k, \mathrm{diag\_matrix}({\bf 1}) \right) +\propto \exp \left (- \frac{1}{2} \sum_{d=1}^D \left( \mu_{k,d} - y_{n,d} + \right)^2 \right). +\] The spatial perspective on \(K\)-means arises by noting that the inner term is just half the negative squared Euclidean distance from the cluster mean \(\mu_k\) to the data point \(y_n\).

+
+
+

Stan implementation of soft K-means

+

Consider the following Stan program for implementing \(K\)-means clustering.

+
data {
+  int<lower=0> N;        // number of data points
+  int<lower=1> D;        // number of dimensions
+  int<lower=1> K;        // number of clusters
+  array[N] vector[D] y;  // observations
+}
+transformed data {
+  // log of the uniform prior probability 1/K of each cluster
+  real<upper=0> neg_log_K = -log(K);
+}
+parameters {
+  array[K] vector[D] mu; // cluster means
+}
+transformed parameters {
+  // soft_z[n, k]: log unnormalized probability that point n is in cluster k
+  array[N, K] real<upper=0> soft_z;
+  for (n in 1:N) {
+    for (k in 1:K) {
+      soft_z[n, k] = neg_log_K - 0.5 * dot_self(mu[k] - y[n]);
+    }
+  }
+}
+model {
+  // prior: independent standard normal on every centroid
+  for (k in 1:K) {
+    mu[k] ~ std_normal();
+  }
+
+  // likelihood: marginalize the cluster assignment of each point
+  for (n in 1:N) {
+    target += log_sum_exp(soft_z[n]);
+  }
+}
+

There is an independent standard normal prior on the centroid parameters; this prior could be swapped with other priors, or even a hierarchical model to fit an overall problem scale and location.

+

The only parameter is mu, where mu[k] is the centroid for cluster \(k\). The transformed parameters soft_z[n] contain the log of the unnormalized cluster assignment probabilities. The vector soft_z[n] can be converted back to a normalized simplex using the softmax function (see the functions reference manual), either externally or within the model’s generated quantities block.

+
+
+

Generalizing soft K-means

+

The multivariate normal distribution with unit covariance matrix produces a log probability density proportional to squared Euclidean distance (i.e., squared \(L_2\) distance). Other distributions relate to other geometries. For instance, replacing the normal distribution with the double exponential (Laplace) distribution produces a clustering model based on \(L_1\) distance (i.e., Manhattan or taxicab distance).

+

Within the multivariate normal version of \(K\)-means, replacing the unit covariance matrix with a shared covariance matrix amounts to working with distances defined in a space transformed by the inverse covariance matrix.

+

Although there is no global spatial analog, it is common to see soft \(K\)-means specified with a per-cluster covariance matrix. In this situation, a hierarchical prior may be used for the covariance matrices.

+
+
+
+

The difficulty of Bayesian inference for clustering

+

Two problems make it pretty much impossible to perform full Bayesian inference for clustering models, the lack of parameter identifiability and the extreme multimodality of the posteriors. There is additional discussion related to the non-identifiability due to label switching in the label switching section.

+
+

Non-identifiability

+

Cluster assignments are not identified—permuting the cluster mean vectors mu leads to a model with identical likelihoods. For instance, permuting the first two indexes in mu and the first two indexes in each soft_z[n] leads to an identical likelihood (and prior).

+

The lack of identifiability means that the cluster parameters cannot be compared across multiple Markov chains. In fact, the only parameter in soft \(K\)-means is not identified, leading to problems in monitoring convergence. Clusters can even fail to be identified within a single chain, with indices swapping if the chain is long enough or the data are not cleanly separated.

+
+
+

Multimodality

+

The other problem with clustering models is that their posteriors are highly multimodal. One form of multimodality is the non-identifiability leading to index swapping. But even without the index problems the posteriors are highly multimodal.

+

Bayesian inference fails in cases of high multimodality because there is no way to visit all of the modes in the posterior in appropriate proportions and thus no way to evaluate integrals involved in posterior predictive inference.

+

In light of these two problems, the advice often given in fitting clustering models is to try many different initializations and select the sample with the highest overall probability. It is also popular to use optimization-based point estimators such as expectation maximization or variational Bayes, which can be much more efficient than sampling-based approaches.

+
+
+
+

Naive Bayes classification and clustering

+

Naive Bayes is a kind of mixture model that can be used for classification or for clustering (or a mix of both), depending on which labels for items are observed.1

+

Multinomial mixture models are referred to as “naive Bayes” because they are often applied to classification problems where the multinomial independence assumptions are clearly false.

+

Naive Bayes classification and clustering can be applied to any data with multinomial structure. A typical example of this is natural language text classification and clustering, which is used as an example in what follows.

+

The observed data consists of a sequence of \(M\) documents made up of bags of words drawn from a vocabulary of \(V\) distinct words. A document \(m\) has \(N_m\) words, which are indexed as \(w_{m,1}, \dotsc, +w_{m,N[m]} \in \{1,\dotsc,V\}\). Despite the ordered indexing of words in a document, this order is not part of the model, which is clearly defective for natural human language data. A number of topics (or categories) \(K\) is fixed.

+

The multinomial mixture model generates a single category \(z_m \in +\{1,\dotsc,K\}\) for each document \(m \in \{1,\dotsc,M\}\) according to a categorical distribution, \[ +z_m \sim \textsf{categorical}(\theta). +\] The \(K\)-simplex parameter \(\theta\) represents the prevalence of each category in the data.

+

Next, the words in each document are generated conditionally independently of each other and the words in other documents based on the category of the document, with word \(n\) of document \(m\) being generated as \[ +w_{m,n} \sim \textsf{categorical}(\phi_{z[m]}). +\] The parameter \(\phi_{z[m]}\) is a \(V\)-simplex representing the probability of each word in the vocabulary in documents of category \(z_m\).

+

The parameters \(\theta\) and \(\phi\) are typically given symmetric Dirichlet priors. The prevalence \(\theta\) is sometimes fixed to produce equal probabilities for each category \(k \in \{1,\dotsc,K\}\).

+
+

Coding ragged arrays

+

The specification for naive Bayes in the previous sections has used a ragged array notation for the words \(w\). Because Stan does not support ragged arrays, the models are coded using an alternative strategy that provides an index for each word in a global list of words. The data is organized as follows, with the word arrays laid out in a column and each assigned to its document in a second column.

+

\[ +\begin{array}{lll} +\hline +\mathrm{n} \qquad\qquad\qquad\qquad & \mathrm{w[n]} \qquad & \mathrm{doc[n]} \\ +\hline +1 & w_{1,1} & 1 \\ +2 & w_{1,2} & 1 \\ +\vdots & \vdots & \vdots \\ +N_1 & w_{1,N[1]} & 1 \\ +N_1 + 1 & w_{2,1} & 2 \\ +N_1 + 2 & w_{2,2} & 2 \\ +\vdots & \vdots & \vdots \\ +N_1 + N_2 & w_{2,N[2]} & 2 \\ +N_1 + N_2 + 1 & w_{3,1} & 3 \\ +\vdots & \vdots & \vdots \\ +N = \sum_{m=1}^M N_m & w_{M,N[M]} & M \\ +\hline +\end{array} +\]

+

The relevant variables for the program are N, the total number of words in all the documents, the word array w, and the document identity array doc.

+
+
+

Estimation with category-labeled training data

+

A naive Bayes model for estimating the simplex parameters given training data with documents of known categories can be coded in Stan as follows

+
data {
+  // training data
+  int<lower=1> K;               // num topics
+  int<lower=1> V;               // num words
+  int<lower=0> M;               // num docs
+  int<lower=0> N;               // total word instances
+  array[M] int<lower=1, upper=K> z;    // topic for doc m
+  array[N] int<lower=1, upper=V> w;    // word n
+  array[N] int<lower=1, upper=M> doc;  // doc ID for word n
+  // hyperparameters
+  vector<lower=0>[K] alpha;     // topic prior
+  vector<lower=0>[V] beta;      // word prior
+}
+parameters {
+  simplex[K] theta;             // topic prevalence
+  array[K] simplex[V] phi;      // word dist for topic k
+}
+model {
+  // priors
+  theta ~ dirichlet(alpha);
+  for (k in 1:K) {
+    phi[k] ~ dirichlet(beta);
+  }
+  // observed topic of every document (vectorized over m)
+  z ~ categorical(theta);
+  // each word is drawn from the word distribution of its document's topic
+  for (n in 1:N) {
+    w[n] ~ categorical(phi[z[doc[n]]]);
+  }
+}
+

The topic identifiers \(z_m\) are declared as data and the latent category assignments are included as part of the likelihood function.

+
+
+

Estimation without category-labeled training data

+

Naive Bayes models can be used in an unsupervised fashion to cluster multinomial-structured data into a fixed number \(K\) of categories. The data declaration includes the same variables as the model in the previous section excluding the topic labels z. Because z is discrete, it needs to be summed out of the model calculation. This is done for naive Bayes as for other mixture models. The parameters are the same up to the priors, but the likelihood is now computed as the marginal document probability

+

\[\begin{align*} +\log\, &p(w_{m,1},\dotsc,w_{m,N_m} \mid \theta,\phi) \\ +&= \log \sum_{k=1}^K + \left( \textsf{categorical}(k \mid \theta) + \times \prod_{n=1}^{N_m} \textsf{categorical}(w_{m,n} \mid \phi_k) + \right) \\ +&= \log \sum_{k=1}^K \exp \left( + \log \textsf{categorical}(k \mid \theta) + + \sum_{n=1}^{N_m} \log \textsf{categorical}(w_{m,n} \mid \phi_k) + \right). +\end{align*}\]

+

The last step shows how the log_sum_exp function can be used to stabilize the numerical calculation and return a result on the log scale.

+
model {
+  // gamma[m, k]: log prior of topic k plus log likelihood of doc m's words under topic k
+  array[M, K] real gamma;
+  theta ~ dirichlet(alpha);
+  for (k in 1:K) {
+    phi[k] ~ dirichlet(beta);
+  }
+  // initialize every entry with the log topic probability
+  for (m in 1:M) {
+    for (k in 1:K) {
+      gamma[m, k] = categorical_lpmf(k | theta);
+    }
+  }
+  // add each word's log probability to the row of the document it belongs to
+  for (n in 1:N) {
+    for (k in 1:K) {
+      gamma[doc[n], k] = gamma[doc[n], k]
+                         + categorical_lpmf(w[n] | phi[k]);
+    }
+  }
+  // sum the topic out of each document on the log scale
+  for (m in 1:M) {
+    target += log_sum_exp(gamma[m]);
+  }
+}
+

The local variable gamma[m, k] represents the value \[ +\gamma_{m,k} = \log \textsf{categorical}(k \mid \theta) ++ \sum_{n=1}^{N_m} \log \textsf{categorical}(w_{m,n} \mid \phi_k). +\]

+

Given \(\gamma\), the posterior probability that document \(m\) is assigned category \(k\) is \[ +\Pr[z_m = k \mid w,\alpha,\beta] += +\exp \left( +\gamma_{m,k} +- \log \sum_{k'=1}^K \exp \left( \gamma_{m,k'} \right) +\right). +\]

+

If the variable gamma were declared and defined in the transformed parameters block, its sampled values would be saved by Stan. The normalized posterior probabilities could also be defined as generated quantities.

+
+
+

Full Bayesian inference for naive Bayes

+

Full Bayesian posterior predictive inference for the naive Bayes model can be implemented in Stan by combining the models for labeled and unlabeled data. The estimands include both the model parameters and the posterior distribution over categories for the unlabeled data. The model is essentially a missing data model assuming the unknown category labels are missing completely at random; see Gelman et al. (2013) and Gelman and Hill (2007) for more information on missing data imputation. The model is also an instance of semisupervised learning because the unlabeled data contributes to the parameter estimations.

+

To specify a Stan model for performing full Bayesian inference, the model for labeled data is combined with the model for unlabeled data. A second document collection is declared as data, but without the category labels, leading to new variables M2, N2, w2, and doc2. The number of categories and number of words, as well as the hyperparameters are shared and only declared once. Similarly, there is only one set of parameters. Then the model contains a single set of statements for the prior, a set of statements for the labeled data, and a set of statements for the unlabeled data.

+
+
+

Prediction without model updates

+

An alternative to full Bayesian inference involves estimating a model using labeled data, then applying it to unlabeled data without updating the parameter estimates based on the unlabeled data. This behavior can be implemented by moving the definition of gamma for the unlabeled documents to the generated quantities block. Because the variables no longer contribute to the log probability, they no longer jointly contribute to the estimation of the model parameters.

+
+
+
+

Latent Dirichlet allocation

+

Latent Dirichlet allocation (LDA) is a mixed-membership multinomial clustering model (Blei, Ng, and Jordan 2003) that generalizes naive Bayes. Using the topic and document terminology common in discussions of LDA, each document is modeled as having a mixture of topics, with each word drawn from a topic based on the mixing proportions.

+
+

The LDA Model

+

The basic model assumes each document is generated independently based on fixed hyperparameters. For document \(m\), the first step is to draw a topic distribution simplex \(\theta_m\) over the \(K\) topics, \[ +\theta_m \sim \textsf{Dirichlet}(\alpha). +\]

+

The prior hyperparameter \(\alpha\) is fixed to a \(K\)-vector of positive values. Each word in the document is generated independently conditional on the distribution \(\theta_m\). First, a topic \(z_{m,n} \in \{1,\dotsc,K\}\) is drawn for the word based on the document-specific topic-distribution, \[ +z_{m,n} \sim \textsf{categorical}(\theta_m). +\]

+

Finally, the word \(w_{m,n}\) is drawn according to the word distribution for topic \(z_{m,n}\), \[ +w_{m,n} \sim \textsf{categorical}(\phi_{z[m,n]}). +\] The distributions \(\phi_k\) over words for topic \(k\) are also given a Dirichlet prior, \[ +\phi_k \sim \textsf{Dirichlet}(\beta) +\]

+

where \(\beta\) is a fixed \(V\)-vector of positive values.

+
+
+

Summing out the discrete parameters

+

Although Stan does not (yet) support discrete sampling, it is possible to calculate the marginal distribution over the continuous parameters by summing out the discrete parameters as in other mixture models. The marginal posterior of the topic and word variables is \[\begin{align*} +p(\theta,\phi \mid w,\alpha,\beta) +&\propto p(\theta \mid \alpha) \, p(\phi \mid \beta) \, p(w \mid \theta,\phi) \\ +&= \prod_{m=1}^M p(\theta_m \mid \alpha) + \times \prod_{k=1}^K p(\phi_k \mid \beta) + \times \prod_{m=1}^M \prod_{n=1}^{N[m]} p(w_{m,n} \mid \theta_m,\phi). +\end{align*}\]

+

The inner word-probability term is defined by summing out the topic assignments, \[\begin{align*} +p(w_{m,n} \mid \theta_m,\phi) +&= \sum_{z=1}^K p(z,w_{m,n} \mid \theta_m,\phi) \\ +&= \sum_{z=1}^K p(z \mid \theta_m) \, p(w_{m,n} \mid \phi_z). +\end{align*}\]

+

Plugging the distributions in and converting to the log scale provides a formula that can be implemented directly in Stan, \[\begin{align*} +\log\, &p(\theta,\phi \mid w,\alpha,\beta) \\ +&= \sum_{m=1}^M \log \textsf{Dirichlet}(\theta_m \mid \alpha) + + \sum_{k=1}^K \log \textsf{Dirichlet}(\phi_k \mid \beta) \\ +&\qquad + \sum_{m=1}^M \sum_{n=1}^{N[m]} \log \left( + \sum_{z=1}^K \textsf{categorical}(z \mid \theta_m) + \times \textsf{categorical}(w_{m,n} \mid \phi_z) + \right) +\end{align*}\]

+
+
+

Implementation of LDA

+

Applying the marginal derived in the last section to the data structure described in this section leads to the following Stan program for LDA.

+
data {
+  // Words are stored in one flat array; doc[n] gives the document of word n.
+  int<lower=2> K;               // num topics
+  int<lower=2> V;               // num words
+  int<lower=1> M;               // num docs
+  int<lower=1> N;               // total word instances
+  array[N] int<lower=1, upper=V> w;    // word n
+  array[N] int<lower=1, upper=M> doc;  // doc ID for word n
+  vector<lower=0>[K] alpha;     // topic prior
+  vector<lower=0>[V] beta;      // word prior
+}
+parameters {
+  array[M] simplex[K] theta;    // topic dist for doc m
+  array[K] simplex[V] phi;      // word dist for topic k
+}
+model {
+  for (m in 1:M) {
+    theta[m] ~ dirichlet(alpha);  // prior
+  }
+  for (k in 1:K) {
+    phi[k] ~ dirichlet(beta);     // prior
+  }
+  for (n in 1:N) {
+    // gamma[k]: log prob of topic k in word n's document plus log prob of word n under topic k
+    array[K] real gamma;
+    for (k in 1:K) {
+      gamma[k] = log(theta[doc[n], k]) + log(phi[k, w[n]]);
+    }
+    // sum out the per-word topic assignment on the log scale
+    target += log_sum_exp(gamma);  // likelihood;
+  }
+}
+

As in the other mixture models, the log-sum-of-exponents function is used to stabilize the numerical arithmetic.

+
+
+

Correlated topic model

+

To account for correlations in the distribution of topics for documents, Blei and Lafferty (2007) introduced a variant of LDA in which the Dirichlet prior on the per-document topic distribution is replaced with a multivariate logistic normal distribution.

+

The authors treat the prior as a fixed hyperparameter. They use an \(L_1\)-regularized estimate of covariance, which is equivalent to the maximum a posteriori estimate given a double-exponential prior. Here the mean and covariance of the multivariate logistic normal are specified as data.

+
+

Fixed hyperparameter correlated topic model

+

The Stan model in the previous section can be modified to implement the correlated topic model by replacing the Dirichlet topic prior alpha in the data declaration with the mean and covariance of the multivariate logistic normal prior.

+
data {
+  // ... data as before without alpha ...
+  // fixed mean and covariance of the multivariate normal placed on eta
+  vector[K] mu;          // topic mean
+  cov_matrix[K] Sigma;   // topic covariance
+}
+

Rather than drawing the simplex parameter theta from a Dirichlet, a parameter eta is drawn from a multivariate normal distribution and then transformed using softmax into a simplex.

+
parameters {
+  array[K] simplex[V] phi;     // word dist for topic k
+  array[M] vector[K] eta;      // topic dist for doc m
+}
+transformed parameters {
+  // theta[m] is the simplex obtained by applying softmax to eta[m]
+  array[M] simplex[K] theta;
+  for (m in 1:M) {
+    theta[m] = softmax(eta[m]);
+  }
+}
+model {
+  // multivariate normal prior on eta replaces the Dirichlet prior on theta
+  for (m in 1:M) {
+    eta[m] ~ multi_normal(mu, Sigma);
+  }
+  // ... model as before w/o prior for theta ...
+}
+
+
+

Full Bayes correlated topic model

+

By adding a prior for the mean and covariance, Stan supports full Bayesian inference for the correlated topic model. This requires moving the declarations of topic mean mu and covariance Sigma from the data block to the parameters block and providing them with priors in the model. A relatively efficient and interpretable prior for the covariance matrix Sigma may be encoded as follows.

+
// ... data block as before, but without alpha ...
+parameters {
+  vector[K] mu;              // topic mean
+  corr_matrix[K] Omega;      // correlation matrix
+  vector<lower=0>[K] sigma;  // scales
+  array[M] vector[K] eta;    // logit topic dist for doc m
+  array[K] simplex[V] phi;   // word dist for topic k
+}
+transformed parameters {
+  // ... theta defined from eta as above ...
+  cov_matrix[K] Sigma;       // covariance matrix
+  // Sigma[m, n] = sigma[m] * sigma[n] * Omega[m, n], filled elementwise:
+  // first the diagonal, then the upper triangle mirrored into the lower triangle
+  for (m in 1:K) {
+    Sigma[m, m] = sigma[m] * sigma[m] * Omega[m, m];
+  }
+  for (m in 1:(K-1)) {
+    for (n in (m+1):K) {
+      Sigma[m, n] = sigma[m] * sigma[n] * Omega[m, n];
+      Sigma[n, m] = Sigma[m, n];
+    }
+  }
+}
+model {
+  mu ~ normal(0, 5);      // vectorized, diffuse
+  Omega ~ lkj_corr(2.0);  // regularize to unit correlation
+  sigma ~ cauchy(0, 5);   // half-Cauchy due to constraint
+  // ... words sampled as above ...
+}
+

The \(\textsf{LkjCorr}\) distribution with shape \(\alpha > 0\) has support on correlation matrices (i.e., symmetric positive definite with unit diagonal). Its density is defined by \[ +\textsf{LkjCorr}(\Omega\mid\alpha) \propto \mathrm{det}(\Omega)^{\alpha - 1} +\] With a shape of \(\alpha = 2\), the weakly informative prior favors a unit correlation matrix. Thus the compound effect of this prior on the covariance matrix \(\Sigma\) for the multivariate logistic normal is a slight concentration around diagonal covariance matrices with scales determined by the prior on sigma.

+ + + +
+
+
+
+ + + Back to top

References

+
+Blei, David M., and John D. Lafferty. 2007. “A Correlated Topic Model of Science.” The Annals of Applied Statistics 1 (1): 17–37. +
+
+Blei, David M., Andrew Y. Ng, and Michael I. Jordan. 2003. “Latent Dirichlet Allocation.” Journal of Machine Learning Research 3: 993–1022. +
+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+Gelman, Andrew, and Jennifer Hill. 2007. Data Analysis Using Regression and Multilevel-Hierarchical Models. Cambridge, United Kingdom: Cambridge University Press. +
+

Footnotes

+ +
    +
  1. For clustering, the non-identifiability problems for all mixture models present a problem, whereas there is no such problem for classification. Despite the difficulties with full Bayesian inference for clustering, researchers continue to use it, often in an exploratory data analysis setting rather than for predictive modeling.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/complex-numbers.html b/docs/2_39/stan-users-guide/complex-numbers.html new file mode 100644 index 000000000..09e110b52 --- /dev/null +++ b/docs/2_39/stan-users-guide/complex-numbers.html @@ -0,0 +1,1350 @@ + + + + + + + + + +Complex Numbers + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Complex Numbers

+

Stan supports complex scalars, matrices, and vectors as well as real-based ones.

+
+

Working with complex numbers

+

This section describes the complex scalar type, including how to build complex numbers, assign them, and use them in arrays and functions.

+
+

Constructing and accessing complex numbers

+

Complex numbers can be constructed using imaginary literals. For example,

+
complex z = -1.1 + 2.3i;
+

produces the complex number \(-1.1 + 2.3i\). This only works if the real and imaginary components are literal numerals. To construct a complex number out of arbitrary real variables, the to_complex() function may be used. For example, the following code will work if x and y are parameters, transformed data, or local variables in a function or model block.

+
real x = // ...
+real y = // ...
+complex z = to_complex(x, y);
+

The real and imaginary parts of the complex number can be accessed with getters as follows.

+
real x = get_real(z);  // x = -1.1
+real y = get_imag(z);  // y = 2.3
+

Complex numbers can be compared using equality (or inequality), but not with greater than or less than operators. For example, after running the code above, the following code snippet will print “hello”.

+
complex a = 3.2 + 2i;
+complex b = to_complex(3.2, 2);
+if (a == b) print("hello");
+
+
+

Complex assignment and promotion

+

Integer- or real-valued expressions may be assigned to complex numbers, with the corresponding imaginary component set to zero.

+
complex z1 = 3;  // int promoted to 3 + 0i
+complex z2 = 3.2;  // real promoted to 3.2 + 0.0i
+
+
+

Complex arrays

+

Arrays of complex numbers work as usual and allow the usual curly bracket constructors.

+
complex z1;  complex z2;  complex z3;
+// ...
+array[3] complex zs = { z1, z2, z3 };
+for (z in zs) {
+  print(z);
+}
+

Complex arrays allow assignment into their elements, with integer or real assigned values being promoted to complex.

+
+
+

Complex functions

+

All of the standard complex functions are available, including natural logarithm log(z), natural exponentiation exp(z), and powers pow(z1, z2), as well as all of the trigonometric and hyperbolic trigonometric functions and their inverses, such as sin(z), acos(z), tanh(z) and asinh(z).

+

Promotion also works for complex-valued function arguments, which may be passed integer or real values, which will be promoted before the function is evaluated. For example, the following user-defined complex function will accept integer, real, or complex arguments.

+
complex times_i(complex z) {
+  // Returns z multiplied by the imaginary unit.
+  complex i = to_complex(0, 1);  // 0 + 1i
+  return i * z;
+}
+

For instance, times_i(1) evaluates to the imaginary base \(i\), as does times_i(1.0).

+
+
+
+

Complex random variables

+

The simplest way to model a distribution over a complex random number \(z = x + yi\) is to consider its real part \(x\) and imaginary part \(y\) to have a bivariate normal distribution. For example, a complex prior can be expressed as follows.

+
complex z;
+vector[2] mu;
+cholesky_factor_cov[2] L_Sigma;
+// ...
+[get_real(z), get_imag(z)]' ~ multi_normal_cholesky(mu, L_Sigma);
+

For example, if z is data, this can be used to estimate mu and the covariance Cholesky factor L_Sigma. Alternatively, if z is a parameter, mu and L_Sigma may be constants defining a prior or further parameters defining a hierarchical model.

+
+
+

Complex matrices and vectors

+

Stan supports complex matrices, vectors, and row vectors. Variables of these types are declared with sizes in the same way as their real-based counterparts.

+
complex_vector[3] v;
+complex_row_vector[2] rv;
+complex_matrix[3, 2] m;
+

We can construct vectors and matrices using brackets in the same way as for real-valued vectors and matrices. For example, given the declaration of rv above, we could assign it to a constructed row vector.

+
rv =  [2 + 3i, 1.9 - 2.3i];
+

Complex matrices and vectors support all of the standard arithmetic operations including negation, addition, subtraction, and multiplication (division involves a solve, and isn’t a simple arithmetic operation for matrices). They also support transposition.

+

Furthermore, it is possible to convert back and forth between arrays and matrices using the to_array functions.

+
+
+

Complex linear regression

+

Complex-valued linear regression with complex predictors and regression coefficients looks just like standard regression. For example, take x to be a vector of predictors and y to be a vector of outcomes, and consider the following Stan program for an intercept and slope.

+
data {
+  int<lower=0> N;
+  complex_vector[N] x;  // predictors
+  complex_vector[N] y;  // outcomes
+}
+parameters {
+  complex alpha;  // intercept
+  complex beta;   // slope
+}
+model {
+  // residual of each outcome from the linear predictor alpha + beta * x
+  complex_vector[N] eps = y - (alpha + beta * x);
+  eps ~  // ...error distribution...
+}
+

The question remains of how to fill in the error distribution and there are several alternatives. We consider only two simple alternatives, and do not consider penalizing the absolute value of the error.

+
+

Independent real and imaginary error

+

The simplest approach to error in complex regression is to give the real and imaginary parts of each error eps[n] independent normal distributions, as follows.

+
parameters {
+  // ...
+  // error scales for the real (sigma[1]) and imaginary (sigma[2]) parts;
+  // constrained positive because they are used as normal scale parameters
+  vector<lower=0>[2] sigma;
+}
+// ...
+model {
+  // ...
+  // real and imaginary parts of the residuals are modeled independently
+  get_real(eps) ~ normal(0, sigma[1]);
+  get_imag(eps) ~ normal(0, sigma[2]);
+  sigma ~ //...hyperprior...
+}
+

A new error scale vector sigma is introduced, and it should itself get a prior based on the expected scale of error for the problem.

+
+
+

Dependent complex error

+

The next simplest approach is to treat the real and imaginary parts of the complex number as having a multivariate normal prior. This can be done by adding a parameter for correlation to the above, or just working with a multivariate covariance matrix, as we do here.

+
parameters {
+  cholesky_factor_corr[2] L_Omega;  // Cholesky factor of correlation matrix
+  vector<lower=0>[2] sigma;         // real, imag error scales (must be positive)
+  // ...
+}
+// ...
+model {
+  // pack each complex residual into a 2-vector (real part, imaginary part)
+  array[N] vector[2] eps_arr;
+  for (n in 1:N) {
+    // row-vector expression transposed to a column vector
+    eps_arr[n] = [get_real(eps[n]), get_imag(eps[n])]';
+  }
+  // zero-mean bivariate normal; covariance Cholesky factor is diag(sigma) * L_Omega
+  eps_arr ~ multi_normal_cholesky([0, 0]',
+                                  diag_pre_multiply(sigma, L_Omega));
+  L_Omega ~ lkj_corr_cholesky(4);  // shrink toward identity correlation
+  sigma ~ // ... hyperprior ...
+}
+

Here, the real and imaginary components of the error get a joint distribution with correlation and independent scales. The error gets a multivariate normal distribution with zero mean and a Cholesky factor representation of covariance, consisting of a scale vector sigma and a Cholesky factor of a correlation matrix, L_Omega. The prior on the correlations is concentrated loosely around diagonal covariance, and the prior on the scales is left open. In order to vectorize the call to multi_normal_cholesky, the vector of complex numbers needs to be converted to an array of size 2 vectors.

+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/copulas.html b/docs/2_39/stan-users-guide/copulas.html new file mode 100644 index 000000000..b90c1957d --- /dev/null +++ b/docs/2_39/stan-users-guide/copulas.html @@ -0,0 +1,1441 @@ + + + + + + + + + +Copulas + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Copulas

+

Copulas provide a flexible way to model multivariate distributions by separating the marginal cumulative distribution functions from the dependence structure. This chapter introduces copulas in Stan, focusing on implementation techniques and practical examples. This chapter was derived from Brynjólfur Gauti Guðrúnar Jónsson’s A gentle introduction: the Gaussian copula.

+
+

What Are Copulas?

+

According to Sklar’s theorem (Sklar 1959), any multivariate distribution can be expressed in terms of its marginals and a copula that captures the dependence structure. Copulas are functions that join univariate marginal cumulative distribution functions to form multivariate distributions.

+

For a multivariate random variable \(\mathbf{X} = [X_1 \cdots X_D]^\top\) with marginal cumulative distribution functions \(F_i\), the joint cumulative distribution function can be written as:

+

\[ +F_{\mathbf{X}}(\mathbf{x}) = C(F_1(x_1), \ldots, F_D(x_D)) = \Pr[X_1 \leq x_1, \ldots, X_D \leq x_D] +\]

+

where \(C\) is the copula function, \(F_{\mathbf{X}}\) is the joint cumulative distribution function, and \(F_i\) are the marginal cumulative distribution functions. The copula function \(C\) must be a joint cumulative distribution function over the unit hypercube \([0, 1]^D\).

+
+
+

General Structure of Copula Models in Stan

+

This section describes the general structure of copula models in Stan. The next sections will provide specific examples of copula implementations, but first, let’s understand the general pattern that separates the marginal distributions from the dependence structure.

+

The log density of a multivariate distribution using a copula can be written as:

+

\[ +\log h(\mathbf{x}) = \log c\left(u_1, \dots, u_D \vert \boldsymbol{\alpha}\right) + \sum_{i=1}^D \log f_i(x_i \vert \boldsymbol{\beta}_i) +\]

+

where:

+
    +
  • \(u_i = F_i(x_i \vert \boldsymbol{\beta}_i)\) are the probability integral transforms of the data
  • +
  • \(\log c\left(u_1, \dots, u_D \vert \boldsymbol{\alpha}\right)\) is the log density of the copula
  • +
  • \(\sum_{i=1}^D \log f_i(x_i \vert \boldsymbol{\beta}_i)\) is the sum of the log densities of the marginals
  • +
  • \(\boldsymbol{\alpha}\) represents the parameters describing the parametric form of the copula
  • +
  • \(\boldsymbol{\beta}_i\) represents the parameters describing the parametric form of the \(i\)-th marginal distribution
  • +
+

The implementation of copulas in Stan has two key requirements:

+
    +
  1. Both the probability density functions and cumulative distribution functions of the marginal distributions must be available
  2. +
  3. A function that computes the log density of the copula for the transformed data must be implemented
  4. +
+

Most copula implementations in Stan follow a three-step process:

+
    +
  1. Accumulate marginal log likelihoods: Calculate and add the log density of each marginal distribution to the target log density
  2. +
  3. Transform to uniform variables: Apply the marginal CDFs to transform the data to uniform variables on the unit interval
  4. +
  5. Calculate copula density: Compute the log density of the copula based on these uniform variables and add it to the target log density
  6. +
+

This process is reflected in the general form of the log density shown above, where the first term represents the copula density and the second term represents the sum of marginal log densities.

+

In a way, we are always modeling with copulas, as the independence assumption can be viewed as a special case using the independence copula, where \(\log c(\mathbf{u}) = 0\), resulting in the familiar sum of marginal log densities. This perspective highlights that traditional independent modeling is just a specific case within the broader copula framework.

+

Most parametric copula families include independence as a special case, either as a subset of their parameter space (e.g., when correlation parameters are zero) or as a limit when parameters approach specific values (e.g., when the dependence parameter approaches zero in Archimedean copulas).

+
+
+

Gaussian Copula Example

+

The Gaussian copula is constructed using the multivariate normal distribution. For a \(D\)-dimensional random vector \(\mathbf{X}\) with marginals \(F_i\), the log Gaussian copula density is given by:

+

\[ +\begin{aligned} +\log c(\mathbf{u}) &= +-\frac{1}{2} \log |\boldsymbol{\Omega}| -\frac{1}{2} \mathbf{z}^\top (\boldsymbol{\Omega}^{-1} - \mathbf{I}) \mathbf{z} \\ +& = +-\frac{1}{2} \log |\boldsymbol{\Omega}| -\frac{1}{2} \mathbf{z}^\top \boldsymbol{\Omega}^{-1} \mathbf{z} + \frac{1}{2} \mathbf{z}^\top \mathbf{z} \\ +&= \log \mathcal{N}(\mathbf{z} \mid \mathbf{0}, \boldsymbol{\Omega}) - \log \mathcal{N}(\mathbf{z} \mid \mathbf{0}, \mathbf{I}) +\end{aligned} +\]

+

where \(\mathbf{z} = [\Phi^{-1}(u_1), \ldots, \Phi^{-1}(u_D)]^\top\) are the inverse normal CDF transforms of the uniform marginals, \(\boldsymbol{\Omega}\) is the correlation matrix, and \(\mathbf{I}\) is the identity matrix. The joint log density is then:

+

\[ +\log h(\mathbf{x}) = \log c(F_1(x_1), \ldots, F_D(x_D)) + \sum_{i=1}^D \log f_i(x_i) +\]

+

Following the three-step process for implementing copulas in Stan:

+
    +
  1. Accumulate marginal log likelihoods: The exponential log densities are added to the target in the line target += exponential_lpdf(y[n] | lambda)
  2. +
  3. Transform to uniform variables: The exponential CDF transforms the data to uniform variables: exponential_cdf(y[n, d] | lambda[d])
  4. +
  5. Calculate copula density: The transformed variables are converted to normal scale using inv_Phi and the multivariate normal log density is computed: z ~ multi_normal_cholesky(zeros, L_Omega)
  6. +
+

The following example demonstrates a Gaussian copula with exponential marginal distributions. Note that while the copula is Gaussian, the marginals are exponential.

+
data {
+  int<lower=0> N;  // number of observations
+  int<lower=0> D;  // number of dimensions
+  array[N] vector<lower=0>[D] y;  // data
+}
+
+transformed data {
+  vector[D] zeros = rep_vector(0, D);
+}
+
+parameters {
+  // Parameters for exponential marginal distributions
+  vector<lower=0>[D] lambda;  // rate parameters
+
+  // Cholesky factor of the correlation matrix for the Gaussian copula
+  cholesky_factor_corr[D] L_Omega;
+}
+
+model {
+  // Priors
+  lambda ~ gamma(2, 1);  // prior for rate parameters
+  L_Omega ~ lkj_corr_cholesky(2);
+
+  // Likelihood using Gaussian copula with exponential marginals
+  for (n in 1:N) {
+    // Add exponential log density to target
+    target += exponential_lpdf(y[n] | lambda);
+
+    vector[D] z;
+    for (d in 1:D) {
+      // Transform to uniform using exponential CDF
+      real u_d = exponential_cdf(y[n, d] | lambda[d]);
+
+      // Transform to standard normal
+      z[d] = inv_Phi(u_d);
+    }
+    // Gaussian copula log density:
+    //   log N(z | 0, Omega) - log N(z | 0, I)
+    // Multivariate normal log density with correlation matrix
+    z ~ multi_normal_cholesky(zeros, L_Omega);
+    // Subtract the independent standard normal log density; z depends on
+    // lambda, so this term is not a constant and cannot be dropped
+    target += -std_normal_lpdf(z);
+  }
+}
+
+generated quantities {
+  // Optional: Recover correlation matrix from Cholesky factor
+  matrix[D, D] Omega = multiply_lower_tri_self_transpose(L_Omega);
+}
+
+
+

Advantages of Copulas

+

Copulas offer several advantages in statistical modeling:

+
    +
  1. Flexibility: They allow combining any marginal distributions with various dependence structures. For example:

    +
      +
    • Modeling financial returns with heavy-tailed marginals and complex dependence structures
    • +
    • Combining different types of distributions (e.g., normal and gamma) in a single model
    • +
    • Capturing asymmetric dependencies between variables, such as in financial markets where joint negative returns are more common than joint positive returns due to macro-events affecting multiple stocks simultaneously, while positive returns tend to be more idiosyncratic
    • +
    • Modeling different types of tail dependence in different parts of the distribution
    • +
  2. +
  3. Factorability: The marginal distributions and dependence structure can be modeled separately, allowing for different prior knowledge about each component. This is similar to the common practice of factoring scale and correlation in multivariate normal priors.

    +

    For example, when modeling the survival times of two components in a system, we can separately specify exponential or gamma marginal distributions based on historical failure data for each component, and a Gaussian copula (or asymmetrical Archimedean copula) capturing how the failure of one component affects the other, making it easier to incorporate prior knowledge about each aspect independently.

  4. +
  5. Tail dependence: Different copulas can capture different types of tail dependence, which is crucial in applications like risk management and extreme value analysis where joint extreme scenarios need to be quantified.

  6. +
  7. Universal Framework: In a way, we are always modeling with copulas, as the independence assumption can be viewed as a special case using the independence copula. This perspective highlights that traditional independent modeling is just a specific case within the broader copula framework.

  8. +
+
+
+

Common Pitfalls and Considerations

+

When implementing copulas in Stan, several considerations should be kept in mind:

+
    +
  1. Computational efficiency: The probability integral transform and inverse transform steps can be computationally intensive, especially for complex marginal distributions.

  2. +
  3. Parameter identifiability: Care must be taken to ensure that the parameters of the marginal distributions and the copula are identifiable.

  4. +
  5. Model selection: The choice of copula family should be guided by the specific dependence structure of the data. For example:

    +
      +
    • The Gaussian copula may underestimate the probability of joint extreme events in financial data
    • +
    • The Student-t copula, while offering tail dependence, maintains symmetric tail behavior that may not match all applications
    • +
    • Archimedean copulas can model asymmetric tail dependence but may be less flexible and harder to estimate in high dimensions
    • +
  6. +
  7. Numerical stability: The transformations between different scales (original, uniform, and normal/Student-t/calculations using Archimedean copulas) require careful implementation to maintain numerical stability.

  8. +
  9. Symmetry considerations: Many copula families exhibit strong symmetries that may not match the data:

    +
      +
    • Radial symmetry: Some copulas (like Gaussian and Student-t) treat positive and negative extremes equally, which may not match financial data where joint negative returns are more common than joint positive returns
    • +
    • Exchangeability: Some copulas are invariant under permutations of their arguments, which can lead to unintuitive results when combined with inhomogeneous marginals. For example, when modeling time-to-event scenarios with different marginal distributions (e.g., exponential distributions with different parameters), perfect dependence in the copula does not imply simultaneous events. Instead, one event triggers the other at a later time corresponding to the same quantile, which can lead to incorrect modeling of joint events.
    • +
  10. +
  11. Tail dependence: Understanding and choosing appropriate tail dependence is crucial:

    +
      +
    • The upper (lower) tail dependence coefficient \(\lambda_U (\lambda_L)\) is the probability that one variable is extremely large (small) given that another is extremely large (small).
    • +
    • Different copula families exhibit different tail dependence properties: +
        +
      • Some copulas (like Gaussian) have zero tail dependence
      • +
      • Others can model symmetric tail dependence (\(\lambda_U = \lambda_L\))
      • +
      • Some can capture asymmetric tail dependence (\(\lambda_U \neq \lambda_L\))
      • +
      • Certain copulas allow for tail dependence even with zero correlation
      • +
    • +
    • The choice of copula should be guided by the expected tail behavior in the application: +
        +
      • Financial data often requires modeling joint lower extreme events
      • +
      • Risk management applications may need asymmetric tail dependence
      • +
      • Some applications may require different tail behavior in different parts of the distribution
      • +
    • +
  12. +
  13. High-dimensional modeling: As dimensionality increases:

    +
      +
    • The number of dependence parameters grows
    • +
    • Some copula families become less flexible
    • +
    • Vine copulas or factor copulas may be more appropriate
    • +
  14. +
+
+
+

Common Copula Families

+

Several copula families are available for modeling different dependence structures in the correlation component:

+
    +
  • Gaussian copula: Based on the multivariate normal distribution, offering symmetric dependence
  • +
  • Student-t copula: Based on the multivariate Student-t distribution, providing more flexibility in tail dependence than the Gaussian copula
  • +
  • Archimedean copulas: A class of copulas defined through generator functions, including: +
      +
    • Clayton copula: Stronger lower tail dependence
    • +
    • Gumbel copula: Stronger upper tail dependence
    • +
    • Frank copula: Symmetric dependence
    • +
  • +
  • Vine copulas: A flexible approach for modeling high-dimensional dependencies by decomposing the joint distribution into a series of bivariate copulas
  • +
+
+
+

Further reading/viewing

+ + + + +
+
+ + Back to top

References

+
+Sklar, Abe. 1959. “Abe Sklar’s "FONCTIONS DE REPARTITION a n DIMENSIONS ET LEURS MARGES": The Original Document and an English Translation.” Translated by Ben Van Vliet. The Original Document and an English Translation (March 3, 2023). https://dx.doi.org/10.2139/ssrn.4198458. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/cross-validation.html b/docs/2_39/stan-users-guide/cross-validation.html new file mode 100644 index 000000000..c2c63070e --- /dev/null +++ b/docs/2_39/stan-users-guide/cross-validation.html @@ -0,0 +1,1520 @@ + + + + + + + + + +Held-Out Evaluation and Cross-Validation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Held-Out Evaluation and Cross-Validation

+

Held-out evaluation involves splitting a data set into two parts, a training data set and a test data set. The training data is used to estimate the model and the test data is used for evaluation. Held-out evaluation is commonly used to declare winners in predictive modeling competitions such as those run by Kaggle.

+

Cross-validation involves repeated held-out evaluations performed by partitioning a single data set in different ways. The training/test split can be done either by randomly selecting the test set, or by partitioning the data set into several equally-sized subsets and then using each subset in turn as the test data with the other folds as training data.

+

Held-out evaluation and cross-validation may involve any kind of predictive statistics, with common choices being the predictive log density on test data, squared error of parameter estimates, or accuracy in a classification task.

+
+

Evaluating posterior predictive densities

+

Given training data \((x, y)\) consisting of parallel sequences of predictors and observations and test data \((\tilde{x}, \tilde{y})\) of the same structure, the posterior predictive density is \[ +p(\tilde{y} \mid \tilde{x}, x, y) += +\int + p(\tilde{y} \mid \tilde{x}, \theta) + \cdot p(\theta \mid x, y) +\, \textrm{d}\theta, +\]

+

where \(\theta\) is the vector of model parameters. This predictive density is the density of the test observations, conditioned on both the test predictors \(\tilde{x}\) and the training data \((x, y).\)

+

This integral may be calculated with Monte Carlo methods as usual, \[ +p(\tilde{y} \mid \tilde{x}, x, y) +\approx +\frac{1}{M} \sum_{m = 1}^M p(\tilde{y} \mid \tilde{x}, \theta^{(m)}), +\] where the \(\theta^{(m)} \sim p(\theta \mid x, y)\) are draws from the posterior given only the training data \((x, y).\)

+

To avoid underflow in calculations, it will be more stable to compute densities on the log scale. Taking the logarithm and pushing it through results in a stable computation, \[\begin{eqnarray*} +\log p(\tilde{y} \mid \tilde{x}, x, y) +& \approx & +\log \frac{1}{M} \sum_{m = 1}^M p(\tilde{y} \mid \tilde{x}, \theta^{(m)}), +\\[4pt] +& = & -\log M + \log \sum_{m = 1}^M p(\tilde{y} \mid \tilde{x}, \theta^{(m)}), +\\[4pt] +& = & -\log M + \log \sum_{m = 1}^M \exp(\log p(\tilde{y} \mid \tilde{x}, \theta^{(m)})) +\\[4pt] +& = & -\log M + \textrm{log-sum-exp}_{m = 1}^M \log p(\tilde{y} \mid \tilde{x}, \theta^{(m)}) +\end{eqnarray*}\] where the log sum of exponentials function is defined so as to make the above equation hold, \[ +\textrm{log-sum-exp}_{m = 1}^M \, \mu_m += \log \sum_{m=1}^M \exp(\mu_m). +\] The log sum of exponentials function can be implemented so as to avoid underflow and maintain high arithmetic precision as \[ +\textrm{log-sum-exp}_{m = 1}^M \mu_m += \textrm{max}(\mu) ++ \log \sum_{m = 1}^M \exp(\mu_m - \textrm{max}(\mu)). +\] Pulling the maximum out preserves all of its precision. By subtracting the maximum, the terms \(\mu_m - \textrm{max}(\mu) \leq 0\), and thus will not overflow.

+
+

Stan program

+

To evaluate the log predictive density of a model, it suffices to implement the log predictive density of the test data in the generated quantities block. The log sum of exponentials calculation must be done on the outside of Stan using the posterior draws of \(\log p(\tilde{y} \mid \tilde{x}, +\theta^{(m)}).\)

+

Here is the code for evaluating the log posterior predictive density in a simple linear regression of the test data \(\tilde{y}\) given predictors \(\tilde{x}\) and training data \((x, y).\)

+
data {
+  // Training set: N observations y with predictors x.
+  int<lower=0> N;
+  vector[N] y;
+  vector[N] x;
+  // Test set: N_tilde held-out observations y_tilde with predictors x_tilde.
+  int<lower=0> N_tilde;
+  vector[N_tilde] x_tilde;
+  vector[N_tilde] y_tilde;
+}
+parameters {
+  real alpha;
+  real beta;
+  real<lower=0> sigma;
+}
+model {
+  // Only the training data enter the model block.
+  y ~ normal(alpha + beta * x, sigma);
+}
+generated quantities {
+  // Log density of the whole test set for the current draw; combine the
+  // draws outside of Stan as log_sum_exp(log_p) - log(M).
+  real log_p = normal_lpdf(y_tilde | alpha + beta * x_tilde, sigma);
+}
+

Only the training data x and y are used in the model block. The test data y_tilde and test predictors x_tilde appear in only the generated quantities block. Thus the program is not cheating by using the test data during training. Although this model does not do so, it would be fair to use x_tilde in the model block—only the test observations y_tilde are unknown before they are predicted.

+

Given \(M\) posterior draws from Stan, the sequence log_p[1:M] will be available, so that the log posterior predictive density of the test data given training data and predictors is just log_sum_exp(log_p) - log(M).

+
+
+
+

Estimation error

+
+

Parameter estimates

+

Estimation is usually considered for unknown parameters. If the data from which the parameters were estimated came from simulated data, the true value of the parameters may be known. If \(\theta\) is the true value and \(\hat{\theta}\) the estimate, then error is just the difference between the prediction and the true value, \[ +\textrm{err} = \hat{\theta} - \theta. +\]

+

If the estimate is larger than the true value, the error is positive, and if it’s smaller, then error is negative. If an estimator’s unbiased, then expected error is zero. So typically, absolute error or squared error are used, which will always have positive expectations for an imperfect estimator. Absolute error is defined as \[ +\textrm{abs-err} = \left| \hat{\theta} - \theta \right| +\] and squared error as \[ +\textrm{sq-err} = \left( \hat{\theta} - \theta \right)^2. +\] Gneiting and Raftery (2007) provide a thorough overview of such scoring rules and their properties.

+

Bayesian posterior means minimize expected square error, whereas posterior medians minimize expected absolute error. Estimates based on modes rather than probability, such as (penalized) maximum likelihood estimates or maximum a posteriori estimates, do not have these properties.

+
+
+

Predictive estimates

+

In addition to parameters, other unknown quantities may be estimated, such as the score of a football match or the effect of a medical treatment given to a subject. In these cases, square error is defined in the same way. If there are multiple exchangeable outcomes being estimated, \(z_1, \ldots, z_N,\) then it is common to report mean square error (MSE), \[ +\textrm{mse} += \frac{1}{N} \sum_{n = 1}^N \left( \hat{z}_n - z_n\right)^2. +\] To put the error back on the scale of the original value, the square root may be applied, resulting in what is known prosaically as root mean square error (RMSE), \[ +\textrm{rmse} = \sqrt{\textrm{mse}}. +\]

+
+
+

Predictive estimates in Stan

+

Consider a simple linear regression model, parameters for the intercept \(\alpha\) and slope \(\beta\), along with predictors \(\tilde{x}_n\). The standard Bayesian estimate is the expected value of \(\tilde{y}\) given the predictors and training data, \[\begin{eqnarray*} +\hat{\tilde{y}}_n +& = & \mathbb{E}[\tilde{y}_n \mid \tilde{x}_n, x, y] +\\[4pt] +& \approx & \frac{1}{M} \sum_{m = 1}^M \tilde{y}_n^{(m)} +\end{eqnarray*}\] where \(\tilde{y}_n^{(m)}\) is drawn from the data model \[ +\tilde{y}_n^{(m)} +\sim p(\tilde{y}_n \mid \tilde{x}_n, \alpha^{(m)}, \beta^{(m)}), +\] for parameters \(\alpha^{(m)}\) and \(\beta^{(m)}\) drawn from the posterior, \[ +(\alpha^{(m)}, \beta^{(m)}) \sim p(\alpha, \beta \mid x, y). +\]

+

In the linear regression case, two stages of simplification can be carried out, the first of which helpfully reduces the variance of the estimator. First, rather than averaging draws \(\tilde{y}_n^{(m)}\), the same result is obtained by averaging linear predictions, \[\begin{eqnarray*} +\hat{\tilde{y}}_n +& = & \mathbb{E}\left[ + \alpha + \beta \cdot \tilde{x}_n + \mid \tilde{x}_n, x, y + \right] +\\[4pt] +& \approx & +\frac{1}{M} \sum_{m = 1}^M + \alpha^{(m)} + \beta^{(m)} \cdot \tilde{x}_n. +\end{eqnarray*}\] This is possible because \[ +\tilde{y}_n^{(m)} \sim \textrm{normal}(\tilde{y}_n \mid \alpha^{(m)} + +\beta^{(m)} \cdot \tilde{x}_n, \sigma^{(m)}), +\] and the normal distribution has symmetric error so that the expectation of \(\tilde{y}_n^{(m)}\) is the same as \(\alpha^{(m)} + \beta^{(m)} \cdot +\tilde{x}_n\). Replacing the sampled quantity \(\tilde{y}_n^{(m)}\) with its expectation is a general variance reduction technique for Monte Carlo estimates known as Rao-Blackwellization (Rao 1945; Blackwell 1947).

+

In the linear case, because the predictor is linear in the coefficients, the estimate can be further simplified to use the estimated coefficients, \[\begin{eqnarray*} +\hat{\tilde{y}}_n +& \approx & +\frac{1}{M} \sum_{m = 1}^M + \left( \alpha^{(m)} + \beta^{(m)} \cdot \tilde{x}_n \right) +\\[4pt] +& = & \frac{1}{M} \sum_{m = 1}^M \alpha^{(m)} + + \frac{1}{M} \sum_{m = 1}^M (\beta^{(m)} \cdot \tilde{x}_n) +\\[4pt] +& = & \frac{1}{M} \sum_{m = 1}^M \alpha^{(m)} + + \left( \frac{1}{M} \sum_{m = 1}^M \beta^{(m)}\right) \cdot \tilde{x}_n +\\[4pt] +& = & \hat{\alpha} + \hat{\beta} \cdot \tilde{x}_n. +\end{eqnarray*}\]

+

In Stan, only the first of the two steps (the important variance reduction step) can be coded in the object model. The linear predictor is defined in the generated quantities block.

+
data {
+  int<lower=0> N_tilde;
+  vector[N_tilde] x_tilde;
+  // ...
+}
+// ...
+generated quantities {
+  // Linear predictor for each test item (no sampling noise added); its
+  // posterior mean is the estimate of the corresponding test outcome.
+  vector[N_tilde] tilde_y = alpha + beta * x_tilde;
+}
+

The posterior mean of tilde_y calculated by Stan is the Bayesian estimate \(\hat{\tilde{y}}.\) The posterior median may also be calculated and used as an estimate, though square error and the posterior mean are more commonly reported.

+
+
+
+

Cross-validation

+

Cross-validation involves choosing multiple subsets of a data set as the test set and using the other data as training. This can be done by partitioning the data and using each subset in turn as the test set with the remaining subsets as training data. A partition into ten subsets is common to reduce computational overhead. In the limit, when the test set is just a single item, the result is known as leave-one-out (LOO) cross-validation (Vehtari, Gelman, and Gabry 2017).

+

Partitioning the data and reusing the partitions is very fiddly in the indexes and may not lead to even divisions of the data. It’s far easier to use random partitions, which support arbitrarily sized test/training splits and can be easily implemented in Stan. The drawback is that the variance of the resulting estimate is higher than with a balanced block partition.

+
+

Stan implementation with random folds

+

For the simple linear regression model, randomized cross-validation can be implemented in a single model. To randomly permute a vector in Stan, the simplest approach is the following.

+
functions {
+  // Returns the integers 1:N in a randomly shuffled order.
+  array[] int permutation_rng(int N) {
+    // Start from the identity ordering 1, 2, ..., N.
+    array[N] int y;
+    for (n in 1 : N) {
+      y[n] = n;
+    }
+    // Probability 1 / N for every position; argument to categorical_rng.
+    vector[N] theta = rep_vector(1.0 / N, N);
+    for (n in 1 : size(y)) {
+      // Swap position n with a position drawn from all of 1:N.
+      // NOTE(review): drawing the swap partner from all N positions at every
+      // step is not the Fisher-Yates shuffle; for N > 2 the N^N equally
+      // likely swap sequences cannot map evenly onto the N! permutations.
+      // Confirm whether exact uniformity is required.
+      int i = categorical_rng(theta);
+      int temp = y[n];
+      y[n] = y[i];
+      y[i] = temp;
+    }
+    return y;
+  }
+}
+

The name of the function must end in _rng because it uses other random functions internally. This will restrict its usage to the transformed data and generated quantities block. The code walks through an array of integers exchanging each item with another randomly chosen item, resulting in a uniformly drawn permutation of the integers 1:N.1

+

The transformed data block uses the permutation RNG to generate training data and test data by taking prefixes and suffixes of the permuted data.

+
data {
+  int<lower=0> N;
+  vector[N] x;
+  vector[N] y;
+  // Number of items held out for testing; at most N.
+  int<lower=0, upper=N> N_test;
+}
+transformed data {
+  int N_train = N - N_test;
+  // One random ordering of 1:N, drawn once when the data are transformed.
+  array[N] int permutation = permutation_rng(N);
+  // The first N_train permuted indexes select the training items ...
+  vector[N_train] x_train = x[permutation[1 : N_train]];
+  vector[N_train] y_train = y[permutation[1 : N_train]];
+  // ... and the remaining N_test permuted indexes select the test items.
+  vector[N_test] x_test = x[permutation[N_train + 1 : N]];
+  vector[N_test] y_test = y[permutation[N_train + 1 : N]];
+}
+

Recall that in Stan, permutation[1:N_train] is an array of integers, so that x[permutation[1 : N_train]] is a vector defined for i in 1:N_train by

+
x[permutation[1 : N_train]][i] = x[permutation[1:N_train][i]]
+                               = x[permutation[i]]
+

Given the test/train split, the rest of the model is straightforward.

+
parameters {
+  real alpha;
+  real beta;
+  real<lower=0> sigma;
+}
+model {
+  // The likelihood uses the training split only.
+  y_train ~ normal(alpha + beta * x_train, sigma);
+  // normal(0, 1) priors; sigma is restricted to positive values by its
+  // declared lower bound.
+  { alpha, beta, sigma } ~ normal(0, 1);
+}
+generated quantities {
+  // One posterior predictive draw per held-out item. normal_rng returns an
+  // array of reals for a vector argument, so convert it back to a vector.
+  vector[N_test] y_test_hat
+      = to_vector(normal_rng(alpha + beta * x_test, sigma));
+  // Per-item prediction error against the observed test outcomes.
+  vector[N_test] err = y_test_hat - y_test;
+}
+

The prediction y_test_hat is defined in the generated quantities block using the general form involving all uncertainty. The posterior of this quantity corresponds to using a posterior mean estimator, \[\begin{eqnarray*} +\hat{y}^{\textrm{test}} +& = & \mathbb{E}\left[ y^{\textrm{test}} \mid x^{\textrm{test}}, x^{\textrm{train}}, y^{\textrm{train}} \right] +\\[4pt] +& \approx & \frac{1}{M} \sum_{m = 1}^M \hat{y}^{\textrm{test}(m)}. +\end{eqnarray*}\]

+

Because the test set is constant and the expectation operator is linear, the posterior mean of err as defined in the Stan program will be the error of the posterior mean estimate, \[\begin{eqnarray*} + \hat{y}^{\textrm{test}} - y^{\textrm{test}} +& = & +\mathbb{E}\left[ + \hat{y}^{\textrm{test}} + \mid x^{\textrm{test}}, x^{\textrm{train}}, y^{\textrm{train}} +\right] + - y^{\textrm{test}} +\\[4pt] +& = & +\mathbb{E}\left[ + \hat{y}^{\textrm{test}} - y^{\textrm{test}} + \mid x^{\textrm{test}}, x^{\textrm{train}}, y^{\textrm{train}} +\right] +\\[4pt] +& \approx & +\frac{1}{M} \sum_{m = 1}^M \hat{y}^{\textrm{test}(m)} - y^{\textrm{test}}, +\end{eqnarray*}\] where \[ +\hat{y}^{\textrm{test}(m)} +\sim p(y \mid x^{\textrm{test}}, x^{\textrm{train}}, +y^{\textrm{train}}). +\] This just calculates error; taking absolute value or squaring will compute absolute error and mean square error. Note that the absolute value and square operation should not be done within the Stan program because neither is a linear function and the result of averaging squares is not the same as squaring an average in general.

+

Because the test set size is chosen for convenience in cross-validation, results should be presented on a per-item scale, such as average absolute error or root mean square error, not on the scale of error in the fold being evaluated.

+
+
+

User-defined permutations

+

It is straightforward to declare the variable permutation in the data block instead of the transformed data block and read it in as data. This allows an external program to control the blocking, allowing non-random partitions to be evaluated.

+
+
+

Cross-validation with structured data

+

Cross-validation must be done with care if the data is inherently structured. For example, in a simple natural language application, data might be structured by document. For cross-validation, one needs to cross-validate at the document level, not at the individual word level. This is related to mixed replication in posterior predictive checking, where there is a choice to simulate new elements of existing groups or generate entirely new groups.

+

Education testing applications are typically grouped by school district, by school, by classroom, and by demographic features of the individual students or the school as a whole. Depending on the variables of interest, different structured subsets should be evaluated. For example, the focus of interest may be on the performance of entire classrooms, so it would make sense to cross-validate at the class or school level on classroom performance.

+
+
+

Cross-validation with spatio-temporal data

+

Often data measurements have spatial or temporal properties. For example, home energy consumption varies by time of day, day of week, on holidays, by season, and by ambient temperature (e.g., a hot spell or a cold snap). Cross-validation must be tailored to the predictive goal. For example, in predicting energy consumption, the quantity of interest may be the prediction for next week’s energy consumption given historical data and current weather covariates. This suggests an alternative to cross-validation, wherein individual weeks are each tested given previous data. This often allows comparing how well prediction performs with more or less historical data.

+
+
+

Approximate cross-validation

+

Vehtari, Gelman, and Gabry (2017) introduce a method that approximates the evaluation of leave-one-out cross validation inexpensively using only the data point log likelihoods from a single model fit. This method is documented and implemented in the R package loo (Gabry et al. 2019).

+ + + +
+
+
+ + + Back to top

References

+
+Blackwell, David. 1947. “Conditional Expectation and Unbiased Sequential Estimation.” The Annals of Mathematical Statistics 18 (1): 105–10. https://doi.org/10.1214/aoms/1177730497. +
+
+Gabry, Jonah, Daniel Simpson, Aki Vehtari, Michael Betancourt, and Andrew Gelman. 2019. “Visualization in Bayesian Workflow.” Journal of the Royal Statistical Society: Series A (Statistics in Society) 182 (2): 389–402. +
+
+Gneiting, Tilmann, and Adrian E Raftery. 2007. “Strictly Proper Scoring Rules, Prediction, and Estimation.” Journal of the American Statistical Association 102 (477): 359–78. +
+
+Rao, C. Radhakrishna. 1945. “Information and Accuracy Attainable in the Estimation of Statistical Parameters.” Bulletin of the Calcutta Math Society 37 (3): 81–91. +
+
+Vehtari, Aki, Andrew Gelman, and Jonah Gabry. 2017. “Practical Bayesian Model Evaluation Using Leave-One-Out Cross-Validation and WAIC.” Statistics and Computing 27 (5): 1413–32. +
+

Footnotes

+ +
    +
  1. The traditional approach is to walk through a vector and replace each item with a random element from the remaining elements, which is guaranteed to only move each item once. This was not done here as it’d require new categorical theta because Stan does not have a uniform discrete RNG built in.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/custom-probability.html b/docs/2_39/stan-users-guide/custom-probability.html new file mode 100644 index 000000000..dcd0d5da8 --- /dev/null +++ b/docs/2_39/stan-users-guide/custom-probability.html @@ -0,0 +1,1326 @@ + + + + + + + + + +Custom Probability Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Custom Probability Functions

+

Custom distributions may also be implemented directly within Stan’s programming language. The only thing that is needed is to increment the total log probability. The rest of the chapter provides examples.

+
+

Examples

+
+

Triangle distribution

+

A simple example is the symmetric triangle distribution, whose density is shaped like an isosceles triangle with corners at specified bounds and height determined by the constraint that a density integrate to 1. If \(\alpha \in \mathbb{R}\) and \(\beta \in \mathbb{R}\) are the bounds, with \(\alpha < \beta\), then \(y \in (\alpha,\beta)\) has a density defined as \[ +\textsf{triangle}(y \mid \alpha,\beta) += +\frac{1}{(\beta - \alpha)^2} +\cdot +\textrm{min}(y - \alpha, \beta - y). +\]

+

The general form of triangle can be coded in Stan as follows.

+
data {
+  // Bounds of the triangle; beta is constrained to be at least alpha.
+  real alpha;
+  real<lower=alpha> beta;
+}
+parameters {
+  // The variate is constrained to lie between the two bounds.
+  real<lower=alpha, upper=beta> y;
+}
+model {
+  // Log of the scaling term 1 / (beta - alpha)^2; constant here because the
+  // bounds are data, but required if either bound depends on a parameter.
+  target += -2 * log(beta - alpha)
+    + log(fmin(y - alpha, beta - y));
+}
+

Because the bounds are specified as data here, the term -2 * log(beta - alpha) could be dropped from the log density. If either of the bounds depends on a parameter, then this term must be included.

+

If \(\alpha = -1\), \(\beta = 1\), and \(y \in (-1,1)\), then fmin(y - alpha, beta - y) is fmin(y + 1, 1 - y), which reduces to 1 - abs(y). Therefore, the density, dropping constants, reduces to \[ +\textsf{triangle}(y \mid -1, 1) \propto 1 - |y|. +\] Consider the following Stan implementation of \(\textsf{triangle}(-1,1)\) for sampling.

+
parameters {
+  // The interval constraint keeps abs(y) below 1 so the log below is defined.
+  real<lower=-1, upper=1> y;
+}
+model {
+  // log(1 - |y|), evaluated with log1m.
+  target += log1m(abs(y));
+}
+

The single scalar parameter y is declared as lying in the interval (-1, 1). The total log probability is incremented with the joint log probability of all parameters, i.e., \(\log \mathsf{Triangle}(y \mid -1, 1)\). This value is coded in Stan as log1m(abs(y)). The function log1m is defined so that log1m(x) has the same value as \(\log(1 - x)\), but the computation is faster, more accurate, and more stable.

+

The constrained type real<lower=-1, upper=1> declared for y is critical for correct sampling behavior. If the constraint on y is removed from the program, say by declaring y as having the unconstrained scalar type real, the program would compile, but it would produce arithmetic exceptions at run time when the sampler explored values of y outside of \((-1,1)\).

+

Now suppose the log probability function were extended to all of \(\mathbb{R}\) as follows by defining the probability to be log(0.0), i.e., \(-\infty\), for values outside of \((-1, 1)\).

+
target += log(fmax(0.0,1 - abs(y)));
+

With the constraint on y in place, this is just a less efficient, slower, and less arithmetically stable version of the original program. But if the constraint on y is removed, the model will compile and run without arithmetic errors, but will not sample properly.1

+
+
+

Exponential distribution

+

If Stan didn’t happen to include the exponential distribution, it could be coded directly using the following assignment statement, where lambda is the inverse scale and y the sampled variate.

+
target += log(lambda) - y * lambda;
+

This encoding will work for any lambda and y; they can be parameters, data, or one of each, or even local variables.

+

The assignment statement in the previous paragraph generates C++ code that is similar to that generated by the following distribution statement.

+
y ~ exponential(lambda);
+

There are two notable differences. First, the distribution statement will check the inputs to make sure that lambda is positive and that y is non-negative (which includes checking that neither is the special not-a-number value).

+

The second difference is that if lambda is not a parameter, transformed parameter, or local model variable, the distribution statement is clever enough to drop the log(lambda) term. This results in the same posterior because Stan only needs the log probability up to an additive constant. If lambda and y are both constants, the distribution statement will drop both terms (but still check for out-of-domain errors on the inputs).

+
+
+

Bivariate normal cumulative distribution function

+

For another example of user-defined functions, consider the following definition of the bivariate normal cumulative distribution function (CDF) with location zero, unit variance, and correlation rho. That is, it computes \[ +\texttt{binormal}\mathtt{\_}\texttt{cdf}(z_1, z_2, \rho) = \Pr[Z_1 \leq z_1 \text{ and } Z_2 \leq z_2] +\] where the random 2-vector \(Z\) has the distribution \[ +Z \sim \textsf{multivariate normal}\left( +\begin{bmatrix} +0 \\ +0 +\end{bmatrix}, \ +\begin{bmatrix} +1 & \rho +\\ +\rho & 1 +\end{bmatrix} +\right). +\]

+

The following Stan program implements this function,

+
real binormal_cdf(tuple(real, real) z, real rho) {
+  // Bivariate normal CDF at (z.1, z.2) for location zero, unit variance,
+  // and correlation rho.
+  real z1 = z.1;
+  real z2 = z.2;
+  // Both coordinates are zero: return the closed-form expression directly.
+  if (z1 == 0 && z2 == 0) {
+    return 0.25 + asin(rho) / (2 * pi());
+  }
+  // Equals sqrt(1 - rho^2); shared by both owens_t second arguments.
+  real denom = sqrt((1 + rho) * (1 - rho));
+  // When z1 == 0 the ratio z2 / z1 would divide by zero, so use the limit of
+  // owens_t as its second argument goes to infinity (sign taken from z2).
+  real term1 = z1 == 0
+    ? (z2 > 0 ? 0.25 : -0.25)
+    :  owens_t(z1, (z2 / z1 - rho) / denom);
+  // Same treatment with the roles of z1 and z2 exchanged.
+  real term2 = z2 == 0
+    ? (z1 > 0 ? 0.25 : -0.25)
+    : owens_t(z2, (z1 / z2 - rho) / denom);
+  real z1z2 = z1 * z2;
+  // Logical result stored as a real: 1 when the product is negative, or when
+  // it is zero and z1 + z2 is negative; 0 otherwise.
+  real delta = z1z2 < 0 || (z1z2 == 0 && (z1 + z2) < 0);
+  return 0.5 * (Phi(z1) + Phi(z2) - delta) - term1 - term2;
+}
+

It is written using a tuple argument so that it may be called as binormal_cdf((z1, z2) | rho). The two ternary operators defining term1 and term2 are derived by taking the limit of the owens_t function when the second argument goes to infinity.

+

One way to test a user-defined function is to have it operate on transformed data. That way, when it’s run, the output of the functions is printed before sampling begins.

+
transformed data {
+  // Each row is (z1, z2, rho). The values are printed when the transformed
+  // data block runs, i.e. before sampling begins.
+  for (zzr in {[0, 0, 0.5],
+               [0, 1, 0.5],
+               [1, 0, -0.2],
+               [1, 3, 0.9]}) {
+    real z1 = zzr[1];
+    real z2 = zzr[2];
+    real rho = zzr[3];
+    // The label names the function actually being called, binormal_cdf.
+    print("binormal_cdf((", z1, ", ", z2, ") | ", rho, ")",
+          "=", binormal_cdf((z1, z2) | rho));
+  }
+}
+

In this case, we verified that the results match those of the pbivnorm package in R.

+ + +
+
+
+ + + Back to top

Footnotes

+ +
    +
  1. The problem is the (extremely!) light tails of the triangle distribution. The standard HMC and NUTS samplers can’t get into the corners of the triangle properly. Because the Stan code declares y to be of type real<lower=-1, upper=1>, the inverse logit transform is applied to the unconstrained variable and its log absolute derivative added to the log probability. The resulting distribution on the logit-transformed y is well behaved.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/dae.html b/docs/2_39/stan-users-guide/dae.html new file mode 100644 index 000000000..6c2b59173 --- /dev/null +++ b/docs/2_39/stan-users-guide/dae.html @@ -0,0 +1,1358 @@ + + + + + + + + + +Differential-Algebraic Equations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Differential-Algebraic Equations

+

Stan supports solving systems of differential-algebraic equations (DAEs) of index 1 (Serban et al. 2021). The solver adaptively refines the solutions in order to satisfy given tolerances.

+

One can think of a differential-algebraic system of equations as ODEs with additional algebraic constraints applied to some of the variables. In such a system, the variable derivatives may not be expressed explicitly with a right-hand side as in ODEs, but are instead implicitly constrained.

+

Similar to ODE solvers, the DAE solvers must not only provide the solution to the DAE itself, but also the gradient of the DAE solution with respect to parameters (the sensitivities). Stan’s DAE solver uses the forward sensitivity calculation to expand the base DAE system with additional DAE equations for the gradients of the solution. For each parameter, an additional full set of \(N\) sensitivity states are added meaning that the full DAE solved has \(N \, + N \cdot M\) states.

+

Two interfaces are provided for the forward sensitivity solver: one with default tolerances and default max number of steps, and one that allows these controls to be modified. Choosing tolerances is important for making any of the solvers work well – the defaults will not work everywhere. The tolerances should be chosen primarily with consideration to the scales of the solutions, the accuracy needed for the solutions, and how the solutions are used in the model. The same principles in the control parameters section apply here.

+

Internally Stan’s DAE solver uses a variable-step, variable-order, backward-differentiation formula implementation (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005).

+
+

Notation

+

A DAE is defined by a set of expressions for the residuals of differential equations and algebraic equations \(r(y', y, t, \theta)\), and consistent initial conditions \(y(t_0, \theta) = y_0, y'(t_0, \theta)=y'_0\). The DAE is defined by the residual function as \(r(y', y, t, \theta)=0\). The \(\theta\) dependence is included in the notation to highlight that the solution \(y(t)\) is a function of any parameters used in the computation.

+
+
+

Example: chemical kinetics

+

As an example of a system of DAEs, consider the following chemical kinetics problem (Robertson 1966). The nondimensionalized DAE consists of two differential equations and one algebraic constraint. The differential equations describe the reactions from reactants \(y_1\) and \(y_2\) to the product \(y_3\), and the algebraic equation describes the mass conservation (Serban and Hindmarsh 2021).

+

\[\begin{align*} +\frac{dy_1}{dt} + \alpha y_1 - \beta y_2 y_3 = 0 \\ +\frac{dy_2}{dt} - \alpha y_1 + \beta y_2 y_3 + \gamma y_2^2 = 0 \\ +y_1 + y_2 + y_3 - 1.0 = 0 +\end{align*}\]

+

The state equations implicitly define the state \((y_1(t), y_2(t), y_3(t))\) at future times as a function of an initial state and the system parameters, in this example the reaction rate coefficients \((\alpha, \beta, \gamma)\).

+

Unlike solving ODEs, solving DAEs requires a consistent initial condition. That is, one must specify both \(y(t_0)\) and \(y'(t_0)\) so that the residual function becomes zero at the initial time \(t_0\), \[\begin{equation*} +r(y'(t_0), y(t_0), t_0) = 0 +\end{equation*}\]

+
+
+

Index of DAEs

+

The index along a DAE solution \(y(t)\) is the minimum number of differentiations of some of the components of the system required to solve for \(y'\) uniquely in terms of \(y\) and \(t\), so that the DAE is converted into an ODE for \(y\). Thus an ODE system is of index 0. The above chemical kinetics DAE is of index 1, as we can perform differentiation of the third equation followed by introducing the first two equations in order to obtain the ODE for \(y_3\).

+

Most DAE solvers, including the one in Stan, support only index-1 DAEs. For a high index DAE problem the user must first convert it to a lower index system. This often can be done by carrying out differentiations analytically (Ascher and Petzold 1998).

+
+
+

Coding the DAE system function

+

The first step in coding a DAE system in Stan is defining the DAE residual function. The system functions require a specific signature so that the solvers know how to use them properly.

+

The first argument to the residual function is time, passed as a real; the second argument to the residual function is the system state \(y\), passed as a vector; the third argument to the residual function is the state derivative \(y'\), also passed as a vector. The residual function’s return value is a vector of the same size as state and state derivatives. Additional arguments can be included in the residual function to pass other information into the solver (these will be passed through the function that starts the DAE solution). These arguments can be parameters (in our example, the reaction rate coefficients \(\alpha\), \(\beta\), and \(\gamma\)), data, or any quantities that are needed to define the DAE.

+

The above reaction can be coded using the following function in Stan (see the user-defined functions chapter for more information on coding user-defined functions).

+
vector chem(real t, vector yy, vector yp,
+            real alpha, real beta, real gamma) {
+  // res holds the three residuals; a DAE solution makes every entry zero
+  vector[3] res;
+  // first differential equation: dy1/dt + alpha * y1 - beta * y2 * y3 = 0
+  res[1] = yp[1] + alpha * yy[1] - beta * yy[2] * yy[3];
+  // second differential equation:
+  // dy2/dt - alpha * y1 + beta * y2 * y3 + gamma * y2^2 = 0
+  res[2] = yp[2] - alpha * yy[1] + beta * yy[2] * yy[3] + gamma * yy[2] * yy[2];
+  // algebraic constraint (mass conservation): y1 + y2 + y3 - 1 = 0
+  res[3] = yy[1] + yy[2] + yy[3] - 1.0;
+  return res;
+}
+

The function takes in a time t (a real), the system state yy (a vector), state derivative yp (a vector), as well as parameter alpha (a real), beta (a real), and gamma (a real). The function returns a vector of the residuals at time t. The DAE coded here does not explicitly depend on t, however one still needs to specify t as an argument.

+
+

Strict signature

+

The types in the DAE residual function are strict. The first argument is the time passed as a real, the second argument is the state passed as a vector, the third argument is the state derivative passed as a vector, and the return type is a vector. A model that does not have this signature will fail to compile. The fourth argument onwards can be any type, granted all the argument types match the types of the respective arguments in the solver call.

+

All of these are possible DAE signatures:

+
vector my_dae1(real t, vector y, vector yp, real a0);
+// One extra argument after the three required ones
+vector my_dae2(real t, vector y, vector yp, array[] int a0, vector a1);
+// Two extra arguments of different types
+vector my_dae3(real t, vector y, vector yp, matrix a0, array[] real a1, row_vector a2);
+// Three extra arguments of different types
+

but these are not allowed:

+
vector my_dae1(real t, array[] real y, vector yp);
+// Second argument is not a vector
+array[] real my_dae2(real t, vector y, vector yp);
+// Return type is not a vector
+vector my_dae3(real t, vector y);
+// Missing the third argument (the state derivative vector)
+
+
+
+

Solving DAEs

+

Stan provides a dae function for solving DAEs, so that the above chemical reaction equation can be solved in the following code.

+
data {
+  int N;                   // number of requested solution times
+  vector[3] yy0;           // initial state y(t0)
+  vector[3] yp0;           // initial state derivative y'(t0)
+  real t0;                 // initial time
+  real alpha;
+  real beta;
+  array[N] real ts;        // times at which the solution is requested
+  array[N] vector[3] y;
+}
+parameters {
+  real gamma;
+}
+transformed parameters {
+  // dae returns one state vector per requested time; it is declared with the
+  // array[N] ... syntax because the older `vector[3] y_hat[N]` form is no
+  // longer accepted by Stan
+  array[N] vector[3] y_hat = dae(chem, yy0, yp0, t0, ts, alpha, beta, gamma);
+}
+

Since gamma is a parameter, the DAE solver is called in the transformed parameters block.

+
+
+

Control parameters for DAE solving

+

Using dae_tol one can specify the relative_tolerance, absolute_tolerance, and max_num_steps parameters in order to control the DAE solution.

+
array[N] vector[3] y_hat = dae_tol(chem, yy0, yp0, t0, ts,
+                                   relative_tolerance,
+                                   absolute_tolerance,
+                                   max_num_steps,
+                                   alpha, beta, gamma);
+

relative_tolerance and absolute_tolerance control the accuracy the solver tries to achieve, and max_num_steps specifies the maximum number of steps the solver will take between output time points before throwing an error.

+

The control parameters must be data variables – they cannot be parameters or expressions that depend on parameters, including local variables in any block other than transformed data and generated quantities. User-defined function arguments may be qualified as only allowing data arguments using the data qualifier.

+

The default values of the relative and absolute tolerances are \(10^{-10}\) and the maximum number of steps between outputs is one hundred million. We suggest the user choose the control parameters according to the problem at hand, and resort to the defaults only when no knowledge of the DAE system or the physics it models is available.

+
+

Maximum number of steps

+

The maximum number of steps can be used to stop a runaway simulation. This can arise when MCMC moves to a part of parameter space very far from where a differential equation would typically be solved. In particular this can happen during warmup. With the non-stiff solver, this may happen when the sampler moves to stiff regions of parameter space, which will require small step sizes.

+ + + +
+
+
+ + Back to top

References

+
+Ascher, Uri M., and Linda R. Petzold. 1998. Computer Methods for Ordinary Differential Equations and Differential-Algebraic Equations. Philadelphia: SIAM: Society for Industrial and Applied Mathematics. +
+
+Cohen, Scott D, and Alan C Hindmarsh. 1996. “CVODE, a Stiff/Nonstiff ODE Solver in C.” Computers in Physics 10 (2): 138–43. +
+
+Robertson, H. H. 1966. “The Solution of a Set of Reaction Rate Equations.” In Numerical Analysis, an Introduction, 178–82. London; New York: Academic Press. +
+
+Serban, Radu, and Alan C Hindmarsh. 2005. “CVODES: The Sensitivity-Enabled ODE Solver in SUNDIALS.” In ASME 2005 International Design Engineering Technical Conferences and Computers and Information in Engineering Conference, 257–69. American Society of Mechanical Engineers. +
+
+Serban, Radu, and Alan C. Hindmarsh. 2021. “Example Programs for IDAS.” LLNL-TR-437091. Lawrence Livermore National Laboratory. +
+
+Serban, Radu, Cosmin Petra, Alan C. Hindmarsh, Cody J. Balos, David J. Gardner, Daniel R. Reynolds, and Carol S. Woodward. 2021. “User Documentation for IDAS V5.0.0.” Lawrence Livermore National Laboratory. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/decision-analysis.html b/docs/2_39/stan-users-guide/decision-analysis.html new file mode 100644 index 000000000..dd3672d7c --- /dev/null +++ b/docs/2_39/stan-users-guide/decision-analysis.html @@ -0,0 +1,1372 @@ + + + + + + + + + +Decision Analysis + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Decision Analysis

+

Statistical decision analysis is about making decisions under uncertainty. In order to make decisions, outcomes must have some notion of “utility” associated with them. The so-called “Bayes optimal” decision is the one that maximizes expected utility (or equivalently, minimizes expected loss). This chapter shows how Stan can be used to simultaneously estimate the distribution of outcomes based on decisions and compute the required expected utilities.

+
+

Outline of decision analysis

+

Following Gelman et al. (2013), Bayesian decision analysis can be factored into the following four steps.

+
    +
  1. Define a set \(X\) of possible outcomes and a set \(D\) of possible decisions.

  2. +
  3. Define a probability distribution of outcomes conditional on decisions through a conditional density function \(p(x \mid d)\) for \(x \in X\) and \(d \in D.\)

  4. +
  5. Define a utility function \(U : X \rightarrow \mathbb{R}\) mapping outcomes to their utility.

  6. +
  7. Choose action \(d^* \in D\) with highest expected utility, \[ +d^* = \textrm{arg max}_d \ \mathbb{E}[U(x) \mid d]. +\]

  8. +
+

The outcomes should represent as much information as possible that is relevant to utility. In Bayesian decision analysis, the distribution of outcomes will typically be a posterior predictive distribution conditioned on observed data. There is a large literature in psychology and economics related to defining utility functions. For example, the utility of money is usually assumed to be strictly concave rather than linear (i.e., the marginal utility of getting another unit of money decreases the more money one has).

+
+
+

Example decision analysis

+

This section outlines a very simple decision analysis for a commuter deciding among modes of transportation to get to work: walk, bike share, public transportation, or cab. Suppose the commuter has been taking various modes of transportation for the previous year and the transportation conditions and costs have not changed during that time. Over the year, such a commuter might accumulate two hundred observations of the time it takes to get to work given a choice of commute mode.

+
+

Step 1. Define decisions and outcomes

+

A decision consists of the choice of commute mode and the outcome is a time and cost. More formally,

+
    +
  • the set of decisions is \(D = 1:4\), corresponding to the commute types walking, bicycling, public transportation, and cab, respectively, and

  • +
  • the set of outcomes \(X = \mathbb{R} \times \mathbb{R}_+\) contains pairs of numbers \(x = (c, t)\) consisting of a cost \(c\) and time \(t \geq 0\).

  • +
+
+
+

Step 2. Define density of outcome conditioned on decision

+

The density required is \(p(x \mid d),\) where \(d \in D\) is a decision and \(x = (c, t) \in X\) is an outcome. Being a statistical decision problem, this density will be a posterior predictive distribution conditioned on previously observed outcome and decision pairs, based on a parametric model with parameters \(\theta,\) \[ +p(x \mid d, x^{\textrm{obs}}, d^{\textrm{obs}}) += +\int + p(x \mid d, \theta) + \cdot p(\theta \mid x^{\textrm{obs}}, d^{\textrm{obs}}) + \, \textrm{d}\theta. +\] The observed data for a year of commutes consists of the chosen commute mode \(d^{\textrm{obs}}_n\) and observed costs and times \(x^{\textrm{obs}}_n = (c^{\textrm{obs}}_n, t^{\textrm{obs}}_n)\) for \(n +\in 1:200.\)

+

For simplicity, commute time \(t_n\) for trip \(n\) will be modeled as lognormal for a given choice of transportation \(d_n \in 1:4,\) \[ +t_n \sim \textrm{lognormal}(\mu_{d[n]}, \sigma_{d[n]}). +\] To understand the notation, \(d_n\), also written \(d[n]\), is the mode of transportation used for trip \(n\). For example if trip \(n\) was by bicycle, then \(t_n \sim \textrm{lognormal}(\mu_2, \sigma_2),\) where \(\mu_2\) and \(\sigma_2\) are the lognormal parameters for bicycling.

+

Simple fixed priors are used for each mode of transportation \(k \in 1:4,\) \[\begin{eqnarray*} +\mu_k & \sim & \textrm{normal}(0, 5) +\\[2pt] +\sigma_k & \sim & \textrm{lognormal}(0, 1). +\end{eqnarray*}\] These priors are consistent with a broad range of commute times; in a more realistic model each commute mode would have its own prior based on knowledge of the city and the time of day would be used as a covariate; here the commutes are taken to be exchangeable.

+

Cost is usually a constant function for public transportation, walking, and bicycling. Nevertheless, for simplicity, all costs will be modeled as lognormal, \[ +c_n \sim \textrm{lognormal}(\nu_{d[n]}, \tau_{d[n]}). +\] Again, the priors are fixed for the modes of transportation, \[\begin{eqnarray*} +\nu_k & \sim & \textrm{normal}(0, 5) +\\[2pt] +\tau_k & \sim & \textrm{lognormal}(0, 1). +\end{eqnarray*}\] A more realistic approach would model cost conditional on time, because the cost of a cab depends on route chosen and the time it takes.

+

The full set of parameters that are marginalized in the posterior predictive distribution is \[ +\theta = (\mu_{1:4}, \sigma_{1:4}, \nu_{1:4}, \tau_{1:4}). +\]

+
+
+

Step 3. Define the utility function

+

For the sake of concreteness, the utility function will be assumed to be a simple function of cost and time. Further suppose the commuter values their commute time at $25 per hour and has a utility function that is linear in the commute cost and time. Then the utility function may be defined as

+

\[ +U(c, t) = -(c + 25 \cdot t) +\]

+

The sign is negative because high cost is undesirable. A better utility function might have a step function or increasing costs for being late, different costs for different modes of transportation because of their comfort and environmental impact, and non-linearity of utility in cost.

+
+
+

Step 4. Maximize expected utility

+

At this point, all that is left is to calculate expected utility for each decision and choose the optimum. If the decisions consist of a small set of discrete choices, expected utility can be easily coded in Stan. The utility function is coded as a function, the observed data is coded as data, the model parameters coded as parameters, and the model block itself coded to follow the sampling distributions of each parameter.

+
functions {
+  // Utility of a commute with cost c and time t; negative because both
+  // cost and time are undesirable, with time weighted by 25.
+  real U(real c, real t) {
+    return -(c + 25 * t);
+  }
+}
+data {
+  // N observed commutes: mode d[n], cost c[n], and time t[n]
+  int<lower=0> N;
+  array[N] int<lower=1, upper=4> d;
+  array[N] real c;
+  array[N] real<lower=0> t;
+}
+parameters {
+  // one (location, scale) pair per commute mode:
+  // mu, sigma for time and nu, tau for cost
+  vector[4] mu;
+  vector<lower=0>[4] sigma;
+  array[4] real nu;
+  array[4] real<lower=0> tau;
+}
+model {
+  // NOTE(review): these prior scales differ from the normal(0, 5) and
+  // lognormal(0, 1) priors stated in Step 2 of the text -- confirm which
+  // set is intended.
+  mu ~ normal(0, 1);
+  sigma ~ lognormal(0, 0.25);
+  nu ~ normal(0, 20);
+  tau ~ lognormal(0, 0.25);
+  // each observation uses the parameters of its own commute mode d[n]
+  t ~ lognormal(mu[d], sigma[d]);
+  c ~ lognormal(nu[d], tau[d]);
+}
+generated quantities {
+  array[4] real util;
+  for (k in 1:4) {
+    // utility of one simulated (cost, time) commute for mode k
+    util[k] = U(lognormal_rng(nu[k], tau[k]),
+                lognormal_rng(mu[k], sigma[k]));
+  }
+}
+

The generated quantities block defines an array variable util where util[k] will hold the utility derived from a random commute for choice k generated according to the model parameters for that choice. This randomness is required to appropriately characterize the posterior predictive distribution of utility.

+

For simplicity in this initial formulation, all four commute options have their costs estimated, even though cost is fixed for three of the options. To deal with the fact that some costs are fixed, the costs would have to be hardcoded or read in as data, nu and tau would be declared as univariate, and the RNG for cost would only be employed when k == 4.

+

Defining the utility function for pairs of vectors would allow the random number generation in the generated quantities block to be vectorized.

+

All that is left is to run Stan. The posterior mean for util[k] is the expected utility, which written out with full conditioning, is \[\begin{eqnarray*} +\mathbb{E}\!\left[U(x) \mid d = k, d^{\textrm{obs}}, x^{\textrm{obs}}\right] +& = & +\int + U(x) + \cdot p(x \mid d = k, \theta) + \cdot p(\theta \mid d^{\textrm{obs}}, x^{\textrm{obs}}) + \, \textrm{d}\theta +\\[4pt] +& \approx & +\frac{1}{M} \sum_{m = 1}^M U(x^{(m)} ), +\end{eqnarray*}\] where \[ +x^{(m)} \sim p(x \mid d = k, \theta^{(m)} ) +\] and \[ +\theta^{(m)} +\sim p(\theta \mid d^{\textrm{obs}}, x^{\textrm{obs}}). +\]

+

In terms of Stan’s execution, the random generation of \(x^{(m)}\) is carried out with the lognormal_rng operations after \(\theta^{(m)}\) is drawn from the model posterior. The average is then calculated after multiple chains are run and combined.

+

It only remains to make the decision k with highest expected utility, which will correspond to the choice with the highest posterior mean for util[k]. This can be read off of the mean column of Stan’s summary statistics or accessed programmatically through Stan’s interfaces.

+
+
+
+

Continuous choices

+

Many choices, such as how much to invest for retirement or how long to spend at the gym are not discrete, but continuous. In these cases, the continuous choice can be coded as data in the Stan program. Then the expected utilities may be calculated. In other words, Stan can be used as a function from a choice to expected utilities. Then an external optimizer can call that function. This optimization can be difficult without gradient information. Gradients could be supplied by automatic differentiation, but Stan is not currently instrumented to calculate those derivatives.

+ + + +
+
+ + Back to top

References

+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/efficiency-tuning.html b/docs/2_39/stan-users-guide/efficiency-tuning.html new file mode 100644 index 000000000..f89182f93 --- /dev/null +++ b/docs/2_39/stan-users-guide/efficiency-tuning.html @@ -0,0 +1,2128 @@ + + + + + + + + + +Efficiency Tuning + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Efficiency Tuning

+

This chapter provides a grab bag of techniques for optimizing Stan code, including vectorization, sufficient statistics, and conjugacy. At a coarse level, efficiency involves both the amount of time required for a computation and the amount of memory required. For practical applied statistical modeling, we are mainly concerned with reducing wall time (how long a program takes as measured by a clock on the wall) and keeping memory requirements within available bounds.

+
+

What is efficiency?

+

The standard algorithm analyses in computer science measure efficiency asymptotically as a function of problem size (such as data, number of parameters, etc.) and typically do not consider constant additive factors like startup times or multiplicative factors like speed of operations. In practice, the constant factors are important; if run time can be cut in half or more, that’s a huge gain. This chapter focuses on both the constant factors involved in efficiency (such as using built-in matrix operations as opposed to naive loops) and on asymptotic efficiency factors (such as using linear algorithms instead of quadratic algorithms in loops).

+
+
+

Efficiency for probabilistic models and algorithms

+

Stan programs express models which are intrinsically statistical in nature. The algorithms applied to these models may or may not themselves be probabilistic. For example, given an initial value for parameters (which may itself be given deterministically or generated randomly), Stan’s optimization algorithm (L-BFGS) for penalized maximum likelihood estimation is purely deterministic. Stan’s sampling algorithms are based on Markov chain Monte Carlo algorithms, which are probabilistic by nature at every step. Stan’s variational inference algorithm (ADVI) is probabilistic despite being an optimization algorithm; the randomization lies in a nested Monte Carlo calculation for an expected gradient.

+

With probabilistic algorithms, there will be variation in run times (and maybe memory usage) based on the randomization involved. For example, by starting too far out in the tail, iterative algorithms underneath the hood, such as the solvers for ordinary differential equations, may take different numbers of steps. Ideally this variation will be limited; when there is a lot of variation it can be a sign that there is a problem with the model’s parameterization in a Stan program or with initialization.

+

A well-behaved Stan program will have low variance between runs with different random initializations and differently seeded random number generators. But sometimes an algorithm can get stuck in one part of the posterior, typically due to high curvature. Such sticking almost always indicates the need to reparameterize the model. Just throwing away Markov chains with apparently poor behavior (slow, or stuck) can lead to bias in posterior estimates. This problem with getting stuck can often be overcome by lowering the initial step size to avoid getting stuck during adaptation and increasing the target acceptance rate in order to target a lower step size. This is because smaller step sizes allow Stan’s gradient-based algorithms to better follow the curvature in the density or penalized maximum likelihood being fit.

+
+
+

Statistical vs.  computational efficiency

+

There is a difference between pure computational efficiency and statistical efficiency for Stan programs fit with sampling-based algorithms. Computational efficiency measures the amount of time or memory required for a given step in a calculation, such as an evaluation of a log posterior or penalized likelihood.

+

Statistical efficiency typically involves requiring fewer steps in algorithms by making the statistical formulation of a model better behaved. The typical way to do this is by applying a change of variables (i.e., reparameterization) so that sampling algorithms mix better or optimization algorithms require less adaptation.

+
+
+

Model conditioning and curvature

+

Because Stan’s algorithms rely on step-based gradient-based approximations of the density (or penalized maximum likelihood) being fitted, posterior curvature not captured by this first-order approximation plays a central role in determining the statistical efficiency of Stan’s algorithms.

+

A second-order approximation to curvature is provided by the Hessian, the matrix of second derivatives of the log density \(\log +p(\theta)\) with respect to the parameter vector \(\theta\), defined as \[ +H(\theta) = \nabla \, \nabla \, \log p(\theta \mid y), +\] so that \[ +H_{i, j}(\theta) = \frac{\partial^2 \log p(\theta \mid y)} + {\partial \theta_i \ \partial \theta_j}. +\] For pure penalized maximum likelihood problems, the posterior log density \(\log p(\theta \mid y)\) is replaced by the penalized likelihood function \(\mathcal{L}(\theta) = \log p(y \mid \theta) - \lambda(\theta)\).

+
+

Condition number and adaptation

+

A good gauge of how difficult a problem the curvature presents is given by the condition number of the Hessian matrix \(H\), which is the ratio of the largest to the smallest eigenvalue of \(H\) (assuming the Hessian is positive definite). This essentially measures the difference between the flattest direction of movement and the most curved. Typically, the step size of a gradient-based algorithm is bounded by the most sharply curved direction. With better conditioned log densities or penalized likelihood functions, it is easier for Stan’s adaptation, especially the diagonal adaptations that are used as defaults.

+
+
+

Unit scales without correlation

+

Ideally, all parameters should be programmed so that they have unit scale and so that posterior correlation is reduced; together, these properties mean that there is no rotation or scaling required for optimal performance of Stan’s algorithms. For Hamiltonian Monte Carlo, this implies a unit mass matrix, which requires no adaptation as it is where the algorithm initializes.

+
+
+

Varying curvature

+

In all but very simple models (such as multivariate normals), the Hessian will vary as \(\theta\) varies (an extreme example is Neal’s funnel, as naturally arises in hierarchical models with little or no data). The more the curvature varies, the harder it is for all of the algorithms with fixed adaptation parameters to find adaptations that cover the entire density well. Many of the variable transforms proposed are aimed at improving the conditioning of the Hessian and/or making it more consistent across the relevant portions of the density (or penalized maximum likelihood function) being fit.

+

For all of Stan’s algorithms, the curvature along the path from the initial values of the parameters to the solution is relevant. For penalized maximum likelihood and variational inference, the solution of the iterative algorithm will be a single point, so this is all that matters. For sampling, the relevant “solution” is the typical set, which is the posterior volume where almost all draws from the posterior lie; thus, the typical set contains almost all of the posterior probability mass.

+

With sampling, the curvature may vary dramatically between the points on the path from the initialization point to the typical set and within the typical set. This is why adaptation needs to run long enough to visit enough points in the typical set to get a good first-order estimate of the curvature within the typical set. If adaptation is not run long enough, sampling within the typical set after adaptation will not be efficient. We generally recommend at least one hundred iterations after the typical set is reached (and the first effective draw is ready to be realized). Whether adaptation has run long enough can be measured by comparing the adaptation parameters derived from a set of diffuse initial parameter values.

+
+
+

Reparameterizing with a change of variables

+

Improving statistical efficiency is achieved by reparameterizing the model so that the same result may be calculated using a density or penalized maximum likelihood that is better conditioned. Again, see the reparameterization of Neal’s funnel for an example, and also the examples in the change of variables chapter.

+

One has to be careful in using change-of-variables reparameterizations when using maximum likelihood estimation, because they can change the result if the Jacobian term is inadvertently included in the revised likelihood model.

+
+
+
+

Well-specified models

+

Model misspecification, which roughly speaking means using a model that doesn’t match the data, can be a major source of slow code. This can be seen in cases where simulated data according to the model runs robustly and efficiently, whereas the real data for which it was intended runs slowly or may even have convergence and mixing issues. While some of the techniques recommended in the remaining sections of this chapter may mitigate the problem, the best remedy is a better model specification.

+

Counterintuitively, more complicated models often run faster than simpler models. One common pattern is with a group of parameters with a wide fixed prior such as normal(0, 1000). This can fit slowly due to the mismatch between prior and posterior (the prior has support for values in the hundreds or even thousands, whereas the posterior may be concentrated near zero). In such cases, replacing the fixed prior with a hierarchical prior such as normal(mu, sigma), where mu and sigma are new parameters with their own hyperpriors, can be beneficial.

+
+
+

Avoiding validation

+

Stan validates all of its data structure constraints. For example, consider a transformed parameter defined to be a covariance matrix and then used as a covariance parameter in the model block.

+
transformed parameters {
+  // the cov_matrix type is checked for positive definiteness when this
+  // block ends
+  cov_matrix[K] Sigma;
+  // ...
+}                               // first validation
+model {
+  y ~ multi_normal(mu, Sigma);  // second validation
+  // ...
+}
+

Because Sigma is declared to be a covariance matrix, it will be factored at the end of the transformed parameter block to ensure that it is positive definite. The multivariate normal log density function also validates that Sigma is positive definite. This test is expensive, having cubic run time (i.e., \(\mathcal{O}(N^3)\) for \(N \times N\) matrices), so it should not be done twice.

+

The test may be avoided by simply declaring Sigma to be a simple unconstrained matrix.

+
transformed parameters {
+  // plain matrix type: no positive-definiteness check when this block ends
+  matrix[K, K] Sigma;
+  // ...
+}
+model {
+  y ~ multi_normal(mu, Sigma);  // only validation
+}
+

Now the only validation is carried out by the multivariate normal.

+
+
+

Reparameterization

+

Stan’s sampler can be slow in sampling from distributions with difficult posterior geometries. One way to speed up such models is through reparameterization. In some cases, reparameterization can dramatically increase effective sample size for the same number of iterations or even make programs that would not converge well behaved.

+
+

Example: Neal’s funnel

+

In this section, we discuss a general transform from a centered to a non-centered parameterization (Papaspiliopoulos, Roberts, and Sköld 2007).1

+

This reparameterization is helpful when there is not much data, because it separates the hierarchical parameters and lower-level parameters in the prior.

+

Neal (2003) defines a distribution that exemplifies the difficulties of sampling from some hierarchical models. Neal’s example is fairly extreme, but can be trivially reparameterized in such a way as to make sampling straightforward. Neal’s example has support for \(y \in +\mathbb{R}\) and \(x \in \mathbb{R}^9\) with density

+

\[ +p(y,x) = \textsf{normal}(y \mid 0,3) \times \prod_{n=1}^9 +\textsf{normal}(x_n \mid 0,\exp(y/2)). +\]

+

The probability contours are shaped like ten-dimensional funnels. The funnel’s neck is particularly sharp because of the exponential function applied to \(y\). The log marginal density of \(y\) and the first dimension \(x_1\) is shown in the following plot.

+

The funnel can be implemented directly in Stan as follows.

+
parameters {
+  real y;
+  vector[9] x;
+}
+model {
+  y ~ normal(0, 3);
+  // every element of x shares the scale exp(y/2), which changes with y
+  x ~ normal(0, exp(y/2));
+}
+

When the model is expressed this way, Stan has trouble sampling from the neck of the funnel, where \(y\) is small and thus \(x\) is constrained to be near 0. This is due to the fact that the density’s scale changes with \(y\), so that a step size that works well in the body will be too large for the neck, and a step size that works in the neck will be inefficient in the body. This can be seen in the following plots.

+
+
+
+
+
+

+
+
+

+
+
+
+
+Figure 1: Neal’s funnel. (Left) The marginal density of Neal’s funnel for the upper-level variable \(y\) and one lower-level variable \(x_1\) (see the text for the formula). The blue region has log density greater than -8, the yellow region density greater than -16, and the gray background a density less than -16. (Right) 4000 draws are taken from a run of Stan’s sampler with default settings. Both plots are restricted to the shown window of \(x_1\) and \(y\) values; some draws fell outside of the displayed area as would be expected given the density. The draws are consistent with the marginal density \(p(y) = \textsf{normal}(y \mid 0,3)\), which has mean 0 and standard deviation 3. +
+
+
+

In this particular instance, because the analytic form of the density is known, the model can be converted to the following more efficient form.

+
parameters {
+  // standard-normal "raw" variables; y and x are derived from them below
+  real y_raw;
+  vector[9] x_raw;
+}
+transformed parameters {
+  real y;
+  vector[9] x;
+
+  // rescale the raw variables to recover the funnel's y and x
+  y = 3.0 * y_raw;
+  x = exp(y/2) * x_raw;
+}
+model {
+  y_raw ~ std_normal(); // implies y ~ normal(0, 3)
+  x_raw ~ std_normal(); // implies x ~ normal(0, exp(y/2))
+}
+

In this second model, the parameters x_raw and y_raw are sampled as independent standard normals, which is easy for Stan. These are then transformed into draws from the funnel. In this case, the same transform may be used to define Monte Carlo directly based on independent standard normal draws; Markov chain Monte Carlo methods are not necessary. If such a reparameterization were used in Stan code, it is useful to provide a comment indicating what the distribution for the parameter implies for the distribution of the transformed parameter.

+
+
+

Reparameterizing the Cauchy

+

Sampling from heavy tailed distributions such as the Cauchy is difficult for Hamiltonian Monte Carlo, which operates within a Euclidean geometry.

+

The practical problem is that the tail of the Cauchy requires a relatively large step size compared to the trunk. With a small step size, the No-U-Turn sampler requires many steps when starting in the tail of the distribution; with a large step size, there will be too much rejection in the central portion of the distribution. This problem may be mitigated by defining the Cauchy-distributed variable as the transform of a uniformly distributed variable using the Cauchy inverse cumulative distribution function.

+

Suppose a random variable of interest \(X\) has a Cauchy distribution with location \(\mu\) and scale \(\tau\), so that \(X \sim +\textsf{Cauchy}(\mu,\tau)\). The variable \(X\) has a cumulative distribution function \(F_X:\mathbb{R} \rightarrow (0,1)\) defined by \[ +F_X(x) = \frac{1}{\pi} \arctan \left( \frac{x - \mu}{\tau} \right) + +\frac{1}{2}. +\] The inverse of the cumulative distribution function, \(F_X^{-1}:(0,1) \rightarrow \mathbb{R}\), is thus

+

\[ +F^{-1}_X(y) = \mu + \tau \tan \left( \pi \left( y - \frac{1}{2} \right) \right). +\] Thus if the random variable \(Y\) has a unit uniform distribution, \(Y +\sim \textsf{uniform}(0,1)\), then \(F^{-1}_X(Y)\) has a Cauchy distribution with location \(\mu\) and scale \(\tau\), i.e., \(F^{-1}_X(Y) \sim +\textsf{Cauchy}(\mu,\tau)\).

+

Consider a Stan program involving a Cauchy-distributed parameter beta.

+
parameters {
+  real beta;
+  // ...
+}
+model {
+  beta ~ cauchy(mu, tau);
+  // ...
+}
+

This declaration of beta as a parameter may be replaced with a transformed parameter beta defined in terms of a uniform-distributed parameter beta_unif.

+
parameters {
+  real<lower=-pi() / 2, upper=pi() / 2> beta_unif;
+  // ...
+}
+transformed parameters {
+  real beta;
+  beta = mu + tau * tan(beta_unif);  // beta ~ cauchy(mu, tau)
+}
+model {
+  beta_unif ~ uniform(-pi() / 2, pi() / 2);  // not necessary
+  // ...
+}
+

It is more convenient in Stan to transform a uniform variable on \((-\pi/2, \pi/2)\) than one on \((0,1)\). The Cauchy location and scale parameters, mu and tau, may be defined as data or may themselves be parameters. The variable beta could also be defined as a local variable if it does not need to be included in the sampler’s output.

+

The uniform distribution on beta_unif is defined explicitly in the model block, but it could be safely removed from the program without changing sampling behavior. This is because \(\log +\textsf{uniform}(\beta_{\textsf{unif}} \mid -\pi/2,\pi/2) = +-\log \pi\) is a constant and Stan only needs the total log probability up to an additive constant. Stan will spend some time checking that beta_unif is between -pi() / 2 and pi() / 2, but this condition is guaranteed by the constraints in the declaration of beta_unif.

+
+
+

Reparameterizing a Student-t distribution

+

One thing that sometimes works when you’re having trouble with the heavy-tailedness of Student-t distributions is to use the gamma-mixture representation, which says that you can generate a Student-t distributed variable \(\beta\), \[ +\beta \sim \textsf{Student-t}(\nu, 0, 1), +\] by first generating a gamma-distributed precision (inverse variance) \(\tau\) according to \[ +\tau \sim \textsf{Gamma}(\nu/2, \nu/2), +\] and then generating \(\beta\) from the normal distribution, \[ +\beta \sim \textsf{normal}\left(0,\tau^{-\frac{1}{2}}\right). +\]

+

Because \(\tau\) is precision, \(\tau^{-\frac{1}{2}}\) is the scale (standard deviation), which is the parameterization used by Stan.

+

The marginal distribution of \(\beta\) when you integrate out \(\tau\) is \(\textsf{Student-t}(\nu, 0, 1)\), i.e., \[ +\textsf{Student-t}(\beta \mid \nu, 0, 1) += +\int_0^{\infty} +\, +\textsf{normal}\left(\beta \middle| 0, \tau^{-0.5}\right) +\times +\textsf{Gamma}\left(\tau \middle| \nu/2, \nu/2\right) +\ +\text{d} \tau. +\]

+

To go one step further, instead of defining a \(\beta\) drawn from a normal with precision \(\tau\), define \(\alpha\) to be drawn from a unit normal, \[ +\alpha \sim \textsf{normal}(0,1) +\] and rescale by defining \[ +\beta = \alpha \, \tau^{-\frac{1}{2}}. +\]

+

Now suppose \(\mu = \beta x\) is the product of \(\beta\) with a regression predictor \(x\). Then the reparameterization \(\mu = \alpha +\tau^{-\frac{1}{2}} x\) has the same distribution, but in the original, direct parameterization, \(\beta\) has (potentially) heavy tails, whereas in the second, neither \(\tau\) nor \(\alpha\) have heavy tails.

+

To translate into Stan notation, this reparameterization replaces

+
parameters {
+  real<lower=0> nu;
+  real beta;
+  // ...
+}
+model {
+  beta ~ student_t(nu, 0, 1);
+  // ...
+}
+

with

+
parameters {
+  real<lower=0> nu;
+  real<lower=0> tau;
+  real alpha;
+  // ...
+}
+transformed parameters {
+  real beta;
+  beta = alpha / sqrt(tau);
+  // ...
+}
+model {
+  real half_nu;
+  half_nu = 0.5 * nu;
+  tau ~ gamma(half_nu, half_nu);
+  alpha ~ std_normal();
+  // ...
+}
+

Although set to 0 here, in most cases, the lower bound for the degrees of freedom parameter nu can be set to 1 or higher; when nu is 1, the result is a Cauchy distribution with fat tails and as nu approaches infinity, the Student-t distribution approaches a normal distribution. Thus the parameter nu characterizes the heaviness of the tails of the model.

+
+
+

Hierarchical models and the non-centered parameterization

+

Unfortunately, the usual situation in applied Bayesian modeling involves complex geometries and interactions that are not known analytically. Nevertheless, the non-centered parameterization can still be effective for separating parameters.

+
+

Centered parameterization

+

For example, a vectorized hierarchical model might draw a vector of coefficients \(\beta\) using the so-called centered parameterization, as follows.

+
parameters {
+  real mu_beta;
+  real<lower=0> sigma_beta;
+  vector[K] beta;
+  // ...
+}
+model {
+  beta ~ normal(mu_beta, sigma_beta);
+  // ...
+}
+

Although not shown, a full model will have priors on both mu_beta and sigma_beta along with data modeled based on these coefficients. For instance, a standard binary logistic regression with data matrix x and binary outcome vector y would include a likelihood statement of the form y ~ bernoulli_logit(x * beta), leading to an analytically intractable posterior.

+

A hierarchical model such as the above will suffer from the same kind of inefficiencies as Neal’s funnel, because the values of beta, mu_beta and sigma_beta are highly correlated in the posterior. The extremity of the correlation depends on the amount of data, with Neal’s funnel being the extreme with no data. In these cases, the non-centered parameterization, discussed in the next section, is preferable; when there is a lot of data, the centered parameterization is more efficient. See Betancourt and Girolami (2013) for more information on the effects of centering in hierarchical models fit with Hamiltonian Monte Carlo.

+
+
+
+

Non-centered parameterization

+

Sometimes the group-level effects do not constrain the hierarchical distribution tightly. Examples arise when there are not many groups, or when the inter-group variation is high. In such cases, hierarchical models can be made much more efficient by shifting the data’s correlation with the parameters to the hyperparameters. Similar to the funnel example, this will be much more efficient in terms of effective sample size when there is not much data (see Betancourt and Girolami (2013)), and in more extreme cases will be necessary to achieve convergence.

+
parameters {
+  real mu_beta;
+  real<lower=0> sigma_beta;
+  vector[K] beta_raw;
+  // ...
+}
+transformed parameters {
+  vector[K] beta;
+  // implies: beta ~ normal(mu_beta, sigma_beta)
+  beta = mu_beta + sigma_beta * beta_raw;
+}
+model {
+  beta_raw ~ std_normal();
+  // ...
+}
+

Any priors defined for mu_beta and sigma_beta remain as defined in the original model.

+

Alternatively, Stan’s affine transform can be used to decouple sigma and beta:

+
parameters {
+  real mu_beta;
+  real<lower=0> sigma_beta;
+  vector<offset=mu_beta, multiplier=sigma_beta>[K] beta;
+  // ...
+}
+model {
+  beta ~ normal(mu_beta, sigma_beta);
+  // ...
+}
+

Reparameterization of hierarchical models is not limited to the normal distribution, although the normal distribution is the best candidate for doing so. In general, any distribution of parameters in the location-scale family is a good candidate for reparameterization. Let \(\beta = l + s\alpha\) where \(l\) is a location parameter and \(s\) is a scale parameter. The parameter \(l\) need not be the mean, \(s\) need not be the standard deviation, and neither the mean nor the standard deviation need to exist. If \(\alpha\) and \(\beta\) are from the same distributional family but \(\alpha\) has location zero and unit scale, while \(\beta\) has location \(l\) and scale \(s\), then that distribution is a location-scale distribution. Thus, if \(\alpha\) were a parameter and \(\beta\) were a transformed parameter, then a prior distribution from the location-scale family on \(\alpha\) with location zero and unit scale implies a prior distribution on \(\beta\) with location \(l\) and scale \(s\). Doing so would reduce the dependence between \(\alpha\), \(l\), and \(s\).

+

There are several univariate distributions in the location-scale family, such as the Student t distribution, including its special cases of the Cauchy distribution (with one degree of freedom) and the normal distribution (with infinite degrees of freedom). As shown above, if \(\alpha\) is distributed standard normal, then \(\beta\) is distributed normal with mean \(\mu = l\) and standard deviation \(\sigma = s\). The logistic, the double exponential, the generalized extreme value distributions, and the stable distribution are also in the location-scale family.

+

Also, if \(z\) is distributed standard normal, then \(z^2\) is distributed chi-squared with one degree of freedom. By summing the squares of \(K\) independent standard normal variates, one can obtain a single variate that is distributed chi-squared with \(K\) degrees of freedom. However, for large \(K\), the computational gains of this reparameterization may be overwhelmed by the computational cost of specifying \(K\) primitive parameters just to obtain one transformed parameter to use in a model.

+
+
+

Multivariate reparameterizations

+

The benefits of reparameterization are not limited to univariate distributions. A parameter with a multivariate normal prior distribution is also an excellent candidate for reparameterization. Suppose you intend the prior for \(\beta\) to be multivariate normal with mean vector \(\mu\) and covariance matrix \(\Sigma\). Such a belief is reflected by the following code.

+
data {
+  int<lower=2> K;
+  vector[K] mu;
+  cov_matrix[K] Sigma;
+  // ...
+}
+parameters {
+  vector[K] beta;
+  // ...
+}
+model {
+  beta ~ multi_normal(mu, Sigma);
+  // ...
+}
+

In this case mu and Sigma are fixed data, but they could be unknown parameters, in which case their priors would be unaffected by a reparameterization of beta.

+

If \(\alpha\) has the same dimensions as \(\beta\) but the elements of \(\alpha\) are independently and identically distributed standard normal such that \(\beta = \mu + L\alpha\), where \(LL^\top = \Sigma\), then \(\beta\) is distributed multivariate normal with mean vector \(\mu\) and covariance matrix \(\Sigma\). One choice for \(L\) is the Cholesky factor of \(\Sigma\). Thus, the model above could be reparameterized as follows.

+
data {
+  int<lower=2> K;
+  vector[K] mu;
+  cov_matrix[K] Sigma;
+  // ...
+}
+transformed data {
+  matrix[K, K] L;
+  L = cholesky_decompose(Sigma);
+}
+parameters {
+  vector[K] alpha;
+  // ...
+}
+transformed parameters {
+  vector[K] beta;
+  beta = mu + L * alpha;
+}
+model {
+  alpha ~ std_normal();
+  // implies: beta ~ multi_normal(mu, Sigma)
+  // ...
+}
+

This reparameterization is more efficient for two reasons. First, it reduces dependence among the elements of alpha and second, it avoids the need to invert Sigma every time multi_normal is evaluated.

+

The Cholesky factor is also useful when a covariance matrix is decomposed into a correlation matrix that is multiplied from both sides by a diagonal matrix of standard deviations, where either the standard deviations or the correlations are unknown parameters. The Cholesky factor of the covariance matrix is equal to the product of a diagonal matrix of standard deviations and the Cholesky factor of the correlation matrix. Furthermore, the product of a diagonal matrix of standard deviations and a vector is equal to the elementwise product between the standard deviations and that vector. Thus, if for example the correlation matrix Tau were fixed data but the vector of standard deviations sigma were unknown parameters, then a reparameterization of beta in terms of alpha could be implemented as follows.

+
data {
+  int<lower=2> K;
+  vector[K] mu;
+  corr_matrix[K] Tau;
+  // ...
+}
+transformed data {
+  matrix[K, K] L;
+  L = cholesky_decompose(Tau);
+}
+parameters {
+  vector[K] alpha;
+  vector<lower=0>[K] sigma;
+  // ...
+}
+transformed parameters {
+  vector[K] beta;
+  // This equals mu + diag_matrix(sigma) * L * alpha;
+  beta = mu + sigma .* (L * alpha);
+}
+model {
+  sigma ~ cauchy(0, 5);
+  alpha ~ std_normal();
+  // implies: beta ~ multi_normal(mu,
+  //  diag_matrix(sigma) * L * L' * diag_matrix(sigma))
+  // ...
+}
+

This reparameterization of a multivariate normal distribution in terms of standard normal variates can be extended to other multivariate distributions that can be conceptualized as contaminations of the multivariate normal, such as the multivariate Student t and the skew multivariate normal distribution.

+

A Wishart distribution can also be reparameterized in terms of standard normal variates and chi-squared variates. Let \(L\) be the Cholesky factor of a \(K \times K\) positive definite scale matrix \(S\) and let \(\nu\) be the degrees of freedom. If \[ +A = \begin{pmatrix} +\sqrt{c_{1}} & 0 & \cdots & 0 \\ +z_{21} & \sqrt{c_{2}} & \ddots & \vdots \\ +\vdots & \ddots & \ddots & 0 \\ +z_{K1} & \cdots & z_{K\left(K-1\right)} & \sqrt{c_{K}} +\end{pmatrix}, +\] where each \(c_i\) is distributed chi-squared with \(\nu - i + 1\) degrees of freedom and each \(z_{ij}\) is distributed standard normal, then \(W = LAA^{\top}L^{\top}\) is distributed Wishart with scale matrix \(S = LL^{\top}\) and degrees of freedom \(\nu\). Such a reparameterization can be implemented by the following Stan code:

+
data {
+  int<lower=1> N;
+  int<lower=1> K;
+  int<lower=K + 2> nu;
+  matrix[K, K] L; // Cholesky factor of scale matrix
+  vector[K] mu;
+  matrix[N, K] y;
+  // ...
+}
+parameters {
+  vector<lower=0>[K] c;
+  vector[(K * (K - 1)) %/% 2] z;
+  // ...
+}
+model {
+  matrix[K, K] A;
+  int count = 1;
+  for (j in 1:(K - 1)) {
+    for (i in (j + 1):K) {
+      A[i, j] = z[count];
+      count += 1;
+    }
+    for (i in 1:(j - 1)) {
+      A[i, j] = 0.0;
+    }
+    A[j, j] = sqrt(c[j]);
+  }
+  for (i in 1:(K - 1)) {
+    A[i, K] = 0;
+  }
+  A[K, K] = sqrt(c[K]);
+
+  for (i in 1:K) {
+    c[i] ~ chi_square(nu - i + 1);
+  }
+
+  z ~ std_normal();
+  // implies: L * A * A' * L' ~ wishart(nu, L * L')
+  y ~ multi_normal_cholesky(mu, L * A);
+  // ...
+}
+

This reparameterization is more efficient for three reasons. First, it reduces dependence among the elements of z and second, it avoids the need to invert the covariance matrix, \(W\) every time wishart is evaluated. Third, if \(W\) is to be used with a multivariate normal distribution, you can pass \(L A\) to the more efficient multi_normal_cholesky function, rather than passing \(W\) to multi_normal.

+

If \(W\) is distributed Wishart with scale matrix \(S\) and degrees of freedom \(\nu\), then \(W^{-1}\) is distributed inverse Wishart with inverse scale matrix \(S^{-1}\) and degrees of freedom \(\nu\). Thus, the previous result can be used to reparameterize the inverse Wishart distribution. Since \(W = L A A^{\top} L^{\top}\), \(W^{-1} = L^{{\top}^{-1}} A^{{\top}^{-1}} A^{-1} L^{-1}\), where all four inverses exist, but \(L^{{-1}^{\top}} = L^{{\top}^{-1}}\) and \(A^{{-1}^{\top}} = A^{{\top}^{-1}}\). We can slightly modify the above Stan code for this case:

+
data {
+  int<lower=1> K;
+  int<lower=K + 2> nu;
+  matrix[K, K] L; // Cholesky factor of scale matrix
+  // ...
+}
+transformed data {
+  matrix[K, K] eye;
+  matrix[K, K] L_inv;
+  for (j in 1:K) {
+    for (i in 1:K) {
+      eye[i, j] = 0.0;
+    }
+    eye[j, j] = 1.0;
+  }
+  L_inv = mdivide_left_tri_low(L, eye);
+}
+parameters {
+  vector<lower=0>[K] c;
+  vector[(K * (K - 1)) %/% 2] z;
+  // ...
+}
+model {
+  matrix[K, K] A;
+  matrix[K, K] A_inv_L_inv;
+  int count;
+  count = 1;
+  for (j in 1:(K - 1)) {
+    for (i in (j + 1):K) {
+      A[i, j] = z[count];
+      count += 1;
+    }
+    for (i in 1:(j - 1)) {
+      A[i, j] = 0.0;
+    }
+    A[j, j] = sqrt(c[j]);
+  }
+  for (i in 1:(K - 1)) {
+    A[i, K] = 0;
+  }
+  A[K, K] = sqrt(c[K]);
+
+  A_inv_L_inv = mdivide_left_tri_low(A, L_inv);
+  for (i in 1:K) {
+    c[i] ~ chi_square(nu - i + 1);
+  }
+
+  z ~ std_normal(); // implies: crossprod(A_inv_L_inv) ~
+  // inv_wishart(nu, L_inv' * L_inv)
+  // ...
+}
+

Another candidate for reparameterization is the Dirichlet distribution with all \(K\) shape parameters equal. Zyczkowski and Sommers (2001) show that if \(\theta_i\) is equal to the sum of \(\beta\) independent squared standard normal variates and \(\rho_i = \frac{\theta_i}{\sum \theta_i}\), then the \(K\)-vector \(\rho\) is distributed Dirichlet with all shape parameters equal to \(\frac{\beta}{2}\). In particular, if \(\beta = 2\), then \(\rho\) is uniformly distributed on the unit simplex. Thus, we can make \(\rho\) be a transformed parameter to reduce dependence, as in:

+
data {
+  int<lower=1> beta;
+  // ...
+}
+parameters {
+  array[K] vector[beta] z;
+  // ...
+}
+transformed parameters {
+  simplex[K] rho;
+  for (k in 1:K) {
+    rho[k] = dot_self(z[k]); // sum-of-squares
+  }
+  rho = rho / sum(rho);
+}
+model {
+  for (k in 1:K) {
+    z[k] ~ std_normal();
+  }
+  // implies: rho ~ dirichlet(0.5 * beta * ones)
+  // ...
+}
+
+
+
+

Vectorization

+
+

Gradient bottleneck

+

Stan spends the vast majority of its time computing the gradient of the log probability function, making gradients the obvious target for optimization. Stan’s gradient calculations with algorithmic differentiation require a template expression to be allocated and constructed for each subexpression of a Stan program involving parameters or transformed parameters.2 This section defines optimization strategies based on vectorizing these subexpressions to reduce the work done during algorithmic differentiation.

+
+
+

Vectorizing summations

+

Because of the gradient bottleneck described in the previous section, it is more efficient to collect a sequence of summands into a vector or array and then apply the sum() operation than it is to continually increment a variable by assignment and addition. For example, consider the following code snippet, where foo() is some operation that depends on n.

+
for (n in 1:N) {
+  total += foo(n,...);
+}
+

This code has to create intermediate representations for each of the N summands.

+

A faster alternative is to copy the values into a vector, then apply the sum() operator, as in the following refactoring.

+
{
+  vector[N] summands;
+  for (n in 1:N) {
+    summands[n] = foo(n,...);
+  }
+  total = sum(summands);
+}
+

Syntactically, the replacement is a statement block delineated by curly brackets ({, }), starting with the definition of the local variable summands.

+

Even though it involves extra work to allocate the summands vector and copy N values into it, the savings in differentiation more than make up for it. Perhaps surprisingly, it will also use substantially less memory overall than incrementing total within the loop.

+
+
+

Vectorization through matrix operations

+

The following program directly encodes a linear regression with fixed unit noise using a two-dimensional array x of predictors, an array y of outcomes, and an array beta of regression coefficients.

+
data {
+  int<lower=1> K;
+  int<lower=1> N;
+  array[N, K] real x;
+  array[N] real y;
+}
+parameters {
+  array[K] real beta;
+}
+model {
+  for (n in 1:N) {
+    real gamma = 0;
+    for (k in 1:K) {
+      gamma += x[n, k] * beta[k];
+    }
+    y[n] ~ normal(gamma, 1);
+  }
+}
+

The following model computes the same log probability function as the previous model, even supporting the same input files for data and initialization.

+
data {
+  int<lower=1> K;
+  int<lower=1> N;
+  array[N] vector[K] x;
+  array[N] real y;
+}
+parameters {
+  vector[K] beta;
+}
+model {
+  for (n in 1:N) {
+    y[n] ~ normal(dot_product(x[n], beta), 1);
+  }
+}
+

Although it produces equivalent results, the dot product should not be replaced with a transpose and multiply, as in

+
y[n] ~ normal(x[n]' * beta, 1);
+

The relative inefficiency of the transpose and multiply approach is that the transposition operator allocates a new vector into which the result of the transposition is copied. This consumes both time and memory.3

+

The inefficiency of transposition could itself be mitigated by reordering the product and pulling the transposition out of the loop, as follows.

+
// ...
+transformed parameters {
+  row_vector[K] beta_t;
+  beta_t = beta';
+}
+model {
+  for (n in 1:N) {
+    y[n] ~ normal(beta_t * x[n], 1);
+  }
+}
+

The problem with transposition could be completely solved by directly encoding the x as a row vector, as in the following example.

+
data {
+  // ...
+  array[N] row_vector[K] x;
+  // ...
+}
+parameters {
+  vector[K] beta;
+}
+model {
+  for (n in 1:N) {
+    y[n] ~ normal(x[n] * beta, 1);
+  }
+}
+

Declaring the data as a matrix and then computing all the predictors at once using matrix multiplication is more efficient still, as in the example discussed in the next section.

+

Having said all this, the most efficient way to code this model is with direct matrix multiplication, as in

+
data {
+  matrix[N, K] x;
+  vector[N] y;
+}
+parameters {
+  vector[K] beta;
+}
+model {
+  y ~ normal(x * beta, 1);
+}
+

In general, encapsulated single operations that do the work of loops will be more efficient in their encapsulated forms. Rather than performing a sequence of row-vector/vector multiplications, it is better to encapsulate it as a single matrix/vector multiplication.

+
+
+

Vectorized probability functions

+

The final and most efficient version replaces the loops and transformed parameters by using the vectorized form of the normal probability function, as in the following example.

+
data {
+  int<lower=1> K;
+  int<lower=1> N;
+  matrix[N, K] x;
+  vector[N] y;
+}
+parameters {
+  vector[K] beta;
+}
+model {
+  y ~ normal(x * beta, 1);
+}
+

The variables are all declared as either matrix or vector types. The result of the matrix-vector multiplication x * beta in the model block is a vector of the same length as y.

+

The probability function documentation in the function reference manual indicates which of Stan’s probability functions support vectorization; see the function reference manual for full details. Vectorized probability functions accept either vector or scalar inputs for all arguments, with the only restriction being that all vector arguments are the same dimensionality. In the example above, y is a vector of size N, x * beta is a vector of size N, and 1 is a scalar.

+
+
+

Reshaping data for vectorization

+

Sometimes data does not arrive in a shape that is ideal for vectorization, but can be put into such shape with some munging (either inside Stan’s transformed data block or outside).

+

John Hall provided a simple example on the Stan users group. Simplifying notation a bit, the original model had a sampling statement in a loop, as follows.

+
for (n in 1:N) {
+  y[n] ~ normal(mu[ii[n]], sigma);
+}
+

The brute force vectorization would build up a mean vector and then vectorize all at once.

+
{
+  vector[N] mu_ii;
+  for (n in 1:N) {
+    mu_ii[n] = mu[ii[n]];
+  }
+  y ~ normal(mu_ii, sigma);
+}
+

If there aren’t many levels (values ii[n] can take), then it behooves us to reorganize the data by group in a case like this. Rather than having a single observation vector y, there are K of them. And because Stan doesn’t support ragged arrays, it means K declarations. For instance, with 5 levels, we have

+
y_1 ~ normal(mu[1], sigma);
+// ...
+y_5 ~ normal(mu[5], sigma);
+

This way, both the mu and sigma parameters are shared. Which way works out to be more efficient will depend on the shape of the data; if the sizes are small, the simple vectorization may be faster, but for moderate to large sized groups, the full expansion should be faster.

+
+
+
+

Exploiting sufficient statistics

+

In some cases, models can be recoded to exploit sufficient statistics in estimation. This can lead to large efficiency gains compared to an expanded model. This section provides examples for Bernoulli and normal distributions, but the same approach can be applied to other members of the exponential family.

+
+

Bernoulli sufficient statistics

+

Consider the following Bernoulli sampling model.

+
data {
+  int<lower=0> N;
+  array[N] int<lower=0, upper=1> y;
+  real<lower=0> alpha;
+  real<lower=0> beta;
+}
+parameters {
+  real<lower=0, upper=1> theta;
+}
+model {
+  theta ~ beta(alpha, beta);
+  for (n in 1:N) {
+    y[n] ~ bernoulli(theta);
+  }
+}
+

In this model, the sum of positive outcomes in y is a sufficient statistic for the chance of success theta. The model may be recoded using the binomial distribution as follows.

+
theta ~ beta(alpha, beta);
+sum(y) ~ binomial(N, theta);
+

Because truth is represented as one and falsehood as zero, the sum sum(y) of a binary vector y is equal to the number of positive outcomes out of a total of N trials.

+

This can be generalized to other discrete cases (one wouldn’t expect continuous observations to be duplicated if they are random). Suppose there are only \(K\) possible discrete outcomes, \(z_1, \dotsc, z_K\), but there are \(N\) observations, where \(N\) is much larger than \(K\). If \(f_k\) is the frequency of outcome \(z_k\), then the entire likelihood with distribution foo can be coded as follows.

+
for (k in 1:K) {
+  target += f[k] * foo_lpmf(z[k] | ...);
+}
+

where the ellipses are the parameters of the log probability mass function for distribution foo (there’s no distribution called “foo”; this is just a placeholder for any discrete distribution name).

+

The resulting program looks like a “weighted” regression, but here the weights f[k] are counts and thus sufficient statistics for the PMF and simply amount to an alternative, more efficient coding of the same likelihood. For efficiency, the frequencies f[k] should be counted once in the transformed data block and stored.

+

The same trick works for combining multiple binomial observations.

+
+
+

Normal sufficient statistics

+

Consider the following Stan model for fitting a normal distribution to data.

+
data {
+  int N;
+  vector[N] y;
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  y ~ normal(mu, sigma);
+}
+

With the vectorized form used for y, Stan is clever enough to only evaluate log(sigma) once, but it still has to evaluate the normal for all of y[1] to y[N], which involves adding up all the squared differences from the mean and then dividing by sigma squared.

+

An equivalent density to the one above (up to normalizing constants that do not depend on parameters), is given in the following Stan program.

+
data {
+  int N;
+  vector[N] y;
+}
+transformed data {
+  real mean_y = mean(y);
+  real<lower=0> var_y = variance(y);
+  real nm1_over2 = 0.5 * (N - 1);
+  real sqrt_N = sqrt(N);
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  mean_y ~ normal(mu, sigma / sqrt_N);
+  var_y ~ gamma(nm1_over2, nm1_over2 / sigma^2);
+}
+

The data and parameters are the same in this program as in the first. The second version adds a transformed data block to compute the mean and variance of the data, which are the sufficient statistics here. These are stored along with two other useful constants. Then the program can define distributions over the mean and variance, both of which are scalars here.

+

The original Stan program and this one define the same model in the sense that they define the same log density up to a constant additive term that does not depend on the parameters. The priors on mu and sigma are both improper, but proper priors could be added as additional statements in the model block without affecting the sufficiency.

+

This transform explicitly relies on aggregating the data. Using this trick on parameters leads to more computation than just computing the normal log density, even before accounting for the non-linear change of variables in the variance.

+
+
+

Poisson sufficient statistics

+

The Poisson distribution is the easiest case, because the sum of observations is sufficient. Specifically, we can replace

+
y ~ poisson(lambda);
+

with

+
sum(y) ~ poisson(size(y) * lambda);
+

This will work even if y is a parameter vector because no Jacobian adjustment is required for summation.

+
+
+
+

Aggregating common subexpressions

+

If an expression is calculated once, the value should be saved and reused wherever possible. That is, rather than using exp(theta) in multiple places, declare a local variable to store its value and reuse the local variable.

+

Another case that may not be so obvious is with two multilevel parameters, say a[ii[n]] + b[jj[n]]. If a and b are small (i.e., do not have many levels), then a table a_b of their sums can be created, with

+
matrix[size(a), size(b)] a_b;
+for (i in 1:size(a)) {
+  for (j in 1:size(b)) {
+    a_b[i, j] = a[i] + b[j];
+  }
+}
+

Then the sum can be replaced with a_b[ii[n], jj[n]].

+
+
+

Exploiting conjugacy

+

Continuing the model from the previous section, the conjugacy of the beta prior and binomial distribution allows the model to be further optimized to the following equivalent form.

+
theta ~ beta(alpha + sum(y), beta + N - sum(y));
+

To make the model even more efficient, a transformed data variable defined to be sum(y) could be used in the place of sum(y).

+
+
+

Standardizing predictors

+

Standardizing the data so that all predictors have a zero sample mean and unit sample variance has the following potential benefits:

+
    +
  • It helps in faster convergence of MCMC chains.
  • +
  • It makes the model less sensitive to the specifics of the parameterization.
  • +
  • It aids in the interpretation and comparison of the importance of coefficients across different predictors.
  • +
+

When there are large differences between the units and scales of the predictors, standardizing the predictors is especially useful. This section illustrates the principle for a simple linear regression.

+

Suppose that \(y = (y_1,\dotsc,y_N)\) is a vector of \(N\) outcomes and \(x = (x_1,\dotsc,x_N)\) the corresponding vector of \(N\) predictors. A simple linear regression involving an intercept coefficient \(\alpha\) and slope coefficient \(\beta\) can be expressed as \[ +y_n = \alpha + \beta x_n + \epsilon_n, +\] where \[ +\epsilon_n \sim \textsf{normal}(0,\sigma). +\]

+

If \(x\) has very large or very small values or if the mean of the values is far away from 0 (on the scale of the values), then it can be more efficient to standardize the predictor values \(x_n\). First the elements of \(x\) are zero-centered by subtracting the mean, then scaled by dividing by the standard deviation.

+

The mean of \(x\) is given by:

+

\[ +mean_x = \frac{1}{N} \sum_{n=1}^{N} x_n +\]

+

The standard deviation of \(x\) is calculated as: \[ +sd_x = {\left({\frac{1}{N} \sum_{n=1}^{N} (x_n - mean_x)^2}\right)}^{1/2} +\]

+

With these, we compute the \(z\), the standardized predictors

+

\[ +z_n = \frac{x_n - mean_x}{sd_x} +\]

+

where \(z_n\) is the standardized value corresponding to \(x_n\).

+

The inverse transform is defined by reversing the two normalization steps, first rescaling by the same standard deviation and then relocating by the sample mean.

+

\[ +x_n = z_n sd_x + mean_x +\]

+

Standardizing the predictors standardizes the scale of the variables, and hence the scale of the priors.

+

Consider the following initial model.

+
data {
+  int<lower=0> N;
+  vector[N] y;
+  vector[N] x;
+}
+parameters {
+  real alpha;
+  real beta;
+  real<lower=0> sigma;
+}
+model {
+  // priors
+  alpha ~ normal(0, 10);
+  beta ~ normal(0, 10);
+  sigma ~ normal(0, 5);
+  // likelihood
+  y ~ normal(x * beta + alpha, sigma);
+}
+

The data block for the standardized model is identical. The mean and standard deviation of the data are defined in the transformed data block, along with the standardized predictors.

+
data {
+  int<lower=0> N;
+  vector[N] y;
+  vector[N] x;
+}
+transformed data {
+  real mean_x = mean(x);
+  real sd_x = sd(x);
+  vector[N] x_std = (x - mean_x) / sd_x;
+}
+parameters {
+  real alpha_std;
+  real beta_std;
+  real<lower=0> sigma_std;
+}
+model {
+  alpha_std ~ normal(0, 10);
+  beta_std ~ normal(0, 10);
+  sigma_std ~ normal(0, 5);
+  y ~ normal(x_std * beta_std + alpha_std, sigma_std);
+}
+

The parameters are renamed to indicate that they aren’t the “natural” parameters. The transformed data x_std is defined in terms of variables mean_x and sd_x; by declaring these variables in the transformed data block, they will be available in all following blocks, and therefore can be used in the generated quantities block to record the “natural” parameters alpha and beta.

+

The fairly diffuse priors on the coefficients are the same. These could have been transformed as well, but here they are left as is, because the scales make sense as diffuse priors for standardized data.

+

The original regression \[ +y_n = \alpha + \beta x_n + \epsilon_n +\] has been transformed to a regression on the standardized data variable \(z\),

+

\[ +y_n = \alpha' + \beta' z_n + \epsilon_n. +\]

+

The likelihood is specified in terms of the standardized parameters. The original slope \(\beta\) is the standardized slope \(\beta'\) scaled by the inverse of the standard deviation of \(x\). The original intercept \(\alpha\) is the intercept from the standardized model \(\alpha'\), corrected for the effect of scaling and centering \(x\). Thus, the formulas to retrieve \(\alpha\) and \(\beta\) from \(\alpha'\) and \(\beta'\) are:

+

\[\begin{align*} +\beta &= \frac{\beta'}{sd_x} \\ +\alpha &= \alpha' - \beta' \frac{mean_x}{sd_x} +\end{align*}\]

+

These recovered parameter values on the original scales can be calculated within Stan using a generated quantities block following the model block,

+
generated quantities {
+  real beta = beta_std / sd_x;
+  real alpha = alpha_std - beta_std * mean_x / sd_x;
+
+}
+

When there are multiple real-valued predictors, i.e., when K is the number of predictors, x is an \(N \times K\) matrix, and beta is a \(K\)-vector of coefficients, then x * beta is an \(N\)-vector of predictions, one for each of the \(N\) data items. When \(K \ll N\) the QR reparameterization is recommended for linear and generalized linear models unless there is an informative prior on the location of \(\beta\).

+
+

Standard normal distribution

+

For many applications on the standard scale, normal distributions with location zero and scale one will be used. In these cases, it is more efficient to use

+
y ~ std_normal();
+

than to use

+
y ~ normal(0, 1);
+

because the subtraction of the location and division by the scale cancel, as does subtracting the log of the scale.

+
+
+
+

Using map-reduce

+

The map-reduce operation, even without multi-core MPI support, can be used to make programs more scalable and also more efficient. See the map-reduce chapter for more information on implementing map-reduce operations.

+

Map-reduce allows greater scalability because only the Jacobian of the mapped function for each shard is stored. The Jacobian consists of all of the derivatives of the outputs with respect to the parameters. During execution, the derivatives of the shard are evaluated using nested automatic differentiation. As often happens with modern CPUs, reduced memory overhead leads to increased memory locality and faster execution. The Jacobians are all computed with local memory and their outputs stored contiguously in memory.

+ + + +
+
+ + + Back to top

References

+
+Betancourt, Michael, and Mark Girolami. 2013. “Hamiltonian Monte Carlo for Hierarchical Models.” arXiv 1312.0906. http://arxiv.org/abs/1312.0906. +
+
+Neal, Radford M. 2003. “Slice Sampling.” Annals of Statistics 31 (3): 705–67. +
+
+Papaspiliopoulos, Omiros, Gareth O. Roberts, and Martin Sköld. 2007. “A General Framework for the Parametrization of Hierarchical Models.” Statistical Science 22 (1): 59–73. +
+
+Zyczkowski, K., and H. J. Sommers. 2001. “Induced Measures in the Space of Mixed Quantum States.” Journal of Physics A: Mathematical and General 34 (35): 7111. +
+

Footnotes

+ +
    +
  1. This parameterization came to be known on our mailing lists as the “Matt trick” after Matt Hoffman, who independently came up with it while fitting hierarchical models in Stan.↩︎

  2. +
  3. Stan uses its own arena-based allocation, so allocation and deallocation are faster than with a raw call to new.↩︎

  4. +
  5. Future versions of Stan may remove this inefficiency by more fully exploiting expression templates inside the Eigen C++ matrix library. This will require enhancing Eigen to deal with mixed-type arguments, such as the type double used for constants and the algorithmic differentiation type stan::math::var used for variables.↩︎

  6. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/finite-mixtures.html b/docs/2_39/stan-users-guide/finite-mixtures.html new file mode 100644 index 000000000..b3a240090 --- /dev/null +++ b/docs/2_39/stan-users-guide/finite-mixtures.html @@ -0,0 +1,1659 @@ + + + + + + + + + +Finite Mixtures + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Finite Mixtures

+

Finite mixture models of an outcome assume that the outcome is drawn from one of several distributions, the identity of which is controlled by a categorical mixing distribution. Mixture models typically have multimodal densities with modes near the modes of the mixture components. Mixture models may be parameterized in several ways, as described in the following sections. Mixture models may be used directly for modeling data with multimodal distributions, or they may be used as priors for other parameters.

+
+

Relation to clustering

+

Clustering models, as discussed in the clustering chapter, are just a particular class of mixture models that have been widely applied to clustering in the engineering and machine-learning literature. The normal mixture model discussed in this chapter reappears in multivariate form as the statistical basis for the \(K\)-means algorithm; the latent Dirichlet allocation model, usually applied to clustering problems, can be viewed as a mixed-membership multinomial mixture model.

+
+
+

Latent discrete parameterization

+

One way to parameterize a mixture model is with a latent categorical variable indicating which mixture component was responsible for the outcome. For example, consider \(K\) normal distributions with locations \(\mu_k \in \mathbb{R}\) and scales \(\sigma_k \in (0,\infty)\). Now consider mixing them in proportion \(\lambda\), where \(\lambda_k \geq 0\) and \(\sum_{k=1}^K \lambda_k = 1\) (i.e., \(\lambda\) lies in the unit \(K\)-simplex). For each outcome \(y_n\) there is a latent variable \(z_n\) in \(\{ 1,\dotsc,K \}\) with a categorical distribution parameterized by \(\lambda\), \[ +z_n \sim \textsf{categorical}(\lambda). +\]

+

The variable \(y_n\) is distributed according to the parameters of the mixture component \(z_n\), \[ +y_n \sim \textsf{normal}(\mu_{z[n]},\sigma_{z[n]}). +\]

+

This model is not directly supported by Stan because it involves discrete parameters \(z_n\), but Stan can sample \(\mu\) and \(\sigma\) by summing out the \(z\) parameter as described in the next section.

+
+
+

Summing out the responsibility parameter

+

To implement the normal mixture model outlined in the previous section in Stan, the discrete parameters can be summed out of the model. If \(Y\) is a mixture of \(K\) normal distributions with locations \(\mu_k\) and scales \(\sigma_k\) with mixing proportions \(\lambda\) in the unit \(K\)-simplex, then \[ +p_Y\left(y \mid \lambda, \mu, \sigma \right) += +\sum_{k=1}^K \lambda_k \, \textsf{normal}\left(y \mid \mu_k, \sigma_k\right). +\]

+
+

Log sum of exponentials: linear sums on the log scale

+

The log sum of exponentials function is used to define mixtures on the log scale. It is defined for two inputs by \[ +\texttt{log}\mathtt{\_}\texttt{sum}\mathtt{\_}\texttt{exp}(a, b) = \log \left(\exp(a) + \exp(b)\right). +\]

+

If \(a\) and \(b\) are probabilities on the log scale, then \(\exp(a) + +\exp(b)\) is their sum on the linear scale, and the outer log converts the result back to the log scale; to summarize, log_sum_exp does linear addition on the log scale. The reason to use Stan’s built-in log_sum_exp function is that it can prevent underflow and overflow in the exponentiation, by calculating the result as \[ +\log \left( \exp(a) + \exp(b)\right) += c + \log \left( \exp(a - c) + \exp(b - c) \right), +\] where \(c = \max(a, b)\). In this evaluation, one of the terms, \(a - c\) or \(b - c\), is zero and the other is negative, thus eliminating the possibility of overflow or underflow in the leading term while extracting the most arithmetic precision possible by pulling the \(\max(a, b)\) out of the log-exp round trip.

+

For example, the mixture of \(\textsf{normal}(-1, 2)\) with \(\textsf{normal}(3, 1)\), with mixing proportion \(\lambda = +[0.3,0.7]^{\top}\), can be implemented in Stan as follows.

+
parameters {
+  real y;
+}
+model {
+  target += log_sum_exp(log(0.3) + normal_lpdf(y | -1, 2),
+                        log(0.7) + normal_lpdf(y | 3, 1));
+}
+

The log probability term is derived by taking \[\begin{align*} +\log\, &p\left(y \mid \lambda,\mu,\sigma \right) \\ +&= \log\big( 0.3 \times \textsf{normal}\left(y \mid -1,2 \right) + + 0.7 \times \textsf{normal}\left(y \mid 3,1 \right) \big) \\ +&= \log\bigg( \exp\Big(\log\big(0.3 \times \textsf{normal}\left(y \mid -1,2 \right)\big)\Big) + + \exp\Big(\log\big(0.7 \times \textsf{normal}\left(y \mid 3,1 \right)\big)\Big) \bigg) \\ +&= \texttt{log}\mathtt{\_}\texttt{sum}\mathtt{\_}\texttt{exp}\big( + \log(0.3) + \log \textsf{normal}\left(y \mid -1,2 \right), + \log(0.7) + \log \textsf{normal}\left(y \mid 3,1 \right) \big). +\end{align*}\]

+
+
+

Dropping uniform mixture ratios

+

If a two-component mixture has a mixing ratio of 0.5, then the mixing ratios can be dropped. To see why, suppose the mixture is coded as

+
log_half = log(0.5);
+for (n in 1:N) {
+    target +=
+        log_sum_exp(log_half + normal_lpdf(y[n] | mu[1], sigma[1]),
+                    log_half + normal_lpdf(y[n] | mu[2], sigma[2]));
+}
+

then the \(\log 0.5\) term isn’t contributing to the proportional density, and the above can be replaced with the more efficient version

+
for (n in 1:N) {
+  target += log_sum_exp(normal_lpdf(y[n] | mu[1], sigma[1]),
+                        normal_lpdf(y[n] | mu[2], sigma[2]));
+}
+

The same result holds if there are \(K\) components and the mixing simplex \(\lambda\) is symmetric, i.e., \[ +\lambda = \left( \frac{1}{K}, \dotsc, \frac{1}{K} \right). +\]

+

The result follows from the identity \[ +\texttt{log}\mathtt{\_}\texttt{sum}\mathtt{\_}\texttt{exp}(c + a, c + b) += +c + \texttt{log}\mathtt{\_}\texttt{sum}\mathtt{\_}\texttt{exp}(a, b) +\] and the fact that adding a constant \(c\) to the log density accumulator has no effect because the log density is only specified up to an additive constant in the first place. There is nothing specific to the normal distribution here; constants may always be dropped from the target.

+
+
+

Recovering posterior mixture proportions

+

The posterior \(p(z_n \mid y_n, \mu, \sigma)\) over the mixture indicator \(z_n +\in 1:K\) is often of interest as \(p(z_n = k \mid y_n, \mu, \sigma)\) is the posterior probability that observation \(y_n\) was generated by mixture component \(k\). The posterior can be computed via Bayes’s rule, \[\begin{align*} +\Pr\!\left[z_n = k \mid y_n, \mu, \sigma, \lambda \right] + &\propto p\left(y_n \mid z_n = k, \mu, \sigma\right)\, p\left(z_n = k \mid \lambda\right) \\ + &= \textsf{normal}\left(y_n \mid \mu_k, \sigma_k\right) \cdot \lambda_k. +\end{align*}\]

+

The normalization can be done via summation, because \(z_n \in 1{:}K\) only takes on finitely many values. In detail, \[ +p\left(z_n = k \mid y_n, \mu, \sigma, \lambda \right) = +\frac{p\left(y_n \mid z_n = k, \mu, \sigma \right) \cdot p\left(z_n = k \mid \lambda \right)} + {\sum_{k' = 1}^K p\left(y_n \mid z_n = k', \mu, \sigma \right) + \cdot p\left(z_n = k' \mid \lambda \right)}. +\]

+

On the log scale, the normalized probability is computed as \[\begin{align*} +\log\,&\Pr\!\left[z_n = k \mid y_n, \mu, \sigma, \lambda\right] \\ +&= \log p\left(y_n \mid z_n = k, \mu, \sigma\right) + \log \Pr\!\left[z_n = k \mid \lambda\right] \\ +&\quad - \texttt{log}\mathtt{\_}\texttt{sum}\mathtt{\_}\texttt{exp}_{k' = 1}^K \big(\log p\left(y_n \mid z_n = k', \mu, \sigma\right) + + \log p\left(z_n = k' \mid \lambda\right)\big). +\end{align*}\] This can be coded up directly in Stan; the change-point model in the change point section provides an example.

+
+
+

Estimating parameters of a mixture

+

Given the scheme for representing mixtures, it may be moved to an estimation setting, where the locations, scales, and mixing proportions are unknown. Further generalizing to a number of mixture components specified as data yields the following model.

+
data {
+  int<lower=1> K;          // number of mixture components
+  int<lower=1> N;          // number of data points
+  array[N] real y;         // observations
+}
+parameters {
+  simplex[K] theta;          // mixing proportions
+  ordered[K] mu;             // locations of mixture components
+  vector<lower=0>[K] sigma;  // scales of mixture components
+}
+model {
+  vector[K] log_theta = log(theta);  // cache log calculation
+  sigma ~ lognormal(0, 2);
+  mu ~ normal(0, 10);
+  for (n in 1:N) {
+    vector[K] lps = log_theta;
+    for (k in 1:K) {
+      lps[k] += normal_lpdf(y[n] | mu[k], sigma[k]);
+    }
+    target += log_sum_exp(lps);
+  }
+}
+

The model involves K mixture components and N data points. The mixing proportion parameter theta is declared to be a unit \(K\)-simplex, whereas the component location parameter mu and scale parameter sigma are both defined to be K-vectors.

+

The location parameter mu is declared to be an ordered vector in order to identify the model. This will not affect inferences that do not depend on the ordering of the components as long as the prior for the components mu[k] is symmetric, as it is here (each component has an independent \(\textsf{normal}(0, 10)\) prior). It would even be possible to include a hierarchical prior for the components.

+

The values in the scale vector sigma are constrained to be non-negative, and have a weakly informative prior given in the model chosen to avoid zero values and thus collapsing components.

+

The model declares a local vector variable lps of size K and uses it to accumulate the log contributions from the mixture components. The main action is in the loop over data points n. For each such point, the log of \(\theta_k \times +\textsf{normal}\left(y_n \mid \mu_k,\sigma_k\right)\) is calculated and added to the vector lps. Then the log probability is incremented with the log sum of exponentials of those values.

+
+
+
+

Vectorizing mixtures

+

There is (currently) no way to vectorize mixture models at the observation level in Stan. This section is to warn users away from attempting to vectorize naively, as it results in a different model. A proper mixture at the observation level is defined as follows, where we assume that lambda, y[n], mu[1], mu[2], and sigma[1], sigma[2] are all scalars and lambda is between 0 and 1.

+
for (n in 1:N) {
+  target += log_sum_exp(log(lambda)
+                          + normal_lpdf(y[n] | mu[1], sigma[1]),
+                        log1m(lambda)
+                          + normal_lpdf(y[n] | mu[2], sigma[2]));
+}
+

or equivalently

+
for (n in 1:N) {
+  target += log_mix(lambda,
+                    normal_lpdf(y[n] | mu[1], sigma[1]),
+                    normal_lpdf(y[n] | mu[2], sigma[2]));
+}
+

This definition assumes that each observation \(y_n\) may have arisen from either of the mixture components. The density is \[ +p\left(y \mid \lambda, \mu, \sigma\right) += \prod_{n=1}^N \big(\lambda \times \textsf{normal}\left(y_n \mid \mu_1, \sigma_1 \right) + + (1 - \lambda) \times \textsf{normal}\left(y_n \mid \mu_2, \sigma_2 \right)\big). +\]

+

Contrast the previous model with the following (erroneous) attempt to vectorize the model.

+
target += log_sum_exp(log(lambda)
+                        + normal_lpdf(y | mu[1], sigma[1]),
+                      log1m(lambda)
+                        + normal_lpdf(y | mu[2], sigma[2]));
+

or equivalently,

+
target += log_mix(lambda,
+                  normal_lpdf(y | mu[1], sigma[1]),
+                  normal_lpdf(y | mu[2], sigma[2]));
+

This second definition implies that the entire sequence \(y_1, \dotsc, y_N\) of observations comes from one component or the other, defining a different density, \[ +p\left(y \mid \lambda, \mu, \sigma \right) += \lambda \times \prod_{n=1}^N \textsf{normal}\left(y_n \mid \mu_1, \sigma_1\right) ++ (1 - \lambda) \times \prod_{n=1}^N \textsf{normal}\left(y_n \mid \mu_2, \sigma_2\right). +\]

+
+
+

Inferences supported by mixtures

+

In many mixture models, the mixture components are underlyingly exchangeable in the model and thus not identifiable. This arises if the parameters of the mixture components have exchangeable priors and the mixture ratio gets a uniform prior so that the parameters of the mixture components are also exchangeable in the likelihood.

+

We have finessed this basic problem by ordering the parameters. This will allow us in some cases to pick out mixture components either ahead of time or after fitting (e.g., male vs. female, or Democrat vs. Republican).

+

In other cases, we do not care about the actual identities of the mixture components and want to consider inferences that are independent of indexes. For example, we might only be interested in posterior predictions for new observations.

+
+

Mixtures with unidentifiable components

+

As an example, consider the normal mixture from the previous section, which provides an exchangeable prior on the pairs of parameters \((\mu_1, \sigma_1)\) and \((\mu_2, \sigma_2)\), \[\begin{align*} +\mu_1, \mu_2 &\sim \textsf{normal}(0, 10) \\ +\sigma_1, \sigma_2 &\sim \textsf{halfnormal}(0, 10) \\ +\end{align*}\]

+

The prior on the mixture ratio is uniform, \[ +\lambda \sim \textsf{uniform}(0, 1), +\] so that with the likelihood \[ +p\left(y_n \mid \mu, \sigma\right) += \lambda \times \textsf{normal}\left(y_n \mid \mu_1, \sigma_1\right) ++ (1 - \lambda) \times \textsf{normal}\left(y_n \mid \mu_2, \sigma_2\right), +\] the joint distribution \(p(y, \mu, \sigma, \lambda)\) is exchangeable in the parameters \((\mu_1, \sigma_1)\) and \((\mu_2, \sigma_2)\) with \(\lambda\) flipping to \(1 - \lambda\).1

+
+
+

Inference under label switching

+

In cases where the mixture components are not identifiable, it can be difficult to diagnose convergence of sampling or optimization algorithms because the labels will switch, or be permuted, in different MCMC chains or different optimization runs. Luckily, posterior inferences which do not refer to specific component labels are invariant under label switching and may be used directly. This subsection considers a pair of examples.

+
+

Posterior predictive distribution

+

Posterior predictive distribution for a new observation \(\tilde{y}\) given the complete parameter vector \(\theta\) will be \[ +p(\tilde{y} \mid y) += +\int_{\theta} +p(\tilde{y} \mid \theta) +\, p(\theta \mid y) +\, \textsf{d}\theta. +\]

+

The normal mixture example from the previous section, with \(\theta = +(\mu, \sigma, \lambda)\), shows that the model returns the same density under label switching and thus the predictive inference is sound. In Stan, that predictive inference can be done either by computing \(p(\tilde{y} \mid y)\), which is more efficient statistically in terms of effective sample size, or simulating draws of \(\tilde{y}\), which is easier to plug into other inferences. Both approaches can be coded directly in the generated quantities block of the program. Here’s an example of the direct (non-sampling) approach.

+
data {
+  int<lower=0> N_tilde;
+  vector[N_tilde] y_tilde;
+  // ...
+}
+generated quantities {
+  vector[N_tilde] log_p_y_tilde;
+  for (n in 1:N_tilde) {
+    log_p_y_tilde[n]
+      = log_mix(lambda,
+                normal_lpdf(y_tilde[n] | mu[1], sigma[1]),
+                normal_lpdf(y_tilde[n] | mu[2], sigma[2]));
+  }
+}
+

It is a bit of a bother afterwards, because the logarithm function isn’t linear and hence doesn’t distribute through averages (Jensen’s inequality shows which way the inequality goes). The right thing to do is to apply log_sum_exp to the posterior draws of log_p_y_tilde. The log of the average predictive density is then given by subtracting the log of the number of posterior draws.

+
+
+

Clustering and similarity

+

Often a mixture model will be applied to a clustering problem and there might be two data items \(y_i\) and \(y_j\) for which there is a question of whether they arose from the same mixture component. If we take \(z_i\) and \(z_j\) to be the component responsibility discrete variables, then the quantity of interest is \(z_i = z_j\), which can be summarized as an event probability \[ +\Pr[z_i = z_j \mid y] += +\int_{\theta} +\frac{\sum_{k=0}^1 p(z_i=k, z_j = k, y_i, y_j \mid \theta)} + {\sum_{k=0}^1 \sum_{m=0}^1 p(z_i = k, z_j = m, y_i, y_j \mid \theta)} +\, +p(\theta \mid y) +\, +\textsf{d}\theta. +\]

+

As with other event probabilities, this can be calculated in the generated quantities block either by sampling \(z_i\) and \(z_j\) and using the indicator function on their equality, or by computing the term inside the integral as a generated quantity. As with the posterior predictive distribution, working in expectation is more statistically efficient than sampling.

+
+
+
+
+

Zero-inflated and hurdle models

+

Zero-inflated and hurdle models both provide mixtures of a Poisson and Bernoulli probability mass function to allow more flexibility in modeling the probability of a zero outcome. Zero-inflated models, as defined by Lambert (1992), add additional probability mass to the outcome of zero. Hurdle models, on the other hand, are formulated as pure mixtures of zero and non-zero outcomes.

+

Zero inflation and hurdle models can be formulated for discrete distributions other than the Poisson. Zero inflation does not work for continuous distributions in Stan because of issues with derivatives; in particular, there is no way to add a point mass to a continuous distribution, such as zero-inflating a normal as a regression coefficient prior. Hurdle models can be formulated as a combination of a point mass at zero and a continuous distribution for positive values.

+
+

Zero inflation

+

Consider the following example for zero-inflated Poisson distributions. There is a probability \(\theta\) of observing a zero, and a probability \(1 - \theta\) of observing a count with a \(\textsf{Poisson}(\lambda)\) distribution (now \(\theta\) is being used for mixing proportions because \(\lambda\) is the traditional notation for a Poisson mean parameter). Given the probability \(\theta\) and the intensity \(\lambda\), the distribution for \(y_n\) can be written as \[\begin{align*} +y_n & = 0 & \quad\text{with probability } \theta, \text{ and}\\ +y_n & \sim \textsf{Poisson}(y_n \mid \lambda) & \quad\text{with probability } 1-\theta. +\end{align*}\]

+

Stan does not support conditional distribution statements (with ~) conditional on some parameter, and we need to consider the corresponding likelihood \[ +p(y_n \mid \theta,\lambda) += +\begin{cases} +\theta + (1 - \theta) \times \textsf{Poisson}(0 \mid \lambda) & \quad\text{if } y_n = 0, \text{ and}\\ +(1-\theta) \times \textsf{Poisson}(y_n \mid \lambda) &\quad\text{if } y_n > 0. +\end{cases} +\] The log likelihood can be coded directly in Stan (with target +=) as follows.

+
data {
+  int<lower=0> N;
+  array[N] int<lower=0> y;
+}
+parameters {
+  real<lower=0, upper=1> theta;
+  real<lower=0> lambda;
+}
+model {
+  for (n in 1:N) {
+    if (y[n] == 0) {
+      target += log_sum_exp(log(theta),
+                            log1m(theta)
+                              + poisson_lpmf(y[n] | lambda));
+    } else {
+      target += log1m(theta)
+                  + poisson_lpmf(y[n] | lambda);
+    }
+  }
+}
+

The log1m(theta) computes log(1-theta), but is more computationally stable. The log_sum_exp(lp1,lp2) function adds the log probabilities on the linear scale; it is defined to be equal to log(exp(lp1) + exp(lp2)), but is more computationally stable and faster.

+
+

Optimizing the zero-inflated Poisson model

+

The code given above to compute the zero-inflated Poisson redundantly calculates all of the Bernoulli terms and also poisson_lpmf(0 | lambda) every time the first condition body executes. The use of the redundant terms is conditioned on y, which is known when the data are read in. This allows the transformed data block to be used to compute some more convenient terms for expressing the log density each iteration.

+

The number of zero cases is computed and handled separately. Then the nonzero cases are collected into their own array for vectorization. The number of zeros is required to declare y_nonzero, so it must be computed in a function.

+
functions {
+  int num_zeros(array[] int y) {
+    int sum = 0;
+    for (n in 1:size(y)) {
+      sum += (y[n] == 0);
+    }
+    return sum;
+  }
+}
+// ...
+transformed data {
+  int<lower=0> N_zero = num_zeros(y);
+  array[N - N_zero] int<lower=1> y_nonzero;
+  int N_nonzero = 0;
+  for (n in 1:N) {
+    if (y[n] == 0) continue;
+    N_nonzero += 1;
+    y_nonzero[N_nonzero] = y[n];
+  }
+}
+// ...
+model {
+  // ...
+   target
+     += N_zero
+          * log_sum_exp(log(theta),
+                        log1m(theta)
+                          + poisson_lpmf(0 | lambda));
+   target += N_nonzero * log1m(theta);
+   target += poisson_lpmf(y_nonzero | lambda);
+  // ...
+}
+

The boundary conditions of all zeros and no zero outcomes are handled appropriately; in the vectorized case, if y_nonzero is empty, N_nonzero will be zero, and the last two target increment terms will add zeros.

+
+
+
+

Hurdle models

+

The hurdle model is similar to the zero-inflated model, but more flexible in that the zero outcomes can be deflated as well as inflated. Given the probability \(\theta\) and the intensity \(\lambda\), the distribution for \(y_n\) can be written as \[\begin{align*} +y_n & = 0 \quad\text{with probability } \theta, \text{ and}\\ +y_n & \sim \textsf{Poisson}_{x\neq 0}(y_n \mid \lambda) \quad\text{with probability } 1-\theta, +\end{align*}\] where \(\textsf{Poisson}_{x\neq 0}\) is a truncated Poisson distribution, truncated at \(0\).

+

The corresponding likelihood function for the hurdle model is defined by \[ +p(y\mid\theta,\lambda) += +\begin{cases} +\theta &\quad\text{if } y = 0, \text{ and}\\ +(1 - \theta) + \frac{\displaystyle \textsf{Poisson}(y \mid \lambda)} + {\displaystyle 1 - \textsf{PoissonCDF}(0 \mid \lambda)} +&\quad\text{if } y > 0, +\end{cases} +\] where \(\textsf{PoissonCDF}\) is the cumulative distribution function for the Poisson distribution and \(1 - \textsf{PoissonCDF}(0 \mid \lambda)\) is the relative normalization term for the truncated Poisson (truncated at \(0\)).

+

The hurdle model is even more straightforward to program in Stan, as it does not require an explicit mixture.

+
if (y[n] == 0) {
+  target += log(theta);
+} else {
+  target += log1m(theta) + poisson_lpmf(y[n] | lambda)
+            - poisson_lccdf(0 | lambda);
+}
+

Julian King pointed out that because \[\begin{align*} +\log \left( 1 - \textsf{PoissonCDF}(0 \mid \lambda) \right) + &= \log \left( 1 - \textsf{Poisson}(0 \mid \lambda) \right) \\ + &= \log(1 - \exp(-\lambda)) +\end{align*}\] the CCDF in the else clause can be replaced with a simpler expression.

+
target += log1m(theta) + poisson_lpmf(y[n] | lambda)
+          - log1m_exp(-lambda);
+

The resulting code is about 15% faster than the code with the CCDF.

+

This is an example where collecting counts ahead of time can also greatly speed up the execution speed without changing the density. For data size \(N=200\) and parameters \(\theta=0.3\) and \(\lambda = 8\), the speedup is a factor of 10; it will be lower for smaller \(N\) and greater for larger \(N\); it will also be greater for larger \(\theta\).

+

To achieve this speedup, it helps to have a function to count the number of zero entries in an array of integers,

+
functions {
+  int num_zero(array[] int y) {
+    int nz = 0;
+    for (n in 1:size(y)) {
+      if (y[n] == 0) {
+        nz += 1;
+      }
+    }
+    return nz;
+  }
+}
+

Then a transformed data block can be used to store the sufficient statistics,

+
transformed data {
+  int<lower=0, upper=N> N0 = num_zero(y);
+  int<lower=0, upper=N> Ngt0 = N - N0;
+  array[N - num_zero(y)] int<lower=1> y_nz;
+  {
+    int pos = 1;
+    for (n in 1:N) {
+      if (y[n] != 0) {
+        y_nz[pos] = y[n];
+        pos += 1;
+      }
+    }
+  }
+}
+

The model block is then reduced to three statements.

+
model {
+  N0 ~ binomial(N, theta);
+  y_nz ~ poisson(lambda);
+  target += -Ngt0 * log1m_exp(-lambda);
+}
+

The first statement accounts for the Bernoulli contribution to both the zero and non-zero counts. The second line is the Poisson contribution from the non-zero counts, which is now vectorized. Finally, the normalization for the truncation is a single line, so that the expression for the log CCDF at 0 isn’t repeated. Also note that the negation is applied to the constant Ngt0; whenever possible, leave subexpressions constant because then gradients need not be propagated until a non-constant term is encountered.

+
+
+
+

Priors and effective data size in mixture models

+

Suppose we have a two-component mixture model with mixing rate \(\lambda \in (0, 1)\). Because the likelihood for the mixture components is proportionally weighted by the mixture weights, the effective data size used to estimate each of the mixture components will also be weighted as a fraction of the overall data size. Thus although there are \(N\) observations, the mixture components will be estimated with effective data sizes of \(\theta \, N\) and \((1 - \theta) +\, N\) for the two components for some \(\theta \in (0, 1)\). The effective weighting size is determined by posterior responsibility, not simply by the mixing rate \(\lambda\).

+
+

Comparison to model averaging

+

In contrast to mixture models, which create mixtures at the observation level, model averaging creates mixtures over the posteriors of models separately fit with the entire data set. In this situation, the priors work as expected when fitting the models independently, with the posteriors being based on the complete observed data \(y\).

+

If different models are expected to account for different observations, we recommend building mixture models directly. If the models being mixed are similar, often a single expanded model will capture the features of both and may be used on its own for inferential purposes (estimation, decision making, prediction, etc.). For example, rather than fitting an intercept-only regression and a slope-only regression and averaging their predictions, even as a mixture model, we would recommend building a single regression with both a slope and an intercept. Model complexity, such as having more predictors than data points, can be tamed using appropriately regularizing priors. If computation becomes a bottleneck, the only recourse can be model averaging, which can be calculated after fitting each model independently (see Hoeting et al. (1999) and Gelman et al. (2013) for theoretical and computational details).

+ + + +
+
+
+ + + Back to top

References

+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+Hoeting, Jennifer A., David Madigan, Adrian E Raftery, and Chris T. Volinsky. 1999. “Bayesian Model Averaging: A Tutorial.” Statistical Science 14 (4): 382–417. +
+
+Lambert, Diane. 1992. “Zero-Inflated Poisson Regression, with an Application to Defects in Manufacturing.” Technometrics 34 (1). +
+

Footnotes

+ +
    +
  1. Imposing a constraint such as \(\theta < 0.5\) will resolve the symmetry, but fundamentally changes the model and its posterior inferences.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/floating-point.html b/docs/2_39/stan-users-guide/floating-point.html new file mode 100644 index 000000000..ceb78bfdf --- /dev/null +++ b/docs/2_39/stan-users-guide/floating-point.html @@ -0,0 +1,1387 @@ + + + + + + + + + +Floating Point Arithmetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Floating Point Arithmetic

+

Computers approximate real values in \(\mathbb{R}\) using a fixed number of bits. This chapter explains how this is done and why it is important for writing robust Stan (and other numerical) programs. The subfield of computer science devoted to studying how real arithmetic works on computers is called numerical analysis.

+
+

Floating-point representations

+

Stan’s arithmetic is implemented using double-precision arithmetic. The behavior of most1 modern computers follows the floating-point arithmetic, IEEE Standard for Floating-Point Arithmetic (IEEE 754).

+
+

Finite values

+

The double-precision component of the IEEE 754 standard specifies the representation of real values using a fixed pattern of 64 bits (8 bytes). All values are represented in base two (i.e., binary). The representation is divided into two signed components:

+
    +
  • significand (53 bits): base value representing significant digits

  • +
  • exponent (11 bits): power of two multiplied by the base

  • +
+

The value of a finite floating point number is

+

\[ +v = (-1)^s \times c \, 2^q +\]

+
+
+

Normality

+

A normal floating-point value does not use any leading zeros in its significand; subnormal numbers may use leading zeros. Not all I/O systems support subnormal numbers.

+
+
+

Ranges and extreme values

+

There are some reserved exponent values so that legal exponent values range between \(-(2^{10}) + 2 = -1022\) and \(2^{10} - 1 = 1023\). Legal significand values are between \(-2^{52}\) and \(2^{52} - 1\). Floating point allows the representation of both really big and really small values. Some extreme values are

+
    +
  • largest normal finite number: \(\approx 1.8 \times 10^{308}\)

  • +
  • largest subnormal finite number: \(\approx 2.2 \times 10^{-308}\)

  • +
  • smallest positive normal number: \(\approx 2.2 \times 10^{-308}\)

  • +
  • smallest positive subnormal number: \(\approx 4.9 \times 10^{-324}\)

  • +
+
+
+

Signed zero

+

Because of the sign bit, there are two ways to represent zero, often called “positive zero” and “negative zero”. This distinction is irrelevant in Stan (as it is in R), because the two values are equal (i.e., 0 == -0 evaluates to true).

+
+
+

Not-a-number values

+

A specially chosen bit pattern is used for the not-a-number value (often written as NaN in programming language output, including Stan’s).

+

Stan provides a value function not_a_number() that returns this special not-a-number value. It is meant to represent error conditions, not missing values. Usually when not-a-number is an argument to a function, the result will be not-a-number if an exception (a rejection in Stan) is not raised.

+

Stan also provides a test function is_nan(x) that returns 1 if x is not-a-number and 0 otherwise.

+

Not-a-number values propagate under almost all mathematical operations. For example, all of the built-in binary arithmetic operations (addition, subtraction, multiplication, division, negation) return not-a-number if any of their arguments are not-a-number. The built-in functions such as log and exp have the same behavior, propagating not-a-number values.

+

Most of Stan’s built-in functions will throw exceptions (i.e., reject) when any of their arguments is not-a-number.

+

Comparisons with not-a-number always return false, up to and including comparison with itself. That is, not_a_number() == not_a_number() somewhat confusingly returns false. That is why there is a built-in is_nan() function in Stan (and in C++). The only exception is negation, which remains coherent. This means not_a_number() != not_a_number() returns true.

+

Undefined operations often return not-a-number values. For example, sqrt(-1) will evaluate to not-a-number.

+
+
+

Positive and negative infinity

+

There are also two special values representing positive infinity (\(\infty\)) and negative infinity (\(-\infty\)). These are not as pathological as not-a-number, but are often used to represent error conditions such as overflow and underflow. For example, rather than raising an error or returning not-a-number, log(0) evaluates to negative infinity. Exponentiating negative infinity leads back to zero, so that 0 == exp(log(0)). Nevertheless, this should not be done in Stan because the chain rule used to calculate the derivatives will attempt illegal operations and return not-a-number.

+

There are value functions positive_infinity() and negative_infinity() as well as a test function is_inf().

+

Positive and negative infinity have the expected comparison behavior, so that negative_infinity() < 0 evaluates to true (represented with 1 in Stan). Also, negating positive infinity leads to negative infinity and vice-versa.

+

Positive infinity added to either itself or a finite value produces positive infinity. Negative infinity behaves the same way. However, attempts to subtract positive infinity from itself produce not-a-number, not zero. Similarly, attempts to divide infinite values result in a not-a-number value.

+
+
+
+

Literals: decimal and scientific notation

+

In programming languages such as Stan, numbers may be represented in standard decimal (base 10) notation. For example, 2.39 or -1567846.276452. Remember there is no point in writing more than 16 significant digits as they cannot be represented. A number may be coded in Stan using scientific notation, which consists of a signed decimal representation of a base and a signed integer decimal exponent. For example, 36.29e-3 represents the number \(36.29 \times +10^{-3}\), which is the same number as is represented by 0.03629.

+
+
+

Arithmetic precision

+

The choice of significand provides \(\log_{10} 2^{53} \approx 15.95\) decimal (base 10) digits of arithmetic precision. This is just the precision of the floating-point representation. After several operations are chained together, the realized arithmetic precision is often much lower.

+
+

Rounding and probabilities

+

In practice, the finite amount of arithmetic precision leads to rounding, whereby a number is represented by the closest floating-point number. For example, with only 16 decimal digits of accuracy,

+
1 + 1e-20 == 1
+

The closest floating point number to \(1 + 10^{-20}\) turns out to be \(1\) itself. By contrast,

+
0 + 1e-20 == 1e-20
+

This highlights the fact that precision depends on scale. Even though 1 + 1e-20 == 1, we have 1e-20 + 1e-20 == 2e-20, as expected.

+

Rounding also manifests itself in a lack of associativity. In particular, it does not usually hold for three floating point numbers \(a, b, c\) that \((a + b) + c = a + (b + c)\).

+

In statistical applications, problems often manifest in situations where users expect the usual rules of real-valued arithmetic to hold. Suppose we have a lower triangular matrix \(L\) with strictly positive diagonal, so that it is the Cholesky factor of a positive-definite matrix \(L \, L^{\top}\). In practice, rounding and loss of precision may render the result \(L \, L^{\top}\) neither symmetric nor positive definite.

+

In practice, care must be taken to defend against rounding. For example, symmetry may be produced by adding \(L \, L^{\top}\) with its transpose and dividing by two, or by copying the lower triangular portion into the upper portion. Positive definiteness may be maintained by adding a small quantity to the diagonal.

+
+
+

Machine precision and the asymmetry of 0 and 1

+

The smallest number greater than zero is roughly \(0 + 10^{-323}\). The largest number less than one is roughly \(1 - 10^{-15.95}\). The asymmetry is apparent when considering the representation of that largest number smaller than one—the exponent is of no help, and the number is represented as the binary equivalent of \(0.9999999999999999\).

+

For this reason, the machine precision is said to be roughly \(10^{-15.95}\). This constant is available as machine_precision() in Stan.

+
+
+

Complementary and epsilon functions

+

Special operations are available to mitigate this problem with numbers rounding when they get close to one. For example, consider the operation log(1 + x) for positive x. When x is small (less than \(10^{-16}\) for double-precision floating point), the sum in the argument will round to 1 and the result will round to zero. To allow more granularity, programming languages provide a library function directly implementing \(f(x) = \log (1 + x)\). In Stan (as in C++), this operation is written as log1p(x). Because x itself may be close to zero, the function log1p(x) can take the logarithm of values very close to one, the results of which are close to zero.

+

Similarly, the complementary cumulative distribution function (CCDF) is defined by \(F^{\complement}_Y(y) = 1 - F_Y(y)\), where \(F_Y\) is the cumulative distribution function (CDF) for the random variable \(Y\). This allows values very close to one to be represented in complementary form.

+
+
+

Catastrophic cancellation

+

Another downside to floating point representations is that subtraction of two numbers close to each other results in a loss of precision that depends on how close they are. This is easy to see in practice. Consider \[\begin{align*} + 1&.23456789012345 \\ +- 1&.23456789012344 \\ += 0&.00000000000001 +\end{align*}\] We start with fifteen decimal places of accuracy in the arguments and are left with a single decimal place of accuracy in the result.

+

Catastrophic cancellation arises in statistical computations whenever we calculate variance for a distribution with small standard deviations relative to its location. When calculating summary statistics, Stan uses Welford’s algorithm for computing variances. This avoids catastrophic cancellation and may also be carried out in a single pass.

+
+
+

Overflow

+

Even though 1e200 may be represented as a double precision floating point value, there is no finite value large enough to represent 1e200 * 1e200. The result of 1e200 * 1e200 is said to overflow. The IEEE 754 standard requires the result to be positive infinity.

+

Overflow is rarely a problem in statistical computations. If it is, it’s possible to work on the log scale, just as for underflow as described below.

+
+
+

Underflow and the log scale

+

When there is no number small enough to represent a result, it is said to underflow. For instance, 1e-200 may be represented, but 1e-200 * 1e-200 underflows so that the result is zero.

+

Underflow is a ubiquitous problem in likelihood calculations. For example, if \(p(y_n \mid \theta) < 0.1\), then \[ +p(y \mid \theta) = \prod_{n=1}^N p(y_n \mid \theta) +\] will underflow as soon as \(N > 350\) or so.

+

To deal with underflow, work on the log scale. Even though \(p(y \mid +\theta)\) can’t be represented, there is no problem representing \[ +\begin{array}{rcl} +\log p(y \mid \theta) +& = & \log \prod_{n=1}^N p(y_n \mid \theta) +\\[4pt] +& = & \sum_{n = 1}^N \log p(y_n \mid \theta) +\end{array} +\]

+

This is why all of Stan’s probability functions operate on the log scale.

+
+
+
+

Log sum of exponentials

+

Working on the log scale, multiplication is converted to addition, \[ +\log (a \cdot b) = \log a + \log b. +\] Thus sequences of multiplication operations can remain on the log scale. But what about addition? Given \(\log a\) and \(\log b\), how do we get \(\log (a + b)\)? Working out the algebra, \[ +\log (a + b) += +\log (\exp(\log a) + \exp(\log b)). +\]

+
+

Log-sum-exp function

+

The nested log of sum of exponentials is so common, it has its own name, “log-sum-exp”, \[ +\textrm{log-sum-exp}(u, v) += +\log (\exp(u) + \exp(v)). +\] so that \[ +\log (a + b) += +\textrm{log-sum-exp}(\log a, \log b). +\]

+

Although it appears this might overflow as soon as exponentiation is introduced, evaluation does not proceed by evaluating the terms as written. Instead, with a little algebra, the terms are rearranged into a stable form, \[ +\textrm{log-sum-exp}(u, v) += +\max(u, v) + \log\big( \exp(u - \max(u, v)) + \exp(v - \max(u, v)) \big). +\]

+

Because the terms inside the exponentiations are \(u - \max(u, v)\) and \(v - \max(u, v)\), one will be zero and the other will be negative. Because the operation is symmetric, it may be assumed without loss of generality that \(u \geq v\), so that \[ +\textrm{log-sum-exp}(u, v) = u + \log\big(1 + \exp(v - u)\big). +\]

+

Although the inner term may itself be evaluated using the built-in function log1p, there is only limited gain because \(\exp(v - u)\) is only near zero when \(u\) is much larger than \(v\), meaning the final result is likely to round to \(u\) anyway.

+

To conclude, when evaluating \(\log (a + b)\) given \(\log a\) and \(\log +b\), and assuming \(\log a > \log b\), return

+

\[ +\log (a + b) = +\log a + \textrm{log1p}\big(\exp(\log b - \log a)\big). +\]

+
+
+

Applying log-sum-exp to a sequence

+

The log sum of exponentials function may be generalized to sequences in the obvious way, so that if \(v = v_1, \ldots, v_N\), then \[\begin{eqnarray*} +\textrm{log-sum-exp}(v) +& = & \log \sum_{n = 1}^N \exp(v_n) +\\[4pt] +& = & \max(v) + \log \sum_{n = 1}^N \exp(v_n - \max(v)). +\end{eqnarray*}\] The exponent cannot overflow because its argument is either zero or negative. This form makes it easy to calculate \(\log (u_1 + \cdots + u_N)\) given only \(\log u_n\).

+
+
+

Calculating means with log-sum-exp

+

An immediate application is to computing the mean of a vector \(u\) entirely on the log scale. That is, given \(\log u\) and returning \(\log \textrm{mean}(u)\). \[\begin{eqnarray*} +\log \left( \frac{1}{N} \sum_{n = 1}^N u_n \right) +& = & \log \frac{1}{N} + \log \sum_{n = 1}^N \exp(\log u_n) +\\[4pt] +& = & -\log N + \textrm{log-sum-exp}(\log u). +\end{eqnarray*}\] where \(\log u = (\log u_1, \ldots, \log u_N)\) is understood elementwise.

+
+
+
+

Comparing floating-point numbers

+

Because floating-point representations are inexact, it is rarely a good idea to test exact equality. The general recommendation is that rather than testing x == y, an approximate test may be used given an absolute or relative tolerance.

+

Given a positive absolute tolerance of epsilon, x can be compared to y using the conditional

+
abs(x - y) <= epsilon.
+

Absolute tolerances work when the scale of x and y and the relevant comparison is known.

+

Given a positive relative tolerance of epsilon, a typical comparison is

+
2 * abs(x - y) / (abs(x) + abs(y)) <= epsilon.
+ + +
+
+ + + Back to top

Footnotes

+ +
    +
  1. The notable exception is Intel’s optimizing compilers under certain optimization settings.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/for-bugs-users.html b/docs/2_39/stan-users-guide/for-bugs-users.html new file mode 100644 index 000000000..52a424609 --- /dev/null +++ b/docs/2_39/stan-users-guide/for-bugs-users.html @@ -0,0 +1,1450 @@ + + + + + + + + + +Transitioning from BUGS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Transitioning from BUGS

+

From the outside, Stan and BUGS1 are similar—they use statistically-themed modeling languages (which are similar but with some differences; see below), they can be called from R, running some specified number of chains to some specified length, producing posterior simulations that can be assessed using standard convergence diagnostics. This is not a coincidence: in designing Stan, we wanted to keep many of the useful features of BUGS.

+
+

Some differences in how BUGS and Stan work

+
+

BUGS is interpreted, Stan is compiled

+

Stan is compiled in two steps, first a model is translated to templated C++ and then to a platform-specific executable. Stan, unlike BUGS, allows the user to directly program in C++, but we do not describe how to do this in this Stan manual (see the getting started with C++ section of https://mc-stan.org for more information on using Stan directly from C++).

+
+
+

BUGS performs MCMC updating one scalar parameter at a time, Stan uses HMC which moves in the entire space of all the parameters at each step

+

BUGS performs MCMC updating one scalar parameter at a time (with some exceptions such as JAGS’s implementation of regression and generalized linear models and some conjugate multivariate parameters), using conditional distributions (Gibbs sampling) where possible and otherwise using adaptive rejection sampling, slice sampling, and Metropolis jumping. BUGS figures out the dependence structure of the joint distribution as specified in its modeling language and uses this information to compute only what it needs at each step. Stan moves in the entire space of all the parameters using Hamiltonian Monte Carlo (more precisely, the no-U-turn sampler), thus avoiding some difficulties that occur with one-dimension-at-a-time sampling in high dimensions but at the cost of requiring the computation of the entire log density at each step.

+
+
+

Differences in tuning during warmup

+

BUGS tunes its adaptive jumping (if necessary) during its warmup phase (traditionally referred to as “burn-in”). Stan uses its warmup phase to tune the no-U-turn sampler (NUTS).

+
+
+

The Stan language is directly executable, the BUGS modeling language is not

+

The BUGS modeling language is not directly executable. Rather, BUGS parses its model to determine the posterior density and then decides on a sampling scheme. In contrast, the statements in a Stan model are directly executable: they translate exactly into C++ code that is used to compute the log posterior density (which in turn is used to compute the gradient).

+
+
+

Differences in statement order

+

In BUGS, statements are executed according to the directed graphical model so that variables are always defined when needed. A side effect of the direct execution of Stan’s modeling language is that statements execute in the order in which they are written. For instance, the following Stan program, which sets mu before using it to sample y:

+
mu = a + b * x;
+y ~ normal(mu, sigma);
+

translates to the following C++ code:

+
mu = a + b * x;
+target += normal_lpdf(y | mu, sigma);
+

Contrast this with the following Stan program:

+
y ~ normal(mu, sigma);
+mu = a + b * x;
+

This program is well formed, but is almost certainly a coding error, because it attempts to use mu before it is set. The direct translation to C++ code highlights the potential error of using mu in the first statement:

+
target += normal_lpdf(y | mu, sigma);
+mu = a + b * x;
+

To trap these kinds of errors, variables are initialized to the special not-a-number (NaN) value. If NaN is passed to a log probability function, it will raise a domain exception, which will in turn be reported by the sampler. The sampler will reject the sample out of hand as if it had zero probability.

+
+
+

Stan computes the gradient of the log density, BUGS computes the log density but not its gradient

+

Stan uses its own C++ algorithmic differentiation packages to compute the gradient of the log density (up to a proportion). Gradients are required during the Hamiltonian dynamics simulations within the leapfrog algorithm of the Hamiltonian Monte Carlo and NUTS samplers.

+
+
+

Both BUGS and Stan are semi-automatic

+

Both BUGS and Stan are semi-automatic in that they run by themselves with no outside tuning required. Nevertheless, the user needs to pick the number of chains and number of iterations per chain. We usually pick 4 chains and start with 10 iterations per chain (to make sure there are no major bugs and to approximately check the timing), then go to 100, 1000, or more iterations as necessary. Compared to Gibbs or Metropolis, Hamiltonian Monte Carlo can take longer per iteration (as it typically takes many “leapfrog steps” within each iteration), but the iterations typically have lower autocorrelation. So Stan might work fine with 1000 iterations in an example where BUGS would require 100,000 for good mixing. We recommend monitoring potential scale reduction statistics (\(\hat{R}\)) and the effective sample size to judge when to stop (stopping when \(\hat{R}\) values do not counter-indicate convergence and when a high enough effective sample size has been obtained).

+
+
+

Licensing

+

WinBUGS is closed source. OpenBUGS and JAGS are both licensed under the GNU General Public License (GPL), otherwise known as copyleft due to the restrictions it places on derivative works. Stan is licensed under the much more liberal new BSD license.

+
+
+

Interfaces

+

Like WinBUGS, OpenBUGS and JAGS, Stan can be run directly from the command line or through common analytics platforms like R, Python, Julia, MATLAB, and Mathematica.

+
+
+

Platforms

+

Like OpenBUGS and JAGS, Stan can be run on Linux, Mac, and Windows platforms.

+
+
+
+

Some differences in the modeling languages

+

The BUGS modeling language follows an R-like syntax in which line breaks are meaningful. Stan follows the rules of C, in which line breaks are equivalent to spaces, and each statement ends in a semicolon. For example:

+
y ~ normal(mu, sigma);
+

and

+
for (i in 1:n) y[i] ~ normal(mu, sigma);
+

Or, equivalently (recall that a line break is just another form of whitespace),

+
for (i in 1:n)
+  y[i] ~ normal(mu, sigma);
+

and also equivalently,

+
for (i in 1:n) {
+  y[i] ~ normal(mu, sigma);
+}
+

There’s a semicolon after the model statement but not after the brackets indicating the body of the for loop.

+

In Stan, variables can have names constructed using letters, numbers, and the underscore (_) symbol, but nothing else (and a variable name cannot begin with a number). BUGS variables can also include the dot, or period (.) symbol.

+

In Stan, the second argument to the “normal” function is the standard deviation (i.e., the scale), not the variance (as in Bayesian Data Analysis) and not the inverse-variance (i.e., precision) (as in BUGS). Thus a normal with mean 1 and standard deviation 2 is normal(1,2), not normal(1,4) or normal(1,0.25).

+

Similarly, the second argument to the “multivariate normal” function is the covariance matrix and not the inverse covariance matrix (i.e., the precision matrix) (as in BUGS). The same is true for the “multivariate student” distribution.

+

The distributions have slightly different names:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
BUGSStan
dnormnormal
dbinombinomial
dpoispoisson
+

Stan, unlike BUGS, allows intermediate quantities, in the form of local variables, to be reassigned. For example, the following is legal and meaningful (if possibly inefficient) Stan code.

+
{
+  total = 0;
+  for (i in 1:n) {
+    theta[i] ~ normal(total, sigma);
+    total = total + theta[i];
+  }
+}
+

In BUGS, the above model would not be legal because the variable total is defined more than once. But in Stan, the loop is executed in order, so total is overwritten in each step.

+

Stan uses explicit declarations. Variables are declared with base type integer or real, and vectors, matrices, and arrays have specified dimensions. When variables are bounded, we give that information also. For data and transformed parameters, the bounds are used for error checking. For parameters, the constraints are critical to sampling as they determine the geometry over which the Hamiltonian is simulated.

+

In Stan, variables can be declared as data, transformed data, parameters, transformed parameters, or generated quantities. They can also be declared as local variables within blocks. For more information, see the part of this manual devoted to the Stan programming language and examine the example models.

+

Stan allows all sorts of tricks with vector and matrix operations which can make Stan models more compact. For example, arguments to probability functions may be vectorized,2 allowing

+
for (i in 1:n) {
+  y[i] ~ normal(mu[i], sigma[i]);
+}
+

to be expressed more compactly as

+
y ~ normal(mu, sigma);
+

The vectorized form is also more efficient because Stan can unfold the computation of the chain rule during algorithmic differentiation.

+

Stan also allows for arrays of vectors and matrices. For example, a hierarchical model might have a vector of K parameters for each of J groups; this can be declared using

+
array[J] vector[K] theta;
+

Then theta[j] is an expression denoting a K-vector and may be used in the code just like any other vector variable.

+

An alternative encoding would be with a two-dimensional array, as in

+
array[J, K] real theta;
+

The vector version can have some advantages, both in convenience and in computational speed for some operations.

+

A third encoding would use a matrix:

+
matrix[J, K] theta;
+

but in this case, theta[j] is a row vector, not a vector, and accessing it as a vector is less efficient than with an array of vectors. The transposition operator, as in theta[j]', may be used to convert the row vector theta[j] to a (column) vector. Column vector and row vector types are not interchangeable everywhere in Stan; see the function signature declarations in the programming language section of this manual.

+

Stan supports general conditional statements using a standard if-else syntax. For example, a zero-inflated (or -deflated) Poisson mixture model is defined using the if-else syntax as described in the zero inflation section.

+

Stan supports general while loops using a standard syntax. While loops give Stan full Turing equivalent computational power. They are useful for defining iterative functions with complex termination conditions. As an illustration of their syntax, the for-loop

+
model {
+    // ...
+    for (n in 1:N) {
+        // ... do something with n ....
+    }
+}
+

may be recoded using the following while loop.

+
model {
+    int n;
+    // ...
+    n = 1;
+    while (n <= N) {
+        // ... do something with n ...
+        n = n + 1;
+    }
+}
+
+
+

Some differences in the statistical models that are allowed

+

Stan does not yet support declaration of discrete parameters. Discrete data variables are supported. Inference is supported for discrete parameters as described in the mixture and latent discrete parameters chapters of the manual.

+

Stan has some distributions on covariance matrices that do not exist in BUGS, including a uniform distribution over correlation matrices which may be rescaled, and the priors based on C-vines defined in Lewandowski, Kurowicka, and Joe (2009). In particular, the Lewandowski et al. prior allows the correlation matrix to be shrunk toward the unit matrix while the scales are given independent priors.

+

In BUGS you need to define all variables. In Stan, if you declare but don’t define a parameter it implicitly has a flat prior (on the scale in which the parameter is defined). For example, if you have a parameter p declared as

+
real<lower=0, upper=1> p;
+

and then have no distribution statement for p in the model block, then you are implicitly assigning a uniform \([0,1]\) prior on p.

+

On the other hand, if you have a parameter theta declared with

+
real theta;
+

and have no distribution statement for theta in the model block, then you are implicitly assigning an improper uniform prior on \((-\infty,\infty)\) to theta.

+

BUGS models are always proper (being constructed as a product of proper marginal and conditional densities). Stan models can be improper. Here is the simplest improper Stan model:

+
parameters {
+  real theta;
+}
+model { }
+

Although parameters in Stan models may have improper priors, we do not want improper posterior distributions, as we are trying to use these distributions for Bayesian inference. There is no general way to check if a posterior distribution is improper. But if all the priors are proper, the posterior will be proper also.

+

Each statement in a Stan model is directly translated into the C++ code for computing the log posterior. Thus, for example, the following pair of statements is legal in a Stan model:

+
y ~ normal(0,1);
+y ~ normal(2,3);
+

The second line here does not simply overwrite the first; rather, both statements contribute to the density function that is evaluated. The above two lines have the effect of including the product, \(\textsf{normal}(y \mid 0,1) * \textsf{normal}(y \mid 2,3)\), into the density function.

+

For a perhaps more confusing example, consider the following two lines in a Stan model:

+
x ~ normal(0.8 * y, sigma);
+y ~ normal(0.8 * x, sigma);
+

At first, this might look like a joint normal distribution with a correlation of 0.8. But it is not. The above are not interpreted as conditional entities; rather, they are factors in the joint density. Multiplying them gives, \(\textsf{normal}(x \mid 0.8y,\sigma) +\times \textsf{normal}(y \mid 0.8x,\sigma)\), which is what it is (you can work out the algebra) but it is not the joint distribution where the conditionals have regressions with slope 0.8.

+

With censoring and truncation, Stan uses the censored-data or truncated-data likelihood—this is not always done in BUGS. All of the approaches to censoring and truncation discussed in Gelman et al. (2013) and Gelman and Hill (2007) may be implemented in Stan directly as written.

+

Stan, like BUGS, can benefit from human intervention in the form of reparameterization.

+
+
+

Some differences when running from R

+

Stan can be set up from within R using two lines of code. Follow the instructions for running Stan from R on the Stan web site. You don’t need to separately download Stan and RStan. Installing RStan will automatically set up Stan.

+

In practice we typically run the same Stan model repeatedly. If you pass RStan the result of a previously fitted model the model will not need to be recompiled. An example is given on the running Stan from R pages available from the Stan web site.

+

When you run Stan, it saves various conditions including starting values, some control variables for the tuning and running of the no-U-turn sampler, and the initial random seed. You can specify these values in the Stan call and thus achieve exact replication if desired. (This can be useful for debugging.)

+

When running BUGS from R, you need to send exactly the data that the model needs. When running RStan, you can include extra data, which can be helpful when playing around with models. For example, if you remove a variable x from the model, you can keep it in the data sent from R, thus allowing you to quickly alter the Stan model without having to also change the calling information in your R script.

+

As in R2WinBUGS and R2jags, after running the Stan model, you can quickly summarize using plot() and print(). You can access the simulations themselves using various extractor functions, as described in the RStan documentation.

+

Various information about the sampler, such as number of leapfrog steps, log probability, and step size, is available through extractor functions. These can be useful for understanding what is going wrong when the algorithm is slow to converge.

+
+
+

The Stan community

+

Stan, like WinBUGS, OpenBUGS, and JAGS, has an active community, which you can access via the user’s mailing list and the developer’s mailing list; see the Stan web site for information on subscribing and posting and to look at archives.

+ + + +
+
+ + + Back to top

References

+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+Gelman, Andrew, and Jennifer Hill. 2007. Data Analysis Using Regression and Multilevel-Hierarchical Models. Cambridge, United Kingdom: Cambridge University Press. +
+
+Lewandowski, Daniel, Dorota Kurowicka, and Harry Joe. 2009. “Generating Random Correlation Matrices Based on Vines and Extended Onion Method.” Journal of Multivariate Analysis 100: 1989–2001. +
+

Footnotes

+ +
    +
  1. Except where otherwise noted, we use “BUGS” to refer to WinBUGS, OpenBUGS, and JAGS, indiscriminately.↩︎

  2. +
  3. Most distributions have been vectorized, but currently the truncated versions may not exist.↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/gaussian-processes.html b/docs/2_39/stan-users-guide/gaussian-processes.html new file mode 100644 index 000000000..4f53b356e --- /dev/null +++ b/docs/2_39/stan-users-guide/gaussian-processes.html @@ -0,0 +1,2113 @@ + + + + + + + + + +Gaussian Processes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Gaussian Processes

+

Gaussian processes are continuous stochastic processes and thus may be interpreted as providing a probability distribution over functions. A probability distribution over continuous functions may be viewed, roughly, as an uncountably infinite collection of random variables, one for each valid input. The generality of the supported functions makes Gaussian priors popular choices for priors in general multivariate (non-linear) regression problems.

+

The defining feature of a Gaussian process is that the joint distribution of the function’s value at a finite number of input points is a multivariate normal distribution. This makes it tractable to both fit models from finite amounts of observed data and make predictions for finitely many new data points.

+

Unlike a simple multivariate normal distribution, which is parameterized by a mean vector and covariance matrix, a Gaussian process is parameterized by a mean function and covariance function. The mean and covariance functions apply to vectors of inputs and return a mean vector and covariance matrix which provide the mean and covariance of the outputs corresponding to those input points in the functions drawn from the process.

+

Gaussian processes can be encoded in Stan by implementing their mean and covariance functions or by using the specialized covariance functions outlined below, and plugging the result into the Gaussian model. This form of model is straightforward and may be used for simulation, model fitting, or posterior predictive inference. A more efficient Stan implementation for the GP with a normally distributed outcome marginalizes over the latent Gaussian process, and applies a Cholesky-factor reparameterization of the Gaussian to compute the likelihood and the posterior predictive distribution analytically.

+

After defining Gaussian processes, this chapter covers the basic implementations for simulation, hyperparameter estimation, and posterior predictive inference for univariate regressions, multivariate regressions, and multivariate logistic regressions. Gaussian processes are general, and by necessity this chapter only touches on some basic models. For more information, see Rasmussen and Williams (2006).

+

Note that fitting Gaussian processes as described below using exact inference by computing the Cholesky decomposition of the covariance matrix scales cubically with the size of the data. Due to how Stan autodiff is implemented, Stan is also slower than Gaussian process specialized software. It is likely that Gaussian processes using exact inference by computing the Cholesky decomposition of the covariance matrix with \(N>1000\) are too slow for practical purposes in Stan. There are many approximations to speed up Gaussian process computation, of which the basis function approaches for 1-3 dimensional \(x\) are the easiest to implement in Stan (see, e.g., Riutort-Mayol et al. (2023)).

+
+

Gaussian process regression

+

The data for a multivariate Gaussian process regression consists of a series of \(N\) inputs \(x_1,\dotsc,x_N \in \mathbb{R}^D\) paired with outputs \(y_1,\dotsc,y_N \in \mathbb{R}\). The defining feature of Gaussian processes is that the probability of a finite number of outputs \(y\) conditioned on their inputs \(x\) is Gaussian: \[ +y \sim \textsf{multivariate normal}(m(x), K(x \mid \theta)), +\] where \(m(x)\) is an \(N\)-vector and \(K(x \mid \theta)\) is an \(N \times N\) covariance matrix. The mean function \(m : \mathbb{R}^{N \times D} +\rightarrow \mathbb{R}^{N}\) can be anything, but the covariance function \(K : \mathbb{R}^{N \times D} \rightarrow \mathbb{R}^{N \times N}\) must produce a positive-definite matrix for any input \(x\).1

+

A popular covariance function, which will be used in the implementations later in this chapter, is an exponentiated quadratic function, \[ + K(x \mid \alpha, \rho, \sigma)_{i, j} += \alpha^2 +\exp \left( +- \dfrac{1}{2 \rho^2} \sum_{d=1}^D (x_{i,d} - x_{j,d})^2 +\right) ++ \delta_{i, j} \sigma^2, +\] where \(\alpha\), \(\rho\), and \(\sigma\) are hyperparameters defining the covariance function and where \(\delta_{i, j}\) is the Kronecker delta function with value 1 if \(i = j\) and value 0 otherwise; this test is between the indexes \(i\) and \(j\), not between values \(x_i\) and \(x_j\). This kernel is obtained through a convolution of two independent Gaussian processes, \(f_1\) and \(f_2\), with kernels \[ + K_1(x \mid \alpha, \rho)_{i, j} += \alpha^2 +\exp \left( +- \dfrac{1}{2 \rho^2} \sum_{d=1}^D (x_{i,d} - x_{j,d})^2 +\right) +\] and \[ + K_2(x \mid \sigma)_{i, j} += +\delta_{i, j} \sigma^2. +\]

+

The addition of \(\sigma^2\) on the diagonal is important to ensure the positive definiteness of the resulting matrix in the case of two identical inputs \(x_i = x_j\). In statistical terms, \(\sigma\) is the scale of the noise term in the regression.

+

The hyperparameter \(\rho\) is the length-scale, and corresponds to the frequency of the functions represented by the Gaussian process prior with respect to the domain. Values of \(\rho\) closer to zero lead the GP to represent high-frequency functions, whereas larger values of \(\rho\) lead to low-frequency functions. The hyperparameter \(\alpha\) is the marginal standard deviation. It controls the magnitude of the range of the function represented by the GP. If you were to take the standard deviation of many draws from the GP \(f_1\) prior at a single input \(x\) conditional on one value of \(\alpha\) one would recover \(\alpha\).

+

The only term in the squared exponential covariance function involving the inputs \(x_i\) and \(x_j\) is their vector difference, \(x_i - x_j\). This produces a process with stationary covariance in the sense that if an input vector \(x\) is translated by a vector \(\epsilon\) to \(x + +\epsilon\), the covariance at any pair of outputs is unchanged, because \(K(x \mid \theta) = K(x + \epsilon \mid \theta)\).

+

The summation involved is just the squared Euclidean distance between \(x_i\) and \(x_j\) (i.e., the squared \(L_2\) norm of their difference, \(x_i - +x_j\)). This results in support for smooth functions in the process. The amount of variation in the function is controlled by the free hyperparameters \(\alpha\), \(\rho\), and \(\sigma\).

+

Changing the notion of distance from Euclidean to taxicab distance (i.e., an \(L_1\) norm) changes the support to functions which are continuous but not smooth.

+
+
+

Simulating from a Gaussian process

+

It is simplest to start with a Stan model that does nothing more than simulate draws of functions \(f\) from a Gaussian process. In practical terms, the model will draw values \(y_n = f(x_n)\) for finitely many input points \(x_n\).

+

The Stan model defines the mean and covariance functions in a transformed data block and then samples outputs \(y\) in the model using a multivariate normal distribution. To make the model concrete, the squared exponential covariance function described in the previous section will be used with hyperparameters set to \(\alpha^2 = 1\), \(\rho^2 = 1\), and \(\sigma^2 = 0.1\), and the mean function \(m\) is defined to always return the zero vector, \(m(x) = \textbf{0}\). Consider the following implementation of a Gaussian process simulator.

+
data {  // inputs at which function values are simulated
+  int<lower=1> N;  // number of input points
+  array[N] real x;  // univariate input locations
+}
+transformed data {  // K and mu involve only data and constants
+  matrix[N, N] K;  // covariance matrix of the outputs
+  vector[N] mu = rep_vector(0, N);  // zero mean function, m(x) = 0
+  for (i in 1:(N - 1)) {
+    K[i, i] = 1 + 0.1;  // diagonal: alpha^2 + sigma^2 with alpha^2 = 1, sigma^2 = 0.1
+    for (j in (i + 1):N) {  // strictly upper triangle only
+      K[i, j] = exp(-0.5 * square(x[i] - x[j]));  // exponentiated quadratic with alpha^2 = rho^2 = 1
+      K[j, i] = K[i, j];  // mirror the entry so K is symmetric
+    }
+  }
+  K[N, N] = 1 + 0.1;  // last diagonal entry; the loop over i stops at N - 1
+}
+parameters {
+  vector[N] y;  // simulated function values at the inputs x
+}
+model {
+  y ~ multi_normal(mu, K);  // draw y from the multivariate normal defined by mu and K
+}
+

The above model can also be written more compactly using the specialized covariance function that implements the exponentiated quadratic kernel.

+
data {  // inputs at which function values are simulated
+  int<lower=1> N;  // number of input points
+  array[N] real x;  // univariate input locations
+}
+transformed data {
+  matrix[N, N] K = gp_exp_quad_cov(x, 1.0, 1.0);  // exponentiated quadratic kernel with alpha = 1, rho = 1
+  vector[N] mu = rep_vector(0, N);  // zero mean function
+  for (n in 1:N) {
+    K[n, n] = K[n, n] + 0.1;  // add sigma^2 = 0.1 to the diagonal
+  }
+}
+parameters {
+  vector[N] y;  // simulated function values at the inputs x
+}
+model {
+  y ~ multi_normal(mu, K);
+}
+

The input data are just the vector of inputs x and its size N. Such a model can be used with values of x evenly spaced over some interval in order to plot sample draws of functions from a Gaussian process.

+
+

Multivariate inputs

+

Only the input data needs to change in moving from a univariate model to a multivariate model.

+

The only lines that change from the univariate model above are as follows.

+
data {
+  int<lower=1> N;
+  int<lower=1> D;
+  array[N] vector[D] x;
+}
+transformed data {
+  // ...
+}
+

The data are now declared as an array of vectors instead of an array of scalars; the dimensionality D is also declared.

+

In the remainder of the chapter, univariate models will be used for simplicity, but any of the models could be changed to multivariate in the same way as the simple sampling model. The only extra computational overhead from a multivariate model is in the distance calculation.

+
+
+

Cholesky factored and transformed implementation

+

A more efficient implementation of the simulation model can be coded in Stan by relocating, rescaling and rotating an isotropic standard normal variate. Suppose \(\eta\) is an isotropic standard normal variate \[ +\eta \sim \textsf{normal}(\textbf{0}, \textrm{I}), +\] where \(\textbf{0}\) is an \(N\)-vector of 0 values and \(\textrm{I}\) is the \(N +\times N\) identity matrix. Let \(L\) be the Cholesky decomposition of \(K(x \mid \theta)\), i.e., the lower-triangular matrix \(L\) such that \(LL^{\top} = +K(x \mid \theta)\). Then the transformed variable \(\mu + L\eta\) has the intended target distribution, \[ + \mu + L\eta \sim \textsf{multivariate normal}(\mu(x), K(x \mid \theta)). +\]

+

This transform can be applied directly to Gaussian process simulation.

+

This model has the same data declarations for N and x, and the same transformed data definitions of mu and K as the previous model, with the addition of a transformed data variable for the Cholesky decomposition. The parameters change to the raw parameters sampled from an isotropic standard normal, and the actual parameters are defined in generated quantities.

+
// ...
+transformed data {
+  matrix[N, N] L;
+  // ...
+  L = cholesky_decompose(K);
+}
+parameters {
+  vector[N] eta;
+}
+model {
+  eta ~ std_normal();
+}
+generated quantities {
+  vector[N] y;
+  y = mu + L * eta;
+}
+

The Cholesky decomposition is only computed once, after the data are loaded and the covariance matrix K computed. The isotropic normal distribution for eta is specified as a vectorized univariate distribution for efficiency; this specifies that each eta[n] has an independent standard normal distribution. The sampled vector y is then defined as a generated quantity using a direct encoding of the transform described above.

+
+
+
+

Fitting a Gaussian process

+
+

GP with a normal outcome

+

The full generative model for a GP with a normal outcome, \(y \in \mathbb{R}^N\), with inputs \(x \in \mathbb{R}^N\), for a finite \(N\): \[\begin{align*} +\rho &\sim \textsf{InvGamma}(5, 5) \\ +\alpha &\sim \textsf{normal}(0, 1) \\ +\sigma &\sim \textsf{normal}(0, 1) \\ +f &\sim \textsf{multivariate normal}\left(0, K(x \mid \alpha, \rho)\right) \\ +y_i &\sim \textsf{normal}(f_i, \sigma) \, \forall i \in \{1, \dots, N\} +\end{align*}\] With a normal outcome, it is possible to integrate out the Gaussian process \(f\), yielding the more parsimonious model: \[\begin{align*} +\rho &\sim \textsf{InvGamma}(5, 5) \\ +\alpha &\sim \textsf{normal}(0, 1) \\ +\sigma &\sim \textsf{normal}(0, 1) \\ +y &\sim \textsf{multivariate normal} + \left(0, K(x \mid \alpha, \rho) + \textbf{I}_N \sigma^2\right) \\ +\end{align*}\]

+

It can be more computationally efficient when dealing with a normal outcome to integrate out the Gaussian process, because this yields a lower-dimensional parameter space over which to do inference. We’ll fit both models in Stan. The former model will be referred to as the latent variable GP, while the latter will be called the marginal likelihood GP.

+

The hyperparameters controlling the covariance function of a Gaussian process can be fit by assigning them priors, like we have in the generative models above, and then computing the posterior distribution of the hyperparameters given observed data. The priors on the parameters should be defined based on prior knowledge of the scale of the output values (\(\alpha\)), the scale of the output noise (\(\sigma\)), and the scale at which distances are measured among inputs (\(\rho\)). See the Gaussian process priors section for more information about how to specify appropriate priors for the hyperparameters.

+

The Stan program implementing the marginal likelihood GP is shown below. The program is similar to the Stan programs that implement the simulation GPs above, but because we are doing inference on the hyperparameters, we need to calculate the covariance matrix K in the model block, rather than the transformed data block.

+
data {  // observed regression data
+  int<lower=1> N;  // number of observations
+  array[N] real x;  // univariate inputs
+  vector[N] y;  // observed outputs, y[n] observed at x[n]
+}
+transformed data {
+  vector[N] mu = rep_vector(0, N);  // zero mean vector
+}
+parameters {
+  real<lower=0> rho;  // length-scale
+  real<lower=0> alpha;  // marginal standard deviation
+  real<lower=0> sigma;  // scale of the noise term
+}
+model {
+  matrix[N, N] L_K;  // Cholesky factor of K
+  matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);  // rebuilt here because it depends on parameters
+  real sq_sigma = square(sigma);
+
+  // diagonal elements
+  for (n in 1:N) {
+    K[n, n] = K[n, n] + sq_sigma;  // noise variance added on the diagonal
+  }
+
+  L_K = cholesky_decompose(K);
+
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();  // with the lower=0 bound this acts as a half-normal prior
+  sigma ~ std_normal();  // with the lower=0 bound this acts as a half-normal prior
+
+  y ~ multi_normal_cholesky(mu, L_K);  // latent f is integrated out; y is the observed data
+}
+

The data block declares a vector y of observed values y[n] for inputs x[n]. The transformed data block now only defines the mean vector to be zero. The three hyperparameters are defined as parameters constrained to be non-negative. The computation of the covariance matrix K is now in the model block because it involves unknown parameters and thus can’t simply be precomputed as transformed data. The rest of the model consists of the priors for the hyperparameters and the multivariate Cholesky-parameterized normal distribution, only now the value y is known and the covariance matrix K is an unknown dependent on the hyperparameters, allowing us to learn the hyperparameters.

+

We have used the Cholesky parameterized multivariate normal rather than the standard parameterization because it allows us to use the cholesky_decompose function, which has been optimized for both small and large matrices. When working with small matrices the differences in computational speed between the two approaches will not be noticeable, but for larger matrices (\(N \gtrsim 100\)) the Cholesky decomposition version will be faster.

+

Hamiltonian Monte Carlo sampling is fast and effective for hyperparameter inference in this model (Neal 1997). If the posterior is well-concentrated for the hyperparameters the Stan implementation will fit hyperparameters in models with a few hundred data points in seconds.

+
+

Latent variable GP

+

We can also explicitly code the latent variable formulation of a GP in Stan. This will be useful for when the outcome is not normal. We’ll need to add a small positive term, \(\delta\) to the diagonal of the covariance matrix in order to ensure that our covariance matrix remains positive definite.

+
data {  // observed regression data
+  int<lower=1> N;  // number of observations
+  array[N] real x;  // univariate inputs
+  vector[N] y;  // observed outputs
+}
+transformed data {
+  real delta = 1e-9;  // small positive term added to the diagonal of K to keep it positive definite
+}
+parameters {
+  real<lower=0> rho;  // length-scale
+  real<lower=0> alpha;  // marginal standard deviation
+  real<lower=0> sigma;  // scale of the noise term
+  vector[N] eta;  // standard normal variates that are transformed into f
+}
+model {
+  vector[N] f;  // latent GP values at the inputs
+  {
+    matrix[N, N] L_K;  // Cholesky factor of K
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
+
+    // diagonal elements
+    for (n in 1:N) {
+      K[n, n] = K[n, n] + delta;
+    }
+
+    L_K = cholesky_decompose(K);
+    f = L_K * eta;  // f is multivariate normal with covariance K when eta is standard normal
+  }
+
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();
+  sigma ~ std_normal();
+  eta ~ std_normal();  // vectorized: each eta[n] is independent standard normal
+
+  y ~ normal(f, sigma);  // vectorized univariate normals instead of one multivariate normal
+}
+

Two differences between the latent variable GP and the marginal likelihood GP are worth noting. The first is that we have augmented our parameter block with a new parameter vector of length \(N\) called eta. This is used in the model block to generate a multivariate normal vector called \(f\), corresponding to the latent GP. We put a \(\textsf{normal}(0,1)\) prior on eta like we did in the Cholesky-parameterized GP in the simulation section. The second difference is that although we could code the distribution statement for \(y\) with one \(N\)-dimensional multivariate normal with an identity covariance matrix multiplied by \(\sigma^2\), we instead use a vectorized univariate normal distribution, which is equivalent but more efficient to use.

+
+
+
+

Discrete outcomes with Gaussian processes

+

Gaussian processes can be generalized the same way as standard linear models by introducing a link function. This allows them to be used as discrete data models.

+
+

Poisson GP

+

If we want to model count data, we can remove the \(\sigma\) parameter, and use poisson_log, which implements a Poisson distribution with log link function, rather than normal. We can also add an overall mean parameter, \(a\), which will account for the marginal expected value for \(y\). We do this because we cannot center count data like we would for normally distributed data.

+
data {
+  // ...
+  array[N] int<lower=0> y;
+  // ...
+}
+// ...
+parameters {
+  real<lower=0> rho;
+  real<lower=0> alpha;
+  real a;
+  vector[N] eta;
+}
+model {
+  vector[N] f;
+  {
+    matrix[N, N] L_K;
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
+
+    // diagonal elements
+    for (n in 1:N) {
+      K[n, n] = K[n, n] + delta;
+    }
+
+    L_K = cholesky_decompose(K);
+    f = L_K * eta;
+  }
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();
+  a ~ std_normal();
+  eta ~ std_normal();
+
+  y ~ poisson_log(a + f);
+}
+
+
+

Poisson GP using an embedded Laplace approximation

+

For computational reasons, we may want to integrate out the latent variable \(f\), as was done in the normal output model. Unfortunately, exact marginalization over \(f\) is not possible when the outcome model is not normal. Instead, we may perform approximate marginalization with an embedded Laplace approximation (Rasmussen and Williams 2006; Rue, Martino, and Chopin 2009; Margossian et al. 2020). To do so, we first use the function laplace_marginal to approximate the marginal likelihood \(p(y \mid \rho, \alpha, a)\) and sample the hyperparameters with Hamiltonian Monte Carlo sampling. Then, we recover the integrated out \(f\) in the generated quantities block using laplace_latent_rng.

+

The embedded Laplace approximation computes a Gaussian approximation of the conditional posterior, \[ + \hat p_\mathcal{L}(f \mid \rho, \alpha, a, y) \approx p(f \mid \rho, \alpha, a, y), +\] where \(\hat p_\mathcal{L}\) is a Gaussian that matches the mode and curvature of \(p(f \mid \rho, \alpha, a, y)\). We then obtain an approximation of the marginal likelihood as follows: \[ + \hat p_\mathcal{L}(y \mid \rho, \alpha, a) + = \frac{p(f^* \mid \alpha, \rho) p(y \mid f^*, a)}{ + \hat p_\mathcal{L}(f^* \mid \rho, \alpha, a, y)}, +\] where \(f^*\) is the mode of \(p(f \mid \rho, \alpha, a, y)\), obtained via numerical optimization.

+

To use Stan’s embedded Laplace approximation, we must define the prior covariance function and the log likelihood function in the functions block.

+
functions {
+  // log likelihood function
+  real ll_function(vector f, real a, array[] int y) {
+      return poisson_log_lpmf(y | a + f);
+  }
+
+  // Prior covariance function for the embedded Laplace approximation:
+  // exponentiated quadratic kernel with marginal standard deviation alpha and
+  // length-scale rho, evaluated at the N inputs x, with delta added to the
+  // diagonal to keep the matrix positive definite.
+  matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
+    return add_diag(K, delta);
+  }
+
+}
+

The embedded Laplace relies on calculations of the log likelihood’s Hessian, \(\partial^2 \log p(y \mid f, a, \rho, \alpha) / \partial f^2\), and these calculations can be much faster when the Hessian is sparse. In particular, it is expected that the Hessian is block diagonal. In the transformed data block we can specify the block size of the Hessian.

+
transformed data {
+  int hessian_block_size = 1;  // block size of the block-diagonal log likelihood Hessian; 1 means diagonal
+}
+

For example, if \(y_i\) depends only on \(f_i\), then the Hessian of the log likelihood is diagonal and the block size is 1. On the other hand, if the Hessian is not sparse, then we set the hessian block size to \(N\), where \(N\) is the dimension of \(f\). Currently, Stan does not check the block size of the Hessian and so the user is responsible for correctly specifying the block size.

+

Finally, we increment target in the model block with the approximation to \(\log p(y \mid \rho, \alpha, a)\).

+
model {  // hyperparameter priors plus the Laplace-approximated log marginal likelihood
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();
+  a ~ std_normal();  // prior on the overall mean; the Poisson model has no noise scale sigma
+
+  target += laplace_marginal(ll_function, (a, y), hessian_block_size,
+                             cov_function, (rho, alpha, x, N, delta));
+}
+

Notice that we do not need to construct \(f\) explicitly, since it is marginalized out. Instead, we can recover the latent variables in generated quantities:

+
generated quantities {  // recover the latent f that is not constructed in the model block
+  vector[N] f = laplace_latent_rng(ll_function, (a, y), hessian_block_size,
+                                   cov_function, (rho, alpha, x, N, delta));
+}
+

Users can set the control parameters of the embedded Laplace approximation via laplace_marginal_tol and laplace_latent_tol_rng. When using these functions, the user must set all the control options and store them in a tuple. These control parameters mostly concern the numerical optimizer used to find the mode \(f^*\) of \(p(f \mid \rho, \alpha, a, y)\).

+
transformed data {
+  // control options for the embedded Laplace approximation, stored in one tuple
+  tuple(vector[N], real, int, int, int, int) laplace_ops;
+  laplace_ops.1 = rep_vector(0, N);  // starting point for Laplace optimizer
+  laplace_ops.2 = 1.49e-8;           // tolerance for optimizer
+  laplace_ops.3 = 500;               // maximum number of steps for optimizer
+  laplace_ops.4 = 1;                 // solver type being used
+  laplace_ops.5 = 1000;              // max number of steps for linesearch
+  laplace_ops.6 = 1;                 // allow_fallback (1: TRUE, 0: FALSE)
+}
+

If users want to depart from the defaults for only some of the control parameters, a tuple with the default values (as above) can be created with the helper callable generate_laplace_options(), and the specific control parameter can then be modified,

+
transformed data {
+  tuple(vector[N], real, int, int, int, int, int) laplace_ops =
+    generate_laplace_options(N);
+
+  laplace_ops.2 = 1e-6; // make tolerance of the optimizer less strict.
+}
+

The tuple laplace_ops is then passed to laplace_marginal_tol and laplace_latent_tol_rng.

+
model {
+// ...
+
+  target += laplace_marginal_tol(ll_function, (a, y), hessian_block_size,
+                                 cov_function, (rho, alpha, x, N, delta),
+                                 laplace_ops);
+}
+
+generated quantities {
+  // the _tol variant is the one that accepts the control tuple laplace_ops
+  vector[N] f = laplace_latent_tol_rng(ll_function, (a, y), hessian_block_size,
+                                       cov_function, (rho, alpha, x, N, delta),
+                                       laplace_ops);
+}
+

Stan also provides support for a limited menu of built-in functions, including the Poisson distribution with a log link and a prior mean \(m\). When using such a built-in function, the user does not need to specify a likelihood in the functions block. However, the user must strictly follow the signature of the likelihood: in this case, \(m\) must be a vector of length \(N\) (to allow for different offsets for each observation \(y_i\)) and we must indicate which element of \(f\) each component of \(y\) matches using the variable \(y_\text{index}\). In our example, there is a simple pairing \((y_i, f_i)\), however we could imagine a scenario where multiple observations \((y_{j1}, y_{j2}, ...)\) are observed for a single \(f_j\).

+
transformed data {
+  // ...
+  array[n_obs] int y_index;
+  for (i in 1:n_obs) y_index[i] = i - 1;
+}
+
+// ...
+
+transformed parameters {
+  vector[N] m = rep_vector(a, N);  // prior mean vector: the same offset a for every observation
+}
+
+model {
+  // ...
+  target += laplace_marginal_poisson_log_lpmf(y | y_index, m,
+                                       cov_function, (rho, alpha, x, N, delta));
+}
+
+generated quantities {
+  vector[N] f = laplace_latent_poisson_log_rng(y, y_index, m,
+                                   cov_function, (rho, alpha, x, N, delta));
+}
+

As before, we could specify the control parameters for the embedded Laplace approximation using laplace_marginal_tol_poisson_log_lpmf and laplace_latent_tol_poisson_log_rng.

+

Marginalization with a Laplace approximation can lead to faster inference, however it also introduces an approximation error. In practice, this error is negligible when using a Poisson likelihood and the approximation works well for log concave likelihoods (Kuss and Rasmussen 2005; Vanhatalo, Pietiläinen, and Vehtari 2010; Cseke and Heskes 2011; Vehtari et al. 2016). Still, users should exercise caution, especially when trying unconventional likelihoods.

+
+
+

Logistic GP regression

+

For binary classification problems, the observed outputs \(z_n \in +\{ 0,1 \}\) are binary. These outputs are modeled using a Gaussian process with (unobserved) outputs \(y_n\) through the logistic link, \[ +z_n \sim \textsf{Bernoulli}(\operatorname{logit}^{-1}(y_n)), +\] or in other words, \[ +\Pr[z_n = 1] = \operatorname{logit}^{-1}(y_n). +\]

+

We can extend our latent variable GP Stan program to deal with classification problems. Below a is the bias term, which can help account for imbalanced classes in the training data:

+
data {
+  // ...
+  array[N] int<lower=0, upper=1> z;
+  // ...
+}
+// ...
+model {
+  // ...
+  z ~ bernoulli_logit(a + f);
+}
+
+
+

Logistic GP regression with an embedded Laplace approximation

+

As with the Poisson GP, we cannot marginalize the latent variables exactly, however we can resort to an embedded Laplace approximation.

+
functions {
+  // log likelihood function
+  real ll_function(vector f, real a, array[] int z) {
+      return bernoulli_logit_lpmf(z | a + f);
+  }
+
+  // Prior covariance function for the embedded Laplace approximation:
+  // exponentiated quadratic kernel with marginal standard deviation alpha and
+  // length-scale rho, evaluated at the N inputs x, with delta added to the
+  // diagonal to keep the matrix positive definite.
+  matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
+    return add_diag(K, delta);
+  }
+}
+
+// ...
+
+transformed data {
+  int hessian_block_size = 1;
+}
+
+// ...
+
+model {
+  target += laplace_marginal(ll_function, (a, z), hessian_block_size,
+                             cov_function, (rho, alpha, x, N, delta));
+}
+
+generated quantities {
+  vector[N] f = laplace_latent_rng(ll_function, (a, z), hessian_block_size,
+                                   cov_function, (rho, alpha, x, N, delta));
+}
+

While marginalization with a Laplace approximation can lead to faster inference, it also introduces an approximation error. In practice, this error may not be negligible with a Bernoulli likelihood; for more discussion see, e.g. (Vehtari et al. 2016; Margossian et al. 2020).

+
+
+
+

Automatic relevance determination

+

If we have multivariate inputs \(x \in \mathbb{R}^D\), the squared exponential covariance function can be further generalized by fitting a scale parameter \(\rho_d\) for each dimension \(d\), \[ + k(x \mid \alpha, \vec{\rho}, \sigma)_{i, j} = \alpha^2 \exp +\left(-\dfrac{1}{2} +\sum_{d=1}^D \dfrac{1}{\rho_d^2} (x_{i,d} - x_{j,d})^2 +\right) ++ \delta_{i, j}\sigma^2. +\] The estimation of \(\rho\) was termed “automatic relevance determination” by Neal (1996), but this is misleading, because the magnitude of the scale of the posterior for each \(\rho_d\) is dependent on the scaling of the input data along dimension \(d\). Moreover, the scale of the parameters \(\rho_d\) measures non-linearity along the \(d\)-th dimension, rather than “relevance” (Piironen and Vehtari 2016).

+

A priori, the closer \(\rho_d\) is to zero, the more nonlinear the conditional mean in dimension \(d\) is. A posteriori, the actual dependencies between \(x\) and \(y\) play a role. With one covariate \(x_1\) having a linear effect and another covariate \(x_2\) having a nonlinear effect, it is possible that \(\rho_1 > \rho_2\) even if the predictive relevance of \(x_1\) is higher (Rasmussen and Williams 2006, 80). The collection of \(\rho_d\) (or \(1/\rho_d\)) parameters can also be modeled hierarchically.

+

The implementation of automatic relevance determination is a straightforward extension of the one-dimensional case by modifying rho to be an array.

+
data {  // observed regression data with multivariate inputs
+  int<lower=1> N;  // number of observations
+  int<lower=1> D;  // input dimensionality
+  array[N] vector[D] x;  // inputs, one D-vector per observation
+  vector[N] y;  // observed outputs
+}
+transformed data {
+  real delta = 1e-9;  // small positive term added to the diagonal of K
+}
+parameters {
+  array[D] real<lower=0> rho;  // one length-scale per input dimension
+  real<lower=0> alpha;  // marginal standard deviation
+  real<lower=0> sigma;  // scale of the noise term
+  vector[N] eta;  // standard normal variates that are transformed into f
+}
+model {
+  vector[N] f;  // latent GP values at the inputs
+  {
+    matrix[N, N] L_K;  // Cholesky factor of K
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);  // rho is passed as an array of length-scales
+    for (n in 1:N) {
+      K[n, n] = K[n, n] + delta;
+    }
+    L_K = cholesky_decompose(K);
+    f = L_K * eta;
+  }
+
+  rho ~ inv_gamma(5, 5);  // vectorized: the same prior applies to every rho[d]
+  alpha ~ std_normal();
+  sigma ~ std_normal();
+  eta ~ std_normal();
+
+  y ~ normal(f, sigma);
+}
+
+
+

Priors for Gaussian process parameters

+

Formulating priors for GP hyperparameters requires the analyst to consider the inherent statistical properties of a GP, the GP’s purpose in the model, and the numerical issues that may arise in Stan when estimating a GP.

+

Perhaps most importantly, the parameters \(\rho\) and \(\alpha\) are weakly identified (Zhang 2004). The ratio of the two parameters is well-identified, but in practice we put independent priors on the two hyperparameters because these two quantities are more interpretable than their ratio.

+
+

Priors for length-scale

+

GPs are a flexible class of priors and, as such, can represent a wide spectrum of functions. For length scales below the minimum spacing of the covariates the GP likelihood plateaus. Unless regularized by a prior, this flat likelihood induces considerable posterior mass at small length scales where the observation variance drops to zero and the functions supported by the GP begin to exactly interpolate between the input data. The resulting posterior not only significantly overfits to the input data, it also becomes hard to accurately sample using Euclidean HMC.

+

We may wish to put further soft constraints on the length-scale, but these are dependent on how the GP is used in our statistical model.

+

If our model consists of only the GP, i.e.: \[\begin{align*} f &\sim \textsf{multivariate normal}\left(0, K(x \mid \alpha, \rho)\right) \\ y_i &\sim \textsf{normal}(f_i, \sigma) \, \forall i \in \{1, \dots, N\} \\ & x \in \mathbb{R}^{N \times D}, \quad f \in \mathbb{R}^N \end{align*}\]

+

we likely don’t need constraints beyond penalizing small length-scales. We’d like to allow the GP prior to represent both high-frequency and low-frequency functions, so our prior should put non-negligible mass on both sets of functions. In this case, an inverse gamma, inv_gamma_lpdf in Stan’s language, will work well as it has a sharp left tail that puts negligible mass on infinitesimal length-scales, but a generous right tail, allowing for large length-scales. Inverse gamma priors will avoid infinitesimal length-scales because the density is zero at zero, so the posterior for length-scale will be pushed away from zero. An inverse gamma distribution is one of many zero-avoiding or boundary-avoiding distributions.2

+

If we’re using the GP as a component in a larger model that includes an overall mean and fixed effects for the same variables we’re using as the domain for the GP, i.e.: \[\begin{align*} f &\sim \textsf{multivariate normal}\big(0, K(x \mid \alpha, \rho)\big) \\ y_i &\sim \textsf{normal}\left(\beta_0 + x_i \beta_{[1:D]} + f_i, \sigma\right) \, \forall i \in \{1, \dots, N\} \\ & x_i^T, \beta_{[1:D]} \in \mathbb{R}^D,\quad x \in \mathbb{R}^{N \times D},\quad f \in \mathbb{R}^N \end{align*}\]

+

we’ll likely want to constrain large length-scales as well. A length scale that is larger than the scale of the data yields a GP posterior that is practically linear (with respect to the particular covariate) and increasing the length scale has little impact on the likelihood. This will introduce nonidentifiability in our model, as both the fixed effects and the GP will explain similar variation. In order to limit the amount of overlap between the GP and the linear regression, we should use a prior with a sharper right tail to limit the GP to higher-frequency functions. We can use a generalized inverse Gaussian distribution: \[\begin{align*} f(x \mid a, b, p) &= \dfrac{\left(a/b\right)^{p/2}}{2K_p\left(\sqrt{ab}\right)} x^{p - 1}\exp\big(-(ax + b / x)/2\big) \\ & x, a, b \in \mathbb{R}^{+},\quad p \in \mathbb{Z} \end{align*}\]

+

which has an inverse gamma left tail if \(p \leq 0\) and an inverse Gaussian right tail. This has not yet been implemented in Stan’s math library, but it is possible to implement as a user defined function:

+
functions {
+  real generalized_inverse_gaussian_lpdf(real x, int p,
+                                        real a, real b) {
+    return p * 0.5 * log(a / b)
+      - log(2 * modified_bessel_second_kind(p, sqrt(a * b)))
+      + (p - 1) * log(x)
+      - (a * x + b / x) * 0.5;
+ }
+}
+data {
+  // ...
+}
+

If we have high-frequency covariates in our fixed effects, we may wish to further regularize the GP away from high-frequency functions, which means we’ll need to penalize smaller length-scales. Luckily, we have a useful way of thinking about how length-scale affects the frequency of the functions supported by the GP. If we were to repeatedly draw from a zero-mean GP with a length-scale of \(\rho\) in a fixed-domain \([0,T]\), we would get a distribution for the number of times each draw of the GP crossed the zero axis. The expectation of this random variable, the number of zero crossings, is \(T / (\pi \rho)\). You can see that as \(\rho\) decreases, the expectation of the number of zero crossings increases as the GP is representing higher-frequency functions. Thus, this is a good statistic to keep in mind when setting a lower-bound for our prior on length-scale in the presence of high-frequency covariates. However, this statistic is only valid for one-dimensional inputs.

+
+
+

Priors for marginal standard deviation

+

The parameter \(\alpha\) corresponds to how much of the variation is explained by the regression function and has a similar role to the prior variance for linear model weights. This means the prior can be the same as used in linear models, such as a half-\(t\) prior on \(\alpha\).

+

A half-\(t\) or half-Gaussian prior on alpha also has the benefit of putting nontrivial prior mass around zero. This allows the GP to support the zero function and allows the possibility that the GP won’t contribute to the conditional mean of the total output.

+
+
+
+

Predictive inference with a Gaussian process

+

Suppose for a given sequence of inputs \(x\) that the corresponding outputs \(y\) are observed. Given a new sequence of inputs \(\tilde{x}\), the posterior predictive distribution of their labels is computed by sampling outputs \(\tilde{y}\) according to \[ p\left(\tilde{y} \mid \tilde{x},x,y\right) \ = \ \frac{p\left(\tilde{y}, y \mid \tilde{x},x\right)}{p(y \mid x)} \ \propto \ p\left(\tilde{y}, y \mid \tilde{x},x\right). \]

+

A direct implementation in Stan defines a model in terms of the joint distribution of the observed \(y\) and unobserved \(\tilde{y}\).

+
data {
+  int<lower=1> N1;
+  array[N1] real x1;
+  vector[N1] y1;
+  int<lower=1> N2;
+  array[N2] real x2;
+}
+transformed data {
+  real delta = 1e-9;
+  int<lower=1> N = N1 + N2;
+  array[N] real x;
+  for (n1 in 1:N1) {
+    x[n1] = x1[n1];
+  }
+  for (n2 in 1:N2) {
+    x[N1 + n2] = x2[n2];
+  }
+}
+parameters {
+  real<lower=0> rho;
+  real<lower=0> alpha;
+  real<lower=0> sigma;
+  vector[N] eta;
+}
+transformed parameters {
+  vector[N] f;
+  {
+    matrix[N, N] L_K;
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
+
+    // diagonal elements
+    for (n in 1:N) {
+      K[n, n] = K[n, n] + delta;
+    }
+
+    L_K = cholesky_decompose(K);
+    f = L_K * eta;
+  }
+}
+model {
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();
+  sigma ~ std_normal();
+  eta ~ std_normal();
+
+  y1 ~ normal(f[1:N1], sigma);
+}
+generated quantities {
+  vector[N2] y2;
+  for (n2 in 1:N2) {
+    y2[n2] = normal_rng(f[N1 + n2], sigma);
+  }
+}
+

The input vectors x1 and x2 are declared as data, as is the observed output vector y1. The unknown output vector y2, which corresponds to input vector x2, is declared in the generated quantities block and will be sampled when the model is executed.

+

A transformed data block is used to combine the input vectors x1 and x2 into a single vector x.

+

The transformed parameters block declares and defines the combined output vector f, which consists of the concatenation of the conditional mean for known outputs y1 and unknown outputs y2. Thus the combined output vector f is aligned with the combined input vector x. All that is left is to define the univariate normal distribution statement for y1 in the model block.

+

The generated quantities block defines the quantity y2. We generate y2 by randomly generating N2 values from univariate normals with each mean corresponding to the appropriate element in f.

+
+

Predictive inference in non-Gaussian GPs

+

We can do predictive inference in non-Gaussian GPs in much the same way as we do with Gaussian GPs.

+

Consider the following full model for prediction using logistic Gaussian process regression.

+
data {
+  int<lower=1> N1;
+  array[N1] real x1;
+  array[N1] int<lower=0, upper=1> z1;
+  int<lower=1> N2;
+  array[N2] real x2;
+}
+transformed data {
+  real delta = 1e-9;
+  int<lower=1> N = N1 + N2;
+  array[N] real x;
+  for (n1 in 1:N1) {
+    x[n1] = x1[n1];
+  }
+  for (n2 in 1:N2) {
+    x[N1 + n2] = x2[n2];
+  }
+}
+parameters {
+  real<lower=0> rho;
+  real<lower=0> alpha;
+  real a;
+  vector[N] eta;
+}
+transformed parameters {
+  vector[N] f;
+  {
+    matrix[N, N] L_K;
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
+
+    // diagonal elements
+    for (n in 1:N) {
+      K[n, n] = K[n, n] + delta;
+    }
+
+    L_K = cholesky_decompose(K);
+    f = L_K * eta;
+  }
+}
+model {
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();
+  a ~ std_normal();
+  eta ~ std_normal();
+
+  z1 ~ bernoulli_logit(a + f[1:N1]);
+}
+generated quantities {
+  array[N2] int z2;
+  for (n2 in 1:N2) {
+    z2[n2] = bernoulli_logit_rng(a + f[N1 + n2]);
+  }
+}
+
+
+

Analytical form of joint predictive inference

+

Bayesian predictive inference for Gaussian processes with Gaussian observations can be sped up by deriving the posterior analytically, then directly sampling from it.

+

Jumping straight to the result, \[ p\left(\tilde{y} \mid \tilde{x},y,x\right) = \textsf{normal}\left(K^{\top}\Sigma^{-1}y,\ \Omega - K^{\top}\Sigma^{-1}K\right), \] where \(\Sigma = K(x \mid \alpha, \rho, \sigma)\) is the result of applying the covariance function to the inputs \(x\) with observed outputs \(y\), \(\Omega = K(\tilde{x} \mid \alpha, \rho)\) is the result of applying the covariance function to the inputs \(\tilde{x}\) for which predictions are to be inferred, and \(K\) is the matrix of covariances between inputs \(x\) and \(\tilde{x}\), which in the case of the exponentiated quadratic covariance function would be \[ K(x \mid \alpha, \rho)_{i, j} = \alpha^2 \exp\left(-\dfrac{1}{2 \rho^2} \sum_{d=1}^D \left(x_{i,d} - \tilde{x}_{j,d}\right)^2\right). \]

+

There is no noise term including \(\sigma^2\) because the indexes of elements in \(x\) and \(\tilde{x}\) are never the same.

+

The Stan code below uses the analytic form of the posterior and provides sampling of the resulting multivariate normal through the Cholesky decomposition. The data declaration is the same as for the latent variable example, but we’ve defined a function called gp_pred_rng which will generate a draw from the posterior predictive mean conditioned on observed data y1. The code uses a Cholesky decomposition in triangular solves in order to cut down on the number of matrix-matrix multiplications when computing the conditional mean and the conditional covariance of \(p(\tilde{y})\).

+
functions {
+  vector gp_pred_rng(array[] real x2,
+                     vector y1,
+                     array[] real x1,
+                     real alpha,
+                     real rho,
+                     real sigma,
+                     real delta) {
+    int N1 = rows(y1);
+    int N2 = size(x2);
+    vector[N2] f2;
+    {
+      matrix[N1, N1] L_K;
+      vector[N1] K_div_y1;
+      matrix[N1, N2] k_x1_x2;
+      matrix[N1, N2] v_pred;
+      vector[N2] f2_mu;
+      matrix[N2, N2] cov_f2;
+      matrix[N2, N2] diag_delta;
+      matrix[N1, N1] K;
+      K = gp_exp_quad_cov(x1, alpha, rho);
+      for (n in 1:N1) {
+        K[n, n] = K[n, n] + square(sigma);
+      }
+      L_K = cholesky_decompose(K);
+      K_div_y1 = mdivide_left_tri_low(L_K, y1);
+      K_div_y1 = mdivide_right_tri_low(K_div_y1', L_K)';
+      k_x1_x2 = gp_exp_quad_cov(x1, x2, alpha, rho);
+      f2_mu = (k_x1_x2' * K_div_y1);
+      v_pred = mdivide_left_tri_low(L_K, k_x1_x2);
+      cov_f2 = gp_exp_quad_cov(x2, alpha, rho) - v_pred' * v_pred;
+      diag_delta = diag_matrix(rep_vector(delta, N2));
+
+      f2 = multi_normal_rng(f2_mu, cov_f2 + diag_delta);
+    }
+    return f2;
+  }
+}
+data {
+  int<lower=1> N1;
+  array[N1] real x1;
+  vector[N1] y1;
+  int<lower=1> N2;
+  array[N2] real x2;
+}
+transformed data {
+  vector[N1] mu = rep_vector(0, N1);
+  real delta = 1e-9;
+}
+parameters {
+  real<lower=0> rho;
+  real<lower=0> alpha;
+  real<lower=0> sigma;
+}
+model {
+  matrix[N1, N1] L_K;
+  {
+    matrix[N1, N1] K = gp_exp_quad_cov(x1, alpha, rho);
+    real sq_sigma = square(sigma);
+
+    // diagonal elements
+    for (n1 in 1:N1) {
+      K[n1, n1] = K[n1, n1] + sq_sigma;
+    }
+
+    L_K = cholesky_decompose(K);
+  }
+
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();
+  sigma ~ std_normal();
+
+  y1 ~ multi_normal_cholesky(mu, L_K);
+}
+generated quantities {
+  vector[N2] f2;
+  vector[N2] y2;
+
+  f2 = gp_pred_rng(x2, y1, x1, alpha, rho, sigma, delta);
+  for (n2 in 1:N2) {
+    y2[n2] = normal_rng(f2[n2], sigma);
+  }
+}
+
+
+
+

Multiple-output Gaussian processes

+

Suppose we have observations \(y_i \in \mathbb{R}^M\) observed at \(x_i \in \mathbb{R}^K\). One can model the data like so: \[\begin{align*} y_i &\sim \textsf{multivariate normal}\left(f(x_i), \textbf{I}_M \sigma^2\right) \\ f(x) &\sim \textsf{GP}\big(m(x), K(x \mid \theta, \phi)\big) \\ & K(x \mid \theta, \phi) \in \mathbb{R}^{M \times M}, \quad f(x), m(x) \in \mathbb{R}^M \end{align*}\] where the \(K(x, x^\prime \mid \theta, \phi)_{[m, m^\prime]}\) entry defines the covariance between \(f_m(x)\) and \(f_{m^\prime}(x^\prime)\). This construction of Gaussian processes allows us to learn the covariance between the output dimensions of \(f(x)\). If we parameterize our kernel \(K\): \[ K(x, x^\prime \mid \theta, \phi)_{[m, m^\prime]} = k\left(x, x^\prime \mid \theta\right) k\left(m, m^\prime \mid \phi\right) \] then our finite dimensional generative model for the above is: \[\begin{align*} f &\sim \textsf{matrixnormal}\big(m(x), K(x \mid \alpha, \rho), C(\phi)\big) \\ y_{i, m} &\sim \textsf{normal}(f_{i,m}, \sigma) \\ f &\in \mathbb{R}^{N \times M} \end{align*}\] where \(K(x \mid \alpha, \rho)\) is the exponentiated quadratic kernel we’ve used throughout this chapter, and \(C(\phi)\) is a positive-definite matrix, parameterized by some vector \(\phi\).

+

The matrix normal distribution has two covariance matrices: \(K(x \mid \alpha, \rho)\) to encode column covariance, and \(C(\phi)\) to define row covariance. The salient features of the matrix normal are that the rows of the matrix \(f\) are distributed: \[ f_{[n,]} \sim \textsf{multivariate normal}\big(m(x)_{[n,]}, K(x \mid \alpha, \rho)_{[n,n]} C(\phi)\big) \] and that the columns of the matrix \(f\) are distributed: \[ f_{[,m]} \sim \textsf{multivariate normal}\big(m(x)_{[,m]}, K(x \mid \alpha, \rho) C(\phi)_{[m,m]}\big) \] This also means that \(\mathbb{E}\left[f^T f\right]\) is equal to \(\operatorname{trace}\!\big(K(x \mid \alpha, \rho)\big) \times C\), whereas \(\mathbb{E}\left[ff^T\right]\) is \(\operatorname{trace}(C) \times K(x \mid \alpha, \rho)\). We can derive this using properties of expectation and the matrix normal density.

+

We should set \(\alpha\) to \(1.0\) because the parameter is not identified unless we constrain \(\operatorname{trace}(C) = 1\). Otherwise, we can multiply \(\alpha\) by a scalar \(d\) and \(C\) by \(1/d\) and our likelihood will not change.

+

We can generate a random variable \(f\) from a matrix normal density in \(\mathbb{R}^{N \times M}\) using the following algorithm: \[\begin{align*} \eta_{i,j} &\sim \textsf{normal}(0, 1) \, \forall i,j \\ f &= L_{K(x \mid 1.0, \rho)} \, \eta \, L_C(\phi)^T \\ f &\sim \textsf{matrixnormal}\big(0, K(x \mid 1.0, \rho), C(\phi)\big) \\ \eta &\in \mathbb{R}^{N \times M} \\ L_C(\phi) &= \texttt{cholesky}\mathtt{\_}\texttt{decompose}\big(C(\phi)\big) \\ L_{K(x \mid 1.0, \rho)} &= \texttt{cholesky}\mathtt{\_}\texttt{decompose}\big(K(x \mid 1.0, \rho)\big) \end{align*}\]

+

This can be implemented in Stan using a latent-variable GP formulation. We’ve used \(\textsf{LKJCorr}\) for \(C(\phi)\), but any positive-definite matrix will do.

+
data {
+  int<lower=1> N;
+  int<lower=1> D;
+  array[N] real x;
+  matrix[N, D] y;
+}
+transformed data {
+  real delta = 1e-9;
+}
+parameters {
+  real<lower=0> rho;
+  vector<lower=0>[D] alpha;
+  real<lower=0> sigma;
+  cholesky_factor_corr[D] L_Omega;
+  matrix[N, D] eta;
+}
+model {
+  matrix[N, D] f;
+  {
+    matrix[N, N] K = gp_exp_quad_cov(x, 1.0, rho);
+    matrix[N, N] L_K;
+
+    // diagonal elements
+    for (n in 1:N) {
+      K[n, n] = K[n, n] + delta;
+    }
+
+    L_K = cholesky_decompose(K);
+    f = L_K * eta
+        * diag_pre_multiply(alpha, L_Omega)';
+  }
+
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();
+  sigma ~ std_normal();
+  L_Omega ~ lkj_corr_cholesky(3);
+  to_vector(eta) ~ std_normal();
+
+  to_vector(y) ~ normal(to_vector(f), sigma);
+}
+generated quantities {
+  matrix[D, D] Omega;
+  Omega = L_Omega * L_Omega';
+}
+ + + +
+
+
+ + + Back to top

References

+
+Cseke, Botond, and Tom Heskes. 2011. “Approximate Marginals in Latent Gaussian Models.” Journal of Machine Learning Research 12. +
+
+Kuss, Malte, and Carl E Rasmussen. 2005. “Assessing Approximate Inference for Binary Gaussian Process Classification.” Journal of Machine Learning Research 6: 1679–1704. +
+
+Margossian, Charles C, Aki Vehtari, Daniel Simpson, and Raj Agrawal. 2020. “Hamiltonian Monte Carlo Using an Adjoint-Differentiated Laplace Approximation: Bayesian Inference for Latent Gaussian Models and Beyond.” Advances in Neural Information Processing Systems 34. +
+
+Neal, Radford M. 1996. Bayesian Learning for Neural Networks. Lecture Notes in Statistics 118. New York: Springer. +
+
+———. 1997. “Monte Carlo Implementation of Gaussian Process Models for Bayesian Regression and Classification.” 9702. University of Toronto, Department of Statistics. +
+
+Piironen, Juho, and Aki Vehtari. 2016. “Projection Predictive Model Selection for Gaussian Processes.” In Machine Learning for Signal Processing (MLSP), 2016 IEEE 26th International Workshop on. IEEE. +
+
+Rasmussen, Carl Edward, and Christopher K. I. Williams. 2006. Gaussian Processes for Machine Learning. MIT Press. +
+
+Riutort-Mayol, Gabriel, Paul-Christian Bürkner, Michael R Andersen, Arno Solin, and Aki Vehtari. 2023. “Practical Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming.” Statistics and Computing 33 (1): 17. +
+
+Rue, Håvard, Sara Martino, and Nicolas Chopin. 2009. “Approximate Bayesian Inference for Latent Gaussian Models by Using Integrated Nested Laplace Approximations.” Journal of the Royal Statistical Society: Series B (Statistical Methodology) 71 (2): 319–92. https://doi.org/10.1111/j.1467-9868.2008.00700.x. +
+
+Vanhatalo, Jarno, Ville Pietiläinen, and Aki Vehtari. 2010. “Approximate Inference for Disease Mapping with Sparse Gaussian Processes.” Statistics in Medicine 29 (15): 1580–1607. +
+
+Vehtari, Aki, Tommi Mononen, Ville Tolvanen, Tuomas Sivula, and Ole Winther. 2016. “Bayesian Leave-One-Out Cross-Validation Approximations for Gaussian Latent Variable Models.” Journal of Machine Learning Research 17 (103): 1–38. http://jmlr.org/papers/v17/14-540.html. +
+
+Zhang, Hao. 2004. “Inconsistent Estimation and Asymptotically Equal Interpolations in Model-Based Geostatistics.” Journal of the American Statistical Association 99 (465): 250–61. +
+

Footnotes

+ +
    +
  1. Gaussian processes can be extended to covariance functions producing positive semi-definite matrices, but Stan does not support inference in the resulting models because the resulting distribution does not have unconstrained support.↩︎

  2. +
  3. A boundary-avoiding prior is just one where the limit of the density is zero at the boundary, the result of which is estimates that are pushed away from the boundary.↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/hyperspherical-models.html b/docs/2_39/stan-users-guide/hyperspherical-models.html new file mode 100644 index 000000000..08c37a052 --- /dev/null +++ b/docs/2_39/stan-users-guide/hyperspherical-models.html @@ -0,0 +1,1309 @@ + + + + + + + + + +Directions, Rotations, and Hyperspheres + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Directions, Rotations, and Hyperspheres

+

Directional statistics involve data and/or parameters that are constrained to be directions. The set of directions forms a sphere, the geometry of which is not smoothly mappable to that of a Euclidean space because you can move around a sphere and come back to where you started. This is why it is impossible to make a map of the globe on a flat piece of paper where all points that are close to each other on the globe are close to each other on the flat map. The fundamental problem is easy to visualize in two dimensions, because as you move around a circle, you wind up back where you started. In other words, 0 degrees and 360 degrees (equivalently, 0 and \(2 \pi\) radians) pick out the same point, and the distance between 359 degrees and 2 degrees is the same as the distance between 137 and 140 degrees.

+

Stan supports directional statistics by providing a unit-vector data type, the values of which determine points on a hypersphere (circle in two dimensions, sphere in three dimensions).

+
+

Unit vectors

+

The length of a vector \(x \in \mathbb{R}^K\) is given by \[ \Vert x \Vert = \sqrt{x^{\top}\,x} = \sqrt{x_1^2 + x_2^2 + \cdots + x_K^2}. \] Unit vectors are defined to be vectors of unit length (i.e., length one).

+

With a variable declaration such as

+
unit_vector[K] x;
+

the value of x will be constrained to be a vector of size K with unit length; the reference manual chapter on constrained parameter transforms provides precise definitions.

+

Warning: An extra term gets added to the log density to ensure the distribution on unit vectors is proper. This is not a problem in practice, but it may lead to misunderstandings of the target log density output (lp__ in some interfaces). The underlying source of the problem is that a unit vector of size \(K\) has only \(K - 1\) degrees of freedom. But there is no way to map those \(K - 1\) degrees of freedom continuously to \(\mathbb{R}^{K-1}\)—for example, the circle can’t be mapped continuously to a line so the limits work out, nor can a sphere be mapped to a plane. A workaround is needed instead. Stan’s unit vector transform uses \(K\) unconstrained variables, then projects down to the unit hypersphere. Even though the hypersphere is compact, the result would be an improper distribution. To ensure the unit vector distribution is proper, each unconstrained variable is given a “Jacobian” adjustment equal to an independent standard normal distribution. Effectively, each dimension is drawn standard normal, then they are together projected down to the hypersphere to produce a unit vector. The result is a proper uniform distribution over the hypersphere.

+
+
+

Circles, spheres, and hyperspheres

+

An \(n\)-sphere, written \(S^{n}\), is defined as the set of \((n + 1)\)-dimensional unit vectors, \[ S^{n} = \left\{ x \in \mathbb{R}^{n+1} \: : \: \Vert x \Vert = 1 \right\}. \]

+

Even though \(S^n\) is made up of points in \((n+1)\) dimensions, it is only an \(n\)-dimensional manifold. For example, \(S^2\) is defined as a set of points in \(\mathbb{R}^3\), but each such point may be described uniquely by a latitude and longitude. Geometrically, the surface defined by \(S^2\) in \(\mathbb{R}^3\) behaves locally like a plane, i.e., \(\mathbb{R}^2\). However, the overall shape of \(S^2\) is not like a plane in that it is compact (i.e., there is a maximum distance between points). If you set off around the globe in a “straight line” (i.e., a geodesic), you wind up back where you started eventually; that is why the geodesics on the sphere (\(S^2\)) are called “great circles,” and why we need to use some clever representations to do circular or spherical statistics.

+

Even though \(S^{n-1}\) behaves locally like \(\mathbb{R}^{n-1}\), there is no way to smoothly map between them. For example, because latitude and longitude work on a modular basis (wrapping at \(2\pi\) radians in natural units), they do not produce a smooth map.

+

Like a bounded interval \((a, b)\), in geometric terms, a sphere is compact in that the distance between any two points is bounded.

+
+
+

Transforming to unconstrained parameters

+

Stan (inverse) transforms arbitrary points in \(\mathbb{R}^{K+1}\) to points in \(S^K\) using the auxiliary variable approach of Muller (1959). A point \(y \in \mathbb{R}^K\) is transformed to a point \(x \in S^{K-1}\) by \[ x = \frac{y}{\sqrt{y^{\top} y}}. \]

+

The problem with this mapping is that it’s many to one; any point lying on a vector out of the origin is projected to the same point on the surface of the sphere. Muller (1959) introduced an auxiliary variable interpretation of this mapping that provides the desired properties of uniformity; the reference manual contains the precise definitions used in the chapter on constrained parameter transforms.

+
+

Warning: undefined at zero!

+

The above mapping from \(\mathbb{R}^n\) to \(S^{n-1}\) is not defined at zero. While this point outcome has measure zero during sampling, and may thus be ignored, it is the default initialization point and thus unit vector parameters cannot be initialized at zero. A simple workaround is to initialize from a small interval around zero, which is an option built into all of the Stan interfaces.

+
+
+
+

Unit vectors and rotations

+

Unit vectors correspond directly to angles and thus to rotations. This is easy to see in two dimensions, where a point on a circle determines a compass direction, or equivalently, an angle \(\theta\). Given an angle \(\theta\), a matrix can be defined, the pre-multiplication by which rotates a point by an angle of \(\theta\). For angle \(\theta\) (in two dimensions), the \(2 \times 2\) rotation matrix is defined by \[ R_{\theta} = \begin{bmatrix} \cos \theta & -\sin \theta \\ \sin \theta & \cos \theta \end{bmatrix}. \] Given a two-dimensional vector \(x\), \(R_{\theta} \, x\) is the rotation of \(x\) (around the origin) by an angle of \(\theta\).

+
+

Angles from unit vectors

+

Angles can be calculated from unit vectors. For example, a random variable theta representing an angle in \((-\pi, \pi)\) radians can be declared as a two-dimensional unit vector then transformed to an angle.

+
parameters {
+  unit_vector[2] xy;
+}
+transformed parameters {
+  real<lower=-pi(), upper=pi()> theta = atan2(xy[2], xy[1]);
+}
+

If the distribution of \((x, y)\) is uniform over a circle, then the distribution of \(\operatorname{atan2}(y, x)\) is uniform over \((-\pi, \pi)\).

+

It might be tempting to try to just declare theta directly as a parameter with the lower and upper bound constraint as given above. The drawback to this approach is that the values \(-\pi\) and \(\pi\) are at \(-\infty\) and \(\infty\) on the unconstrained scale, which can produce multimodal posterior distributions when the true distribution on the circle is unimodal.

+

With a little additional work on the trigonometric front, the same conversion back to angles may be accomplished in more dimensions.

+
+
+
+

Circular representations of days and years

+

A 24-hour clock naturally represents the progression of time through the day, moving from midnight to noon and back again in one rotation. A point on a circle divided into 24 hours is thus a natural representation for the time of day. Similarly, years cycle through the seasons and return to the season from which they started.

+

In human affairs, temporal effects often arise by convention. These can be modeled directly with ad-hoc predictors for holidays and weekends, or with data normalization back to natural scales for daylight savings time.

+ + + +
+
+ + Back to top

References

+
+Muller, Mervin E. 1959. “A Note on a Method for Generating Points Uniformly on n-Dimensional Spheres.” Commun. ACM 2 (4): 19–20. https://doi.org/10.1145/377939.377946. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/img/Figure_DDM.pdf b/docs/2_39/stan-users-guide/img/Figure_DDM.pdf new file mode 100644 index 000000000..381d4b538 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/Figure_DDM.pdf differ diff --git a/docs/2_39/stan-users-guide/img/Figure_DDM.png b/docs/2_39/stan-users-guide/img/Figure_DDM.png new file mode 100644 index 000000000..859185e8f Binary files /dev/null and b/docs/2_39/stan-users-guide/img/Figure_DDM.png differ diff --git a/docs/2_39/stan-users-guide/img/change-point-posterior.png b/docs/2_39/stan-users-guide/img/change-point-posterior.png new file mode 100644 index 000000000..41541256c Binary files /dev/null and b/docs/2_39/stan-users-guide/img/change-point-posterior.png differ diff --git a/docs/2_39/stan-users-guide/img/funnel-fit.png b/docs/2_39/stan-users-guide/img/funnel-fit.png new file mode 100644 index 000000000..d06ff5daf Binary files /dev/null and b/docs/2_39/stan-users-guide/img/funnel-fit.png differ diff --git a/docs/2_39/stan-users-guide/img/funnel.png b/docs/2_39/stan-users-guide/img/funnel.png new file mode 100644 index 000000000..f3f1863e3 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/funnel.png differ diff --git a/docs/2_39/stan-users-guide/img/logo_tm.png b/docs/2_39/stan-users-guide/img/logo_tm.png new file mode 100644 index 000000000..48c9769c7 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/logo_tm.png differ diff --git a/docs/2_39/stan-users-guide/img/non-identified-plus-prior.png b/docs/2_39/stan-users-guide/img/non-identified-plus-prior.png new file mode 100644 index 000000000..e9d242084 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/non-identified-plus-prior.png differ diff --git a/docs/2_39/stan-users-guide/img/non-identified.png b/docs/2_39/stan-users-guide/img/non-identified.png new file mode 100644 index 000000000..cb6f2bd7b Binary files /dev/null and 
b/docs/2_39/stan-users-guide/img/non-identified.png differ diff --git a/docs/2_39/stan-users-guide/img/one-param-identified.png b/docs/2_39/stan-users-guide/img/one-param-identified.png new file mode 100644 index 000000000..e90fedbe3 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/one-param-identified.png differ diff --git a/docs/2_39/stan-users-guide/img/ppc-nb-pois.jpg b/docs/2_39/stan-users-guide/img/ppc-nb-pois.jpg new file mode 100644 index 000000000..8a4f54567 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/ppc-nb-pois.jpg differ diff --git a/docs/2_39/stan-users-guide/img/ppc-pois-pois.jpg b/docs/2_39/stan-users-guide/img/ppc-pois-pois.jpg new file mode 100644 index 000000000..03234fe14 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/ppc-pois-pois.jpg differ diff --git a/docs/2_39/stan-users-guide/img/ppc-pvalue-nb-pois-mean.jpg b/docs/2_39/stan-users-guide/img/ppc-pvalue-nb-pois-mean.jpg new file mode 100644 index 000000000..e46ba0cc4 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/ppc-pvalue-nb-pois-mean.jpg differ diff --git a/docs/2_39/stan-users-guide/img/ppc-pvalue-nb-pois-sd.jpg b/docs/2_39/stan-users-guide/img/ppc-pvalue-nb-pois-sd.jpg new file mode 100644 index 000000000..f77ed895e Binary files /dev/null and b/docs/2_39/stan-users-guide/img/ppc-pvalue-nb-pois-sd.jpg differ diff --git a/docs/2_39/stan-users-guide/img/s-discrete-posterior.png b/docs/2_39/stan-users-guide/img/s-discrete-posterior.png new file mode 100644 index 000000000..5a6d6e422 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/s-discrete-posterior.png differ diff --git a/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-mu.png b/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-mu.png new file mode 100644 index 000000000..bffabef60 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-mu.png differ diff --git a/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-tau.png 
b/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-tau.png new file mode 100644 index 000000000..5c17b348d Binary files /dev/null and b/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-tau.png differ diff --git a/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-theta1.png b/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-theta1.png new file mode 100644 index 000000000..63ce49d7e Binary files /dev/null and b/docs/2_39/stan-users-guide/img/sbc-ctr-8-schools-theta1.png differ diff --git a/docs/2_39/stan-users-guide/img/sbc-normal-normal.png b/docs/2_39/stan-users-guide/img/sbc-normal-normal.png new file mode 100644 index 000000000..56cc31f18 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/sbc-normal-normal.png differ diff --git a/docs/2_39/stan-users-guide/img/sbc-student-t-normal.png b/docs/2_39/stan-users-guide/img/sbc-student-t-normal.png new file mode 100644 index 000000000..ad859e2f3 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/sbc-student-t-normal.png differ diff --git a/docs/2_39/stan-users-guide/img/sho-ode-trajectory.png b/docs/2_39/stan-users-guide/img/sho-ode-trajectory.png new file mode 100644 index 000000000..6a6a12862 Binary files /dev/null and b/docs/2_39/stan-users-guide/img/sho-ode-trajectory.png differ diff --git a/docs/2_39/stan-users-guide/index.html b/docs/2_39/stan-users-guide/index.html new file mode 100644 index 000000000..89ccd8743 --- /dev/null +++ b/docs/2_39/stan-users-guide/index.html @@ -0,0 +1,1175 @@ + + + + + + + + + +Stan User’s Guide + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ +
+
+

Stan User’s Guide

+

Version 2.39

+
+ + + +
+ + + + +
+ + + +
+ + +

+

This is the official user’s guide for Stan. It provides example models and programming techniques for coding statistical models in Stan.

+
    +
  • Part 1 gives Stan code and discussions for several important classes of models.

  • +
  • Part 2 discusses various general Stan programming techniques that are not tied to any particular model.

  • +
  • Part 3 introduces algorithms for calibration and model checking that require multiple runs of Stan.

  • +
  • The appendices provide an introduction to the stanc3 compiler used in the various interfaces to Stan, a style guide, and advice for users of BUGS and JAGS.

  • +
+

We recommend working through this guide using the textbooks Bayesian Data Analysis and Statistical Rethinking: A Bayesian Course with Examples in R and Stan as references on the concepts, and using the Stan Reference Manual when necessary to clarify programming issues.

+

Download the pdf version of this manual.

+ +
+

Licensing

+ + + +
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/latent-discrete.html b/docs/2_39/stan-users-guide/latent-discrete.html new file mode 100644 index 000000000..49bf88b9b --- /dev/null +++ b/docs/2_39/stan-users-guide/latent-discrete.html @@ -0,0 +1,1933 @@ + + + + + + + + + +Latent Discrete Parameters + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Latent Discrete Parameters

+

Stan does not support sampling discrete parameters. So it is not possible to directly translate BUGS or JAGS models with discrete parameters (i.e., discrete stochastic nodes). Nevertheless, it is possible to code many models that involve bounded discrete parameters by marginalizing out the discrete parameters.1

+

This chapter shows how to code several widely-used models involving latent discrete parameters. The next chapter, the clustering chapter, considers further models involving latent discrete parameters.

+
+

The benefits of marginalization

+

Although it requires some algebra on the joint probability function, a pleasant byproduct of the required calculations is the posterior expectation of the marginalized variable, which is often the quantity of interest for a model. This allows far greater exploration of the tails of the distribution as well as more efficient sampling on an iteration-by-iteration basis because the expectation at all possible values is being used rather than itself being estimated through sampling a discrete parameter.

+

Standard optimization algorithms, including expectation maximization (EM), are often provided in applied statistics papers to describe maximum likelihood estimation algorithms. Such derivations provide exactly the marginalization needed for coding the model in Stan.

+
+
+

Change point models

+

The first example is a model of coal mining disasters in the U.K. for the years 1851–1962.2

+
+

Model with latent discrete parameter

+

Fonnesbeck et al. (2013, sec. 3.1) provides a Poisson model of disaster \(D_t\) in year \(t\) with two rate parameters, an early rate (\(e\)) and late rate (\(l\)), that change at a given point in time \(s\). The full model expressed using a latent discrete parameter \(s\) is \[\begin{align*} +e &\sim \textsf{exponential}(r_e) \\ +l &\sim \textsf{exponential}(r_l) \\ +s &\sim \textsf{uniform}(1, T) \\ +D_t &\sim \textsf{Poisson}(t < s \; ? \; e \: : \: l) +\end{align*}\]

+

The last line uses the conditional operator (also known as the ternary operator), which is borrowed from C and related languages. The conditional operator has the same behavior as its counterpart in C++.3

+

It uses a compact notation involving separating its three arguments by a question mark (?) and a colon (:). The conditional operator is defined by \[ +c \; ? \; x_1 \: : \: x_2 += +\begin{cases} +\ x_1 & \quad\text{if } c \text{ is true (i.e., non-zero), and} \\ +\ x_2 & \quad\text{if } c \text{ is false (i.e., zero).} +\end{cases} +\]

+
+
+

Marginalizing out the discrete parameter

+

To code this model in Stan, the discrete parameter \(s\) must be marginalized out to produce a model defining the log of the probability function \(p(e,l,D_t)\). The full joint probability factors as \[\begin{align*} +p(e,l,s,D) &= p(e) \, p(l) \, p(s) \, p(D \mid s, e, l) \\ +&= \textsf{exponential}(e \mid r_e) \ \textsf{exponential}(l \mid r_l) \, + \textsf{uniform}(s \mid 1, T) \\ +& \qquad \prod_{t=1}^T \textsf{Poisson}(D_t \mid t < s \; ? \; e \: : \: l). +\end{align*}\]

+

To marginalize, an alternative factorization into prior and likelihood is used, \[ +p(e,l,D) = p(e,l) \, p(D \mid e,l), +\]

+

where the likelihood is defined by marginalizing \(s\) as \[\begin{align*} +p(D \mid e,l) &= \sum_{s=1}^T p(s, D \mid e,l) \\ +&= \sum_{s=1}^T p(s) \, p(D \mid s,e,l) \\ +&= \sum_{s=1}^T \textsf{uniform}(s \mid 1,T) \, + \prod_{t=1}^T \textsf{Poisson}(D_t \mid t < s \; ? \; e \: : \: l). +\end{align*}\]

+

Stan operates on the log scale and thus requires the log likelihood, \[\begin{align*} +\log p(D \mid e,l) +&= \texttt{log}\mathtt{\_}\texttt{sum}\mathtt{\_}\texttt{exp}_{s=1}^T + \left( \log \textsf{uniform}(s \mid 1, T) \vphantom{\sum_{t=1}^T}\right. \\ +&\qquad \left. + + \sum_{t=1}^T \log \textsf{Poisson}(D_t \mid t < s \; ? \; e \: : \: l) +\right), +\end{align*}\] where the log sum of exponents function is defined by \[ +\texttt{log}\mathtt{\_}\texttt{sum}\mathtt{\_}\texttt{exp}_{n=1}^N \, \alpha_n = +\log \sum_{n=1}^N \exp(\alpha_n). +\]

+

The log sum of exponents function allows the model to be coded directly in Stan using the built-in function log_sum_exp, which provides both arithmetic stability and efficiency for mixture model calculations.

+
+
+

Coding the model in Stan

+

The Stan program for the change point model is shown in the figure below. The transformed parameter lp[s] stores the quantity \(\log p(s, D \mid e, l)\).

+
data {
+  real<lower=0> r_e;
+  real<lower=0> r_l;
+
+  int<lower=1> T;
+  array[T] int<lower=0> D;
+}
+transformed data {
+  real log_unif;
+  log_unif = -log(T);
+}
+parameters {
+  real<lower=0> e;
+  real<lower=0> l;
+}
+transformed parameters {
+  vector[T] lp;
+  lp = rep_vector(log_unif, T);
+  for (s in 1:T) {
+    for (t in 1:T) {
+      lp[s] = lp[s] + poisson_lpmf(D[t] | t < s ? e : l);
+    }
+  }
+}
+model {
+  e ~ exponential(r_e);
+  l ~ exponential(r_l);
+  target += log_sum_exp(lp);
+}
+

A change point model in which disaster rates D[t] have one rate, e, before the change point and a different rate, l, after the change point. The change point itself, s, is marginalized out as described in the text.

+

Although the change-point model is coded directly, the doubly nested loop used for s and t is quadratic in T. Luke Wiklendt pointed out that a linear alternative can be achieved by the use of dynamic programming similar to the forward-backward algorithm for Hidden Markov models; he submitted a slight variant of the following code to replace the transformed parameters block of the above Stan program.

+
transformed parameters {
+    vector[T] lp;
+    {
+      vector[T + 1] lp_e;
+      vector[T + 1] lp_l;
+      lp_e[1] = 0;
+      lp_l[1] = 0;
+      for (t in 1:T) {
+        lp_e[t + 1] = lp_e[t] + poisson_lpmf(D[t] | e);
+        lp_l[t + 1] = lp_l[t] + poisson_lpmf(D[t] | l);
+      }
+      lp = rep_vector(log_unif + lp_l[T + 1], T)
+           + head(lp_e, T) - head(lp_l, T);
+    }
+  }
+

As should be obvious from looking at it, it has linear complexity in T rather than quadratic. The result for the mining-disaster data is about 20 times faster; the improvement will be greater for larger T.

+

The key to understanding Wiklendt’s dynamic programming version is to see that head(lp_e, T) holds the forward values, whereas lp_l[T + 1] - head(lp_l, T) holds the backward values; the clever use of subtraction allows lp_l to be accumulated naturally in the forward direction.

+
+
+

Fitting the model with MCMC

+

This model is easy to fit using MCMC with NUTS in its default configuration. Convergence is fast and sampling produces roughly one effective sample every two iterations. Because it is a relatively small model (the inner double loop over time is roughly 20,000 steps), it is fast.

+

The value of lp for each iteration for each change point is available because it is declared as a transformed parameter. If the value of lp were not of interest, it could be coded as a local variable in the model block and thus avoid the I/O overhead of saving values every iteration.

+
+
+

Posterior distribution of the discrete change point

+

The value of lp[s] in a given iteration is given by \(\log +p(s,D \mid e,l)\) for the values of the early and late rates, \(e\) and \(l\), in the iteration. In each iteration after convergence, the early and late disaster rates, \(e\) and \(l\), are drawn from the posterior \(p(e,l \mid D)\) by MCMC sampling and the associated lp calculated. The value of lp may be normalized to calculate \(p(s \mid e,l,D)\) in each iteration, based on the current values of \(e\) and \(l\). Averaging over iterations provides an unnormalized probability estimate of the change point being \(s\) (see below for the normalizing constant), \[\begin{align*} +p(s \mid D) &\propto q(s \mid D) \\ +&= \frac{1}{M} \sum_{m=1}^{M} \exp(\texttt{lp}[m,s]), +\end{align*}\] where \(\texttt{lp}[m,s]\) represents the value of lp in posterior draw \(m\) for change point \(s\). By averaging over draws, \(e\) and \(l\) are themselves marginalized out, and the result has no dependence on a given iteration’s value for \(e\) and \(l\). A final normalization then produces the quantity of interest, the posterior probability of the change point being \(s\) conditioned on the data \(D\), \[ +p(s \mid D) = \frac{q(s \mid D)}{\sum_{s'=1}^T q(s' \mid D)}. +\]

+

A plot of the values of \(\log p(s \mid D)\) computed using Stan 2.4’s default MCMC implementation is shown in the posterior plot.

+

Log probability of change point being in year, calculated analytically.

+
+
+

+
Analytical change-point posterior
+
+
+

The frequency of change points generated by sampling the discrete change points.

+
+
+

+
Sampled change-point posterior
+
+
+

In order for their range of estimates to be visible, the first plot is on the log scale and the second plot on the linear scale; note the narrower range of years in the second plot resulting from sampling. The posterior mean of \(s\) is roughly 1891.

+
+
+

Discrete sampling

+

The generated quantities block may be used to draw discrete parameter values using the built-in pseudo-random number generators. For example, with lp defined as above, the following program draws a random value for s at every iteration.

+
generated quantities {
+  int<lower=1, upper=T> s;
+  s = categorical_logit_rng(lp);
+}
+

A posterior histogram of draws for \(s\) is shown on the second change point posterior figure above.

+

Compared to working in terms of expectations, discrete sampling is highly inefficient, especially for tails of distributions, so this approach should only be used if draws from a distribution are explicitly required. Otherwise, expectations should be computed in the generated quantities block based on the posterior distribution for s given by softmax(lp).

+
+
+

Posterior covariance

+

The discrete sample generated for \(s\) can be used to calculate covariance with other parameters. Although the sampling approach is straightforward, it is more statistically efficient (in the sense of requiring far fewer iterations for the same degree of accuracy) to calculate these covariances in expectation using lp.

+
+
+

Multiple change points

+

There is no obstacle in principle to allowing multiple change points. The only issue is that computation increases from linear to quadratic in marginalizing out two change points, cubic for three change points, and so on. There are three parameters, e, m, and l, and two loops for the change point and then one over time, with log densities being stored in a matrix.

+
matrix[T, T] lp;
+lp = rep_matrix(log_unif, T, T);
+for (s1 in 1:T) {
+  for (s2 in 1:T) {
+    for (t in 1:T) {
+      lp[s1,s2] = lp[s1,s2]
+        + poisson_lpmf(D[t] | t < s1 ? e : (t < s2 ? m : l));
+    }
+  }
+}
+

The matrix can then be converted back to a vector using to_vector before being passed to log_sum_exp.

+
+
+
+

Mark-recapture models

+

A widely applied field method in ecology is to capture (or sight) animals, mark them (e.g., by tagging), then release them. This process is then repeated one or more times, and is often done for populations on an ongoing basis. The resulting data may be used to estimate population size.

+

The first subsection describes a simple mark-recapture model that does not involve any latent discrete parameters. The following subsections describe the Cormack-Jolly-Seber model, which involves latent discrete parameters for animal death.

+
+

Simple mark-recapture model

+

In the simplest case, a one-stage mark-recapture study produces the following data

+
    +
  • \(M\) : number of animals marked in first capture,
  • +
  • \(C\) : number of animals in second capture, and
  • +
  • \(R\) : number of marked animals in second capture.
  • +
+

The estimand of interest is

+
    +
  • \(N\) : number of animals in the population.
  • +
+

Despite the notation, the model will take \(N\) to be a continuous parameter; just because the population must be finite doesn’t mean the parameter representing it must be. The parameter will be used to produce a real-valued estimate of the population size.

+

The Lincoln-Petersen (Lincoln 1930; Petersen 1896) method for estimating population size is \[ +\hat{N} = \frac{M C}{R}. +\]

+

This population estimate would arise from a probabilistic model in which the number of recaptured animals is distributed binomially, \[ +R \sim \textsf{binomial}(C, M / N) +\] given the total number of animals captured in the second round (\(C\)) with a recapture probability of \(M/N\), the fraction of the total population \(N\) marked in the first round.

+
data {
+  int<lower=0> M;
+  int<lower=0> C;
+  int<lower=0, upper=min(M, C)> R;
+}
+parameters {
+  real<lower=(C - R + M)> N;
+}
+model {
+  R ~ binomial(C, M / N);
+}
+

A probabilistic formulation of the Lincoln-Petersen estimator for population size based on data from a one-step mark-recapture study. The lower bound on \(N\) is necessary to efficiently eliminate impossible values.

+

The probabilistic variant of the Lincoln-Petersen estimator can be directly coded in Stan as shown in the Lincoln-Petersen model figure. The Lincoln-Petersen estimate is the maximum likelihood estimate (MLE) for this model.

+

To ensure the MLE is the Lincoln-Petersen estimate, an improper uniform prior for \(N\) is used; this could (and should) be replaced with a more informative prior if possible, based on knowledge of the population under study.

+

The one tricky part of the model is the lower bound \(C - R + M\) placed on the population size \(N\). Values below this bound are impossible because it is otherwise not possible to draw \(R\) samples out of the \(C\) animals recaptured. Implementing this lower bound is necessary to ensure sampling and optimization can be carried out in an unconstrained manner with unbounded support for parameters on the transformed (unconstrained) space. The lower bound in the declaration for \(N\) implies a variable transform \(f : (C-R+M,\infty) \rightarrow (-\infty,+\infty)\) defined by \(f(N) = \log(N - (C - R + M))\); the reference manual contains full details of all constrained parameter transforms.

+
+
+

Cormack-Jolly-Seber with discrete parameter

+

The Cormack-Jolly-Seber (CJS) model (Cormack 1964; Jolly 1965; Seber 1965) is an open-population model in which the population may change over time due to death; the presentation here draws heavily on Schofield (2007).

+

The basic data are

+
    +
  • \(I\): number of individuals,
  • +
  • \(T\): number of capture periods, and
  • +
  • \(y_{i,t}\): Boolean indicating if individual \(i\) was captured at time \(t\).
  • +
+

Each individual is assumed to have been captured at least once because an individual only contributes information conditionally after they have been captured the first time.

+

There are two Bernoulli parameters in the model,

+
    +
  • \(\phi_t\) : probability that animal alive at time \(t\) survives until \(t + 1\) and
  • +
  • \(p_t\) : probability that animal alive at time \(t\) is captured at time \(t\).
  • +
+

These parameters will both be given uniform priors, but information should be used to tighten these priors in practice.

+

The CJS model also employs a latent discrete parameter \(z_{i,t}\) indicating for each individual \(i\) whether it is alive at time \(t\), distributed as \[ +z_{i,t} \sim \mathsf{Bernoulli}(z_{i,t-1} \; ? \; \phi_{t-1} \: : \: 0). +\]

+

The conditional prevents the model positing zombies; once an animal is dead, it stays dead. The data distribution is then simple to express conditional on \(z\) as \[ +y_{i,t} \sim \mathsf{Bernoulli}(z_{i,t} \; ? \; p_t \: : \: 0). +\]

+

The conditional enforces the constraint that dead animals cannot be captured.

+
+
+

Collective Cormack-Jolly-Seber model

+

This subsection presents an implementation of the model in terms of counts for different history profiles for individuals over three capture times. It assumes exchangeability of the animals in that each is assigned the same capture and survival probabilities.

+

In order to ease the marginalization of the latent discrete parameter \(z_{i,t}\), the Stan models rely on a derived quantity \(\chi_t\) for the probability that an individual is never captured again if it is alive at time \(t\) (if it is dead, the recapture probability is zero). This quantity is defined recursively by \[ +\chi_t += +\begin{cases} +1 & \quad\text{if } t = T \\ +(1 - \phi_t) + \phi_t (1 - p_{t+1}) \chi_{t+1} + & \quad\text{if } t < T +\end{cases} +\]

+

The base case arises because if an animal was captured in the last time period, the probability it is never captured again is 1 because there are no more capture periods. The recursive case defining \(\chi_{t}\) in terms of \(\chi_{t+1}\) involves two possibilities: (1) not surviving to the next time period, with probability \((1 - \phi_t)\), or (2) surviving to the next time period with probability \(\phi_t\), not being captured in the next time period with probability \((1 - p_{t+1})\), and not being captured again after being alive in period \(t+1\) with probability \(\chi_{t+1}\).

+

With three capture times, there are eight captured/not-captured profiles an individual may have. These may be naturally coded as binary numbers as follows.

+

\[ +\begin{array}{crclc} +\hline +& \qquad\qquad & captures & \qquad\qquad & \\ +\mathrm{profile} & 1 & 2 & 3 & \mathrm{probability} \\ +\hline +0 & - & - & - & n/a \\ +1 & - & - & + & n/a \\ +2 & - & + & - & \chi_2 \\ +3 & - & + & + & \phi_2 \, p_3 \\ +4 & + & - & - & \chi_1 \\ +5 & + & - & + & \phi_1 \, (1 - p_2) \, \phi_2 \, p_3 \\ +6 & + & + & - & \phi_1 \, p_2 \, \chi_2 \\ +7 & + & + & + & \phi_1 \, p_2 \, \phi_2 \, p_3 \\ +\hline +\end{array} +\]

+

History 0, for animals that are never captured, is unobservable because only animals that are captured are observed. History 1, for animals that are only captured in the last round, provides no information for the CJS model, because capture/non-capture status is only informative when conditioned on earlier captures. For the remaining cases, the contribution to the likelihood is provided in the final column.

+

By defining these probabilities in terms of \(\chi\) directly, there is no need for a latent binary parameter indicating whether an animal is alive at time \(t\) or not. The definition of \(\chi\) is typically used to define the likelihood (i.e., marginalize out the latent discrete parameter) for the CJS model (Schofield 2007).

+

The Stan model defines \(\chi\) as a transformed parameter based on parameters \(\phi\) and \(p\). In the model block, the log probability is incremented for each history based on its count. This second step is similar to collecting Bernoulli observations into a binomial or categorical observations into a multinomial, only it is coded directly in the Stan program using target += rather than being part of a built-in probability function.

+

The following is the Stan program for the Cormack-Jolly-Seber mark-recapture model that considers counts of individuals with observation histories of being observed or not in three capture periods

+
data {
+  array[7] int<lower=0> history;
+}
+parameters {
+  array[2] real<lower=0, upper=1> phi;
+  array[3] real<lower=0, upper=1> p;
+}
+transformed parameters {
+  array[2] real<lower=0, upper=1> chi;
+  chi[2] = (1 - phi[2]) + phi[2] * (1 - p[3]);
+  chi[1] = (1 - phi[1]) + phi[1] * (1 - p[2]) * chi[2];
+}
+model {
+  target += history[2] * log(chi[2]);
+  target += history[3] * (log(phi[2]) + log(p[3]));
+  target += history[4] * (log(chi[1]));
+  target += history[5] * (log(phi[1]) + log1m(p[2])
+                            + log(phi[2]) + log(p[3]));
+  target += history[6] * (log(phi[1]) + log(p[2])
+                            + log(chi[2]));
+  target += history[7] * (log(phi[1]) + log(p[2])
+                            + log(phi[2]) + log(p[3]));
+}
+generated quantities {
+  real<lower=0, upper=1> beta3;
+  beta3 = phi[2] * p[3];
+}
+
+

Identifiability

+

The parameters \(\phi_2\) and \(p_3\), the probability of survival from time 2 to time 3 and the probability of capture at time 3, are not identifiable, because both may be used to account for lack of capture at time 3. Their product, \(\beta_3 = \phi_2 \, p_3\), is identified. The Stan model defines beta3 as a generated quantity. Unidentified parameters pose a problem for Stan’s samplers’ adaptation. Although the problem posed for adaptation is mild here because the parameters are bounded and thus have proper uniform priors, it would be better to formulate an identified parameterization. One way to do this would be to formulate a hierarchical model for the \(p\) and \(\phi\) parameters.

+
+
+
+

Individual Cormack-Jolly-Seber model

+

This section presents a version of the Cormack-Jolly-Seber (CJS) model cast at the individual level rather than collectively as in the previous subsection. It also extends the model to allow an arbitrary number of time periods. The data will consist of the number \(T\) of capture events, the number \(I\) of individuals, and a boolean flag \(y_{i,t}\) indicating if individual \(i\) was observed at time \(t\). In Stan,

+
data {
+  int<lower=2> T;
+  int<lower=0> I;
+  array[I, T] int<lower=0, upper=1> y;
+}
+

The advantage of the individual-level model is that it becomes possible to add individual “random effects” that affect survival or capture probability, as well as to avoid the combinatorics involved in unfolding \(2^T\) observation histories for \(T\) capture times.

+
+

Utility functions

+

The individual CJS model involves several function definitions. The first two are used in the transformed data block to compute the first and last time period in which an animal was captured.4

+
functions {
+  int first_capture(array[] int y_i) {
+    for (k in 1:size(y_i)) {
+      if (y_i[k]) {
+        return k;
+      }
+    }
+    return 0;
+  }
+  int last_capture(array[] int y_i) {
+    for (k_rev in 0:(size(y_i) - 1)) {
+      int k;
+      k = size(y_i) - k_rev;
+      if (y_i[k]) {
+        return k;
+      }
+    }
+    return 0;
+  }
+  // ...
+}
+

These two functions are used to define the first and last capture time for each individual in the transformed data block.5

+
transformed data {
+  array[I] int<lower=0, upper=T> first;
+  array[I] int<lower=0, upper=T> last;
+  vector<lower=0, upper=I>[T] n_captured;
+  for (i in 1:I) {
+    first[i] = first_capture(y[i]);
+  }
+  for (i in 1:I) {
+    last[i] = last_capture(y[i]);
+  }
+  n_captured = rep_vector(0, T);
+  for (t in 1:T) {
+    for (i in 1:I) {
+      if (y[i, t]) {
+        n_captured[t] = n_captured[t] + 1;
+      }
+    }
+  }
+}
+

The transformed data block also defines n_captured[t], which is the total number of captures at time t. The variable n_captured is defined as a vector instead of an integer array so that it can be used in an elementwise vector operation in the generated quantities block to model the population estimates at each time point.

+

The parameters and transformed parameters are as before, but now there is a function definition for computing the entire vector chi, the probability that if an individual is alive at t that it will never be captured again.

+
parameters {
+  vector<lower=0, upper=1>[T - 1] phi;
+  vector<lower=0, upper=1>[T] p;
+}
+transformed parameters {
+  vector<lower=0, upper=1>[T] chi;
+  chi = prob_uncaptured(T, p, phi);
+}
+

The definition of prob_uncaptured, from the functions block, is

+
functions {
+  // ...
+  vector prob_uncaptured(int T, vector p, vector phi) {
+    vector[T] chi;
+    chi[T] = 1.0;
+    for (t in 1:(T - 1)) {
+      int t_curr;
+      int t_next;
+      t_curr = T - t;
+      t_next = t_curr + 1;
+      chi[t_curr] = (1 - phi[t_curr])
+                     + phi[t_curr]
+                       * (1 - p[t_next])
+                       * chi[t_next];
+    }
+    return chi;
+  }
+}
+

The function definition directly follows the mathematical definition of \(\chi_t\), unrolling the recursion into an iteration and defining the elements of chi from T down to 1.

+
+
+

The model

+

Given the precomputed quantities, the model block directly encodes the CJS model’s log likelihood function. All parameters are left with their default uniform priors and the model simply encodes the log probability of the observations y given the parameters p and phi as well as the transformed parameter chi defined in terms of p and phi.

+
model {
+  for (i in 1:I) {
+    if (first[i] > 0) {
+      for (t in (first[i]+1):last[i]) {
+        1 ~ bernoulli(phi[t - 1]);
+        y[i, t] ~ bernoulli(p[t]);
+      }
+      1 ~ bernoulli(chi[last[i]]);
+    }
+  }
+}
+

The outer loop is over individuals, conditionally skipping individuals i that are never captured. The never-captured check depends on the convention of the first-capture and last-capture functions returning 0 for first if an individual is never captured.

+

The inner loop for individual i first increments the log probability based on the survival of the individual with probability phi[t - 1]. The outcome of 1 is fixed because the individual must survive between the first and last capture (i.e., no zombies). The loop starts after the first capture, because all information in the CJS model is conditional on the first capture.

+

In the inner loop, the observed capture status y[i, t] for individual i at time t has a Bernoulli distribution based on the capture probability p[t] at time t.

+

After the inner loop, the probability of an animal never being seen again after being observed at time last[i] is included, because last[i] was defined to be the last time period in which animal i was observed.

+
+
+

Identified parameters

+

As with the collective model described in the previous subsection, this model does not identify phi[T - 1] and p[T], but does identify their product, beta. Thus beta is defined as a generated quantity to monitor convergence and report.

+
generated quantities {
+  real beta;
+  // ...
+
+  beta = phi[T - 1] * p[T];
+  // ...
+}
+

The parameter p[1] is also not modeled and will just be uniform between 0 and 1. A more finely articulated model might have a hierarchical or time-series component, in which case p[1] would be an unknown initial condition and both phi[T - 1] and p[T] could be identified.

+
+
+

Population size estimates

+

The generated quantities block also calculates an estimate of the population size at each time t, in the same way as in the simple mark-recapture model, as the number of individuals captured at time t divided by the probability of capture at time t. This is done with the elementwise division operation for vectors (./) in the generated quantities block.

+
generated quantities {
+  // ...
+  vector<lower=0>[T] pop;
+  // ...
+  pop = n_captured ./ p;
+  pop[1] = -1;
+}
+
+
+

Generalizing to individual effects

+

All individuals are modeled as having the same capture probability, but this model could be easily generalized to use a logistic regression here based on individual-level inputs to be used as predictors.

+
+
+
+
+

Data coding and diagnostic accuracy models

+

Although seemingly disparate tasks, the rating/coding/annotation of items with categories and diagnostic testing for disease or other conditions share several characteristics that allow their statistical properties to be modeled similarly.

+
+

Diagnostic accuracy

+

Suppose you have diagnostic tests for a condition of varying sensitivity and specificity. Sensitivity is the probability a test returns positive when the patient has the condition and specificity is the probability that a test returns negative when the patient does not have the condition. For example, mammograms and puncture biopsy tests both test for the presence of breast cancer. Mammograms have high sensitivity and low specificity, meaning lots of false positives, whereas puncture biopsies are the opposite, with low sensitivity and high specificity, meaning lots of false negatives.

+

There are several estimands of interest in such studies. An epidemiological study may be interested in the prevalence of a kind of infection, such as malaria, in a population. A test development study might be interested in the diagnostic accuracy of a new test. A health care worker performing tests might be interested in the disease status of a particular patient.

+
+
+

Data coding

+

Humans are often given the task of coding (equivalently rating or annotating) data. For example, journal or grant reviewers rate submissions, a political study may code campaign commercials as to whether they are attack ads or not, a natural language processing study might annotate Tweets as to whether they are positive or negative in overall sentiment, or a dentist looking at an X-ray classifies a patient as having a cavity or not. In all of these cases, the data coders play the role of the diagnostic tests and all of the same estimands are in play — data coder accuracy and bias, true categories of items being coded, or the prevalence of various categories of items in the data.

+
+
+

Noisy categorical measurement model

+

In this section, only categorical ratings are considered, and the challenge in the modeling for Stan is to marginalize out the discrete parameters.

+

Dawid and Skene (1979) introduce a noisy-measurement model for coding and apply it in the epidemiological setting of coding what doctors say about patient histories; the same model can be used for diagnostic procedures.

+
+

Data

+

The data for the model consists of \(J\) raters (diagnostic tests), \(I\) items (patients), and \(K\) categories (condition statuses) to annotate, with \(y_{i, j} \in \{1, \dotsc, K\}\) being the rating provided by rater \(j\) for item \(i\). In a diagnostic test setting for a particular condition, the raters are diagnostic procedures and often \(K=2\), with values signaling the presence or absence of the condition.6

+

It is relatively straightforward to extend Dawid and Skene’s model to deal with the situation where not every rater rates each item exactly once.

+
+
+
+

Model parameters

+

The model is based on three parameters, the first of which is discrete:

+
    +
  • \(z_i\) : a value in \(\{1, \dotsc, K\}\) indicating the true category of item \(i\),
  • +
  • \(\pi\) : a \(K\)-simplex for the prevalence of the \(K\) categories in the population, and
  • +
  • \(\theta_{j,k}\) : a \(K\)-simplex for the response of annotator \(j\) to an item of true category \(k\).
  • +
+
+
+

Noisy measurement model

+

The true category of an item is assumed to be generated by a simple categorical distribution based on item prevalence, \[ +z_i \sim \textsf{categorical}(\pi). +\]

+

The rating \(y_{i, j}\) provided for item \(i\) by rater \(j\) is modeled as a categorical response of rater \(j\) to an item of category \(z_i\),7 \[ +y_{i, j} \sim \textsf{categorical}(\theta_{j, z[i]}). +\]

+
+

Priors and hierarchical modeling

+

Dawid and Skene provided maximum likelihood estimates for \(\theta\) and \(\pi\), which allows them to generate probability estimates for each \(z_i\).

+

To mimic Dawid and Skene’s maximum likelihood model, the parameters \(\theta_{j,k}\) and \(\pi\) can be given uniform priors over \(K\)-simplexes. It is straightforward to generalize to Dirichlet priors, \[ +\pi \sim \textsf{Dirichlet}(\alpha) +\] and \[ +\theta_{j,k} \sim \textsf{Dirichlet}(\beta_k) +\] with fixed hyperparameters \(\alpha\) (a vector) and \(\beta\) (a matrix or array of vectors). The prior for \(\theta_{j,k}\) must be allowed to vary in \(k\), so that, for instance, \(\beta_{k,k}\) is large enough to allow the prior to favor better-than-chance annotators over random or adversarial ones.

+

Because there are \(J\) coders, it would be natural to extend the model to include a hierarchical prior for \(\beta\) and to partially pool the estimates of coder accuracy and bias.

+
+
+

Marginalizing out the true category

+

Because the true category parameter \(z\) is discrete, it must be marginalized out of the joint posterior in order to carry out sampling or maximum likelihood estimation in Stan. The joint posterior factors as \[ +p(y, \theta, \pi) = p(y \mid \theta,\pi) \, p(\pi) \, p(\theta), +\] where \(p(y \mid \theta,\pi)\) is derived by marginalizing \(z\) out of \[ +p(z, y \mid \theta, \pi) = +\prod_{i=1}^I \left( \textsf{categorical}(z_i \mid \pi) + \prod_{j=1}^J + \textsf{categorical}(y_{i, j} \mid \theta_{j, z[i]}) + \right). +\]

+

This can be done item by item, with \[ +p(y \mid \theta, \pi) = +\prod_{i=1}^I \sum_{k=1}^K + \left( \textsf{categorical}(k \mid \pi) + \prod_{j=1}^J + \textsf{categorical}(y_{i, j} \mid \theta_{j, k}) + \right). +\]

+

In the missing data model, only the observed labels would be used in the inner product.

+

Dawid and Skene (1979) derive exactly the same equation in their Equation (2.7), required for the E-step in their expectation maximization (EM) algorithm. Stan requires the marginalized probability function on the log scale, \[\begin{align*} +\log p(y \mid \theta, \pi) +&= \sum_{i=1}^I \log \left( \sum_{k=1}^K \exp + \left(\log \textsf{categorical}(k \mid \pi) \vphantom{\sum_{j=1}^J}\right.\right. + \left.\left. + {} + \sum_{j=1}^J + \log \textsf{categorical}(y_{i, j} \mid \theta_{j, k}) + \right) \right), +\end{align*}\] which can be directly coded using Stan’s built-in log_sum_exp function.

+
+
+
+

Stan implementation

+

The Stan program for the Dawid and Skene model is provided below (Dawid and Skene 1979).

+
data {
+  int<lower=2> K;                       // number of categories
+  int<lower=1> I;                       // number of items
+  int<lower=1> J;                       // number of raters
+
+  array[I, J] int<lower=1, upper=K> y;  // y[i, j]: rating of item i by rater j
+
+  vector<lower=0>[K] alpha;             // Dirichlet hyperparameter for pi
+  array[K] vector<lower=0>[K] beta;     // beta[k]: Dirichlet hyperparameter for theta[j, k]
+}
+parameters {
+  simplex[K] pi;                        // category prevalence
+  array[J, K] simplex[K] theta;         // theta[j, k]: response of rater j to true category k
+}
+transformed parameters {
+  array[I] vector[K] log_q_z;           // unnormalized log probability that z[i] = k
+  for (i in 1:I) {
+    log_q_z[i] = log(pi);
+    for (j in 1:J) {
+      for (k in 1:K) {
+        log_q_z[i, k] = log_q_z[i, k]
+                         + log(theta[j, k, y[i, j]]);
+      }
+    }
+  }
+}
+model {
+  pi ~ dirichlet(alpha);
+  for (j in 1:J) {
+    for (k in 1:K) {
+      theta[j, k] ~ dirichlet(beta[k]);
+    }
+  }
+
+  for (i in 1:I) {
+    target += log_sum_exp(log_q_z[i]);  // marginalize out z[i]
+  }
+}
+

The model marginalizes out the discrete parameter \(z\), storing the unnormalized conditional probability \(\log q(z_i=k|\theta,\pi)\) in log_q_z[i, k].

+

The Stan model converges quickly and mixes well using NUTS starting at diffuse initial points, unlike the equivalent model implemented with Gibbs sampling over the discrete parameter. Reasonable weakly informative priors are \(\alpha_k = 3\) and \(\beta_{k,k} = 2.5 K\) and \(\beta_{k,k'} = 1\) if \(k \neq k'\). Taking \(\alpha\) and \(\beta_k\) to be vectors of ones (so that the Dirichlet priors are uniform) and applying optimization will produce the same answer as the expectation maximization (EM) algorithm of Dawid and Skene (1979).

+
+

Inference for the true category

+

The quantity log_q_z[i] is defined as a transformed parameter. It encodes the (unnormalized) log of \(p(z_i \mid \theta, +\pi)\). Each iteration provides a value conditioned on that iteration’s values for \(\theta\) and \(\pi\). Applying the softmax function to log_q_z[i] provides a simplex corresponding to the probability mass function of \(z_i\) in the posterior. These may be averaged across the iterations to provide the posterior probability distribution over each \(z_i\).

+
+
+
+
+

The mathematics of recovering marginalized parameters

+
+

Introduction

+

This section describes in more detail the mathematics of statistical inference using the output of marginalized Stan models, such as those presented in the last three sections. It provides a mathematical explanation of why and how certain manipulations of Stan’s output produce valid summaries of the posterior distribution when discrete parameters have been marginalized out of a statistical model. Ultimately, however, fully understanding the mathematics in this section is not necessary to fit models with discrete parameters using Stan.

+

Throughout, the model under consideration consists of both continuous parameters, \(\Theta\), and discrete parameters, \(Z\). It is also assumed that \(Z\) can only take finitely many values, as is the case for all the models described in this chapter of the User’s Guide. To simplify notation, any conditioning on data is suppressed in this section, except where specified. As with all Bayesian analyses, however, all inferences using models with marginalized parameters are made conditional on the observed data.

+
+
+

Estimating expectations

+

When performing Bayesian inference, interest often centers on estimating some (constant) low-dimensional summary statistics of the posterior distribution. Mathematically, we are interested in estimating \(\mu\), say, where \(\mu = \mathbb{E}[g(\Theta, Z)]\) and \(g(\cdot)\) is an arbitrary function. An example of such a quantity is \(\mathbb{E}[\Theta]\), the posterior mean of the continuous parameters, where we would take \(g(\theta, z) = \theta\). To estimate \(\mu\) the most common approach is to sample a series of values, at least approximately, from the posterior distribution of the parameters of interest. The numerical values of these draws can then be used to calculate the quantities of interest. Often, this process of calculation is trivial, but more care is required when working with marginalized posteriors as we describe in this section.

+

If both \(\Theta\) and \(Z\) were continuous, Stan could be used to sample \(M\) draws from the joint posterior \(p_{\Theta, Z}(\theta, z)\) and then estimate \(\mu\) with \[ +\hat{\mu} = \frac{1}{M} \sum_{i = 1}^M {g(\theta^{(i)}, z^{(i)})}. +\] Given \(Z\) is discrete, however, Stan cannot be used to sample from the joint posterior (or even to do optimization). Instead, as outlined in the previous sections describing specific models, the user can first marginalize out \(Z\) from the joint posterior to give the marginalized posterior \(p_\Theta(\theta)\). This marginalized posterior can then be implemented in Stan as usual, and Stan will give draws \(\{\theta^{(i)}\}_{i = 1}^M\) from the marginalized posterior.

+

Using only these draws, how can we estimate \(\mathbb{E}[g(\Theta, Z)]\)? We can use a conditional estimator. We explain in more detail below, but at a high level the idea is that, for each function \(g\) of interest, we compute \[ +h(\Theta) = \mathbb{E}[g(\Theta, Z) \mid \Theta] +\] and then estimate \(\mathbb{E}[g(\Theta, Z)]\) with \[ +\hat{\mu} = \frac{1}{M} \sum_{i = 1}^M h(\theta^{(i)}). +\] This estimator is justified by the law of iterated expectation, the fact that \[ +\mathbb{E}[h(\Theta)] = \mathbb{E}[\mathbb{E}[g(\Theta, Z) \mid \Theta]] = \mathbb{E}[g(\Theta, Z)] = \mu. +\] Using this marginalized estimator provides a way to estimate the expectation of any function \(g(\cdot)\) for all combinations of discrete or continuous parameters in the model. However, it presents a possible new challenge: evaluating the conditional expectation \(\mathbb{E}[g(\Theta, Z) \mid \Theta]\).

+
+
+

Evaluating the conditional expectation

+

Fortunately, the discrete nature of \(Z\) makes evaluating \(\mathbb{E}[g(\Theta, Z) \mid \Theta]\) easy. The function \(h(\Theta)\) can be written as: \[ +h(\Theta) += \mathbb{E}[g(\Theta, Z) \mid \Theta] += \sum_{k} g(\Theta, k) \Pr[Z = k \mid \Theta], +\] where we sum over the possible values of the latent discrete parameters. An essential part of this formula is the probability of the discrete parameters conditional on the continuous parameters, \(\Pr[Z = k \mid \Theta]\). More detail on how this quantity can be calculated is included below. Note that if \(Z\) takes infinitely many values then computing the infinite sums will involve, potentially computationally expensive, approximation.

+

When \(g(\theta, z)\) is a function of either \(\theta\) or \(z\) only, the above formula simplifies further.

+

In the first case, where \(g(\theta, z) = g(\theta)\), we have: \[\begin{align*} +h(\Theta) +&= \sum_{k} g(\Theta) \Pr[Z = k \mid \Theta] \\ +&= g(\Theta) \sum_{k} \Pr[Z = k \mid \Theta] \\ +&= g(\Theta). +\end{align*}\] This means that we can estimate \(\mathbb{E}[g(\Theta)]\) with the standard, seemingly unconditional, estimator: \[ +\frac{1}{M} \sum_{i = 1}^M g(\theta^{(i)}). +\] Even after marginalization, computing expectations of functions of the continuous parameters can be performed as if no marginalization had taken place.

+

In the second case, where \(g(\theta, z) = g(z)\), the conditional expectation instead simplifies as follows: \[ +h(\Theta) = \sum_{k} g(k) \Pr[Z = k \mid \Theta]. +\] An important special case of this result is when \(g(\theta, z) = \textrm{I}(z = k)\), where \(\textrm{I}\) is the indicator function. This choice allows us to recover the probability mass function of the discrete random variable \(Z\), since \(\mathbb{E}[\textrm{I}(Z = k)] = \Pr[Z = k]\). In this case, \[ +h(\Theta) += \sum_{k'} \textrm{I}(k' = k) \Pr[Z = k' \mid \Theta] += \Pr[Z = k \mid \Theta]. +\] The quantity \(\Pr[Z = k]\) can therefore be estimated with: \[ +\frac{1}{M} \sum_{i = 1}^M \Pr[Z = k \mid \Theta = \theta^{(i)}]. +\] When calculating this conditional probability it is important to remember that we are also conditioning on the observed data, \(Y\). That is, we are really estimating \(\Pr[Z = k \mid Y]\) with \[ +\frac{1}{M} \sum_{i = 1}^M \Pr[Z = k \mid \Theta = \theta^{(i)}, Y]. +\] This point is important as it suggests one of the main ways of calculating the required conditional probability. Using Bayes’s theorem gives us \[ +\Pr[Z = k \mid \Theta = \theta^{(i)}, Y] += \frac{\Pr[Y \mid Z = k, \Theta = \theta^{(i)}] +\Pr[Z = k \mid \Theta = \theta^{(i)}]} +{\sum_{k' = 1}^K \Pr[Y \mid Z = k', \Theta = \theta^{(i)}] +\Pr[Z = k' \mid \Theta = \theta^{(i)}]}. +\] Here, \(\Pr[Y \mid \Theta = \theta^{(i)}, Z = k]\) is the likelihood conditional on a particular value of the latent variables. Crucially, all elements of the expression can be calculated using the draws from the posterior of the continuous parameters and knowledge of the model structure.

+

Other than the use of Bayes’s theorem, \(\Pr[Z = k \mid \Theta = \theta^{(i)}, Y]\) can also be extracted by coding the Stan model to include the conditional probability explicitly (as is done for the Dawid–Skene model).

+

For a longer introduction to the mathematics of marginalization in Stan, which also covers the connections between Rao–Blackwellization and marginalization, see Pullin, Gurrin, and Vukcevic (2021).

+ + + +
+
+
+ + + Back to top

References

+
+Cormack, R. M. 1964. “Estimates of Survival from the Sighting of Marked Animals.” Biometrika 51 (3/4): 429–38. +
+
+Dawid, A. P., and A. M. Skene. 1979. “Maximum Likelihood Estimation of Observer Error-Rates Using the EM Algorithm.” Journal of the Royal Statistical Society. Series C (Applied Statistics) 28 (1): 20–28. +
+
+Dempster, A. P., N. M. Laird, and D. B. Rubin. 1977. “Maximum Likelihood from Incomplete Data via the EM Algorithm.” Journal of the Royal Statistical Society. Series B (Methodological) 39 (1): 1–38. +
+
+Fonnesbeck, Chris, Anand Patil, David Huard, and John Salvatier. 2013. PyMC User’s Guide. +
+
+Jarrett, R. G. 1979. “A Note on the Intervals Between Coal-Mining Disasters.” Biometrika 66 (1): 191–93. +
+
+Jolly, G. M. 1965. “Explicit Estimates from Capture-Recapture Data with Both Death and Immigration-Stochastic Model.” Biometrika 52 (1/2): 225–47. +
+
+Lincoln, F. C. 1930. “Calculating Waterfowl Abundance on the Basis of Banding Returns.” United States Department of Agriculture Circular 118: 1–4. +
+
+Petersen, C. G. J. 1896. “The Yearly Immigration of Young Plaice into the Limfjord from the German Sea.” Report of the Danish Biological Station 6: 5–84. +
+
+Pullin, Jeffrey, Lyle Gurrin, and Damjan Vukcevic. 2021. “Statistical Models of Repeated Categorical Ratings: The r Package Rater.” arXiv 2010.09335. https://arxiv.org/abs/2010.09335. +
+
+Schofield, Matthew R. 2007. “Hierarchical Capture-Recapture Models.” PhD thesis, Department of Statistics, University of Otago, Dunedin. +
+
+Seber, G. A. F. 1965. “A Note on the Multiple-Recapture Census.” Biometrika 52 (1/2): 249–59. +
+

Footnotes

+ +
    +
  1. The computations are similar to those involved in expectation maximization (EM) algorithms (Dempster, Laird, and Rubin 1977).↩︎

  2. +
  3. The source of the data is (Jarrett 1979), which itself is a note correcting an earlier data collection.↩︎

  4. +
  5. The R counterpart, ifelse, is slightly different in that it is typically used in a vectorized situation. The conditional operator is not (yet) vectorized in Stan.↩︎

  6. +
  7. An alternative would be to compute this on the outside and feed it into the Stan model as preprocessed data. Yet another alternative encoding would be a sparse one recording only the capture events along with their time and identifying the individual captured.↩︎

  8. +
  9. Both functions return 0 if the individual represented by the input array was never captured. Individuals with no captures are not relevant for estimating the model because all probability statements are conditional on earlier captures. Typically they would be removed from the data, but the program allows them to be included even though they make no contribution to the log probability function.↩︎

  10. +
  11. Diagnostic procedures are often ordinal, as in stages of cancer in oncological diagnosis or the severity of a cavity in dental diagnosis. Dawid and Skene’s model may be used as is or naturally generalized for ordinal ratings using a latent continuous rating and cutpoints as in ordinal logistic regression.↩︎

  12. +
  13. In the subscript, \(z_i\) is written as \(z[i]\) to improve legibility.↩︎

  14. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/matrices-arrays.html b/docs/2_39/stan-users-guide/matrices-arrays.html new file mode 100644 index 000000000..b638c4842 --- /dev/null +++ b/docs/2_39/stan-users-guide/matrices-arrays.html @@ -0,0 +1,1434 @@ + + + + + + + + + +Matrices, Vectors, Arrays, and Tuples + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Matrices, Vectors, Arrays, and Tuples

+

This chapter provides pointers as to how to choose among the various container types (matrix, vector, array, and tuple) provided by Stan.

+
+

Basic motivation

+

Stan provides three basic scalar types, int, real, and complex, as well as three basic linear algebra types, vector, row_vector, and matrix. Stan allows arrays of any dimensionality, containing any type of element (though that type must be declared and must be the same for all elements).

+

This leaves us in the awkward situation of having three one-dimensional containers, as exemplified by the following declarations.

+
array[N] real a;
+vector[N] a;
+row_vector[N] a;
+

These distinctions matter. Matrix types, like vector and row vector, are required for linear algebra operations. There is no automatic promotion of arrays to vectors because the target, row vector or column vector, is ambiguous. Similarly, row vectors are separated from column vectors because multiplying a row vector by a column vector produces a scalar, whereas multiplying in the opposite order produces a matrix.

+

The following code fragment shows all four ways to declare a two-dimensional container of size \(M \times N\).

+
array[M, N] real b;          // b[m] : array[] real     (efficient)
+array[M] vector[N] b;        // b[m] : vector     (efficient)
+array[M] row_vector[N] b;    // b[m] : row_vector (efficient)
+matrix[M, N] b;              // b[m] : row_vector (inefficient)
+

The main differences among these choices involve efficiency for various purposes and the type of b[m], which is shown in comments to the right of the declarations. Thus the only way to efficiently iterate over row vectors is to use the third declaration, but if you need linear algebra on matrices, the only way to use matrix operations is to use the fourth declaration.

+

The inefficiencies due to any manual reshaping of containers are usually slight compared to what else is going on in a Stan program (typically a lot of gradient calculations).

+
+
+

Tuple types

+

Arrays may contain entries of any type, but the types must be the same for all entries. Matrices and vectors contain either real numbers or complex numbers, but all the contained types are the same (e.g., if a vector has a single complex typed entry, all the entries are complex).

+

With arrays or vectors, we can represent pairs of real numbers or pairs of complex numbers. For example, a complex_vector[3] holds exactly three complex numbers. With arrays and vectors, there is no way to represent a pair consisting of an integer and a real number.

+

Tuples provide a way to represent a sequence of values of heterogeneous types. For example, tuple(int, real) is the type of a pair consisting of an integer and a real number and tuple(array[5] int, vector[6]) is the type of pairs where the first element is a five-element array of integers, and the second is a six-element vector.

+
+

Tuple syntax

+

Tuples are declared using the keyword tuple followed by a sequence of type declarations in parentheses. Tuples are constructed using only parentheses. The following example illustrates both declaration and construction.

+
tuple(int, vector[3]) ny = (5, [3, 2.9, 1.8]');
+

The elements of a tuple are accessed by position, starting from 1. For example, we can extract the elements of the tuple above using

+
int n = ny.1;
+vector[3] y = ny.2;
+

We can also assign into the elements of a tuple.

+
tuple(int, vector[3], complex) abc;
+abc.1 = 5;
+abc.2[1] = 3;
+abc.2[2] = 2.9;
+abc.2[3] = 1.4798;
+abc.3 = 2 + 1.9j;
+

As the cascaded indexing example shows, the result of abc.2 is an lvalue (i.e., something to which values may be assigned), and we can further index into it to create new lvalues (e.g., abc.2[1] pulls out the first element of the vector value of the second element of the tuple).

+

There are two efficiency considerations for tuples. First, like the other container types, tuples are passed to functions by constant reference, which means only a pointer gets passed rather than copying the data. Second, like the array types, creating a tuple requires copying the data for all of its elements. For example, in the following code, the matrix is copied, entailing 1000 copies of scalar values.

+
int a = 5;
+matrix[10, 100] b = ...;
+tuple(int, matrix[10, 100]) ab = (a, b);  // COPIES b
+b[1,1] = 10.3;  // does NOT change ab
+
+
+

Applications of tuples

+

Tuples are primarily useful for two things. First, they provide a way to encapsulate a group of heterogeneous items so that they may be passed as a group. This lets us define arrays of structures as well as structures of arrays. For example, array[N] tuple(int, real, vector[5]) is an array of tuples, each of which has an integer, real, and vector component. Alternatively, we can represent the same information using a tuple of parallel arrays as tuple(array[N] int, array[N] real, array[N] vector[5]).

+

The second use is for function return values. Here, if a function computes two different things with different types, and the computation shares work, it’s best to write one function that returns both things. For example, an eigendecomposition returns a pair consisting of a vector of eigenvalues and a matrix of eigenvectors, whereas a singular value decomposition returns three matrices of different shapes. As another example, consider the QR decomposition of a matrix \(A = Q \cdot R\), where \(Q\) is orthonormal and \(R\) is upper triangular. Before tuples were introduced in version 2.33, this required two function calls.

+
matrix[M, N] A = ...;
+matrix[M, M] Q = qr_Q(A);
+matrix[M, N] R = qr_R(A);
+

With tuples, this can be simplified to the following,

+
tuple(matrix[M, M], matrix[M, N]) QR = qr(A);
+

with QR.1 being Q and QR.2 giving R.

+
+
+
+

Fixed sizes and indexing out of bounds

+

Stan’s matrices, vectors, and array variables are sized when they are declared and may not be dynamically resized. Function arguments do not have sizes, but these sizes are fixed when the function is called and the container is instantiated. Also, declarations may be inside loops and thus may change over the course of running a program, but each time a declaration is visited, it declares a fixed size object.

+

When an index is provided that is out of bounds, Stan throws a rejection error and computation on the current log density and gradient evaluation is halted and the algorithm is left to clean up the error. All of Stan’s containers check the sizes of all indexes.

+
+
+

Data type and indexing efficiency

+

The underlying matrix and linear algebra operations are implemented in terms of data types from the Eigen C++ library. By having vectors and matrices as basic types, no conversion is necessary when invoking matrix operations or calling linear algebra functions.

+

Arrays, on the other hand, are implemented as instances of the C++
+std::vector class (not to be confused with Eigen’s Eigen::Vector class or Stan vectors). By implementing arrays this way, indexing is efficient because values can be returned by reference rather than copied by value.

+
+

Matrices vs. two-dimensional arrays

+

In Stan models, there are a few minor efficiency considerations in deciding between a two-dimensional array and a matrix, which may seem interchangeable at first glance.

+

First, matrices use a bit less memory than two-dimensional arrays. This is because they don’t store a sequence of arrays, but just the data and the two dimensions.

+

Second, matrices store their data in column-major order. Furthermore, all of the data in a matrix is guaranteed to be contiguous in memory. This is an important consideration for optimized code because bringing in data from memory to cache is much more expensive than performing arithmetic operations with contemporary CPUs. Arrays, on the other hand, only guarantee that the values of primitive types are contiguous in memory; otherwise, they hold copies of their values (which are returned by reference wherever possible).

+

Third, both data structures are best traversed in the order in which they are stored. This also helps with memory locality. This is column-major for matrices, so the following order is appropriate.

+
matrix[M, N] a;
+//...
+for (n in 1:N) {
+  for (m in 1:M) {
+    // ... do something with a[m, n] ...
+  }
+}
+

Arrays, on the other hand, should be traversed in row-major order (i.e., last index fastest), as in the following example.

+
array[M, N] real a;
+// ...
+for (m in 1:M) {
+  for (n in 1:N) {
+    // ... do something with a[m, n] ...
+  }
+}
+

The first use of a[m, n] should bring a[m] into memory. Overall, traversing matrices is more efficient than traversing arrays.

+

This is true even for arrays of matrices. For example, the ideal order in which to traverse a two-dimensional array of matrices is

+
array[I, J] matrix[M, N] b;
+// ...
+for (i in 1:I) {
+  for (j in 1:J) {
+    for (n in 1:N) {
+      for (m in 1:M) {
+        // ... do something with b[i, j, m, n] ...
+      }
+    }
+  }
+}
+

If a is a matrix, the notation a[m] picks out row m of that matrix. This is a rather inefficient operation for matrices. If indexing of vectors is needed, it is much better to declare an array of vectors. That is, this

+
array[M] row_vector[N] b;
+// ...
+for (m in 1:M) {
+   // ... do something with row vector b[m] ...
+}
+

is much more efficient than the pure matrix version

+
matrix[M, N] b;
+// ...
+for (m in 1:M) {
+   // ... do something with row vector b[m] ...
+}
+

Similarly, indexing an array of column vectors is more efficient than using the col function to pick out a column of a matrix.

+

In contrast, whatever can be done as pure matrix algebra will be the fastest. So if I want to create a row of predictor-coefficient dot-products, it’s more efficient to do this

+
matrix[N, K] x;    // predictors (aka covariates)
+// ...
+vector[K] beta;   // coeffs
+// ...
+vector[N] y_hat;  // linear prediction
+// ...
+y_hat = x * beta;
+

than it is to do this

+
array[N] row_vector[K] x;    // predictors (aka covariates)
+// ...
+vector[K] beta;   // coeffs
+// ...
+vector[N] y_hat;  // linear prediction
+// ...
+for (n in 1:N) {
+  y_hat[n] = x[n] * beta;
+}
+
+
+

(Row) vectors vs. one-dimensional arrays

+

For use purely as a container, there is really nothing to decide among vectors, row vectors and one-dimensional arrays. The Eigen::Vector template specialization and the std::vector template class are implemented similarly as containers of double values (the type real in Stan). Only arrays in Stan are allowed to store integer values.

+
+
+
+

Memory locality

+

The key to understanding efficiency of matrix and vector representations is memory locality and reference passing versus copying.

+
+

Memory locality

+

CPUs on computers bring in memory in blocks through layers of caches. Fetching from memory is much slower than performing arithmetic operations. The only way to make container operations fast is to respect memory locality and access elements that are close together in memory sequentially in the program.

+
+
+

Matrices

+

Matrices are stored internally in column-major order. That is, an \(M +\times N\) matrix stores its elements in the order \[ +(1,1), (2, 1), \dotsc, (M, 1), (1, 2), \dotsc, (M, 2), \dotsc, (1, N), +\dotsc, (M, N). +\]

+

This means that it’s much more efficient to write loops over matrices column by column, as in the following example.

+
matrix[M, N] a;
+// ...
+for (n in 1:N) {
+  for (m in 1:M) {
+     // ... do something with a[m, n] ...
+  }
+}
+

It also follows that pulling a row out of a matrix is not memory local, as it has to stride over the whole sequence of values. It also requires a copy operation into a new data structure as it is not stored internally as a unit in a matrix. For sequential access to row vectors in a matrix, it is much better to use an array of row vectors, as in the following example.

+
array[M] row_vector[N] a;
+// ...
+for (m in 1:M) {
+  // ... do something with row vector a[m] ...
+}
+

Even if what is done involves a function call, the row vector a[m] will not have to be copied.

+
+
+

Arrays

+

Arrays are stored internally following their data structure. That means a two dimensional array is stored in row-major order. Thus it is efficient to pull out a “row” of a two-dimensional array.

+
array[M, N] real a;
+// ...
+for (m in 1:M) {
+  // ... do something with a[m] ...
+}
+

A difference with matrices is that the entries a[m] in the two dimensional array are not necessarily adjacent in memory, so there is no guarantee that iterating over all the elements in a two-dimensional array will provide memory locality across the “rows.”

+
+
+
+

Converting among matrix, vector, and array types

+

There is no automatic conversion among matrices, vectors, and arrays in Stan. But there is a wide range of conversion functions to convert a matrix into a vector, or a multi-dimensional array into a one-dimensional array, or convert a vector to an array. See the section on mixed matrix and array operations in the functions reference manual for a complete list of conversion operators and the multi-indexing chapter for some reshaping operations involving multiple indexing and range indexing.

+
+
+

Aliasing in Stan containers

+

Stan expressions are all evaluated before assignment happens, so there is no danger of so-called aliasing in array, vector, or matrix operations. In the following Stan program, contrast the behavior of the loop assignment to u and the compound slicing assignment to x, where u and x start with the same values.

+
transformed data {
+  vector[4] x = [ 1, 2, 3, 4 ]';
+  vector[4] u = [ 1, 2, 3, 4 ]';
+
+  for (t in 2:4) {
+    u[t] = u[t - 1] * 3;  // element-wise: reads the u[t - 1] written on the previous pass
+  }
+
+  x[2:4] = x[1:3] * 3;    // sliced: right-hand side is fully evaluated before assignment
+
+  print("u = ", u);
+  print("x = ", x);
+}
+

The output it produces is,

+
u = [1, 3, 9, 27]
+x = [1, 3, 6, 9]
+

In the loop version assigning to u, the values are updated before being used to define subsequent values; in the sliced expression assigning to x, the entire right-hand side is evaluated before assigning to the left-hand side.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/measurement-error.html b/docs/2_39/stan-users-guide/measurement-error.html new file mode 100644 index 000000000..3fb2fee0e --- /dev/null +++ b/docs/2_39/stan-users-guide/measurement-error.html @@ -0,0 +1,1467 @@ + + + + + + + + + +Measurement Error and Meta-Analysis + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Measurement Error and Meta-Analysis

+

Most quantities used in statistical models arise from measurements. Most of these measurements are taken with some error. When the measurement error is small relative to the quantity being measured, its effect on a model is usually small. When measurement error is large relative to the quantity being measured, or when precise relations can be estimated between measured quantities, it is useful to introduce an explicit model of measurement error. One kind of measurement error is rounding.

+

Meta-analysis plays out statistically much like measurement error models, where the inferences drawn from multiple data sets are combined to do inference over all of them. Inferences for each data set are treated as providing a kind of measurement error with respect to true parameter values.

+
+

Bayesian measurement error model

+

A Bayesian approach to measurement error can be formulated directly by treating the true quantities being measured as missing data (Clayton 1992; Richardson and Gilks 1993). This requires a model of how the measurements are derived from the true values.

+
+

Regression with measurement error

+

Before considering regression with measurement error, first consider a linear regression model where the observed data for \(N\) cases includes a predictor \(x_n\) and outcome \(y_n\). In Stan, a linear regression for \(y\) based on \(x\) with a slope and intercept is modeled as follows.

+
data {
+  int<lower=0> N;       // number of cases
+  vector[N] x;          // predictor (covariate)
+  vector[N] y;          // outcome (variate)
+}
+parameters {
+  real alpha;           // intercept
+  real beta;            // slope
+  real<lower=0> sigma;  // outcome noise
+}
+model {
+  y ~ normal(alpha + beta * x, sigma);
+  alpha ~ normal(0, 10);
+  beta ~ normal(0, 10);
+  sigma ~ cauchy(0, 5);
+}
+

Now suppose that the true values of the predictors \(x_n\) are not known, but for each \(n\), a measurement \(x^{\textrm{meas}}_n\) of \(x_n\) is available. If the error in measurement can be modeled, the measured value \(x^{\textrm{meas}}_n\) can be modeled in terms of the true value \(x_n\) plus measurement noise. The true value \(x_n\) is treated as missing data and estimated along with other quantities in the model. A simple approach is to assume the measurement error is normal with known deviation \(\tau\). This leads to the following regression model with constant measurement error.

+
data {
+  // ...
+  array[N] real x_meas;   // measurement of x
+  real<lower=0> tau;     // measurement noise
+}
+parameters {
+  vector[N] x;            // unknown true value
+  real mu_x;              // prior location
+  real<lower=0> sigma_x;  // prior scale
+  // ...
+}
+model {
+  x ~ normal(mu_x, sigma_x);  // prior
+  x_meas ~ normal(x, tau);    // measurement model
+  y ~ normal(alpha + beta * x, sigma);
+  // ...
+}
+

The regression coefficients alpha and beta and regression noise scale sigma are the same as before, but now x is declared as a parameter rather than as data. The data are now x_meas, which is a measurement of the true x value with noise scale tau. The model then specifies that the measurement error for x_meas[n] given true value x[n] is normal with deviation tau. Furthermore, the true values x are given a hierarchical prior here.

+

In cases where the measurement errors are not normal, richer measurement error models may be specified. The prior on the true values may also be enriched. For instance, Clayton (1992) introduces an exposure model for the unknown (but noisily measured) risk factors \(x\) in terms of known (without measurement error) risk factors \(c\). A simple model would regress \(x_n\) on the covariates \(c_n\) with noise term \(\upsilon\), \[ +x_n \sim \textsf{normal}(\gamma^{\top}c, \upsilon). +\] This can be coded in Stan just like any other regression. And, of course, other exposure models can be provided.

+
+
+

Rounding

+

A common form of measurement error arises from rounding measurements. Rounding may be done in many ways, such as rounding weights to the nearest milligram, or to the nearest pound; rounding may even be done by rounding down to the nearest integer.

+

Exercise 3.5(b) by Gelman et al. (2013) provides an example.

+
+

3.5. Suppose we weigh an object five times and measure weights, rounded to the nearest pound, of 10, 10, 12, 11, 9. Assume the unrounded measurements are normally distributed with a noninformative prior distribution on \(\mu\) and \(\sigma^2\).

+
    +
  1. Give the correct posterior distribution for \((\mu, \sigma^2)\), treating the measurements as rounded.
  2. +
+
+

Letting \(z_n\) be the unrounded measurement for \(y_n\), the problem as stated assumes \[ +z_n \sim \textsf{normal}(\mu, \sigma). +\]

+

The rounding process entails that \(z_n \in (y_n - 0.5, y_n + 0.5)\)1. The probability mass function for the discrete observation \(y\) is then given by marginalizing out the unrounded measurement, producing the likelihood \[\begin{align*} +p(y_n \mid \mu, \sigma) +&= \int_{y_n - 0.5}^{y_n + 0.5} \textsf{normal}(z_n \mid \mu, \sigma) \,\textsf{d}z_n \\ +&= \Phi\!\left(\frac{y_n + 0.5 - \mu}{\sigma}\right) + -\Phi\!\left(\frac{y_n - 0.5 - \mu}{\sigma}\right). +\end{align*}\] Gelman’s answer for this problem took the noninformative prior to be uniform in the variance \(\sigma^2\) on the log scale, but we replace it with the more recently recommended half-normal prior on \(\sigma\), \[ +\sigma \sim \textsf{normal}^+(0, 1). +\] The posterior after observing \(y = (10, 10, 12, 11, 9)\) can be calculated by Bayes’s rule as \[\begin{align*} +p(\mu, \sigma \mid y) +&\propto p(\mu, \sigma) \ p(y \mid \mu, \sigma) \\ +&\propto \textsf{normal}^+(\sigma \mid 0, 1)\prod_{n=1}^5 + \left( \Phi\!\left(\frac{y_n + 0.5 - \mu}{\sigma}\right) + -\Phi\!\left(\frac{y_n - 0.5 - \mu}{\sigma}\right) + \right). +\end{align*}\]

+

The Stan code simply follows the mathematical definition, providing an example of the direct definition of a probability function up to a proportion.

+
data {
+  int<lower=0> N;
+  vector[N] y;
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  sigma ~ normal(0, 1);
+  for (n in 1:N) {
+    target += log_diff_exp(normal_lcdf(y[n] + 0.5 | mu, sigma),
+                           normal_lcdf(y[n] - 0.5 | mu, sigma));
+  }
+}
+

where normal_lcdf(y[n] + 0.5 | mu, sigma) is equal to log(Phi((y[n] + 0.5 - mu) / sigma)), and log_diff_exp(a, b) computes log(exp(a) - exp(b)) in a numerically more stable way.

+

Alternatively, the model may be defined with latent parameters for the unrounded measurements \(z_n\). The Stan code in this case uses a distribution statement for \(z_n\) directly while respecting the constraint \(z_n \in (y_n - 0.5, y_n + 0.5)\).

+
data {
+  int<lower=0> N;
+  vector[N] y;
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+  vector<lower=y-0.5, upper=y+0.5>[N] z;
+}
+model {
+  sigma ~ normal(0, 1);
+  z ~ normal(mu, sigma);
+}
+

This explicit model for the unrounded measurements \(z\) produces the same posterior for \(\mu\) and \(\sigma\) as the previous model that marginalizes \(z\) out. Both approaches mix well, but the latent parameter version is about twice as efficient in terms of effective sample size per iteration, as well as providing a posterior for the unrounded measurements.

+
+
+
+

Meta-analysis

+

Meta-analysis aims to pool the data from several studies, such as the application of a tutoring program in several schools or treatment using a drug in several clinical trials.

+

The Bayesian framework is particularly convenient for meta-analysis, because each previous study can be treated as providing a noisy measurement of some underlying quantity of interest. The model then follows directly from two components, a prior on the underlying quantities of interest and a measurement-error style model for each of the studies being analyzed.

+
+

Treatment effects in controlled studies

+

Suppose the data in question arise from a total of \(J\) studies providing paired binomial data for a treatment and control group. For instance, the data might be post-surgical pain reduction under a treatment of ibuprofen (Warn, Thompson, and Spiegelhalter 2002) or mortality after myocardial infarction under a treatment of beta blockers (Gelman et al. 2013, sec. 5.6).

+
+

Data

+

The clinical data consists of \(J\) trials, each with \(n^t\) treatment cases, \(n^c\) control cases, \(r^t\) successful outcomes among those treated and \(r^c\) successful outcomes among those in the control group. This data can be declared in Stan as follows.2

+
data {
+  int<lower=0> J;
+  array[J] int<lower=0> n_t;  // num cases, treatment
+  array[J] int<lower=0> r_t;  // num successes, treatment
+  array[J] int<lower=0> n_c;  // num cases, control
+  array[J] int<lower=0> r_c;  // num successes, control
+}
+
+
+

Converting to log odds and standard error

+

Although the clinical trial data are binomial in their raw format, they may be transformed to an unbounded scale by considering the log odds ratio \[\begin{align*} +y_j &= \log \left( \frac{r^t_j / (n^t_j - r^t_j)} + {r^c_j / (n^c_j - r^c_j)} + \right) \\ +&= \log \left( \frac{r^t_j}{n^t_j - r^t_j} \right) + -\log \left( \frac{r^c_j}{n^c_j - r^c_j} \right) +\end{align*}\] and corresponding standard errors \[ +\sigma_j = \sqrt{ + \frac{1}{r^t_j} ++ \frac{1}{n^t_j - r^t_j} ++ \frac{1}{r^c_j} ++ \frac{1}{n^c_j - r^c_j} +}. +\]

+

The log odds and standard errors can be defined in a transformed data block, though care must be taken not to use integer division.3

+
transformed data {
+  array[J] real y;
+  array[J] real<lower=0> sigma;
+  for (j in 1:J) {
+    y[j] = log(r_t[j]) - log(n_t[j] - r_t[j])
+            - (log(r_c[j]) - log(n_c[j] - r_c[j]));
+  }
+  for (j in 1:J) {
+    sigma[j] = sqrt(1.0 / r_t[j] + 1.0 / (n_t[j] - r_t[j])
+                     + 1.0 / r_c[j] + 1.0 / (n_c[j] - r_c[j]));
+  }
+}
+

This definition will be problematic if any of the success counts is zero or equal to the number of trials. If that arises, either a direct binomial model will be required or a transform other than the unregularized sample log odds must be used.

+
+
+

Non-hierarchical model

+

With the transformed data in hand, two standard forms of meta-analysis can be applied. The first is a so-called “fixed effects” model, which assumes a single parameter for the global odds ratio. This model is coded in Stan as follows.

+
parameters {
+  real theta;  // global treatment effect, log odds
+}
+model {
+  y ~ normal(theta, sigma);
+}
+

The distribution statement for y is vectorized; it has the same effect as the following.

+
  for (j in 1:J) {
+    y[j] ~ normal(theta, sigma[j]);
+  }
+

It is common to include a prior for theta in this model, but it is not strictly necessary for the model to be proper because y is fixed and \(\textsf{normal}(y \mid \mu,\sigma) = +\textsf{normal}(\mu \mid y,\sigma)\).

+
+
+

Hierarchical model

+

To model so-called “random effects,” where the treatment effect may vary by clinical trial, a hierarchical model can be used. The parameters include per-trial treatment effects and the hierarchical prior parameters, which will be estimated along with other unknown quantities.

+
parameters {
+  array[J] real theta;  // per-trial treatment effect
+  real mu;              // mean treatment effect
+  real<lower=0> tau;    // deviation of treatment effects
+}
+model {
+  y ~ normal(theta, sigma);
+  theta ~ normal(mu, tau);
+  mu ~ normal(0, 10);
+  tau ~ cauchy(0, 5);
+}
+

Although the vectorized distribution statement for y appears unchanged, the parameter theta is now an array with one element per trial. The distribution statement for theta is also vectorized, with the hyperparameters mu and tau themselves being given wide priors compared to the scale of the data.

+

Rubin (1981) provided a hierarchical Bayesian meta-analysis of the treatment effect of Scholastic Aptitude Test (SAT) coaching in eight schools based on the sample treatment effect and standard error in each school.

+
+
+

Extensions and alternatives

+

Smith, Spiegelhalter, and Thomas (1995) and Gelman et al. (2013, sec. 19.4) provide meta-analyses based directly on binomial data. Warn, Thompson, and Spiegelhalter (2002) consider the modeling implications of using alternatives to the log-odds ratio in transforming the binomial data.

+

If trial-specific predictors are available, these can be included directly in a regression model for the per-trial treatment effects \(\theta_j\).

+ + + +
+
+
+
+ + + Back to top

References

+
+Clayton, D. G. 1992. “Models for the Analysis of Cohort and Case-Control Studies with Inaccurately Measured Exposures.” In Statistical Models for Longitudinal Studies of Exposure and Health, edited by James H. Dwyer, Manning Feinleib, Peter Lippert, and Hans Hoffmeister, 301–31. New York: Oxford University Press. +
+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+Richardson, Sylvia, and Walter R. Gilks. 1993. “A Bayesian Approach to Measurement Error Problems in Epidemiology Using Conditional Independence Models.” American Journal of Epidemiology 138 (6): 430–42. +
+
+Rubin, Donald B. 1981. “Estimation in Parallel Randomized Experiments.” Journal of Educational Statistics 6: 377–401. +
+
+Smith, Teresa C., David J. Spiegelhalter, and Andrew Thomas. 1995. “Bayesian Approaches to Random-Effects Meta-Analysis: A Comparative Study.” Statistics in Medicine 14 (24): 2685–99. +
+
+Warn, David E., S. G. Thompson, and David J. Spiegelhalter. 2002. “Bayesian Random Effects Meta-Analysis of Trials with Binary Outcomes: Methods for the Absolute Risk Difference and Relative Risk Scales.” Statistics in Medicine 21: 1601–23. +
+

Footnotes

+ +
    +
  1. There are several different rounding rules (see, e.g., Wikipedia: Rounding), which affect which interval ends are open and which are closed, but these do not matter here as for continuous \(z_n\) \(p(z_n=y_n-0.5)=p(z_n=y_n+0.5)=0\).↩︎

  2. +
  3. Stan’s integer constraints are not powerful enough to express the constraint that \(\texttt{r}\mathtt{\_}\texttt{t[j]} \leq \texttt{n}\mathtt{\_}\texttt{t[j]}\), but this constraint could be checked in the transformed data block.↩︎

  4. +
  5. When dividing two integers, the result type is an integer and rounding will ensue if the result is not exact. See the discussion of primitive arithmetic types in the reference manual for more information.↩︎

  6. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/missing-data.html b/docs/2_39/stan-users-guide/missing-data.html new file mode 100644 index 000000000..38af5f613 --- /dev/null +++ b/docs/2_39/stan-users-guide/missing-data.html @@ -0,0 +1,1415 @@ + + + + + + + + + +Missing Data and Partially Known Parameters + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Missing Data and Partially Known Parameters

+

Bayesian inference supports a general approach to missing data in which any missing data item is represented as a parameter that is estimated in the posterior (Gelman et al. 2013). If the missing data are not explicitly modeled, as in the predictors for most regression models, then the result is an improper prior on the parameter representing the missing predictor.

+

Arrays that mix observed and missing data can be difficult to code in Stan, partly because it can be tricky to model discrete unknowns in Stan and partly because, unlike some other statistical languages (for example, R and BUGS), Stan requires observed and unknown quantities to be defined in separate places in the model. Thus it can be necessary to include code in a Stan program to splice together observed and missing parts of a data structure. Examples are provided later in the chapter.

+
+

Missing data

+

Stan treats variables declared in the data and transformed data blocks as known and the variables in the parameters block as unknown.

+

An example involving missing normal observations could be coded as follows.1

+
data {
+  int<lower=0> N_obs;
+  int<lower=0> N_mis;
+  array[N_obs] real y_obs;
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+  array[N_mis] real y_mis;
+}
+model {
+  y_obs ~ normal(mu, sigma);
+  y_mis ~ normal(mu, sigma);
+}
+

The number of observed and missing data points are coded as data with non-negative integer variables N_obs and N_mis. The observed data are provided as an array data variable y_obs. The missing data are coded as an array parameter, y_mis. The ordinary parameters being estimated, the location mu and scale sigma, are also coded as parameters. The model is vectorized on the observed and missing data; combining them in this case would be less efficient because the data observations would be promoted and have needless derivatives calculated.

+
+
+

Partially known parameters

+

In some situations, such as when a multivariate probability function has partially observed outcomes or parameters, it will be necessary to create a vector mixing known (data) and unknown (parameter) values. This can be done in Stan by creating a vector or array in the transformed parameters block and assigning to it.

+

The following example involves a bivariate covariance matrix in which the variances are known, but the covariance is not.

+
data {
+  int<lower=0> N;
+  array[N] vector[2] y;
+  real<lower=0> var1;
+  real<lower=0> var2;
+}
+transformed data {
+  real<lower=0> max_cov = sqrt(var1 * var2);
+  real<upper=0> min_cov = -max_cov;
+}
+parameters {
+  vector[2] mu;
+  real<lower=min_cov, upper=max_cov> cov;
+}
+transformed parameters {
+  matrix[2, 2] Sigma;
+  Sigma[1, 1] = var1;
+  Sigma[1, 2] = cov;
+  Sigma[2, 1] = cov;
+  Sigma[2, 2] = var2;
+}
+model {
+  y ~ multi_normal(mu, Sigma);
+}
+

The variances are defined as data in variables var1 and var2, whereas the covariance is defined as a parameter in variable cov. The \(2 \times 2\) covariance matrix Sigma is defined as a transformed parameter, with the variances assigned to the two diagonal elements and the covariance to the two off-diagonal elements.

+

The constraint on the covariance declaration ensures that the resulting covariance matrix Sigma is positive definite. The bound, plus or minus the square root of the product of the variances, is defined as transformed data so that it is only calculated once.

+

The vectorization of the multivariate normal is critical for efficiency here. The transformed parameter Sigma could be defined as a local variable within the model block if it does not need to be included in the sampler’s output.

+
+
+

Sliced missing data

+

If the missing data are part of some larger data structure, then it can often be effectively reassembled using index arrays and slicing. Here’s an example for time-series data, where only some entries in the series are observed.

+
data {
+  int<lower=0> N_obs;
+  int<lower=0> N_mis;
+  array[N_obs] int<lower=1, upper=N_obs + N_mis> ii_obs;
+  array[N_mis] int<lower=1, upper=N_obs + N_mis> ii_mis;
+  array[N_obs] real y_obs;
+}
+transformed data {
+  int<lower=0> N = N_obs + N_mis;
+}
+parameters {
+  array[N_mis] real y_mis;
+  real<lower=0> sigma;
+}
+transformed parameters {
+  array[N] real y;
+  y[ii_obs] = y_obs;
+  y[ii_mis] = y_mis;
+}
+model {
+  sigma ~ gamma(1, 1);
+  y[1] ~ normal(0, 100);
+  y[2:N] ~ normal(y[1:(N - 1)], sigma);
+}
+

The index arrays ii_obs and ii_mis contain the indexes into the final array y of the observed data (coded as a data array y_obs) and the missing data (coded as a parameter array y_mis). See the time series chapter for further discussion of time-series models and specifically the autoregression section for an explanation of the vectorization for y as well as an explanation of how to convert this example to a full AR(1) model. To ensure y[1] has a proper posterior in case it is missing, we have given it an explicit, albeit broad, prior.

+

Another potential application would be filling the columns of a data matrix of predictors for which some predictors are missing; matrix columns can be accessed as vectors and assigned the same way, as in

+
x[N_obs_2, 2] = x_obs_2;
+x[N_mis_2, 2] = x_mis_2;
+

where the relevant variables are all hard coded with index 2 because Stan doesn’t support ragged arrays. These could all be packed into a single array with more fiddly indexing that slices out vectors from longer vectors (see the ragged data structures section for a general discussion of coding ragged data structures in Stan).

+
+
+

Loading matrix for factor analysis

+

Rick Farouni, on the Stan users group, inquired as to how to build a Cholesky factor for a covariance matrix with a unit diagonal, as used in Bayesian factor analysis (Aguilar and West 2000). This can be accomplished by declaring the below-diagonal elements as parameters, then filling the full matrix as a transformed parameter.

+
data {
+  int<lower=2> K;
+}
+transformed data {
+  int<lower=1> K_choose_2;
+  K_choose_2 = (K * (K - 1)) / 2;
+}
+parameters {
+  vector[K_choose_2] L_lower;
+}
+transformed parameters {
+  cholesky_factor_cov[K] L;
+  for (k in 1:K) {
+    L[k, k] = 1;
+  }
+  {
+    int i = 1;
+    for (m in 2:K) {
+      for (n in 1:(m - 1)) {
+        L[m, n] = L_lower[i];
+        L[n, m] = 0;
+        i += 1;
+      }
+    }
+  }
+}
+

It is most convenient to place a prior directly on L_lower. An alternative would be a prior for the full Cholesky factor L, because the transform from L_lower to L is just the identity and thus does not require a Jacobian adjustment (despite the warning from the parser, which is not smart enough to do the code analysis to infer that the transform is linear). It would not be at all convenient to place a prior on the full covariance matrix L * L', because that would require a Jacobian adjustment; the exact adjustment is detailed in the reference manual.

+
+
+

Missing multivariate data

+

It’s often the case that one or more components of a multivariate outcome are missing.2

+

As an example, we’ll consider the bivariate distribution, which is easily marginalized. The coding here is brute force, representing both an array of vector observations y and a boolean array y_observed to indicate which values were observed (others can have dummy values in the input).

+
array[N] vector[2] y;
+array[N, 2] int<lower=0, upper=1> y_observed;
+

If both components are observed, we model them using the full multi-normal; otherwise, we model the marginal distribution of the component that is observed.

+
for (n in 1:N) {
+  if (y_observed[n, 1] && y_observed[n, 2]) {
+    y[n] ~ multi_normal(mu, Sigma);
+  } else if (y_observed[n, 1]) {
+    y[n, 1] ~ normal(mu[1], sqrt(Sigma[1, 1]));
+  } else if (y_observed[n, 2]) {
+    y[n, 2] ~ normal(mu[2], sqrt(Sigma[2, 2]));
+  }
+}
+

It’s a bit more work, but much more efficient to vectorize these distribution statements. In transformed data, build up three vectors of indices, for the three cases above:

+
transformed data {
+  array[observed_12(y_observed)] int ns12;
+  array[observed_1(y_observed)] int ns1;
+  array[observed_2(y_observed)] int ns2;
+}
+

You will need to write functions that pull out the count of observations in each of the three situations. This must be done with functions because the result needs to go in a top-level block variable size declaration. Then the rest of transformed data just fills in the values using three counters.

+
int n12 = 1;
+int n1 = 1;
+int n2 = 1;
+for (n in 1:N) {
+  if (y_observed[n, 1] && y_observed[n, 2]) {
+    ns12[n12] = n;
+    n12 += 1;
+  } else if (y_observed[n, 1]) {
+    ns1[n1] = n;
+    n1 += 1;
+  } else if (y_observed[n, 2]) {
+    ns2[n2] = n;
+    n2 += 1;
+  }
+}
+

Then, in the model block, everything is vectorizable using those indexes constructed once in transformed data:

+
y[ns12] ~ multi_normal(mu, Sigma);
+y[ns1, 1] ~ normal(mu[1], sqrt(Sigma[1, 1]));
+y[ns2, 2] ~ normal(mu[2], sqrt(Sigma[2, 2]));
+

The result will be much more efficient than using latent variables for the missing data, but it requires the multivariate distribution to be marginalized analytically. It’d be more efficient still to precompute the three arrays in the transformed data block, though the efficiency improvement will be relatively minor compared to vectorizing the probability functions.

+

This approach can easily be generalized with some index fiddling to the general multivariate case. The trick is to pull out entries in the covariance matrix for the missing components. It can also be used in situations such as multivariate differential equation solutions where only one component is observed, as in a phase-space experiment recording only time and position of a pendulum (and not recording momentum).

+ + + +
+
+ + + Back to top

References

+
+Aguilar, Omar, and Mike West. 2000. “Bayesian Dynamic Factor Models and Portfolio Allocation.” Journal of Business & Economic Statistics 18 (3): 338–57. +
+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+

Footnotes

+ +
    +
  1. A more meaningful estimation example would involve a regression of the observed and missing observations using predictors that were known for each and specified in the data block.↩︎

  2. +
  3. This is not the same as missing components of a multivariate predictor in a regression problem; in that case, you will need to represent the missing data as a parameter and impute missing values in order to feed them into the regression.↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/multi-indexing.html b/docs/2_39/stan-users-guide/multi-indexing.html new file mode 100644 index 000000000..ffaf5a741 --- /dev/null +++ b/docs/2_39/stan-users-guide/multi-indexing.html @@ -0,0 +1,1460 @@ + + + + + + + + + +Multiple Indexing and Range Indexing + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Multiple Indexing and Range Indexing

+

Stan allows multiple indexes to be provided for containers (i.e., arrays, vectors, and matrices) in a single position, using either an array of integer indexes or range bounds. In many cases, there are functions that provide similar behavior.

+

Allowing multiple indexes supports inline vectorization of models. For instance, consider the data model for a varying-slope, varying-intercept hierarchical linear regression, which could be coded as

+
for (n in 1:N) {
+  y[n] ~ normal(alpha[ii[n]] + beta[ii[n]] * x[n], sigma);
+}
+

With multiple indexing, this can be coded in one line, leading to more efficient vectorized code.

+
y ~ normal(alpha[ii] + rows_dot_product(beta[ii], x), sigma);
+

This latter version is faster than the loop version; it is equivalent in speed to the clunky assignment to a local variable.

+
{
+  vector[N] mu;
+  for (n in 1:N) {
+    mu[n] = alpha[ii[n]] + beta[ii[n]] * x[n];
+  }
+  y ~ normal(mu, sigma);
+}
+

The boost in speed compared to the original version is because the single call to the normal log density in the distribution statement will be much more memory efficient than the original version.

+
+

Multiple indexing

+

The following is the simplest concrete example of multiple indexing with an array of integers; the ellipses stand for code defining the variables as indicated in the comments.

+
array[3] int c;
+// ... define: c == (5, 9, 7)
+array[4] int idxs;
+// ... define: idxs == (3, 3, 1, 2)
+array[4] int d;
+d = c[idxs];    // result: d == (7, 7, 5, 9)
+

In general, the multiple indexed expression c[idxs] is defined as follows, assuming idxs is of size K.

+
c[idxs] = ( c[idxs[1]], c[idxs[2]], ..., c[idxs[K]] )
+

Thus c[idxs] is of the same size as idxs, which is K in this example.

+

Multiple indexing can also be used with multi-dimensional arrays. For example, consider the following.

+
array[2, 3] int c;
+// ... define: c = ((1, 3, 5), (7, 11, 13))
+array[4] int idxs;
+// ... define: idxs = (2, 2, 1, 2)
+array[4, 3] int d;
+d = c[idxs];    // result: d = ((7, 11, 13), (7, 11, 13),
+                //              (1, 3, 5), (7, 11, 13))
+

That is, putting an index in the first position acts exactly the same way as defined above. The fact that the values are themselves arrays makes no difference—the result is still defined by c[idxs][j] == c[idxs[j]].

+

Multiple indexing may also be used in the second position of a multi-dimensional array. Continuing the above example, consider a single index in the first position and a multiple index in the second.

+
array[4] int e;
+e = c[2, idxs]; // result:  c[2] = (7, 11, 13)
+                // result:  e = (11, 11, 7, 11)
+

The single index is applied, the one-dimensional result is determined, then the multiple index is applied to the result. That is, c[2,idxs] evaluates to the same value as c[2][idxs].

+

Multiple indexing can apply to more than one position of a multi-dimensional array. For instance, consider the following.

+
array[2, 3] int c;
+// ... define: c = ((1, 3, 5), (7, 11, 13))
+array[3] int idxs1;
+// ... define: idxs1 = (2, 2, 1)
+array[2] int idxs2;
+// ... define: idxs2 = (1, 3)
+array[3, 2] int d;
+d = c[idxs1, idxs2];  // result: d = ((7, 13), (7, 13), (1, 5))
+

With multiple indexes, we no longer have c[idxs1, idxs2] being the same as c[idxs1][idxs2]. Rather, the entry d[i, j] after executing the above is given by

+
d[i, j] == c[idxs1, idxs2][i, j] == c[idxs1[i], idxs2[j]]
+

This example illustrates the operation of multiple indexing in the general case: a multiple index like idxs1 converts an index i used on the result (here, c[idxs1, idxs2]) to index idxs1[i] in the variable being indexed (here, c). In contrast, a single index just returns the value at that index, thus reducing dimensionality by one in the result.

+
+
+

Slicing with range indexes

+

Slicing returns a contiguous slice of a one-dimensional array, a contiguous sub-block of a two-dimensional array, and so on. Semantically, it is just a special form of multiple indexing.

+
+

Lower and upper bound indexes

+

For instance, consider supplying an upper and lower bound for an index.

+
array[7] int c;
+// ...
+array[4] int d;
+d = c[3:6];  // result: d == (c[3], c[4], c[5], c[6])
+

The range index 3:6 behaves semantically just like the multiple index (3, 4, 5, 6). In terms of implementation, the sliced upper and/or lower bounded indices are faster and use less memory because they do not explicitly create a multiple index, but rather use a direct loop. They are also easier to read, so should be preferred over multiple indexes where applicable.

+
+
+

Lower or upper bound indexes

+

It is also possible to supply just a lower bound, or just an upper bound. Writing c[3:] is just shorthand for c[3:size(c)]. Writing c[:5] is just shorthand for c[1:5].

+
+
+

Full range indexes

+

Finally, it is possible to write a range index that covers the entire range of an array, either by including just the range symbol (:) as the index or leaving the index position empty. In both cases, c[] and c[:] are equal to c[1:size(c)], which in turn is just equal to c.

+
+
+

Slicing functions

+

Stan provides head and tail functions that pull out prefixes or suffixes of vectors, row vectors, and one-dimensional arrays. In each case, the return type is the same as the argument type. For example,

+
vector[M] a = ...;
+vector[N] b = head(a, N);
+

assigns b to be a vector equivalent to the first N elements of the vector a. The function tail works the same way for suffixes, with

+
array[M] real a = ...;
+array[N] real b = tail(a, N);
+

Finally, there is a segment function, which specifies a first element and number of elements. For example,

+
array[15] real a = ...;
+array[3] real b = segment(a, 5, 3);
+

will set b to be equal to { a[5], a[6], a[7] }, so that it starts at element 5 of a and includes a total of 3 elements.

+
+
+
+

Multiple indexing on the left of assignments

+

Multiple indexing expressions may be used on the left-hand side of an assignment statement, where they work exactly the same way as on the right-hand side in terms of picking out entries of a container. For example, consider the following.

+
array[3] int a;
+array[2] int c;
+array[2] int idxs;
+// ... define: a == (1, 2, 3);  c == (5, 9)
+               //         idxs = (3,2)
+a[idxs] = c;   // result: a == (1, 9, 5)
+

The result above can be worked out by noting that the assignment sets a[idxs[1]] (a[3]) to c[1] (5) and a[idxs[2]] (a[2]) to c[2] (9).

+

The same principle applies when there are many multiple indexes, as in the following example.

+
array[5, 7] int a;
+array[2, 2] int c;
+// ...
+a[2:3, 5:6] = c;  // result: a[2, 5] == c[1, 1];  a[2, 6] == c[1, 2]
+                  //         a[3, 5] == c[2, 1];  a[3, 6] == c[2, 2]
+

As in the one-dimensional case, the right-hand side is written into the slice, block, or general chunk picked out by the left-hand side.

+

Usage on the left-hand side allows the full generality of multiple indexing, with single indexes reducing dimensionality and multiple indexes maintaining dimensionality while rearranging, slicing, or blocking. For example, it is valid to assign to a segment of a row of an array as follows.

+
array[10, 13] int a;
+array[2] int c;
+// ...
+a[4, 2:3] = c;  // result:  a[4, 2] == c[1];  a[4, 3] == c[2]
+
+

Assign-by-value and aliasing

+

Aliasing issues arise when there are references to the same data structure on the right-hand and left-hand side of an assignment. For example, consider the array a in the following code fragment.

+
array[3] int a;
+// ... define: a == (5, 6, 7)
+a[2:3] = a[1:2];
+// ... result: a == (5, 5, 6)
+

The reason the value of a after the assignment is \((5,5,6)\) rather than \((5,5,5)\) is that Stan behaves as if the right-hand side expression is evaluated to a fresh copy. As another example, consider the following.

+
array[3] int a;
+array[3] int idxs;
+// ... define idxs = (2, 1, 3)
+a[idxs] = a;
+

In this case, it is evident why the right-hand side needs to be copied before the assignment.

+

It is tempting (but wrong) to think of the assignment a[2:3] = a[1:2] as executing the following assignments.

+
// ... define: a = (5, 6, 7)
+a[2] = a[1];      // result: a = (5, 5, 7)
+a[3] = a[2];      // result: a = (5, 5, 5)!
+

This produces a different result than executing the assignment because a[2]’s value changes before it is used.

+
+
+
+

Multiple indexes with vectors and matrices

+

Multiple indexes can be supplied to vectors and matrices as well as arrays of vectors and matrices.

+
+

Vectors

+

Vectors and row vectors behave exactly the same way as arrays with multiple indexes. If v is a vector, then v[3] is a scalar real value, whereas v[2:4] is a vector of size 3 containing the elements v[2], v[3], and v[4].

+

The only subtlety with vectors is in inferring the return type when there are multiple indexes. For example, consider the following minimal example.

+
array[3] vector[5] v;
+array[7] int idxs;
+// ...
+vector[7] u;
+u = v[2, idxs];
+
+array[7] real w;
+w = v[idxs, 2];
+

The key is understanding that a single index always reduces dimensionality, whereas a multiple index never does. The dimensions with multiple indexes (and unindexed dimensions) determine the indexed expression’s type. In the example above, because v is an array of vectors, v[2, idxs] reduces the array dimension but doesn’t reduce the vector dimension, so the result is a vector. In contrast, v[idxs, 2] does not reduce the array dimension, but does reduce the vector dimension (to a scalar), so the result type for w is an array of reals. In both cases, the size of the multiple index (here, 7) determines the size of the result.

+
+
+

Matrices

+

Matrices are a bit trickier because they have two dimensions, but the underlying principle of type inference is the same—multiple indexes leave dimensions in place, whereas single indexes reduce them. The following code shows how this works for multiple indexing of matrices.

+
matrix[5, 7] m;
+// ...
+row_vector[3] rv;
+rv = m[4, 3:5];    // result is 1 x 3
+// ...
+vector[4] v;
+v = m[2:5, 3];     // result is 4 x 1
+// ...
+matrix[3, 4] m2;
+m2 = m[1:3, 2:5];  // result is 3 x 4
+

The key is realizing that any position with a multiple index or bounded index remains in play in the result, whereas any dimension with a single index is replaced with 1 in the resulting dimensions. Then the type of the result can be read off of the resulting dimensionality as indicated in the comments above.

+
+
+

Matrices with one multiple index

+

If matrices receive a single multiple index, the result is a matrix. So if m is a matrix, so is m[2:4]. In contrast, supplying a single index, m[3], produces a row vector result. That is, m[3] produces the same result as m[3, ] or m[3, 1:cols(m)].

+
+
+

Arrays of vectors or matrices

+

With arrays of matrices, vectors, and row vectors, the basic access rules remain exactly the same: single indexes reduce dimensionality and multiple indexes redirect indexes. For example, consider the following example.

+
array[5, 7] matrix[3, 4] m;
+// ...
+array[2] matrix[3, 4] a;
+a = m[1, 2:3];  // knock off first array dimension
+a = m[3:4, 5];  // knock off second array dimension
+

In both assignments, the single index knocks off an array dimension, but it’s a different dimension in each case. In the first case, a[i] == m[1, i + 1], whereas in the second case, a[i] == m[i + 2, 5].

+

Continuing the previous example, consider the following.

+
// ...
+vector[2] b;
+b = m[1, 3, 2:3, 2];
+

Here, the two array dimensions are reduced as is the column dimension of the matrix, leaving only a row dimension index, hence the result is a vector. In this case, b[j] == m[1, 3, 1 + j, 2].

+

This last example illustrates an important point: if there is a lower-bounded index, such as 2:3, with lower bound 2, then the lower bound minus one is added to the index, as seen in the 1 + j expression above.

+

Continuing further, consider continuing with the following.

+
// ...
+array[2] row_vector[3] c;
+c = m[4:5, 3, 1, 2: ];
+

Here, the second array dimension is reduced, leaving a single array dimension, and the row index of the matrix is reduced, leaving a row vector. For indexing, the values are given by c[i, j] == m[i + 3, 3, 1, j + 1].

+
+
+

Block, row, and column extraction for matrices

+

Matrix slicing can also be performed using the block function. For example,

+
matrix[20, 20] a = ...;
+matrix[3, 2] b = block(a, 5, 9, 3, 2);
+

will set b equal to the submatrix of a starting at index [5, 9] and extending 3 rows and 2 columns. Thus block(a, 5, 9, 3, 2) is equivalent to a[5:7, 9:10].

+

The sub_col function extracts a slice of a column of a matrix as a vector. For example,

+
matrix[10, 10] a = ...;
+vector[5] b = sub_col(a, 2, 3, 5);
+

will set b equal to the vector a[2:6, 3], taking the element starting at [2, 3], then extending for a total of 5 rows. The function sub_row works the same way for extracting a slice of a row as a row vector. For example, sub_row(a, 2, 3, 5) is equal to the row vector a[2, 3:7], which also starts at position [2, 3] then extends for a total of 5 columns.

+
+
+
+

Matrices with parameters and constants

+

Suppose you have a \(3 \times 3\) matrix and know that two entries are zero but the others are parameters. Such a situation arises in missing data situations and in problems with fixed structural parameters.

+

Suppose a \(3 \times 3\) matrix is known to be zero at indexes \([1,2]\) and \([1,3]\). The indexes for parameters are included in a “melted” data-frame or database format.

+
transformed data {
+  array[7, 2] int<lower=1, upper=3> idxs
+    = { {1, 1},
+        {2, 1}, {2, 2}, {2, 3},
+        {3, 1}, {3, 2}, {3, 3} };
+  // ...
+

The seven remaining parameters are declared as a vector.

+
parameters {
+  vector[7] A_raw;
+  // ...
+}
+

Then the full matrix A is constructed in the model block as a local variable.

+
model {
+  matrix[3, 3] A;
+  for (i in 1:7) {
+    A[idxs[i, 1], idxs[i, 2]] = A_raw[i];
+  }
+  A[1, 2] = 0;
+  A[1, 3] = 0;
+  // ...
+}
+

This may seem like overkill in this setting, but in more general settings, the matrix size, vector size, and the idxs array will be too large to code directly. Similar techniques can be used to build up matrices with ad-hoc constraints, such as a handful of entries known to be positive.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/multiple-imputation.html b/docs/2_39/stan-users-guide/multiple-imputation.html new file mode 100644 index 000000000..5e4b37331 --- /dev/null +++ b/docs/2_39/stan-users-guide/multiple-imputation.html @@ -0,0 +1,1450 @@ + + + + + + + + + +Multiple Imputation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Multiple imputation

+

Missing data is common in applied data analysis. Ignoring this missingness can distort posterior inferences and reduce their precision, and modeling it can improve inference quality. There are several ways to model missing data. Chapter 18 in Gelman et al. (2013) offers an approximately Bayesian perspective, and the chapter Missing Data and Partially Known Parameters in the Reference Manual shows an approach that handles missing data as parameters.

+

Another way to model missing data is through multiple imputation, which replaces missing values with sampled values to obtain different versions of a complete data set. Then, the model of interest can be fitted to each of these complete versions separately. Combining the resulting draws from these different fits produces a sample that accounts for the uncertainty in the missing values.

+
+

Motivation and outline

+

Suppose that we have a data set \(x\) with columns \(x_{\cdot, 1}, \ldots, x_{\cdot, K}\) that make up covariates in a regression with quantities of interest \(\theta\). With the completely observed data, \(x^\text{comp}\), we could estimate the posterior distribution of \(\theta\) as \(p(\theta \mid x^\text{comp})\). But with missing data, our matrix is split into \(x^{\text{obs}}\) (the observed values of \(x\)) and \(x^{\text{mis}}\) (the missing values of \(x\)).

+

Fortunately, we can treat \(x^{\text{mis}}\) as additional, nuisance parameters that we can estimate along with \(\theta\) as \(p(\theta, x^{\text{mis}} \mid x^{\text{obs}})\). So, we can express the marginal distribution of \(\theta\) given only the observed values \(x^{\text{obs}}\) as \[\begin{align*} +p(\theta \mid x^{\text{obs}}) =& +\int + p(\theta, x^{\text{mis}} \mid x^{\text{obs}}) + \mathrm{d}x^{\text{mis}} \\ +=& +\int + p(\theta \mid x^{\text{obs}}, x^{\text{mis}}) + p(x^{\text{mis}} \mid x^{\text{obs}}) + \mathrm{d}x^{\text{mis}} \\ +=& \int + p(\theta \mid x^{\text{imp}}) + p(x^{\text{mis}} \mid x^{\text{obs}}) + \mathrm{d}x^{\text{mis}}, +\end{align*}\] where \(x^{\text{imp}}\) is a data set that includes imputed values of \(x^{\text{mis}}\).

+

The equations above show that we do not need to describe \(p(\theta \mid x^{\text{obs}})\) directly. Instead, we can first find a way to sample from \(x^{\text{mis}}\) based on \(x^{\text{obs}}\), and then use these samples to fit the model \(p(\theta \mid x^\text{imp})\), treating \(x^\text{imp}\) as if it was \(x^\text{comp}\).

+

Note that the model for \(p(x^{\text{mis}} \mid x^{\text{obs}})\) needs to be explicit. In a typical regression setting with covariates \(x\) and outcome \(y\), this model for \(x\) is not needed because inferences for the regression parameters are independent of the model of \(x\) when \(x\) is fully observed. But with missing data and multiple imputation, we need to generate values for \(x^{\text{mis}}\) based on \(x^{\text{obs}}\), which in turn needs an explicit model for \(x^{\text{mis}}\).

+

Thus, the general outline for multiple imputation with a number \(M\) of imputations is:

+
    +
  1. Draw \(M\) random samples \(x^{\text{mis}}_1, \ldots, +x^{\text{mis}}_M\) from the posterior predictive distribution \(p(x^{\text{mis}} \mid x^{\text{obs}})\).
  2. +
  3. Use these samples to get \(M\) different complete data sets \(x_1^\text{imp}, \ldots, x_M^\text{imp}\).
  4. +
  5. For each of the \(M\) imputed data sets, sample from the model of interest \(p(\theta \mid x_m^\text{imp})\), each time treating \(x_m^\text{imp}\) as if it was \(x^\text{comp}\).
  6. +
  7. Combine the draws for \(\theta\) from all \(M\) fits.
  8. +
+
+

Pseudocode for outline

+

Following the notation above, we can express the outline with the pseudocode below:

+
theta_estimates = []
+for (m in 1:M) {
+  x_mis <- get_imputations(x_obs)
+  x_imp <- add_imputations(x_obs, x_mis)
+  imputation_fits <- my_model.fit(x_imp).draws()
+  theta_estimates <- theta_estimates.append(imputation_fits)
+}
+

Each of the functions in this pseudocode can encapsulate simple or complex procedures. get_imputations(), for example, may impute missing data by taking values directly from x_obs; or it may model a regression where an \(x_{\cdot, k}\) with missing values is the outcome and the \(x_{\cdot, j}\) without missing observations are predictors. Below we illustrate how to implement this second approach in Stan.

+
+
+
+

Imputing one variable in Stan

+

Imagine our data set is composed of two numerical, continuous variables \(x\) and \(y\), and one binary variable \(z\). We want to estimate the parameters that govern the conditional association \(p(y \mid x, z)\), but several values of \(x\) are missing. So, before we fit this model for \(y\), we impute the missing values in \(x\).

+

We first find all the fully observed data points and identify their values as \(x^{\text{obs}}\), \(y^{\text{obs}}\), and \(z^{\text{obs}}\). Then we use these data to fit the model \[ +x^{\text{obs}} \sim \text{normal}(\gamma_0 + \gamma_1 y^{\text{obs}} + + \gamma_2 z^{\text{obs}}, \lambda). +\]

+

This model can then give us samples for the missing values \(x^{\text{mis}}\) conditional on the corresponding values of \(y\) and \(z\), which we call \(y^{\text{aux}}\) and \(z^{\text{aux}}\). The Stan code for this imputation is:

+
data {
+  int<lower=0> N_obs;
+  vector[N_obs] x_obs;
+  vector[N_obs] y_obs;
+  array[N_obs] int<lower=0, upper=1> z_obs;  
+  int<lower=0> N_mis;   
+  vector[N_mis] y_aux;  
+  array[N_mis] int<lower=0, upper=1> z_aux; 
+}
+parameters {
+  vector[3] gamma;
+  real<lower=0> lambda;
+}
+model {
+  gamma ~ normal(0, 1);
+  lambda ~ exponential(1);
+  x_obs ~ normal(gamma[1] + gamma[2] * y_obs
+          + gamma[3] * to_vector(z_obs), lambda);
+}
+generated quantities {
+  array[N_mis] real x_imp = normal_rng(gamma[1] + gamma[2] * y_aux
+                      + gamma[3] * to_vector(z_aux), lambda);
+}
+

The generated quantities block automatically samples \(\gamma_0\), \(\gamma_1\), \(\gamma_2\), and \(\lambda\) from their posterior distributions. So, the random draws of x_imp incorporate uncertainty from the estimated parameters and from the sampling variation in \(x^{\text{mis}}\).

+

Multiple posterior draws of \(x^{\text{mis}}\) give us multiple imputed data sets. With these data sets we can model \(p(y \mid \beta, \sigma, x, z)\) as \[ +y \sim \text{normal}(\beta_0 + \beta_1 x^\text{comp} + \beta_2 z, + \sigma), +\] where \(x^\text{comp}\) contains observed and imputed values. The Stan code for this model is:

+
data {
+  int<lower=0> N;
+  vector[N] y;
+  vector[N] x;
+  vector[N] z;
+}
+parameters {
+  vector[3] beta;
+  real<lower=0> sigma;
+}
+model {
+  beta ~ normal(0, 1);
+  sigma ~ exponential(1);
+  y ~ normal(beta[1] + beta[2] * x + beta[3] * z, sigma);
+}
+

With multiple imputation, the model for \(p(y \mid \beta, \sigma, x, z)\) does not need to distinguish between observed and imputed values of \(x\). Instead, this model can treat all imputed data sets as complete because we can combine all the posterior draws from multiple fits.

+
+
+

Imputing two or more variables

+

A more general scenario involves an outcome variable \(y\) and several explanatory variables \(x_1, \ldots, x_K\), each of which can have missing values that we want to impute.

+

One solution is to do multiple imputation through chained equations, a procedure often called “MICE”1. The MICE procedure in this scenario is:

+
    +
  1. Initialize missing values in all \(x_i\). For each variable \(x_i\) with missing entries, fill its missing values with random samples from its observed values (or use another simple initialization rule).
  2. +
  3. Update each \(x_i\) given \(y\) and the other \(x\)’s. For \(i=1, \ldots, K\), fit a model for the observed \(x_i\) conditional on the current versions (with observed and imputed values) of \(y\) and \(x_{-i} = x_1, \ldots, x_{i-1}, x_{i+1}, \ldots, +x_{K}\). Use this model to draw imputations from the predictive distribution of the missing \(x_i\). Completing this step for all \(i = 1, \ldots, K\) constitutes a single imputation cycle.
  4. +
  5. Warmup period. Repeat the imputation cycle in step 1 several times as warmup to let the imputations stabilize.
  6. +
  7. Create M imputed datasets. After the warmup, record the current complete dataset as one imputed dataset. Then either restart from step 0 and repeat steps 1–3 until you have \(M\) imputed datasets; or do a single long run, i.e., continue iterating steps 1–2 and save the imputed dataset at \(M\) well-spaced iterations (e.g., every \(S\) cycles) to obtain \(M\) imputed datasets, without restarting from step 0 each time.
  8. +
  9. Fit the target model \(p(y \mid \beta, \sigma, x_1, \ldots, +x_K)\) separately to each of the \(M\) imputed datasets and save the posterior draws for the parameters of interest.
  10. +
  11. Combine the draws (or other summaries) from all \(M\) fits.
  12. +
+

Note that, as described here, the MICE procedure does not guarantee that the conditional distributions of all variables will be compatible. Compatibility means that there is a joint distribution for all the variables used in the imputation that can be decomposed as the conditional distributions we used. An incompatible imputation model will technically not sample from any well-defined probability distribution. But the consequences of this are not always serious. See section 6.4 in Carpenter et al. (2023) for a more detailed explanation of compatibility and of the related concept of congeniality.

+
+

Imputing two variables in Stan

+

Imagine that we again want to use numerical variables \(x\) and \(y\), and dichotomic variable \(z\) to estimate the parameters in \(p(y \mid \beta, \sigma, x, z)\). But now both \(x\) and \(z\) have missing values that we want to impute.

+

To apply the MICE procedure in this example, we can reuse the models for \(x\) and \(y\) that we defined above. We also need a new model to impute \(z^{\text{mis}}\), so we use the logistic regression \[ +z^{\text{obs}} \sim \text{Bernoulli}( + \text{logit}^{-1}( + \alpha_0 + \alpha_1 y^{\text{obs}} + \alpha_2 x^{\text{obs}}) + ). +\] Here, \(z^{\text{obs}}\) contains only the completely observed values from our original data set, while \(x^{\text{obs}}\) and \(y^{\text{obs}}\) contain all the values that correspond to \(z^{\text{obs}}\). Thus, \(x^{\text{obs}}\) can include observed and imputed values, and \(y^{\text{obs}}\) need not be the same as in the model for \(x^{\text{obs}}\).

+

With this model we can sample values for \(z^{\text{mis}}\) conditional on its corresponding values \(y^{\text{aux}}\) and \(x^{\text{aux}}\).

+

The Stan code to impute \(z^{\text{mis}}\) is:

+
data {
+  int<lower=0> N_obs;
+  array[N_obs] int<lower=0, upper=1> z_obs;
+  vector[N_obs] y_obs;
+  vector[N_obs] x_obs;
+  int<lower=0> N_mis;
+  vector[N_mis] y_aux;
+  vector[N_mis] x_aux;
+}
+parameters {
+  vector[3] gamma;
+}
+model {
+  gamma ~ normal(0, 1);
+  z_obs ~ bernoulli_logit(gamma[1] + gamma[2] * y_obs 
+                          + gamma[3] * x_obs);
+}
+generated quantities {
+  array[N_mis] int z_imp = bernoulli_logit_rng(gamma[1]
+                      + gamma[2] * y_aux
+                      + gamma[3] * x_aux);
+}
+

The pseudocode for this example of MICE, restarting the imputations after each imputation cycle, is shown below. Note that we do not need to initialize \(x^{\text{mis}}\) in step 0 because we can impute it directly with the model for \(x^{\text{obs}}\).

+
completed_datasets <- []
+for (m in 1:M) {
+  data_m <- copy(data_orig)
+  data_m.z[missing_idz] <- random_sample(data_m.z[observed_idz])
+  for (cycle in 1:n_warm) {
+    stanmod_x <- build_model("model_for_x.stan")
+    fit_x <- stanmod_x.fit(
+      obs_data=data_m[observed_idx],
+      mis_data=data_m[missing_idx]
+    )
+    data_m.x[missing_idx] <- get_imputations(fit_x, "x_imp")
+    stanmod_z <- build_model("model_for_z.stan")
+    fit_z <- stanmod_z.fit(
+      obs_data=data_m[observed_idz],
+      mis_data=data_m[missing_idz]
+    )
+    data_m.z[missing_idz] <- get_imputations(fit_z, "z_imp")
+  }
+  completed_datasets.append(data_m)
+}
+all_draws <- []
+for (dataset in completed_datasets) {
+  stanmod_y <- build_model("model_for_y.stan")
+  fit_y <- stanmod_y.fit(dataset)
+  all_draws.append(extract_draws(fit_y))
+}
+all_results <- combine_results(all_draws)
+
+
+
+

Combining posterior draws

+

With Stan’s MCMC sampler, we can treat posterior draw chains from imputed data sets as if they were chains based on complete data. There is one important difference. Multiple imputation expresses uncertainty in the missing values as consistent differences in the estimates obtained from different imputed data sets. This means that chains obtained from the same imputed data set should converge, but chains obtained from different data sets do not have to. So, we need not worry if diagnostics2 signal that the chains from different imputed data sets are not converging properly. See an example in Bürkner (2025).

+
+
+

Cut models

+

A full Bayesian probability model includes a feedback flow of information between all parameters and all data. Cut models separate some parts of this feedback flow so that different subsets of data influence only some parameters in the model (see Plummer (2015)).

+

Multiple imputation interrupts the flow of information from data to parameters. In our regression above, for example, the imputations influence the distribution of the parameters in \(p(y \mid x_1, \ldots, x_K)\), but these parameters do not influence the imputations. So, we could use multiple imputation to implement a cut model.

+ + + +
+
+ + + Back to top

References

+
+Bürkner, Paul. 2025. “Handle Missing Values with Brms.” https://cran.r-project.org/web/packages/brms/vignettes/brms_missings.html. +
+
+Carpenter, James R., Jonathan W. Bartlett, Tim P. Morris, Angela M. Wood, Matteo Quartagno, and Michael G. Kenward. 2023. Multiple Imputation and Its Application. 2nd ed. John Wiley & Sons, Ltd. https://doi.org/10.1002/9781119756118. +
+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+Plummer, Martyn. 2015. “Cuts in Bayesian Graphical Models.” Statistics and Computing 25: 37–43. https://doi.org/10.1007/s11222-014-9503-z. +
+
+van Buuren, Stef. 2018. Flexible Imputation of Missing Data. 2nd ed. Chapman; Hall/CRC. https://doi.org/10.1201/9780429492259. +
+

Footnotes

+ +
    +
  1. Section 5 of chapter 4 in van Buuren (2018) details the MICE procedure in a frequentist context.↩︎

  2. +
  3. Such as \(\hat{R}\). See Split R-hat for detecting non-stationarity in the Reference Manual.↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/odes.html b/docs/2_39/stan-users-guide/odes.html new file mode 100644 index 000000000..164f13882 --- /dev/null +++ b/docs/2_39/stan-users-guide/odes.html @@ -0,0 +1,1543 @@ + + + + + + + + + +Ordinary Differential Equations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Ordinary Differential Equations

+

Stan provides a number of different methods for solving systems of ordinary differential equations (ODEs). All of these methods adaptively refine their solutions in order to satisfy given tolerances, but internally they handle calculations quite a bit differently.

+

Because Stan’s algorithms require gradients of the log density, the ODE solvers must not only provide the solution to the ODE itself, but also the gradient of the ODE solution with respect to parameters (the sensitivities). Two fundamentally different approaches are available in Stan to solve this problem, each having very different computational cost depending on the number of ODE states \(N\) and the number of parameters \(M\) being used:

+
    +
  • A forward sensitivity solver expands the base ODE system with additional ODE equations for the gradients of the solution. For each parameter, an additional full set of \(N\) sensitivity states are added meaning that the full ODE solved has \(N \, + N \cdot M\) states.

  • +
  • An adjoint sensitivity solver starts by solving the base ODE system forward in time to get the ODE solution and then solves another ODE system (the adjoint) backward in time to get the gradients. The forward and reverse solves both have \(N\) states each. There is additionally one quadrature problem solved for every parameter.

  • +
+

The adjoint sensitivity approach scales much better than the forward sensitivity approach. Whereas the computational cost of the forward approach scales multiplicatively in the number of ODE states \(N\) and parameters \(M\), the adjoint sensitivity approach scales linearly in states \(N\) and parameters \(M\). However, the adjoint problem is harder to configure and the overhead for small problems actually makes it slower than solving the full forward sensitivity system. With that in mind, the rest of this introduction focuses on the forward sensitivity interfaces. For information on the adjoint sensitivity interface see the Adjoint ODE solver section.

+

Two interfaces are provided for each forward sensitivity solver: one with default tolerances and default max number of steps, and one that allows these controls to be modified. Choosing tolerances is important for making any of the solvers work well – the defaults will not work everywhere. The tolerances should be chosen primarily with consideration to the scales of the solutions, the accuracy needed for the solutions, and how the solutions are used in the model. For instance, if a solution component slowly varies between 3.0 and 5.0 and measurements of the ODE state are noisy, then perhaps the tolerances do not need to be as tight as for a situation where the solutions vary between 3.0 and 3.1 and very high precision measurements of the ODE state are available. It is also often useful to reduce the absolute tolerance when a component of the solution is expected to approach zero. For information on choosing tolerances, see the control parameters section.

+

The advantage of adaptive solvers is that as long as reasonable tolerances are provided and an ODE solver well-suited to the problem is chosen the technical details of solving the ODE can be abstracted away. The catch is that it is not always clear from the outset what reasonable tolerances are or which ODE solver is best suited to a problem. In addition, as changes are made to an ODE model, the optimal solver and tolerances may change.

+

With this in mind, the four forward solvers are rk45, bdf, adams, and ckrk. If no other information about the ODE is available, start with the rk45 solver. The list below has information on when each solver is useful.

+

If there is any uncertainty about which solver is the best, it can be useful to measure the performance of all the interesting solvers using profile statements. It is difficult to always know exactly what solver is the best in all situations, but a profile can provide a quick check.

+
    +
  • rk45: a fourth and fifth order Runge-Kutta method for non-stiff systems (Dormand and Prince 1980; Ahnert and Mulansky 2011). rk45 is the most generic solver and should be tried first.

  • +
  • bdf: a variable-step, variable-order, backward-differentiation formula implementation for stiff systems (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005). bdf is often useful for ODEs modeling chemical reactions.

  • +
  • adams: a variable-step, variable-order, Adams-Moulton formula implementation for non-stiff systems (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005). The method has order up to 12, hence is commonly used when high-accuracy is desired for a very smooth solution, such as in modeling celestial mechanics and orbital dynamics (Montenbruck and Gill 2000).

  • +
  • ckrk: a fourth and fifth order explicit Runge-Kutta method for non-stiff and semi-stiff systems (Cash and Karp 1990; Mazzia, Cash, and Soetaert 2012). The difference between ckrk and rk45 is that ckrk should perform better for systems that exhibit rapidly varying solutions. Often in those situations the derivatives become large or even nearly discontinuous, and ckrk is designed to address such problems.

  • +
+

For a discussion of stiff ODE systems, see the stiff ODE section. For information on the adjoint sensitivity interface see the Adjoint ODE solver section. The function signatures for Stan’s ODE solvers can be found in the function reference manual section on ODE solvers.

+
+

Notation

+

An ODE is defined by a set of differential equations, \(y(t, \theta)' = f(t, y, \theta)\), and initial conditions, \(y(t_0, \theta) = y_0\). The function \(f(t, y, \theta)\) is called the system function. The \(\theta\) dependence is included in the notation for \(y(t, \theta)\) and \(f(t, y, \theta)\) as a reminder that the solution is a function of any parameters used in the computation.

+
+
+

Example: simple harmonic oscillator

+

As an example of a system of ODEs, consider a harmonic oscillator. In a harmonic oscillator a particle disturbed from equilibrium is pulled back towards its equilibrium position by a force proportional to its displacement from equilibrium. The system here additionally has a friction force proportional to particle speed which points in the opposite direction of the particle velocity. The system state will be a pair \(y = (y_1, y_2)\) representing position and speed. The change in the system with respect to time is given by the following differential equations.1

+

\[\begin{align*} +&\frac{d}{dt} y_1 = y_2 \\ +&\frac{d}{dt} y_2 = -y_1 - \theta y_2 +\end{align*}\]

+

The state equations implicitly define the state at future times as a function of an initial state and the system parameters.

+
+
+

Coding the ODE system function

+

The first step in coding an ODE system in Stan is defining the ODE system function. The system functions require a specific signature so that the solvers know how to use them properly.

+

The first argument to the system function is time, passed as a real; the second argument to the system function is the system state, passed as a vector; and the return value from the system function is the current time derivative of the state, defined as a vector. Additional arguments can be included in the system function to pass other information into the solve (these will be passed through the function that starts the ODE integration). These arguments can be parameters (in this case, the friction coefficient), data, or any quantities that are needed to define the differential equation.

+

The simple harmonic oscillator can be coded using the following function in Stan (see the user-defined functions chapter for more information on coding user-defined functions).

+
vector sho(real t,        // time
+           vector y,      // state
+           real theta) {  // friction parameter
+  vector[2] dydt;
+  dydt[1] = y[2];
+  dydt[2] = -y[1] - theta * y[2];
+  return dydt;
+}
+

The function takes in a time t (a real), the system state y (a vector), and the parameter theta (a real). The function returns a vector of time derivatives of the system state at time t, state y, and parameter theta. The simple harmonic oscillator coded here does not have time-sensitive equations; that is, t does not show up in the definition of dydt, however it is still required.

+
+

Strict signature

+

The types in the ODE system function are strict. The first argument is the time passed as a real, the second argument is the state passed as a vector, and the return type is a vector. A model that does not have this signature will fail to compile. The third argument onwards can be any type, granted all the argument types match the types of the respective arguments in the solver call.

+

All of these are possible ODE signatures:

+
vector myode1(real t, vector y, real a0);
+vector myode2(real t, vector y, array[] int a0, vector a1);
+vector myode3(real t, vector y, matrix a0, array[] real a1, row_vector a2);
+

but these are not allowed:

+
vector myode1(real t, array[] real y, real a0);
+// Second argument is not a vector
+array[] real myode2(real t, vector y, real a0);
+// Return type is not a vector
+vector myode3(vector y, real a0);
+// First argument is not a real and second is not a vector
+
+
+
+

Measurement error models

+

Noisy observations of the ODE state can be used to estimate the parameters and/or the initial state of the system.

+
+

Simulating noisy measurements

+

As an example, suppose the simple harmonic oscillator has a parameter value of \(\theta = 0.15\) and an initial state \(y(t = 0, \theta = 0.15) = (1, 0)\). Assume the system is measured at 10 time points, \(t = 1, 2, \cdots, 10\), where each measurement of \(y(t, \theta)\) has independent \(\textsf{normal}(0, 0.1)\) error in both dimensions (\(y_1(t, \theta)\) and \(y_2(t, \theta)\)).

+

The following model can be used to generate data like this:

+
functions {
+  vector sho(real t,
+             vector y,
+             real theta) {
+    vector[2] dydt;
+    dydt[1] = y[2];
+    dydt[2] = -y[1] - theta * y[2];
+    return dydt;
+  }
+}
+data {
+  int<lower=1> T;
+  vector[2] y0;
+  real t0;
+  array[T] real ts;
+  real theta;
+}
+model {
+}
+generated quantities {
+  array[T] vector[2] y_sim = ode_rk45(sho, y0, t0, ts, theta);
+  // add measurement error
+  for (t in 1:T) {
+    y_sim[t, 1] += normal_rng(0, 0.1);
+    y_sim[t, 2] += normal_rng(0, 0.1);
+  }
+}
+

The system parameters theta and initial state y0 are read in as data along with the initial time t0 and observation times ts. The ODE is solved for the specified times, and then random measurement errors are added to produce simulated observations y_sim. Because the system is not stiff, the ode_rk45 solver is used.

+

This program illustrates the way in which the ODE solver is called in a Stan program,

+
array[T] vector[2] y_sim = ode_rk45(sho, y0, t0, ts, theta);
+

this returns the solution of the ODE initial value problem defined by system function sho, initial state y0, initial time t0, and parameter theta at the times ts. The call explicitly specifies the non-stiff RK45 solver.

+

The parameter theta is passed unmodified to the ODE system function. If there were additional arguments that must be passed, they could be appended to the end of the ode call here. For instance, if the system function took two parameters, \(\theta\) and \(\beta\), the system function definition would look like:

+
vector sho(real t, vector y, real theta, real beta) { ... }
+

and the appropriate ODE solver call would be:

+
ode_rk45(sho, y0, t0, ts, theta, beta);
+

Any number of additional arguments can be added. They can be any Stan type (as long as the types match between the ODE system function and the solver call).

+

Because none of the input arguments are a function of parameters, the ODE solver is called in the generated quantities block. The random measurement noise is added to each of the T outputs with normal_rng.

+
+
+

+
Typical realization of harmonic oscillator trajectory.
+
+
+
+
+

Estimating system parameters and initial state

+

These ten noisy observations of the state can be used to estimate the friction parameter, \(\theta\), the initial conditions, \(y(t_0, \theta)\), and the scale of the noise in the problem. The full Stan model is:

+
functions {
+  vector sho(real t,
+             vector y,
+             real theta) {
+    vector[2] dydt;
+    dydt[1] = y[2];
+    dydt[2] = -y[1] - theta * y[2];
+    return dydt;
+  }
+}
+data {
+  int<lower=1> T;
+  array[T] vector[2] y;
+  real t0;
+  array[T] real ts;
+}
+parameters {
+  vector[2] y0;
+  vector<lower=0>[2] sigma;
+  real theta;
+}
+model {
+  array[T] vector[2] mu = ode_rk45(sho, y0, t0, ts, theta);
+  sigma ~ normal(0, 2.5);
+  theta ~ std_normal();
+  y0 ~ std_normal();
+  for (t in 1:T) {
+    y[t] ~ normal(mu[t], sigma);
+  }
+}
+

Because the solves are now a function of model parameters, the ode_rk45 call is now made in the model block. There are half-normal priors on the measurement error scales sigma, and standard normal priors on theta and the initial state vector y0. The solutions to the ODE are assigned to mu, which is used as the location for the normal observation model.

+

As with other regression models, it’s easy to change the noise model to something with heavier tails (e.g., Student-t distributed), correlation in the state variables (e.g., with a multivariate normal distribution), or both heavy tails and correlation in the state variables (e.g., with a multivariate Student-t distribution).

+
+
+
+

Stiff ODEs

+

Stiffness is a numerical phenomenon that causes some differential equation solvers difficulty, notably the Runge-Kutta RK45 solver used in the examples earlier. The phenomenon is common in chemical reaction systems, which are often characterized by having multiple vastly different time-scales. The stiffness of a system can also vary between different parts of parameter space, and so a typically non-stiff system may exhibit stiffness occasionally. These sorts of difficulties can occur more frequently with loose priors or during warmup.

+

Stan provides a specialized solver for stiff ODEs (Cohen and Hindmarsh 1996; Serban and Hindmarsh 2005). An ODE system is specified exactly the same way with a function of exactly the same signature. The only difference is that, in the call to the solver, the rk45 suffix is replaced with bdf, as in

+
ode_bdf(sho, y0, t0, ts, theta);
+

Using the stiff (bdf) solver on a system that is not stiff may be much slower than using the non-stiff (rk45) solver because each step of the stiff solver takes more time to compute. On the other hand, attempting to use the non-stiff solver for a stiff system will cause the timestep to become very small, leading to the non-stiff solver taking more time overall even if each step is easier to compute than for the stiff solver.

+

If it is not known for sure that an ODE system is stiff, run the model with both the rk45 and bdf solvers and see which is faster. If the rk45 solver is faster, then the problem is probably non-stiff, and then it makes sense to try the adams solver as well. The adams solver uses higher order methods which can take larger timesteps than the rk45 solver, though similar to the bdf solver each of these steps is more expensive to compute.

+
+
+

Control parameters for ODE solving

+

For additional control of the solves, both the stiff and non-stiff forward ODE solvers have function signatures that make it possible to specify the relative_tolerance, absolute_tolerance, and max_num_steps parameters. These are the same as the regular function names but with _tol appended to the end. All three control arguments must be supplied with this signature (there are no defaults).

+
array[T] vector[2] y_sim = ode_bdf_tol(sho, y0, t0, ts,
+                                 relative_tolerance,
+                                 absolute_tolerance,
+                                 max_num_steps,
+                                 theta);
+

relative_tolerance and absolute_tolerance control the accuracy the solver tries to achieve, and max_num_steps specifies the maximum number of steps the solver will take between output time points before throwing an error.

+

The control parameters must be data variables – they cannot be parameters or expressions that depend on parameters, including local variables in any block other than transformed data and generated quantities. User-defined function arguments may be qualified as only allowing data arguments using the data qualifier.

+

For the RK45 and Cash-Karp solvers, the default values for relative and absolute tolerance are both \(10^{-6}\) and the maximum number of steps between outputs is one million. For the BDF and Adams solvers, the relative and absolute tolerances are \(10^{-10}\) and the maximum number of steps between outputs is one hundred million.

+
+

Discontinuous ODE system function

+

If there are discontinuities in the ODE system function, it is best to integrate the ODE between the discontinuities, stopping the solver at each one, and restarting it on the other side.

+

Nonetheless, the ODE solvers will attempt to integrate over discontinuities they encounter in the state function. The accuracy of the solution near the discontinuity may be problematic (requiring many small steps). An example of such a discontinuity is a lag in a pharmacokinetic model, where a concentration is zero for times \(0 < t < t'\) and then positive for \(t \geq t'\). In this example, we would use code in the system such as

+
if (t < t_lag) {
+  return [0, 0]';
+} else {
+  // ... return non-zero vector...
+}
+

In general it is better to integrate up to t_lag in one solve and then integrate from t_lag onwards in another. Mathematically, the discontinuity can make the problem ill-defined and the numerical integrator may behave erratically around it.

+

If the location of the discontinuity cannot be controlled precisely, or there is some other rapid change in ODE behavior, it can be useful to tell the ODE solver to produce output in the neighborhood. This can help the ODE solver avoid indiscriminately stepping over an important feature of the solution.

+
+
+

Tolerance

+

The relative tolerance RTOL and absolute tolerance ATOL control the accuracy of the numerical solution. Specifically, when solving an ODE with unknowns \(y=(y_1,\dots,y_n)^T\), at every step the solver controls estimated local error \(e=(e_1,\dots,e_n)^T\) through its weighted root-mean-square norm (Serban and Hindmarsh (2005), Hairer, Nørsett, and Wanner (1993))

+

\[\begin{equation*} +\sqrt{\sum_{i=1}^n{\frac{1}{n}\frac{e_i^2}{(\text{RTOL}\times y_i + \text{ATOL})^2}}} < 1 +\end{equation*}\] by reducing the stepsize when the inequality is not satisfied.

+

To understand the roles of the two tolerances it helps to assume \(y\) at opposite scales in the above expression: on one hand the absolute tolerance has little effect when \(y_i \gg 1\), on the other the relative tolerance can not affect the norm when \(y_i = 0\). Users are strongly encouraged to carefully choose tolerance values according to the ODE and its application. One can follow Brenan, Campbell, and Petzold (1995) for a rule of thumb: let \(m\) be the number of significant digits required for \(y\), set \(\text{RTOL}=10^{-(m+1)}\), and set ATOL at which \(y\) becomes insignificant. Note that the same weighted root-mean-square norm is used to control nonlinear solver convergence in bdf and adams solvers, and the same tolerances are used to control forward sensitivity calculation. See Serban and Hindmarsh (2005) for details.

+
+
+

Maximum number of steps

+

The maximum number of steps can be used to stop a runaway simulation. This can arise when MCMC moves to a part of parameter space very far from where a differential equation would typically be solved. In particular this can happen during warmup. With the non-stiff solver, this may happen when the sampler moves to stiff regions of parameter space, which will require small step sizes.

+
+
+
+

Adjoint ODE solver

+

The adjoint ODE solver method differs mathematically from the forward ODE solvers in the way gradients of the ODE solution are obtained. The forward ODE approach augments the original ODE system with \(N\) additional states for each parameter for which gradients are needed. If there are \(M\) parameters for which sensitivities are required, then the augmented ODE system has a total of \(N \cdot (M + +1)\) states. This can result in very large ODE systems through the multiplicative scaling of the computational effort needed.

+

In contrast, the adjoint ODE solver integrates forward in time a system of \(N\) equations to compute the ODE solution and then integrates backwards in time another system of \(N\) equations to get the sensitivities. Additionally, for \(M\) parameters there are \(M\) additional equations to integrate during the backwards solve. Because of this the adjoint sensitivity problem scales better in parameters than the forward sensitivity problem. The adjoint solver in Stan uses CVODES (the same as the bdf and adams forward sensitivity interfaces).

+

The solution computed in the forward integration is required during the backward integration. CVODES uses a checkpointing scheme that saves the forward solver state regularly. The number of steps between saving checkpoints is configurable in the interface. These checkpoints are then interpolated during the backward solve using one of two interpolation schemes.

+

The solver type (either bdf or adams) can be individually set for both the forward and backward solves.

+

The tolerances for each phase of the solve must be specified in the interface. Note that the absolute tolerance for the forward and backward ODE integration phase need to be set for each ODE state separately. The harmonic oscillator example call from above becomes:

+
array[T] vector[2] y_sim
+    = ode_adjoint_tol_ctl(sho, y0, t0, ts,
+                          relative_tolerance/9.0,                // forward tolerance
+                          rep_vector(absolute_tolerance/9.0, 2), // forward tolerance
+                          relative_tolerance/3.0,                // backward tolerance
+                          rep_vector(absolute_tolerance/3.0, 2), // backward tolerance
+                          relative_tolerance,                    // quadrature tolerance
+                          absolute_tolerance,                    // quadrature tolerance
+                          max_num_steps,
+                          150,                                   // number of steps between checkpoints
+                          1,                                     // interpolation polynomial: 1=Hermite, 2=polynomial
+                          2,                                     // solver for forward phase: 1=Adams, 2=BDF
+                          2,                                     // solver for backward phase: 1=Adams, 2=BDF
+                          theta);
+

For detailed information on each argument please see the Stan function reference manual.

+
+
+

Solving a system of linear ODEs using a matrix exponential

+

Linear systems of ODEs can be solved using a matrix exponential. This can be considerably faster than using one of the ODE solvers.

+

The solution to \(\frac{d}{dt} y = ay\) is \(y = y_0e^{at}\), where the constant \(y_0\) is determined by boundary conditions. We can extend this solution to the vector case: \[ +\frac{d}{dt}y = A \, y +\] where \(y\) is now a vector of length \(n\) and \(A\) is an \(n\) by \(n\) matrix. The solution is then given by: \[ +y = e^{tA} \, y_0 +\] where the matrix exponential is formally defined by the convergent power series: \[ +e^{tA} = \sum_{k=0}^{\infty} \dfrac{(tA)^k}{k!} = I + tA + \frac{t^2A^2}{2!} + \dotsb +\]

+

We can apply this technique to the simple harmonic oscillator example, by setting \[ +y = \begin{bmatrix} y_1 \\ y_2 \end{bmatrix} \qquad +A = \begin{bmatrix} 0 & 1 \\ -1 & -\theta \end{bmatrix} +\]

+

The Stan model to simulate noisy observations using a matrix exponential function is given below.

+

In general, computing a matrix exponential will be more efficient than using a numerical solver. We can however only apply this technique to systems of linear ODEs.

+
data {
+  int<lower=1> T;
+  vector[2] y0;
+  array[T] real ts;
+  array[1] real theta;
+}
+model {
+}
+generated quantities {
+  array[T] vector[2] y_sim;
+  matrix[2, 2] A = [[ 0,  1],
+                    [-1, -theta[1]]];
+  for (t in 1:T) {
+    y_sim[t] = matrix_exp(ts[t] * A) * y0;
+  }
+  // add measurement error
+  for (t in 1:T) {
+    y_sim[t, 1] += normal_rng(0, 0.1);
+    y_sim[t, 2] += normal_rng(0, 0.1);
+  }
+}
+

This Stan program simulates noisy measurements from a simple harmonic oscillator. The system of linear differential equations is coded as a matrix. The system parameters theta and initial state y0 are read in as data along with the observation times ts. The generated quantities block is used to solve the ODE for the specified times and then add random measurement error, producing observations y_sim. Because the ODEs are linear, we can use the matrix_exp function to solve the system.

+ + + +
+
+ + + Back to top

References

+
+Ahnert, Karsten, and Mario Mulansky. 2011. “Odeint—Solving Ordinary Differential Equations in C++.” arXiv 1110.3397. +
+
+Brenan, K. E., S. L. Campbell, and L. R. Petzold. 1995. Numerical Solution of Initial-Value Problems in Differential-Algebraic Equations. SIAM Classics in Applied Mathematics 14. Philadelphia: Society for Industrial and Applied Mathematics. +
+
+Cash, J. R., and Alan H. Karp. 1990. “A Variable Order Runge-Kutta Method for Initial Value Problems with Rapidly Varying Right-Hand Sides.” ACM Transactions on Mathematical Software 16 (3): 201–22. https://doi.org/10.1145/79505.79507. +
+
+Cohen, Scott D, and Alan C Hindmarsh. 1996. “CVODE, a Stiff/Nonstiff ODE Solver in C.” Computers in Physics 10 (2): 138–43. +
+
+Dormand, John R, and Peter J Prince. 1980. “A Family of Embedded Runge-Kutta Formulae.” Journal of Computational and Applied Mathematics 6 (1): 19–26. +
+
+Hairer, Ernst, Syvert P. Nørsett, and Gerhard Wanner. 1993. Solving Ordinary Differential Equations I: Nonstiff Problems. 2nd ed. Springer Series in Computational Mathematics. Berlin Heidelberg: Springer-Verlag. +
+
+Mazzia, F., J. R. Cash, and K. Soetaert. 2012. “A Test Set for Stiff Initial Value Problem Solvers in the Open Source Software R: Package deTestSet.” Journal of Computational and Applied Mathematics 236 (16): 4119–31. +
+
+Montenbruck, Oliver, and Eberhard Gill. 2000. Satellite Orbits: Models, Methods and Applications. Berlin Heidelberg: Springer-Verlag. https://doi.org/10.1007/978-3-642-58351-3. +
+
+Serban, Radu, and Alan C Hindmarsh. 2005. “CVODES: The Sensitivity-Enabled ODE Solver in SUNDIALS.” In ASME 2005 International Design Engineering Technical Conferences and Computers and Information in Engineering Conference, 257–69. American Society of Mechanical Engineers. +
+

Footnotes

+ +
    +
  1. This example is drawn from the documentation for the Boost Numeric Odeint library (Ahnert and Mulansky 2011), which Stan uses to implement the rk45 and ckrk solvers.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/one-dimensional-integrals.html b/docs/2_39/stan-users-guide/one-dimensional-integrals.html new file mode 100644 index 000000000..b48f60b63 --- /dev/null +++ b/docs/2_39/stan-users-guide/one-dimensional-integrals.html @@ -0,0 +1,1414 @@ + + + + + + + + + +Computing One Dimensional Integrals + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Computing One Dimensional Integrals

+

Definite and indefinite one dimensional integrals can be performed in Stan using the integrate_1d function.

+

As an example, the normalizing constant of a left-truncated normal distribution is

+

\[ + \int_a^\infty \frac{1}{\sqrt{2 \pi \sigma^2}} e^{-\frac{1}{2}\frac{(x - \mu)^2}{\sigma^2}} \, dx +\]

+

To compute this integral in Stan, the integrand must first be defined as a Stan function (see the Stan Reference Manual chapter on User-Defined Functions for more information on coding user-defined functions).

+
real normal_density(real x,             // Function argument
+                    real xc,            // Complement of function argument
+                                        //  on the domain (defined later)
+                    array[] real theta, // parameters
+                    array[] real x_r,   // data (real)
+                    array[] int x_i) {  // data (integer)
+  real mu = theta[1];
+  real sigma = theta[2];
+
+  return 1 / (sqrt(2 * pi()) * sigma) * exp(-0.5 * ((x - mu) / sigma)^2);
+}
+

This function is expected to return the value of the integrand evaluated at point x. The argument xc is used in definite integrals to avoid loss of precision near the limits of integration and is set to NaN when either limit is infinite (see the section on precision/loss in the chapter on Higher-Order Functions of the Stan Functions Reference for details on how to use this). The argument theta is used to pass in arguments of the integral that are a function of the parameters in our model. The arguments x_r and x_i are used to pass in real and integer arguments of the integral that are not a function of our parameters.

+

The function defining the integrand must have exactly the argument types and return type of normal_density above, though argument naming is not important. Even if x_r and x_i are unused in the integrand, they must be included in the function signature. Even if the integral does not involve some of these, they must still be supplied some value. The most efficient will be a zero-length array or vector, which can be created with rep_array(0, 0) and rep_vector(0, 0), respectively. Other options include an uninitialized variable declared with size 0, which is equivalent to the above, or any easy value, such as a size 1 array created with {0}.

+
+

Calling the integrator

+

Suppose that our model requires evaluating the lpdf of a left-truncated normal, but the truncation limit is to be estimated as a parameter. Because the truncation point is a parameter, we must include the normalization term of the truncated pdf when computing our model’s log density. Note this is just an example of how to use the 1D integrator. The more efficient way to perform the correct normalization in Stan is described in the chapter on Truncated or Censored Data of this guide.

+

Such a model might look like (include the function defined at the beginning of this chapter to make this code compile):

+
data {
+  int N;
+  array[N] real y;
+}
+
+transformed data {
+  array[0] real x_r;
+  array[0] int x_i;
+}
+
+parameters {
+  real mu;
+  real<lower=0.0> sigma;
+  real left_limit;
+}
+
+model {
+  mu ~ normal(0, 1);
+  sigma ~ normal(0, 1);
+  left_limit ~ normal(0, 1);
+  target += normal_lpdf(y | mu, sigma);
+  target += -log(integrate_1d(normal_density,
+                              left_limit,
+                              positive_infinity(),
+                              { mu, sigma }, x_r, x_i));
+}
+
+

Limits of integration

+

The limits of integration can be finite or infinite. The infinite limits are made available via the Stan calls negative_infinity() and positive_infinity().

+

If both limits are either negative_infinity() or positive_infinity(), the integral and its gradients are set to zero.

+
+
+

Data vs. parameters

+

The arguments for the real data x_r and the integer data x_i must be expressions that only involve data or transformed data variables. theta, on the other hand, can be a function of data, transformed data, parameters, or transformed parameters.

+

The endpoints of integration can be data or parameters (and internally the derivatives of the integral with respect to the endpoints are handled with the Leibniz integral rule).

+
+
+
+

Integrator convergence

+

The integral is performed with the iterative 1D double exponential quadrature methods implemented in the Boost library (Agrawal et al. 2017). If the \(n\)th estimate of the integral is denoted \(I_n\) and the \(n\)th estimate of the norm of the integral is denoted \(|I|_n\), the iteration is terminated when

+

\[ + \frac{{|I_{n + 1} - I_n|}}{{|I|_{n + 1}}} < \text{relative tolerance}. +\]

+

The relative_tolerance parameter can be optionally specified as the last argument to integrate_1d. By default, integrate_1d follows the Boost library recommendation of setting relative_tolerance to the square root of the machine epsilon of double precision floating point numbers (about 1e-8). If the Boost integrator is not able to reach the relative tolerance an exception is raised with a message something like “Exception: integrate: error estimate of integral 4.25366e-13 exceeds the given relative tolerance times norm of integral”. If integrate_1d causes an exception in transformed parameters block or model block, the result has the same effect as assigning a \(-\infty\) log probability, which causes rejection of the current proposal in MCMC samplers and adjustment of search parameters in optimization. If integrate_1d causes an exception in generated quantities block, the returned output from integrate_1d is NaN. In these cases, a bigger relative_tolerance value can be specified.

+
+

Zero-crossing integrals

+

Integrals on the (possibly infinite) interval \((a, b)\) that cross zero are split into two integrals, one from \((a, 0)\) and one from \((0, b)\). This is because the quadrature methods employed internally can have difficulty near zero.

+

In this case, each integral is separately integrated to the given relative_tolerance.

+
+
+

Avoiding precision loss near limits of integration in definite integrals

+

If care is not taken, the quadrature can suffer from numerical loss of precision near the endpoints of definite integrals.

+

For instance, in integrating the pdf of a beta distribution when the values of \(\alpha\) and \(\beta\) are small, most of the probability mass is lumped near zero and one.

+

The pdf of a beta distribution is proportional to

+

\[ +p(x) \propto x^{\alpha - 1}(1 - x)^{\beta - 1} +\]

+

Normalizing this distribution requires computing the integral of \(p(x)\) from zero to one. In Stan code, the integrand might look like:

+
real beta(real x, real xc, array[] real theta, array[] real x_r, array[] int x_i) {
+  real alpha = theta[1];
+  real beta = theta[2];
+
+  return x^(alpha - 1.0) * (1.0 - x)^(beta - 1.0);
+}
+

The issue is that there will be numerical breakdown in the precision of 1.0 - x as x gets close to one. This is because of the limited precision of double precision floating numbers. This integral will fail to converge for values of alpha and beta much less than one.

+

This is where xc is useful. It is defined, for definite integrals, as a high precision version of the distance from x to the nearest endpoint — a - x or b - x for a lower endpoint a and an upper endpoint b. To make use of this for the beta integral, the integrand can be re-coded:

+
real beta(real x, real xc, array[] real theta, array[] real x_r, array[] int x_i) {
+  real alpha = theta[1];
+  real beta = theta[2];
+  real v;
+
+  if(x > 0.5) {
+    v = x^(alpha - 1.0) * xc^(beta - 1.0);
+  } else {
+    v = x^(alpha - 1.0) * (1.0 - x)^(beta - 1.0);
+  }
+
+  return v;
+}
+

In this case, as we approach the upper limit of integration \(b = 1\), xc will take on the value of \(b - x = 1 - x\). This version of the integrand will converge for much smaller values of alpha and beta than otherwise possible.

+

Consider another example: let’s say we have a log-normal distribution that is both shifted away from zero by some amount \(\delta\), and truncated at some value \(b\). If we were interested in calculating the expectation of a variable \(X\) distributed in this way, we would need to calculate \[ +\int_a^b xf(x)\,dx = \int_{\delta}^b xf(x)\,dx +\] in the numerator, where \(f(x)\) is the probability density function for the shifted log-normal distribution. This probability density function can be coded in Stan as:

+
real shift_lognormal_pdf(real x,
+                         real mu,
+                         real sigma,
+                         real delta) {
+  real p;
+
+  p = (1.0 / ((x - delta) * sigma * sqrt(2 * pi()))) *
+    exp(-1 * (log(x - delta) - mu)^2 / (2 * sigma^2));
+
+  return p;
+}
+

Therefore, the function that we want to integrate is:

+
real integrand(real x,
+               real xc,
+               array[] real theta,
+               array[] real x_r,
+               array[] int x_i) {
+  real numerator;
+  real p;
+
+  real mu = theta[1];
+  real sigma = theta[2];
+  real delta = theta[3];
+  real b = theta[4];
+
+  p = shift_lognormal_pdf(x, mu, sigma, delta);
+
+  numerator = x * p;
+
+  return numerator;
+}
+

What happens here is that, given that the log-normal distribution is shifted by \(\delta\), when we then try to integrate the numerator, our x starts at values just above delta. This, in turn, causes the x - delta term to be near zero, leading to a breakdown.

+

We can use xc, and define the integrand as:

+
real integrand(real x,
+               real xc,
+               array[] real theta,
+               array[] real x_r,
+               array[] int x_i) {
+  real numerator;
+  real p;
+
+  real mu = theta[1];
+  real sigma = theta[2];
+  real delta = theta[3];
+  real b = theta[4];
+
+  if (x < delta + 1) {
+    p = shift_lognormal_pdf(xc, mu, sigma, delta);
+  } else {
+    p = shift_lognormal_pdf(x, mu, sigma, delta);
+  }
+
+  numerator = x * p;
+
+  return numerator;
+}
+

Why does this work? When our values of x are less than delta + 1 (so, when they’re near delta, given that our lower bound of integration is equal to \(\delta\)), we pass xc as an argument to our shift_lognormal_pdf function. This way, instead of dealing with x - delta in shift_lognormal_pdf, we are working with xc - delta which is equal to delta - x - delta, as delta is the lower endpoint in that case. The delta terms cancel out, and we are left with a high-precision version of x. We don’t encounter the same problem at the upper limit \(b\) so we don’t adjust the code for that case.

+

Note, xc is only used for definite integrals. If either the left endpoint is at negative infinity or the right endpoint is at positive infinity, xc will be NaN.

+

For zero-crossing definite integrals (see section Zero Crossing) the integrals are broken into two pieces (\((a, 0)\) and \((0, b)\) for endpoints \(a < 0\) and \(b > 0\)) and xc is a high precision version of the distance to the limits of each of the two integrals separately. This means xc will be a high precision version of a - x, x, or b - x, depending on the value of x and the endpoints.

+ + + +
+
+
+ + Back to top

References

+
+Agrawal, Nikhar, Anton Bikineev, Paul Bristow, Marco Guazzone, Christopher Kormanyos, Hubert Holin, Bruno Lalande, et al. 2017. “Double-Exponential Quadrature.” https://www.boost.org/doc/libs/1_66_0/libs/math/doc/html/math_toolkit/double_exponential.html. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/parallelization.html b/docs/2_39/stan-users-guide/parallelization.html new file mode 100644 index 000000000..dc87548c5 --- /dev/null +++ b/docs/2_39/stan-users-guide/parallelization.html @@ -0,0 +1,1636 @@ + + + + + + + + + +Parallelization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Parallelization

+

Stan has support for different types of parallelization: multi-threading with Intel Threading Building Blocks (TBB), multi-processing with Message Passing Interface (MPI) and manycore processing with OpenCL.

+

Multi-threading in Stan can be used with two mechanisms: reduce with summation and rectangular map. The latter can also be used with multi-processing.

+

The advantages of reduce with summation are:

+
    +
  1. More flexible argument interface, avoiding the packing and unpacking that is necessary with rectangular map.
  2. +
  3. Partitions data for parallelization automatically (this is done manually in rectangular map).
  4. +
  5. Is easier to use.
  6. +
+

The advantages of rectangular map are:

+
    +
  1. Returns a list of vectors, while the reduce summation returns only a scalar.
  2. +
  3. Can be parallelized across multiple cores and multiple computers, while reduce summation can only be parallelized across multiple cores on a single machine.
  4. +
+

The actual speedup gained from using these functions will depend on many details. It is strongly recommended to only parallelize the computationally most expensive operations in a Stan program. Oftentimes this is the evaluation of the log likelihood for the observed data. When it is not clear which parts of the model are the most computationally expensive, we recommend using profiling, which is available in Stan 2.26 and newer.

+

Since only portions of a Stan program will run in parallel, the maximal speedup one can achieve is capped, a phenomenon described by Amdahl’s law.

+
+

Reduce-sum

+

It is often necessary in probabilistic modeling to compute the sum of a number of independent function evaluations. This occurs, for instance, when evaluating a number of conditionally independent terms in a log-likelihood. If g: U -> real is the function and { x1, x2, ... } is an array of inputs, then that sum looks like:

+

g(x1) + g(x2) + ...

+

reduce_sum and reduce_sum_static are tools for parallelizing these calculations.

+

For efficiency reasons the reduce function doesn’t work with the element-wise evaluated function g, but instead the partial sum function f: U[] -> real, where f computes the partial sum corresponding to a slice of the sequence x passed in. Due to the associativity of the sum reduction it holds that:

+
g(x1) + g(x2) + g(x3) = f({ x1, x2, x3 })
+                      = f({ x1, x2 }) + f({ x3 })
+                      = f({ x1 }) + f({ x2, x3 })
+                      = f({ x1 }) + f({ x2 }) + f({ x3 })
+

With the partial sum function f: U[] -> real reduction of a large number of terms can be evaluated in parallel automatically, since the overall sum can be partitioned into arbitrary smaller partial sums. The exact partitioning into the partial sums is not under the control of the user. However, since the exact numerical result will depend on the order of summation, Stan provides two versions of the reduce summation facility:

+
    +
  • reduce_sum: Automatically choose partial sums partitioning based on a dynamic scheduling algorithm.
  • +
  • reduce_sum_static: Compute the same sum as reduce_sum, but partition the input in the same way for a given data set (in reduce_sum this partitioning might change depending on computer load).
  • +
+

grainsize is the one tuning parameter. For reduce_sum, grainsize is a suggested partial sum size. A grainsize of 1 leaves the partitioning entirely up to the scheduler. This should be the default way of using reduce_sum unless time is spent carefully picking grainsize. For picking a grainsize, see details below.

+

For reduce_sum_static, grainsize specifies the maximal partial sum size. With reduce_sum_static it is more important to choose grainsize carefully since it entirely determines the partitioning of work. See details below.

+

For efficiency and convenience additional shared arguments can be passed to every term in the sum. So for the array { x1, x2, ... } and the shared arguments s1, s2, ... the effective sum (with individual terms) looks like:

+
g(x1, s1, s2, ...) + g(x2, s1, s2, ...) + g(x3, s1, s2, ...) + ...
+

which can be written equivalently with partial sums to look like:

+
f({ x1, x2 }, s1, s2, ...) + f({ x3 }, s1, s2, ...)
+

where the particular slicing of the x array can change.

+

Given this, the signatures are:

+
real reduce_sum(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)
+real reduce_sum_static(F f, array[] T x, int grainsize, T1 s1, T2 s2, ...)
+
    +
  1. f - User defined function that computes partial sums
  2. +
  3. x - Array to slice, each element corresponds to a term in the summation
  4. +
  5. grainsize - Target for size of slices
  6. +
  7. s1, s2, ... - Arguments shared in every term
  8. +
+

The user-defined partial sum functions have the signature:

+
real f(array[] T x_slice, int start, int end, T1 s1, T2 s2, ...)
+

and take the arguments:

+
    +
  1. x_slice - The subset of x (from reduce_sum / reduce_sum_static) for which this partial sum is responsible (x_slice = x[start:end])
  2. +
  3. start - An integer specifying the first term in the partial sum
  4. +
  5. end - An integer specifying the last term in the partial sum (inclusive)
  6. +
  7. s1, s2, ... - Arguments shared in every term (passed on without modification from the reduce_sum / reduce_sum_static call)
  8. +
+

The user-provided function f is expected to compute the partial sum with the terms start through end of the overall sum. The user function is passed the subset x[start:end] as x_slice. start and end are passed so that f can index any of the trailing sM arguments as necessary. The trailing sM arguments are passed without modification to every call of f.

+

A reduce_sum (or reduce_sum_static) call:

+
real sum = reduce_sum(f, x, grainsize, s1, s2, ...);
+

can be replaced by either:

+
real sum = f(x, 1, size(x), s1, s2, ...);
+

or the code:

+
real sum = 0.0;
+for(i in 1:size(x)) {
+  sum += f({ x[i] }, i, i, s1, s2, ...);
+}
+
+

Example: logistic regression

+

Logistic regression is a useful example to clarify both the syntax and semantics of reduce summation and how it can be used to speed up a typical model. A basic logistic regression can be coded in Stan as:

+
data {
+  int N;
+  array[N] int y;
+  vector[N] x;
+}
+parameters {
+  vector[2] beta;
+}
+model {
+  beta ~ std_normal();
+  y ~ bernoulli_logit(beta[1] + beta[2] * x);
+}
+

In this model predictions are made about the N outputs y using the covariate x. The intercept and slope of the linear equation are to be estimated. The key point to getting this calculation to use reduce summation, is recognizing that the statement:

+
y ~ bernoulli_logit(beta[1] + beta[2] * x);
+

can be rewritten (up to a proportionality constant) as:

+
for(n in 1:N) {
+  target += bernoulli_logit_lpmf(y[n] | beta[1] + beta[2] * x[n]);
+}
+

Now it is clear that the calculation is the sum of a number of conditionally independent Bernoulli log probability statements, which is the condition where reduce summation is useful. To use the reduce summation, a function must be written that can be used to compute arbitrary partial sums of the total sum. Using the interface defined in Reduce-Sum, such a function can be written like:

+
functions {
+  real partial_sum(array[] int y_slice,
+                   int start, int end,
+                   vector x,
+                   vector beta) {
+    return bernoulli_logit_lpmf(y_slice | beta[1] + beta[2] * x[start:end]);
+  }
+}
+

The likelihood statement in the model can now be written:

+
target += partial_sum(y, 1, N, x, beta); // Sum terms 1 to N of the likelihood
+

In this example, y was chosen to be sliced over because there is one term in the summation per value of y. Technically x would have worked as well. Use whatever conceptually makes the most sense for a given model, e.g. slice over independent terms like conditionally independent observations or groups of observations as in hierarchical models. Because x is a shared argument, it is subset accordingly with start:end. With this function, reduce summation can be used to automatically parallelize the likelihood:

+
int grainsize = 1;
+target += reduce_sum(partial_sum, y,
+                     grainsize,
+                     x, beta);
+

The reduce summation facility automatically breaks the sum into pieces and computes them in parallel. grainsize = 1 specifies that the grainsize should be estimated automatically. The final model is:

+
functions {
+  real partial_sum(array[] int y_slice,
+                   int start, int end,
+                   vector x,
+                   vector beta) {
+    return bernoulli_logit_lpmf(y_slice | beta[1] + beta[2] * x[start:end]);
+  }
+}
+data {
+  int N;
+  array[N] int y;
+  vector[N] x;
+}
+parameters {
+  vector[2] beta;
+}
+model {
+  int grainsize = 1;
+  beta ~ std_normal();
+  target += reduce_sum(partial_sum, y,
+                       grainsize,
+                       x, beta);
+}
+
+
+

Picking the grainsize

+

The rationale for choosing a sensible grainsize is based on balancing the overhead implied by creating many small tasks versus creating fewer large tasks which limits the potential parallelism.

+

In reduce_sum, grainsize is a recommendation on how to partition the work in the partial sum into smaller pieces. A grainsize of 1 leaves this entirely up to the internal scheduler and should be chosen if no benchmarking of other grainsizes is done. Ideally this will be efficient, but there are no guarantees.

+

In reduce_sum_static, grainsize is an upper limit on the worksize. Work will be split until all partial sums are just smaller than grainsize (and the split will happen the same way every time for the same inputs). For the static version it is more important to select a sensible grainsize.

+

In order to figure out an optimal grainsize, if there are N terms and M cores, run a quick test model with grainsize set roughly to N / M. Record the time, cut the grainsize in half, and run the test again. Repeat this iteratively until the model runtime begins to increase. This is a suitable grainsize for the model, because this ensures the calculations can be carried out with the most parallelism without losing too much efficiency.

+

For instance, in a model with N=10000 and M = 4, start with grainsize = 2500, and sequentially try grainsize = 1250, grainsize = 625, etc.

+

It is important to repeat this process until performance gets worse. It is possible after many halvings nothing happens, but there might still be a smaller grainsize that performs better. Even if a sum has many tens of thousands of terms, depending on the internal calculations, a grainsize of thirty or forty or smaller might be the best, and it is difficult to predict this behavior. Without doing these halvings until performance actually gets worse, it is easy to miss this.

+
+
+
+

Map-rect

+

Map-reduce allows large calculations (e.g., log likelihoods) to be broken into components which may be calculated modularly (e.g., data blocks) and combined (e.g., by summation and incrementing the target log density).

+

A map function is a higher-order function that applies an argument function to every member of some collection, returning a collection of the results. For example, mapping the square function, \(f(x) = x^2\), over the vector \([3, 5, 10]\) produces the vector \([9, 25, 100]\). In other words, map applies the square function elementwise.

+

The output of mapping a sequence is often fed into a reduction. A reduction function takes an arbitrarily long sequence of inputs and returns a single output. Examples of reduction functions are summation (with the return being a single value) or sorting (with the return being a sorted sequence). The combination of mapping and reducing is so common it has its own name, map-reduce.

+
+

Map function

+

In order to generalize the form of functions and results that are possible and accommodate both parameters (which need derivatives) and data values (which don’t), Stan’s map function operates on more than just a sequence of inputs.

+
+
+

Map function signature

+

Stan’s map function has the following signature

+
vector map_rect((vector, vector, array[] real, array[] int):vector f,
+                vector phi, array[] vector thetas,
+                data array[,] real x_rs, data array[,] int x_is);
+

The arrays thetas of parameters, x_rs of real data, and x_is of integer data have the suffix “s” to indicate they are arrays. These arrays must all be the same size, as they will be mapped in parallel by the function f. The value of phi is reused in each mapped operation.

+

The _rect suffix in the name arises because the data structures it takes as arguments are rectangular. In order to deal with ragged inputs, ragged inputs must be padded out to rectangular form.

+

The last two arguments are two dimensional arrays of real and integer data values. These argument types are marked with the data qualifier to indicate that they must only contain variables originating in the data or transformed data blocks. This will allow such data to be pinned to a processor on which it is being processed to reduce communication overhead.

+

The notation (vector, vector, array[] real, array[] int):vector indicates that the function argument f must have the following signature.

+
vector f(vector phi, vector theta,
+         data array[] real x_r, data array[] int x_i);
+

Although f will often return a vector of size one, the built-in flexibility allows general multivariate functions to be mapped, even raggedly.

+
+

Map function semantics

+

Stan’s map function applies the function f to the shared parameters along with one element each of the job parameters, real data, and integer data arrays. Each of the arguments thetas, x_rs, and x_is must be arrays of the same size. If the arrays are all size N, the result is defined as follows.

+
map_rect(f, phi, thetas, xs, ns)
+= f(phi, thetas[1], xs[1], ns[1]) . f(phi, thetas[2], xs[2], ns[2])
+  . ... . f(phi, thetas[N], xs[N], ns[N])
+

The dot operators in the notation above are meant to indicate concatenation (implemented as append_row in Stan). The output of each application of f is a vector, and the sequence of N vectors is concatenated together to return a single vector.

+
+
+
+

Example: logistic regression

+

An example should help to clarify both the syntax and semantics of the mapping operation and how it may be combined with reductions built into Stan to provide a map-reduce implementation.

+
+

Unmapped logistic regression

+

Consider the following simple logistic regression model, which is coded unconventionally to accommodate direct translation to a mapped implementation.

+
data {
+  array[12] int y;
+  array[12] real x;
+}
+parameters {
+  vector[2] beta;
+}
+model {
+  beta ~ std_normal();
+  y ~ bernoulli_logit(beta[1] + beta[2] * to_vector(x));
+}
+

The program is unusual in that it (a) hardcodes the data size, which is not required by the map function but is just used here for simplicity, (b) represents the predictors as a real array even though it needs to be used as a vector, and (c) represents the regression coefficients (intercept and slope) as a vector even though they’re used individually. The bernoulli_logit distribution is used because the argument is on the logit scale—it implicitly applies the inverse logit function to map the argument to a probability.

+
+
+

Mapped logistic regression

+

The unmapped logistic regression model described in the previous subsection may be implemented using Stan’s rectangular mapping functionality as follows.

+
functions {
+  vector lr(vector beta, vector theta, array[] real x, array[] int y) {
+    real lp = bernoulli_logit_lpmf(y | beta[1]
+                                       + to_vector(x) * beta[2]);
+    return [lp]';
+  }
+}
+data {
+  array[12] int y;
+  array[12] real x;
+}
+transformed data {
+  // K = 3 shards
+  array[3, 4] int ys = { y[1:4], y[5:8], y[9:12] };
+  array[3, 4] real xs = { x[1:4], x[5:8], x[9:12] };
+  array[3] vector[0] theta;
+}
+parameters {
+  vector[2] beta;
+}
+model {
+  beta ~ std_normal();
+  target += sum(map_rect(lr, beta, theta, xs, ys));
+}
+

The first piece of the code is the actual function to compute the logistic regression. The argument beta will contain the regression coefficients (intercept and slope), as before. The second argument theta of job-specific parameters is not used, but nevertheless must be present. The modeled data y is passed as an array of integers and the predictors x as an array of real values. The function body then computes the log probability mass of y and assigns it to the local variable lp. This variable is then used in [lp]' to construct a row vector and then transpose it to a vector to return.

+

The data are taken in as before. There is an additional transformed data block that breaks the data up into three shards.1

+

The value 3 is also hard coded; a more practical program would allow the number of shards to be controlled. There are three parallel arrays defined here, each of size three, corresponding to the number of shards. The array ys contains the modeled data variables; each element of the array ys is an array of size four. The second array xs is for the predictors, and each element of it is also of size four. These contained arrays are the same size because the predictors x stand in a one-to-one relationship with the modeled data y. The final array theta is also of size three; its elements are empty vectors, because there are no shard-specific parameters.

+

The parameters and the prior are as before. The likelihood is now coded using map-reduce. The function lr to compute the log probability mass is mapped over the data xs and ys, which contain the original predictors and outcomes broken into shards. The parameters beta are in the first argument because they are shared across shards. There are no shard-specific parameters, so the array of job-specific parameters theta contains only empty vectors.

+
+
+
+

Example: hierarchical logistic regression

+

Consider a hierarchical model of American presidential voting behavior based on state of residence.2

+

Each of the fifty states \(k \in \{1,\dotsc,50\}\) will have its own slope \(\beta_k\) and intercept \(\alpha_k\) to model the log odds of voting for the Republican candidate as a function of income. Suppose there are \(N\) voters, with voter \(n \in 1{:}N\) being in state \(s[n]\) with income \(x_n\). The data model for the vote \(y_n \in \{ 0, 1 \}\) is \[ +y_n \sim \textsf{Bernoulli} +\Big( + \operatorname{logit}^{-1}\left( \alpha_{s[n]} + \beta_{s[n]} \, x_n \right) +\Big). +\]

+

The slopes and intercepts get hierarchical priors, \[\begin{align*} +\alpha_k &\sim \textsf{normal}(\mu_{\alpha}, \sigma_{\alpha}) \\ +\beta_k &\sim \textsf{normal}(\mu_{\beta}, \sigma_{\beta}) +\end{align*}\]

+
+

Unmapped implementation

+

This model can be coded up in Stan directly as follows.

+
data {
+  int<lower=0> K;
+  int<lower=0> N;
+  array[N] int<lower=1, upper=K> kk;
+  vector[N] x;
+  array[N] int<lower=0, upper=1> y;
+}
+parameters {
+  matrix[K, 2] beta;
+  vector[2] mu;
+  vector<lower=0>[2] sigma;
+}
+model {
+  mu ~ normal(0, 2);
+  sigma ~ normal(0, 2);
+  for (i in 1:2) {
+    beta[ , i] ~ normal(mu[i], sigma[i]);
+  }
+  y ~ bernoulli_logit(beta[kk, 1] + beta[kk, 2] .* x);
+}
+

For this model the vector of predictors x is coded as a vector, corresponding to how it is used in the model. The priors for mu and sigma are vectorized. The priors on the two components of beta (intercept and slope, respectively) are stored in a \(K \times 2\) matrix.

+

The distribution statement is also vectorized using multi-indexing with index kk for the states and elementwise multiplication (.*) for the income x. The vectorized distribution statement works out to the same thing as the following less efficient looped form.

+
for (n in 1:N) {
+  y[n] ~ bernoulli_logit(beta[kk[n], 1] + beta[kk[n], 2] * x[n]);
+}
+
+
+

Mapped implementation

+

The mapped version of the model will map over the states K. This means the group-level parameters, real data, and integer-data must be arrays of the same size.

+

The mapped implementation requires a function to be mapped. In this function we can’t use distribution statements, but need to accumulate the desired log prior and log likelihood terms to the return value. The following function evaluates both the likelihood for the data observed for a group as well as the prior for the group-specific parameters (the name bl_glm, short for bernoulli_logit_glm, derives from the fact that it’s a generalized linear model with a Bernoulli data model and logistic link function).

+
functions {
+ vector bl_glm(vector mu_sigma, vector beta,
+               array[] real x, array[] int y) {
+   vector[2] mu = mu_sigma[1:2];
+   vector[2] sigma = mu_sigma[3:4];
+   real lp = normal_lpdf(beta | mu, sigma);
+   real ll = bernoulli_logit_lpmf(y | beta[1] + beta[2] * to_vector(x));
+   return [lp + ll]';
+ }
+}
+

The shared parameter mu_sigma contains the locations (mu_sigma[1:2]) and scales (mu_sigma[3:4]) of the priors, which are extracted in the first two lines of the program. The variable lp is assigned the log density of the prior on beta. The vector beta is of size two, as are the vectors mu and sigma, so everything lines up for the vectorization. Next, the variable ll is assigned to the log likelihood contribution for the group. Here beta[1] is the intercept of the regression and beta[2] the slope. The predictor array x needs to be converted to a vector to allow the multiplication.

+

The data block is identical to that of the previous program, but repeated here for convenience. A transformed data block computes the data structures needed for the mapping by organizing the data into arrays indexed by group.

+
data {
+  int<lower=0> K;
+  int<lower=0> N;
+  array[N] int<lower=1, upper=K> kk;
+  vector[N] x;
+  array[N] int<lower=0, upper=1> y;
+}
+transformed data {
+  int<lower=0> J = N / K;
+  array[K, J] real x_r;
+  array[K, J] int<lower=0, upper=1> x_i;
+  {
+    int pos = 1;
+    for (k in 1:K) {
+      int end = pos + J - 1;
+      x_r[k] = to_array_1d(x[pos:end]);
+      x_i[k] = to_array_1d(y[pos:end]);
+      pos += J;
+    }
+  }
+}
+

The integer J is set to the number of observations per group.3

+

The real data array x_r holds the predictors and the integer data array x_i holds the outcomes. The grouped data arrays are constructed by slicing the predictor vector x (and converting it to an array) and slicing the outcome array y.

+

Given the transformed data with groupings, the parameters are the same as the previous program. The model has the same priors for the hyperparameters mu and sigma, but moves the prior for beta and the likelihood to the mapped function.

+
parameters {
+  array[K] vector[2] beta;
+  vector[2] mu;
+  vector<lower=0>[2] sigma;
+}
+model {
+  mu ~ normal(0, 2);
+  sigma ~ normal(0, 2);
+  target += sum(map_rect(bl_glm, append_row(mu, sigma), beta, x_r, x_i));
+                         
+}
+

The model as written here computes the priors for each group’s parameters along with the likelihood contribution for the group. An alternative mapping would leave the prior in the model block and only map the likelihood computation. In a serial setting this shouldn’t make much of a difference, but with parallelization, there is reduced communication (the prior’s parameters need not be transmitted) and also reduced parallelization with the version that leaves the prior in the model block.

+
+
+
+

Ragged inputs and outputs

+

The previous examples included rectangular data structures and single outputs. Despite the name, this is not technically required by map_rect.

+
+

Ragged inputs

+

If each group has a different number of observations, then the rectangular data structures for predictors and outcomes will need to be padded out to be rectangular. In addition, the size of the ragged structure will need to be passed as integer data. This holds for shards with varying numbers of parameters as well as varying numbers of data points.

+
+
+

Ragged outputs

+

The output of each mapped function is concatenated in order of inputs to produce the output of map_rect. When every shard returns a singleton (size one) array, the result is the same size as the number of shards and is easy to deal with downstream. If functions return longer arrays, they can still be structured using the to_matrix function if they are rectangular.

+

If the outputs are of varying sizes, then there will have to be some way to convert it back to a usable form based on the input, because there is no way to directly return sizes or a ragged structure.

+
+
+
+
+

OpenCL

+

OpenCL (Open Computing Language) is a framework that enables writing programs that execute across heterogeneous platforms. An OpenCL program can be run on CPUs and GPUs. In order to run OpenCL programs, an OpenCL runtime must be installed on the target system.

+

Stan’s OpenCL backend is currently supported in CmdStan and its wrappers. In order to use it, the model must be compiled with the STAN_OPENCL makefile flag. Setting this flag means that the Stan-to-C++ translator (stanc3) will be supplied the --use-opencl flag and that the OpenCL enabled backend (Stan Math functions) will be enabled.

+

In Stan, the following distributions can be automatically run in parallel on both CPUs and GPUs with OpenCL:

+
    +
  • bernoulli_lpmf
  • +
  • bernoulli_logit_lpmf
  • +
  • bernoulli_logit_glm_lpmf*
  • +
  • beta_lpdf
  • +
  • beta_proportion_lpdf
  • +
  • binomial_lpmf
  • +
  • categorical_logit_glm_lpmf*
  • +
  • cauchy_lpdf
  • +
  • chi_square_lpdf
  • +
  • double_exponential_lpdf
  • +
  • exp_mod_normal_lpdf
  • +
  • exponential_lpdf
  • +
  • frechet_lpdf
  • +
  • gamma_lpdf
  • +
  • gumbel_lpdf
  • +
  • inv_chi_square_lpdf
  • +
  • inv_gamma_lpdf
  • +
  • logistic_lpdf
  • +
  • lognormal_lpdf
  • +
  • neg_binomial_lpmf
  • +
  • neg_binomial_2_lpmf
  • +
  • neg_binomial_2_log_lpmf
  • +
  • neg_binomial_2_log_glm_lpmf*
  • +
  • normal_lpdf
  • +
  • normal_id_glm_lpdf*
  • +
  • ordered_logistic_glm_lpmf*
  • +
  • pareto_lpdf
  • +
  • pareto_type_2_lpdf
  • +
  • poisson_lpmf
  • +
  • poisson_log_lpmf
  • +
  • poisson_log_glm_lpmf*
  • +
  • rayleigh_lpdf
  • +
  • scaled_inv_chi_square_lpdf
  • +
  • skew_normal_lpdf
  • +
  • std_normal_lpdf
  • +
  • student_t_lpdf
  • +
  • uniform_lpdf
  • +
  • weibull_lpdf
  • +
+

* OpenCL is not used when the covariate argument to the GLM functions is a row_vector.

+ + + +
+
+ + + Back to top

References

+
+Gelman, Andrew, and Jennifer Hill. 2007. Data Analysis Using Regression and Multilevel-Hierarchical Models. Cambridge, United Kingdom: Cambridge University Press. +
+

Footnotes

+ +
    +
  1. The term “shard” is borrowed from databases, where it refers to a slice of the rows of a database. That is exactly what it is here if we think of rows of a dataframe. Stan’s shards are more general in that they need not correspond to rows of a dataframe.↩︎

  2. +
  3. This example is a simplified form of the model described in (Gelman and Hill 2007, sec. 14.2)↩︎

  4. +
  5. This makes the strong assumption that each group has the same number of observations!↩︎

  6. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/posterior-prediction.html b/docs/2_39/stan-users-guide/posterior-prediction.html new file mode 100644 index 000000000..b176170af --- /dev/null +++ b/docs/2_39/stan-users-guide/posterior-prediction.html @@ -0,0 +1,1476 @@ + + + + + + + + + +Posterior Predictive Sampling + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Posterior Predictive Sampling

+

The goal of inference is often posterior prediction, that is evaluating or sampling from the posterior predictive distribution \(p(\tilde{y} \mid y),\) where \(y\) is observed data and \(\tilde{y}\) is yet to be observed data. Often there are unmodeled predictors \(x\) and \(\tilde{x}\) for the observed data \(y\) and unobserved data \(\tilde{y}\). With predictors, the posterior predictive density is \(p(\tilde{y} \mid +\tilde{x}, x, y).\) All of these variables may represent multivariate quantities.

+

This chapter explains how to sample from the posterior predictive distribution in Stan, including applications to posterior predictive simulation and calculating event probabilities. These techniques can be coded in Stan using random number generation in the generated quantities block. Further, a technique for fitting and performing inference in two stages is presented in a section on stand-alone generated quantities in Stan.

+
+

Posterior predictive distribution

+

Given a full Bayesian model \(p(y, \theta)\), the posterior predictive density for new data \(\tilde{y}\) given observed data \(y\) is \[ +p(\tilde{y} \mid y) += +\int p(\tilde{y} \mid \theta) \cdot p(\theta \mid y) +\, \textrm{d}\theta. +\] The product under the integral reduces to the joint posterior density \(p(\tilde{y}, \theta \mid y),\) so that the integral is simply marginalizing out the parameters \(\theta,\) leaving the predictive density \(p(\tilde{y} \mid y)\) of future observations given past observations.

+
+
+

Computing the posterior predictive distribution

+

The posterior predictive density (or mass) of a prediction \(\tilde{y}\) given observed data \(y\) can be computed using \(M\) Monte Carlo draws

+

\[ +\theta^{(m)} \sim p(\theta \mid y) +\] from the posterior as \[ +p(\tilde{y} \mid y) +\approx +\frac{1}{M} \sum_{m = 1}^M p(\tilde{y} \mid \theta^{(m)}). +\]

+

Computing directly using this formula will lead to underflow in many situations, but the log posterior predictive density, \(\log +p(\tilde{y} \mid y)\), may be computed using the stable log sum of exponentials function as \[\begin{eqnarray*} +\log p(\tilde{y} \mid y) +& \approx & +\log \frac{1}{M} \sum_{m = 1}^M p(\tilde{y} \mid \theta^{(m)}) +\\[4pt] +& = & +- \log M ++ \textrm{log-sum-exp}_{m = 1}^M \log p(\tilde{y} \mid \theta^{(m)}), +\end{eqnarray*}\] where \[ +\textrm{log-sum-exp}_{m = 1}^M v_m += \log \sum_{m = 1}^M \exp v_m +\] is used to maintain arithmetic precision. See the section on log sum of exponentials for more details.

+
+
+

Sampling from the posterior predictive distribution

+

Given draws from the posterior \(\theta^{(m)} \sim p(\theta \mid y),\) draws from the posterior predictive \(\tilde{y}^{(m)} \sim p(\tilde{y} +\mid y)\) can be generated by randomly generating from the sampling distribution with the parameter draw plugged in, \[ +\tilde{y}^{(m)} \sim p(y \mid \theta^{(m)}). +\]

+

Randomly drawing \(\tilde{y}\) from the data model is critical because there are two forms of uncertainty in posterior predictive quantities, aleatoric uncertainty and epistemic uncertainty. Epistemic uncertainty arises because \(\theta\) is unknown and estimated based only on a finite sample of data \(y\). Aleatoric uncertainty arises because even a known value of \(\theta\) leads to uncertainty about new \(\tilde{y}\) as described by the data model \(p(\tilde{y} \mid \theta)\). Both forms of uncertainty show up in the factored form of the posterior predictive distribution, \[ +p(\tilde{y} \mid y) += +\int +\underbrace{p(\tilde{y} \mid \theta)}_{\begin{array}{l} + \textrm{aleatoric} + \\[-2pt] \textrm{uncertainty} + \end{array}} +\cdot \underbrace{p(\theta \mid y)}_{\begin{array}{l} + \textrm{epistemic} + \\[-2pt] \textrm{uncertainty} + \end{array}} +\, \textrm{d}\theta. +\]

+
+
+

Posterior predictive simulation in Stan

+

Posterior predictive quantities can be coded in Stan using the generated quantities block.

+
+

Simple Poisson model

+

For example, consider a simple Poisson model for count data with a rate parameter \(\lambda > 0\) having a gamma-distributed prior, \[ +\lambda \sim \textrm{gamma}(1, 1). +\] The \(N\) observations \(y_1, \ldots, y_N\) are modeled as Poisson distributed, \[ +y_n \sim \textrm{poisson}(\lambda). +\]

+
+
+

Stan code

+

The following Stan program defines a variable for \(\tilde{y}\) by random number generation in the generated quantities block.

+
data {
+  int<lower=0> N;
+  array[N] int<lower=0> y;
+}
+parameters {
+  real<lower=0> lambda;
+}
+model {
+  lambda ~ gamma(1, 1);
+  y ~ poisson(lambda);
+}
+generated quantities {
+  int<lower=0> y_tilde = poisson_rng(lambda);
+}
+

The random draw from the data model for \(\tilde{y}\) is coded using Stan’s Poisson random number generator in the generated quantities block. This accounts for the aleatoric component of the uncertainty; Stan’s posterior sampler will account for the epistemic uncertainty, generating a new \(\tilde{y}^{(m)} \sim p(y \mid +\lambda^{(m)})\) for each posterior draw \(\lambda^{(m)} \sim p(\lambda +\mid y).\)

+

The posterior draws \(\tilde{y}^{(m)}\) may be used to estimate the expected value of \(\tilde{y}\) or any of its quantiles or posterior intervals, as well as event probabilities involving \(\tilde{y}\). In general, \(\mathbb{E}[f(\tilde{y}, \theta) \mid y]\) may be evaluated as \[ +\mathbb{E}[f(\tilde{y}, \theta) \mid y] +\approx \frac{1}{M} \sum_{m=1}^M f(\tilde{y}^{(m)}, \theta^{(m)}), +\] which is just the posterior mean of \(f(\tilde{y}, \theta).\) This quantity is computed by Stan if the value of \(f(\tilde{y}, \theta)\) is assigned to a variable in the generated quantities block. That is, if we have

+
generated quantities {
+  real f_val = f(y_tilde, theta);
+  // ...
+}
+

where the value of \(f(\tilde{y}, \theta)\) is assigned to variable f_val, then the posterior mean of f_val will be the expectation \(\mathbb{E}[f(\tilde{y}, \theta) \mid y]\).

+
+
+

Analytic posterior and posterior predictive

+

The gamma distribution is the conjugate prior distribution for the Poisson distribution, so the posterior density \(p(\lambda \mid y)\) will also follow a gamma distribution.

+

Because the posterior follows a gamma distribution and the sampling distribution is Poisson, the posterior predictive \(p(\tilde{y} \mid +y)\) will follow a negative binomial distribution, because the negative binomial is defined as a compound gamma-Poisson. That is, \(y \sim +\textrm{negative-binomial}(\alpha, \beta)\) if \(\lambda \sim +\textrm{gamma}(\alpha, \beta)\) and \(y \sim \textrm{poisson}(\lambda).\) Rather than marginalizing out the rate parameter \(\lambda\) analytically as can be done to define the negative binomial probability mass function, the rate \(\lambda^{(m)} \sim p(\lambda \mid y)\) is sampled from the posterior and then used to generate a draw of \(\tilde{y}^{(m)} \sim p(y \mid \lambda^{(m)}).\)

+
+
+
+

Posterior prediction for regressions

+
+

Posterior predictive distributions for regressions

+

Consider a regression with a single predictor \(x_n\) for the training outcome \(y_n\) and \(\tilde{x}_n\) for the test outcome \(\tilde{y}_n.\) Without considering the parametric form of any of the distributions, the posterior predictive distribution for a general regression is \[\begin{eqnarray} +p(\tilde{y} \mid \tilde{x}, y, x) +& = & \int p(\tilde{y} \mid \tilde{x}, \theta) \cdot p(\theta \mid y, x) \, +\textrm{d}\theta +\\[4pt] +& \approx & +\frac{1}{M} \sum_{m=1}^M \, p(\tilde{y} \mid \tilde{x}, \theta^{(m)}), +\end{eqnarray}\] where \(\theta^{(m)} \sim p(\theta \mid x, y).\)

+
+
+

Stan program

+

The following program defines a Poisson regression with a single predictor. These predictors are all coded as data, as are their sizes. Only the observed \(y\) values are coded as data. The predictive quantities \(\tilde{y}\) appear in the generated quantities block, where they are generated by random number generation.

+
data {
+  int<lower=0> N;  // number of training observations
+  vector[N] x;  // training predictors
+  array[N] int<lower=0> y;  // observed training counts
+  int<lower=0> N_tilde;  // number of test items to predict
+  vector[N_tilde] x_tilde;  // test predictors
+}
+parameters {
+  real alpha;  // intercept on the log-rate scale
+  real beta;  // slope on the log-rate scale
+}
+model {
+  y ~ poisson_log(alpha + beta * x);  // data model; uses the training data only
+  { alpha, beta } ~ normal(0, 1);  // standard normal priors on both coefficients
+}
+generated quantities {
+  array[N_tilde] int<lower=0> y_tilde
+    = poisson_log_rng(alpha + beta * x_tilde);  // predictive draws for the test predictors
+}
+

The Poisson distributions in both the model and generated quantities block are coded using the log rate as a parameter (that’s poisson_log vs. poisson, with the suffixes defining the scale of the parameter). The regression coefficients, an intercept alpha and slope beta, are given standard normal priors.

+

In the model block, the log rate for the Poisson is a linear function of the training data \(x\), whereas in the generated quantities block it is a function of the test data \(\tilde{x}\). Because the generated quantities block does not affect the posterior draws, the model fits \(\alpha\) and \(\beta\) using only the training data, reserving \(\tilde{x}\) to generate \(\tilde{y}.\)

+

The result from running Stan is a predictive sample \(\tilde{y}^{(1)}, +\ldots, \tilde{y}^{(M)}\) where each \(\tilde{y}^{(m)} \sim p(\tilde{y} +\mid \tilde{x}, x, y).\)

+

The mean of the posterior predictive distribution is the expected value \[\begin{align} +\mathbb{E}[\tilde{y} \mid \tilde{x}, x, y] +& = +\int +\tilde{y} +\cdot p(\tilde{y} \mid \tilde{x}, \theta) +\cdot p(\theta \mid x, y) +\, \textrm{d}\theta +\\[4pt] +& \approx \frac{1}{M} \sum_{m = 1}^M \tilde{y}^{(m)}, +\end{align}\] where the \(\tilde{y}^{(m)} \sim p(\tilde{y} \mid \tilde{x}, x, y)\) are drawn from the posterior predictive distribution. Thus the posterior mean of y_tilde[n] after running Stan is the expected value of \(\tilde{y}_n\) conditioned on the training data \(x, y\) and predictor \(\tilde{x}_n.\) This is the Bayesian estimate for \(\tilde{y}\) with minimum expected squared error. The posterior draws can also be used to estimate quantiles for the median and any posterior intervals of interest for \(\tilde{y}\), as well as covariance of the \(\tilde{y_n}.\) The posterior draws \(\tilde{y}^{(m)}\) may also be used to estimate predictive event probabilities, such as \(\Pr[\tilde{y}_1 > 0]\) or \(\Pr[\prod_{n = +1}^{\tilde{N}}(\tilde{y_n}) > 1],\) as expectations of indicator functions.

+

All of this can be carried out by running Stan only a single time to draw a single sample of \(M\) draws, \[ +\tilde{y}^{(1)}, \ldots, \tilde{y}^{(M)} \sim p(\tilde{y} \mid +\tilde{x}, x, y). +\] It’s only when moving to cross-validation where multiple runs are required.

+
+
+
+

Estimating event probabilities

+

Event probabilities involving either parameters or predictions or both may be coded in the generated quantities block. For example, to evaluate \(\Pr[\lambda > 5 \mid y]\) in the simple Poisson example with only a rate parameter \(\lambda\), it suffices to define a generated quantity

+
generated quantities {
+  int<lower=0, upper=1> lambda_gt_5 = lambda > 5;  // indicator: 1 if lambda > 5, otherwise 0
+  // ...
+}
+

The value of the expression lambda > 5 is 1 if the condition is true and 0 otherwise. The posterior mean of this parameter is the event probability \[\begin{eqnarray*} +\Pr[\lambda > 5 \mid y] +& = & +\int \textrm{I}(\lambda > 5) \cdot p(\lambda \mid y) +\, \textrm{d}\lambda +\\[4pt] +& \approx & +\frac{1}{M} \sum_{m = 1}^M \textrm{I}[\lambda^{(m)} > 5], +\end{eqnarray*}\] where each \(\lambda^{(m)} \sim p(\lambda \mid y)\) is distributed according to the posterior. In Stan, this is recovered as the posterior mean of the parameter lambda_gt_5.

+

In general, event probabilities may be expressed as expectations of indicator functions. For example, \[\begin{eqnarray*} +\Pr[\lambda > 5 \mid y] +& = & \mathbb{E}[\textrm{I}[\lambda > 5] \mid y] +\\[4pt] +& = & +\int +\textrm{I}(\lambda > 5) \cdot p(\lambda \mid y) +\, \textrm{d}\lambda +\\[4pt] +& \approx & \frac{1}{M} \sum_{m = 1}^M \textrm{I}(\lambda^{(m)} > 5). +\end{eqnarray*}\] The last line above is the posterior mean of the indicator function as coded in Stan.

+

Event probabilities involving posterior predictive quantities \(\tilde{y}\) work exactly the same way as those for parameters. For example, if \(\tilde{y}_n\) is the prediction for the \(n\)-th unobserved outcome (such as the score of a team in a game or a level of expression of a protein in a cell), then \[\begin{eqnarray*} +\Pr[\tilde{y}_3 > \tilde{y}_7 \mid \tilde{x}, x, y] +& = & +\mathbb{E}\!\left[I[\tilde{y}_3 > \tilde{y}_7] \mid \tilde{x}, x, y\right] +\\[4pt] +& = & +\int +\textrm{I}(\tilde{y}_3 > \tilde{y}_7) +\cdot p(\tilde{y} \mid \tilde{x}, x, y) +\, \textrm{d}\tilde{y} +\\[4pt] +& \approx & +\frac{1}{M} \sum_{m = 1}^M +\textrm{I}(\tilde{y}^{(m)}_3 > \tilde{y}^{(m)}_7), +\end{eqnarray*}\] where \(\tilde{y}^{(m)} \sim p(\tilde{y} \mid \tilde{x}, x, y).\)

+
+
+

Stand-alone generated quantities and ongoing prediction

+

Stan’s sampling algorithms take a Stan program representing a posterior \(p(\theta \mid y, x)\) along with actual data \(x\) and \(y\) to produce a set of draws \(\theta^{(1)}, \ldots, \theta^{(M)}\) from the posterior. Posterior predictive draws \(\tilde{y}^{(m)} \sim p(\tilde{y} \mid +\tilde{x}, x, y)\) can be generated by drawing \[ +\tilde{y}^{(m)} \sim p(y \mid \tilde{x}, \theta^{(m)}) +\] from the data model. Note that drawing \(\tilde{y}^{(m)}\) only depends on the new predictors \(\tilde{x}\) and the posterior draws \(\theta^{(m)}\). Most importantly, neither the original data nor the model density is required.

+

By saving the posterior draws, predictions for new data items \(\tilde{x}\) may be generated whenever needed. In Stan’s interfaces, this is done by writing a second Stan program that inputs the original program’s parameters and the new predictors. For example, for the linear regression case, the program to take posterior draws declares the data and parameters, and defines the model.

+
data {
+  int<lower=0> N;  // number of observations
+  vector[N] x;  // predictors
+  vector[N] y;  // observed outcomes
+}
+parameters {
+  real alpha;  // intercept
+  real beta;  // slope
+  real<lower=0> sigma;  // residual scale, constrained non-negative
+}
+model {
+  y ~ normal(alpha + beta * x, sigma);  // linear regression data model
+  alpha ~ normal(0, 5);  // prior on the intercept
+  beta ~ normal(0, 1);  // prior on the slope
+  sigma ~ lognormal(0, 0.5);  // prior on the residual scale
+}
+

A second program can be used to generate new observations. This follow-on program need only declare the parameters as they were originally defined. This may require defining constants in the data block such as sizes and hyperparameters that are involved in parameter size or constraint declarations. Then additional data is read in corresponding to predictors for new outcomes that have yet to be observed. There is no need to repeat the model or unneeded transformed parameters or generated quantities. The complete follow-on program for prediction just declares the predictors in the data, the original parameters, and then the predictions in the generated quantities block.

+
data {
+  int<lower=0> N_tilde;  // number of new items to predict
+  vector[N_tilde] x_tilde;  // predictors for the new items
+}
+parameters {
+  real alpha;  // declared exactly as in the fitting program
+  real beta;  // declared exactly as in the fitting program
+  real<lower=0> sigma;  // declared exactly as in the fitting program
+}
+generated quantities {
+  vector[N_tilde] y_tilde
+    = normal_rng(alpha + beta * x_tilde, sigma);  // one predictive draw per new item
+}
+

When running stand-alone generated quantities, the inputs required are the original draws for the parameters and any predictors corresponding to new predictions, and the output will be draws for \(\tilde{y}\) or derived quantities such as event probabilities.

+

Any posterior predictive quantities desired may be generated this way. For example, event probabilities are estimated in the usual way by defining indicator variables in the generated quantities block.

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/posterior-predictive-checks.html b/docs/2_39/stan-users-guide/posterior-predictive-checks.html new file mode 100644 index 000000000..e76588b67 --- /dev/null +++ b/docs/2_39/stan-users-guide/posterior-predictive-checks.html @@ -0,0 +1,1594 @@ + + + + + + + + + +Posterior and Prior Predictive Checks + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Posterior and Prior Predictive Checks

+

Posterior predictive checks are a way of measuring whether a model does a good job of capturing relevant aspects of the data, such as means, standard deviations, and quantiles (Rubin 1984; Andrew Gelman, Meng, and Stern 1996). Posterior predictive checking works by simulating new replicated data sets based on the fitted model parameters and then comparing statistics applied to the replicated data set with the same statistic applied to the original data set.

+

Prior predictive checks evaluate the prior the same way. Specifically, they evaluate what data sets would be consistent with the prior. They will not be calibrated with actual data, but extreme values help diagnose priors that are either too strong, too weak, poorly shaped, or poorly located.

+

Prior and posterior predictive checks are two cases of the general concept of predictive checks, just conditioning on different things (no data and the observed data, respectively). For hierarchical models, there are intermediate versions, as discussed in the section on hierarchical models and mixed replication.

+
+

Simulating from the posterior predictive distribution

+

The posterior predictive distribution is the distribution over new observations given previous observations. It’s predictive in the sense that it’s predicting behavior on new data that is not part of the training set. It’s posterior in that everything is conditioned on observed data \(y\).

+

The posterior predictive distribution for replications \(y^{\textrm{rep}}\) of the original data set \(y\) given model parameters \(\theta\) is defined by \[ +p(y^{\textrm{rep}} \mid y) += \int p(y^{\textrm{rep}} \mid \theta) + \cdot p(\theta \mid y) \, \textrm{d}\theta. +\]

+

As with other posterior predictive quantities, generating a replicated data set \(y^{\textrm{rep}}\) from the posterior predictive distribution is straightforward using the generated quantities block. Consider a simple regression model with parameters \(\theta = (\alpha, \beta, \sigma).\)

+
data {
+  int<lower=0> N;  // number of observations
+  vector[N] x;  // predictors
+  vector[N] y;  // observed outcomes
+}
+parameters {
+  real alpha;  // intercept
+  real beta;  // slope
+  real<lower=0> sigma;  // residual scale, constrained non-negative
+}
+model {
+  alpha ~ normal(0, 2);  // prior on the intercept
+  beta ~ normal(0, 1);  // prior on the slope
+  sigma ~ normal(0, 1);  // prior on the scale; sigma is declared with lower=0
+  y ~ normal(alpha + beta * x, sigma);  // data model
+}
+

To generate a replicated data set y_rep for this simple model, the following generated quantities block suffices.

+
generated quantities {
+  array[N] real y_rep = normal_rng(alpha + beta * x, sigma);  // replicated data using the original predictors x
+}
+

The vectorized form of the normal random number generator is used with the original predictors x and the model parameters alpha, beta, and sigma. The replicated data variable y_rep is declared to be the same size as the original data y, but instead of a vector type, it is declared to be an array of reals to match the return type of the function normal_rng. Because the vector and real array types have the same dimensions and layout, they can be plotted against one another and otherwise compared during downstream processing.

+

The posterior predictive sampling for posterior predictive checks is different from usual posterior predictive sampling discussed in the chapter on posterior predictions in that the original predictors \(x\) are used. That is, the posterior predictions are for the original data.

+
+
+

Plotting multiples

+

A standard posterior predictive check would plot a histogram of each replicated data set along with the original data set and compare them by eye. For this purpose, only a few replications are needed. These should be taken by thinning a larger set of replications down to the size needed to ensure rough independence of the replications.

+

Here’s a complete example where the model is a simple Poisson with a weakly informative exponential prior with a mean of 5 and standard deviation of 5.

+
data {
+  int<lower=0> N;  // number of observations
+  array[N] int<lower=0> y;  // observed counts
+}
+transformed data {
+  real<lower=0> mean_y = mean(to_vector(y));  // statistic of the original data
+  real<lower=0> sd_y = sd(to_vector(y));  // statistic of the original data
+}
+parameters {
+  real<lower=0> lambda;  // Poisson rate
+}
+model {
+  y ~ poisson(lambda);  // data model
+  lambda ~ exponential(0.2);  // exponential prior with rate 0.2 (mean 1 / 0.2 = 5)
+}
+generated quantities {
+  array[N] int<lower=0> y_rep = poisson_rng(rep_array(lambda, N));  // replicated data set, same size as y
+  real<lower=0> mean_y_rep = mean(to_vector(y_rep));  // statistic of the replicated data
+  real<lower=0> sd_y_rep = sd(to_vector(y_rep));  // statistic of the replicated data
+  int<lower=0, upper=1> mean_gte = (mean_y_rep >= mean_y);  // 1 if replicated mean >= original mean
+  int<lower=0, upper=1> sd_gte = (sd_y_rep >= sd_y);  // 1 if replicated sd >= original sd
+}
+

The generated quantities block creates a variable y_rep for the replicated data, variables mean_y_rep and sd_y_rep for the statistics of the replicated data, and indicator variables mean_gte and sd_gte for whether the replicated statistic is greater than or equal to the statistic applied to the original data.

+

Now consider generating data \(y \sim \textrm{Poisson}(5)\). The resulting small multiples plot shows the original data plotted in the upper left and eight different posterior replications plotted in the remaining boxes.

+
+
+

Posterior predictive checks for Poisson data generating process and Poisson model.

+
Posterior predictive checks for Poisson data generating process and Poisson model.
+
+
+

With a Poisson data-generating process and Poisson model, the posterior replications look similar to the original data. If it were easy to pick the original data out of the lineup, there would be a problem.

+

Now consider generating over-dispersed data \(y \sim \textrm{negative-binomial2}(5, 1).\) This has the same mean as \(\textrm{Poisson}(5)\), namely \(5\), but a standard deviation of \(\sqrt{5 + 5^2 /1} \approx 5.5.\) There is no way to fit this data with the Poisson model, because a variable distributed as \(\textrm{Poisson}(\lambda)\) has mean \(\lambda\) and standard deviation \(\sqrt{\lambda},\) which is \(\sqrt{5}\) for \(\textrm{Poisson}(5).\) Here’s the resulting small multiples plot, again with original data in the upper left.

+
+
+

Posterior predictive checks for negative binomial data generating process and Poisson model.

+
Posterior predictive checks for negative binomial data generating process and Poisson model.
+
+
+

This time, the original data stands out in stark contrast to the replicated data sets, all of which are clearly more symmetric and lower variance than the original data. That is, the model’s not appropriately capturing the variance of the data.

+
+
+

Posterior ‘’p-values’’

+

If a model captures the data well, summary statistics such as the sample mean and standard deviation should have similar values in the original and replicated data sets. This can be tested by means of a p-value-like statistic, which here is just the probability that the test statistic \(s(\cdot)\) in a replicated data set exceeds that in the original data, \[ +\Pr\!\left[ s(y^{\textrm{rep}}) \geq s(y) \mid y \right] += +\int +\textrm{I}\left( s(y^{\textrm{rep}}) \geq s(y) \right) +\cdot p\left( y^{\textrm{rep}} \mid y \right) +\, \textrm{d}{y^{\textrm{rep}}}. +\] It is important to note that ‘’p-values’’ is in quotes because these statistics are not classically calibrated, and thus will not in general have a uniform distribution even when the model is well specified (Bayarri and Berger 2000).

+

Nevertheless, values of this statistic very close to zero or one are cause for concern that the model is not fitting the data well. Unlike a visual test, this p-value-like test is easily automated for bulk model fitting.

+

To calculate event probabilities in Stan, it suffices to define indicator variables that take on value 1 if the event occurs and 0 if it does not. The posterior mean is then the event probability. For efficiency, indicator variables are defined in the generated quantities block.

+
generated quantities {
+  int<lower=0, upper=1> mean_gt;  // 1 if mean(y_rep) > mean(y), otherwise 0
+  int<lower=0, upper=1> sd_gt;  // 1 if sd(y_rep) > sd(y), otherwise 0
+  {  // nested block: y_rep is a local variable here
+    array[N] real y_rep = normal_rng(alpha + beta * x, sigma);
+    mean_gt = mean(y_rep) > mean(y);  // strict comparison
+    sd_gt = sd(y_rep) > sd(y);  // strict comparison
+  }
+}
+

The indicator variable mean_gt will have value 1 if the mean of the simulated data y_rep is greater than the mean of the original data y. Because the values of y_rep are not needed for the posterior predictive checks, the program saves output space by using a local variable for y_rep. The statistics mean(y) and sd(y) could also be computed in the transformed data block and saved.

+

For the example in the previous section, where over-dispersed data generated by a negative binomial distribution was fit with a simple Poisson model, the following plot illustrates the posterior p-value calculation for the mean statistic.

+
+
+

Histogram of means of replicated data sets; vertical red line at mean of original data.

+
Histogram of means of replicated data sets; vertical red line at mean of original data.
+
+
+

The p-value for the mean is just the percentage of replicated data sets whose statistic is greater than or equal to that of the original data. Using a Poisson model for negative binomial data still fits the mean well, with a posterior \(p\)-value of 0.49. In Stan terms, it is extracted as the posterior mean of the indicator variable mean_gt.

+

The standard deviation statistic tells a different story.

+
+
+

Scatterplot of standard deviations of replicated data sets; the vertical red line is at standard deviation of original data.

+
Scatterplot of standard deviations of replicated data sets; the vertical red line is at standard deviation of original data.
+
+
+

Here, the original data has much higher standard deviation than any of the replicated data sets. The resulting \(p\)-value estimated by Stan after a large number of iterations is exactly zero (the absolute error bounds are fine, but a lot of iterations are required to get good relative error bounds on small \(p\)-values by sampling). In other words, there were no posterior draws in which the replicated data set had a standard deviation greater than or equal to that of the original data set. Clearly, the model is not capturing the dispersion of the original data. The point of this exercise isn’t just to figure out that there’s a problem with a model, but to isolate where it is. Seeing that the data is over-dispersed compared to the Poisson model would be reason to fit a more general model like the negative binomial or a latent varying effects (aka random effects) model that can account for the over-dispersion.

+
+

Which statistics to test?

+

Any statistic may be used for the data, but these can be guided by the quantities of interest in the model itself. Popular choices in addition to mean and standard deviation are quantiles, such as the median, 5% or 95% quantiles, or even the maximum or minimum value to test extremes.

+

Despite the range of choices, test statistics should ideally be ancillary, in the sense that they should be testing something other than the fit of a parameter. For example, a simple normal model of a data set will typically fit the mean and variance of the data quite well as long as the prior doesn’t dominate the posterior. In contrast, a Poisson model of the same data cannot capture both the mean and the variance of a data set if they are different, so they bear checking in the Poisson case. As we saw with the Poisson case, the posterior mean for the single rate parameter was located near the data mean, not the data variance. Other distributions such as the lognormal and gamma distribution, have means and variances that are functions of two or more parameters.

+
+
+
+

Prior predictive checks

+

Prior predictive checks generate data according to the prior in order to assess whether a prior is appropriate (Gabry et al. 2019). A posterior predictive check generates replicated data according to the posterior predictive distribution. In contrast, the prior predictive check generates data according to the prior predictive distribution, \[ +y^{\textrm{sim}} \sim p(y). +\] The prior predictive distribution is just like the posterior predictive distribution with no observed data, so that a prior predictive check is nothing more than the limiting case of a posterior predictive check with no data.

+

This is easy to carry out mechanically by simulating parameters \[ +\theta^{\textrm{sim}} \sim p(\theta) +\] according to the priors, then simulating data \[ +y^{\textrm{sim}} \sim p(y \mid \theta^{\textrm{sim}}) +\] according to the data model given the simulated parameters. The result is a simulation from the joint distribution, \[ +(y^{\textrm{sim}}, \theta^{\textrm{sim}}) \sim p(y, \theta) +\] and thus \[ +y^{\textrm{sim}} \sim p(y) +\] is a simulation from the prior predictive distribution.

+
+

Coding prior predictive checks in Stan

+

A prior predictive check is coded just like a posterior predictive check. If a posterior predictive check has already been coded and it’s possible to set the data to be empty, then no additional coding is necessary. The disadvantage to coding prior predictive checks as posterior predictive checks with no data is that Markov chain Monte Carlo will be used to sample the parameters, which is less efficient than taking independent draws using random number generation.

+

Prior predictive checks can be coded entirely within the generated quantities block using random number generation. The resulting draws will be independent. Predictors must be read in from the actual data set—they do not have a generative model from which to be simulated. For a Poisson regression, prior predictive sampling can be encoded as the following complete Stan program.

+
data {
+  int<lower=0> N;  // number of observations
+  vector[N] x;  // predictors, read in from the actual data set
+}
+generated quantities {
+  real alpha = normal_rng(0, 1);  // intercept drawn from its prior
+  real beta = normal_rng(0, 1);  // slope drawn from its prior
+  array[N] int<lower=0> y_sim = poisson_log_rng(alpha + beta * x);  // simulated counts; Poisson draws are non-negative integers
+}
+

Running this program using Stan’s fixed-parameter sampler yields draws from the prior. These may be plotted to consider their appropriateness.

+
+
+
+

Example of prior predictive checks

+

Suppose we have a model for a football (aka soccer) league where there are \(J\) teams. Each team has a scoring rate \(\lambda_j\) and in each game will be assumed to score \(\textrm{poisson}(\lambda_j)\) points. Yes, this model completely ignores defense. Suppose the modeler does not want to “put their thumb on the scale” and would rather “let the data speak for themselves” and so uses a prior with very wide tails, because it seems uninformative, such as the widely deployed \[ +\lambda_j \sim \textrm{gamma}(\epsilon_1, \epsilon_2). +\] This is not just a manufactured example; The BUGS Book recommends setting \(\epsilon = (0.5, 0.00001)\), which corresponds to a Jeffreys prior for a Poisson rate parameter (Lunn et al. 2012, 85).

+

Suppose the league plays a round-robin tournament wherein every team plays every other team. The following Stan model generates random team abilities and the results of such a round-robin tournament, which may be used to perform prior predictive checks.

+
data {
+  int<lower=0> J;  // number of teams
+  array[2] real<lower=0> epsilon;  // the two arguments passed to gamma_rng below
+}
+generated quantities {
+  array[J] real<lower=0> lambda;  // simulated scoring rate for each team
+  array[J, J] int y;  // y[i, j]: simulated points of team i minus simulated points of team j
+  for (j in 1:J) lambda[j] = gamma_rng(epsilon[1], epsilon[2]);  // draw each rate from the gamma prior
+  for (i in 1:J) {
+    for (j in 1:J) {  // every ordered pair, including i == j
+      y[i, j] = poisson_rng(lambda[i]) - poisson_rng(lambda[j]);
+    }
+  }
+}
+

In this simulation, teams play each other twice and play themselves once. This could be made more realistic by controlling the combinatorics to only generate a single result for each pair of teams, of which there are \(\binom{J}{2} = \frac{J \cdot (J - 1)}{2}.\)

+

Using the \(\textrm{gamma}(0.5, 0.00001)\) reference prior on team abilities, the following are the first 20 simulated point differences for the match between the first two teams, \(y^{(1:20)}_{1, 2}\).

+
2597 -26000   5725  22496   1270   1072   4502  -2809   -302   4987
+7513   7527  -3268 -12374   3828   -158 -29889   2986  -1392     66
+

That’s some pretty highly scoring football games being simulated; all but one has a score differential greater than 100! In other words, this \(\textrm{gamma}(0.5, 0.00001)\) prior is putting around 95% of its weight on score differentials above 100. Given that two teams combined rarely score 10 points, this prior is way out of line with prior knowledge about football matches; it is not only consistent with outcomes that have never occurred in the history of the sport, it puts most of the prior probability mass there.

+

The posterior predictive distribution can be strongly affected by the prior when there is not much observed data and substantial prior mass is concentrated around infeasible values (A. Gelman 2006).

+

Just as with posterior predictive distributions, any statistics of the generated data may be evaluated. Here, the focus was on score difference between a single pair of teams, but it could’ve been on maximums, minimums, averages, variances, etc.

+

In this textbook example, the prior is univariate and directly related to the expected number of points scored, and could thus be directly inspected for consistency with prior knowledge about scoring rates in football. There will not be the same kind of direct connection when the prior and data model distributions are multivariate. In these more challenging situations, prior predictive checks are an easy way to get a handle on the implications of a prior in terms of what it says the data is going to look like; for a more complex application involving spatially heterogeneous air pollution concentration, see (Gabry et al. 2019).

+

Prior predictive checks can also be compared with the data, but one should not expect them to be calibrated in the same way as posterior predictive checks. That would require guessing the posterior and encoding it in the prior. The goal is to make sure the prior is not so wide that it will pull probability mass away from feasible values.

+
+
+

Mixed predictive replication for hierarchical models

+

Andrew Gelman, Meng, and Stern (1996) discuss the case of mixed replication for hierarchical models in which the hyperparameters remain fixed, but varying effects are replicated. This is neither a purely prior nor purely posterior predictive check, but falls somewhere in between.

+

For example, consider a simple varying intercept logistic regression, with intercepts \(\alpha_k\) for \(k \in 1:K\). Each data item \(y_n \in \{ 0, 1 \}\) is assumed to correspond to group \(kk_n \in 1:K.\) The data model is thus \[ +y_n \sim \textrm{bernoulli}(\textrm{logit}^{-1}(\alpha_{kk[n]})). +\] The varying intercepts have a hierarchical normal prior, \[ +\alpha_k \sim \textrm{normal}(\mu, \sigma). +\] The hyperparameters are themselves given weakly informative priors, \[\begin{eqnarray*} +\mu & \sim & \textrm{normal}(0, 2) +\\[4pt] +\sigma & \sim & \textrm{lognormal}(0, 1). +\end{eqnarray*}\]

+

Like in a posterior predictive check, the hyperparameters \(\mu\) and \(\sigma\) are drawn from the posterior, \[ +\mu^{(m)}, \sigma^{(m)} \sim p(\mu, \sigma \mid y) +\] Like in a prior predictive check, replicated values of \(\alpha\) are drawn from the hyperparameters, \[ +\alpha^{\textrm{rep}(m)}_k \sim \textrm{normal}(\alpha_k \mid +\mu^{(m)}, \sigma^{(m)}). +\] The data items are then each replicated using the replicated intercepts, \[ +y^{\textrm{rep}(m)}_n \sim +\textrm{bernoulli} + (\textrm{logit}^{-1}(\alpha^{\textrm{rep}(m)}_{kk[n]})). +\] Thus the \(y^{\textrm{rep}(m)}\) can be seen as a kind of posterior predictive replication of observations from new groups that were not among the original \(K\) groups.

+

In Stan, mixed predictive replications \(y^{\textrm{rep}(m)}\) can be programmed directly.

+
data {
+  int<lower=0> K;  // number of groups
+  int<lower=0> N;  // number of data items
+  array[N] int<lower=1, upper=K> kk;  // group index of each data item
+  array[N] int<lower=0, upper=1> y;  // binary outcomes
+}
+parameters {
+  real mu;  // hyperparameter: location of the intercepts
+  real<lower=0> sigma;  // hyperparameter: scale of the intercepts
+  vector<offset=mu, multiplier=sigma>[K] alpha;  // varying intercepts, declared with offset mu and multiplier sigma
+}
+model {
+  mu ~ normal(0, 2);               // hyperprior
+  sigma ~ lognormal(0, 1);         // hyperprior
+  alpha ~ normal(mu, sigma);       // hierarchical prior
+  y ~ bernoulli_logit(alpha[kk]);  // data model
+}
+generated quantities {
+  // alpha replicated;  mu and sigma not replicated
+  array[K] real alpha_rep
+    = normal_rng(rep_vector(mu, K), sigma);  // new intercepts drawn from the current mu and sigma
+  array[N] int<lower=0, upper=1> y_rep
+    = bernoulli_logit_rng(alpha_rep[kk]);  // outcomes replicated from the replicated intercepts
+}
+
+
+

Joint model representation

+

Following Andrew Gelman, Meng, and Stern (1996), prior, posterior, and mixed replications may all be defined as posteriors from joint models over parameters and observed and replicated data.

+
+

Posterior predictive model

+

For example, posterior predictive replication may be formulated using distribution notation as follows. \[\begin{eqnarray*} +\theta & \sim & p(\theta) +\\[2pt] +y & \sim & p(y \mid \theta) +\\[2pt] +y^{\textrm{rep}} & \sim & p(y \mid \theta) +\end{eqnarray*}\] The heavily overloaded distribution notation is meant to indicate that both \(y\) and \(y^{\textrm{rep}}\) are drawn from the same distribution, or more formally using capital letters to distinguish random variables, that the conditional densities \(p_{Y^{\textrm{rep}} \mid +\Theta}\) and \(p_{Y \mid \Theta}\) are the same.

+

The joint density is \[ +p(\theta, y, y^{\textrm{rep}}) += p(\theta) \cdot p(y \mid \theta) \cdot p(y^{\textrm{rep}} \mid \theta). +\] This again is assuming that the two distributions for \(y\) and \(y^{\textrm{rep}}\) are identical.

+

The variable \(y\) is observed, with the predictive simulation \(y^{\textrm{rep}}\) and parameter vector \(\theta\) not observed. The posterior is \(p(y^{\textrm{rep}}, \theta \mid y)\). Given draws from the posterior, the posterior predictive simulations \(y^{\textrm{rep}}\) are retained.

+
+
+

Prior predictive model

+

The prior predictive model simply drops the data component of the posterior predictive model. \[\begin{eqnarray*} +\theta & \sim & p(\theta) +\\[2pt] +y^{\textrm{rep}} & \sim & p(y \mid \theta) +\end{eqnarray*}\] This corresponds to the joint density \[ +p(\theta, y^{\textrm{rep}}) = p(\theta) \cdot p(y^{\textrm{rep}} \mid +\theta). +\]

+

It is typically straightforward to draw \(\theta\) from the prior and \(y^{\textrm{rep}}\) from the data model given \(\theta\) efficiently. In cases where it is not, the model may be coded and executed just as the posterior predictive model, only with no data.

+
+
+

Mixed replication for hierarchical models

+

The mixed replication corresponds to the model \[\begin{eqnarray*} +\phi & \sim & p(\phi) +\\[2pt] +\alpha & \sim & p(\alpha \mid \phi) +\\[2pt] +y & \sim & p(y \mid \alpha) +\\[2pt] +\alpha^{\textrm{rep}} & \sim & p(\alpha \mid \phi) +\\[2pt] +y^{\textrm{rep}} & \sim & p(y \mid \alpha^{\textrm{rep}}) +\end{eqnarray*}\] The notation here is meant to indicate that \(\alpha\) and \(\alpha^{\textrm{rep}}\) have identical distributions, as do \(y\) and \(y^{\textrm{rep}}\).

+

This corresponds to a joint model \[ +p(\phi, \alpha, \alpha^{\textrm{rep}}, y, y^{\textrm{rep}}) += +p(\phi) +\cdot p(\alpha \mid \phi) +\cdot p(y \mid \alpha) +\cdot p(\alpha^{\textrm{rep}} \mid \phi) +\cdot p(y^{\textrm{rep}} \mid \alpha^{\textrm{rep}}), +\] where \(y\) is the only observed variable, \(\alpha\) contains the lower-level parameters and \(\phi\) the hyperparameters. Note that \(\phi\) is not replicated and instead appears in the distribution for both \(\alpha\) and \(\alpha^{\textrm{rep}}\).

+

The posterior is \(p(\phi, \alpha, \alpha^{\textrm{rep}}, +y^{\textrm{rep}} \mid y)\). From posterior draws, the posterior predictive simulations \(y^{\textrm{rep}}\) are kept.

+ + + +
+
+
+ + Back to top

References

+
+Bayarri, MJ, and James O Berger. 2000. “P Values for Composite Null Models.” Journal of the American Statistical Association 95 (452): 1127–42. +
+
+Gabry, Jonah, Daniel Simpson, Aki Vehtari, Michael Betancourt, and Andrew Gelman. 2019. “Visualization in Bayesian Workflow.” Journal of the Royal Statistical Society: Series A (Statistics in Society) 182 (2): 389–402. +
+
+Gelman, A. 2006. “Prior Distributions for Variance Parameters in Hierarchical Models.” Bayesian Analysis 1 (3): 515–34. +
+
+Gelman, Andrew, Xiao-Li Meng, and Hal Stern. 1996. “Posterior Predictive Assessment of Model Fitness via Realized Discrepancies.” Statistica Sinica, 733–60. +
+
+Lunn, David, Christopher Jackson, Nicky Best, Andrew Thomas, and David Spiegelhalter. 2012. The BUGS Book: A Practical Introduction to Bayesian Analysis. CRC Press/Chapman & Hall. +
+
+Rubin, Donald B. 1984. “Bayesianly Justifiable and Relevant Frequency Calculations for the Applied Statistician.” The Annals of Statistics, 1151–72. +
+
+ + +
+ + + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/poststratification.html b/docs/2_39/stan-users-guide/poststratification.html new file mode 100644 index 000000000..179eaca67 --- /dev/null +++ b/docs/2_39/stan-users-guide/poststratification.html @@ -0,0 +1,1511 @@ + + + + + + + + + +Poststratification + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Poststratification

+

Stratification is a technique developed for survey sampling in which a population is partitioned into subgroups (i.e., stratified) and each group (i.e., stratum) is sampled independently. If the subgroups are more homogeneous (i.e., lower variance) than the population as a whole, this can reduce variance in the estimate of a quantity of interest at the population level.

+

Poststratification is a technique for adjusting a non-representative sample (i.e., a convenience sample or other observational data) for which there are demographic predictors characterizing the strata. It is carried out after a model is fit to the observed data, hence the name poststratification (Little 1993). Poststratification can be fruitfully combined with regression modeling (or more general parametric modeling), which provides estimates based on combinations of predictors (or general parameters) rather than raw counts in each stratum. Multilevel modeling is useful in determining how much partial pooling to apply in the regressions, leading to the popularity of the combination of multilevel regression and poststratification (MRP) (Park, Gelman, and Bafumi 2004).

+
+

Some examples

+
+

Earth science

+

Stratification and poststratification can be applied to many applications beyond survey sampling (Kennedy and Gelman 2019). For example, large-scale whole-earth soil-carbon models are fit with parametric models of how soil-carbon depends on features of an area such as soil composition, flora, fauna, temperature, humidity, etc. Given a model that predicts soil-carbon concentration given these features, a whole-earth model can be created by stratifying the earth into a grid of say 10km by 10km “squares” (they can’t literally be square because the earth’s surface is topologically a sphere). Each grid area has an estimated makeup of soil type, forestation, climate, etc. The global level of soil carbon is then estimated using poststratification by simply summing the expected soil carbon estimated for each square in the grid (Paustian et al. 1997). Dynamic models can then be constructed by layering a time-series component, varying the poststratification predictors over time, or both (Field et al. 1998).

+
+
+

Polling

+

Suppose a university’s administration would like to estimate the support for a given proposal among its students. A poll is carried out in which 490 respondents are undergraduates, 112 are graduate students, and 47 are continuing education students. Now suppose that support for the issue among the poll respondents is 25% among undergraduate students (subgroup 1), 40% among graduate students (subgroup 2), and 80% among continuing education students (subgroup 3). Now suppose that the student body is made up of 20,000 undergraduates, 5,000 graduate students, and 2,000 continuing education students. It is important that our subgroups are exclusive and exhaustive, i.e., they form a partition of the population.

+

The proportion of support in the poll among students in each group provides a simple maximum likelihood estimate \(\theta^* = (0.25, 0.4, +0.8)\) of support in each group for a simple Bernoulli model where student \(n\)’s vote is modeled as \[ +y_n \sim \textrm{bernoulli}(\theta_{jj[n]}), +\] where \(jj[n] \in 1:3\) is the subgroup to which the \(n\)-th student belongs.

+

An estimate of the population prevalence of support for the issue among students can be constructed by simply multiplying estimated support in each group by the size of each group. Letting \(N = (20\,000,\, 5\,000,\, 2\,000)\) be the subgroup sizes, the poststratified estimate of support in the population \(\phi^*\) is estimated by \[ +\phi^* += \frac{\displaystyle \sum_{j = 1}^3 \theta_j^* \cdot N_j} + {\displaystyle \sum_{j = 1}^3 N_j}. +\] Plugging in our estimates and population counts yields \[\begin{eqnarray*} +\phi^* +& = & \frac{0.25 \cdot 20\,000 + 0.4 \cdot 5\,000 + 0.8 \cdot 2\,000} + {20\,000 + 5\,000 + 2\,000} +\\[4pt] & = & \frac{8\,600}{27\,000} +\\[4pt] & \approx & 0.32. +\end{eqnarray*}\]

+
+
+
+

Bayesian poststratification

+

Considering the same polling data from the previous section in a Bayesian setting, the uncertainty in the estimation of subgroup support is pushed through predictive inference in order to get some idea of the uncertainty of estimated support. Continuing the example of the previous section, the data model remains the same, \[ +y_n \sim \textrm{bernoulli}(\theta_{jj[n]}), +\] where \(jj[n] \in 1:J\) is the group to which item \(n\) belongs and \(\theta_j\) is the proportion of support in group \(j\).

+

This can be reformulated from a Bernoulli model to a binomial model in the usual way. Letting \(A_j\) be the number of respondents in group \(j\) and \(a_j\) be the number of positive responses in group \(j\), the data model may be reduced to the form \[ +a_j \sim \textrm{binomial}(A_j, \theta_j). +\] A simple uniform prior on the proportion of support in each group completes the model, \[ +\theta_j \sim \textrm{beta}(1, 1). +\] A more informative prior could be used if there is prior information available about support among the student body.

+

Using sampling, draws \(\theta^{(m)} \sim p(\theta \mid y)\) from the posterior may be combined with the population sizes \(N\) to estimate \(\phi\), the proportion of support in the population, \[ +\phi^{(m)} += \frac{\displaystyle \sum_{j = 1}^J \theta_j^{(m)} \cdot N_j} + {\displaystyle \sum_{j = 1}^J N_j}. +\] The posterior draws for \(\phi^{(m)}\) characterize expected support for the issue in the entire population. These draws may be used to estimate expected support (the average of the \(\phi^{(m)}\)), posterior intervals (quantiles of the \(\phi^{(m)}\)), or to plot a histogram.

+
+
+

Poststratification in Stan

+

The maximum likelihood and Bayesian estimates can be handled with the same Stan program. The model of individual votes is collapsed to a binomial, where \(A_j\) is the number of voters from group \(j\), \(a_j\) is the number of positive responses from group \(j\), and \(N_j\) is the size of group \(j\) in the population.

+
data {
+  int<lower=1> J;
+  array[J] int<lower=0> A; 
+  array[J] int<lower=0> a;
+  vector<lower=0>[J] N;
+}
+parameters {
+  vector<lower=0, upper=1>[J] theta;
+}
+model {
+  a ~ binomial(A, theta);
+}
+generated quantities {
+  real<lower=0, upper=1> phi = dot_product(N, theta) / sum(N);
+}
+

The binomial distribution statement is vectorized, and implicitly generates the joint likelihood for the \(J\) terms. The prior is implicitly uniform on \((0, 1),\) the support of \(\theta.\) The summation is computed using a dot product and the sum function, which is why N was declared as a vector rather than as an array of integers.

+
+
+

Regression and poststratification

+

In applications to polling, there are often numerous demographic features like age, gender, income, education, state of residence, etc. If each of these demographic features induces a partition on the population, then their product also induces a partition on the population. Often sources such as the census have matching (or at least matchable) demographic data; otherwise it must be estimated.

+

The problem facing poststratification by demographic feature is that the number of strata increases exponentially as a function of the number of features. For instance, 4 age brackets, 2 sexes, 5 income brackets, and 50 states of residence leads to \(4 \cdot 2 \cdot 5 \cdot +50 = 2000\) strata. Adding another 5-way distinction, say for education level, leads to 10,000 strata. A simple model like the one in the previous section that takes an independent parameter \(\theta_j\) for support in each stratum is unworkable in that many groups will have zero respondents and almost all groups will have very few respondents.

+

A practical approach to overcoming the problem of low data size per stratum is to use a regression model. Each demographic feature will require a regression coefficient for each of its subgroups, but now the parameters add to rather than multiply the total number of parameters. For example, with 4 age brackets, 2 sexes, 5 income brackets, and 50 states of residence, there are only \(4 + 2 + 5 + 50 = +61\) regression coefficients to estimate. Now suppose that item \(n\) has demographic features \(\textrm{age}_n \in 1:4\), \(\textrm{sex}_n \in 1:2\), \(\textrm{income}_n \in 1:5,\) and \(\textrm{state}_n \in 1:50\). A logistic regression may be formulated as \[ +y_n \sim +\textrm{bernoulli}(\textrm{logit}^{-1}( +\alpha + \beta_{\textrm{age}[n]} ++ \gamma_{\textrm{sex}[n]} ++ \delta_{\textrm{income}[n]} ++ \epsilon_{\textrm{state}[n]} +)), +\] where \(\textrm{age}[n]\) is the age of the \(n\)-th respondent, \(\textrm{sex}[n]\) is their sex, \(\textrm{income}[n]\) their income and \(\textrm{state}[n]\) their state of residence. These coefficients can be assigned priors, resulting in a Bayesian regression model.

+

To poststratify the results, the population size for each combination of predictors must still be known. Then the population estimate is constructed as \[ +\sum_{i = 1}^4 \sum_{j = 1}^2 \sum_{k = 1}^5 \sum_{m = 1}^{50} +\textrm{logit}^{-1}(\alpha + \beta_i + \gamma_j + \delta_k + \epsilon_m) +\cdot \textrm{pop}_{i, j, k, m}, +\] where \(\textrm{pop}_{i, j, k, m}\) is the size of the subpopulation with age \(i\), sex \(j\), income level \(k\), and state of residence \(m\).

+

As formulated, it should be clear that any kind of prediction could be used as a basis for poststratification. For example, a Gaussian process or neural network could be used to produce a non-parametric model of outcomes \(y\) given predictors \(x\).

+
+
+

Multilevel regression and poststratification

+

With large numbers of demographic features, each cell may have very few items in it with which to estimate regression coefficients. For example, even in a national-level poll of 10,000 respondents, if they are divided by the 50 states, that’s only 200 respondents per state on average. When data sizes are small, parameter estimation can be stabilized and sharpened by providing hierarchical priors. With hierarchical priors, the data determines the amount of partial pooling among the groups. The only drawback is that if the number of groups is small, it can be hard to fit these models without strong hyperpriors.

+

The model introduced in the previous section had the data model \[ +y_n \sim +\textrm{bernoulli}(\textrm{logit}^{-1}( +\alpha + \beta_{\textrm{age}[n]} ++ \gamma_{\textrm{sex}[n]} ++ \delta_{\textrm{income}[n]} ++ \epsilon_{\textrm{state}[n]} +)). +\] The overall intercept can be given a broad fixed prior, \[ +\alpha \sim \textrm{normal}(0, 5). +\] The other regression parameters can be given hierarchical priors, \[\begin{eqnarray*} +\beta_{1:4} & \sim & \textrm{normal}(0, \sigma^{\beta}) +\\[2pt] +\gamma_{1:2} & \sim & \textrm{normal}(0, \sigma^{\gamma}) +\\[2pt] +\delta_{1:5} & \sim & \textrm{normal}(0, \sigma^{\delta}) +\\[2pt] +\epsilon_{1:50} & \sim & \textrm{normal}(0, \sigma^{\epsilon}). +\end{eqnarray*}\]

+

The hyperparameters for scale of variation within a group can be given simple standard hyperpriors, \[ +\sigma^{\beta}, \sigma^{\gamma}, \sigma^{\delta}, \sigma^{\epsilon} +\sim \textrm{normal}(0, 1). +\] The scales of these fixed hyperpriors need to be determined on a problem-by-problem basis, though ideally they will be close to standard (mean zero, unit variance).

+
+

Dealing with small partitions and non-identifiability

+

The multilevel structure of the models used for multilevel regression and poststratification consists of a sum of intercepts that vary by demographic feature. This immediately introduces non-identifiability. A constant added to each state coefficient and subtracted from each age coefficient leads to exactly the same likelihood.

+

This is non-identifiability that is only mitigated by the (hierarchical) priors. When demographic partitions are small, as they are with several categories in the example, it can be more computationally tractable to enforce a sum-to-zero constraint on the coefficients. Other values than zero will by necessity be absorbed into the intercept, which is why it typically gets a broader prior even with standardized data. With a sum to zero constraint, coefficients for binary features will be negations of each other. For example, because there are only two sex categories, \(\gamma_2 = +-\gamma_1.\)

+

To implement sum-to-zero constraints,

+
parameters {
+  vector[K - 1] alpha_raw;
+// ...
+}
+transformed parameters {
+  vector<multiplier=sigma_alpha>[K] alpha
+    = append_row(alpha_raw, -sum(alpha_raw));
+// ...    
+}
+model {
+  alpha ~ normal(0, sigma_alpha);
+}
+

This prior is hard to interpret in that there are K normal distributions, but only K - 1 free parameters. An alternative is to put the prior only on alpha_raw, but that is also difficult to interpret.

+

Soft constraints can be more computationally tractable. They are also simpler to implement.

+
parameters {
+  vector<multiplier=sigma_alpha>[K] alpha;
+// ...
+}
+model {
+  alpha ~ normal(0, sigma_alpha);
+  sum(alpha) ~ normal(0, 0.001);
+}
+

This leaves the regular prior, but adds a second prior that concentrates the sum near zero. The scale of the second prior will need to be established on a problem and data-set specific basis so that it doesn’t shrink the estimates beyond the shrinkage of the hierarchical scale parameters.

+

Note that in the hierarchical model, the values of the coefficients when there are only two coefficients should be the same absolute value but opposite signs. Any other difference could be combined into the overall intercept \(\alpha.\) Even with a wide prior on the intercept, the hyperprior on \(\sigma^{\gamma}\) may not be strong enough to enforce that, leading to a weak form of non-identifiability in the posterior. Enforcing a (hard or soft) sum-to-zero constraint can help mitigate non-identifiability. Whatever prior is chosen, prior predictive checks can help diagnose problems with it.

+

None of this work to manage identifiability in multilevel regressions has anything to do with the poststratification; it’s just required to fit a large multilevel regression with multiple discrete categories. Having multiple intercepts always leads to weak non-identifiability, even with the priors on the intercepts all centered at zero.

+
+
+
+

Coding MRP in Stan

+

Multilevel regression and poststratification can be coded directly in Stan. To code the non-centered parameterization for each coefficient, which will be required for sampling efficiency, the multiplier transform is used on each of the parameters. The combination of

+
vector<multiplier=s>[K] a;
+// ...
+a ~ normal(0, s);
+

implements a non-centered parameterization for a; a centered parameterization would drop the multiplier specification. The prior scale s is being centered here. The prior location is fixed to zero in multilevel regressions because there is an overall intercept; introducing a location parameter in the prior would introduce non-identifiability with the overall intercept. The centered parameterization drops the multiplier.

+

Here is the full Stan model, which performs poststratification in the generated quantities using population sizes made available through data variable P.

+
data {
+  int<lower=0> N;
+  array[N] int<lower=1, upper=4> age;
+  array[N] int<lower=1, upper=5> income;
+  array[N] int<lower=1, upper=50> state;
+  array[N] int<lower=0> y;
+  array[4, 5, 50] int<lower=0> P;
+}
+parameters {
+  real alpha;
+  real<lower=0> sigma_beta;
+  vector<multiplier=sigma_beta>[4] beta;
+  real<lower=0> sigma_gamma;
+  vector<multiplier=sigma_gamma>[5] gamma;
+  real<lower=0> sigma_delta;
+  vector<multiplier=sigma_delta>[50] delta;
+}
+model {
+  y ~ bernoulli_logit(alpha + beta[age] + gamma[income] + delta[state]);
+  alpha ~ normal(0, 2);
+  beta ~ normal(0, sigma_beta);
+  gamma ~ normal(0, sigma_gamma);
+  delta ~ normal(0, sigma_delta);
+  { sigma_beta, sigma_gamma, sigma_delta } ~ normal(0, 1);
+}
+generated quantities {
+  real expect_pos = 0;
+  int total = 0;
+  for (b in 1:4) {
+    for (c in 1:5) {
+      for (d in 1:50) {
+        total += P[b, c, d];
+        expect_pos
+          += P[b, c, d]
+             * inv_logit(alpha + beta[b] + gamma[c] + delta[d]);
+      }
+    }
+  }
+  real<lower=0, upper=1> phi = expect_pos / total;
+}
+

Unlike in posterior predictive inference aimed at uncertainty, there is no need to introduce binomial sampling uncertainty into the estimate of expected positive votes. Instead, generated quantities are computed as expectations. In general, it is more efficient to work in expectation if possible (the Rao-Blackwell theorem says it’s at least as efficient to work in expectation, but in practice, it can be much much more efficient, especially for discrete quantities).

+
+

Binomial coding

+

In some cases, it can be more efficient to break the data down by group. Suppose there are \(4 \times 5 \times 2 \times 50 = 2000\) groups. The data can be broken down into a size-2000 array, with entries corresponding to total vote counts in that group

+
int<lower=0> G;
+array[G] int<lower=1, upper=4> age;
+array[G] int<lower=1, upper=5> income;
+array[G] int<lower=1, upper=50> state;
+

Then the number of positive votes and the number of total votes are collected into two parallel arrays indexed by group.

+
array[G] int<lower=0> pos_votes;
+array[G] int<lower=0> total_votes;
+

Finally, the data model is converted to binomial.

+
pos_votes ~ binomial_logit(total_votes,
+                           alpha + beta[age] + ...);
+

The predictors look the same because of the way the age and other data items are coded.

+
+
+

Coding binary groups

+

In this first model, sex is not included as a predictor. With only two categories, it needs to be modeled separately, because it is not feasible to build a hierarchical model with only two cases. A sex predictor is straightforward to add to the data block; it takes on values 1 or 2 for each of the N data points.

+
  array[N] int<lower=1, upper=2> sex;
+

Then add a single regression coefficient as a parameter,

+
  real epsilon;
+

In the log odds calculation, introduce a new term

+
[epsilon, -epsilon][sex]';
+

That is, the data model will now look like

+
  y ~ bernoulli_logit(alpha + beta[age] + gamma[income] + delta[state]
+                      + [epsilon, -epsilon][sex]');
+

For data point n, the expression [epsilon, -epsilon][sex] takes on value [epsilon, -epsilon][sex][n], which with Stan’s multi-indexing reduces to [epsilon, -epsilon][sex[n]]. This term evaluates to epsilon if sex[n] is 1 and to -epsilon if sex[n] is 2. The result is effectively a sum-to-zero constraint on two sex coefficients. The ' at the end transposes [epsilon, -epsilon][sex] which is a row_vector into a vector that can be added to the other variables.

+

Finally, a prior is needed for the coefficient in the model block,

+
epsilon ~ normal(0, 2);
+

As with other priors in multilevel models, the posterior for epsilon should be investigated to make sure it is not unrealistically wide.

+
+
+
+

Adding group-level predictors

+

If there are group-level predictors, such as average income in a state, or vote share in a previous election, these may be used as predictors in the regression. They will not pose an obstacle to poststratification because they are at the group level. For example, suppose the average income level in the state is available as the data variable

+
array[50] real<lower=0> income;
+

then a regression coefficient psi can be added for the effect of average state income,

+
real psi;
+

with a fixed prior,

+
psi ~ normal(0, 2);
+

This prior assumes the income predictor has been standardized. Finally, a term is added to the regression for the fixed predictor,

+
y ~ bernoulli_logit(alpha + beta[age] + ... + delta[state]
+                    + income[state] * psi);
+

And finally, the formula in the generated quantities block is also updated,

+
expect_pos
+  += P[b, c, d]
+     * inv_logit(alpha + beta[b] + gamma[c] + delta[d]
+             + income[d] * psi);
+

Here d is the loop variable looping over states. This ensures that the poststratification formula matches the model formula.

+ + + +
+
+ + Back to top

References

+
+Field, Christopher B, Michael J Behrenfeld, James T Randerson, and Paul Falkowski. 1998. “Primary Production of the Biosphere: Integrating Terrestrial and Oceanic Components.” Science 281 (5374): 237–40. +
+
+Kennedy, Lauren, and Andrew Gelman. 2019. “Know Your Population and Know Your Model: Using Model-Based Regression and Poststratification to Generalize Findings Beyond the Observed Sample.” arXiv, no. 1906.11323. +
+
+Little, Roderick JA. 1993. “Post-Stratification: A Modeler’s Perspective.” Journal of the American Statistical Association 88 (423): 1001–12. +
+
+Park, David K, Andrew Gelman, and Joseph Bafumi. 2004. “Bayesian Multilevel Estimation with Poststratification: State-Level Estimates from National Polls.” Political Analysis 12 (4): 375–85. +
+
+Paustian, Keith, Elissa Levine, Wilfred M Post, and Irene M Ryzhova. 1997. “The Use of Models to Integrate Information and Understanding of Soil C at the Regional Scale.” Geoderma 79 (1-4): 227–60. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/problematic-posteriors.html b/docs/2_39/stan-users-guide/problematic-posteriors.html new file mode 100644 index 000000000..651f38493 --- /dev/null +++ b/docs/2_39/stan-users-guide/problematic-posteriors.html @@ -0,0 +1,1619 @@ + + + + + + + + + +Problematic Posteriors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Problematic Posteriors

+

Mathematically speaking, with a proper posterior, one can do Bayesian inference and that’s that. There is not even a need to require a finite variance or even a finite mean—all that’s needed is a finite integral. Nevertheless, modeling is a tricky business and even experienced modelers sometimes code models that lead to improper priors. Furthermore, some posteriors are mathematically sound, but ill-behaved in practice. This chapter discusses issues in models that create problematic posterior inferences, either in general for Bayesian inference or in practice for Stan.

+
+

Collinearity of predictors in regressions

+

This section discusses problems related to the classical notion of identifiability, which lead to ridges in the posterior density and wreak havoc with both sampling and inference.

+
+

Examples of collinearity

+
+

Redundant intercepts

+

The first example of collinearity is an artificial example involving redundant intercept parameters.1

+

Suppose there are observations \(y_n\) for \(n \in \{1,\dotsc,N\}\), two intercept parameters \(\lambda_1\) and \(\lambda_2\), a scale parameter \(\sigma > 0\), and the data model \[ +y_n \sim \textsf{normal}(\lambda_1 + \lambda_2, \sigma). +\]

+

For any constant \(q\), the sampling density for \(y\) does not change if we add \(q\) to \(\lambda_1\) and subtract it from \(\lambda_2\), i.e., \[ +p(y \mid \lambda_1, \lambda_2,\sigma) += +p(y \mid \lambda_1 + q, \lambda_2 - q, \sigma). +\]

+

The consequence is that an improper uniform prior \(p(\lambda_1,\lambda_2,\sigma) +\propto 1\) leads to an improper posterior. This impropriety arises because the neighborhoods around \(\lambda_1 + q, \lambda_2 - q\) have the same mass no matter what \(q\) is. Therefore, a sampler would need to spend as much time in the neighborhood of \(\lambda_1=1\,000\,000\,000\) and \(\lambda_2=-1\,000\,000\,000\) as it does in the neighborhood of \(\lambda_1=0\) and \(\lambda_2=0\), and so on for ever more far-ranging values.

+

The marginal posterior \(p(\lambda_1,\lambda_2 \mid y)\) for this model is thus improper.2

+

The impropriety shows up visually as a ridge in the posterior density, as illustrated in the left-hand plot. The ridge for this model is along the line where \(\lambda_2 = -\lambda_1 + c\) for some constant \(c\).

+

Contrast this model with a simple regression with a single intercept parameter \(\mu\) and data model \[ +y_n \sim \textsf{normal}(\mu,\sigma). +\] Even with an improper prior, the posterior is proper as long as there are at least two data points \(y_n\) with distinct values.

+
+
+

Ability and difficulty in IRT models

+

Consider an item-response theory model for students \(j \in 1{:}J\) with abilities \(\alpha_j\) and test items \(i \in 1{:}I\) with difficulties \(\beta_i\). The observed data are an \(I \times J\) array with entries \(y_{i, j} \in \{ 0, 1 \}\) coded such that \(y_{i, j} = 1\) indicates that student \(j\) answered question \(i\) correctly. The sampling distribution for the data is \[ +y_{i, j} \sim \textsf{Bernoulli}(\operatorname{logit}^{-1}(\alpha_j - \beta_i)). +\]

+

For any constant \(c\), the probability of \(y\) is unchanged by adding a constant \(c\) to all the abilities and subtracting it from all the difficulties, i.e., \[ +p(y \mid \alpha, \beta) += +p(y \mid \alpha + c, \beta - c). +\]

+

This leads to a multivariate version of the ridge displayed by the regression with two intercepts discussed above.

+
+
+

General collinear regression predictors

+

The general form of the collinearity problem arises when predictors for a regression are collinear. For example, consider a linear regression data model \[ +y_n \sim \textsf{normal}(x_n \beta, \sigma) +\] for an \(N\)-dimensional observation vector \(y\), an \(N \times K\) predictor matrix \(x\), and a \(K\)-dimensional coefficient vector \(\beta\).

+

Now suppose that column \(k\) of the predictor matrix is a multiple of column \(k'\), i.e., there is some constant \(c\) such that \(x_{n,k} = c +\, x_{n,k'}\) for all \(n\). In this case, the coefficients \(\beta_k\) and \(\beta_{k'}\) can covary without changing the predictions, so that for any \(d\), \[ +p(y \mid \ldots, \beta_k, \dotsc, \beta_{k'}, \dotsc, \sigma) += +p(y \mid \ldots, \beta_k + d, \dotsc, \beta_{k'} - c \, d, \dotsc, +\sigma). +\]

+

Even if columns of the predictor matrix are not exactly collinear as discussed above, they cause similar problems for inference if they are nearly collinear.

+
+
+

Multiplicative issues with discrimination in IRT

+

Consider adding a discrimination parameter \(\delta_i\) for each question in an IRT model, with data model \[ +y_{i, j} \sim \textsf{Bernoulli}(\operatorname{logit}^{-1}(\delta_i(\alpha_j - \beta_i))). +\] For any constant \(c \neq 0\), multiplying \(\delta\) by \(c\) and dividing \(\alpha\) and \(\beta\) by \(c\) produces the same likelihood, \[ +p(y \mid \delta,\alpha,\beta) += p(y \mid c \delta, \frac{1}{c}\alpha, \frac{1}{c}\beta). +\] If \(c < 0\), this switches the signs of every component in \(\alpha\), \(\beta\), and \(\delta\) without changing the density.

+
+
+

Softmax with \(K\) vs. \(K-1\) parameters

+

In order to parameterize a \(K\)-simplex (i.e., a \(K\)-vector with non-negative values that sum to one), only \(K - 1\) parameters are necessary because the \(K\)th is just one minus the sum of the first \(K +- 1\) parameters, so that if \(\theta\) is a \(K\)-simplex, \[ +\theta_K = 1 - \sum_{k=1}^{K-1} \theta_k. +\]

+

The softmax function maps a \(K\)-vector \(\alpha\) of linear predictors to a \(K\)-simplex \(\theta = \texttt{softmax}(\alpha)\) by defining \[ +\theta_k = \frac{\exp(\alpha_k)}{\sum_{k'=1}^K \exp(\alpha_{k'})}. +\]

+

The softmax function is many-to-one, which leads to a lack of identifiability of the unconstrained parameters \(\alpha\). In particular, adding or subtracting a constant from each \(\alpha_k\) produces the same simplex \(\theta\).

+
+
+
+

Mitigating the invariances

+

All of the examples discussed in the previous section allow translation or scaling of parameters while leaving the data probability density invariant. These problems can be mitigated in several ways.

+
+

Removing redundant parameters or predictors

+

In the case of the multiple intercepts, \(\lambda_1\) and \(\lambda_2\), the simplest solution is to remove the redundant intercept, resulting in a model with a single intercept parameter \(\mu\) and sampling distribution \(y_n \sim \textsf{normal}(\mu, \sigma)\). The same solution works for solving the problem with collinearity—just remove one of the columns of the predictor matrix \(x\).

+
+
+

Pinning parameters

+

The IRT model without a discrimination parameter can be fixed by pinning one of its parameters to a fixed value, typically 0. For example, the first student ability \(\alpha_1\) can be fixed to 0. Now all other student ability parameters can be interpreted as being relative to student 1. Similarly, the difficulty parameters are interpretable relative to student 1’s ability to answer them.

+

This solution is not sufficient to deal with the multiplicative invariance introduced by the question discrimination parameters \(\delta_i\). To solve this problem, one of the discrimination parameters, say \(\delta_1\), must also be constrained. Because it’s a multiplicative and not an additive invariance, it must be constrained to a non-zero value, with 1 being a convenient choice. Now all of the discrimination parameters may be interpreted relative to item 1’s discrimination.

+

The many-to-one nature of \(\texttt{softmax}(\alpha)\) is typically mitigated by pinning a component of \(\alpha\), for instance fixing \(\alpha_K = 0\). The resulting mapping is one-to-one from \(K-1\) unconstrained parameters to a \(K\)-simplex. This is roughly how simplex-constrained parameters are defined in Stan; see the reference manual chapter on constrained parameter transforms for a precise definition. The Stan code for creating a simplex from a \(K-1\)-vector can be written as

+
vector softmax_id(vector alpha) {
+  vector[num_elements(alpha) + 1] alphac1;
+  for (k in 1:num_elements(alpha)) {
+    alphac1[k] = alpha[k];
+  }
+  alphac1[num_elements(alphac1)] = 0;
+  return softmax(alphac1);
+}
+
+
+

Adding priors

+

So far, the models have been discussed as if the priors on the parameters were improper uniform priors.

+

A more general Bayesian solution to these invariance problems is to impose proper priors on the parameters. This approach can be used to solve problems arising from either additive or multiplicative invariance.

+

For example, normal priors on the multiple intercepts, \[ +\lambda_1, \lambda_2 \sim \textsf{normal}(0,\tau), +\] with a constant scale \(\tau\), ensure that the posterior mode is located at a point where \(\lambda_1 = \lambda_2\), because, among all pairs with the same sum \(\lambda_1 + \lambda_2\), this maximizes \(\log \textsf{normal}(\lambda_1 \mid 0,\tau) + \log +\textsf{normal}(\lambda_2 \mid 0,\tau)\).3

+

The following plots show the posteriors for two intercept parameterization without prior, two intercept parameterization with standard normal prior, and one intercept reparameterization without prior. For all three cases, the posterior is plotted for 100 data points drawn from a standard normal.

+

The two intercept parameterization leads to an improper posterior with a ridge extending infinitely to the northwest and southeast.

+
+
+

+
Two intercepts with improper prior
+
+
+

Adding a standard normal prior for the intercepts results in a proper posterior.

+
+
+

+
Two intercepts with proper prior
+
+
+

The single intercept parameterization with no prior also has a proper posterior.

+
+
+

+
Single intercept with improper prior
+
+
+

The addition of a prior to the two intercepts model is shown in the second plot; the final plot shows the result of reparameterizing to a single intercept.

+

An alternative strategy for identifying a \(K\)-simplex parameterization \(\theta = \texttt{softmax}(\alpha)\) in terms of an unconstrained \(K\)-vector \(\alpha\) is to place a prior on the components of \(\alpha\) with a fixed location (that is, specifically avoid hierarchical priors with varying location). Unlike the approach of pinning \(\alpha_K = +0\), the prior-based approach models the \(K\) outcomes symmetrically rather than modeling \(K-1\) outcomes relative to the \(K\)-th. The pinned parameterization, on the other hand, is usually more efficient statistically because it does not have the extra degree of (prior constrained) wiggle room.

+
+
+

Vague, strongly informative, and weakly informative priors

+

Care must be used when adding a prior to resolve invariances. If the prior is taken to be too broad (i.e., too vague), the resolution is in theory only, and samplers will still struggle.

+

Ideally, a realistic prior will be formulated based on substantive knowledge of the problem being modeled. Such a prior can be chosen to have the appropriate strength based on prior knowledge. A strongly informative prior makes sense if there is strong prior information.

+

When there is not strong prior information, a weakly informative prior strikes the proper balance, controlling computational inference without dominating the data in the posterior. In most problems, the modeler will have at least some notion of the expected scale of the estimates and be able to choose a prior for identification purposes that does not dominate the data, but provides sufficient computational control on the posterior.

+

Priors can also be used in the same way to control the additive invariance of the IRT model. A typical approach is to place a strong prior on the student ability parameters \(\alpha\) to control their location and scale, which resolves the additive invariance of the basic IRT model and the multiplicative invariance of the model extended with item discrimination parameters; such a prior does not add any prior knowledge to the problem. Then a prior on item difficulty can be chosen that is either informative or weakly informative based on prior knowledge of the problem.

+
+
+
+
+

Label switching in mixture models

+

Where collinearity in regression models can lead to infinitely many posterior maxima, swapping components in a mixture model leads to finitely many posterior maxima.

+
+

Mixture models

+

Consider a normal mixture model with two location parameters \(\mu_1\) and \(\mu_2\), a shared scale \(\sigma > 0\), a mixture ratio \(\theta \in +[0,1]\), and data model \[ +p(y \mid \theta,\mu_1,\mu_2,\sigma) += \prod_{n=1}^N \big( \theta \, \textsf{normal}(y_n \mid \mu_1,\sigma) + + (1 - \theta) \, \textsf{normal}(y_n \mid \mu_2,\sigma) \big). +\] The issue here is exchangeability of the mixture components, because \[ +p(\theta,\mu_1,\mu_2,\sigma \mid y) = p\big((1-\theta),\mu_2,\mu_1,\sigma \mid y\big). +\] The problem is exacerbated as the number of mixture components \(K\) grows, as in clustering models, leading to \(K!\) identical posterior maxima.

+
+
+

Convergence monitoring and effective sample size

+

The analysis of posterior convergence and effective sample size is also difficult for mixture models. For example, the \(\hat{R}\) convergence statistic reported by Stan and the computation of effective sample size are both compromised by label switching. The problem is that the posterior mean, a key ingredient in these computations, is affected by label switching, resulting in a posterior mean for \(\mu_1\) that is equal to that of \(\mu_2\), and a posterior mean for \(\theta\) that is always 1/2, no matter what the data are.

+
+
+

Some inferences are invariant

+

In some sense, the index (or label) of a mixture component is irrelevant. Posterior predictive inferences can still be carried out without identifying mixture components. For example, the log probability of a new observation does not depend on the identities of the mixture components. The only sound Bayesian inferences in such models are those that are invariant to label switching. Posterior means for the parameters are meaningless because they are not invariant to label switching; for example, the posterior mean for \(\theta\) in the two component mixture model will always be 1/2.

+
+
+

Highly multimodal posteriors

+

Theoretically, this should not present a problem for inference because all of the integrals involved in posterior predictive inference will be well behaved. The problem in practice is computation.

+

Being able to carry out such invariant inferences in practice is an altogether different matter. It is almost always intractable to find even a single posterior mode, much less balance the exploration of the neighborhoods of multiple local maxima according to the probability masses. In Gibbs sampling, it is unlikely for \(\mu_1\) to move to a new mode when sampled conditioned on the current values of \(\mu_2\) and \(\theta\). For HMC and NUTS, the problem is that the sampler gets stuck in one of the two “bowls” around the modes and cannot gather enough energy from random momentum assignment to move from one mode to another.

+

Even with a proper posterior, all known sampling and inference techniques are notoriously ineffective when the number of modes grows super-exponentially as it does for mixture models with increasing numbers of components.

+
+
+

Hacks as fixes

+

Several hacks (i.e., “tricks”) have been suggested and employed to deal with the problems posed by label switching in practice.

+
+

Parameter ordering constraints

+

One common strategy is to impose a constraint on the parameters that identifies the components. For instance, we might consider constraining \(\mu_1 < \mu_2\) in the two-component normal mixture model discussed above. A problem that can arise from such an approach is when there is substantial probability mass for the opposite ordering \(\mu_1 > \mu_2\). In these cases, the posteriors are affected by the constraint and true posterior uncertainty in \(\mu_1\) and \(\mu_2\) is not captured by the model with the constraint. In addition, standard approaches to posterior inference for event probabilities are compromised. For instance, attempting to use \(M\) posterior draws to estimate \(\Pr[\mu_1 > \mu_2]\) will fail, because the estimator \[ +\Pr[\mu_1 > \mu_2] +\approx +\frac{1}{M} \sum_{m=1}^M \textrm{I}\left(\mu_1^{(m)} > \mu_2^{(m)}\right) +\] will result in an estimate of 0 because the posterior respects the constraint in the model.

+
+
+

Initialization around a single mode

+

Another common approach is to run a single chain or to initialize the parameters near realistic values.4

+

This can work better than the hard constraint approach if reasonable initial values can be found and the labels do not switch within a Markov chain. The result is that all chains are glued to a neighborhood of a particular mode in the posterior.

+
+
+
+
+

Component collapsing in mixture models

+

It is possible for two mixture components in a mixture model to collapse to the same values during sampling or optimization. For example, a mixture of \(K\) normals might devolve to have \(\mu_i = +\mu_j\) and \(\sigma_i = \sigma_j\) for \(i \neq j\).

+

This will typically happen early in sampling due to initialization in MCMC or optimization or arise from random movement during MCMC. Once the parameters match for a given draw \((m)\), it can become hard to escape because there can be a trough of low-density mass between the current parameter values and the ones without collapsed components.

+

It may help to use a smaller step size during warmup, or a stronger prior on each mixture component’s membership responsibility. A more extreme measure is to include additional mixture components to deal with the possibility that some of them may collapse.

+

In general, it is difficult to recover exactly the right \(K\) mixture components in a mixture model as \(K\) increases beyond one (yes, even a two-component mixture can have this problem).

+
+
+

Posteriors with unbounded densities

+

In some cases, the posterior density grows without bounds as parameters approach certain poles or boundaries. In such cases, there are no posterior modes and numerical stability issues can arise as sampled parameters approach constraint boundaries.

+
+

Mixture models with varying scales

+

One such example is a binary mixture model with scales varying by component, \(\sigma_1\) and \(\sigma_2\) for locations \(\mu_1\) and \(\mu_2\). In this situation, the density grows without bound as \(\sigma_1 \rightarrow 0\) and \(\mu_1 \rightarrow y_n\) for some \(n\); that is, one of the mixture components concentrates all of its mass around a single data item \(y_n\).

+
+
+

Beta-binomial models with skewed data and weak priors

+

Another example of unbounded densities arises with a posterior such as \(\textsf{beta}(\phi \mid 0.5,0.5)\), which can arise if seemingly weak beta priors are used for groups that have no data. This density is unbounded as \(\phi \rightarrow 0\) and \(\phi \rightarrow 1\). Similarly, a Bernoulli data model coupled with a “weak” beta prior, leads to a posterior \[\begin{align*} +p(\phi \mid y) +&\propto + \textsf{beta}(\phi \mid 0.5,0.5) \times \prod_{n=1}^N \textsf{Bernoulli}(y_n \mid \phi) \\ +&= + \textsf{beta}\left(\phi \,\middle|\, 0.5 + \sum_{n=1}^N y_n, 0.5 + N - \sum_{n=1}^N y_n\right). +\end{align*}\]

+

If \(N = 9\) and each \(y_n = 1\), the posterior is \(\textsf{beta}(\phi \mid 9.5,0.5)\). This posterior is unbounded as \(\phi +\rightarrow 1\). Nevertheless, the posterior is proper, and although there is no posterior mode, the posterior mean is well-defined with a value of exactly 0.95.

+
+

Constrained vs. unconstrained scales

+

Stan does not sample directly on the constrained \((0,1)\) space for this problem, so it doesn’t directly deal with unbounded density values. Rather, the probability values \(\phi\) are logit-transformed to \((-\infty,\infty)\). The boundaries at 0 and 1 are pushed out to \(-\infty\) and \(\infty\) respectively. The Jacobian adjustment that Stan automatically applies ensures the unconstrained density is proper. The adjustment for the particular case of \((0,1)\) is \(\log +\operatorname{logit}^{-1}(u) + \log \big(1 - \operatorname{logit}^{-1}(u)\big)\), where \(u = \operatorname{logit}(\phi)\) is the unconstrained parameter.

+

There are two problems that still arise, though. The first is that if the posterior mass for \(\phi\) is near one of the boundaries, the logit-transformed parameter will have to sweep out long paths and thus can dominate the U-turn condition imposed by the no-U-turn sampler (NUTS). The second issue is that the inverse transform from the unconstrained space to the constrained space can underflow to 0 or overflow to 1, even when the unconstrained parameter is not infinite. Similar problems arise for the expectation terms in logistic regression, which is why the logit-scale parameterizations of the Bernoulli and binomial distributions are more stable.

+
+
+
+
+

Posteriors with unbounded parameters

+

In some cases, the posterior density will not grow without bound, but parameters will grow without bound with gradually increasing density values. Like the models discussed in the previous section that have densities that grow without bound, such models also have no posterior modes.

+
+

Separability in logistic regression

+

Consider a logistic regression model with \(N\) observed outcomes \(y_n +\in \{ 0, 1 \}\), an \(N \times K\) matrix \(x\) of predictors, a \(K\)-dimensional coefficient vector \(\beta\), and data model \[ +y_n \sim \textsf{Bernoulli}(\operatorname{logit}^{-1}(x_n \beta)). +\] Now suppose that column \(k\) of the predictor matrix is such that \(x_{n,k} > 0\) if and only if \(y_n = 1\), a condition known as “separability.” In this case, predictive accuracy on the observed data continues to improve as \(\beta_k \rightarrow \infty\), because for cases with \(y_n = 1\), \(x_n \beta \rightarrow \infty\) and hence \(\operatorname{logit}^{-1}(x_n \beta) \rightarrow 1\).

+

With separability, there is no maximum to the likelihood and hence no maximum likelihood estimate. From the Bayesian perspective, the posterior is improper and therefore the marginal posterior mean for \(\beta_k\) is also not defined. The usual solution to this problem in Bayesian models is to include a proper prior for \(\beta\), which ensures a proper posterior.

+
+
+
+

Uniform posteriors

+

Suppose your model includes a parameter \(\psi\) that is defined on \([0,1]\) and is given a flat prior \(\textsf{uniform}(\psi \mid 0,1)\). Now if the data don’t tell us anything about \(\psi\), the posterior is also \(\textsf{uniform}(\psi \mid 0,1)\).

+

Although there is no maximum likelihood estimate for \(\psi\), the posterior is uniform over a closed interval and hence proper. In the case of a uniform posterior on \([0,1]\), the posterior mean for \(\psi\) is well-defined with value \(1/2\). Although there is no posterior mode, posterior predictive inference may nevertheless do the right thing by simply integrating (i.e., averaging) over the predictions for \(\psi\) at all points in \([0,1]\).

+
+
+

Sampling difficulties with problematic priors

+

With an improper posterior, it is theoretically impossible to properly explore the posterior. However, Gibbs sampling as performed by BUGS and JAGS, although still unable to properly sample from such an improper posterior, behaves differently in practice than the Hamiltonian Monte Carlo sampling performed by Stan when faced with an example such as the two intercept model discussed in the collinearity section and illustrated in the non-identifiable density plot.

+
+

Gibbs sampling

+

Gibbs sampling, as performed by BUGS and JAGS, may appear to be efficient and well behaved for this unidentified model, but as discussed in the previous subsection, will not actually explore the posterior properly.

+

Consider what happens with initial values \(\lambda_1^{(0)}, \lambda_2^{(0)}\). Gibbs sampling proceeds in iteration \(m\) by drawing \[\begin{align*} +\lambda_1^{(m)} &\sim p(\lambda_1 \mid \lambda_2^{(m-1)}, \sigma^{(m-1)}, y) \\ +\lambda_2^{(m)} &\sim p(\lambda_2 \mid \lambda_1^{(m)}, \sigma^{(m-1)}, y) \\ +\sigma^{(m)} &\sim p(\sigma \mid \lambda_1^{(m)}, \lambda_2^{(m)}, y). +\end{align*}\]

+

Now consider the draw for \(\lambda_1\) (the draw for \(\lambda_2\) is symmetric), which is conjugate in this model and thus can be done efficiently. In this model, the range from which the next \(\lambda_1\) can be drawn is highly constrained by the current values of \(\lambda_2\) and \(\sigma\). Gibbs will run quickly and provide seemingly reasonable inferences for \(\lambda_1 + \lambda_2\). But it will not explore the full range of the posterior; it will merely take a slow random walk from the initial values. This random walk behavior is typical of Gibbs sampling when posteriors are highly correlated and is the primary reason to prefer Hamiltonian Monte Carlo to Gibbs sampling for models with parameters correlated in the posterior.

+
+
+

Hamiltonian Monte Carlo sampling

+

Hamiltonian Monte Carlo (HMC), as performed by Stan, is much more efficient at exploring posteriors in models where parameters are correlated in the posterior. In this particular example, the Hamiltonian dynamics (i.e., the motion of a fictitious particle given random momentum in the field defined by the negative log posterior) is going to run up and down along the valley defined by the potential energy (ridges in log posteriors correspond to valleys in potential energy). In practice, even with a random momentum for \(\lambda_1\) and \(\lambda_2\), the gradient of the log posterior is going to adjust for the correlation and the simulation will run \(\lambda_1\) and \(\lambda_2\) in opposite directions along the valley corresponding to the ridge in the posterior log density.

+
+
+

No-U-turn sampling

+

Stan’s default no-U-turn sampler (NUTS) is even more efficient at exploring the posterior (see Hoffman and Gelman 2014). Because NUTS simulates the motion of the fictitious particle representing the parameter values until it makes a U-turn, it will be defeated in most cases by this model, as it will just move down the potential energy valley indefinitely without making a U-turn. What happens in practice is that the maximum number of leapfrog steps in the simulation will be hit in many of the iterations, causing a large number of log probability and gradient evaluations (1000 if the max tree depth is set to 10, as in the default). Thus sampling will appear to be slow. This is indicative of an improper posterior, not a bug in the NUTS algorithm or its implementation. It is simply not possible to sample from an improper posterior! Thus the behavior of HMC in general and NUTS in particular should be reassuring in that it will clearly fail in cases of improper posteriors, resulting in a clean diagnostic of sweeping out large paths in the posterior.

+

Here are results of Stan runs with default parameters fit to \(N=100\) data points generated from \(y_n \sim \textsf{normal}(0,1)\):

+

Two Location Parameters, Improper Prior

+
Inference for Stan model: improper_stan
+Warmup took (2.7, 2.6, 2.9, 2.9) seconds, 11 seconds total
+Sampling took (3.4, 3.7, 3.6, 3.4) seconds, 14 seconds total
+
+                  Mean     MCSE   StdDev        5%       95%  N_Eff  N_Eff/s  R_hat
+lp__          -5.3e+01  7.0e-02  8.5e-01  -5.5e+01  -5.3e+01    150       11    1.0
+n_leapfrog__   1.4e+03  1.7e+01  9.2e+02   3.0e+00   2.0e+03   2987      212    1.0
+lambda1        1.3e+03  1.9e+03  2.7e+03  -2.3e+03   6.0e+03    2.1     0.15    5.2
+lambda2       -1.3e+03  1.9e+03  2.7e+03  -6.0e+03   2.3e+03    2.1     0.15    5.2
+sigma          1.0e+00  8.5e-03  6.2e-02   9.5e-01   1.2e+00     54      3.9    1.1
+mu             1.6e-01  1.9e-03  1.0e-01  -8.3e-03   3.3e-01   2966      211    1.0
+

Two Location Parameters, Weak Prior

+
Warmup took (0.40, 0.44, 0.40, 0.36) seconds, 1.6 seconds total
+Sampling took (0.47, 0.40, 0.47, 0.39) seconds, 1.7 seconds total
+
+                 Mean     MCSE   StdDev        5%    95%  N_Eff  N_Eff/s  R_hat
+lp__              -54  4.9e-02  1.3e+00  -5.7e+01    -53    728      421    1.0
+n_leapfrog__      157  2.8e+00  1.5e+02   3.0e+00    511   3085     1784    1.0
+lambda1          0.31  2.8e-01  7.1e+00  -1.2e+01     12    638      369    1.0
+lambda2         -0.14  2.8e-01  7.1e+00  -1.2e+01     12    638      369    1.0
+sigma             1.0  2.6e-03  8.0e-02   9.2e-01    1.2    939      543    1.0
+mu               0.16  1.8e-03  1.0e-01  -8.1e-03   0.33   3289     1902    1.0
+

One Location Parameter, Improper Prior

+
Warmup took (0.011, 0.012, 0.011, 0.011) seconds, 0.044 seconds total
+Sampling took (0.017, 0.020, 0.020, 0.019) seconds, 0.077 seconds total
+
+                Mean     MCSE  StdDev        5%   50%   95%  N_Eff  N_Eff/s  R_hat
+lp__             -54  2.5e-02    0.91  -5.5e+01   -53   -53   1318    17198    1.0
+n_leapfrog__     3.2  2.7e-01     1.7   1.0e+00   3.0   7.0     39      507    1.0
+mu              0.17  2.1e-03    0.10  -3.8e-03  0.17  0.33   2408    31417    1.0
+sigma            1.0  1.6e-03   0.071   9.3e-01   1.0   1.2   2094    27321    1.0
+

On the top is the non-identified model with improper uniform priors and data model \(y_n \sim \textsf{normal}(\lambda_1 + \lambda_2, +\sigma)\).

+

In the middle is the same data model as on the top plus priors \(\lambda_k \sim \textsf{normal}(0,10)\).

+

On the bottom is an identified model with an improper prior, with data model \(y_n \sim \textsf{normal}(\mu,\sigma)\). All models estimate \(\mu\) at roughly 0.16 with low Monte Carlo standard error, but a high posterior standard deviation of 0.1; the true value \(\mu=0\) is within the 90% posterior intervals in all three models.

+
+
+

Examples: fits in Stan

+

To illustrate the issues with sampling from non-identified and only weakly identified models, we fit three models with increasing degrees of identification of their parameters. The posteriors for these models are illustrated in the non-identifiable density plot. The first model is the unidentified model with two location parameters and no priors discussed in the collinearity section.

+
data {
+  int N;
+  array[N] real y;
+}
+parameters {
+  real lambda1;
+  real lambda2;
+  real<lower=0> sigma;
+}
+transformed parameters {
+  real mu;
+  mu = lambda1 + lambda2;
+}
+model {
+  y ~ normal(mu, sigma);
+}
+

The second adds priors to the model block for lambda1 and lambda2 to the previous model.

+
lambda1 ~ normal(0, 10);
+lambda2 ~ normal(0, 10);
+

The third involves a single location parameter, but no priors.

+
data {
+  int N;
+  array[N] real y;
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  y ~ normal(mu, sigma);
+}
+

All three of the example models were fit in Stan 2.1.0 with default parameters (1000 warmup iterations, 1000 sampling iterations, NUTS sampler with max tree depth of 10). The results are shown in the non-identified fits figure. The key statistics from these outputs are the following.

+
    +
  • As indicated by the R_hat column, all parameters have converged other than \(\lambda_1\) and \(\lambda_2\) in the non-identified model.

  • +
  • The average number of leapfrog steps is roughly 3 in the identified model, 150 in the model identified by a weak prior, and 1400 in the non-identified model.

  • +
  • The effective sample size per second for \(\mu\) is roughly 31,000 in the identified model, 1,900 in the model identified with weakly informative priors, and 200 in the non-identified model; the results are similar for \(\sigma\).

  • +
  • In the non-identified model, the 90% interval for \(\lambda_1\) is (-2300,6000), whereas it is only (-12,12) in the model identified with weakly informative priors.

  • +
  • In all three models, the simulated values of \(\mu=0\) and \(\sigma=1\) are well within the posterior 90% intervals.

  • +
+

The first two points, lack of convergence and hitting the maximum number of leapfrog steps (equivalently maximum tree depth) are indicative of improper posteriors. Thus rather than covering up the problem with poor sampling as may be done with Gibbs samplers, Hamiltonian Monte Carlo tries to explore the posterior and its failure is a clear indication that something is amiss in the model.

+ + + +
+
+
+ + + Back to top

References

+
+Hoffman, Matthew D., and Andrew Gelman. 2014. “The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.” Journal of Machine Learning Research 15: 1593–623. http://jmlr.org/papers/v15/hoffman14a.html. +
+
+Neal, Radford M. 1996. “Sampling from Multimodal Distributions Using Tempered Transitions.” Statistics and Computing 6 (4): 353–66. +
+
+Swendsen, Robert H., and Jian-Sheng Wang. 1986. “Replica Monte Carlo Simulation of Spin Glasses.” Physical Review Letters 57: 2607–9. +
+

Footnotes

+ +
    +
  1. This example was raised by Richard McElreath on the Stan users group in a query about the difference in behavior between Gibbs sampling as used in BUGS and JAGS and the Hamiltonian Monte Carlo (HMC) and no-U-turn samplers (NUTS) used by Stan.↩︎

  2. +
  3. The marginal posterior \(p(\sigma \mid y)\) for \(\sigma\) is proper here as long as there are at least two distinct data points.↩︎

  4. +
  5. A Laplace prior (or an L1 regularizer for penalized maximum likelihood estimation) is not sufficient to remove this additive invariance. It provides shrinkage, but does not in and of itself identify the parameters because adding a constant to \(\lambda_1\) and subtracting it from \(\lambda_2\) results in the same value for the prior density.↩︎

  6. +
  7. Tempering methods may be viewed as automated ways to carry out such a search for modes, though most MCMC tempering methods continue to search for modes on an ongoing basis; see (Swendsen and Wang 1986; Neal 1996).↩︎

  8. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/proportionality-constants.html b/docs/2_39/stan-users-guide/proportionality-constants.html new file mode 100644 index 000000000..ece81d6b6 --- /dev/null +++ b/docs/2_39/stan-users-guide/proportionality-constants.html @@ -0,0 +1,1299 @@ + + + + + + + + + +Proportionality Constants + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Proportionality Constants

+

When evaluating a likelihood or prior as part of the log density computation in MCMC, variational inference, or optimization, it is usually only necessary to compute the functions up to a proportionality constant (or similarly compute log densities up to an additive constant). In MCMC this comes from the fact that the distribution being sampled does not need to be normalized (and so it is the normalization constant that is ignored). Similarly, the distribution does not need to be normalized to perform variational inference or optimization. The advantage of working with unnormalized distributions is that they can make computation quite a bit cheaper.

+

There are three different syntaxes to build the model in Stan. The way to select between them is by determining if the proportionality constants are necessary. If performance is not a problem, it is always safe to use the normalized densities.

+

The distribution statement (~) and log density increment statement (target +=) with _lupdf() use unnormalized densities for \(x\) (dropping proportionality constants):

+
x ~ normal(0, 1);
+target += normal_lupdf(x | 0, 1); // the 'u' is for unnormalized
+

The log density increment statement (target +=) with _lpdf() uses the full normalized density for \(x\) (dropping no constants):

+
target += normal_lpdf(x | 0, 1);
+

For discrete distributions, the target += syntax uses _lupmf and _lpmf instead:

+
y ~ bernoulli(0.5);
+target += bernoulli_lupmf(y | 0.5);
+target += bernoulli_lpmf(y | 0.5);
+
+

Dropping Proportionality Constants

+

If a density \(p(\theta)\) can be factored into \(K g(\theta)\) where \(K\) are all the factors that are not a function of \(\theta\) and \(g(\theta)\) are all the terms that are a function of \(\theta\), then it is said that \(g(\theta)\) is proportional to \(p(\theta)\) up to a constant.

+

The advantage of all this is that sometimes \(K\) is expensive to compute and if it is not a function of the distribution that is to be sampled (or optimized or approximated with variational inference), there is no need to compute it because it will not affect the results.

+

Stan takes advantage of the proportionality constant fact with the ~ syntax. Take for instance the normal data model:

+
data {
+  real mu;
+  real<lower=0.0> sigma;
+}
+parameters {
+  real x;
+}
+model {
+  x ~ normal(mu, sigma);
+}
+

Syntactically, this is just shorthand for the equivalent model that replaces the ~ syntax with a target += statement and a normal_lupdf function call:

+
data {
+  real mu;
+  real<lower=0.0> sigma;
+}
+parameters {
+  real x;
+}
+model {
+  target += normal_lupdf(x | mu, sigma);
+}
+

The function normal_lupdf is only guaranteed to return the log density of the normal distribution up to an additive constant (equivalently, the density to be sampled up to a proportionality constant). The proportionality constant itself is not defined. The full log density of the statement here is:

+

\[ +\textsf{normal\_lpdf}(x | \mu, \sigma) = +-\log \left( \sigma \sqrt{2 \pi} \right) +-\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right)^2. +\]

+

Now because the density here is only a function of \(x\), the additive terms in the log density that are not a function of \(x\) can be dropped. In this case it is enough to know only the quadratic term:

+

\[ +\textsf{normal\_lupdf}(x | \mu, \sigma) = +-\frac{1}{2} \left( \frac{x - \mu}{\sigma} \right)^2. +\]

+
+
+

Keeping Proportionality Constants

+

In the case that the proportionality constants were needed for a normal log density the function normal_lpdf can be used. For clarity, if there is ever a situation where it is unclear if the normalization is necessary, it should always be safe to include it. Only use the ~ or target += normal_lupdf syntaxes if it is absolutely clear that the proportionality constants are not necessary.

+
+
+

User-defined Distributions

+

When a custom _lpdf or _lpmf function is defined, the compiler will automatically make available a _lupdf or _lupmf version of the function. It is only possible to define custom distributions in the normalized form in Stan. Any attempt to define an unnormalized distribution directly will result in an error.

+

The difference in the normalized and unnormalized versions of custom probability functions is how probability functions are treated inside these functions. Any internal unnormalized probability function call will be replaced with its normalized equivalent if the normalized version of the parent custom distribution is called.

+

The following code demonstrates the different behaviors:

+
functions {
+  real custom1_lpdf(x) {
+    return normal_lupdf(x | 0.0, 1.0);
+  }
+  real custom2_lpdf(x) {
+    return normal_lpdf(x | 0.0, 1.0);
+  }
+}
+parameters {
+  real mu;
+}
+model {
+  mu ~ custom1(); // Normalization constants dropped
+  target += custom1_lupdf(mu); // Normalization constants dropped
+  target += custom1_lpdf(mu);  // Normalization constants kept
+
+  mu ~ custom2();  // Normalization constants kept
+  target += custom2_lupdf(mu);  // Normalization constants kept
+  target += custom2_lpdf(mu);  // Normalization constants kept
+}
+
+
+

Limitations on Using _lupdf and _lupmf Functions

+

To avoid ambiguities in how the normalization constants work, functions ending in _lupdf and _lupmf can only be used in the model block or user-defined probability functions (functions ending in _lpdf or _lpmf).

+ + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/references.html b/docs/2_39/stan-users-guide/references.html new file mode 100644 index 000000000..a4117cb99 --- /dev/null +++ b/docs/2_39/stan-users-guide/references.html @@ -0,0 +1,821 @@ + + + + + + + + + +references + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ + + + +
+

References

+ + +
+ + Back to top
+ +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/regression.html b/docs/2_39/stan-users-guide/regression.html new file mode 100644 index 000000000..379f4b6f1 --- /dev/null +++ b/docs/2_39/stan-users-guide/regression.html @@ -0,0 +1,2305 @@ + + + + + + + + + +Regression Models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Regression Models

+

Stan supports regression models from simple linear regressions to multilevel generalized linear models.

+
+

Linear regression

+

The simplest linear regression model is the following, with a single predictor and a slope and intercept coefficient, and normally distributed noise. This model can be written using standard regression notation as \[ +y_n = \alpha + \beta x_n + \epsilon_n +\quad\text{where}\quad +\epsilon_n \sim \operatorname{normal}(0,\sigma). +\]

+

This is equivalent to the following sampling involving the residual, \[ +y_n - (\alpha + \beta x_n) \sim \operatorname{normal}(0,\sigma), +\] and reducing still further, to \[ +y_n \sim \operatorname{normal}(\alpha + \beta x_n, \, \sigma). +\]

+

This latter form of the model is coded in Stan as follows.

+
data {
+  int<lower=0> N;
+  vector[N] x;
+  vector[N] y;
+}
+parameters {
+  real alpha;
+  real beta;
+  real<lower=0> sigma;
+}
+model {
+  y ~ normal(alpha + beta * x, sigma);
+}
+

There are N observations and for each observation, \(n \in \{1, \dotsc, N\}\), we have predictor x[n] and outcome y[n]. The intercept and slope parameters are alpha and beta. The model assumes a normally distributed noise term with scale sigma. This model has improper priors for the two regression coefficients.

+
+

Matrix notation and vectorization

+

The distribution statement in the previous model is vectorized, with

+
y ~ normal(alpha + beta * x, sigma);
+

providing the same model as the unvectorized version,

+
for (n in 1:N) {
+  y[n] ~ normal(alpha + beta * x[n], sigma);
+}
+

In addition to being more concise, the vectorized form is much faster.1

+

In general, Stan allows the arguments to distributions such as normal to be vectors. If any of the other arguments are vectors or arrays, they have to be the same size. If any of the other arguments is a scalar, it is reused for each vector entry.

+

The other reason this works is that Stan’s arithmetic operators are overloaded to perform matrix arithmetic on matrices. In this case, because x is of type vector and beta of type real, the expression beta * x is of type vector. Because Stan supports vectorization, a regression model with more than one predictor can be written directly using matrix notation.

+
data {
+  int<lower=0> N;   // number of data items
+  int<lower=0> K;   // number of predictors
+  matrix[N, K] x;   // predictor matrix
+  vector[N] y;      // outcome vector
+}
+parameters {
+  real alpha;           // intercept
+  vector[K] beta;       // coefficients for predictors
+  real<lower=0> sigma;  // error scale
+}
+model {
+  y ~ normal(x * beta + alpha, sigma);  // data model
+}
+

The constraint lower=0 in the declaration of sigma constrains the value to be greater than or equal to 0. With no prior in the model block, the effect is an improper prior on non-negative real numbers. Although a more informative prior may be added, improper priors are acceptable as long as they lead to proper posteriors.

+

In the model above, x is an \(N \times K\) matrix of predictors and beta a \(K\)-vector of coefficients, so x * beta is an \(N\)-vector of predictions, one for each of the \(N\) data items. These predictions line up with the outcomes in the \(N\)-vector y, so the entire model may be written using matrix arithmetic as shown. It would be possible to include a column of ones in the data matrix x to remove the alpha parameter.

+

The distribution statement in the model above is just a more efficient, vector-based approach to coding the model with a loop, as in the following statistically equivalent model.

+
model {
+  for (n in 1:N) {
+    y[n] ~ normal(x[n] * beta, sigma);
+  }
+}
+

With Stan’s matrix indexing scheme, x[n] picks out row n of the matrix x; because beta is a column vector, the product x[n] * beta is a scalar of type real.

+
+

Intercepts as inputs

+

In the model formulation

+
y ~ normal(x * beta, sigma);
+

there is no longer an intercept coefficient alpha. Instead, we have assumed that the first column of the input matrix x is a column of 1 values. This way, beta[1] plays the role of the intercept. If the intercept gets a different prior than the slope terms, then it would be clearer to break it out. It is also slightly more efficient in its explicit form with the intercept variable singled out because there’s one fewer multiplications; it should not make that much of a difference to speed, though, so the choice should be based on clarity.

+
+
+
+
+

The QR reparameterization

+

In the previous example, the linear predictor can be written as \(\eta += x \beta\), where \(\eta\) is a \(N\)-vector of predictions, \(x\) is a \(N +\times K\) matrix, and \(\beta\) is a \(K\)-vector of coefficients. Presuming \(N \geq K\), we can exploit the fact that any design matrix \(x\) can be decomposed using the thin QR decomposition into an orthogonal matrix \(Q\) and an upper-triangular matrix \(R\), i.e. \(x = Q +R\).

+

The functions qr_thin_Q and qr_thin_R implement the thin QR decomposition, which is to be preferred to the fat QR decomposition that would be obtained by using qr_Q and qr_R, as the latter would more easily run out of memory (see the Stan Functions Reference for more information on the qr_thin_Q and qr_thin_R functions). In practice, it is best to write \(x = Q^\ast +R^\ast\) where \(Q^\ast = Q * \sqrt{N - 1}\) and \(R^\ast = +\frac{1}{\sqrt{N - 1}} R\). Thus, we can equivalently write \(\eta = x +\beta = Q R \beta = Q^\ast R^\ast \beta\). If we let \(\theta = R^\ast +\beta\), then we have \(\eta = Q^\ast \theta\) and \(\beta = R^{\ast^{-1}} +\theta\). In that case, the previous Stan program becomes

+
data {
+  int<lower=0> N;   // number of data items
+  int<lower=0> K;   // number of predictors
+  matrix[N, K] x;   // predictor matrix
+  vector[N] y;      // outcome vector
+}
+transformed data {
+  matrix[N, K] Q_ast;
+  matrix[K, K] R_ast;
+  matrix[K, K] R_ast_inverse;
+  // thin and scale the QR decomposition
+  Q_ast = qr_thin_Q(x) * sqrt(N - 1);
+  R_ast = qr_thin_R(x) / sqrt(N - 1);
+  R_ast_inverse = inverse(R_ast);
+}
+parameters {
+  real alpha;           // intercept
+  vector[K] theta;      // coefficients on Q_ast
+  real<lower=0> sigma;  // error scale
+}
+model {
+  y ~ normal(Q_ast * theta + alpha, sigma);  // data model
+}
+generated quantities {
+  vector[K] beta;
+  beta = R_ast_inverse * theta; // coefficients on x
+}
+

Since this Stan program generates equivalent predictions for \(y\) and the same posterior distribution for \(\alpha\), \(\beta\), and \(\sigma\) as the previous Stan program, many wonder why the version with this QR reparameterization performs so much better in practice, often both in terms of wall time and in terms of effective sample size. The reasoning is threefold:

+
    +
  1. The columns of \(Q^\ast\) are orthogonal whereas the columns of \(x\) generally are not. Thus, it is easier for a Markov Chain to move around in \(\theta\)-space than in \(\beta\)-space.

  2. +
  3. The columns of \(Q^\ast\) have the same scale whereas the columns of \(x\) generally do not. Thus, a Hamiltonian Monte Carlo algorithm can move around the parameter space with a smaller number of larger steps.

  4. +
  5. Since the covariance matrix for the columns of \(Q^\ast\) is an identity matrix, \(\theta\) typically has a reasonable scale if the units of \(y\) are also reasonable. This also helps HMC move efficiently without compromising numerical accuracy.

  6. +
+

Consequently, this QR reparameterization is recommended for linear and generalized linear models in Stan whenever \(K > 1\) and you do not have an informative prior on the location of \(\beta\). It can also be worthwhile to subtract the mean from each column of \(x\) before obtaining the QR decomposition, which does not affect the posterior distribution of \(\theta\) or \(\beta\) but does affect \(\alpha\) and allows you to interpret \(\alpha\) as the expectation of \(y\) in a linear model.

+
+
+

Priors for coefficients and scales

+

See our general discussion of priors for tips on priors for parameters in regression models.

+

Later sections discuss univariate hierarchical priors and multivariate hierarchical priors, as well as priors used to identify models.

+

However, as described in the QR-reparameterization section, if you do not have an informative prior on the location of the regression coefficients, then you are better off reparameterizing your model so that the regression coefficients are a generated quantity. In that case, it usually does not matter much what prior is used on the reparameterized regression coefficients and almost any weakly informative prior that scales with the outcome will do.

+
+
+

Robust noise models

+

The standard approach to linear regression is to model the noise term \(\epsilon\) as having a normal distribution. From Stan’s perspective, there is nothing special about normally distributed noise. For instance, robust regression can be accommodated by giving the noise term a Student-\(t\) distribution. To code this in Stan, the distribution statement is changed to the following.

+
data {
+  // ...
+  real<lower=0> nu;
+}
+// ...
+model {
+  y ~ student_t(nu, alpha + beta * x, sigma);
+}
+

The degrees of freedom constant nu is specified as data.

+
+
+

Logistic and probit regression

+

For binary outcomes, either of the closely related logistic or probit regression models may be used. These generalized linear models vary only in the link function they use to map linear predictions in \((-\infty,\infty)\) to probability values in \((0,1)\). Their respective link functions, the logistic function and the standard normal cumulative distribution function, are both sigmoid functions (i.e., they are both S-shaped).

+

A logistic regression model with one predictor and an intercept is coded as follows.

+
data {
+  int<lower=0> N;
+  vector[N] x;
+  array[N] int<lower=0, upper=1> y;
+}
+parameters {
+  real alpha;
+  real beta;
+}
+model {
+  y ~ bernoulli_logit(alpha + beta * x);
+}
+

The noise parameter is built into the Bernoulli formulation here rather than specified directly.

+

Logistic regression is a kind of generalized linear model with binary outcomes and the log odds (logit) link function, defined by \[ +\operatorname{logit}(v) = \log \left( \frac{v}{1-v} \right). +\]

+

The inverse of the link function appears in the model: \[ +\operatorname{logit}^{-1}(u) = \texttt{inv}\mathtt{\_}\texttt{logit}(u) = \frac{1}{1 + \exp(-u)}. +\]

+

The model formulation above uses the logit-parameterized version of the Bernoulli distribution, which is defined by \[ +\texttt{bernoulli}\mathtt{\_}\texttt{logit}\left(y \mid \alpha \right) += +\texttt{bernoulli}\left(y \mid \operatorname{logit}^{-1}(\alpha)\right). +\]

+

The formulation is also vectorized in the sense that alpha and beta are scalars and x is a vector, so that alpha + beta * x is a vector. The vectorized formulation is equivalent to the less efficient version

+
for (n in 1:N) {
+  y[n] ~ bernoulli_logit(alpha + beta * x[n]);
+}
+

Expanding out the Bernoulli logit, the model is equivalent to the more explicit, but less efficient and less arithmetically stable

+
for (n in 1:N) {
+  y[n] ~ bernoulli(inv_logit(alpha + beta * x[n]));
+}
+

Other link functions may be used in the same way. For example, probit regression uses the cumulative normal distribution function, which is typically written as

+

\[ +\Phi(x) = \int_{-\infty}^x \textsf{normal}\left(y \mid 0,1 \right) \,\textrm{d}y. +\]

+

The cumulative standard normal distribution function \(\Phi\) is implemented in Stan as the function Phi. The probit regression model may be coded in Stan by replacing the logistic model’s distribution statement with the following.

+
y[n] ~ bernoulli(Phi(alpha + beta * x[n]));
+

A fast approximation to the cumulative standard normal distribution function \(\Phi\) is implemented in Stan as the function Phi_approx.2 The approximate probit regression model may be coded with the following.

+
y[n] ~ bernoulli(Phi_approx(alpha + beta * x[n]));
+
+
+

Multi-logit regression

+

Multiple outcome forms of logistic regression can be coded directly in Stan. For instance, suppose there are \(K\) possible outcomes for each output variable \(y_n\). Also suppose that there is a \(D\)-dimensional vector \(x_n\) of predictors for \(y_n\). The multi-logit model with \(\textsf{normal}(0,5)\) priors on the coefficients is coded as follows.

+
data {
+  int K;
+  int N;
+  int D;
+  array[N] int y;
+  matrix[N, D] x;
+}
+parameters {
+  matrix[D, K] beta;
+}
+model {
+  matrix[N, K] x_beta = x * beta;
+
+  to_vector(beta) ~ normal(0, 5);
+
+  for (n in 1:N) {
+    y[n] ~ categorical_logit(x_beta[n]');
+
+  }
+}
+

where x_beta[n]' is the transpose of x_beta[n]. The prior on beta is coded in vectorized form. As of Stan 2.18, the categorical-logit distribution is not vectorized for parameter arguments, so the loop is required. The matrix multiplication is pulled out to define a local variable for all of the predictors for efficiency. Like the Bernoulli-logit, the categorical-logit distribution applies softmax internally to convert an arbitrary vector to a simplex, \[ +\texttt{categorical}\mathtt{\_}\texttt{logit}\left(y \mid \alpha\right) += +\texttt{categorical}\left(y \mid \texttt{softmax}(\alpha)\right), +\] where \[ +\texttt{softmax}(u) = \exp(u) / \operatorname{sum}\left(\exp(u)\right). +\]

+

The categorical distribution with log-odds (logit) scaled parameters used above is equivalent to writing

+
y[n] ~ categorical(softmax(x[n] * beta));
+
+

Constraints on data declarations

+

The data block in the above model is defined without constraints on sizes K, N, and D or on the outcome array y. Constraints on data declarations provide error checking at the point data are read (or transformed data are defined), which is before sampling begins. Constraints on data declarations also make the model author’s intentions more explicit, which can help with readability. The above model’s declarations could be tightened to

+
int<lower=2> K;
+int<lower=0> N;
+int<lower=1> D;
+array[N] int<lower=1, upper=K> y;
+

These constraints arise because the number of categories, K, must be at least two in order for a categorical model to be useful. The number of data items, N, can be zero, but not negative; unlike R, Stan’s for-loops always move forward, so that a loop extent of 1:N when N is equal to zero ensures the loop’s body will not be executed. The number of predictors, D, must be at least one in order for x[n] * beta to produce an appropriate argument for softmax(). The categorical outcomes y[n] must be between 1 and K in order for the discrete sampling to be well defined.

+

Constraints on data declarations are optional. Constraints on parameters declared in the parameters block, on the other hand, are not optional—they are required to ensure support for all parameter values satisfying their constraints. Constraints on transformed data, transformed parameters, and generated quantities are also optional.

+
+
+

Identifiability

+

Because softmax is invariant under adding a constant to each component of its input, the model is typically only identified if there is a suitable prior on the coefficients.

+

An alternative is to use \((K-1)\)-vectors by fixing one of them to be zero. The partially known parameters section discusses how to mix constants and parameters in a vector. In the multi-logit case, the parameter block would be redefined to use \((K - 1)\)-vectors

+
parameters {
+  matrix[D, K - 1] beta_raw;
+}
+

and then these are transformed to parameters to use in the model. First, a transformed data block is added before the parameters block to define a vector of zero values,

+
transformed data {
+  vector[D] zeros = rep_vector(0, D);
+}
+

which can then be appended to beta_raw to produce the coefficient matrix beta,

+
transformed parameters {
+  matrix[D, K] beta = append_col(beta_raw, zeros);
+}
+

The rep_vector(0, D) call creates a column vector of size D with all entries set to zero. The derived matrix beta is then defined to be the result of appending the vector zeros as a new column at the end of beta_raw; the vector zeros is defined as transformed data so that it doesn’t need to be constructed from scratch each time it is used.

+

This is not the same model as using \(K\)-vectors as parameters, because now the prior only applies to \((K-1)\)-vectors. In practice, this will cause the maximum likelihood solutions to be different and also the posteriors to be slightly different when taking priors centered around zero, as is typical for regression coefficients.

+
+
+
+

Parameterizing centered vectors

+

When there are varying effects in a regression, the resulting likelihood is not identified unless further steps are taken. For example, we might have a global intercept \(\alpha\) and then a varying effect \(\beta_k\) for age group \(k\) to make a linear predictor \(\alpha + +\beta_k\). With this predictor, we can add a constant to \(\alpha\) and subtract from each \(\beta_k\) and get exactly the same likelihood.

+

The traditional approach to identifying such a model is to pin the first varying effect to zero, i.e., \(\beta_1 = 0\). With one of the varying effects fixed, you can no longer add a constant to all of them and the model’s likelihood is identified. In addition to the difficulty in specifying such a model in Stan, it is awkward to formulate priors because the other coefficients are all interpreted relative to \(\beta_1\).

+

In a Bayesian setting, a proper prior on each of the \(\beta\) is enough to identify the model. Unfortunately, this can lead to inefficiency during sampling as the model is still only weakly identified through the prior—there is a very simple example of the difference in the discussion of collinearity in the collinearity section.

+

An alternative identification strategy that allows a symmetric prior is to enforce a sum-to-zero constraint on the varying effects, i.e., \(\sum_{k=1}^K \beta_k = 0.\)

+

A parameter vector constrained to sum to zero may also be used to identify a multi-logit regression parameter vector (see the multi-logit section for details), or may be used for ability or difficulty parameters (but not both) in an IRT model (see the item-response model section for details).

+
+

Built-in sum-to-zero vector

+

As of Stan 2.36, there is a built in sum_to_zero_vector type, which can be used as follows.

+
parameters {
+  sum_to_zero_vector[K] beta;
+  // ...
+}
+

This produces a vector of size K such that sum(beta) = 0. The unconstrained representation requires only K - 1 values because the last is determined by the first K - 1.

+

Placing a prior on beta in this parameterization, for example,

+
  beta ~ normal(0, 1);
+

leads to a subtly different posterior than what you would get with the same prior on an unconstrained size-K vector. As explained below, the variance is reduced.

+

The sum-to-zero constraint can be implemented naively by setting the last element to the negative sum of the first elements, i.e., \(\beta_K += -\sum_{k=1}^{K-1} \beta_k.\) But that leads to high correlation among the \(\beta_k\).

+

The transform used in Stan eliminates these correlations by constructing an orthogonal basis and applying it to the zero-sum-constraint; Seyboldt (2024) provides an explanation. The Stan Reference Manual provides the details in the chapter on transforms. Although any orthogonal basis can be used, Stan uses the inverse isometric log transform because it is convenient to describe and the transform simplifies to efficient scalar operations rather than more expensive matrix operations.

+
+

Marginal distribution of sum-to-zero components

+

On the Stan forums, Aaron Goodman provided the following code to produce a prior with standard normal marginals on the components of beta,

+
model {
+  beta ~ normal(0, inv(sqrt(1 - inv(K))));
+  // ...
+}
+

The scale component can be multiplied by sigma to produce a normal(0, sigma) prior marginally.

+

To generate distributions with marginals other than standard normal, the resulting beta may be scaled by some factor sigma and translated to some new location mu.

+
+
+
+

Soft centering

+

Adding a prior such as \(\beta \sim \textsf{normal}(0,\epsilon)\) for a small \(\epsilon\) will provide a kind of soft centering of a parameter vector \(\beta\) by preferring, all else being equal, that \(\sum_{k=1}^K +\beta_k = 0\). This approach is only guaranteed to roughly center if \(\beta\) and the elementwise addition \(\beta + c\) for a scalar constant \(c\) produce the same likelihood (perhaps by another vector \(\alpha\) being transformed to \(\alpha - c\), as in the IRT models). This is another way of achieving a symmetric prior, though it requires choosing an \(\epsilon\). If \(\epsilon\) is too large, there won’t be a strong enough centering effect and if it is too small, it will add high curvature to the target density and impede sampling.

+
+
+
+

Ordered logistic and probit regression

+

Ordered regression for an outcome \(y_n \in \{ 1, \dotsc, K \}\) with predictors \(x_n \in \mathbb{R}^D\) is determined by a single coefficient vector \(\beta \in \mathbb{R}^D\) along with a sequence of cutpoints \(c \in +\mathbb{R}^{K-1}\) sorted so that \(c_d < c_{d+1}\). The discrete output is \(k\) if the linear predictor \(x_n \beta\) falls between \(c_{k-1}\) and \(c_k\), assuming \(c_0 = -\infty\) and \(c_K = \infty\). The noise term is fixed by the form of regression, with examples for ordered logistic and ordered probit models.

+
+

Ordered logistic regression

+

The ordered logistic model can be coded in Stan using the ordered data type for the cutpoints and the built-in ordered_logistic distribution.

+
data {
+  int<lower=2> K;
+  int<lower=0> N;
+  int<lower=1> D;
+  array[N] int<lower=1, upper=K> y;
+  matrix[N, D] x;
+}
+parameters {
+  vector[D] beta;
+  ordered[K - 1] c;
+}
+model {
+  y ~ ordered_logistic(x * beta, c);
+}
+

The vector of cutpoints c is declared as ordered[K - 1], which guarantees that c[k] is less than c[k + 1].

+

If the cutpoints were assigned independent priors, the constraint effectively truncates the joint prior to support over points that satisfy the ordering constraint. Luckily, Stan does not need to compute the effect of the constraint on the normalizing term because the probability is needed only up to a proportion.

+

The equivalent model can be written using the ordered_logistic_glm distribution, which can provide more efficient computation in case of higher dimensional beta.

+
  y ~ ordered_logistic_glm(x,  beta, c);
+
+

Ordered probit

+

An ordered probit model can be coded in exactly the same way by using the built-in ordered_probit distribution.

+
model {
+  ordered_probit(x * beta, c);
+}
+

There is not yet an ordered_probit_glm distribution in Stan.

+
+
+
+
+

Hierarchical regression

+

The simplest multilevel model is a hierarchical model in which the data are grouped into \(L\) distinct categories (or levels). An extreme approach would be to completely pool all the data and estimate a common vector of regression coefficients \(\beta\). At the other extreme, an approach with no pooling assigns each level \(l\) its own coefficient vector \(\beta_l\) that is estimated separately from the other levels. A hierarchical model is an intermediate solution where the degree of pooling is determined by the data and a prior on the amount of pooling.

+

Suppose each binary outcome \(y_n \in \{ 0, 1 \}\) has an associated level, \(ll_n \in \{ 1, \dotsc, L \}\). Each outcome will also have an associated predictor vector \(x_n \in \mathbb{R}^D\). Each level \(l\) gets its own coefficient vector \(\beta_l \in \mathbb{R}^D\). The hierarchical structure involves drawing the coefficients \(\beta_{l,d} +\in \mathbb{R}\) from a prior that is also estimated with the data. This hierarchically estimated prior determines the amount of pooling. If the data in each level are similar, strong pooling will be reflected in low hierarchical variance. If the data in the levels are dissimilar, weaker pooling will be reflected in higher hierarchical variance.

+

The following model encodes a hierarchical logistic regression model with a hierarchical prior on the regression coefficients.

+
data {
+  int<lower=1> D;
+  int<lower=0> N;
+  int<lower=1> L;
+  array[N] int<lower=0, upper=1> y;
+  array[N] int<lower=1, upper=L> ll;
+  array[N] row_vector[D] x;
+}
+parameters {
+  array[D] real mu;
+  array[D] real<lower=0> sigma;
+  array[L] vector[D] beta;
+}
+model {
+  for (d in 1:D) {
+    mu[d] ~ normal(0, 100);
+    for (l in 1:L) {
+      beta[l, d] ~ normal(mu[d], sigma[d]);
+    }
+  }
+  for (n in 1:N) {
+    y[n] ~ bernoulli(inv_logit(x[n] * beta[ll[n]]));
+  }
+}
+

The standard deviation parameter sigma gets an implicit uniform prior on \((0,\infty)\) because of its declaration with a lower-bound constraint of zero. Stan allows improper priors as long as the posterior is proper. Nevertheless, it is usually helpful to have informative or at least weakly informative priors for all parameters; see the regression priors section for recommendations on priors for regression coefficients and scales.

+
+

Optimizing the model

+

Where possible, vectorizing distribution statements leads to faster log probability and derivative evaluations. The speed boost is not because loops are eliminated, but because vectorization allows sharing subcomputations in the log probability and gradient calculations and because it reduces the size of the expression tree required for gradient calculations.

+

The first optimization vectorizes the for-loop over D as

+
mu ~ normal(0, 100);
+for (l in 1:L) {
+  beta[l] ~ normal(mu, sigma);
+}
+

The declaration of beta as an array of vectors means that the expression beta[l] denotes a vector. Although beta could have been declared as a matrix, an array of vectors (or a two-dimensional array) is more efficient for accessing rows; see the indexing efficiency section for more information on the efficiency tradeoffs among arrays, vectors, and matrices.

+

This model can be further sped up and at the same time made more arithmetically stable by replacing the application of inverse-logit inside the Bernoulli distribution with the logit-parameterized Bernoulli,3

+
for (n in 1:N) {
+  y[n] ~ bernoulli_logit(x[n] * beta[ll[n]]);
+}
+

Unlike in R or BUGS, loops, array access and assignments are fast in Stan because they are translated directly to C++. In most cases, the cost of allocating and assigning to a container is more than made up for by the increased efficiency due to vectorizing the log probability and gradient calculations. Thus the following version is faster than the original formulation as a loop over a distribution statement.

+
{
+  vector[N] x_beta_ll;
+  for (n in 1:N) {
+    x_beta_ll[n] = x[n] * beta[ll[n]];
+  }
+  y ~ bernoulli_logit(x_beta_ll);
+}
+

The brackets introduce a new scope for the local variable x_beta_ll; alternatively, the variable may be declared at the top of the model block.

+

In some cases, such as the above, the local variable assignment leads to models that are less readable. The recommended practice in such cases is to first develop and debug the more transparent version of the model and only work on optimizations when the simpler formulation has been debugged.

+
+
+
+

Hierarchical priors

+

Priors on priors, also known as “hyperpriors,” should be treated the same way as priors on lower-level parameters in that as much prior information as is available should be brought to bear. Because hyperpriors often apply to only a handful of lower-level parameters, care must be taken to ensure the posterior is both proper and not overly sensitive either statistically or computationally to wide tails in the priors.

+
+

Boundary-avoiding priors for MLE in hierarchical models

+

The fundamental problem with maximum likelihood estimation (MLE) in the hierarchical model setting is that as the hierarchical variance drops and the values cluster around the hierarchical mean, the overall density grows without bound. As an illustration, consider a simple hierarchical linear regression (with fixed prior mean) of \(y_n \in +\mathbb{R}\) on \(x_n \in \mathbb{R}^K\), formulated as \[\begin{align*} +y_n & \sim \textsf{normal}(x_n \beta, \sigma) \\ +\beta_k & \sim \textsf{normal}(0,\tau) \\ +\tau & \sim \textsf{Cauchy}(0,2.5) \\ +\end{align*}\]

+

In this case, as \(\tau \rightarrow 0\) and \(\beta_k \rightarrow 0\), the posterior density \[ p(\beta,\tau,\sigma|y,x) \propto p(y|x,\beta,\sigma) \, p(\beta|\tau) \, p(\tau) \] grows without bound, because the prior density \(p(\beta|\tau)\) is unbounded in this limit. See Neal’s funnel density, which has similar behavior.

+

There is obviously no maximum likelihood estimate for \(\beta,\tau,\sigma\) in such a case, and therefore the model must be modified if posterior modes are to be used for inference. The approach recommended by Chung et al. (2013) is to use a gamma distribution as a prior on the hierarchical scale, such as \[ +\tau \sim \textsf{Gamma}(2, 1/A), +\] for a reasonably large value of \(A\), such as \(A = 10\).

+
+
+
+

Item-response theory models

+

Item-response theory (IRT) models the situation in which a number of students each answer one or more of a group of test questions. The model is based on parameters for the ability of the students, the difficulty of the questions, and in more articulated models, the discriminativeness of the questions and the probability of guessing correctly; see Gelman and Hill (2007, pps. 314–320) for a textbook introduction to hierarchical IRT models and Curtis (2010) for encodings of a range of IRT models in BUGS.

+
+

Data declaration with missingness

+

The data provided for an IRT model may be declared as follows to account for the fact that not every student is required to answer every question.

+
data {
+  int<lower=1> J;                     // number of students
+  int<lower=1> K;                     // number of questions
+  int<lower=1> N;                     // number of observations
+  array[N] int<lower=1, upper=J> jj;  // student for observation n
+  array[N] int<lower=1, upper=K> kk;  // question for observation n
+  array[N] int<lower=0, upper=1> y;   // correctness for observation n
+}
+

This declares a total of N student-question pairs in the data set, where each n in 1:N indexes a binary observation y[n] of the correctness of the answer of student jj[n] on question kk[n].

+

The prior hyperparameters will be hard coded in the rest of this section for simplicity, though they could be coded as data in Stan for more flexibility.

+
+
+

1PL (Rasch) model

+

The 1PL item-response model, also known as the Rasch model, has one parameter (1P) for questions and uses the logistic link function (L).

+

The model parameters are declared as follows.

+
parameters {
+  real delta;            // mean student ability
+  array[J] real alpha;   // ability of student j - mean ability
+  array[K] real beta;    // difficulty of question k
+}
+

The parameter alpha[j] is the ability coefficient for student j and beta[k] is the difficulty coefficient for question k. The non-standard parameterization used here also includes an intercept term delta, which represents the average student’s response to the average question.4

+

The model itself is as follows.

+
model {
+  alpha ~ std_normal();         // informative true prior
+  beta ~ std_normal();          // informative true prior
+  delta ~ normal(0.75, 1);      // informative true prior
+  for (n in 1:N) {
+    y[n] ~ bernoulli_logit(alpha[jj[n]] - beta[kk[n]] + delta);
+  }
+}
+

This model uses the logit-parameterized Bernoulli distribution, where \[ +\texttt{bernoulli}\mathtt{\_}\texttt{logit}\left(y \mid \alpha\right) += +\texttt{bernoulli}\left(y \mid \operatorname{logit}^{-1}(\alpha)\right). +\]

+

The key to understanding it is the term inside the bernoulli_logit distribution, from which it follows that \[ +\Pr[y_n = 1] = \operatorname{logit}^{-1}\left(\alpha_{jj[n]} - \beta_{kk[n]} ++ \delta\right). +\]

+

The model suffers from additive identifiability issues without the priors. For example, adding a term \(\xi\) to each \(\alpha_j\) and \(\beta_k\) results in the same predictions. The use of priors for \(\alpha\) and \(\beta\) located at 0 identifies the parameters; see Gelman and Hill (2007) for a discussion of identifiability issues and alternative approaches to identification.

+

For testing purposes, the IRT 1PL model distributed with Stan uses informative priors that match the actual data generation process used to simulate the data in R (the simulation code is supplied in the same directory as the models). This is unrealistic for most practical applications, but allows Stan’s inferences to be validated. A simple sensitivity analysis with fatter priors shows that the posterior is fairly sensitive to the prior even with 400 students and 100 questions and only 25% missingness at random. For real applications, the priors should be fit hierarchically along with the other parameters, as described in the next section.

+
+
+

Multilevel 2PL model

+

The simple 1PL model described in the previous section is generalized in this section with the addition of a discrimination parameter to model how noisy a question is and by adding multilevel priors for the question difficulty and discrimination parameters. The model parameters are declared as follows.

+
parameters {
+  real mu_beta;                // mean question difficulty
+  vector[J] alpha;             // ability for j - mean
+  vector[K] beta;              // difficulty for k
+  vector<lower=0>[K] gamma;    // discrimination of k
+  real<lower=0> sigma_beta;    // scale of difficulties
+  real<lower=0> sigma_gamma;   // scale of log discrimination
+}
+

The parameters should be clearer after the model definition.

+
model {
+  alpha ~ std_normal();
+  beta ~ normal(0, sigma_beta);
+  gamma ~ lognormal(0, sigma_gamma);
+  mu_beta ~ cauchy(0, 5);
+  sigma_beta ~ cauchy(0, 5);
+  sigma_gamma ~ cauchy(0, 5);
+  y ~ bernoulli_logit(gamma[kk] .* (alpha[jj] - (beta[kk] + mu_beta)));
+}
+

The std_normal function is used here, defined by \[ +\texttt{std}\mathtt{\_}\texttt{normal}(y) += +\textsf{normal}\left(y \mid 0, 1\right). +\]

+

The distribution statement is also vectorized using elementwise multiplication; it is equivalent to

+
for (n in 1:N) {
+  y[n] ~ bernoulli_logit(gamma[kk[n]]
+                         * (alpha[jj[n]] - (beta[kk[n]] + mu_beta)));
+}
+

The 2PL model is similar to the 1PL model, with the additional parameter gamma[k] modeling how discriminative question k is. If gamma[k] is greater than 1, responses are more attenuated with less chance of getting a question right at random. The parameter gamma[k] is constrained to be positive, which prohibits there being questions that are easier for students of lesser ability; such questions are not unheard of, but they tend to be eliminated from most testing situations where an IRT model would be applied.

+

The model is parameterized here with student abilities alpha being given a standard normal prior. This is to identify both the scale and the location of the parameters, both of which would be unidentified otherwise; see the problematic posteriors chapter for further discussion of identifiability. The difficulty and discrimination parameters beta and gamma then have varying scales given hierarchically in this model. They could also be given weakly informative non-hierarchical priors, such as

+
beta ~ normal(0, 5);
+gamma ~ lognormal(0, 2);
+

The point is that the alpha determines the scale and location and beta and gamma are allowed to float.

+

The beta parameter is here given a non-centered parameterization, with parameter mu_beta serving as the mean beta location. An alternative would’ve been to take:

+
beta ~ normal(mu_beta, sigma_beta);
+

and

+
y[n] ~ bernoulli_logit(gamma[kk[n]] * (alpha[jj[n]] - beta[kk[n]]));
+

Non-centered parameterizations tend to be more efficient in hierarchical models; see the reparameterization section for more information on non-centered reparameterizations.

+

The intercept term mu_beta can’t itself be modeled hierarchically, so it is given a weakly informative \(\textsf{Cauchy}(0,5)\) prior. Similarly, the scale terms, sigma_beta, and sigma_gamma, are given half-Cauchy priors. As mentioned earlier, the scale and location for alpha are fixed to ensure identifiability. The truncation in the half-Cauchy prior is implicit; explicit truncation is not necessary because the log probability need only be calculated up to a proportion and the scale variables are constrained to \((0,\infty)\) by their declarations.

+
+
+
+

Priors for identifiability

+
+

Location and scale invariance

+

One application of (hierarchical) priors is to identify the scale and/or location of a group of parameters. For example, in the IRT models discussed in the previous section, there is both a location and scale non-identifiability. With uniform priors, the posteriors will float in terms of both scale and location. See the collinearity section for a simple example of the problems this poses for estimation.

+

The non-identifiability is resolved by providing a standard normal (i.e., \(\textsf{normal}(0,1)\)) prior on one group of coefficients, such as the student abilities. With a standard normal prior on the student abilities, the IRT model is identified in that the posterior will produce a group of estimates for student ability parameters that have a sample mean of close to zero and a sample variance of close to one. The difficulty and discrimination parameters for the questions should then be given a diffuse, or ideally a hierarchical prior, which will identify these parameters by scaling and locating relative to the student ability parameters.

+
+
+

Collinearity

+

Another case in which priors can help provide identifiability is in the case of collinearity in a linear regression. In linear regression, if two predictors are collinear (i.e., one is a linear function of the other), then their coefficients will have a correlation of 1 (or -1) in the posterior. This leads to non-identifiability. By placing normal priors on the coefficients, the maximum likelihood solution of two duplicated predictors (trivially collinear) will be half the value that would be obtained by only including one.

+
+
+

Separability

+

In a logistic regression, if a predictor is positive in cases of 1 outcomes and negative in cases of 0 outcomes, then the maximum likelihood estimate for the coefficient for that predictor diverges to infinity. This divergence can be controlled by providing a prior for the coefficient, which will “shrink” the estimate back toward zero and thus identify the model in the posterior.

+

Similar problems arise for sampling with improper flat priors. The sampler will try to draw large values. By providing a prior, the posterior will be concentrated around finite values, leading to well-behaved sampling.

+
+
+
+

Multivariate priors for hierarchical models

+

In hierarchical regression models (and other situations), several individual-level variables may be assigned hierarchical priors. For example, a model with multiple varying intercepts and slopes within groups might assign them a multivariate prior.

+

As an example, the individuals might be people and the outcome income, with predictors such as education level and age, and the groups might be states or other geographic divisions. The effect of education level and age as well as an intercept might be allowed to vary by state. Furthermore, there might be state-level predictors, such as average state income and unemployment level.

+
+

Multivariate regression example

+

Gelman and Hill (2007, Chapters 13 and 17) provide a discussion of a hierarchical model with \(N\) individuals organized into \(J\) groups. Each individual has a predictor row vector \(x_n\) of size \(K\); to unify the notation, they assume that \(x_{n,1} = 1\) is a fixed “intercept” predictor. To encode group membership, they assume individual \(n\) belongs to group \(jj[n] \in \{ 1, \dotsc, J \}\). Each individual \(n\) also has an observed outcome \(y_n\) taking on real values.

+
+

Data model

+

The model is a linear regression with slope and intercept coefficients varying by group, so that \(\beta_j\) is the coefficient \(K\)-vector for group \(j\). The data model for individual \(n\) is then just \[ +y_n \sim \textsf{normal}(x_n \, \beta_{jj[n]}, \, \sigma) +\quad\text{for}\quad n \in \{ 1, \dotsc, N \}. +\]

+
+
+

Coefficient prior

+

Gelman and Hill model the coefficient vectors \(\beta_j\) as being drawn from a multivariate distribution with mean vector \(\mu\) and covariance matrix \(\Sigma\), \[ +\beta_j \sim \textsf{multivariate normal}(\mu, \, \Sigma) +\quad\text{for}\quad j \in \{ 1, \dotsc, J \}. +\]

+

Below, we discuss the full model of Gelman and Hill, which uses group-level predictors to model \(\mu\); for now, we assume \(\mu\) is a simple vector parameter.

+
+
+

Hyperpriors

+

For hierarchical modeling, the group-level mean vector \(\mu\) and covariance matrix \(\Sigma\) must themselves be given priors. The group-level mean vector can be given a reasonable weakly-informative prior for independent coefficients, such as \[ +\mu_k \sim \textsf{normal}(0,5). +\] If more is known about the expected coefficient values \(\beta_{j, k}\), this information can be incorporated into the prior for \(\mu_k\).

+

For the prior on the covariance matrix, Gelman and Hill suggest using a scaled inverse Wishart. That choice was motivated primarily by convenience as it is conjugate to the multivariate likelihood function and thus simplifies Gibbs sampling.

+

In Stan, there is no restriction to conjugacy for multivariate priors, and we in fact recommend a slightly different approach. Like Gelman and Hill, we decompose our prior into a scale and a matrix, but are able to do so in a more natural way based on the actual variable scales and a correlation matrix. Specifically, we define \[ +\Sigma = \texttt{diag}\mathtt{\_}\texttt{matrix}(\tau) \times \Omega \times \texttt{diag}\mathtt{\_}\texttt{matrix}(\tau), +\] where \(\Omega\) is a correlation matrix and \(\tau\) is the vector of coefficient scales. This mapping from scale vector \(\tau\) and correlation matrix \(\Omega\) can be inverted, using \[ +\tau_k = \sqrt{\Sigma_{k,k}} +\quad\textsf{and}\quad +\Omega_{i, j} = \frac{\Sigma_{i, j}}{\tau_i \, \tau_j}. +\]

+

The components of the scale vector \(\tau\) can be given any reasonable prior for scales, but we recommend something weakly informative like a half-Cauchy distribution with a small scale, such as \[ +\tau_k \sim \textsf{Cauchy}(0, 2.5) +\quad\text{for}\quad k \in \{ 1, \dotsc, K \} +\quad\text{constrained\ by}\quad \tau_k > 0. +\] As for the prior means, if there is information about the scale of variation of coefficients across groups, it should be incorporated into the prior for \(\tau\). For large numbers of exchangeable coefficients, the components of \(\tau\) itself (perhaps excluding the intercept) may themselves be given a hierarchical prior.

+

Our final recommendation is to give the correlation matrix \(\Omega\) an LKJ prior with shape \(\eta \geq 1\),5

+

\[ +\Omega \sim \textsf{LKJCorr}(\eta). +\]

+

The LKJ correlation distribution is defined by \[ +\textsf{LKJCorr}\left(\Sigma \mid \eta\right) +\propto +\operatorname{det}\left(\Sigma\right)^{\eta - 1}. +\]

+

The basic behavior of the LKJ correlation distribution is similar to that of a beta distribution. For \(\eta = 1\), the result is a uniform distribution. Despite being uniform over correlation matrices, the marginal distribution over the entries in that matrix (i.e., the correlations) is not uniform between -1 and 1. Rather, it concentrates around zero as the dimensionality increases due to the complex constraints.

+

For \(\eta > 1\), the density increasingly concentrates mass around the unit matrix, i.e., favoring less correlation. For \(\eta < 1\), it increasingly concentrates mass in the other direction, i.e., favoring more correlation.

+

The LKJ prior may thus be used to control the expected amount of correlation among the parameters \(\beta_j\). For a discussion of decomposing a covariance prior into a prior on correlation matrices and an independent prior on scales, see Barnard, McCulloch, and Meng (2000).

+
+
+

Group-level predictors for prior mean

+

To complete Gelman and Hill’s model, suppose each group \(j \in \{ 1, \dotsc, J \}\) is supplied with an \(L\)-dimensional row-vector of group-level predictors \(u_j\). The prior mean for the \(\beta_j\) can then itself be modeled as a regression, using an \(L\)-dimensional coefficient vector \(\gamma\). The prior for the group-level coefficients then becomes \[ +\beta_j \sim \textsf{multivariate normal}(u_j \, \gamma, \Sigma) +\]

+

The group-level coefficients \(\gamma\) may themselves be given independent weakly informative priors, such as \[ +\gamma_l \sim \textsf{normal}(0,5). +\] As usual, information about the group-level means should be incorporated into this prior.

+
+
+

Coding the model in Stan

+

The Stan code for the full hierarchical model with multivariate priors on the group-level coefficients and group-level prior means follows its definition.

+
data {
+  int<lower=0> N;              // num individuals
+  int<lower=1> K;              // num ind predictors
+  int<lower=1> J;              // num groups
+  int<lower=1> L;              // num group predictors
+  array[N] int<lower=1, upper=J> jj;  // group for individual
+  matrix[N, K] x;              // individual predictors
+  array[J] row_vector[L] u;    // group predictors
+  vector[N] y;                 // outcomes
+}
+parameters {
+  corr_matrix[K] Omega;        // prior correlation
+  vector<lower=0>[K] tau;      // prior scale
+  matrix[L, K] gamma;          // group coeffs
+  array[J] vector[K] beta;     // indiv coeffs by group
+  real<lower=0> sigma;         // prediction error scale
+}
+model {
+  tau ~ cauchy(0, 2.5);
+  Omega ~ lkj_corr(2);
+  to_vector(gamma) ~ normal(0, 5);
+  {
+    array[J] row_vector[K] u_gamma;
+    for (j in 1:J) {
+      u_gamma[j] = u[j] * gamma;
+    }
+    beta ~ multi_normal(u_gamma, quad_form_diag(Omega, tau));
+  }
+  for (n in 1:N) {
+    y[n] ~ normal(x[n] * beta[jj[n]], sigma);
+  }
+}
+

The hyperprior covariance matrix is defined implicitly through the quadratic form in the code because the correlation matrix Omega and scale vector tau are more natural to inspect in the output; to output Sigma, define it as a transformed parameter. The function quad_form_diag is defined so that quad_form_diag(Sigma, tau) is equivalent to diag_matrix(tau) * Sigma * diag_matrix(tau), where diag_matrix(tau) returns the matrix with tau on the diagonal and zeroes off diagonal; the version using quad_form_diag should be faster. For details on these and other matrix arithmetic operators and functions, see the function reference manual.

+
+
+

Optimization through vectorization

+

The code in the Stan program above can be sped up dramatically by replacing the distribution statement inside the for loop:

+
for (n in 1:N) {
+  y[n] ~ normal(x[n] * beta[jj[n]], sigma);
+}
+

with the vectorized distribution statement:

+
{
+  vector[N] x_beta_jj;
+  for (n in 1:N) {
+    x_beta_jj[n] = x[n] * beta[jj[n]];
+  }
+  y ~ normal(x_beta_jj, sigma);
+}
+

The outer brackets create a local scope in which to define the variable x_beta_jj, which is then filled in a loop and used to define a vectorized distribution statement. The reason this is such a big win is that it allows us to take the log of sigma only once and it greatly reduces the size of the resulting expression graph by packing all of the work into a single distribution function.

+

Although it is tempting to redeclare beta and include a revised model block distribution statement,

+
parameters {
+  matrix[J, K] beta;
+// ...
+}
+model {
+  y ~ normal(rows_dot_product(x, beta[jj]), sigma);
+  // ...
+}
+

this fails because it breaks the vectorization for beta,6

+
beta ~ multi_normal(...);
+

which requires beta to be an array of vectors. Both vectorizations are important, so the best solution is to just use the loop above, because rows_dot_product cannot do much optimization in and of itself because there are no shared computations.

+

The code in the Stan program above also builds up an array of vectors for the outcomes and for the multivariate normal, which provides a major speedup by reducing the number of linear systems that need to be solved and differentiated.

+
{
+  matrix[K, K] Sigma_beta;
+  Sigma_beta = quad_form_diag(Omega, tau);
+  for (j in 1:J) {
+    beta[j] ~ multi_normal((u[j] * gamma)', Sigma_beta);
+  }
+}
+

In this example, the covariance matrix Sigma_beta is defined as a local variable so as not to have to repeat the quadratic form computation \(J\) times. This vectorization can be combined with the Cholesky-factor optimization in the next section.

+
+
+

Optimization through Cholesky factorization

+

The multivariate normal density and LKJ prior on correlation matrices both require their matrix parameters to be factored. Vectorizing, as in the previous section, ensures this is only done once for each density. An even better solution, both in terms of efficiency and numerical stability, is to parameterize the model directly in terms of Cholesky factors of correlation matrices using the multivariate version of the non-centered parameterization. For the model in the previous section, the program fragment to replace the full matrix prior with an equivalent Cholesky factorized prior is as follows.

+
data {
+  matrix[L, J] u;              // group predictors transposed
+  // ...
+}
+parameters {
+  matrix[K, J] z;
+  cholesky_factor_corr[K] L_Omega;
+  matrix[K, L] gamma;
+  // ...
+}
+transformed parameters {
+  matrix[K, J] beta;
+  beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;
+}
+model {
+  to_vector(z) ~ std_normal();
+  L_Omega ~ lkj_corr_cholesky(2);
+  // ...
+}
+

The data variable u was originally an array of vectors, which is efficient for access; here it is redeclared as a matrix in order to use it in matrix arithmetic. Moreover, it is transposed, along with gamma and beta, to minimize the number of transposition operations. The new parameter L_Omega is the Cholesky factor of the original correlation matrix Omega, so that

+
Omega = L_Omega * L_Omega'
+

The prior scale vector tau is unchanged, and furthermore, pre-multiplying the Cholesky factor by the scale produces the Cholesky factor of the final covariance matrix,

+
Sigma_beta
+  = quad_form_diag(Omega, tau)
+  = diag_pre_multiply(tau, L_Omega) * diag_pre_multiply(tau, L_Omega)'
+

where the diagonal pre-multiply compound operation is defined by

+
diag_pre_multiply(a, b) = diag_matrix(a) * b
+

The new variable z is declared as a matrix, the entries of which are given independent standard normal priors; the to_vector operation turns the matrix into a vector so that it can be used as a vectorized argument to the univariate normal density. This results in every column of z being a \(K\)-variate normal random vector with the identity as covariance matrix. Therefore, multiplying z by the Cholesky factor of the covariance matrix and adding the mean (u * gamma)' produces a beta distributed as in the original model, where the variance is, letting \(L = \mathrm{diag}(\tau)\,\Omega_L\),

+

\[ +\begin{aligned} +\mathbb{V}[\beta] &= \mathbb{E}\big((L \, z)(L \, z)^\top\big) \\ +&= \mathbb{E}\big(L \, z \, z^\top \, L^\top\big) \\ +&= L \, \mathbb{E}(z \, z^\top) \, L^\top \\ +&= L \, L^\top =(\mathrm{diag}(\tau)\,\Omega_L)\,(\mathrm{diag}(\tau)\,\Omega_L)^\top \\ +&= \mathrm{diag}(\tau)\,\Omega\,\mathrm{diag}(\tau) \\ +&= \Sigma. +\end{aligned} +\] Here we have used the linearity of expectations (line 2 to 3), the definition of \(\Omega = \Omega_L \, \Omega_L^\top\), and the fact that \(\mathbb{E}(z \, z^\top) = I\) since \(z \sim \mathcal{N}(0, I)\).

+

Omitting the remaining data declarations, which are the same as before with the exception of u, the optimized model is as follows.

+
parameters {
+  matrix[K, J] z;
+  cholesky_factor_corr[K] L_Omega;
+  vector<lower=0, upper=pi() / 2>[K] tau_unif;  // prior scale
+  matrix[K, L] gamma;                        // group coeffs
+  real<lower=0> sigma;                       // prediction error scale
+}
+transformed parameters {
+  vector<lower=0>[K] tau = 2.5 * tan(tau_unif);
+  matrix[K, J] beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;
+}
+model {
+  vector[N] mu;
+  for(n in 1:N) {
+    mu[n] = x[n, ] * beta[, jj[n]];
+  }
+  to_vector(z) ~ std_normal();
+  L_Omega ~ lkj_corr_cholesky(2);
+  to_vector(gamma) ~ normal(0, 5);
+  y ~ normal(mu, sigma);
+}
+

This model also reparameterizes the prior scale tau to avoid potential problems with the heavy tails of the Cauchy distribution. The statement tau_unif ~ uniform(0, pi() / 2) can be omitted from the model block because a parameter declared with lower and upper bounds implicitly has a uniform prior over that interval, so the statement would only add a constant to the log posterior.

+
+
+
+
+

Prediction, forecasting, and backcasting

+

Stan models can be used for “predicting” the values of arbitrary model unknowns. When predictions are about the future, they’re called “forecasts;” when they are predictions about the past, as in climate reconstruction or cosmology, they are sometimes called “backcasts” (or “aftcasts” or “hindcasts” or “antecasts,” depending on the author’s feelings about the opposite of “fore”).

+
+

Programming predictions

+

As a simple example, the following linear regression provides the same setup for estimating the coefficients beta as in our very first example, using y for the N observations and x for the N predictor vectors. The model parameters and model for observations are exactly the same as before.

+

To make predictions, we need to be given the number of predictions, N_new, and their predictor matrix, x_new. The predictions themselves are modeled as a parameter y_new. The model statement for the predictions is exactly the same as for the observations, with the new outcome vector y_new and prediction matrix x_new.

+
data {
+  int<lower=1> K;
+  int<lower=0> N;
+  matrix[N, K] x;
+  vector[N] y;
+
+  int<lower=0> N_new;
+  matrix[N_new, K] x_new;
+}
+parameters {
+  vector[K] beta;
+  real<lower=0> sigma;
+
+  vector[N_new] y_new;                  // predictions
+}
+model {
+  y ~ normal(x * beta, sigma);          // observed model
+
+  y_new ~ normal(x_new * beta, sigma);  // prediction model
+}
+
+
+

Predictions as generated quantities

+

Where possible, the most efficient way to generate predictions is to use the generated quantities block. This provides proper Monte Carlo (not Markov chain Monte Carlo) inference, which can have a much higher effective sample size per iteration.

+
// ...data as above...
+
+parameters {
+  vector[K] beta;
+  real<lower=0> sigma;
+}
+model {
+  y ~ normal(x * beta, sigma);
+}
+generated quantities {
+  vector[N_new] y_new;
+  for (n in 1:N_new) {
+    y_new[n] = normal_rng(x_new[n] * beta, sigma);
+  }
+}
+

Now the data are just as before, but the parameter y_new is now declared as a generated quantity, and the prediction model is removed from the model and replaced by a pseudo-random draw from a normal distribution.

+
+

Overflow in generated quantities

+

It is possible for values to overflow or underflow in generated quantities. The problem is that if the result is NaN, then any constraints placed on the variables will be violated. It is possible to check a value assigned by an RNG and reject it if it overflows, but this is both inefficient and leads to biased posterior estimates. Instead, the conditions causing overflow, such as trying to generate a negative binomial random variate with a mean of \(2^{31}\), must be intercepted and dealt with. This is typically done by reparameterizing or reimplementing the random number generator using real values rather than integers, which are upper-bounded by \(2^{31} - 1\) in Stan.

+
+
+
+
+

Multivariate outcomes

+

Most regressions are set up to model univariate observations (be they scalar, boolean, categorical, ordinal, or count). Even multinomial regressions are just repeated categorical regressions. In contrast, this section discusses regression when each observed value is multivariate. To relate multiple outcomes in a regression setting, their error terms are provided with covariance structure.

+

This section considers two cases, seemingly unrelated regressions for continuous multivariate quantities and multivariate probit regression for boolean multivariate quantities.

+
+

Seemingly unrelated regressions

+

The first model considered is the “seemingly unrelated” regressions (SUR) of econometrics where several linear regressions share predictors and use a covariance error structure rather than independent errors (Zellner 1962; Greene 2011).

+

The model is easy to write down as a regression, \[\begin{align*} +y_n &= x_n \, \beta + \epsilon_n \\ +\epsilon_n &\sim \textsf{multivariate normal}(0, \Sigma) +\end{align*}\]

+

where \(x_n\) is a \(J\)-row-vector of predictors (\(x\) is an \((N \times +J)\) matrix), \(y_n\) is a \(K\)-vector of observations, \(\beta\) is a \((K +\times J)\) matrix of regression coefficients (vector \(\beta_k\) holds coefficients for outcome \(k\)), and \(\Sigma\) is the covariance matrix governing the error. As usual, the intercept can be rolled into \(x\) as a column of ones.

+

The basic Stan code is straightforward (though see below for more optimized code for use with LKJ priors on correlation).

+
data {
+  int<lower=1> K;
+  int<lower=1> J;
+  int<lower=0> N;
+  array[N] vector[J] x;
+  array[N] vector[K] y;
+}
+parameters {
+  matrix[K, J] beta;
+  cov_matrix[K] Sigma;
+}
+model {
+  array[N] vector[K] mu;
+  for (n in 1:N) {
+    mu[n] = beta * x[n];
+  }
+  y ~ multi_normal(mu, Sigma);
+}
+

For efficiency, the multivariate normal is vectorized by precomputing the array of mean vectors and sharing the same covariance matrix.

+

Following the advice in the multivariate hierarchical priors section, we will place a weakly informative normal prior on the regression coefficients, an LKJ prior on the correlations and a half-Cauchy prior on standard deviations. The covariance structure is parameterized in terms of Cholesky factors for efficiency and arithmetic stability.

+
// ...
+parameters {
+  matrix[K, J] beta;
+  cholesky_factor_corr[K] L_Omega;
+  vector<lower=0>[K] L_sigma;
+}
+model {
+  array[N] vector[K] mu;
+  matrix[K, K] L_Sigma;
+
+  for (n in 1:N) {
+    mu[n] = beta * x[n];
+
+  }
+
+  L_Sigma = diag_pre_multiply(L_sigma, L_Omega);
+
+  to_vector(beta) ~ normal(0, 5);
+  L_Omega ~ lkj_corr_cholesky(4);
+  L_sigma ~ cauchy(0, 2.5);
+
+  y ~ multi_normal_cholesky(mu, L_Sigma);
+}
+

The Cholesky factor of the covariance matrix is then reconstructed as a local variable and used in the model by scaling the Cholesky factor of the correlation matrices. The regression coefficients get a prior all at once by converting the matrix beta to a vector.

+

If required, the full correlation or covariance matrices may be reconstructed from their Cholesky factors in the generated quantities block.

+
+
+

Multivariate probit regression

+

The multivariate probit model generates sequences of boolean variables by applying a step function to the output of a seemingly unrelated regression.

+

The observations \(y_n\) are \(D\)-vectors of boolean values (coded 0 for false, 1 for true). The values for the observations \(y_n\) are based on latent values \(z_n\) drawn from a seemingly unrelated regression model (see the previous section), \[\begin{align*} +z_n &= x_n \, \beta + \epsilon_n \\ +\epsilon_n &\sim \textsf{multivariate normal}(0, \Sigma) +\end{align*}\]

+

These are then put through the step function to produce a \(D\)-vector \(y_n\) of boolean values with elements defined by \[ +y_{n, d} = \operatorname{I}\left(z_{n, d} > 0\right), +\] where \(\operatorname{I}()\) is the indicator function taking the value 1 if its argument is true and 0 otherwise.

+

Unlike in the seemingly unrelated regressions case, here the covariance matrix \(\Sigma\) has unit standard deviations (i.e., it is a correlation matrix). As with ordinary probit and logistic regressions, letting the scale vary causes the model (which is defined only by a cutpoint at 0, not a scale) to be unidentified (see Greene (2011)).

+

Multivariate probit regression can be coded in Stan using the trick introduced by Albert and Chib (1993), where the underlying continuous value vectors \(z_n\) are coded as truncated parameters. The key to coding the model in Stan is declaring the latent vector \(z\) in two parts, based on whether the corresponding value of \(y\) is 0 or 1. Otherwise, the model is identical to the seemingly unrelated regression model in the previous section.

+

First, we introduce a sum function for two-dimensional arrays of integers; this is going to help us calculate how many total 1 values there are in \(y\).

+
functions {
+  int sum2d(array[,] int a) {
+    int s = 0;
+    for (i in 1:size(a)) {
+      s += sum(a[i]);
+    }
+    return s;
+  }
+}
+

The function is trivial, but it’s not a built-in for Stan and it’s easier to understand the rest of the model if it’s pulled into its own function so as not to create a distraction.

+

The data declaration block is much like for the seemingly unrelated regressions, but the observations y are now integers constrained to be 0 or 1.

+
data {
+  int<lower=1> K;
+  int<lower=1> D;
+  int<lower=0> N;
+  array[N, D] int<lower=0, upper=1> y;
+  array[N] vector[K] x;
+}
+

After declaring the data, there is a rather involved transformed data block whose sole purpose is to sort the data array y into positive and negative components, keeping track of indexes so that z can be easily reassembled in the transformed parameters block.

+
transformed data {
+  int<lower=0> N_pos;
+  array[sum2d(y)] int<lower=1, upper=N> n_pos;
+  array[size(n_pos)] int<lower=1, upper=D> d_pos;
+  int<lower=0> N_neg;
+  array[(N * D) - size(n_pos)] int<lower=1, upper=N> n_neg;
+  array[size(n_neg)] int<lower=1, upper=D> d_neg;
+
+  N_pos = size(n_pos);
+  N_neg = size(n_neg);
+  {
+    int i;
+    int j;
+    i = 1;
+    j = 1;
+    for (n in 1:N) {
+      for (d in 1:D) {
+        if (y[n, d] == 1) {
+          n_pos[i] = n;
+          d_pos[i] = d;
+          i += 1;
+        } else {
+          n_neg[j] = n;
+          d_neg[j] = d;
+          j += 1;
+        }
+      }
+    }
+  }
+}
+

The variables N_pos and N_neg are set to the number of true (1) and number of false (0) observations in y. The loop then fills in the sequence of indexes for the positive and negative values in four arrays.

+

The parameters are declared as follows.

+
parameters {
+  matrix[D, K] beta;
+  cholesky_factor_corr[D] L_Omega;
+  vector<lower=0>[N_pos] z_pos;
+  vector<upper=0>[N_neg] z_neg;
+}
+

These include the regression coefficients beta and the Cholesky factor of the correlation matrix, L_Omega. This time there is no scaling because the covariance matrix has unit scale (i.e., it is a correlation matrix; see above).

+

The critical part of the parameter declaration is that the latent real value \(z\) is broken into positive-constrained and negative-constrained components, whose size was conveniently calculated in the transformed data block. The transformed data block’s real work was to allow the transformed parameter block to reconstruct \(z\).

+
transformed parameters {
+  array[N] vector[D] z;
+  for (n in 1:N_pos) {
+    z[n_pos[n], d_pos[n]] = z_pos[n];
+  }
+  for (n in 1:N_neg) {
+    z[n_neg[n], d_neg[n]] = z_neg[n];
+  }
+}
+

At this point, the model is simple, pretty much recreating the seemingly unrelated regression.

+
model {
+  L_Omega ~ lkj_corr_cholesky(4);
+  to_vector(beta) ~ normal(0, 5);
+  {
+    array[N] vector[D] beta_x;
+    for (n in 1:N) {
+      beta_x[n] = beta * x[n];
+    }
+    z ~ multi_normal_cholesky(beta_x, L_Omega);
+  }
+}
+

This simple form of model is made possible by the Albert and Chib-style constraints on z.

+

Finally, the correlation matrix itself can be put back together in the generated quantities block if desired.

+
generated quantities {
+  corr_matrix[D] Omega;
+  Omega = multiply_lower_tri_self_transpose(L_Omega);
+}
+

The same could be done for the seemingly unrelated regressions in the previous section.

+
+
+
+

Applications of pseudorandom number generation

+

The main application of pseudorandom number generators (PRNGs) is for posterior inference, including prediction and posterior predictive checks. They can also be used for pure data simulation, which is like a posterior predictive check with no conditioning. See the function reference manual for a complete description of the syntax and usage of pseudorandom number generators.

+
+

Prediction

+

Consider predicting unobserved outcomes using linear regression. Given predictors \(x_1, \dotsc, x_N\) and observed outcomes \(y_1, \dotsc, y_N\), and assuming a standard linear regression with intercept \(\alpha\), slope \(\beta\), and error scale \(\sigma\), along with improper uniform priors, the posterior over the parameters given \(x\) and \(y\) is \[ +p\left(\alpha, \beta, \sigma \mid x, y \right) +\propto +\prod_{n=1}^N + \textsf{normal}\left(y_n \mid \alpha + \beta x_n, \sigma\right). +\]

+

For this model, the posterior predictive inference for a new outcome \(\tilde{y}_n\) given a predictor \(\tilde{x}_n\), conditioned on the observed data \(x\) and \(y\), is \[ +p\left(\tilde{y}_n \mid \tilde{x}_n, x, y\right) += \int_{(\alpha,\beta,\sigma)} + \textsf{normal}\left(\tilde{y}_n \mid \alpha + \beta \tilde{x}_n, \sigma\right) + \times + p\left(\alpha, \beta, \sigma \mid x, y\right) + \,\textrm{d}(\alpha,\beta,\sigma). +\]

+

To code the posterior predictive inference in Stan, a standard linear regression is combined with a random number generator in the generated quantities block.

+
data {
+  int<lower=0> N;
+  vector[N] y;
+  vector[N] x;
+  int<lower=0> N_tilde;
+  vector[N_tilde] x_tilde;
+}
+parameters {
+  real alpha;
+  real beta;
+  real<lower=0> sigma;
+}
+model {
+  y ~ normal(alpha + beta * x, sigma);
+}
+generated quantities {
+  vector[N_tilde] y_tilde;
+  for (n in 1:N_tilde) {
+    y_tilde[n] = normal_rng(alpha + beta * x_tilde[n], sigma);
+  }
+}
+

Given observed predictors \(x\) and outcomes \(y\), y_tilde will be drawn according to \(p\left(\tilde{y} \mid \tilde{x}, y, x\right)\). This means that, for example, the posterior mean for y_tilde is the estimate of the outcome that minimizes expected square error (conditioned on the data and model).

+
+
+

Posterior predictive checks

+

A good way to investigate the fit of a model to the data, a critical step in Bayesian data analysis, is to generate simulated data according to the parameters of the model. This is carried out with exactly the same procedure as before, only the observed data predictors \(x\) are used in place of new predictors \(\tilde{x}\) for unobserved outcomes. If the model fits the data well, the predictions for \(\tilde{y}\) based on \(x\) should match the observed data \(y\).

+

To code posterior predictive checks in Stan requires only a slight modification of the prediction code to use \(x\) and \(N\) in place of \(\tilde{x}\) and \(\tilde{N}\),

+
generated quantities {
+  vector[N] y_tilde;
+  for (n in 1:N) {
+    y_tilde[n] = normal_rng(alpha + beta * x[n], sigma);
+  }
+}
+

Gelman et al. (2013) recommend choosing several posterior draws \(\tilde{y}^{(1)}, \dotsc, \tilde{y}^{(M)}\) and plotting each of them alongside the data \(y\) that was actually observed. If the model fits well, the simulated \(\tilde{y}\) will look like the actual data \(y\).

+ + + +
+
+
+ + + Back to top

References

+
+Albert, J. H., and S. Chib. 1993. “Bayesian Analysis of Binary and Polychotomous Response Data.” Journal of the American Statistical Association 88: 669–79. +
+
+Barnard, John, Robert McCulloch, and Xiao-Li Meng. 2000. “Modeling Covariance Matrices in Terms of Standard Deviations and Correlations, with Application to Shrinkage.” Statistica Sinica, 1281–311. +
+
+Chung, Yeojin, Sophia Rabe-Hesketh, Vincent Dorie, Andrew Gelman, and Jingchen Liu. 2013. “A Nondegenerate Penalized Likelihood Estimator for Variance Parameters in Multilevel Models.” Psychometrika 78 (4): 685–709. +
+
+Curtis, S. McKay. 2010. “BUGS Code for Item Response Theory.” Journal of Statistical Software 36 (1): 1–34. +
+
+Gelman, Andrew, J. B. Carlin, Hal S. Stern, David B. Dunson, Aki Vehtari, and Donald B. Rubin. 2013. Bayesian Data Analysis. Third Edition. London: Chapman & Hall / CRC Press. +
+
+Gelman, Andrew, and Jennifer Hill. 2007. Data Analysis Using Regression and Multilevel-Hierarchical Models. Cambridge, United Kingdom: Cambridge University Press. +
+
+Greene, William H. 2011. Econometric Analysis. 7th ed. Prentice-Hall. +
+
+Lewandowski, Daniel, Dorota Kurowicka, and Harry Joe. 2009. “Generating Random Correlation Matrices Based on Vines and Extended Onion Method.” Journal of Multivariate Analysis 100: 1989–2001. +
+
+Seyboldt, Adrian. 2024. “Add ZeroSumNormal Distribution.” https://github.com/pyro-ppl/numpyro/pull/1751#issuecomment-1980569811. +
+
+Zellner, Arnold. 1962. “An Efficient Method of Estimating Seemingly Unrelated Regression Equations and Tests for Aggregation Bias.” Journal of the American Statistical Association 57: 348–68. +
+

Footnotes

+ +
    +
  1. Unlike in Python and R, which are interpreted, Stan is translated to C++ and compiled, so loops and assignment statements are fast. Vectorized code is faster in Stan because (a) the expression tree used to compute derivatives can be simplified, leading to fewer virtual function calls, and (b) computations that would be repeated in the looping version, such as log(sigma) in the above model, will be computed once and reused.↩︎

  2. +
  3. The Phi_approx function is a rescaled version of the inverse logit function, so while the scale is roughly the same as \(\Phi\), the tails do not match.↩︎

  4. +
  5. The Bernoulli-logit distribution builds in the log link function, taking \[\texttt{bernoulli}\mathtt{\_}\texttt{logit}\left(y \mid \alpha\right) = \texttt{bernoulli}\left(y \mid \operatorname{logit}^{-1}(\alpha)\right).\]↩︎

  6. +
  7. Gelman and Hill (2007) treat the \(\delta\) term equivalently as the location parameter in the distribution of student abilities.↩︎

  8. +
  9. The prior is named for Lewandowski, Kurowicka, and Joe, as it was derived by inverting the random correlation matrix generation strategy of Lewandowski, Kurowicka, and Joe (2009).↩︎

  10. +
  11. Thanks to Mike Lawrence for pointing this out in the GitHub issue for the manual.↩︎

  12. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/reparameterization.html b/docs/2_39/stan-users-guide/reparameterization.html new file mode 100644 index 000000000..f88283870 --- /dev/null +++ b/docs/2_39/stan-users-guide/reparameterization.html @@ -0,0 +1,1629 @@ + + + + + + + + + +Reparameterization and Change of Variables + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Reparameterization and Change of Variables

+

Stan supports a direct encoding of reparameterizations. Stan also supports changes of variables by directly incrementing the log probability accumulator with the log Jacobian of the transform.

+
+

Theoretical and practical background

+

A Bayesian posterior is technically a probability measure, which is a parameterization-invariant, abstract mathematical object.1

+

Stan’s modeling language, on the other hand, defines a probability density, which is a non-unique, parameterization-dependent function in \(\mathbb{R}^N \rightarrow \mathbb{R}^{+}\). In practice, this means a given model can be represented different ways in Stan, and different representations have different computational performances.

+

As pointed out by Gelman (2004) in a paper discussing the relation between parameterizations and Bayesian modeling, a change of parameterization often carries with it suggestions of how the model might change, because we tend to use certain natural classes of prior distributions. Thus, it’s not just that we have a fixed distribution that we want to sample from, with reparameterizations being computational aids. In addition, once we reparameterize and add prior information, the model itself typically changes, often in useful ways.

+
+
+

Reparameterizations

+

Reparameterizations may be implemented directly using the transformed parameters block or just in the model block.

+
+

Beta and Dirichlet priors

+

The beta and Dirichlet distributions may both be reparameterized from a vector of counts to use a mean and total count.

+
+

Beta distribution

+

For example, the Beta distribution is parameterized by two positive count parameters \(\alpha, \beta > 0\). The following example illustrates a hierarchical Stan model in which a vector of parameters theta is drawn i.i.d. from a Beta distribution whose parameters are themselves drawn from a hyperprior distribution.

+
parameters {
+  real<lower=0> alpha;
+  real<lower=0> beta;
+  // ...
+}
+model {
+  alpha ~ ...
+  beta ~ ...
+  for (n in 1:N) {
+    theta[n] ~ beta(alpha, beta);
+  }
+  // ...
+}
+

It is often more natural to specify hyperpriors in terms of transformed parameters. In the case of the Beta, the obvious choice for reparameterization is in terms of a mean parameter \[ +\phi = \alpha / (\alpha + \beta) +\] and total count parameter \[ +\lambda = \alpha + \beta. +\] Following Gelman et al. (2013, Chapter 5), the mean gets a uniform prior and the count parameter a Pareto prior with \(p(\lambda) \propto \lambda^{-2.5}\).

+
parameters {
+  real<lower=0, upper=1> phi;
+  real<lower=0.1> lambda;
+  // ...
+}
+transformed parameters {
+  real<lower=0> alpha = lambda * phi;
+  real<lower=0> beta = lambda * (1 - phi);
+  // ...
+}
+model {
+  phi ~ beta(1, 1); // uniform on phi, could drop
+  lambda ~ pareto(0.1, 1.5);
+  for (n in 1:N) {
+    theta[n] ~ beta(alpha, beta);
+  }
+  // ...
+}
+

The new parameters, phi and lambda, are declared in the parameters block and the parameters for the Beta distribution, alpha and beta, are declared and defined in the transformed parameters block. And if their values are not of interest, they could instead be defined as local variables in the model as follows.

+
model {
+  real alpha = lambda * phi;
+  real beta = lambda * (1 - phi);
+  // ...
+  for (n in 1:N) {
+    theta[n] ~ beta(alpha, beta);
+  }
+  // ...
+}
+

With vectorization, this could be expressed more compactly and efficiently as follows.

+
model {
+  theta ~ beta(lambda * phi, lambda * (1 - phi));
+  // ...
+}
+

If the variables alpha and beta are of interest, they can be defined in the transformed parameters block and then used in the model.

+
+
+

Jacobians not necessary

+

Because the transformed parameters are being used, rather than given a distribution, there is no need to apply a Jacobian adjustment for the transform. For example, in the beta distribution example, alpha and beta have the correct posterior distribution.

+
+
+

Dirichlet priors

+

The same thing can be done with a Dirichlet, replacing the mean for the Beta, which is a probability value, with a simplex. Assume there are \(K > 0\) dimensions being considered (\(K=1\) is trivial and \(K=2\) reduces to the beta distribution case). The traditional prior is

+
parameters {
+  vector[K] alpha;
+  array[N] simplex[K] theta;
+  // ...
+}
+model {
+  alpha ~ // ...
+  for (n in 1:N) {
+    theta[n] ~ dirichlet(alpha);
+  }
+}
+

This provides essentially \(K\) degrees of freedom, one for each dimension of alpha, and it is not obvious how to specify a reasonable prior for alpha.

+

An alternative coding is to use the mean, which is a simplex, and a total count.

+
parameters {
+  simplex[K] phi;
+  real<lower=0> kappa;
+  array[N] simplex[K] theta;
+  // ...
+}
+transformed parameters {
+  vector[K] alpha = kappa * phi;
+  // ...
+}
+model {
+  phi ~ // ...
+  kappa ~ // ...
+  for (n in 1:N) {
+    theta[n] ~ dirichlet(alpha);
+  }
+  // ...
+}
+

Now it is much easier to formulate priors, because phi is the expected value of theta and kappa (minus K) is the strength of the prior mean measured in number of prior observations.

+
+
+
+

Transforming unconstrained priors: probit and logit

+

If the variable \(u\) has a \(\textsf{uniform}(0, 1)\) distribution, then \(\operatorname{logit}(u)\) is distributed as \(\textsf{logistic}(0, 1)\). This is because inverse logit is the cumulative distribution function (CDF) for the logistic distribution, so that the logit function itself is the inverse CDF and thus maps a uniform draw in \((0, 1)\) to a logistically-distributed quantity.

+

Things work the same way for the probit case: if \(u\) has a \(\textsf{uniform}(0, 1)\) distribution, then \(\Phi^{-1}(u)\) has a \(\textsf{normal}(0, 1)\) distribution. The other way around, if \(v\) has a \(\textsf{normal}(0, 1)\) distribution, then \(\Phi(v)\) has a \(\textsf{uniform}(0, 1)\) distribution.

+

In order to use the probit and logistic as priors on variables constrained to \((0, 1)\), create an unconstrained variable and transform it appropriately. For comparison, the following Stan program fragment declares a \((0, 1)\)-constrained parameter theta and gives it a beta prior, then uses it as a parameter in a distribution (here using foo as a placeholder).

+
parameters {
+  real<lower=0, upper=1> theta;
+  // ...
+}
+model {
+  theta ~ beta(a, b);
+  // ...
+  y ~ foo(theta);
+  // ...
+}
+

If the variables a and b are one, then this imposes a uniform distribution on theta. If a and b are both less than one, then the density on theta has a U shape, whereas if they are both greater than one, the density of theta has an inverted-U or more bell-like shape.

+

Roughly the same result can be achieved with unbounded parameters that are probit or inverse-logit-transformed. For example,

+
parameters {
+  real theta_raw;
+  // ...
+}
+transformed parameters {
+  real<lower=0, upper=1> theta = inv_logit(theta_raw);
+  // ...
+}
+model {
+  theta_raw ~ logistic(mu, sigma);
+  // ...
+  y ~ foo(theta);
+  // ...
+}
+

In this model, an unconstrained parameter theta_raw gets a logistic prior, and then the transformed parameter theta is defined to be the inverse logit of theta_raw. In this parameterization, inv_logit(mu) is the median of the implied prior on theta (the inverse logit is monotonic, so it preserves quantiles but not means). The prior distribution on theta will be flat if sigma is one and mu is zero, and will be U-shaped if sigma is larger than one and bell shaped if sigma is less than one.

+

When moving from a variable in \((0, 1)\) to a simplex, the same trick may be performed using the softmax function, which is a multinomial generalization of the inverse logit function. First, consider a simplex parameter with a Dirichlet prior.

+
parameters {
+  simplex[K] theta;
+  // ...
+}
+model {
+  theta ~ dirichlet(a);
+  // ...
+  y ~ foo(theta);
+}
+

Now a is a vector with K rows, but it has the same shape properties as the pair a and b for a beta; the beta distribution is just the distribution of the first component of a Dirichlet with parameter vector \([a b]^{\top}\). To formulate an unconstrained prior, the exact same strategy works as for the beta.

+
parameters {
+  vector[K] theta_raw;
+  // ...
+}
+transformed parameters {
+  simplex[K] theta = softmax(theta_raw);
+  // ...
+}
+model {
+  theta_raw ~ multi_normal_cholesky(mu, L_Sigma);
+}
+

The multivariate normal is used for convenience and efficiency with its Cholesky-factor parameterization. Now the mean is controlled by softmax(mu), but we have additional control of covariance through L_Sigma at the expense of having on the order of \(K^2\) parameters in the prior rather than order \(K\). If no covariance is desired, the number of parameters can be reduced back to \(K\) using a vectorized normal distribution as follows.

+
theta_raw ~ normal(mu, sigma);
+

where either or both of mu and sigma can be vectors.

+
+
+
+

Changes of variables

+

Changes of variables are applied when the transformation of a parameter is characterized by a distribution. The standard textbook example is the lognormal distribution, which is the distribution of a variable \(y > 0\) whose logarithm \(\log y\) has a normal distribution. The distribution is being assigned to \(\log y\).

+

The change of variables requires an adjustment to the probability to account for the distortion caused by the transform. For this to work, univariate changes of variables must be monotonic and differentiable everywhere in their support. Multivariate changes of variables must be injective and differentiable everywhere in their support, and they must map \(\mathbb{R}^N \rightarrow \mathbb{R}^N\).

+

The probability must be scaled by a Jacobian adjustment equal to the absolute determinant of the Jacobian of the transform. In the univariate case, the Jacobian adjustment is simply the absolute derivative of the transform.

+

In the case of log normals, if \(y\)’s logarithm is normal with mean \(\mu\) and standard deviation \(\sigma\), then the distribution of \(y\) is given by \[ +p(y) += \textsf{normal}(\log y \mid \mu, \sigma) \, \left| \frac{d}{dy} \log y \right| += \textsf{normal}(\log y \mid \mu, \sigma) \, \frac{1}{y}. +\] Stan works on the log scale to prevent underflow, where \[ +\log p(y) += +\log \textsf{normal}(\log y \mid \mu, \sigma) - \log y. +\]

+

In Stan, the change of variables can be applied in the sampling statement. To adjust for the curvature, the log probability accumulator is incremented with the log absolute derivative of the transform. The lognormal distribution can thus be implemented directly in Stan as follows.2

+
parameters {
+  real<lower=0> y;
+  // ...
+}
+model {
+  log(y) ~ normal(mu, sigma);
+  target += -log(y);
+  // ...
+}
+

It is important, as always, to declare appropriate constraints on parameters; here y is constrained to be positive.

+

It would be slightly more efficient to define a local variable for the logarithm, as follows.

+
model {
+  real log_y;
+  log_y = log(y);
+  log_y ~ normal(mu, sigma);
+  target += -log_y;
+  // ...
+}
+

If y were declared as data instead of as a parameter, then the adjustment can be ignored because the data will be constant and Stan only requires the log probability up to a constant.

+
+

Change of variables vs. transformations

+

This section illustrates the difference between a change of variables and a simple variable transformation. A transformation samples a parameter, then transforms it, whereas a change of variables transforms a parameter, then samples it. Only the latter requires a Jacobian adjustment.

+

It does not matter whether the probability function is expressed using a distribution statement, such as

+
log(y) ~ normal(mu, sigma);
+

or as an increment to the log probability function, as in

+
target += normal_lpdf(log(y) | mu, sigma);
+
+

Gamma and inverse gamma distribution

+

Like the log normal, the inverse gamma distribution is a distribution of variables whose inverse has a gamma distribution. This section contrasts two approaches, first with a transform, then with a change of variables.

+

The transform based approach to defining y_inv to have an inverse gamma distribution can be coded as follows.

+
parameters {
+  real<lower=0> y;
+}
+transformed parameters {
+  real<lower=0> y_inv;
+  y_inv = 1 / y;
+}
+model {
+  y ~ gamma(2,4);
+}
+

The change-of-variables approach to defining y_inv to have an inverse gamma distribution can be coded as follows.

+
parameters {
+  real<lower=0> y_inv;
+}
+transformed parameters {
+  real<lower=0> y;
+  y = 1 / y_inv;  // change variables
+  jacobian += -2 * log(y_inv); // Jacobian adjustment
+}
+model {
+  y ~ gamma(2,4);
+}
+

The Jacobian adjustment is the log of the absolute derivative of the transform, which in this case is

+

\[ +\log \left| \frac{d}{du} \left( \frac{1}{u} \right) \right| += \log \left| - u^{-2} \right| += \log u^{-2} += -2 \log u. +\]

+
+
+
+

Multivariate changes of variables

+

In the case of a multivariate transform, the log of the absolute determinant of the Jacobian of the transform must be added to the log probability accumulator. In Stan, this can be coded as follows in the general case where the Jacobian is a full matrix.

+
parameters {
+  vector[K] u;      // multivariate parameter
+   // ...
+}
+transformed parameters {
+  vector[K] v;     // transformed parameter
+  matrix[K, K] J;   // Jacobian matrix of transform
+  // ... compute v as a function of u ...
+  // ... compute J[m, n] = d.v[m] / d.u[n] ...
+  jacobian += log(abs(determinant(J)));
+  // ...
+}
+model {
+  v ~ // ...
+  // ...
+}
+

If the determinant of the Jacobian is known analytically, it will be more efficient to apply it directly than to call the determinant function, which is neither efficient nor particularly stable numerically.

+

In many cases, the Jacobian matrix will be triangular, so that only the diagonal elements will be required for the determinant calculation. Triangular Jacobians arise when each element v[k] of the transformed parameter vector only depends on elements u[1], …, u[k] of the parameter vector. For triangular matrices, the determinant is the product of the diagonal elements, so the transformed parameters block of the above model can be simplified and made more efficient by recoding as follows.

+
transformed parameters {
+  // ...
+  vector[K] J_diag;  // diagonals of Jacobian matrix
+  // ...
+  // ... compute J[k, k] = d.v[k] / d.u[k] ...
+  jacobian += sum(log(J_diag));
+  // ...
+}
+
+
+
+

Vectors with varying bounds

+

Stan allows scalar and non-scalar upper and lower bounds to be declared in the constraints for a container data type. The transforms are calculated and their log Jacobians added to the log density accumulator; the Jacobian calculations are described in detail in the reference manual chapter on constrained parameter transforms.

+
+

Varying lower bounds

+

For example, suppose there is a vector parameter \(\alpha\) with a vector \(L\) of lower bounds. The simplest way to deal with this if \(L\) is a constant is to shift a lower-bounded parameter.

+
data {
+  int N;
+  vector[N] L;  // lower bounds
+  // ...
+}
+parameters {
+  vector<lower=L>[N] alpha_raw;
+  // ...
+}
+

The above is equivalent to manually calculating the vector bounds by the following.

+
data {
+  int N;
+  vector[N] L;  // lower bounds
+  // ...
+}
+parameters {
+  vector<lower=0>[N] alpha_raw;
+  // ...
+}
+transformed parameters {
+  vector[N] alpha = L + alpha_raw;
+  // ...
+}
+

The Jacobian for adding a constant is one, so its log drops out of the log density.

+

Even if the lower bound is a parameter rather than data, there is no Jacobian required, because the transform from \((L, \alpha_{\textrm{raw}})\) to \((L + \alpha_{\textrm{raw}}, \alpha_{\textrm{raw}})\) produces a Jacobian derivative matrix with a unit determinant.

+

It’s also possible to implement the transform using an array or vector of parameters as bounds (with the requirement that the type of the variable must match the bound type) in the following.

+
data {
+  int N;
+  vector[N] L;  // lower bounds
+  // ...
+}
+parameters {
+  vector<lower=0>[N] alpha_raw;
+  vector<lower=L + alpha_raw>[N] alpha;
+  // ...
+}
+

This is equivalent to directly transforming an unconstrained parameter and accounting for the Jacobian.

+
data {
+  int N;
+  vector[N] L;  // lower bounds
+  // ...
+}
+parameters {
+  vector[N] alpha_raw;
+  // ...
+}
+transformed parameters {
+  vector[N] alpha = L + exp(alpha_raw);
+  jacobian += sum(alpha_raw); // log Jacobian
+  // ...
+}
+model {
+  // ...
+}
+

The adjustment is the log Jacobian determinant of the transform mapping \(\alpha_{\textrm{raw}}\) to \(\alpha = L + \exp(\alpha_{\textrm{raw}})\). The details are simple in this case because the Jacobian is diagonal; see the reference manual chapter on constrained parameter transforms for full details. Here \(L\) can even be a vector containing parameters that don’t depend on \(\alpha_{\textrm{raw}}\); if the bounds do depend on \(\alpha_{\textrm{raw}}\) then a revised Jacobian needs to be calculated taking into account the dependencies.

+
+
+

Varying upper and lower bounds

+

Suppose there are lower and upper bounds that vary by parameter. These can be applied to shift and rescale a parameter constrained to \((0, 1)\). This is easily accomplished as follows.

+
data {
+  int N;
+  vector[N] L;  // lower bounds
+  vector[N] U;  // upper bounds
+  // ...
+}
+parameters {
+  vector<lower=L, upper=U>[N] alpha;
+  // ...
+}
+

The same may be accomplished by manually constructing the transform as follows.

+
data {
+  int N;
+  vector[N] L;  // lower bounds
+  vector[N] U;  // upper bounds
+  // ...
+}
+parameters {
+  vector<lower=0, upper=1>[N] alpha_raw;
+  // ...
+}
+transformed parameters {
+  vector[N] alpha = L + (U - L) .* alpha_raw;
+}
+

The expression U - L is multiplied by alpha_raw elementwise to produce a vector of variables in \((0, U-L)\), then adding \(L\) results in a variable ranging over \((L, U)\).

+

In this case, it is important that \(L\) and \(U\) are constants, otherwise a Jacobian would be required when multiplying by \(U - L\).

+ + + +
+
+
+ + + Back to top

References

+
+Gelman, Andrew. 2004. “Parameterization and Bayesian Modeling.” Journal of the American Statistical Association 99: 537–45. +
+

Footnotes

+ +
    +
  1. This is in contrast to (penalized) maximum likelihood estimates, which are not parameterization invariant.↩︎

  2. +
  3. This example is for illustrative purposes only; the recommended way to implement the lognormal distribution in Stan is with the built-in lognormal probability function; see the functions reference manual for details.↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/simulation-based-calibration.html b/docs/2_39/stan-users-guide/simulation-based-calibration.html new file mode 100644 index 000000000..b3a86f5bf --- /dev/null +++ b/docs/2_39/stan-users-guide/simulation-based-calibration.html @@ -0,0 +1,1619 @@ + + + + + + + + + +Simulation-Based Calibration Checking + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Simulation-Based Calibration Checking

+

A Bayesian posterior is calibrated if the posterior intervals have appropriate coverage. For example, 80% intervals are expected to contain the true parameter 80% of the time. If data is generated according to a model, Bayesian posterior inference with respect to that model is calibrated by construction. Simulation-based calibration checking (SBC) exploits this property of Bayesian inference to assess the soundness of a posterior sampler. Roughly, the way it works is by simulating parameters according to the prior, then simulating data conditioned on the simulated parameters, then testing posterior calibration of the inference algorithm over independently simulated data sets. This chapter follows Talts et al. (2018), which improves on the original approach developed by Cook, Gelman, and Rubin (2006). See also Modrák et al. (2023) for further improvements.

+
+

Bayes is calibrated by construction

+

Suppose a Bayesian model is given in the form of a prior density \(p(\theta)\) and sampling density \(p(y \mid \theta).\) Now consider a process that first simulates parameters from the prior, \[ +\theta^{\textrm{sim}} \sim p(\theta), +\] and then simulates data given the parameters, \[ +y^{\textrm{sim}} \sim p(y \mid \theta^{\textrm{sim}}). +\] By the definition of conditional densities, the simulated data and parameters constitute an independent draw from the model’s joint distribution, \[ +(y^{\textrm{sim}}, \theta^{\textrm{sim}}) \sim p(y, \theta). +\] From Bayes’s rule, it follows that for any observed (fixed) data \(y\), \[ +p(\theta \mid y) \propto p(y, \theta). +\] Therefore, the simulated parameters constitute a draw from the posterior for the simulated data, \[ +\theta^{\textrm{sim}} \sim p(\theta \mid y^{\textrm{sim}}). +\] Now consider an algorithm that produces a sequence of draws from the posterior given this simulated data, \[ +\theta^{(1)}, \ldots, \theta^{(M)} +\sim p(\theta \mid y^{\textrm{sim}}). +\] Because \(\theta^{\textrm{sim}}\) is also distributed as a draw from the posterior, the rank statistics of \(\theta^{\textrm{sim}}\) with respect to \(\theta^{(1)}, \ldots \theta^{(M)}\) should be uniform.

+

This is one way to define calibration, because it follows that posterior intervals will have appropriate coverage (Dawid 1982; Gneiting, Balabdaoui, and Raftery 2007). If the rank of \(\theta^{\textrm{sim}}\) is uniform among the draws \(\theta^{(1)}, \ldots, \theta^{(M)},\) then for any 90% interval selected, the probability the true value \(\theta^{\textrm{sim}}\) falls in it will also be 90%. The same goes for any other posterior interval.

+
+
+

Simulation-based calibration checking

+

Suppose the Bayesian model to test has joint density \[ +p(y, \theta) = p(y \mid \theta) \cdot p(\theta), +\] with data \(y\) and parameters \(\theta\) (both are typically multivariate). Simulation-based calibration checking works by generating \(N\) simulated parameter and data pairs according to the joint density, \[ +(y^{\textrm{sim}(1)}, \theta^{\textrm{sim}(1)}), +\ldots, (y^{\textrm{sim}(N)}, \theta^{\textrm{sim}(N)}), +\sim p(y, \theta). +\] For each simulated data set \(y^{\textrm{sim}(n)}\), use the algorithm to be tested to generate \(M\) posterior draws, which if everything is working properly, will be distributed marginally as \[ +\theta^{(n, 1)}, \ldots, \theta^{(n, M)} +\sim p(\theta \mid y^{\textrm{sim}(n)}). +\] For a simulation \(n\) and parameter \(k\), the rank of the simulated parameter among the posterior draws is \[\begin{eqnarray*} +r_{n, k} +& = & +\textrm{rank}(\theta_k^{\textrm{sim}(n)}, + (\theta^{(n, 1)}, \ldots, \theta^{(n,M)})) +\\[4pt] +& = & +\sum_{m = 1}^M + \textrm{I}[\theta_k^{(n,m)} < \theta_k^{\textrm{sim}(n)}]. +\end{eqnarray*}\] That is, the rank is the number of posterior draws \(\theta^{(n,m)}_k\) that are less than the simulated draw \(\theta^{\textrm{sim}(n)}_k.\)

+

If the algorithm generates posterior draws according to the posterior, the ranks should have uniform discrete distribution from \(0\) to \(M\), so that the ranks plus one are uniformly distributed from \(1\) to \(M + 1\), \[ +r_{n, k} + 1 +\sim +\textrm{categorical}\! \left(\frac{1}{M + 1}, \ldots, \frac{1}{M + 1}\right). +\] Simulation-based calibration checking uses this expected behavior to test the calibration of each parameter of a model on simulated data. Talts et al. (2018) suggest plotting binned counts of \(r_{1:N, +k}\) for different parameters \(k\); Säilynoja, Bürkner, and Vehtari (2022) provide a graphical test for discrete uniformity testing. Before uniformity testing the Markov chains should be thinned to remove autocorrelation as these uniformity tests assume independence (Säilynoja, Bürkner, and Vehtari 2022).

+
+
+

SBC in Stan

+

Running simulation-based calibration checking in Stan will test whether Stan’s sampling algorithm can sample from the posterior associated with data generated according to the model. The data simulation and posterior fitting and rank calculation can all be done within a single Stan program. Then Stan’s posterior sampler has to be run multiple times. Each run produces a rank for each parameter being assessed for uniformity. The total set of ranks can then be tested for uniformity.

+
+

Example model

+

For illustration, a very simple model will suffice. Suppose there are two parameters \((\mu, \sigma)\) with independent priors, \[ +\mu \sim \textrm{normal}(0, 1), +\] and \[ +\sigma \sim \textrm{lognormal}(0, 1). +\] The data \(y = y_1, \ldots, y_N\) is drawn conditionally independently given the parameters, \[ +y_n \sim \textrm{normal}(\mu, \sigma). +\] The joint prior density is thus \[ +p(\mu, \sigma) += \textrm{normal}(\mu \mid 0, 1) + \cdot \textrm{lognormal}(\sigma \mid 0, 1), +\] and the data model is \[ +p(y \mid \mu, \sigma) += \prod_{n=1}^N \textrm{normal}(y_n \mid \mu, \sigma). +\]

+

For example, suppose the following two parameter values are drawn from the prior in the first simulation, \[ +(\mu^{\textrm{sim(1)}}, \sigma^{\textrm{sim(1)}}) = (1.01, 0.23). +\] Then data \(y^{\textrm{sim}(1)} \sim p(y \mid \mu^{\textrm{sim(1)}}, +\sigma^{\textrm{sim(1)}})\) is drawn according to the data model. Next, \(M = 4\) draws are taken from the posterior \(\mu^{(1,m)}, \sigma^{(1,m)} \sim p(\mu, \sigma \mid y^{\textrm{sim}(1)})\), \[ +\begin{array}{r|rr} +m & \mu^{(1,m)} & \sigma^{(1,m)} +\\ \hline +1 & 1.07 & 0.33 +\\ +2 & -0.32 & 0.14 +\\ +3 & -0.99 & 0.26 +\\ +4 & 1.51 & 0.31 +\end{array} +\] Then the comparisons on which ranks are based look as follows, \[ +\begin{array}{r|cc} +m & \textrm{I}(\mu^{(1,m)} < \mu^{\textrm{sim}(1)}) +& \textrm{I}(\sigma^{(1,m)} < \sigma^{\textrm{sim}(1)}) +\\ \hline +1 & 0 & 0 +\\ +2 & 1 & 1 +\\ +3 & 1 & 0 +\\ +4 & 0 & 0 +\end{array} +\] The ranks are the column sums, \(r_{1,1} = 2\) and \(r_{1,2} = 1\). Because the simulated parameters are distributed according to the posterior, these ranks should be distributed uniformly between \(0\) and \(M\), the number of posterior draws.

+
+
+

Testing a Stan program with simulation-based calibration checking

+

To code simulation-based calibration checking in a Stan program, the transformed data block can be used to simulate parameters and data from the model. The parameters, transformed parameters, and model block then define the model over the simulated data. Then, in the generated quantities block, the program records an indicator for whether each parameter is less than the simulated value. As shown above, the rank is then the sum of the simulated indicator variables.

+
transformed data {  // simulate parameters from the priors, then data
+  real mu_sim = normal_rng(0, 1);  // simulated location
+  real<lower=0> sigma_sim = lognormal_rng(0, 1);  // simulated scale
+  int<lower=0> J = 10;  // number of simulated data points
+  vector[J] y_sim;  // simulated data
+  for (j in 1:J) {
+    y_sim[j] = normal_rng(mu_sim, sigma_sim);
+  }
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {  // same priors and data model as the simulation above
+  mu ~ normal(0, 1);
+  sigma ~ lognormal(0, 1);
+  y_sim ~ normal(mu, sigma);
+}
+generated quantities {
+  array[2] int<lower=0, upper=1> lt_sim  // 1 if the draw is below the sim
+      = { mu < mu_sim, sigma < sigma_sim };
+}
+

To avoid confusion with the number of simulated data sets used for simulation-based calibration checking, J is used for the number of simulated data points.

+

The model is implemented twice—once as a data generating process using random number generators in the transformed data block, then again in the parameters and model block. This duplication is a blessing and a curse. The curse is that it’s more work and twice the chance for errors. The blessing is that by implementing the model twice and comparing results, the chance of there being a mistake in the model is reduced.

+
+
+

Pseudocode for simulation-based calibration checking

+

The entire simulation-based calibration checking process is as follows, where

+
    +
  • p(theta) is the prior density
  • +
  • p(y | theta) is the sampling density
  • +
  • K is the number of parameters
  • +
  • N is the total number of simulated data sets and fits
  • +
  • M is the number of posterior draws per simulated data set
  • +
+
SBC(p(theta), p(y | theta), K, N, M)
+------------------------------------
+for (n in 1:N) {
+    // simulate parameters and data
+    theta(sim(n)) ~ p(theta)
+    y(sim(n)) ~ p(y | theta(sim(n)))
+
+    // posterior draws given simulated data
+    for (m in 1:M) {
+        theta(n, m) ~ p(theta | y(sim(n)))
+    }
+    // calculate rank of sim among posterior draws
+    for (k in 1:K) {
+        rank(n, k) = SUM_m I(theta[k](n,m) < theta[k](sim(n)))
+    }
+}
+// test uniformity of each parameter
+for (k in 1:K) {
+    test uniformity of rank(1:N, k)
+}
+
+
+

The importance of thinning

+

The draws from the posterior are assumed to be roughly independent. If they are not, artifacts may arise in the uniformity tests due to correlation in the posterior draws (Säilynoja, Bürkner, and Vehtari 2022). Thus it is best to thin the posterior draws down to the point where the effective sample size is roughly the same as the number of thinned draws. This may require running the code a few times to judge the number of draws required to produce a target effective sample size. This operation can be put into a loop that doubles the number of iterations until all parameters have an effective sample size of M, and then thins down to M draws.

+
+
+
+

Testing uniformity

+

A simple, though not very highly powered, \(\chi^2\) test for uniformity can be formulated by binning the ranks \(0:M\) into \(J\) bins and testing that the bins all have roughly the expected number of draws in them. Many other tests for uniformity are possible. For example, Säilynoja, Bürkner, and Vehtari (2022) use a pointwise binomial model for the empirical cumulative distribution function and adjust it to obtain a simultaneous envelope that can be used as a graphical uniformity test.

+

The bins don’t need to be exactly the same size. In general, if \(b_j\) is the number of ranks that fall into bin \(j\) and \(e_j\) is the number of ranks expected to fall into bin \(j\) (which will be proportional to its size under uniformity), the test statistic is \[ +X^2 = \sum_{j = 1}^J \frac{(b_j - e_j)^2}{e_j}. +\] The terms are approximately square standard normal, so that under the null hypothesis of uniformity, \[ +X^2 \sim \textrm{chiSquared}(J - 1), +\] with the corresponding \(p\)-value given by the complementary cumulative distribution function (CCDF) of \(\textrm{chiSquared}(J - 1)\) applied to \(X^2\). Because this test relies on the binomial being approximately normal, the traditional advice is to make sure the expected count in each bin is at least five, i.e., \(e_j \geq 5.\)

+
+

Indexing to simplify arithmetic

+

Because there are \(M + 1\) possible ranks, with \(J\) bins, it is easiest to have \(M + 1\) be divisible by \(J\). For instance, if \(J = 20\) and \(M = 999\), then there are \(1000\) possible ranks and an expected count in each bin of \(\frac{M + 1}{J} = 50.\)

+

Distributing the ranks into bins is another fiddly operation that can be done with integer arithmetic or the floor operation. Using floor, the following function determines the bin for a rank, \[ +\textrm{bin}(r_{n, k}, M, J) += 1 + \left\lfloor \frac{r_{n, k}}{(M + 1) / J} \right\rfloor. +\] For example, with \(M = 999\) and \(J = 20\), \((M + 1) / J = 50\). The lowest rank checks out, \[ +\textrm{bin}(0, 999, 20) = 1 + \lfloor 0 / 50 \rfloor = 1, +\] as does the 50th rank, \[ +\textrm{bin}(49, 999, 20) = 1 + \lfloor 49 / 50 \rfloor = 1, +\] and the 51st is appropriately put in the second bin, \[ +\textrm{bin}(50, 999, 20) = 1 + \lfloor 50 / 50 \rfloor = 2. +\] The highest rank also checks out, with \(\textrm{bin}(999, 999, 20) = 1 + \lfloor 999 / 50 \rfloor = 20.\)

+

To summarize, the following pseudocode computes the \(b_j\) values for the \(\chi^2\) test or for visualization in a histogram.

+
Inputs: M draws, J bins, N simulations, ranks r[n, k] for parameter k
+// r[n, k] is in 0:M; M + 1 is assumed to be divisible by J
+b[1:J] = 0
+for (n in 1:N) {
+  ++b[1 + floor(r[n, k] * J / (M + 1))]
+}
+

where the ++b[n] notation is a common form of syntactic sugar for b[n] = b[n] + 1.

+

In general, a great deal of care must be taken in visualizing discrete data because it’s easy to introduce off-by-one errors and artifacts at the edges because of the way boundaries are computed by default. That’s why so much attention must be devoted to indexing and binning.

+
+
+
+

Examples of simulation-based calibration checking

+

This section will show what the results look like when the tests pass and then when they fail. The passing test will compare a normal model and normal data generating process, whereas the second will compare a normal model with a Student-t data generating process. The first will produce calibrated posteriors, the second will not.

+
+

When things go right

+

Consider the following simple model for a normal distribution with standard normal and lognormal priors on the location and scale parameters. \[\begin{eqnarray*} +\mu & \sim & \textrm{normal}(0, 1) +\\[4pt] +\sigma & \sim & \textrm{lognormal}(0, 1) +\\[4pt] +y_{1:10} & \sim & \textrm{normal}(\mu, \sigma). +\end{eqnarray*}\] The Stan program for evaluating SBC for this model is

+
transformed data {
+  // Draw the simulated parameters from their priors.
+  real mu_sim = normal_rng(0, 1);
+  real<lower=0> sigma_sim = lognormal_rng(0, 1);
+
+  // Simulate J data points from the same normal data model that is
+  // fit below, so the generating process and the model match.
+  int<lower=0> J = 10;
+  vector[J] y_sim;
+  for (j in 1:J) {
+    y_sim[j] = normal_rng(mu_sim, sigma_sim);
+  }
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  mu ~ normal(0, 1);
+  sigma ~ lognormal(0, 1);
+
+  y_sim ~ normal(mu, sigma);
+}
+generated quantities {
+  // Indicators whose sums over the posterior draws give the ranks.
+  array[2] int<lower=0, upper=1> I_lt_sim
+      = { mu < mu_sim, sigma < sigma_sim };
+}
+

After running this for enough iterations so that the effective sample size is larger than \(M\), then thinning to \(M\) draws (here \(M = 999\)), the ranks are computed and binned, and then plotted.

+
+
+
+ +
+
+Figure 1: Simulation based calibration plots for location and scale of a normal model with standard normal prior on the location, standard lognormal prior on the scale. Both histograms appear uniform, which is consistent with inference being well calibrated. +
+
+
+
+
+

When things go wrong

+

Now consider using a Student-t data generating process with a normal model. Compare the apparent uniformity of the well specified model with the ill-specified situation with Student-t generative process and normal model.

+
+
+
+ +
+
+Figure 2: Simulation based calibration plots for location and scale of a normal model with standard normal prior on the location and standard lognormal prior on the scale, with a mismatched generative model using a Student-t data model with 4 degrees of freedom. The mean histogram appears uniform, but the scale parameter shows simulated values much smaller than fit values, clearly signaling the lack of calibration. +
+
+
+
+
+

When Stan’s sampler goes wrong

+

The examples in the previous sections show hard-coded pathological behavior. The usual application of SBC is to diagnose problems with a sampler.

+

This can happen in Stan with well-specified models if the posterior geometry is too difficult (usually due to extreme stiffness that varies). A simple example is the eight schools problem, the data for which consists of sample means \(y_j\) and standard deviations \(\sigma_j\) of differences in test score after the same intervention in \(J = 8\) different schools. Rubin (1981) applies a hierarchical model for a meta-analysis of the results, estimating the mean intervention effect and a varying effect for each school. With a standard parameterization and weak priors, this model has very challenging posterior geometry, as shown by Talts et al. (2018); this section replicates their results.

+

The meta-analysis model has parameters for a population mean \(\mu\) and standard deviation \(\tau > 0\) as well as the effect \(\theta_j\) of the treatment in each school. The model has weak normal and half-normal priors for the population-level parameters, \[\begin{eqnarray*} +\mu & \sim & \textrm{normal}(0, 5) +\\[4pt] +\tau & \sim & \textrm{normal}_{+}(0, 5). +\end{eqnarray*}\] School level effects are modeled as normal given the population parameters, \[ +\theta_j \sim \textrm{normal}(\mu, \tau). +\] The data is modeled as in a meta-analysis, given the school effect and sample standard deviation in the school, \[ +y_j \sim \textrm{normal}(\theta_j, \sigma_j). +\]

+

This model can be coded in Stan with a data-generating process that simulates the parameters and then simulates data according to the parameters.

+
transformed data {  // simulate parameters, then data, from the model
+  real mu_sim = normal_rng(0, 5);  // simulated population mean
+  real tau_sim = abs(normal_rng(0, 5));  // half-normal draw via abs()
+  int<lower=0> J = 8;  // number of schools
+  array[J] real theta_sim = normal_rng(rep_vector(mu_sim, J), tau_sim);
+  array[J] real<lower=0> sigma = abs(normal_rng(rep_vector(0, J), 5));
+  array[J] real y = normal_rng(theta_sim, sigma);
+}
+parameters {
+  real mu;
+  real<lower=0> tau;
+  array[J] real theta;
+}
+model {
+  tau ~ normal(0, 5);  // half-normal given the lower=0 constraint on tau
+  mu ~ normal(0, 5);
+  theta ~ normal(mu, tau);
+  y ~ normal(theta, sigma);
+}
+generated quantities {  // indicators summed over draws to give the ranks
+  int<lower=0, upper=1> mu_lt_sim = mu < mu_sim;
+  int<lower=0, upper=1> tau_lt_sim = tau < tau_sim;
+  int<lower=0, upper=1> theta1_lt_sim = theta[1] < theta_sim[1];
+}
+

As usual for simulation-based calibration checking, the transformed data encodes the data-generating process using random number generators. Here, the population parameters \(\mu\) and \(\tau\) are first simulated, then the school-level effects \(\theta\), and then finally the observed data \(\sigma_j\) and \(y_j.\) The parameters and model are a direct encoding of the mathematical presentation using vectorized sampling statements. The generated quantities block includes indicators for parameter comparisons, saving only \(\theta_1\) because the schools are exchangeable in the simulation.

+

When fitting the model in Stan, multiple warning messages are provided that the sampler has diverged. The divergence warnings are in Stan’s sampler precisely to diagnose the sampler’s inability to follow the curvature in the posterior and provide independent confirmation that Stan’s sampler cannot fit this model as specified.

+

SBC also diagnoses the problem. Here are the rank plots for running \(N = +200\) simulations with 1000 warmup iterations and \(M = 999\) draws per simulation used to compute the ranks.

+
+
+
+
+
+
+
+
+ +
+
+(a) \(\mu\) +
+
+
+
+
+
+
+
+ +
+
+(b) \(\tau\) +
+
+
+
+
+
+
+
+ +
+
+(c) \({\theta}_1\) +
+
+
+
+
+
+
+Figure 3: Simulation based calibration plots for the eight-schools model with centered parameterization in Stan. The geometry is too difficult for the NUTS sampler to handle, as indicated by the plot for \(\theta_1\) (Figure 3 (c)). +
+
+
+

Although the population mean and standard deviation \(\mu\) and \(\tau\) appear well calibrated, \(\theta_1\) tells a very different story. The simulated values are much smaller than the values fit from the data. This is because Stan’s no-U-turn sampler is unable to sample with the model formulated in the centered parameterization—the posterior geometry has regions of extremely high curvature as \(\tau\) approaches zero and the \(\theta_j\) become highly constrained. The chapter on reparameterization explains how to remedy this problem and fit this kind of hierarchical model with Stan.

+ + + +
+
+
+ + Back to top

References

+
+Cook, Samantha R., Andrew Gelman, and Donald B Rubin. 2006. “Validation of Software for Bayesian Models Using Posterior Quantiles.” Journal of Computational and Graphical Statistics 15 (3): 675–92. https://doi.org/10.1198/106186006X136976. +
+
+Dawid, A Philip. 1982. “The Well-Calibrated Bayesian.” Journal of the American Statistical Association 77 (379): 605–10. +
+
+Gneiting, Tilmann, Fadoua Balabdaoui, and Adrian E Raftery. 2007. “Probabilistic Forecasts, Calibration and Sharpness.” Journal of the Royal Statistical Society: Series B (Statistical Methodology) 69 (2): 243–68. +
+
+Modrák, Martin, Angie H Moon, Shinyoung Kim, Paul Bürkner, Niko Huurre, Kateřina Faltejsková, Andrew Gelman, and Aki Vehtari. 2023. “Simulation-Based Calibration Checking for Bayesian Computation: The Choice of Test Quantities Shapes Sensitivity.” Bayesian Analysis 20 (2): 461. +
+
+Rubin, Donald B. 1981. “Estimation in Parallel Randomized Experiments.” Journal of Educational Statistics 6: 377–401. +
+
+Säilynoja, Teemu, Paul-Christian Bürkner, and Aki Vehtari. 2022. “Graphical Test for Discrete Uniformity and Its Applications in Goodness-of-Fit Evaluation and Multiple Sample Comparison.” Statistics and Computing 32 (2): 32. +
+
+Talts, Sean, Michael Betancourt, Daniel Simpson, Aki Vehtari, and Andrew Gelman. 2018. “Validating Bayesian Inference Algorithms with Simulation-Based Calibration.” arXiv, no. 1804.06788. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/sparse-ragged.html b/docs/2_39/stan-users-guide/sparse-ragged.html new file mode 100644 index 000000000..f7b72e969 --- /dev/null +++ b/docs/2_39/stan-users-guide/sparse-ragged.html @@ -0,0 +1,1371 @@ + + + + + + + + + +Sparse and Ragged Data Structures + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Sparse and Ragged Data Structures

+

Stan does not directly support either sparse or ragged data structures, though both can be accommodated with some programming effort. The sparse matrices chapter introduces a special-purpose sparse matrix times dense vector multiplication, which should be used where applicable; this chapter covers more general data structures.

+
+

Sparse data structures

+

Coding sparse data structures is as easy as moving from a matrix-like data structure to a database-like data structure. For example, consider the coding of sparse data for the IRT models discussed in the item-response model section. There are \(J\) students and \(K\) questions, and if every student answers every question, then it is practical to declare the data as a \(J \times K\) array of answers.

+
data {
+  int<lower=1> J;                        // number of students
+  int<lower=1> K;                        // number of questions
+  array[J, K] int<lower=0, upper=1> y;   // answer of student j to question k
+  // ...
+}
+model {
+  for (j in 1:J) {
+    for (k in 1:K) {
+      y[j, k] ~ bernoulli_logit(delta[k] * (alpha[j] - beta[k]));
+    }
+  }
+  // ...
+}
+

When not every student is given every question, the dense array coding will no longer work, because Stan does not support undefined values.

+

The following example has \(J=3\) and \(K=4\), with missing responses shown as NA, as in R.

+

\[\begin{equation*} +y += +\left[ +\begin{array}{cccc} +0 & 1 & \mbox{NA} & 1 +\\ +0 & \mbox{NA} & \mbox{NA} & 1 +\\ +\mbox{NA} & 0 & \mbox{NA} & \mbox{NA} +\end{array} +\right] +\end{equation*}\]

+

There is no support within Stan for R’s NA values, so this data structure cannot be used directly. Instead, it must be converted to a “long form” as in a database, with columns indicating the indices along with the value. With columns \(jj\) and \(kk\) used for the indexes (following Gelman and Hill (2007)), the 2-D array \(y\) is recoded as a table. The number of rows in the table equals the number of defined array elements, here \(y_{1,1} = 0\), \(y_{1,2} = 1\), up to \(y_{3,2} = 0\). As the array becomes larger and sparser, the long form becomes the more economical encoding.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
jjkky
110
121
141
210
241
320
+

Letting \(N\) be the number of \(y\) that are defined, here \(N=6\), the data and model can be formulated as follows.

+
data {
+  // ...
+  int<lower=1> N;  // number of defined responses
+  array[N] int<lower=1, upper=J> jj;  // student index for response n
+  array[N] int<lower=1, upper=K> kk;  // question index for response n
+  array[N] int<lower=0, upper=1> y;  // response n
+  // ...
+}
+model {
+  for (n in 1:N) {
+    y[n] ~ bernoulli_logit(delta[kk[n]]
+                           * (alpha[jj[n]] - beta[kk[n]]));
+  }
+  // ...
+}
+

In the situation where there are no missing values, the two model formulations produce exactly the same log posterior density.

+
+
+

Ragged data structures

+

Ragged arrays are arrays that are not rectangular, but have different sized entries. This kind of structure crops up when there are different numbers of observations per entry.

+

A general approach to dealing with ragged structure is to move to a full database-like data structure as discussed in the previous section. A more compact approach is possible with some indexing into a linear array.

+

For example, consider a data structure for three groups, each of which has a different number of observations.

+
+ +
+
+
+
+

\(y_1 = \left[1.3 \ \ 2.4 \ \ 0.9\right]\\\) \(y_2 = \left[-1.8 \ \ -0.1\right]\\\) \(y_3 = \left[12.9 \ \ 18.7 \ \ 42.9 \ \ 4.7\right]\)

+
+
+

\(z = [1.3 \ \ 2.4 \ \ 0.9 \ \ -1.8 \ \ -0.1 \ \ 12.9 \ \ 18.7 \ \ 42.9 \ \ 4.7]\\\) \(s = \{ 3 \ \ 2 \ \ 4 \}\)

+
+
+
+

On the left is the definition of a ragged data structure \(y\) with three rows of different sizes (\(y_1\) is size 3, \(y_2\) size 2, and \(y_3\) size 4). On the right is an example of how to code the data in Stan, using a single vector \(z\) to hold all the values and a separate array of integers \(s\) to hold the group row sizes. In this example, \(y_1 = z_{1:3}\), \(y_2 = z_{4:5}\), and \(y_3 = z_{6:9}\).

+

Suppose the model is a simple varying intercept model, which, using vectorized notation, would yield a log-likelihood \[\begin{equation*} +\sum_{n=1}^3 \log \textsf{normal}(y_n \mid \mu_n, \sigma). +\end{equation*}\] There’s no direct way to encode this in Stan.

+

A full database type structure could be used, as in the sparse example, but this is inefficient, wasting space for unnecessary indices and not allowing vector-based density operations. A better way to code this data is as a single list of values, with a separate data structure indicating the sizes of each subarray. This is indicated on the right of the example. This coding uses a single array for the values and a separate array for the sizes of each row.

+

The model can then be coded up using slicing operations as follows.

+
data {
+  int<lower=0> N;   // # observations
+  int<lower=0> K;   // # of groups
+  vector[N] y;      // observations
+  array[K] int s;   // group sizes
+  // ...
+}
+model {
+  int pos;          // index in y of the first observation of group k
+  pos = 1;
+  for (k in 1:K) {
+    segment(y, pos, s[k]) ~ normal(mu[k], sigma);
+    pos = pos + s[k];
+  }
+}
+

This coding allows for efficient vectorization, which is worth the copy cost entailed by the segment() vector slicing operation.

+ + + +
+
+ + Back to top

References

+
+Gelman, Andrew, and Jennifer Hill. 2007. Data Analysis Using Regression and Multilevel-Hierarchical Models. Cambridge, United Kingdom: Cambridge University Press. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/style-guide.html b/docs/2_39/stan-users-guide/style-guide.html new file mode 100644 index 000000000..d2fc86cd0 --- /dev/null +++ b/docs/2_39/stan-users-guide/style-guide.html @@ -0,0 +1,1467 @@ + + + + + + + + + +Stan Program Style Guide + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Stan Program Style Guide

+

This chapter describes the preferred style for laying out Stan models. These are not rules of the language, but simply recommendations for laying out programs in a text editor. Although these recommendations may seem arbitrary, they are similar to those of many teams for many programming languages. Like rules for typesetting text, the goal is to achieve readability without wasting white space either vertically or horizontally. This is the style used in the Stan documentation, and should align with the auto-formatting ability of stanc3.

+
+

Choose a consistent style

+

The most important point of style is consistency. Consistent coding style makes it easier to read not only a single program, but multiple programs. So when departing from this style guide, the number one recommendation is to do so consistently.

+
+
+

Line length

+

Line lengths should not exceed 80 characters.1

+

This is a typical recommendation for many programming language style guides because it makes it easier to lay out text edit windows side by side and to view the code on the web without wrapping, easier to view diffs from version control, etc. About the only thing that is sacrificed is laying out expressions on a single line.

+
+
+

File extensions

+

The recommended file extension for Stan model files is .stan. Files which contain only function definitions (intended for use with #include) should be given the .stanfunctions extension. A .stanfunctions file only includes the function definition and does not require the functions{} block wrapped around the function. A simple example of usage where the function is defined and saved in the file foo.stanfunctions:

+
real foo(real x, real y) {
+  return sqrt(x * log(y));
+}
+

The function foo can be accessed in the Stan program by including the path to the foo.stanfunctions file as:

+
functions {
+  #include foo.stanfunctions;
+}
+// ...body...
+

For Stan data dump files, the recommended extension is .R, or more informatively, .data.R. For JSON output, the recommended extension is .json.

+
+
+

Variable naming

+

The recommended variable naming is to follow C/C++ naming conventions, in which variables are lowercase, with the underscore character (_) used as a separator. Thus it is preferred to use sigma_y, rather than the run together sigmay, camel-case sigmaY, or capitalized camel-case SigmaY. An exception is often made for terms appearing in mathematical expressions with standard names, like A for a matrix.

+

Another exception to the lowercasing recommendation, which follows the C/C++ conventions, is for size constants, for which the recommended form is a single uppercase letter. The reason for this is that it allows the loop variables to match. So loops over the indices of an \(M \times N\) matrix \(a\) would look as follows.

+
for (m in 1:M) {
+  for (n in 1:N) {
+    a[m, n] = ...
+  }
+}
+
+
+

Local variable scope

+

Declaring local variables in the block in which they are used aids in understanding programs because it cuts down on the amount of text scanning or memory required to reunite the declaration and definition.

+

The following Stan program corresponds to a direct translation of a BUGS model, which uses a different element of mu in each iteration.

+
model {
+  array[N] real mu;  // one element per observation, as in BUGS
+  for (n in 1:N) {
+    mu[n] = alpha * x[n] + beta;
+    y[n] ~ normal(mu[n], sigma);
+  }
+}
+

Because variables can be reused in Stan and because they should be declared locally for clarity, this model should be recoded as follows.

+
model {
+  for (n in 1:N) {
+    real mu;  // local to the loop body and reused on each iteration
+    mu = alpha * x[n] + beta;
+    y[n] ~ normal(mu, sigma);
+  }
+}
+

The local variable can be eliminated altogether, as follows.

+
model {
+  for (n in 1:N) {
+    y[n] ~ normal(alpha * x[n] + beta, sigma);
+  }
+}
+

There is unlikely to be any measurable efficiency difference between the last two implementations, but both should be a bit more efficient than the BUGS translation.

+
+

Scope of compound structures with componentwise assignment

+

In the case of local variables for compound structures, such as arrays, vectors, or matrices, if they are built up component by component rather than in large chunks, it can be more efficient to declare a local variable for the structure outside of the block in which it is used. This allows it to be allocated once and then reused.

+
model {
+  vector[K] mu;  // declared outside both loops and reused for every n
+  for (n in 1:N) {
+    for (k in 1:K) {
+      mu[k] = // ...
+    }
+    y[n] ~ multi_normal(mu, Sigma);
+  }
+}
+

In this case, the vector mu will be allocated outside of both loops, and used a total of N times.

+
+
+
+

Parentheses and brackets

+
+

Braces for single-statement blocks

+

Single-statement blocks can be rendered in several ways. The preferred style is fully bracketed with the statement appearing on its own line, as follows.

+
for (n in 1:N) {
+  y[n] ~ normal(mu, 1);
+}
+

The use of loops and conditionals without brackets can be dangerous. For instance, consider this program.

+
for (n in 1:N)
+  z[n] ~ normal(nu, 1);
+  y[n] ~ normal(mu, 1);
+

Because Stan ignores whitespace and the parser completes a statement as eagerly as possible (just as in C++), the previous program is equivalent to the following program.

+
for (n in 1:N) {
+  z[n] ~ normal(nu, 1);
+}
+y[n] ~ normal(mu, 1);
+

Therefore, one should prefer to use braces. The only exception is when nesting if-else clauses, where the else branch contains exactly one conditional. Then, it is preferred to place the following if on the same line, as in the following.

+
if (x) {
+  // ...
+} else if (y) {
+  // ...
+} else {
+  // ...
+}
+
+
+

Parentheses in nested operator expressions

+

The preferred style for operators minimizes parentheses. This reduces clutter in code that can actually make it harder to read expressions. For example, the expression a + b * c is preferred to the equivalent a + (b * c) or (a + (b * c)). The operator precedences and associativities follow those of pretty much every programming language including Fortran, C++, R, and Python; full details are provided in the reference manual.

+

Similarly, comparison operators can usually be written with minimal bracketing, with the form y[n] > 0 || x[n] != 0 preferred to the bracketed form (y[n] > 0) || (x[n] != 0).

+
+
+

No open brackets on own line

+

Vertical space is valuable as it controls how much of a program you can see. The preferred Stan style is with the opening brace appearing at the end of a line.

+
for (n in 1:N) {
+  y[n] ~ normal(mu, 1);
+}
+

This also goes for program blocks, such as the parameters, transformed data, and transformed parameters blocks, which should look as follows.

+
transformed parameters {
+  real sigma;
+  // ...
+}
+

The exception to this rule is local blocks which only exist for scoping reasons. The opening brace of these blocks is not associated with any control flow or block structure, so it should appear on its own line.

+
+
+
+

Conditionals

+

While Stan supports the full C++-style conditional syntax, allowing real or integer values to act as conditions, real values should be avoided. For a real-valued x, one should use

+
if (x != 0) { ...
+

in place of

+
if (x) { ...
+

Beyond stylistic choices, one should be careful using real values in a conditional expression, as direct comparison can have unexpected results due to numerical accuracy.

+
+
+

Functions

+

Functions are laid out the same way as in languages such as Java and C++. For example,

+
real foo(real x, real y) {
+  return sqrt(x * log(y));
+}
+

The return type is flush left, the parentheses for the arguments are adjacent to the arguments and function name, and there is a space after the comma for arguments after the first. The open curly brace for the body is on the same line as the function name, following the layout of loops and conditionals. The body itself is indented; here we use two spaces. The close curly brace appears on its own line.

+

If function names or argument lists are long, they can be written as

+
matrix
+function_to_do_some_hairy_algebra(matrix thingamabob,
+                                  vector doohickey2) {
+  // ...body...
+}
+

The function starts a new line, under the type. The arguments are aligned under each other.

+

Function documentation should follow the Javadoc and Doxygen styles. Here’s an example repeated from the documenting functions section.

+
/**
+ * Return a data matrix of specified size with rows
+ * corresponding to items and the first column filled
+ * with the value 1 to represent the intercept and the
+ * remaining columns randomly filled with unit-normal draws.
+ *
+ * @param N Number of rows correspond to data items
+ * @param K Number of predictors, counting the intercept, per
+ *          item.
+ * @return Simulated predictor matrix.
+ */
+matrix predictors_rng(int N, int K) {
+  // ...
+}
+

The open comment is /**, asterisks are aligned below the first asterisk of the open comment, and the end comment */ is also aligned on the asterisk. The tags @param and @return are used to label function arguments (i.e., parameters) and return values.

+
+
+

White space

+

Stan allows spaces between elements of a program. The white space characters allowed in Stan programs include the space (ASCII 0x20), line feed (ASCII 0x0A), carriage return (0x0D), and tab (0x09). Stan treats all whitespace characters interchangeably, with any sequence of whitespace characters being syntactically equivalent to a single space character. Nevertheless, effective use of whitespace is the key to good program layout.

+
+

Line breaks between statements and declarations

+

Each statement of a program should appear on its own line. Declaring multiple variables of the same type can be accomplished in a single statement with the syntax

+
real mu, sigma;
+
+
+

No tabs

+

Stan programs should not contain tab characters. Using tabs to layout a program is highly unportable because the number of spaces represented by a single tab character varies depending on which program is doing the rendering and how it is configured.

+
+
+

Two-character indents

+

Stan has standardized on two space characters of indentation, which is the standard convention for C/C++ code.

+
+
+

Space between if, { and condition

+

Use a space after ifs. For instance, use if (x < y) {..., not if(x < y){ ....

+
+
+

No space for function calls

+

There should not be space between a function name and the arguments it applies to. For instance, use normal(0, 1), not normal (0,1).

+
+
+

Spaces around operators

+

There should be spaces around binary operators. For instance, use y[1] = x, not y[1]=x, use (x + y) * z not (x+y)*z.

+

Unary operators are written without a space, such as in -x, !y.

+
+
+

No spaces in type constraints

+

Another exception to the above rule is when the assignment operator (=) is used inside a type constraint, such as

+
real<lower=0> x;
+

Spaces should still be used in arithmetic and following commas, as in

+
real<lower=0, upper=a * x + b> x;
+
+
+

Breaking expressions across lines

+

Sometimes expressions are too long to fit on a single line. In that case, the recommended form is to break before an operator,2 aligning the operator to a term above to indicate scoping. For example, use the following form

+
vector[J] p_distance = Phi((distance_tolerance - overshot)
+                           ./ ((x + overshot) * sigma_distance))
+                       - Phi((-overshot)
+                             ./ ((x + overshot) * sigma_distance));
+

Here, the elementwise division operator (./) is aligned to clearly signal the division is occurring inside the parentheses, while the subtraction indicates it is between the function applications (Phi).

+

For functions with multiple arguments, break after a comma and line the next argument up underneath as follows.

+
y[n] ~ normal(alpha + beta * x + gamma * y,
+              pow(tau, -0.5));
+
+
+

Spaces after commas

+

Commas should always be followed by spaces, including in function arguments, sequence literals, between variable declarations, etc.

+

For example,

+
normal(alpha * x[n] + beta, sigma);
+

is preferred over

+
normal(alpha * x[n] + beta,sigma);
+
+
+

Unix newlines

+

Wherever possible, Stan programs should use a single line feed character to separate lines. All of the Stan developers (so far, at least) work on Unix-like operating systems and using a standard newline makes the programs easier for us to read and share.

+
+

Platform specificity of newlines

+

Newlines are signaled in Unix-like operating systems such as Linux and Mac OS X with a single line-feed (LF) character (ASCII code point 0x0A). Newlines are signaled in Windows using two characters, a carriage return (CR) character (ASCII code point 0x0D) followed by a line-feed (LF) character.

+ + +
+
+
+
+ + + Back to top

Footnotes

+ +
    +
  1. Even 80 characters may be too many for rendering in print; for instance, in this manual, the number of code characters that fit on a line is about 65.↩︎

  2. +
  3. This is the usual convention in both typesetting and other programming languages. Neither R nor BUGS allows breaks before an operator because they allow newlines to signal the end of an expression or statement.↩︎

  4. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/survival.html b/docs/2_39/stan-users-guide/survival.html new file mode 100644 index 000000000..314136c57 --- /dev/null +++ b/docs/2_39/stan-users-guide/survival.html @@ -0,0 +1,1670 @@ + + + + + + + + + +Survival Models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Survival Models

+

Survival models apply to animals and plants as well as inanimate objects such as machine parts or electrical components. Survival models arise when there is an event of interest for a group of subjects, machine component, or other item that is

+
    +
  • certain to occur after some amount of time,
  • +
  • but only measured for a fixed period of time, during which the event may not have occurred for all subjects.
  • +
+

For example, one might wish to estimate the distribution of time to failure for solid state drives in a data center, but only measure drives for a two year period, after which some number will have failed and some will still be in service.

+

Survival models are often used comparatively, such as comparing time to death of patients diagnosed with stage one liver cancer under a new treatment and a standard treatment (pure controls are not allowed when there is an effective existing treatment for a serious condition). During a two year trial, some patients will die and others will survive.

+

Survival models may involve covariates, such as the factory at which a component is manufactured, the day on which it is manufactured, and the amount of usage it gets. A clinical trial might be adjusted for the sex and age of a cancer patient or the hospital at which treatment is received.

+

Survival models come in two main flavors, parametric and semi-parametric. In a parametric model, the survival time of a subject is modeled explicitly using a parametric probability distribution. There is a great deal of flexibility in how the parametric probability distribution is constructed. The sections below consider exponential and Weibull distributed survival times.

+

Rather than explicitly modeling a parametric survival probability, semi-parametric survival models instead model the relative effect on survival of covariates. The final sections of this chapter consider the proportional hazards survival model.

+
+

Exponential survival model

+

The exponential distribution is commonly used in survival models where there is a constant risk of failure that does not go up the longer a subject survives. This is because the exponential distribution is memoryless in the sense that if \(T \sim \textrm{exponential}(\lambda)\) for some rate \(\lambda > 0,\) then \[\begin{equation*} +\Pr[T > t] = \Pr[T > t + t' \mid T > t']. +\end{equation*}\] If component survival times are distributed exponentially, it means the distribution of time to failure is the same no matter how long the item has already survived. This can be a reasonable assumption for electronic components, but is not a reasonable model for animal survival.

+

The exponential survival model has a single parameter for the rate, which assumes all subjects have the same distribution of failure time (this assumption is relaxed in the next section by introducing per-subject covariates). With the rate parameterization, the expected survival time for a component with survival time represented as the random variable \(T\) is \[\begin{equation*} +\mathbb{E}[T \mid \lambda] = \frac{1}{\lambda}. +\end{equation*}\] The exponential distribution is sometimes parameterized in terms of a scale (i.e., inverse rate) \(\beta = 1 / \lambda\).

+

The data for a survival model consists of two components. First, there is a vector \(t \in (0, \infty)^N\) of \(N\) observed failure times. Second, there is a censoring time \(t^{\textrm{cens}}\) such that failure times greater than \(t^{\textrm{cens}}\) are not observed. The censoring time assumption imposes a constraint which requires \(t_n < t^{\textrm{cens}}\) for all \(n \in 1{:}N.\) For the censored subjects, the only thing required in the model is their total count, \(N^\textrm{cens}\) (their covariates are also required for models with covariates).

+

The model for the observed failure times is exponential, so that for \(n \in 1{:}N,\) \[\begin{equation*} +t_n \sim \textrm{exponential}(\lambda). +\end{equation*}\]

+

The model for the censored failure times is also exponential. All that is known of a censored item is that its failure time is greater than the censoring time, so each censored item contributes a factor to the likelihood of \[\begin{equation*} +\Pr[T > t^{\textrm{cens}}] = 1 - F_T(t^{\textrm{cens}}), +\end{equation*}\] where \(F_T\) is the cumulative distribution function (cdf) of survival time \(T\) (\(F_X(x) = \Pr[X \leq x]\) is standard notation for the cdf of a random variable \(X\)). The function \(1 - F_T(t)\) is the complementary cumulative distribution function (ccdf), and it is used directly to define the likelihood \[\begin{eqnarray*} +p(t, t^{\textrm{cens}}, N^{\textrm{cens}} \mid \lambda) +& = & +\prod_{n=1}^N \textrm{exponential}(t_n \mid \lambda) +\cdot +\prod_{n=1}^{N^{\textrm{cens}}} +\textrm{exponentialCCDF}(t^{\textrm{cens}} \mid \lambda) +\\ +& = & +\prod_{n=1}^N \textrm{exponential}(t_n \mid \lambda) +\cdot +\textrm{exponentialCCDF}(t^{\textrm{cens}} \mid \lambda)^{N^{\textrm{cens}}}. +\end{eqnarray*}\]

+

On the log scale, that’s \[\begin{eqnarray*} +\log p(t, t^{\textrm{cens}}, N^{\textrm{cens}} \mid \lambda) +& = & +\sum_{n=1}^N \log \textrm{exponential}(t_n \mid \lambda) +\\ +& & { } + N^{\textrm{cens}} \cdot \log \textrm{exponentialCCDF}(t^{\textrm{cens}} \mid \lambda). +\end{eqnarray*}\]

+

The model can be completed with a standard lognormal prior on \(\lambda,\) \[\begin{equation*} +\lambda \sim \textrm{lognormal}(0, 1), +\end{equation*}\] which is reasonable if failure times are in the range of 0.1 to 10 time units, because that’s roughly the 95% central interval for a variable distributed \(\textrm{lognormal}(0, 1)\). In general, the range of the prior (and likelihood!) should be adjusted with prior knowledge of expected failure times.

+
+

Stan program

+

The data for a simple survival analysis without covariates can be coded as follows.

+
data {
+  int<lower=0> N;
+  vector[N] t;
+  int<lower=0> N_cens;
+  real<lower=0> t_cens;
+}
+

In this program, N is the number of uncensored observations and t contains the times of the uncensored observations. There are a further N_cens items that are right censored at time t_cens. Right censoring means that if the time to failure is greater than

+

t_cens, it is only observed that the part survived until time t_cens. In the case where there are no covariates, the model only needs the number of censored items because they all share the same censoring time.

+

There is a single rate parameter, the inverse of which is the expected time to failure.

+
parameters {
+  real<lower=0> lambda;
+}
+

The exponential survival model and the prior are coded directly using vectorized distribution and ccdf statements. This both simplifies the code and makes it more computationally efficient by sharing computation across instances.

+
model {
+  t ~ exponential(lambda);
+  target += N_cens * exponential_lccdf(t_cens | lambda);
+
+  lambda ~ lognormal(0, 1);
+}
+

The likelihood for rate lambda is just the density of the exponential distribution for the observed failure times. The Stan code is vectorized, modeling each entry of the vector t as having an exponential distribution with rate lambda. This data model could have been written as

+
for (n in 1:N) {
+  t[n] ~ exponential(lambda);
+}
+

The log likelihood contribution given censored items is the number of censored items times the log complementary cumulative distribution function (lccdf) at the censoring time of the exponential distribution with rate lambda. The log likelihood terms arising from the censored events could have been added to the target log density one at a time,

+
for (n in 1:N_cens)
+  target += exponential_lccdf(t_cens | lambda);
+

to define the same log density, but it is much more efficient computationally to multiply by a constant than do a handful of sequential additions.

+
+
+
+

Weibull survival model

+

The Weibull distribution is a popular alternative to the exponential distribution in cases where there is a decreasing probability of survival as a subject gets older. The Weibull distribution models this by generalizing the exponential distribution to include a power-law trend.

+

The Weibull distribution is parameterized by a shape \(\alpha > 0\) and scale \(\sigma > 0.\) For an outcome \(t \geq 0\), the Weibull distribution’s probability density function is \[\begin{equation*} +\textrm{Weibull}(t \mid \alpha, \sigma) += \frac{\alpha}{\sigma} + \cdot \left( \frac{t}{\sigma} \right)^{\alpha - 1} + \cdot \exp\left(-\left(\frac{t}{\sigma}\right)^{\alpha}\right). +\end{equation*}\] In contrast, recall that the exponential distribution can be expressed using a rate (inverse scale) parameter \(\beta > 0\) with probability density function \[\begin{equation*} +\textrm{exponential}(t \mid \beta) = +\beta +\cdot +\exp(-\beta \cdot t). +\end{equation*}\] When \(\alpha = 1,\) the Weibull distribution reduces to an exponential distribution, \[\begin{equation*} +\textrm{Weibull}(t \mid 1, \sigma) += +\textrm{exponential}\!\left(t \,\bigg|\, \frac{1}{\sigma}\right). +\end{equation*}\] In other words, the Weibull is a continuous expansion of the exponential distribution.

+

If \(T \sim \textrm{Weibull}(\alpha, \sigma),\) then the expected survival time is \[\begin{equation*} +\mathbb{E}[T] = \sigma \cdot \Gamma\!\left(1 + \frac{1}{\alpha}\right), +\end{equation*}\] where the \(\Gamma\) function is the continuous completion of the factorial function (i.e., \(\Gamma(1 + n) = n!\ \) for \(n \in +\mathbb{N}\)). As \(\alpha \rightarrow 0\) for a fixed \(\sigma\) or as \(\sigma \rightarrow \infty\) for a fixed \(\alpha\), the expected survival time goes to infinity.

+

There are three regimes of the Weibull distribution.

+
    +
  • \(\alpha < 1.\) A subject is more likely to fail early. When \(\alpha +< 1,\) the Weibull density approaches infinity as \(t \rightarrow 0.\)

  • +
  • \(\alpha = 1.\) The Weibull distribution reduces to the exponential distribution, with a constant rate of failure over time. When \(\alpha = 1,\) the Weibull density approaches \(1 / \sigma\) as \(t +\rightarrow 0.\)

  • +
  • \(\alpha > 1.\) Subjects are less likely to fail early. When \(\alpha > 1,\) the Weibull density approaches zero as \(t \rightarrow 0.\)

  • +
+

With \(\alpha \leq 1,\) the mode is zero (\(t = 0\)), whereas with \(\alpha > 1,\) the mode is nonzero (\(t > 0\)).

+
+

Stan program

+

With Stan, one can just swap the exponential distribution for the Weibull distribution with the appropriate parameters and the model remains essentially the same. Recall the exponential model’s parameters and model block.

+
parameters {
+  real<lower=0> beta;
+}
+model {
+  t ~ exponential(beta);
+  target += N_cens * exponential_lccdf(t_cens | beta);
+
+  beta ~ lognormal(0, 1);
+}
+

The Stan program for the Weibull model just swaps in the Weibull distribution and complementary cumulative distribution function with shape (alpha) and scale (sigma) parameters.

+
parameters {
+  real<lower=0> alpha;
+  real<lower=0> sigma;
+}
+model {
+  t ~ weibull(alpha, sigma);
+  target += N_cens * weibull_lccdf(t_cens | alpha, sigma);
+
+  alpha ~ lognormal(0, 1);
+  sigma ~ lognormal(0, 1);
+}
+

As usual, if more is known about expected survival times, alpha and sigma should be given more informative priors.

+
+
+
+

Survival with covariates

+

Suppose that for each of \(n \in 1{:}N\) items observed, both censored and uncensored, there is a covariate (row) vector \(x_n \in +\mathbb{R}^K.\) For example, a clinical trial may include the age (or a one-hot encoding of an age group) and the sex of a participant; an electronic component might include a one-hot encoding of the factory at which it was manufactured and a covariate for the load under which it has been run.

+

Survival with covariates replaces what is essentially a simple regression with only an intercept \(\lambda\) with a generalized linear model with a log link, where the rate for item \(n\) is \[\begin{equation*} +\lambda_n = \exp(x_n \cdot \beta), +\end{equation*}\] where \(\beta \in \mathbb{R}^K\) is a \(K\)-vector of regression coefficients. Thus \[\begin{equation*} +t_n \sim \textrm{exponential}(\lambda_n). +\end{equation*}\] The censored items have probability \[\begin{equation*} +\Pr[n\textrm{-th censored}] = +\textrm{exponentialCCDF}(t^{\textrm{cens}} \mid \exp(x^{\textrm{cens}}_n +\cdot \beta)). +\end{equation*}\]

+

The covariates form an \(N \times K\) data matrix, \(x \in +\mathbb{R}^{N \times K}\). An intercept can be introduced by adding a column of 1 values to \(x\).

+

A Stan program for the exponential survival model with covariates is as follows. It relies on the fact that the order of failure times (t and t_cens) corresponds to the ordering of items in the covariate matrices (x and x_cens).

+
data {
+  int<lower=0> N;
+  vector[N] t;
+  int<lower=0> N_cens;
+  real<lower=0> t_cens;
+  int<lower=0> K;
+  matrix[N, K] x;
+  matrix[N_cens, K] x_cens;
+}
+parameters {
+  vector[K] gamma;
+}
+model {
+  gamma ~ normal(0, 2);
+
+  t ~ exponential(exp(x * gamma));
+  target += exponential_lccdf(t_cens | exp(x_cens * gamma));
+}
+

Both the distribution statement for uncensored times and the log density increment statement for censored times are vectorized, one in terms of the exponential distribution and one in terms of the log complementary cumulative distribution function.

+
+
+

Hazard and survival functions

+

Suppose \(T\) is a random variable representing a survival time, with a smooth cumulative distribution function \[\begin{equation*} +F_T(t) = \Pr[T \leq t], +\end{equation*}\] so that its probability density function is \[\begin{equation*} +p_T(t) = \frac{\textrm{d}}{\textrm{d}t} F_T(t). +\end{equation*}\]

+

The survival function \(S(t)\) is the probability of surviving until at least time \(t\), which is just the complementary cumulative distribution function (ccdf) of the survival random variable \(T\), \[\begin{equation*} +S(t) = 1 - F_T(t). +\end{equation*}\] The survival function appeared in the Stan model in the previous section as the likelihood for items that did not fail during the period of the experiment (i.e., the censored failure times for the items that survived through the trial period).

+

The hazard function \(h(t)\) is the instantaneous risk of not surviving past time \(t\) assuming survival until time \(t\), which is given by \[\begin{equation*} +h(t) = \frac{p_T(t)}{S(t)} = \frac{p_T(t)}{1 - F_T(t)}. +\end{equation*}\] The cumulative hazard function \(H(t)\) is defined to be the accumulated hazard over time, \[\begin{equation*} +H(t) = \int_0^t h(u) \, \textrm{d}u. +\end{equation*}\]

+

The hazard function and survival function are related through the differential equation \[\begin{eqnarray*} +h(t) & = & -\frac{\textrm{d}}{\textrm{d}t} \log S(t). +\\[4pt] +& = & -\frac{1}{S(t)} \frac{\textrm{d}}{\textrm{d}t} S(t) +\\[4pt] +& = & \frac{1}{S(t)} \frac{\textrm{d}}{\textrm{d}t} -(1 - F_T(t)) +\\[4pt] +& = & \frac{1}{S(t)} \frac{\textrm{d}}{\textrm{d}t} (F_T(t) - 1) +\\[4pt] +& = & \frac{1}{S(t)} \frac{\textrm{d}}{\textrm{d}t} F_T(t) +\\[4pt] +& = & \frac{p_T(t)}{S(t)}. +\end{eqnarray*}\]

+

If \(T \sim \textrm{exponential}(\beta)\) has an exponential distribution, then its hazard function is constant, \[\begin{eqnarray*} +h(t \mid \beta) +& = & \frac{p_T(t \mid \beta)}{S(t \mid \beta)} +\\[4pt] +& = & \frac{\textrm{exponential}(t \mid \beta)}{1 - \textrm{exponentialCDF}(t \mid \beta)} +\\[4pt] +& = & \frac{\beta \cdot \exp(-\beta \cdot t)} + {1 - (1 - \exp(-\beta \cdot t))} +\\[4pt] +& = & \frac{\beta \cdot \exp(-\beta \cdot t)} + {\exp(-\beta \cdot t)} +\\[4pt] +& = & \beta. +\end{eqnarray*}\] The exponential distribution is the only distribution of survival times with a constant hazard function.

+

If \(T \sim \textrm{Weibull}(\alpha, \sigma),\) then its hazard function is \[\begin{eqnarray*} +h(t \mid \alpha, \sigma) +& = & \frac{p_T(t \mid \alpha, \sigma)}{S(t \mid \alpha, \sigma)} +\\[4pt] +& = & \frac{\textrm{Weibull}(t \mid \alpha, \sigma)}{1 - \textrm{WeibullCDF}(t \mid \alpha, \sigma)} +\\[4pt] +& = & +\frac{\frac{\alpha}{\sigma} \cdot \left( \frac{t}{\sigma} \right)^{\alpha - 1} + \cdot \exp\left(-\left(\frac{t}{\sigma} \right)^\alpha\right)} + {1 - \left(1 - + \exp\left(-\left(\frac{t}{\sigma}\right)^\alpha + \right)\right)} +\\[4pt] +& = & \frac{\alpha}{\sigma} + \cdot + \left( \frac{t}{\sigma} \right)^{\alpha - 1}. +\end{eqnarray*}\]

+

If \(\alpha = 1\) the hazard is constant over time (which also follows from the fact that the Weibull distribution reduces to the exponential distribution when \(\alpha = 1\)). When \(\alpha > 1,\) the hazard grows as time passes, whereas when \(\alpha < 1,\) it decreases as time passes.

+
+
+

Proportional hazards model

+

The exponential model is parametric in that it specifies an explicit parametric form for the distribution of survival times. Cox (1972) introduced a semi-parametric survival model specified directly in terms of a hazard function \(h(t)\) rather than in terms of a distribution over survival times. Cox’s model is semi-parametric in that it does not model the full hazard function, instead modeling only the proportional differences in hazards among subjects.

+

Let \(x_n \in \mathbb{R}^K\) be a (row) vector of covariates for subject \(n\) so that the full covariate data matrix is \(x \in \mathbb{R}^{N \times +K}\). In Cox’s model, the hazard function for subject \(n\) is defined conditionally in terms of their covariates \(x_n\) and the parameter vector \(\beta \in \mathbb{R}^K\) as \[\begin{equation*} +h(t \mid x_n, \beta) = h_0(t) \cdot \exp(x_n \cdot \beta), +\end{equation*}\] where \(h_0(t)\) is a shared baseline hazard function and \(x_n \cdot +\beta = \sum_{k=1}^K x_{n, k} \cdot \beta_k\) is a row vector-vector product.

+

In the semi-parametric, proportional hazards model, the baseline hazard function \(h_0(t)\) is not modeled. This is why it is called “semi-parametric.” Only the factor \(\exp(x_n \cdot \beta),\) which determines how individual \(n\) varies by a proportion from the baseline hazard, is modeled. This is why it’s called “proportional hazards.”

+

Cox’s proportional hazards model is not fully generative. There is no way to generate the times of failure because the baseline hazard function \(h_0(t)\) is unmodeled; if the baseline hazard were known, failure times could be generated. Cox’s proportional hazards model is generative for the ordering of failures conditional on a number of censored items. Proportional hazard models may also include a parametric or non-parametric model for the baseline hazard function1.

+
+

Partial likelihood function

+

Cox’s proportional specification of the hazard function is insufficient to generate random variates because the baseline hazard function \(h_0(t)\) is unknown. On the other hand, the proportional specification is sufficient to generate a partial likelihood that accounts for the order of the survival times.

+

The hazard function \(h(t \mid x_n, \beta) = h_0(t) \cdot \exp(x_n +\cdot \beta)\) for subject \(n\) represents the instantaneous probability that subject \(n\) fails at time \(t\) given that it has survived until time \(t.\) The probability that subject \(n\) is the first to fail among \(N\) subjects is thus proportional to subject \(n\)’s hazard function, \[\begin{equation*} +\Pr[n \textrm{ first to fail at time } t] +\propto h(t \mid x_n, \beta). +\end{equation*}\] Normalizing yields \[\begin{eqnarray*} +\Pr[n \textrm{ first to fail at time } t] +& = & \frac{h(t \mid x_n, \beta)} + {\sum_{n' = 1}^N h(t \mid x_{n'}, \beta)} +\\[4pt] +& = & \frac{h_0(t) \cdot \exp(x_n \cdot \beta)} + {\sum_{n' = 1}^N h_0(t) \cdot \exp(x_{n'} \cdot \beta)} +\\[4pt] +& = & \frac{\exp(x_n \cdot \beta)} + {\sum_{n' = 1}^N \exp(x_{n'} \cdot \beta)}. +\end{eqnarray*}\]

+

Suppose there are \(N\) subjects with strictly ordered survival times \(t_1 < +t_2 < \cdots < t_N\) and covariate (row) vectors \(x_1, \ldots, x_N\). Let \(t^{\textrm{cens}}\) be the (right) censoring time and let \(N^{\textrm{obs}}\) be the largest value of \(n\) such that \(t_n \leq +t^{\textrm{cens}}\). This means \(N^{\textrm{obs}}\) is the number of subjects whose failure time was observed. The ordering is for convenient indexing and does not cause any loss of generality—survival times can simply be sorted into the necessary order.

+

With failure times sorted in increasing order, the partial likelihood for each observed subject \(n \in 1{:}N^{\textrm{obs}}\) can be expressed as \[\begin{equation*} +\Pr[n \textrm{ first to fail among } n, n + 1, \ldots, N] += \frac{\exp(x_n \cdot \beta)} + {\sum_{n' = n}^N \exp(x_{n'} \cdot \beta)}. +\end{equation*}\] The group of items for comparison and hence the summation is over all items, including those with observed and censored failure times.

+

The partial likelihood, defined in this form by Breslow (1975), is just the product of the partial likelihoods for the observed subjects (i.e., excluding subjects whose failure time is censored). \[\begin{equation*} +\Pr[\textrm{observed failures ordered } 1, \ldots, N^{\textrm{obs}} | +x, \beta] += \prod_{n = 1}^{N^{\textrm{obs}}} + \frac{\exp(x_n \cdot \beta)} + {\sum_{n' = n}^N \exp(x_{n'} \cdot \beta)}. +\end{equation*}\] On the log scale, \[\begin{eqnarray*} +\log \Pr[\textrm{obs.\ fail ordered } 1, \ldots, N^{\textrm{obs}} | +x, \beta] +& = & +\sum_{n = 1}^{N^{\textrm{obs}}} + \log \left( + \frac{\exp(x_n \cdot \beta)} + {\sum_{n' = n}^N \exp(x_{n'} \cdot \beta)} + \right) +\\[4pt] +& = & \sum_{n = 1}^{N^{\textrm{obs}}} \left( x_n \cdot \beta - \log \sum_{n' = n}^N \exp(x_{n'} \cdot \beta) \right) +\\ +& = & \sum_{n = 1}^{N^{\textrm{obs}}} \left( x_n \cdot \beta - \textrm{logSumExp}_{n' = n}^N \ x_{n'} \cdot \beta \right), +\end{eqnarray*}\] where \[\begin{equation*} +\textrm{logSumExp}_{n = a}^b \ x_n += \log \sum_{n = a}^b \exp(x_n) +\end{equation*}\] is implemented so as to preserve numerical precision.

+

This likelihood follows the same approach to ranking as that developed by Plackett (1975) for estimating the probability of the order of the first few finishers in a horse race.

+

A simple normal prior on the components of \(\beta\) completes the model, \[\begin{equation*} +\beta \sim \textrm{normal}(0, 2). +\end{equation*}\] This should be scaled based on knowledge of the predictors.

+
+
+

Stan program

+

To simplify the Stan program, the survival times for uncensored events are sorted into decreasing order (unlike in the mathematical presentation, where they were sorted into ascending order). The covariates for censored and uncensored observations are separated into two matrices.

+
data {
+  int<lower=0> K;          // num covariates
+
+  int<lower=0> N;          // num uncensored obs
+  vector[N] t;             // event time (non-strict decreasing)
+  matrix[N, K] x;          // covariates for uncensored obs
+
+  int N_c;                 // num censored obs
+  real<lower=t[N]> t_c;    // censoring time
+  matrix[N_c, K] x_c;      // covariates for censored obs
+}
+

The parameters are just the coefficients.

+
parameters {
+  vector[K] beta;          // slopes (no intercept)
+}
+

The prior is a simple independent centered normal distribution on each element of the parameter vector, which is vectorized in the Stan code.

+
model {
+  beta ~ normal(0, 2);
+  ...
+

The log likelihood is implemented so as to minimize duplicated effort. The first order of business is to calculate the linear predictors, which is done separately for the subjects whose event time is observed and those for which the event time is censored.

+
  vector[N] log_theta = x * beta;
+  vector[N_c] log_theta_c = x_c * beta;
+

These vectors are computed using efficient matrix-vector multiplies. The log of exponential values of the censored covariates times the coefficients is reused in the denominator of each factor, which on the log scale, starts with the log sum of exponentials of the censored items’ linear predictors.

+
  real log_denom = log_sum_exp(log_theta_c);
+

Then, for each observed survival time, going backwards from the latest to the earliest, the denominator can be incremented (which turns into a log sum of exponentials on the log scale), and then the target is updated with its likelihood contribution.

+
  for (n in 1:N) {
+    log_denom = log_sum_exp(log_denom, log_theta[n]);
+    target += log_theta[n] - log_denom;   // log likelihood
+  }
+

The running log sum of exponentials is why the list is iterated in reverse order of survival times. It allows the log denominator to be accumulated one term at a time. The condition that the survival times are sorted into decreasing order is not checked. It could be checked very easily in the transformed data block by adding the following code.

+
transformed data {
+  for (n in 2:N) {
+    if (!(t[n] < t[n - 1])) {
+      reject("times must be strictly decreasing, but found ",
+             "!(t[", n, "] < t[", (n - 1), "])");
+    }   
+  }
+}
+
+
+

Stan model for tied survival times

+

Technically, for continuous survival times, the probability of two survival times being identical will be zero. Nevertheless, real data sets often round survival times, for instance to the nearest day or week in a multi-year clinical trial. The technically “correct” thing to do in the face of unknown survival times in a range would be to treat their order as unknown and infer it. But considering all \(N!\) permutations for a set of \(N\) subjects with tied survival times is not tractable. As an alternative, Efron (1977) introduced an approximate partial likelihood with better properties than a random permutation while not being quite as good as considering all permutations. Efron’s model averages the contributions as if they truly did occur simultaneously.

+

In the interest of completeness, here is the Stan code for an implementation of Efron’s estimator. It uses two user-defined functions. The first calculates how many different survival times occur in the data.

+
functions {
+  int num_unique_starts(vector t) {
+    if (size(t) == 0) return 0;
+    int us = 1;
+    for (n in 2:size(t)) {
+      if (t[n] != t[n - 1]) us += 1;
+    }
+    return us;
+  }
+

This is then used to compute the value J to send into the function that computes the position in the array of failure times where each new failure time starts, plus an end point that goes one past the target. This is a standard way in Stan to code ragged arrays.

+
  array[] int unique_starts(vector t, int J) {
+    array[J + 1] int starts;
+    if (J == 0) return starts;
+    starts[1] = 1;
+    int pos = 2;
+    for (n in 2:size(t)) {
+      if (t[n] != t[n - 1]) {
+    starts[pos] = n;
+    pos += 1;
+      }
+    }
+    starts[J + 1] = size(t) + 1;
+    return starts;
+  }
+}
+

The data format is exactly the same as for the model in the previous section, but in this case, the transformed data block is used to cache some precomputations required for the model, namely the ragged array grouping elements that share the same survival time.

+
transformed data {
+  int<lower=0> J = num_unique_starts(t);
+  array[J + 1] int<lower=0> starts = unique_starts(t, J);
+}
+

For each unique survival time j in 1:J, the subjects indexed from starts[j] to starts[j + 1] - 1 (inclusive) share the same survival time. The number of elements with survival time j is thus (starts[j + 1] - 1) - starts[j] + 1, or just starts[j + 1] - starts[j].

+

The parameters and prior are also the same—just a vector beta of coefficients with a centered normal prior. Although it starts with the same caching of results for later, and uses the same accumulator for the denominator, the overall partial likelihood is much more involved, and depends on the user-defined functions defining the transformed data variables J and starts.

+
  vector[N] log_theta = x * beta;
+  vector[N_c] log_theta_c = x_c * beta;
+  real log_denom_lhs = log_sum_exp(log_theta_c);
+  for (j in 1:J) {
+    int start = starts[j];
+    int end = starts[j + 1] - 1;
+    int len = end - start + 1;
+    real log_len = log(len);
+    real numerator = sum(log_theta[start:end]);
+    log_denom_lhs = log_sum_exp(log_denom_lhs,
+                                log_sum_exp(log_theta[start:end]));
+    vector[len] diff;
+    for (ell in 1:len) {
+      diff[ell] = log_diff_exp(log_denom_lhs,
+                               log(ell - 1) - log_len
+                               + log_sum_exp(log_theta[start:end]));
+    }
+    target += numerator - sum(diff);
+  }
+

The special function log_diff_exp is defined as

+

\[\begin{equation*} +\textrm{logDiffExp}(u, v) = \log(\exp(u) - \exp(v)). +\end{equation*}\]

+

Because of how J and starts are constructed, the length len will always be strictly positive so that the log is well defined.

+ + + +
+
+
+ + + Back to top

References

+
+Breslow, N. E. 1975. “Analysis of Survival Data Under the Proportional Hazards Model.” International Statistical Review 43 (1): 45–58. +
+
+Cox, David R. 1972. “Regression Models and Life-Tables.” Journal of the Royal Statistical Society: Series B (Methodological) 34 (2): 187–202. +
+
+Efron, Bradley. 1977. “The Efficiency of Cox’s Likelihood Function for Censored Data.” Journal of the American Statistical Association 72 (359): 557–65. +
+
+Plackett, Robin L. 1975. “The Analysis of Permutations.” Journal of the Royal Statistical Society Series C: Applied Statistics 24 (2): 193–202. +
+

Footnotes

+ +
    +
  1. Cox mentioned in his seminal paper that modeling the baseline hazard function would improve statistical efficiency, but he did not do it for computational reasons.↩︎

  2. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/time-series.html b/docs/2_39/stan-users-guide/time-series.html new file mode 100644 index 000000000..0b0120064 --- /dev/null +++ b/docs/2_39/stan-users-guide/time-series.html @@ -0,0 +1,1689 @@ + + + + + + + + + +Time-Series Models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Time-Series Models

+

Time series data come arranged in temporal order. This chapter presents two kinds of time-series models: regression-like models, such as autoregressive and moving average models, and hidden Markov models.

+

The Gaussian processes chapter presents Gaussian processes, which may also be used for time-series (and spatial) data.

+
+

Autoregressive models

+

A first-order autoregressive model (AR(1)) with normal noise takes each point \(y_n\) in a sequence \(y\) to be generated according to \[ +y_n \sim \textsf{normal}(\alpha + \beta y_{n-1}, \sigma). +\]

+

That is, the expected value of \(y_n\) is \(\alpha + \beta y_{n-1}\), with noise scaled as \(\sigma\).

+
+

AR(1) models

+

With improper flat priors on the regression coefficients \(\alpha\) and \(\beta\) and on the positively-constrained noise scale (\(\sigma\)), the Stan program for the AR(1) model is as follows.1

+
data {
+  int<lower=0> N;
+  vector[N] y;
+}
+parameters {
+  real alpha;
+  real beta;
+  real<lower=0> sigma;
+}
+model {
+  for (n in 2:N) {
+    y[n] ~ normal(alpha + beta * y[n-1], sigma);
+  }
+}
+

The first observed data point, y[1], is not modeled here because there is nothing to condition on; instead, it acts to condition y[2]. This model also uses an improper prior for sigma, but there is no obstacle to adding an informative prior if information is available on the scale of the changes in y over time, or a weakly informative prior to help guide inference if rough knowledge of the scale of y is available.

+
+

Slicing for efficiency

+

Although perhaps a bit more difficult to read, a much more efficient way to write the above model is by slicing the vectors, with the model above being replaced with the one-liner

+
model {
+  y[2:N] ~ normal(alpha + beta * y[1:(N - 1)], sigma);
+}
+

The left-hand side slicing operation pulls out the last \(N-1\) elements and the right-hand side version pulls out the first \(N-1\).

+
+
+
+

Extensions to the AR(1) model

+

Proper priors of a range of different families may be added for the regression coefficients and noise scale. The normal noise model can be changed to a Student-\(t\) distribution or any other distribution with unbounded support. The model could also be made hierarchical if multiple series of observations are available.

+

To enforce the estimation of a stationary AR(1) process, the slope coefficient beta may be constrained with bounds as follows.

+
real<lower=-1, upper=1> beta;
+

In practice, such a constraint is not recommended. If the data are not well fit by a stationary model it is best to know this. Stationary parameter estimates can be encouraged with a prior favoring values of beta near zero.

+
+
+

AR(2) models

+

Extending the order of the model is also straightforward. For example, an AR(2) model could be coded with the second-order coefficient gamma and the following model statement.

+
for (n in 3:N) {
+  y[n] ~ normal(alpha + beta*y[n-1] + gamma*y[n-2], sigma);
+}
+
+
+

AR(\(K\)) models

+

A general model where the order is itself given as data can be coded by putting the coefficients in an array and computing the linear predictor in a loop.

+
data {
+  int<lower=0> K;
+  int<lower=0> N;
+  array[N] real y;
+}
+parameters {
+  real alpha;              // intercept
+  array[K] real beta;      // coefficient for each of the K lags
+  real<lower=0> sigma;     // noise scale; normal() requires a positive scale
+}
+model {
+  for (n in (K+1):N) {
+    real mu = alpha;
+    for (k in 1:K) {
+      mu += beta[k] * y[n-k];
+    }
+    y[n] ~ normal(mu, sigma);
+  }
+}
+
+
+

ARCH(1) models

+

Econometric and financial time-series models usually assume heteroscedasticity: they allow the scale of the noise terms defining the series to vary over time. The simplest such model is the autoregressive conditional heteroscedasticity (ARCH) model (Engle 1982). Unlike the autoregressive model AR(1), which modeled the mean of the series as varying over time but left the noise term fixed, the ARCH(1) model takes the scale of the noise terms to vary over time but leaves the mean term fixed. Models could be defined where both the mean and scale vary over time; the econometrics literature presents a wide range of time-series modeling choices.

+

The ARCH(1) model is typically presented as the following sequence of equations, where \(r_t\) is the observed return at time point \(t\) and \(\mu\), \(\alpha_0\), and \(\alpha_1\) are unknown regression coefficient parameters.

+

\[\begin{align*} +r_t &= \mu + a_t \\ +a_t &= \sigma_t \epsilon_t \\ +\epsilon_t &\sim \textsf{normal}(0,1) \\ +\sigma^2_t &= \alpha_0 + \alpha_1 a_{t-1}^2 +\end{align*}\]

+

In order to ensure the noise terms \(\sigma^2_t\) are positive, the scale coefficients are constrained to be positive, \(\alpha_0, \alpha_1 +> 0\). To ensure stationarity of the time series, the slope is constrained to be less than one, i.e., \(\alpha_1 < 1\).2

+

The ARCH(1) model may be coded directly in Stan as follows.

+
data {
+  int<lower=0> T;                // number of time points
+  array[T] real r;               // return at time t
+}
+parameters {
+  real mu;                       // average return
+  real<lower=0> alpha0;          // noise intercept
+  real<lower=0, upper=1> alpha1; // noise slope
+}
+model {
+  for (t in 2:T) {
+    r[t] ~ normal(mu, sqrt(alpha0 + alpha1
+                                    * pow(r[t - 1] - mu,2)));
+  }
+}
+

The loop in the model is defined so that the return at time \(t=1\) is not modeled; the model in the next section shows how to model the return at \(t=1\). The model can be vectorized to be more efficient; the model in the next section provides an example.

+
+
+
+

Modeling temporal heteroscedasticity

+

A set of variables is homoscedastic if their variances are all the same; the variables are heteroscedastic if they do not all have the same variance. Heteroscedastic time-series models allow the noise term to vary over time.

+
+

GARCH(1,1) models

+

The basic generalized autoregressive conditional heteroscedasticity (GARCH) model, GARCH(1,1), extends the ARCH(1) model by including the previous squared volatility \(\sigma^2_{t-1}\), in addition to the squared previous difference in return from the mean \(a^2_{t-1}\), as a predictor of volatility at time \(t\), defining \[ +\sigma^2_t = \alpha_0 + \alpha_1 a^2_{t-1} + \beta_1 \sigma^2_{t-1}. +\]

+

To ensure the scale term is positive and the resulting time series stationary, the coefficients must all satisfy \(\alpha_0, \alpha_1, +\beta_1 > 0\) and the slopes \(\alpha_1 + \beta_1 < 1\).

+
data {
+  int<lower=0> T;
+  array[T] real r;
+  real<lower=0> sigma1;
+}
+parameters {
+  real mu;
+  real<lower=0> alpha0;
+  real<lower=0, upper=1> alpha1;
+  real<lower=0, upper=(1-alpha1)> beta1;
+}
+transformed parameters {
+  array[T] real<lower=0> sigma;
+  sigma[1] = sigma1;
+  for (t in 2:T) {
+    sigma[t] = sqrt(alpha0
+                     + alpha1 * pow(r[t - 1] - mu, 2)
+                     + beta1 * pow(sigma[t - 1], 2));
+  }
+}
+model {
+  r ~ normal(mu, sigma);
+}
+

To get the recursive definition of the volatility regression off the ground, the data declaration includes a non-negative value sigma1 for the scale of the noise at \(t = 1\).

+

The constraints are coded directly on the parameter declarations. This declaration is order-specific in that the constraint on beta1 depends on the value of alpha1.

+

A transformed parameter array of non-negative values sigma is used to store the scale values at each time point. The definition of these values in the transformed parameters block is where the regression is now defined. There is an intercept alpha0, a slope alpha1 for the squared difference in return from the mean at the previous time, and a slope beta1 for the previous noise scale squared. Finally, the whole regression is inside the sqrt function because Stan requires scale (deviation) parameters (not variance parameters) for the normal distribution.

+

With the regression in the transformed parameters block, the model reduces to a single vectorized distribution statement. Because r and sigma are of length T, all of the data are modeled directly.

+
+
+
+

Moving average models

+

A moving average model uses previous errors as predictors for future outcomes. For a moving average model of order \(Q\), \(\mbox{MA}(Q)\), there is an overall mean parameter \(\mu\) and regression coefficients \(\theta_q\) for previous error terms. With \(\epsilon_t\) being the noise at time \(t\), the model for outcome \(y_t\) is defined by \[ +y_t = \mu + \theta_1 \epsilon_{t-1} + \dotsb + \theta_Q \epsilon_{t-Q} ++ \epsilon_t, +\] with the noise term \(\epsilon_t\) for outcome \(y_t\) modeled as normal, \[ +\epsilon_t \sim \textsf{normal}(0,\sigma). +\] In a proper Bayesian model, the parameters \(\mu\), \(\theta\), and \(\sigma\) must all be given priors.

+
+

MA(2) example

+

An \(\mbox{MA}(2)\) model can be coded in Stan as follows.

+
data {
+  int<lower=3> T;          // number of observations
+  vector[T] y;             // observation at time t
+}
+parameters {
+  real mu;                 // mean
+  real<lower=0> sigma;     // error scale
+  vector[2] theta;         // lag coefficients
+}
+transformed parameters {
+  vector[T] epsilon;       // error terms
+  epsilon[1] = y[1] - mu;
+  epsilon[2] = y[2] - mu - theta[1] * epsilon[1];
+  for (t in 3:T) {
+    epsilon[t] = ( y[t] - mu
+                    - theta[1] * epsilon[t - 1]
+                    - theta[2] * epsilon[t - 2] );
+  }
+}
+model {
+  mu ~ cauchy(0, 2.5);
+  theta ~ cauchy(0, 2.5);
+  sigma ~ cauchy(0, 2.5);
+  for (t in 3:T) {
+    y[t] ~ normal(mu
+                  + theta[1] * epsilon[t - 1]
+                  + theta[2] * epsilon[t - 2],
+                  sigma);
+  }
+}
+

The error terms \(\epsilon_t\) are defined as transformed parameters in terms of the observations and parameters. The distribution statement (which also defines the likelihood) follows the definition of the \(\mbox{MA}(Q)\) model, which can only be applied to \(y_t\) for \(t > Q\). In this example, the parameters are all given Cauchy (half-Cauchy for \(\sigma\)) priors, although other priors can be used just as easily.

+

This model could be improved in terms of speed by vectorizing the distribution statement in the model block. The calculation of the \(\epsilon_t\) could also be sped up by using a dot product instead of a loop.

+
+
+

Vectorized MA(Q) model

+

A general \(\mbox{MA}(Q)\) model with a vectorized distribution statement may be defined as follows.

+
data {
+  int<lower=0> Q;       // num previous noise terms
+  int<lower=3> T;       // num observations
+  vector[T] y;          // observation at time t
+}
+parameters {
+  real mu;              // mean
+  real<lower=0> sigma;  // error scale
+  vector[Q] theta;      // error coeff, lag -t
+}
+transformed parameters {
+  vector[T] epsilon;    // error term at time t
+  for (t in 1:T) {
+    epsilon[t] = y[t] - mu;
+    for (q in 1:min(t - 1, Q)) {
+      epsilon[t] = epsilon[t] - theta[q] * epsilon[t - q];
+    }
+  }
+}
+model {
+  vector[T] eta;
+  mu ~ cauchy(0, 2.5);
+  theta ~ cauchy(0, 2.5);
+  sigma ~ cauchy(0, 2.5);
+  for (t in 1:T) {
+    eta[t] = mu;
+    for (q in 1:min(t - 1, Q)) {
+      eta[t] = eta[t] + theta[q] * epsilon[t - q];
+    }
+  }
+  y ~ normal(eta, sigma);
+}
+

Here all of the data are modeled, with missing terms just dropped from the regressions as in the calculation of the error terms. Both models converge quickly and mix well at convergence, with the vectorized model being faster (per iteration, not to converge—they compute the same model).

+
+
+
+

Autoregressive moving average models

+

Autoregressive moving-average (ARMA) models combine the predictors of the autoregressive model and the moving average model. An ARMA(1,1) model, with a single state of history, can be encoded in Stan as follows.

+
data {
+  int<lower=1> T;            // num observations
+  array[T] real y;                 // observed outputs
+}
+parameters {
+  real mu;                   // mean coeff
+  real phi;                  // autoregression coeff
+  real theta;                // moving avg coeff
+  real<lower=0> sigma;       // noise scale
+}
+model {
+  vector[T] nu;              // prediction for time t
+  vector[T] err;             // error for time t
+  nu[1] = mu + phi * mu;     // assume err[0] == 0
+  err[1] = y[1] - nu[1];
+  for (t in 2:T) {
+    nu[t] = mu + phi * y[t - 1] + theta * err[t - 1];
+    err[t] = y[t] - nu[t];
+  }
+  mu ~ normal(0, 10);        // priors
+  phi ~ normal(0, 2);
+  theta ~ normal(0, 2);
+  sigma ~ cauchy(0, 5);
+  err ~ normal(0, sigma);    // error model
+}
+

The data are declared in the same way as the other time-series regressions and the parameters are documented in the code.

+

In the model block, the local vector nu stores the predictions and err the errors. These are computed similarly to the errors in the moving average models described in the previous section.

+

The priors are weakly informative for stationary processes. The data model only involves the error term, which is efficiently vectorized here.

+

Often in models such as these, it is desirable to inspect the calculated error terms. This could easily be accomplished in Stan by declaring err as a transformed parameter, then defining it the same way as in the model above. The vector nu could still be a local variable, only now it will be in the transformed parameter block.

+

Wayne Folta suggested encoding the model without local vector variables as follows.

+
model {
+  real err;
+  mu ~ normal(0, 10);
+  phi ~ normal(0, 2);
+  theta ~ normal(0, 2);
+  sigma ~ cauchy(0, 5);
+  err = y[1] - (mu + phi * mu);
+  err ~ normal(0, sigma);
+  for (t in 2:T) {
+    err = y[t] - (mu + phi * y[t - 1] + theta * err);
+    err ~ normal(0, sigma);
+  }
+}
+

This approach to ARMA models illustrates how local variables, such as err in this case, can be reused in Stan. Folta’s approach could be extended to higher order moving-average models by storing more than one error term as a local variable and reassigning them in the loop.

+

Both encodings are fast. The original encoding has the advantage of vectorizing the normal distribution, but it uses a bit more memory. A halfway point would be to vectorize just err.

+
+

Identifiability and stationarity

+

MA and ARMA models are not identifiable if the roots of the characteristic polynomial for the MA part lie inside the unit circle, so it’s necessary to add the following constraint3

+
real<lower=-1, upper=1> theta;
+

When the model is run without the constraint, using synthetic data generated from the model, the simulation can sometimes find modes for (theta, phi) outside the \([-1,1]\) interval, which creates a multiple mode problem in the posterior and also causes the NUTS tree depth to get large (often above 10). Adding the constraint both improves the accuracy of the posterior and dramatically reduces the tree depth, which speeds up the simulation considerably (typically by much more than an order of magnitude).

+

Further, unless one thinks that the process is really non-stationary, it’s worth adding the following constraint to ensure stationarity.

+
real<lower=-1, upper=1> phi;
+
+
+
+

Stochastic volatility models

+

Stochastic volatility models treat the volatility (i.e., variance) of a return on an asset, such as an option to buy a security, as following a latent stochastic process in discrete time (Kim, Shephard, and Chib 1998). The data consist of mean corrected (i.e., centered) returns \(y_t\) on an underlying asset at \(T\) equally spaced time points. Kim et al. formulate a typical stochastic volatility model using the following regression-like equations, with a latent parameter \(h_t\) for the log volatility, along with parameters \(\mu\) for the mean log volatility, and \(\phi\) for the persistence of the volatility term. The variable \(\epsilon_t\) represents the white-noise shock (i.e., multiplicative error) on the asset return at time \(t\), whereas \(\delta_t\) represents the shock on volatility at time \(t\). \[\begin{align*} +y_t &= \epsilon_t \exp(h_t / 2) \\ +h_{t+1} &= \mu + \phi (h_t - \mu) + \delta_t \sigma \\ +h_1 &\sim \textsf{normal}\left( \mu, \frac{\sigma}{\sqrt{1 - \phi^2}} \right) \\ +\epsilon_t &\sim \textsf{normal}(0,1) \\ +\delta_t &\sim \textsf{normal}(0,1) +\end{align*}\]

+

Rearranging the first line, \(\epsilon_t = y_t \exp(-h_t / 2)\), allowing the distribution for \(y_t\) to be written as \[ +y_t \sim \textsf{normal}(0,\exp(h_t/2)). +\] The recurrence equation for \(h_{t+1}\) may be combined with the scaling of \(\delta_t\) to yield the distribution \[ +h_t \sim \mathsf{normal}(\mu + \phi(h_{t-1} - \mu), \sigma). +\] This formulation can be directly encoded, as shown in the following Stan model.

+
data {
+  int<lower=0> T;   // # time points (equally spaced)
+  vector[T] y;      // mean corrected return at time t
+}
+parameters {
+  real mu;                     // mean log volatility
+  real<lower=-1, upper=1> phi; // persistence of volatility
+  real<lower=0> sigma;         // white noise shock scale
+  vector[T] h;                 // log volatility at time t
+}
+model {
+  phi ~ uniform(-1, 1);
+  sigma ~ cauchy(0, 5);
+  mu ~ cauchy(0, 10);
+  h[1] ~ normal(mu, sigma / sqrt(1 - phi * phi));
+  for (t in 2:T) {
+    h[t] ~ normal(mu + phi * (h[t - 1] -  mu), sigma);
+  }
+  for (t in 1:T) {
+    y[t] ~ normal(0, exp(h[t] / 2));
+  }
+}
+

Compared to the Kim et al. formulation, the Stan model adds priors for the parameters \(\phi\), \(\sigma\), and \(\mu\). The shock terms \(\epsilon_t\) and \(\delta_t\) do not appear explicitly in the model, although they could be calculated efficiently in a generated quantities block.

+

The posterior of a stochastic volatility model such as this one typically has high posterior variance. For example, simulating 500 data points from the above model with \(\mu = -1.02\), \(\phi = 0.95\), and \(\sigma = 0.25\) leads to 95% posterior intervals for \(\mu\) of \((-1.23, -0.54)\), for \(\phi\) of \((0.82, 0.98)\), and for \(\sigma\) of \((0.16, 0.38)\).

+

The NUTS draws show a high degree of autocorrelation, both for this model and the stochastic volatility model evaluated in (Hoffman and Gelman 2014). Using a non-diagonal mass matrix provides faster convergence and higher effective sample size than a diagonal mass matrix, but will not scale to large values of \(T\).

+

It is relatively straightforward to speed up the effective sample size per second generated by this model by one or more orders of magnitude. First, the distribution statement for the return \(y\) is easily vectorized to

+
y ~ normal(0, exp(h / 2));
+

This speeds up the iterations, but does not change the effective sample size because the underlying parameterization and log probability function have not changed. Mixing is improved by reparameterizing in terms of a standardized volatility, then rescaling. This requires a standardized parameter h_std to be declared instead of h.

+
parameters {
+  // ...
+  vector[T] h_std;  // std log volatility time t
+}
+

The original value of h is then defined in a transformed parameter block.

+
transformed parameters {
+  vector[T] h = h_std * sigma;  // now h ~ normal(0, sigma)
+  h[1] /= sqrt(1 - phi * phi);  // rescale h[1]
+  h += mu;
+  for (t in 2:T) {
+    h[t] += phi * (h[t - 1] - mu);
+  }
+}
+

The first assignment rescales h_std to have a \(\textsf{normal}(0,\sigma)\) distribution and temporarily assigns it to h. The second assignment rescales h[1] so that its prior differs from that of h[2] through h[T]. The next assignment supplies a mu offset, so that h[2] through h[T] are now distributed \(\textsf{normal}(\mu,\sigma)\); note that this shift must be done after the rescaling of h[1]. The final loop adds in the autoregressive term so that h[2] through h[T] are appropriately modeled relative to phi and mu.

+

As a final improvement, the distribution statements for h[1] to h[T] are replaced with a single vectorized standard normal distribution statement.

+
model {
+  // ...
+  h_std ~ std_normal();
+}
+

Although the original model can take hundreds and sometimes thousands of iterations to converge, the reparameterized model reliably converges in tens of iterations. Mixing is also dramatically improved, which results in higher effective sample sizes per iteration. Finally, each iteration runs in roughly a quarter of the time of the original iterations.

+
+
+

Hidden Markov models

+

A hidden Markov model is a probabilistic model over \(N\) observations \(y_{1:N}\) and \(N\) hidden states \(z_{1:N}\). This model is defined by the conditional distributions \(p(y_n \mid z_n, \phi)\) and \(p(z_n \mid z_{n-1}, \phi)\). Here we make the dependency on additional model parameters \(\phi\) explicit. (\(\phi\) may be a vector of parameters.) The complete data likelihood is then \[ +p(y, z \mid \phi) = \prod_n p(y_n \mid z_n, \phi) p(z_n \mid z_{n - 1}, \phi) +\] When \(z_{1:N}\) is continuous, the user can explicitly encode these distributions in Stan and use Markov chain Monte Carlo to integrate \(z\) out.

+

When each state \(z\) takes a value over a discrete and finite set, say \(\{1, 2, ..., K\}\), we can use Stan’s suite of HMM functions to marginalize out \(z_{1:N}\) and compute \[ +p(y_{1:N} \mid \phi) = \int_{\mathcal Z} p(y, z \mid \phi) \text d z. +\] We start by defining the conditional observation distribution, stored in a \(K \times N\) matrix \(\omega\) with \[ +\omega_{kn} = p(y_n \mid z_n = k, \phi). +\] Next, we introduce the \(K \times K\) transition matrix, \(\Gamma\), with \[ +\Gamma_{ij} = p(z_n = j \mid z_{n - 1} = i, \phi). +\] (This is a right-stochastic matrix.) Finally, we define the initial state \(K\)-vector \(\rho\), with \[ +\rho_k = p(z_0 = k \mid \phi). +\] It is common practice to set \(\rho\) to be the stationary distribution of the HMM, that is, \(\rho\) is the left eigenvector of \(\Gamma\) with eigenvalue one and solves \(\Gamma^\top \rho = \rho\).

+

As an example, consider a three-state model with \(K=3\). The observations are normally distributed conditional on the HMM states with \[ + y_n \sim \text{normal}(\mu_k, \sigma), +\] where \(\mu = (1, 5, 9)\) and the standard deviation \(\sigma\) is the same across all observations. The model is then

+
data {
+  int N;  // Number of observations
+  array[N] real y;
+}
+
+parameters {
+  // Rows of the transition matrix
+  array[3] simplex[3] gamma_arr;
+
+  // Initial state
+  simplex[3] rho;
+
+  // Parameters of measurement model
+  vector[3] mu;
+  real<lower = 0.0> sigma;
+}
+
+transformed parameters {
+  // Build transition matrix
+  matrix[3, 3] gamma;
+  for (k in 1:3) gamma[k, ] = to_row_vector(gamma_arr[k]);
+
+  // Compute the log likelihoods in each possible state
+  matrix[3, N] log_omega;
+  for (n in 1:N) {
+    for (i in 1:3) {
+      log_omega[i, n] = normal_lpdf(y[n] | mu[i], sigma);
+    }
+  }
+}
+
+model {
+  // prior
+  mu ~ normal(0, 1);
+  sigma ~ normal(0, 1);
+  
+  // no explicit prior on gamma_arr, meaning we default to a
+  // uniform prior over the simplexes.
+
+  // Increment target by log p(y | mu, sigma, Gamma, rho)
+  target += hmm_marginal(log_omega, gamma, rho);
+}
+

The last function hmm_marginal takes in all the ingredients of the HMM and computes the relevant log marginal distribution, \(\log p(y \mid \phi)\).

+

If we desire draws from the posterior distribution of \(z\), we use the generated quantities block and draw, for each sample \(\phi\), a sample from \(p(z \mid y, \phi)\). In effect, MCMC produces draws from \(p(\phi \mid y)\) and with the draws in generated quantities, we obtain draws from \(p(\phi \mid y) p(z \mid y, \phi) = p(z, \phi \mid y)\). It is also possible to compute the posterior probability of each hidden state, that is \(\text{Pr}(z_n = k \mid \phi, y)\). Averaging these probabilities over all MCMC draws, we obtain \(\text{Pr}(z_n = k \mid y)\).

+
generated quantities {
+  array[N] int latent_states = hmm_latent_rng(log_omega, gamma, rho);
+  matrix[3, N] hidden_probs = hmm_hidden_state_prob(log_omega, gamma, rho);
+}
+

hmm_hidden_state_prob returns the marginal probabilities of each state, \(\text{Pr}(z_n = k \mid \phi, y)\). This function cannot be used to compute the joint probability \(\text{Pr}(z \mid \phi, y)\), because such calculation requires accounting for the posterior correlation between the different components of \(z\). Therefore, hidden_probs should not be used to obtain posterior draws. Instead, users should rely on hmm_latent_rng.

+
generated quantities {
+   // One joint draw of the N hidden states from p(z | y, phi), using the
+   // log_omega, gamma, and rho defined in the three-state example above.
+   array[N] int<lower=1, upper=3> z = hmm_latent_rng(log_omega, gamma, rho);
+}
+

The example in this section is derived from the more detailed case study by Ben Bales: https://mc-stan.org/users/documentation/case-studies/hmm-example.html.

+ + + +
+
+ + + Back to top

References

+
+Engle, Robert F. 1982. “Autoregressive Conditional Heteroscedasticity with Estimates of Variance of United Kingdom Inflation.” Econometrica 50: 987–1008. +
+
+Hoffman, Matthew D., and Andrew Gelman. 2014. “The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.” Journal of Machine Learning Research 15: 1593–623. http://jmlr.org/papers/v15/hoffman14a.html. +
+
+Kim, Sangjoon, Neil Shephard, and Siddhartha Chib. 1998. “Stochastic Volatility: Likelihood Inference and Comparison with ARCH Models.” Review of Economic Studies 65: 361–93. +
+

Footnotes

+ +
    +
  1. The intercept in this model is \(\alpha / (1 - \beta)\). An alternative parameterization in terms of an intercept \(\gamma\) suggested by Mark Scheuerell on GitHub is \(y_n \sim \textsf{normal}\left(\gamma + \beta \cdot (y_{n-1} - \gamma), \sigma\right)\).↩︎

  2. +
  3. In practice, it can be useful to remove the constraint to test whether a non-stationary set of coefficients provides a better fit to the data. It can also be useful to add a trend term to the model, because an unfitted trend will manifest as non-stationarity.↩︎

  4. +
  5. This subsection is a lightly edited comment of Jonathan Gilligan’s on GitHub; see https://github.com/stan-dev/stan/issues/1617#issuecomment-160249142.↩︎

  6. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/truncation-censoring.html b/docs/2_39/stan-users-guide/truncation-censoring.html new file mode 100644 index 000000000..fe4b0bdc9 --- /dev/null +++ b/docs/2_39/stan-users-guide/truncation-censoring.html @@ -0,0 +1,1355 @@ + + + + + + + + + +Truncated or Censored Data + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Truncated or Censored Data

+

Data in which measurements have been truncated or censored can be coded in Stan following their respective probability models.

+
+

Truncated distributions

+

Truncation in Stan is restricted to univariate distributions for which the corresponding log cumulative distribution function (CDF) and log complementary cumulative distribution (CCDF) functions are available. See the reference manual section on truncated distributions for more information on truncated distributions, CDFs, and CCDFs.

+
+
+

Truncated data

+

Truncated data are data for which measurements are only reported if they fall above a lower bound, below an upper bound, or between a lower and upper bound.

+

Truncated data may be modeled in Stan using truncated distributions. For example, suppose the truncated data are \(y_n\) with an upper truncation point of \(U = 300\) so that \(y_n < 300\). In Stan, this data can be modeled as following a truncated normal distribution for the observations as follows.

+
data {
+  int<lower=0> N;
+  real U;
+  array[N] real<upper=U> y;
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  y ~ normal(mu, sigma) T[ , U];
+}
+

The model declares an upper bound U as data and constrains the data for y to respect the constraint; this will be checked when the data are loaded into the model before sampling begins.

+

See the Stan Reference Manual’s Statements chapter for how to use truncated distributions with the log probability increment statements (target += ...).

+

This model implicitly uses an improper flat prior on the scale and location parameters; these could be given priors in the model using distribution statements.

+
+

Constraints and out-of-bounds returns

+

If the sampled variate in a truncated distribution lies outside of the truncation range, the probability is zero, so the log probability will evaluate to \(-\infty\). For instance, if variate y is sampled with the statement

+
y ~ normal(mu, sigma) T[L, U];
+

then if any value inside y is less than the value of L or greater than the value of U, the distribution statement produces a zero-probability estimate. For user-defined truncation, this zeroing outside of truncation bounds must be handled explicitly.

+

To avoid variables straying outside of truncation bounds, appropriate constraints are required. For example, if y is a parameter in the above model, the declaration should constrain it to fall between the values of L and U.

+
parameters {
+  array[N] real<lower=L, upper=U> y;
+  // ...
+}
+

If in the above model, L or U is a parameter and y is data, then L and U must be appropriately constrained so that all data are in range and the value of L is less than that of U (if they are equal, the parameter range collapses to a single point and the Hamiltonian dynamics used by the sampler break down). The following declarations ensure the bounds are well behaved.

+
parameters {
+  real<upper=min(y)> L;           // L < y[n]
+  real<lower=fmax(L, max(y))> U;  // L < U; y[n] < U
+

For pairs of real numbers, the function fmax is used rather than max.

+
+
+

Unknown truncation points

+

If the truncation points are unknown, they may be estimated as parameters. This can be done with a slight rearrangement of the variable declarations from the model in the previous section with known truncation points.

+
data {
+  int<lower=1> N;
+  array[N] real y;
+}
+parameters {
+  real<upper=min(y)> L;
+  real<lower=max(y)> U;
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  L ~ // ...
+  U ~ // ...
+  y ~ normal(mu, sigma) T[L, U];
+}
+

Here there is a lower truncation point L which is declared to be less than or equal to the minimum value of y. The upper truncation point U is declared to be larger than the maximum value of y. This declaration, although dependent on the data, only enforces the constraint that the data fall within the truncation bounds. With N declared as type int<lower=1>, there must be at least one data point. The constraint that L is less than U is enforced indirectly, based on the non-empty data.

+

The ellipses where the priors for the bounds L and U should go should be filled in with an informative prior in order for this model to not concentrate L strongly around min(y) and U strongly around max(y).

+
+
+
+

Censored data

+

Censoring hides values from points that are too large, too small, or both. Unlike with truncated data, the number of data points that were censored is known. The textbook example is the household scale which does not report values above 300 pounds.

+
+

Estimating censored values

+

One way to model censored data is to treat the censored data as missing data that is constrained to fall in the censored range of values. Since Stan does not allow unknown values in its arrays or matrices, the censored values must be represented explicitly, as in the following right-censored case.

+
data {
+  int<lower=0> N_obs;
+  int<lower=0> N_cens;
+  array[N_obs] real y_obs;
+  real<lower=max(y_obs)> U;
+}
+parameters {
+  array[N_cens] real<lower=U> y_cens;
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  y_obs ~ normal(mu, sigma);
+  y_cens ~ normal(mu, sigma);
+}
+

Because the censored data array y_cens is declared to be a parameter, it will be sampled along with the location and scale parameters mu and sigma. Because the censored data array y_cens is declared to have values of type real<lower=U>, all imputed values for censored data will be greater than U. The imputed censored data affects the location and scale parameters through the last distribution statement in the model.

+
+
+

Integrating out censored values

+

Although it is wrong to ignore the censored values in estimating location and scale, it is not necessary to impute values. Instead, the values can be integrated out. Each censored data point has a probability of \[\begin{align*} +\Pr[y_{\mathrm{cens},m} > U] + &= \int_U^{\infty} \textsf{normal}\left(y_{\mathrm{cens},m} \mid \mu,\sigma \right) \,\textsf{d}y_{\mathrm{cens},m} \\ + &= 1 - \Phi\left(\frac{U - \mu}{\sigma}\right), +\end{align*}\]

+

where \(\Phi()\) is the standard normal cumulative distribution function. This probability is equivalent to the likelihood contribution of knowing that \(y_{\mathrm{cens},m}>U\). With \(M\) censored observations, the likelihood on the log scale is \[\begin{align*} +\log \prod_{m=1}^M \Pr[y_{\mathrm{cens},m} > U] + &= \log \left( \left( 1 - \Phi\left(\frac{U - \mu}{\sigma}\right) \right)^{M} \right) \\ + &= M \times \texttt{normal}\mathtt{\_}\texttt{lccdf}\left(U \mid \mu, \sigma \right), +\end{align*}\]

+

where normal_lccdf is the log of the complementary CDF (Stan provides <distr>_lccdf for each distribution implemented in Stan).

+

The following right-censored model assumes that the censoring point is known, so it is declared as data.

+
data {
+  int<lower=0> N_obs;
+  int<lower=0> N_cens;
+  array[N_obs] real y_obs;
+  real<lower=max(y_obs)> U;
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  y_obs ~ normal(mu, sigma);
+  target += N_cens * normal_lccdf(U | mu, sigma);
+}
+

For the observed values in y_obs, the normal model is used without truncation. The likelihood contribution from the integrated out censored values cannot be coded with a distribution statement, so the log probability is directly incremented using the calculated log complementary cumulative normal probability of the censored observations.

+

For the left-censored data the CDF (normal_lcdf) has to be used instead of complementary CDF. If the censoring point variable (L) is unknown, its declaration should be moved from the data to the parameters block.

+
data {
+  int<lower=0> N_obs;
+  int<lower=0> N_cens;
+  array[N_obs] real y_obs;
+}
+parameters {
+  real<upper=min(y_obs)> L;
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  L ~ normal(mu, sigma);
+  y_obs ~ normal(mu, sigma);
+  target += N_cens * normal_lcdf(L | mu, sigma);
+}
+ + +
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/user-functions.html b/docs/2_39/stan-users-guide/user-functions.html new file mode 100644 index 000000000..f5a2f7e71 --- /dev/null +++ b/docs/2_39/stan-users-guide/user-functions.html @@ -0,0 +1,1642 @@ + + + + + + + + + +User-Defined Functions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

User-Defined Functions

+

This chapter explains functions from a user perspective with examples; see the language reference for a full specification. User-defined functions allow computations to be encapsulated into a single named unit and invoked elsewhere by name. Similarly, functions allow complex procedures to be broken down into more understandable components. Writing modular code using descriptively named functions is easier to understand than a monolithic program, even if the latter is heavily commented.1

+
+

Basic functions

+

Here’s an example of a skeletal Stan program with a user-defined relative difference function employed in the generated quantities block to compute a relative difference between two parameters.

+
functions {
+  real relative_diff(real x, real y) {
+    real abs_diff;
+    real avg_scale;
+    abs_diff = abs(x - y);
+    avg_scale = (abs(x) + abs(y)) / 2;
+    return abs_diff / avg_scale;
+  }
+}
+// ...
+generated quantities {
+  real rdiff;
+  rdiff = relative_diff(alpha, beta);
+}
+

The function is named relative_diff, and is declared to have two real-valued arguments and return a real-valued result. It is used the same way a built-in function would be used in the generated quantities block.

+
+

User-defined functions block

+

All functions are defined in their own block, which is labeled functions and must appear before all other program blocks. The user-defined functions block is optional.

+
+
+

Function bodies

+

The body (the part between the curly braces) contains ordinary Stan code, including local variables. The new function is used in the generated quantities block just as any of Stan’s built-in functions would be used.

+
+
+

Return statements

+

Return statements, such as the one on the last line of the definition of relative_diff above, are only allowed in the bodies of function definitions. Return statements may appear anywhere in a function, but functions with non-void return types must end in a return statement.

+
+
+

Reject and error statements

+

The Stan reject statement provides a mechanism to report errors or problematic values encountered during program execution. It accepts any number of quoted string literals or Stan expressions as arguments. This statement is typically embedded in a conditional statement in order to detect bad or illegal outcomes of some processing step.

+

If an error is indicative of a problem from which it is not expected to be able to recover, Stan provides a fatal_error statement.

+
+

Catching errors

+

Rejection is used to flag errors that arise in inputs or in program state. It is far better to fail early with a localized informative error message than to run into problems much further downstream (as in rejecting a state or failing to compute a derivative).

+

The most common error check that is coded is a test that all of the arguments to a function are legal. The following function takes a square root of its input, so requires non-negative inputs; it is coded to guard against illegal inputs.

+
real dbl_sqrt(real x) {
+  if (!(x >= 0)) {
+    reject("dbl_sqrt(x): x must be positive; found x = ", x);
+  }
+  return 2 * sqrt(x);
+}
+

The negation of the positive test is important, because it also catches the case where x is a not-a-number value. If the condition had been coded as (x < 0) it would not catch the not-a-number case, though it could be written as (x < 0 || is_nan(x)). The positive infinite case is allowed through, but could also be checked with the is_inf(x) function. The square root function does not itself reject, but some downstream consumer of dbl_sqrt(-2) would be likely to raise an error, at which point the origin of the illegal input requires detective work. Or even worse, as Matt Simpson pointed out in the GitHub comments, the function could go into an infinite loop if it starts with an infinite value and tries to reduce it by arithmetic, likely consuming all available memory and crashing an interface. Much better to catch errors early and report on their origin.

+

The effect of rejection depends on the program block in which the rejection is executed. In transformed data, rejections cause the program to fail to load. In transformed parameters or in the model block, rejections cause the current state to be rejected in the Metropolis sense.2

+

In generated quantities there is no way to recover and generate the remaining parameters, so rejections cause subsequent values to be reported as NaNs. Extra care should be taken in calling functions which may reject in the generated quantities block.

+
+
+
+

Type declarations for functions

+

Function argument and return types for vector and matrix types are not declared with their sizes, unlike type declarations for variables. Function argument type declarations may not be declared with constraints, either lower or upper bounds or structured constraints like forming a simplex or correlation matrix, (as is also the case for local variables); see the table of types in the reference manual for full details.

+

For example, here’s a function to compute the entropy of a categorical distribution with simplex parameter theta.

+
real entropy(vector theta) {
+  return -sum(theta .* log(theta));
+}
+

Although theta must be a simplex, only the type vector is used.3

+

Upper or lower bounds on values or constrained types are not allowed as return types or argument types in function declarations.

+
+
+

Array types for function declarations

+

Array arguments have their own syntax, which follows that used in this manual for function signatures. For example, a function that operates on a two-dimensional array to produce a one-dimensional array might be declared as follows.

+
array[] real baz(array[,] real x);
+

The notation [ ] is used for one-dimensional arrays (as in the return above), [ , ] for two-dimensional arrays, [ , , ] for three-dimensional arrays, and so on.

+

Functions support arrays of any type, including matrix and vector types. As with other types, no constraints are allowed.

+
+
+

Data-only function arguments

+

A function argument which is a real-valued type or a container of a real-valued type, i.e., not an integer type or integer array type, can be qualified using the prefix qualifier data. The following is an example of a data-only function argument.

+
real foo(real y, data real mu) {
+  return -0.5 * (y - mu)^2;
+}
+

This qualifier restricts this argument to being invoked with expressions which consist only of data variables, transformed data variables, literals, and function calls. A data-only function argument cannot involve real variables declared in the parameters, transformed parameters, or model block. Attempts to invoke a function using an expression which contains parameter, transformed parameters, or model block variables as a data-only argument will result in an error message from the parser.

+

Use of the data qualifier must be consistent between the forward declaration and the definition of a function.

+

This qualifier should be used when writing functions that call the built-in ordinary differential equation (ODE) solvers, algebraic solvers, or map functions. These higher-order functions have strictly specified signatures where some of the arguments are data-only expressions. (See the ODE solver chapter for more usage details and the functions reference manual for full definitions.) When writing a function which calls the ODE or algebraic solver, arguments to that function which are passed into the call to the solver, either directly or indirectly, should have the data prefix qualifier. This allows for compile-time type checking and increases overall program understandability.

+
+
+
+

Functions as statements

+

In some cases, it makes sense to have functions that do not return a value. For example, a routine to print the lower-triangular portion of a matrix can be defined as follows.

+
functions {
+  void pretty_print_tri_lower(matrix x) {
+    if (rows(x) == 0) {
+      print("empty matrix");
+      return;
+    }
+    print("rows=", rows(x), " cols=", cols(x));
+    for (m in 1:rows(x)) {
+      for (n in 1:m) {
+        print("[", m, ",", n, "]=", x[m, n]);
+      }
+    }
+  }
+}
+

The special symbol void is used as the return type. This is not a type itself in that there are no values of type void; it merely indicates the lack of a value. As such, return statements for void functions are not allowed to have arguments, as in the return statement in the body of the previous example.

+

Void functions applied to appropriately typed arguments may be used on their own as statements. For example, the pretty-print function defined above may be applied to a covariance matrix being defined in the transformed parameters block.

+
transformed parameters {
+  cov_matrix[K] Sigma;
+  // ... code to set Sigma ...
+  pretty_print_tri_lower(Sigma);
+  // ...
+}
+
+
+

Functions accessing the log probability accumulator

+

Functions whose names end in _lp are allowed to use distribution statements and target += statements; other functions are not. Because of this access, their use is restricted to the transformed parameters and model blocks.

+

Here is an example of a function to assign standard normal priors to a vector of coefficients, along with a center and scale, and return the translated and scaled coefficients; see the reparameterization section for more information on efficient non-centered parameterizations.

+
functions {
+  vector center_lp(vector beta_raw, real mu, real sigma) {
+    beta_raw ~ std_normal();
+    sigma ~ cauchy(0, 5);
+    mu ~ cauchy(0, 2.5);
+    return sigma * beta_raw + mu;
+  }
+  // ...
+}
+parameters {
+  vector[K] beta_raw;
+  real mu_beta;
+  real<lower=0> sigma_beta;
+  // ...
+}
+transformed parameters {
+  vector[K] beta;
+  // ...
+  beta = center_lp(beta_raw, mu_beta, sigma_beta);
+  // ...
+}
+
+
+

Functions implementing change-of-variable adjustments

+

Functions whose names end in _jacobian can use the jacobian += statement. This can be used to implement a custom change of variables for arbitrary parameters.

+

For example, this function recreates the built-in <upper=x> transform on real numbers:

+
real my_upper_bound_jacobian(real x, real ub) {
+  jacobian += x;
+  return ub - exp(x);
+}
+

It can be used as a replacement for real<upper=ub> as follows:

+
functions {
+  // my_upper_bound_jacobian as above
+}
+data {
+  real ub;
+}
+parameters {
+  real b_raw;
+}
+transformed parameters {
+  real b = my_upper_bound_jacobian(b_raw, ub);
+}
+model {
+  b ~ lognormal(0, 1);
+  // ...
+}
+
+
+

Functions acting as random number generators

+

A user-specified function can be declared to act as a (pseudo) random number generator (PRNG) by giving it a name that ends in _rng. Giving a function a name that ends in _rng allows it to access built-in functions and user-defined functions that end in _rng, which includes all the built-in PRNG functions. Only functions ending in _rng are able to access the built-in PRNG functions. The use of functions ending in _rng must therefore be restricted to transformed data and generated quantities blocks like other PRNG functions; they may also be used in the bodies of other user-defined functions ending in _rng.

+

For example, the following function generates an \(N \times K\) data matrix, the first column of which is filled with 1 values for the intercept and the remaining entries of which have values drawn from a standard normal PRNG.

+
matrix predictors_rng(int N, int K) {
+  matrix[N, K] x;
+  for (n in 1:N) {
+    x[n, 1] = 1.0;  // intercept
+    for (k in 2:K) {
+      x[n, k] = normal_rng(0, 1);
+    }
+  }
+  return x;
+}
+

The following function defines a simulator for regression outcomes based on a data matrix x, coefficients beta, and noise scale sigma.

+
vector regression_rng(vector beta, matrix x, real sigma) {
+  vector[rows(x)] y;
+  vector[rows(x)] mu;
+  mu = x * beta;
+  for (n in 1:rows(x)) {
+    y[n] = normal_rng(mu[n], sigma);
+  }
+  return y;
+}
+

These might be used in a generated quantities block to simulate some fake data from a fitted regression model as follows.

+
parameters {
+  vector[K] beta;
+  real<lower=0> sigma;
+  // ...
+}
+generated quantities {
+  matrix[N_sim, K] x_sim;
+  vector[N_sim] y_sim;
+  x_sim = predictors_rng(N_sim, K);
+  y_sim = regression_rng(beta, x_sim, sigma);
+}
+

A more sophisticated simulation might fit a multivariate normal to the predictors x and use the resulting parameters to generate multivariate normal draws for x_sim.

+
+
+

User-defined probability functions

+

Probability functions are distinguished in Stan by names ending in _lpdf for density functions and _lpmf for mass functions; in both cases, they must have real return types.

+

Suppose a model uses several standard normal distributions, for which there is not a specific overloaded density nor defaults in Stan. So rather than writing out the location of 0 and scale of 1 for all of them, a new density function may be defined and reused.

+
functions {
+  real unit_normal_lpdf(real y) {
+    return normal_lpdf(y | 0, 1);
+  }
+}
+// ...
+model {
+  alpha ~ unit_normal();
+  beta ~ unit_normal();
+  // ...
+}
+

The ability to use the unit_normal function as a density is keyed off its name ending in _lpdf (names ending in _lpmf for probability mass functions work the same way).

+

In general, if foo_lpdf is defined to consume \(N + 1\) arguments, then

+
y ~ foo(theta1, ..., thetaN);
+

can be used as shorthand for

+
target += foo_lpdf(y | theta1, ..., thetaN);
+

As with the built-in functions, the suffix _lpdf is dropped and the first argument moves to the left of the tilde symbol (~) in the distribution statement.

+

Functions ending in _lpmf (for probability mass functions), behave exactly the same way. The difference is that the first argument of a density function (_lpdf) must be continuous (not an integer or integer array), whereas the first argument of a mass function (_lpmf) must be discrete (integer or integer array).

+
+
+

Overloading functions

+

As described in the reference manual, function overloading is permitted in Stan, beginning in version 2.29.

+

This means multiple functions can be defined with the same name as long as they accept different numbers or types of arguments. User-defined functions can also overload Stan library functions.

+
+

Warning on usage

+

Overloading is a powerful productivity tool in programming languages, but it can also lead to confusion. In particular, it can be unclear at first glance which version of a function is being called at any particular call site, especially with type promotion allowed between scalar types. Because of this, it is a programming best practice that overloaded functions maintain the same meaning across definitions.

+

For example, consider a function triple which has the following three signatures

+
real triple(real x);
+complex triple(complex x);
+array[] real triple(array[] real);
+

One should expect that all overloads of this function perform the same basic task. This should lead to definitions of these functions which would satisfy the following assumptions that someone reading the program would expect

+
// The function does what it says
+triple(3.0) == 9.0
+// It is defined reasonably for different types
+triple(to_complex(3.0)) == to_complex(triple(3.0))
+// A container version of this function works by element
+triple({3.0, 4.0})[0] == triple({3.0, 4.0}[0])
+

Note that none of these properties are enforced by Stan; they are mentioned merely to warn against uses of overloading which cause confusion.

+
+
+

Function resolution

+

Stan resolves overloaded functions by the number and type of arguments passed to the function. This can be subtle when multiple signatures with the same number of arguments are present.

+

Consider the following function signatures

+
real foo(int a, real b);
+real foo(real a, real b);
+

Given these, the function call foo(1.5, 2.5) is unambiguous - it must resolve to the second signature. But, the function call foo(1, 1.5) could be valid for either under Stan’s promotion rules, which allow integers to be promoted to real numbers.

+

To resolve this, Stan selects the signature which requires the fewest number of promotions for a given function call. In the above case, this means the call foo(1, 1.5) would select the first signature, because it requires 0 promotions (the second signature would require 1 promotion).

+

Furthermore, there must be only one such signature, i.e., the minimum number of promotions must be a unique minimum. This requirement forbids certain kinds of overloading. For example, consider the function signatures

+
real bar(int x, real y);
+real bar(real x, int y);
+

These signatures do not have a unique minimum number of promotions for the call bar(1, 2). Both signatures require one int to real promotion, and so it cannot be determined which is correct. Stan will produce a compilation error in this case.

+

Promotion from integers to complex numbers is considered to be two separate promotions, first from int to real, then from real to complex. This means that integer arguments will “prefer” a signature with real types over complex types.

+

For example, consider the function signatures

+
real pop(real x);
+real pop(complex x);
+

Stan will select the first signature when pop is called with an integer argument such as pop(0).

+
+
+
+

Documenting functions

+

Functions will ideally be documented at their interface level. The Stan style guide for function documentation follows the same format as used by the Doxygen (C++) and Javadoc (Java) automatic documentation systems. Such specifications indicate the variables and their types and the return value, prefaced with some descriptive text.

+

For example, here’s some documentation for the prediction matrix generator.

+
/**
+ * Return a data matrix of specified size with rows
+ * corresponding to items and the first column filled
+ * with the value 1 to represent the intercept and the
+ * remaining columns randomly filled with unit-normal draws.
+ *
+ * @param N Number of rows corresponding to data items
+ * @param K Number of predictors, counting the intercept, per
+ *          item.
+ * @return Simulated predictor matrix.
+ */
+matrix predictors_rng(int N, int K) {
+  // ...
+

The comment begins with /**, ends with */, and has an asterisk (*) on each line. It uses @param followed by the argument’s identifier to document a function argument. The tag @return is used to indicate the return value. Stan does not (yet) have an automatic documentation generator like Javadoc or Doxygen, so this just looks like a big comment starting with /* and ending with */ to the Stan parser.

+

For functions that raise exceptions, exceptions can be documented using @throws.4

+

For example,

+
 /** ...
+ * @param theta
+ * @throws If any of the entries of theta is negative.
+ */
+real entropy(vector theta) {
+  // ...
+}
+

Usually an exception type would be provided, but these are not exposed as part of the Stan language, so there is no need to document them.

+
+
+

Summary of function types

+

Functions may have a void or non-void return type and they may or may not have one of the special suffixes, _lpdf, _lpmf, _lp, _jacobian, or _rng.

+
+

Void vs. non-void return

+

Only functions declared to return void may be used as statements. These are also the only functions that use return statements with no arguments.

+

Only functions declared to return non-void values may be used as expressions. These functions require return statements with arguments of a type that matches the declared return type.

+
+
+

Suffixed or non-suffixed

+

Only functions ending in _lpmf or _lpdf and with return type real may be used as probability functions in distribution statements.

+

Only functions ending in _lp may access the log probability accumulator through distribution statements or target += statements. Such functions may only be used in the transformed parameters or model blocks.

+

Only functions ending in _rng may access the built-in pseudo-random number generators. Such functions may only be used in the generated quantities block or transformed data block, or in the bodies of other user-defined functions ending in _rng.

+
+
+
+

Recursive functions

+

Stan supports recursive function definitions, which can be useful for some applications. For instance, consider the matrix power operation, \(A^n\), which is defined for a square matrix \(A\) and non-negative integer \(n\) by \[ +A^n += +\begin{cases} +\textrm{I} & \quad\text{if } n = 0, \text{ and} \\ +A \, A^{n-1} & \quad\text{if } n > 0. +\end{cases} +\]

+

where \(\textrm{I}\) is the identity matrix. This definition can be directly translated to a recursive function definition.

+
matrix matrix_pow(matrix a, int n) {
+  if (n == 0) {
+    return diag_matrix(rep_vector(1, rows(a)));
+  } else {
+    return a *  matrix_pow(a, n - 1);
+  }
+}
+

It would be more efficient to not allow the recursion to go all the way to the base case, adding the following conditional clause.

+
else if (n == 1) {
+  return a;
+}
+
+
+

Truncated random number generation

+
+

Generation with inverse CDFs

+

To generate random numbers, it is often sufficient to invert their cumulative distribution functions. This is built into many of the random number generators. For example, to generate a standard logistic variate, first generate a uniform variate \(u \sim \textsf{uniform}(0, 1)\), then run through the inverse cumulative distribution function, \(y = \textrm{logit}(u)\). If this were not already built in as logistic_rng(0, 1), it could be coded in Stan directly as

+
real standard_logistic_rng() {
+  real u = uniform_rng(0, 1);
+  real y = logit(u);
+  return y;
+}
+

Following the same pattern, a standard normal RNG could be coded as

+
real standard_normal_rng() {
+  real u = uniform_rng(0, 1);
+  real y = inv_Phi(u);
+  return y;
+}
+

that is, \(y = \Phi^{-1}(u)\), where \(\Phi^{-1}\) is the inverse cumulative distribution function for the standard normal distribution, implemented in the Stan function inv_Phi.

+

In order to generate non-standard variates of the location-scale variety, the variate is scaled by the scale parameter and shifted by the location parameter. For example, to generate \(\textsf{normal}(\mu, \sigma)\) variates, it is enough to generate a uniform variate \(u \sim \textsf{uniform}(0, 1)\), then convert it to a standard normal variate, \(z = \Phi^{-1}(u)\), where \(\Phi^{-1}(\cdot)\) is the inverse cumulative distribution function for the standard normal, and then, finally, scale and translate it, \(y = \mu + +\sigma \times z\). In code,

+
real my_normal_rng(real mu, real sigma) {
+  real u = uniform_rng(0, 1);
+  real z = inv_Phi(u);
+  real y = mu + sigma * z;
+  return y;
+}
+

A robust version of this function would test that the arguments are finite and that sigma is non-negative, e.g.,

+
  if (is_nan(mu) || is_inf(mu)) {
+    reject("my_normal_rng: mu must be finite; ",
+           "found mu = ", mu);
+  }
+  if (is_nan(sigma) || is_inf(sigma) || sigma < 0) {
+    reject("my_normal_rng: sigma must be finite and non-negative; ",
+           "found sigma = ", sigma);
+  }
+
+
+

Truncated variate generation

+

Often truncated uniform variates are needed, as in survival analysis when a time of death is censored beyond the end of the observations. To generate a truncated random variate, the cumulative distribution is used to find the truncation point in the inverse CDF, a uniform variate is generated in range, and then the inverse CDF translates it back.

+
+

Truncating below

+

For example, the following code generates a \(\textsf{Weibull}(\alpha, \sigma)\) variate truncated below at a time \(t\),5

+
real weibull_lb_rng(real alpha, real sigma, real t) {
+  real p = weibull_cdf(t | alpha, sigma);   // cdf for lb
+  real u = uniform_rng(p, 1);               // unif in bounds
+  real y = sigma * (-log1m(u))^inv(alpha);  // inverse cdf
+  return y;
+}
+
+
+

Truncating above and below

+

If there is a lower bound and upper bound, then the CDF trick is used twice to find a lower and upper bound. For example, to generate a \(\textsf{normal}(\mu, \sigma)\) truncated to a region \((a, b)\), the following code suffices,

+
real normal_lub_rng(real mu, real sigma, real lb, real ub) {
+  real p_lb = normal_cdf(lb | mu, sigma);
+  real p_ub = normal_cdf(ub | mu, sigma);
+  real u = uniform_rng(p_lb, p_ub);
+  real y = mu + sigma * inv_Phi(u);
+  return y;
+}
+

To make this more robust, all variables should be tested for finiteness, sigma should be tested for positiveness, and lb and ub should be tested to ensure the upper bound is greater than the lower bound. While it may be tempting to compress lines, the variable names serve as a kind of chunking of operations and naming for readability; compare the multiple statement version above with the single statement

+
  return mu + sigma * inv_Phi(uniform_rng(normal_cdf(lb | mu, sigma),
+                                          normal_cdf(ub | mu, sigma)));
+

for readability. The names like p indicate probabilities, and p_lb and p_ub indicate the probabilities of the bounds. The variable u is clearly named as a uniform variate, and y is used to denote the variate being generated itself.

+ + +
+
+
+
+ + + Back to top

Footnotes

+ +
    +
  1. The main problem with comments is that they can be misleading, either due to misunderstandings on the programmer’s part or because the program’s behavior is modified after the comment is written. The program always behaves the way the code is written, which is why refactoring complex code into understandable units is preferable to simply adding comments.↩︎

  2. +
  3. Just because this makes it possible to code a rejection sampler does not make it a good idea. Rejections break differentiability and the smooth exploration of the posterior. In Hamiltonian Monte Carlo, it can cause the sampler to be reduced to a diffusive random walk.↩︎

  4. +
  5. A range of built-in validation routines is coming to Stan soon! Alternatively, the reject statement can be used to check constraints on the simplex.↩︎

  6. +
  7. As of Stan 2.9.0, the only way a user-defined producer will raise an exception is if a function it calls (including distribution statements) raises an exception via the reject statement.↩︎

  8. +
  9. The original code and impetus for including this in the manual came from the Stan forums post by user lcomm, who also explained truncation above and below.↩︎

  10. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/using-stanc.html b/docs/2_39/stan-users-guide/using-stanc.html new file mode 100644 index 000000000..27987f747 --- /dev/null +++ b/docs/2_39/stan-users-guide/using-stanc.html @@ -0,0 +1,2048 @@ + + + + + + + + + +Using the Stan Compiler + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Using the Stan Compiler

+

Stan is used in most of our interfaces through the Stan compiler stanc. Since version 2.22, the Stan compiler has been implemented in OCaml and is referred to as stanc3. The binary name is still simply stanc, so this document uses both stanc and stanc3 interchangeably.

+
+

Command-line options for stanc3

+

The stanc3 compiler has the following command-line syntax:

+
> stanc (options) <model_file>
+

where <model_file> is either a path to a file ending in .stan or .stanfunctions (which automatically sets --standalone-functions), or '-' to read from standard input.

+

The stanc3 options are:

+
    +
  • --help / -? - Displays the complete list of stanc3 options, then exits.

  • +
  • --version - Display stanc version number

  • +
  • --info - Print information about the model, such as the type information for variables and the list of used distributions.

  • +
  • --name=<model_name> - Specify the name of the class used for the implementation of the Stan model in the generated C++ code.

  • +
  • --o=<file_name> / -o=<filename> / --output=<filename> - Specify a path to an output file for generated C++ code (default = .hpp) or auto-formatting output (default: no file/print to stdout)

  • +
  • --auto-format - Pretty prints the program to the console. See more on auto formatting.

  • +
  • --allow-undefined - Do not throw a parser error if there is a function in the Stan program that is declared but not defined in the functions block.

  • +
  • --canonicalize - Make changes to the program before pretty-printing by specifying options in a comma separated list. Options are ‘deprecations’, ‘parentheses’, ‘braces’, ‘includes’, and ‘strip-comments’.

  • +
  • --include_paths=<dir1,...dirN> - Takes a comma-separated list of directories that may contain a file in an #include directive.

  • +
  • --max-line-length=<number> - Set the column number at which formatting with --auto-format attempts to split lines. The default value is 78, which results in most lines being shorter than 80 characters.

  • +
  • --print-canonical - Synonymous with --auto-format --canonicalize=deprecations,includes,parentheses,braces.

  • +
  • --print-cpp - If set, output the generated C++ Stan model class to stdout.

  • +
  • --filename-in-msg=<name> - Sets the filename used in compiler and runtime errors. If absent, the <model_file> argument is used.

  • +
  • --standalone-functions - If set, only generate the code for the functions defined in the file. This is the default behavior for .stanfunctions files.

  • +
  • --O0 (Default) Do not apply optimizations to the Stan code.

  • +
  • --O1 Apply level 1 compiler optimizations (only basic optimizations).

  • +
  • --Oexperimental WARNING: This is currently an experimental feature whose components are not thoroughly tested and may not improve a program’s performance! Allow the compiler to apply all optimizations to the Stan code.

  • +
  • --O Synonym for --O1 as of Stan 2.37. In earlier versions this was a synonym for --Oexperimental.

  • +
  • --use-opencl - If set, will use additional Stan OpenCL features enabled in the Stan-to-C++ compiler.

  • +
  • --warn-pedantic - Emit warnings in Pedantic mode which warns of potential issues in the meaning of your program. Note: This may produce false positive warnings.

  • +
  • --warn-uninitialized - Emit warnings about uninitialized variables to stderr. Currently an experimental feature.

  • +
  • --color - Control whether errors and warnings are emitted with colored styling on terminals that support it. Valid values are auto (the default), always, never. Can also be controlled by the STANC_COLOR environment variable.

  • +
+

The compiler also provides a number of debug options which are primarily of interest to stanc3 developers; use the --help option to see the full set.

+
+
+

Understanding stanc3 errors and warnings

+

During model compilation, stanc can produce a variety of errors (issues that prevent the model from being compiled) and warnings (non-fatal issues that should still be considered).

+
+

Warnings

+

Even without the optional --warn-pedantic and --warn-uninitialized command line flags, both of which enable additional warnings, stanc can still produce warnings about your program. In particular, warnings will be produced in two situations

+
    +
  1. A completely blank Stan program will produce the following warning message

    +
    Warning in 'empty.stan', line 1, column 0 to line 2, column 0:
    +    Empty model detected; this is a valid Stan model but likely unintended!
  2. +
  3. The use of any deprecated features will lead to warnings which will look as follows

    +
     Warning in 'deprecated.stan', line 2, column 10 to column 17:
    +     lkj_cov is deprecated and will be removed in Stan 3.0. Use lkj_corr with
    +     an independent lognormal distribution on the scales, see:
    +     https://mc-stan.org/docs/reference-manual/deprecations.html#lkj_cov-distribution
    +

    A single Stan program can produce many warnings during compilation.

  4. +
+
+
+

Errors

+

Errors differ from warnings in their severity and format. In particular, errors are fatal and stop compilation, so at most one error is displayed per run of stanc.

+

There are five kinds of errors emitted by stanc3

+
    +
  1. File errors occur when the file passed to stanc is either missing or cannot be opened (i.e. has permissions issues). They look like

    +
    Error: file 'notfound.stan' not found or cannot be opened
  2. +
  3. Syntactic errors occur whenever a program violates the Stan language’s syntax requirements. There are three kinds of syntax errors: “lexing” errors, which mean that the input could not be read properly at the character level; “include” errors, which occur when the #include directive fails; and “parsing” errors, which result when the structure of the program is incorrect.

    +
      +
    • The lexing errors occur due to the use of invalid characters in a program. For example, a lexing error due to the use of $ in a variable name will look like the following.

      +
      Syntax error in 'char.stan', line 2, column 7 to column 8, lexing error:
      +-------------------------------------------------
      +  1:  data {
      +  2:     int $ome_variable;
      +             ^
      +  3:  }
      +-------------------------------------------------
      +Invalid character found.
    • +
    • When an include directive is used, it can lead to errors if the included file is not found, or if a file includes itself (including a recursive loop of includes, such as A -> B -> A).

      +
      Syntax error in './incl.stan', line 1, column 0, included from
      +'./incl.stan', line 1, column 0, included from
      +'incl.stan', line 1, column 0, include error:
      +   -------------------------------------------------
      +     1:  #include <incl.stan>
      +         ^
      +   -------------------------------------------------
      +File incl.stan recursively included itself.
    • +
    • It is much more common to see parsing errors, which tend to have more in-depth explanations of the error found. For example, if a user forgets to put a size on a type like vector, as in the following, this raises a parsing (structural) error in the compiler.

      +
      Syntax error in 'vec.stan', line 3, column 10 to column 11, parsing error:
      +   -------------------------------------------------
      +     1:  data {
      +     2:     int<lower=0> N;
      +     3:     vector x;
      +                   ^
      +     4:  }
      +   -------------------------------------------------
      +Ill-formed type. Expected "[" expression "]" for vector size.
    • +
  4. +
  5. Semantic errors (also known as type errors) occur when a program is structured correctly but features an error in the type rules imposed by the language. An example of this is assigning a real value to a variable defined as an integer.

    +
    Semantic error in 'type.stan', line 2, column 3 to column 15:
    +   -------------------------------------------------
    +     1:  transformed data {
    +     2:     int x = 1.5;
    +            ^
    +     3:  }
    +   -------------------------------------------------
    +Ill-typed arguments supplied to assignment operator =:
    +The left hand side has type
    +  int
    +and the right hand side has type
    +  real
  6. +
  7. The compiler will raise an error for use of any removed features for at least one version following their removal. The deprecation warnings mentioned above eventually turn into this kind of error to prompt the user to update their model. After the version of removal, these errors will be converted to one of the other types listed here, depending on the feature.

  8. +
  9. Finally, the compiler can raise an internal error. These are caused by bugs in the compiler, not your model, and we would appreciate it if you report them on the stanc3 repo with the error message provided. These errors usually say something like “This should never happen,” and we apologize if they do.

  10. +
+
+
+
+

Pedantic mode

+

Pedantic mode is a compilation option built into Stanc3 that warns you about potential issues in your Stan program.

+

For example, consider the following program.

+
data {
+  int N;
+  array[N] real x;
+}
+parameters {
+  real sigma;
+}
+model {
+  real mu;
+  x ~ normal(mu, sigma);
+}
+

When pedantic mode is turned on, the compiler will produce the following warnings.

+
Warning in 'ped-mode-ex1.stan', line 6, column 2 to column 13:
+    The parameter sigma has no priors. This means either no prior is
+    provided, or the prior(s) depend on data variables. In the later case,
+    this may be a false positive.
+Warning in 'ped-mode-ex1.stan', line 10, column 13 to column 15:
+    The variable mu may not have been assigned a value before its use.
+Warning in 'ped-mode-ex1.stan', line 10, column 17 to column 22:
+    A normal distribution is given parameter sigma as a scale parameter
+    (argument 2), but sigma was not constrained to be strictly positive.
+

Here are the kinds of issues that pedantic mode will find (which are described in more detail in following sections):

+
    +
  • Distribution usage issues. Distribution arguments don’t match the distribution specification, or some specific distribution is used in an inadvisable way.
  • +
  • Unused parameter. A parameter is defined but doesn’t contribute to target.
  • +
  • Large or small constant in a distribution. Very large or very small constants are used as distribution arguments.
  • +
  • Control flow depends on a parameter. Branching control flow (like if/else) depends on a parameter value.
  • +
  • Parameter has multiple tildes. A parameter is on the left-hand side of multiple tildes.
  • +
  • Parameter has zero or multiple priors. A parameter has zero or more than one prior distribution.
  • +
  • Variable is used before assignment. A variable is used before being assigned a value.
  • +
  • Strict or nonsensical parameter bounds. A parameter is given questionable bounds.
  • +
  • Nonlinear transformations. When the left-hand side of a tilde statement (or first argument of a log probability function) contains a nonlinear transform which may require a Jacobian change of variables adjustment.
  • +
+

Some important limitations of pedantic mode are listed at the end of this chapter.

+
+

Distribution argument and variate constraint issues

+

When an argument to a built-in distribution certainly does not match that distribution’s specification in the Stan Functions Reference, a warning is thrown. This primarily checks if any distribution argument’s bounds at declaration, compile-time value, or subtype at declaration (e.g. simplex) is incompatible with the domain of the distribution.

+

For example, consider the following program.

+
parameters {
+  real unb_p;
+  real<lower=0> pos_p;
+}
+model {
+  1 ~ poisson(unb_p);
+  1 ~ poisson(pos_p);
+}
+

The parameter of poisson should be strictly positive, but unb_p is not constrained to be positive.

+

Pedantic mode produces the following warning.

+
Warning in 'ex-dist-args.stan', line 6, column 14 to column 19:
+  A poisson distribution is given parameter unb_p as a rate parameter
+  (argument 1), but unb_p was not constrained to be strictly positive.
+
+
+

Special-case distribution issues

+

Pedantic mode checks for some specific uses of distributions that may indicate a statistical mistake:

+
+

Uniform distributions

+

Any use of uniform distribution generates a warning, except when the variate parameter’s declared upper and lower bounds exactly match the uniform distribution bounds. In general, assigning a parameter a uniform distribution can create non-differentiable boundary conditions and is not recommended.

+

For example, consider the following program.

+
parameters {
+  real a;
+  real<lower=0, upper=1> b;
+}
+model {
+  a ~ uniform(0, 1);
+  b ~ uniform(0, 1);
+}
+

a is assigned a uniform distribution that doesn’t match its constraints.

+

Pedantic mode produces the following warning.

+
Warning in 'uniform-warn.stan', line 6, column 2 to column 20:
+  Parameter a is given a uniform distribution. The uniform distribution is
+  not recommended, for two reasons: (a) Except when there are logical or
+  physical constraints, it is very unusual for you to be sure that a
+  parameter will fall inside a specified range, and (b) The infinite gradient
+  induced by a uniform density can cause difficulties for Stan's sampling
+  algorithm. As a consequence, we recommend soft constraints rather than hard
+  constraints; for example, instead of giving an elasticity parameter a
+  uniform(0, 1) distribution, try normal(0.5, 0.5).
+
+
+

(Inverse-) Gamma distributions

+

Gamma distributions are sometimes used as an attempt to assign an improper prior to a parameter. Pedantic mode gives a warning when the Gamma arguments indicate that this may be the case.

+
+
+

lkj_corr distribution

+

Any use of the lkj_corr distribution generates a warning that suggests using the Cholesky variant instead. See the LKJ correlation distribution section of the Stan Functions Reference for details.

+
+
+
+

Unused parameters

+

A warning is generated when a parameter is declared but does not have any effect on the program. This is determined by checking whether the value of the target variable depends in any way on each of the parameters.

+

For example, consider the following program.

+
parameters {
+  real a;
+  real b;
+}
+model {
+  a ~ normal(1, 1);
+}
+

a participates in the density function but b does not.

+

Pedantic mode produces the following warning.

+
Warning in 'unused.stan', line 3, column 2 to column 9:
+    The parameter b was declared but was not used in the density calculation.
+
+
+

Large or small constants in a distribution

+

When numbers with magnitude less than 0.1 or greater than 10 are used as arguments to a distribution, it indicates that some parameter is not scaled to unit value, so a warning is thrown. See the efficiency tuning section of the Stan User’s guide for a discussion of scaling parameters.

+

For example, consider the following program.

+
parameters {
+  real x;
+  real y;
+}
+model {
+  x ~ normal(-100, 100);
+  y ~ normal(0, 1);
+}
+

The constants -100 and 100 suggest that x is not unit scaled.

+

Pedantic mode produces the following warning.

+
Warning in 'constants-warn.stan', line 6, column 13 to column 17:
+    Argument -100 suggests there may be parameters that are not unit scale;
+    consider rescaling with a multiplier, see:
+    https://mc-stan.org/docs/stan-users-guide/efficiency-tuning.html#standardizing-predictors
+Warning in 'constants-warn.stan', line 6, column 19 to column 22:
+    Argument 100 suggests there may be parameters that are not unit scale;
+    consider rescaling with a multiplier, see:
+    https://mc-stan.org/docs/stan-users-guide/efficiency-tuning.html#standardizing-predictors
+
+
+

Control flow depends on a parameter

+

Control flow statements, such as if, for and while should not depend on parameters or functions of parameters to determine their branching conditions. This is likely to introduce a discontinuity into the density function. Pedantic mode generates a warning when any branching condition may depend on a parameter value.

+

For example, consider the following program.

+
parameters {
+  real a;
+}
+model {
+  // x depends on parameter a
+  real x = a * a;
+
+  int m;
+
+  // the if-then-else depends on x which depends on a
+  if(x > 0) {
+    //now m depends on x which depends on a
+    m = 1;
+  } else {
+    m = 2;
+  }
+
+  // for loop depends on m -> x -> a
+  for (i in 0:m) {
+    a ~ normal(i, 1);
+  }
+}
+

The if and for statements are control flow that depend (indirectly) on the value of the parameter a.

+

Pedantic mode produces the following warning.

+
Warning in 'param-dep-cf-warn.stan', line 11, column 2 to line 16, column 3:
+  A control flow statement depends on parameter(s): a.
+Warning in 'param-dep-cf-warn.stan', line 19, column 2 to line 21, column 3:
+  A control flow statement depends on parameter(s): a.
+
+
+

Parameters with multiple tildes

+

A warning is generated when a parameter is found on the left-hand side of more than one ~ statement (or an equivalent target += conditional density statement). This pattern is not inherently an issue, but it is unusual and may indicate a mistake.

+

Pedantic mode only searches for repeated statements; it will not, for example, generate a warning when a ~ statement is executed repeatedly inside of a loop.

+

For example, consider the following program.

+
data {
+  real x;
+}
+parameters {
+  real a;
+  real b;
+}
+model {
+  a ~ normal(0, 1);
+  a ~ normal(x, 1);
+
+  b ~ normal(1, 1);
+}
+

Pedantic mode produces the following warning.

+
Warning in 'multi-tildes.stan', line 9, column 2 to column 19:
+  The parameter a is on the left-hand side of more than one tildes
+  statement.
+
+
+

Parameters with zero or multiple priors

+

A warning is generated when a parameter appears to have greater than or less than one prior distribution factor.

+

This analysis depends on a factor graph representation of a Stan program. A factor F that depends on a parameter P is called a prior factor for P if there is no path in the factor graph from F to any data variable except through P.

+

One limitation of this approach is that the compiler cannot distinguish between modeled data variables and other convenient uses of data variables such as data sizes or hyperparameters. This warning assumes that all data variables (except for int variables) are modeled data, which may cause extra warnings.

+

For example, consider the following program.

+
data {
+  real x;
+}
+parameters {
+  real a;
+  real b;
+  real c;
+  real d;
+}
+model
+{
+  a ~ normal(0, 1); // this is a prior
+  x ~ normal(a, 1); // this is not a prior, since data is involved
+
+  b ~ normal(x, 1); // this is also not a prior, since data is involved
+
+  // this is not a prior for c, since data is involved through b
+  // but it is a prior for b, since the data is only involved through b
+  c ~ normal(b, 1);
+
+  //these are multiple priors:
+  d ~ normal(0, 1);
+  1 ~ normal(d, 1);
+}
+

One prior is found for a and for b, while c only has a factor that touches a data variable and d has multiple priors.

+

Pedantic mode produces the following warning.

+
Warning in 'priors.stan', line 7, column 2 to column 9:
+    The parameter c has no priors. This means either no prior is provided, or
+    the prior(s) depend on data variables. In the later case, this may be a
+    false positive.
+Warning in 'priors.stan', line 8, column 2 to column 9:
+    The parameter d has 2 priors.
+
+
+
+

Variables used before assignment

+

A warning is generated when any variable is used before it has been assigned a value.

+

For example, consider the following program.

+
transformed data {
+  real x;
+  if (1 > 2) {
+    x = 1;
+  } else {
+    print("oops");
+  }
+  print(x);
+}
+

Since x is only assigned in one of the branches of the if statement, it might get to print(x) without having been assigned to.

+

Pedantic mode produces the following warning.

+
Warning in 'uninit-warn.stan', line 7, column 8 to column 9:
+  The variable x may not have been assigned a value before its use.
+
+
+

Strict or nonsensical parameter bounds

+

Except when there are logical or physical constraints, it is very unusual for you to be sure that a parameter will fall inside a specified range. A warning is generated for all parameters declared with the bounds <lower=.., upper=..> except for <lower=0, upper=1> or <lower=-1, upper=1>.

+

In addition, a warning is generated when a parameter bound is found to have lower >= upper.

+

For example, consider the following program.

+
parameters {
+  real<lower=0, upper=1> a;
+  real<lower=-1, upper=1> b;
+  real<lower=-2, upper=1012> c;
+}
+model {
+  c ~ normal(b, a);
+}
+

Pedantic mode produces the following warning.

+
Warning in 'hard-constraint.stan', line 4, column 2 to column 31:
+    Your Stan program has a parameter c with a lower and upper bound in its
+    declaration. These hard constraints are not recommended, for two reasons:
+    (a) Except when there are logical or physical constraints, it is very
+    unusual for you to be sure that a parameter will fall inside a specified
+    range, and (b) The infinite gradient induced by a hard constraint can
+    cause difficulties for Stan's sampling algorithm. As a consequence, we
+    recommend soft constraints rather than hard constraints; for example,
+    instead of constraining an elasticity parameter to fall between 0, and 1,
+    leave it unconstrained and give it a normal(0.5,0.5) prior distribution.
+
+
+

Nonlinear transformations

+

When a parameter is transformed in a non-linear fashion, an adjustment must be applied to account for distortion caused by the transform. This is discussed in depth in the Changes of variables section.

+

This portion of pedantic mode tries to detect instances where such an adjustment would be necessary and remind the user.

+

For example, consider the following program.

+
parameters {
+  real y;
+}
+model {
+  log(y) ~ normal(0,1);
+}
+

Pedantic mode produces the following warning.

+
Warning in 'jacobian.stan', line 5, column 2 to column 23:
+    Left-hand side of distribution statement (~) may contain a non-linear
+    transform of a parameter or local variable. If it does, you need to
+    include a target += statement with the log absolute determinant of the
+    Jacobian of the transform. You could also consider defining a transformed
+    parameter and using jacobian += in the transformed parameters block.
+
+
+

Pedantic mode limitations

+
    +
  • Constant values are sometimes uncomputable

    +

    Pedantic mode attempts to evaluate expressions down to literal values so that they can be used to generate warnings. For example, in the code normal(x, 1 - 2), the expression 1 - 2 will be evaluated to -1, which is not a valid scale argument so a warning is generated. However, this strategy is limited; it is often impossible to fully evaluate expressions in finite time.

  • +
  • Container types

    +

    Currently, indexed variables are not handled intelligently, so they are treated as monolithic variables. Each analysis treats indexed variables conservatively (erring toward generating fewer warnings).

  • +
  • Data variables

    +

    The declaration information for data variables is currently not considered, so using data as incompatible arguments to distributions may not generate the appropriate warnings.

  • +
  • Control flow dependent on parameters in nested functions

    +

    If a parameter is passed as an argument to a user-defined function within another user-defined function, and then some control flow depends on that argument, the appropriate warning will not be thrown.

  • +
+
+
+
+

Automatic updating and formatting of Stan programs

+

In addition to compiling Stan programs, stanc3 features several flags which can be used to format Stan programs and update them to the most recent Stan syntax by removing any deprecated features which can be automatically replaced.

+

These flags work for both .stan model files and .stanfunctions function files. They can be combined with --o to redirect the formatted output to a new file.

+
+

Automatic formatting

+

Invoking stanc --auto-format <model_file> will print a version of your model which has been re-formatted. The goal is to have this automatic formatting stay as close as possible to the Stan Program Style Guide. This means spacing, indentation, and line length are all regularized. Some deprecated features, like the use of # for line comments, are replaced, but the goal is mainly to preserve the program while formatting it.

+

By default, this will try to split lines at or before column 78. This number can be changed using --max-line-length.

+
+
+

Canonicalizing

+

In addition to automatic formatting, stanc can also “canonicalize” programs by updating deprecated syntax, removing unnecessary parentheses, and adding braces around bodies of if statements and for and while loops.

+

This can be done by using stanc --auto-format --canonicalize=... where ... is a comma-separated list of options. Currently these options are:

+
    +
  • deprecations

    +

    Removes deprecated syntax such as replacing deprecated functions with their drop-in replacements.

  • +
  • parentheses

    +

    Removes unnecessary extra parentheses, such as converting y = ((x-1)) to y = x - 1

  • +
  • braces

    +

    Places braces around all blocks. For example, the following statement

    +
    if (cond)
    +  //result
    +

    will be formatted as

    +
    if (cond) {
    +  //result
    +}
    +

    and similarly for both kinds of loops containing a single statement.

  • +
  • includes

    +

    This will pretty-print code from other files included with #include as part of the program. This was the default behavior prior to Stan 2.29. When not enabled, the pretty-printer output will include the same #include directives as the input program.

  • +
+

Invoking stanc --print-canonical <model_file> is synonymous with running stanc --auto-format --canonicalize=deprecations,braces,parentheses,includes

+
+
+

Known issues

+

The formatting and canonicalizing features of stanc3 are still under development. The following are some known issues one should be aware of before using either:

+
    +
  • Oddly placed comments

    +

    If your Stan program features comments in unexpected places, such as inside an expression, they may be moved in the process of formatting. Moved comments are prefixed with the string ^^^: to indicate they originally appeared higher in the program.

    +

    We hope to improve this functionality in future versions. For now, this can usually be avoided by manually moving the comment outside of an expression, either by placing it on its own line or following a separator such as a comma or keyword.

  • +
  • Failure to recreate strange #include structure

    +

    Printing without include inlining (that is, without --canonicalize=includes) can fail when includes were used in atypical locations, such as in the middle of statements. We recommend either printing with inlining enabled or reconsidering the use of includes in this way.

  • +
+
+
+
+

Optimization

+

The stanc3 compiler can optimize the code of a Stan model during compilation. The optimized model code behaves the same as unoptimized code, but it may be faster, more memory efficient, or more numerically stable.

+

This section introduces the available optimization options and describes their effect.

+

To print out a representation of the optimized Stan program, use the stanc3 command-line flag --debug-optimized-mir-pretty. To print an analogous representation of the Stan program prior to optimization, use the flag --debug-transformed-mir-pretty.

+
+

Optimization levels

+

To turn optimizations on, the user specifies the desired optimization level. The level specifies the set of optimizations to use. The chosen optimizations are used in a specific order, with some of them applied repeatedly.

+

Optimization levels are specified by the numbers 0 and 1 and the ‘experimental’ tag:

+
    +
  • O0 No optimizations are applied.
  • +
  • O1 Optimizations that are simple, do not dramatically change the program, and are unlikely to noticeably slow down compile times are applied.
  • +
  • Oexperimental All optimizations are applied. Some of these are not thoroughly tested and may not always improve a program’s performance.
  • +
+

O0 is the default setting.

+

The levels include these optimizations:

+ +

In addition, Oexperimental will apply more repetitions of the optimizations, which may increase compile times.

+
+
+

O1 Optimizations

+
+

Dead code elimination

+

Dead code is code that does not affect the behavior of the program. Code is not dead if it affects target, the value of any outside-observable variable like transformed parameters or generated quantities, or side effects such as print statements. Removing dead code can speed up a program by avoiding unnecessary computations.

+

Example Stan program:

+
model {
+  int i;
+  i = 5;
+  for (j in 1:10);
+  if (0) {
+    print("Dead code");
+  } else {
+    print("Hi!");
+  }
+}
+

Compiler representation of program before dead code elimination (simplified from the output of --debug-transformed-mir-pretty):

+
log_prob {
+  int i = 5;
+  for(j in 1:10) {
+    ;
+  }
+  if(0) {
+    FnPrint__("Dead code");
+  } else {
+    FnPrint__("Hi!");
+  }
+}
+

Compiler representation of program after dead code elimination (simplified from the output of --debug-optimized-mir-pretty):

+
log_prob {
+  int i;
+  FnPrint__("Hi!");
+}
+
+
+

Constant propagation

+

Constant propagation replaces uses of a variable which is known to have a constant value C with that constant C. This removes the overhead of looking up the variable, and also makes many other optimizations possible (such as static loop unrolling and partial evaluation).

+

Example Stan program:

+
transformed data {
+  int n = 100;
+  array[n] int a;
+  for (i in 1:n) {
+    a[i] = i;
+  }
+}
+

Compiler representation of program before constant propagation (simplified from the output of --debug-transformed-mir-pretty):

+
prepare_data {
+  data int n = 100;
+  data array[int, n] a;
+  for(i in 1:n) {
+    a[i] = i;
+  }
+}
+

Compiler representation of program after constant propagation (simplified from the output of --debug-optimized-mir-pretty):

+
prepare_data {
+  data int n = 100;
+  data array[int, 100] a;
+  for(i in 1:100) {
+    a[i] = i;
+  }
+}
+
+
+

Copy propagation

+

Copy propagation is similar to expression propagation, but only propagates variables rather than arbitrary expressions. This can reduce the complexity of the code for other optimizations such as expression propagation.

+

Example Stan program:

+
model {
+  int i = 1;
+  int j = i;
+  int k = i + j;
+}
+

Compiler representation of program before copy propagation (simplified from the output of --debug-transformed-mir-pretty):

+
log_prob {
+    int i = 1;
+    int j = i;
+    int k = (i + j);
+}
+

Compiler representation of program after copy propagation (simplified from the output of --debug-optimized-mir-pretty):

+
log_prob {
+  int i = 1;
+  int j = i;
+  int k = (i + i);
+}
+
+
+

Partial evaluation

+

Partial evaluation searches for expressions that we can replace with a faster, simpler, more memory efficient, or more numerically stable expression with the same meaning.

+

Example Stan program:

+
model {
+  real a = 1 + 1;
+  real b = log(1 - a);
+  real c = a + b * 5;
+}
+

Compiler representation of program before partial evaluation (simplified from the output of --debug-transformed-mir-pretty):

+
log_prob {
+  real a = (1 + 1);
+  real b = log((1 - a));
+  real c = (a + (b * 5));
+}
+

Compiler representation of program after partial evaluation (simplified from the output of --debug-optimized-mir-pretty):

+
log_prob {
+  real a = 2;
+  real b = log1m(a);
+  real c = fma(b, 5, a);
+}
+
+
+

Function inlining

+

Function inlining replaces each function call to each user-defined function f with the body of f. It does this by copying the function body to the call site and appropriately renaming the argument variables. This optimization can speed up a program by avoiding the overhead of a function call and providing more opportunities for further optimizations (such as partial evaluation).

+

Example Stan program:

+
functions {
+  int incr(int x) {
+    int y = 1;
+    return x + y;
+  }
+}
+transformed data {
+  int a = 2;
+  int b = incr(a);
+}
+

Compiler representation of program before function inlining (simplified from the output of --debug-transformed-mir-pretty):

+
functions {
+  int incr(int x) {
+    int y = 1;
+    return (x + y);
+  }
+}
+
+prepare_data {
+  data int a = 2;
+  data int b = incr(a);
+}
+

Compiler representation of program after function inlining (simplified from the output of --debug-optimized-mir-pretty):

+
prepare_data {
+  data int a;
+  a = 2;
+  data int b;
+  data int inline_sym1__;
+  data int inline_sym3__;
+  inline_sym3__ = 0;
+  for(inline_sym4__ in 1:1) {
+    int inline_sym2__;
+    inline_sym2__ = 1;
+    inline_sym3__ = 1;
+    inline_sym1__ = (a + inline_sym2__);
+    break;
+  }
+  b = inline_sym1__;
+}
+

In this code, the for loop and break are used to simulate the behavior of a return statement. The value to be returned is held in inline_sym1__. The flag variable inline_sym3__ indicates whether a return has occurred and is necessary to handle return statements nested inside loops within the function body.

+
+
+

Matrix memory layout optimization

+

Matrices and vector variables which require automatic-differentiation (AD) in Stan can be represented in two different forms.

+

The first (and default) representation is the “Array of Structs” (AoS) or “Matrix of vars” (matvar) layout. A “var” is the term used in the Stan implementation of autodiff for a single real. It is represented as a structure containing its value and its adjoint. The AoS representation constructs matrices and vectors by simply using those structures as the elements of the matrix internally. This is flexible and very general, but many operations want to deal with the values or the adjoints as blocks, requiring expensive memory access patterns.

+

The second representation is the “Struct of Arrays” (SoA) or “Var of matrices” (varmat) layout. Rather than a matrix containing tiny structures of one value and one adjoint each, this representation uses a single structure which contains separately a matrix of values and a matrix of adjoints. Some operations, like iterating over elements or assigning to specific indices, become more expensive, but many matrix operations like multiplications become much faster in this representation.

+

More general reading on AoS vs SoA can be found on Wikipedia

+

This optimization pass attempts to identify which matrix or vector variables in the Stan program are candidates for using the SoA representation. The conditions change over time, but broadly speaking:

+
    +
  • Any Stan Math Library functions the matrix is passed to must be able to support it.
  • +
  • The matrix should not be accessed/assigned elementwise in a loop.
  • +
+

The debug flag --debug-mem-patterns will list each variable and whether it is using the AoS representation or the SoA representation.

+
+
+
+

Oexperimental Optimizations

+
+

Automatic-differentiation level optimization

+

Stan variables can have two auto-differentiation (AD) levels: AD or non-AD. AD variables carry gradient information with them, which allows Stan to calculate the log-density gradient, but they also have more overhead than non-AD variables. It is therefore inefficient for a variable to be AD unnecessarily. AD-level optimization sets every variable to be a floating point type unless its gradient is necessary.

+

Example Stan program:

+
data {
+  real y;
+}
+model {
+  real x = y + 1;
+}
+

Compiler representation of program before AD-level optimization (simplified from the output of --debug-transformed-mir-pretty):

+
input_vars {
+  real y;
+}
+
+log_prob {
+  real x = (y + 1);
+}
+

Compiler representation of program after AD-level optimization (simplified from the output of --debug-optimized-mir-pretty):

+
input_vars {
+  real y;
+}
+
+log_prob {
+  data real x = (y + 1);
+}
+
+
+

One step loop unrolling

+

One step loop unrolling is similar to static loop unrolling. However, this optimization only ‘unrolls’ the first loop iteration, and can therefore work even when the total number of iterations is not predictable. This can speed up a program by providing more opportunities for further optimizations such as partial evaluation and lazy code motion.

+

Example Stan program:

+
data {
+  int n;
+}
+transformed data {
+  int x = 0;
+  for (i in 1:n) {
+    x += i;
+  }
+}
+

Compiler representation of program before one step static loop unrolling (simplified from the output of --debug-transformed-mir-pretty):

+
prepare_data {
+  data int n = FnReadData__("n")[1];
+  data int x = 0;
+  for(i in 1:n) {
+    x = (x + i);
+  }
+}
+

Compiler representation of program after one step static loop unrolling (simplified from the output of --debug-optimized-mir-pretty):

+
prepare_data {
+  data int n = FnReadData__("n")[1];
+  int x = 0;
+  if((n >= 1)) {
+    x = (x + 1);
+    for(i in (1 + 1):n) {
+      x = (x + i);
+    }
+  }
+}
+
+
+

Expression propagation

+

Expression propagation replaces the uses of a variable which is known to have the value of an expression E with that expression E. This often results in recalculating the expression, but provides more opportunities for further optimizations such as partial evaluation. Expression propagation is always followed by lazy code motion to avoid unnecessarily recomputing expressions.

+

Example Stan program:

+
data {
+  int m;
+}
+transformed data {
+  int n = m+1;
+  int a[n];
+  for (i in 1:n-1) {
+    a[i] = i;
+  }
+}
+

Compiler representation of program before expression propagation (simplified from the output of --debug-transformed-mir-pretty):

+
prepare_data {
+  data int m = FnReadData__("m")[1];
+  data int n = (m + 1);
+  data array[int, n] a;
+  for(i in 1:(n - 1)) {
+    a[i] = i;
+  }
+}
+

Compiler representation of program after expression propagation (simplified from the output of --debug-optimized-mir-pretty):

+
prepare_data {
+  data int m = FnReadData__("m")[1];
+  data int n = (m + 1);
+  data array[int, (m + 1)] a;
+  for(i in 1:((m + 1) - 1)) {
+    a[i] = i;
+  }
+}
+
+
+

Lazy code motion

+

Lazy code motion rearranges the statements and expressions in a program with the goals of:

+
    +
  • Avoiding computing expressions more than once, and
  • +
  • Computing expressions as late as possible (to minimize the strain on the working memory set).
  • +
+

To accomplish these goals, lazy code motion will perform optimizations such as:

+
    +
  • Moving a repeatedly calculated expression to its own variable (also referred to as common-subexpression elimination)
  • +
  • Moving an expression outside of a loop if it does not need to be in the loop (also referred to as loop-invariant code motion)
  • +
+

Lazy code motion can make some programs significantly more efficient by avoiding redundant or early computations.

+

As currently implemented in the compiler, it may move items between blocks in a way that actually increases overall computation. Improving this is an ongoing project.

+

Example Stan program:

+
model {
+  real x;
+  real y;
+  real z;
+
+  for (i in 1:10) {
+    x = sqrt(10);
+    y = sqrt(i);
+  }
+  z = sqrt(10);
+}
+

Compiler representation of program before lazy code motion (simplified from the output of --debug-transformed-mir-pretty):

+
log_prob {
+  real x;
+  real y;
+  real z;
+  for(i in 1:10) {
+    x = sqrt(10);
+    y = sqrt(i);
+  }
+  z = sqrt(10);
+}
+

Compiler representation of program after lazy code motion (simplified from the output of --debug-optimized-mir-pretty):

+
log_prob {
+  data real lcm_sym4__;
+  data real lcm_sym3__;
+  real x;
+  real y;
+  lcm_sym4__ = sqrt(10);
+  real z;
+  for(i in 1:10) {
+    x = lcm_sym4__;
+    y = sqrt(i);
+  }
+  z = lcm_sym4__;
+}
+
+
+

Static loop unrolling

+

Static loop unrolling takes a loop with a predictable number of iterations X and replaces it by writing out the loop body X times. The loop index in each repeat is replaced with the appropriate constant. This optimization can speed up a program by avoiding the overhead of a loop and providing more opportunities for further optimizations (such as partial evaluation).

+

Example Stan program:

+
transformed data {
+  int x = 0;
+  for (i in 1:4) {
+    x += i;
+  }
+}
+

Compiler representation of program before static loop unrolling (simplified from the output of --debug-transformed-mir-pretty):

+
prepare_data {
+  data int x = 0;
+  for(i in 1:4) {
+    x = (x + i);
+  }
+}
+

Compiler representation of program after static loop unrolling (simplified from the output of --debug-optimized-mir-pretty):

+
prepare_data {
+  data int x;
+  x = 0;
+  x = (x + 1);
+  x = (x + 2);
+  x = (x + 3);
+  x = (x + 4);
+}
+ + +
+
+
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/wiener_diffusion_model.html b/docs/2_39/stan-users-guide/wiener_diffusion_model.html new file mode 100644 index 000000000..b8cbc39e1 --- /dev/null +++ b/docs/2_39/stan-users-guide/wiener_diffusion_model.html @@ -0,0 +1,1598 @@ + + + + + + + + + +Wiener Diffusion Model + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Wiener diffusion model

+

Diffusion models, sometimes also called Wiener diffusion models, are among the most frequently used model families in modeling two-alternative forced-choice tasks (see Wagenmakers (2009), for a review). Diffusion models allow response times and responses to be modeled jointly. The basic version of a diffusion model comprises four parameters: the boundary separation, \(a\), the relative starting point, \(w\), the drift rate, \(v\), and the non-decision time, \(t0\) (Ratcliff 1978). In the basic model, it is assumed that the four basic parameters are the same for the whole experiment. As this assumption is very strict and there are examples that suggest that the basic parameters can be different from trial to trial, so-called inter-trial variabilities were introduced and the basic four-parameter model was extended to a seven-parameter model. In the seven-parameter extension of the diffusion model, the following three parameters are added: the inter-trial variability in relative starting point, \(s_w\), the inter-trial variability in drift rate, \(s_v\), and the inter-trial variability in non-decision time, \(s_{t0}\) (Nicenboim, Schad, and Vasishth 2025).

+

Data for the diffusion model is two-dimensional: There is one vector for the reaction times, \(y\), and one vector for the given responses, \(\text{resp}\). The reaction times shall be positive, continuous and in seconds, the responses shall be binary.

+

As a diffusion model describes the decision process for a decision with exactly two choices, there exist reaction time distributions for each response alternative. This means that the probability density function (\(p\)) splits into one part for one response alternative and one part for the other response alternative. In the following, we will refer to one alternative as the upper response boundary and to the other alternative as the lower response boundary. \(p\) of the lower response boundary can be obtained when inserting \(-v\) and \(1-w\) to \(p\) of the upper response boundary. Let’s call \(p\) for the lower response boundary \(p_0\) and \(p\) for the upper response boundary \(p_1\). Then:

+

\[ +p_0(a,t0,v,w,sv,sw,st0) = p_1(a,t0,-v,1-w,sv,sw,st0) +\]

+

Usually, a \(PDF\) integrates to 1. In the case of the diffusion model, only the sum of both parts, \(p_0\) and \(p_1\), integrates to 1. This is called defective.

+
+
+
+ +
+
+Figure 1: Realization of a Four-Parameter Diffusion Process Modeling the Binary Decision Process. Image from Henrich et al. (2024), distributed under the Creative Commons Attribution 4.0 International License. Note. The parameters are the boundary separation a for two response alternatives, the relative starting point w, the drift rate v, and the non-decision time t0. The decision process is illustrated as a jagged line between the two boundaries. The predicted distributions of the reaction times are depicted as curved lines below and above the response boundaries (blue). +
+
+
+

In this model it is assumed that the decision process behaves like a random walk and we are interested in the first time that the random walk crosses one of the two decision boundaries. Hence, we are interested in the first-passage time of the decision process. The Stan function wiener_lpdf() returns the logarithm of the first-passage time density function for a diffusion model with up to seven parameters for upper boundary responses, \(\log(p_1)\). As can be seen above, it suffices to implement the density for only one response boundary, as the other can be obtained by mirroring the starting point and drift rate. Any combination of fixed and estimated parameters can be specified. In other words, with this implementation it is not only possible to estimate parameters of the full seven-parameter model, but also to estimate restricted models such as the basic four-parameter model, or a five- or six-parameter model, or even a one-parameter model when fixing the other six parameters.

+

For example, it is possible to permit variability in just one or two parameters and to fix the other variabilities to 0, or even to estimate a three-parameter model when fixing more parameters (e.g., fixing the relative starting point at 0.5).

+

It is assumed that the reaction time data that correspond to the upper response boundary \(y_\text{upper}\) is distributed according to wiener_lpdf():

+

\[ +y_\text{upper} \sim \operatorname{wiener\_lpdf}(a, t0, w, v, s_v, s_w, s_{t0}) +\] and the reaction time data that correspond to the lower response boundary \(y_\text{lower}\) is distributed according to wiener_lpdf() with mirrored starting point and drift rate:

+

\[ +y_\text{lower} \sim \operatorname{wiener\_lpdf}(a, t0, 1-w, -v, s_v, s_w, s_{t0}) +\]

+
+

Function call example

+

The following example demonstrates a diffusion model call in Stan:

+
data {
+  int <lower=0> N; // Number of trials
+  array[N] real rt; // response times (in seconds )
+  array[N] int <lower=0, upper=1> resp; // responses {0 ,1}
+}
+transformed data{
+  real min_rt = min(rt);
+}
+parameters {
+  real <lower=0> a;                // boundary separation
+  real v;                          // drift
+  real <lower=0, upper=1> w;       // relative starting point
+  real <lower=0, upper=min_rt> t0; // non-decision time
+
+  real <lower=0> sv;               // variability in drift
+  // variability in starting point
+  real <lower=0, upper=fmin(2 * w, 2 * (1 - w))> sw; 
+  real <lower=0> st0;             // variability in non-decision time
+}
+transformed parameters{
+  real one_minus_w = 1 - w;
+  real neg_v = -v;
+}
+model {
+  // prior
+  a ~ normal(1, 1);
+  w ~ normal(0.5, 0.1);
+  v ~ normal(2, 3);
+  t0 ~ normal(0.435, 0.12);
+
+  sv ~ normal(1, 3);
+  st0 ~ normal(0.183, 0.09);
+  sw ~ beta(1, 3);
+
+  // likelihood (diffusion model)
+  for (i in 1:N) {
+    if (resp[i] == 1) {
+      // upper boundary
+      target += wiener_full_lpdf(rt[i] | a, t0, w, v,
+                                         sv, sw, st0);
+    } else {
+      // lower boundary: mirror drift and starting point
+      target += wiener_full_lpdf(rt[i] | a, t0, one_minus_w,
+                                         neg_v, sv, sw, st0);
+    }
+  }
+}
+
+

The data block

+

The data should consist of at least three variables:

+
    +
  1. The number of trials N,
  2. +
  3. the response, coded as 0 = “lower bound” and 1 = “upper bound”, and
  4. +
  5. the reaction times in seconds (not milliseconds).
  6. +
+

Note that two different ways of coding responses are commonly used: First, in response coding, the boundaries correspond to the two response alternatives. Second, in accuracy coding, the boundaries correspond to correct (upper bound) and wrong (lower bound) responses. This means, depending on the coding you choose, the bounds mentioned in the second variable above differ and the response variable will have a different form.

+

Most often, an experimenter wants to find out whether an experimental manipulation influences the model parameters. As there exist psychological interpretations for each diffusion model parameter, the experimenter can draw conclusions from differing parameters. Therefore, usually a separate diffusion model is computed for each experimental group to enable a comparison of the parameters between the groups. This can be a manipulation between different subjects, like an experimental group and a control group (so-called between-subject manipulations). However, this can also be a manipulation within the same subject by presenting stimuli from different experimental groups (so-called within-subject manipulations). Depending on the experimental design, one would typically also provide the number of conditions and the condition associated with each trial as a vector. Then, one model for each condition will be computed. This means that the parameters also have to be defined for each condition.

+

In a hierarchical setting, the data block would also specify the number of participants and the participant associated with each trial as a vector. It is also possible to hand over a precision value in the data block.

+
+
+

The parameters block

+

The model arguments of the wiener_lpdf() function that are not fixed to a certain value are defined as parameters in the parameters block. In this block, it is also possible to insert restrictions on the parameters. Note that the MCMC algorithm iteratively searches for the next parameter set. If the suggested sample falls outside the internally defined parameter ranges, the program will throw an error, which causes the algorithm to restart the current iteration. Since this slows down the sampling process, it is advisable to include the parameter ranges in the definition of the parameters in the parameters block to improve the sampling process (see table below for the parameter ranges). In addition, the parameter space is further constrained by the following conditions:

+
    +
  1. The non-decision time \(t_0\) has to be smaller or equal to the observed reaction time: \(t0 \leq y\).
  2. +
  3. The varying relative starting point \(w\) has to be in the interval (0,1) and thus,
  4. +
+

\[ +\begin{aligned} +&w + \frac{s_w}{2} < 1 \text{, and} \\ +&0 < w-\frac{s_w}{2} +\end{aligned} +\]

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterRangeParameterRange
\(a\)(0, \(\infty\))\(y\)(0, \(\infty\))
\(v\)(-\(\infty\), \(\infty\))\(s_v\)[0, \(\infty\))
\(w\)(0,1)\(s_w\)[0,min(2w, 2(1-w)))
\(t_0\)[0,\(\infty\))\(s_{t0}\)[0,\(\infty\))
+
+
+

The model block

+

In the model block, the priors and likelihood are defined for the upper and the lower response boundary. Different kinds of priors can be specified here. Generally, the regularization induced by mildly informative priors can help both statistically and computationally.

+

In the second part of the model block, the data generating distribution is applied to all responses. The drift rate \(v\) and relative starting point \(w\) have to be mirrored for responses at the lower boundary.

+

For more details regarding the application of the diffusion model in Stan, see Henrich et al. (2024).

+
+
+
+

Truncated and censored data

+

Truncation and censoring frequently occur in psychological data collection. For reaction time data, truncated and censored data regularly arise in psychological studies as a consequence of using response windows or deadlines. These are sometimes introduced in the analysis of data to exclude reaction times that appear too short or too long, but they are also sometimes already built into the study procedures to push participants to respond within a specific temporal window.

+

Depending on the implementation of the response window, two different types of data arise: truncated data or censored data. Since the effects of truncation or censoring on summary statistics such as mean, median, standard deviation, and skewness are regularly too large to ignore (Ulrich and Miller 1994), data analysts are well advised to account for these effects.

+

As described in the Truncated or Censored Data chapter, the cumulative distribution function (\(F\)) and its complement (\(\text{CCDF}\)) are needed to model truncated and censored data.

+

As explained above, \(p\) is defined defectively, meaning that only the sum of \(p\)s for both response alternatives integrates to 1. For the same reason, \(F\) and \(\text{CCDF}\) are also implemented defectively. Analogously, only the sum of the \(F\)s and \(\text{CCDF}\)s for both response alternatives asymptotes above at 1.

+

In the case of the diffusion model, \(F\) asymptotes above at the probability \(PROB\) to hit the corresponding response boundary: (for simplicity, we omit the inter-trial variabilities in the following)

+

\[ +\begin{aligned} +F_1(\infty\mid a,w,v) &= \text{PROB}(a,w,v) \text{ and} \\ +F_0(\infty\mid a,w,v) &= F_1(\infty\mid a,1-w,-v) = \text{PROB}(a,1-w,-v) +\end{aligned} +\]

+
+

Modeling truncated data with the diffusion model

+

Data are called truncated when there is no information available for analysis from trials with values larger (or smaller) than a right (or left) reaction-time bound. In reaction time experiments, reaction time data are truncated if trials with reaction times outside the response window are excluded from the analysis. Not even a count of those omitted trials is kept.

+

Let \(L\) denote the left reaction-time bound and \(U\) denote the right reaction-time bound of a response window.

+

Then, the density of truncated data for both response boundaries 0 and 1, here denoted as \(\text{resp}\in\{0,1\}\), can be formulated as follows:

+

\[ +\begin{aligned} +&p_{\text{resp}}(y \mid L<X\leq U, a, w, v) = \\ &\frac{p_{\text{resp}}(y \mid a, w, v)\cdot \mathbb{I}_{\{L<y\leq U\}}} +{\bigl(F_0(U \mid a, w, v)+F_1(U \mid a, w, v)\bigr) - +\bigl(F_0(L\mid a, w, v)+F_1(L\mid a, w, v)\bigr)} +\end{aligned} +\]

+

The density of left truncated data can be formulated as follows. \[ +\begin{aligned} +p_{\text{resp}}(y \mid L<X, a, w, v) = \frac{p_{\text{resp}}(y \mid a, w, v)\cdot \mathbb{I}_{\{L<y\}}} +{1-\bigl(F_0(L \mid a, w, v)+F_1(L \mid a, w, v)\bigr)}, +\end{aligned} +\]

+

The density of right truncated data can be formulated as follows.

+

\[ +\begin{aligned} +p_{\text{resp}}(y \mid X\leq U, a, w, v) = \frac{p_{\text{resp}}(y \mid a, w, v)\cdot \mathbb{I}_{\{y\leq U\}}}{F_0(U \mid a, w, v)+F_1(U \mid a, w, v)} +\end{aligned} +\]

+

As the functions are implemented defectively, a truncated diffusion model cannot be calculated with the truncation functor \(T[,]\) as it would usually be done in Stan. This means the function call: y ~ wiener(...)T[L,U] does not work the way it is supposed to. When the truncation functor is called in Stan, Stan searches for a CDF implementation internally. In the case of the diffusion model, Stan would find the CDF, but is not aware of its defective implementation and calculates the computations as if it were a non-defective CDF. This causes misleading and incorrect results.

+

To implement the truncated model, write out the function shown above on the log-scale with left_bound = L and right_bound = U, where wiener_lcdf_unnorm() calls the logarithmized CDF of the diffusion model at the response-1-boundary:

+
model {
+  real log_denom = log_diff_exp(
+    log_sum_exp(
+      wiener_lcdf_unnorm(right_bound | a, t0, w, v, sv, sw, st),
+      wiener_lcdf_unnorm(right_bound | a, t0, one_minus_w, neg_v,
+                                       sv, sw, st)),
+    log_sum_exp(
+      wiener_lcdf_unnorm(left_bound | a, t0, w, v, sv, sw, st),
+      wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w, neg_v,
+                                      sv, sw, st)));
+  // likelihood
+  for (i in 1:N) {
+    if (resp[i] == 1) {
+      // response -1 boundary
+      target += wiener_lpdf (rt[i] | a, t0, w, v, sv, sw, st);
+    } else { 
+      // response -0 boundary ( mirror v and w)
+      target += wiener_lpdf (rt[i] | a, t0, one_minus_w, neg_v,
+                                     sv, sw, st);
+    }
+  } // end for
+  target += -N * log_denom;
+}
+

For details of how to call a truncated model within the parallelization routine of reduce_sum or with truncation on only one side, see Henrich and Klauer (2026).

+
+
+

Modeling censored data with the diffusion model

+

Data are censored when observations that are above or below a right or left boundary value are reported as occurrences of the event \((y > +U)\), for \(U\) the right bound, or as occurrences of the event \((y \leq +L)\), for \(L\) the left bound, respectively. Like for truncated data, the range of the possible values is restricted, but the number of observations that fall outside the boundaries is kept, whereas in truncation, no count would be kept.

+

For the censored model, we distinguish two cases. In the first case, the responses of the censored trials are known, but the reaction times are not known. In the second case, neither the responses nor the reaction times of the censored trials are known. Note that the second case differs from a truncated model in the fact that the number of censored trials is still known. Consider first the case where the response is known even for censored data.

+

To model such data in Stan, the left and right reaction time bounds, left_bound and right_bound, respectively, are handed over in the data block, as well as a vector censored that tracks whether a trial is censored (= 1) or not (= 0), and counts of trials censored at the left reaction time bound and counts of trials censored at the right reaction time bound for each response in {0,1}. There are four such count variables: N_cens_left_0, N_cens_left_1, N_cens_right_0, N_cens_right_1:

+
model {
+  for (i in 1:N) {
+    if (censored[i] == 0) {
+      if (resp[i] == 1) {
+        y[i] ~ wiener(a, t0, w, v, sv, sw, st0);
+      } else if (resp[i] == 0) {
+        y[i] ~ wiener(a, t0, one_minus_w, neg_v, sv, sw, st0);
+      }
+    }
+  }
+
+  // likelihood (response = 0)
+  target += N_cens_left_0 
+         * wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w, neg_v,
+                                           sv, sw, st0);
+
+  target += N_cens_right_0 
+         * wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w, neg_v,
+                                             sv, sw, st0);
+
+  // likelihood (response = 1)
+  target += N_cens_left_1 
+         * wiener_lcdf_unnorm(left_bound | a, t0, w, v, sv, sw, st0);
+
+  target += N_cens_right_1 
+         * wiener_lccdf_unnorm(right_bound | a, t0, w, v, sv, sw, st0);
+}
+

When data are censored at only one side, meaning that the reaction time constraint only exists for one of the two boundaries, omit the lines for the other side in the code. A both sided reaction time window would be, for example, when only reaction times are accepted that occur between 0.2 and 0.8 seconds. A one sided reaction time constraint would be, for example, when all reaction times below 0.8 seconds are accepted.

+

When data consist of many conditions (as explained in the beginning), it is sometimes more convenient to loop over all trials instead of using count variables as described above, using the following notation and code. A vector containing the information whether a trial is censored or not, here censored, needs to be handed over in the data block. This vector splits the data into three bins: all trials \(i\) with censored[i]=0 are censored below the left reaction time bound, all trials \(i\) with censored[i]=1 fall between the reaction time bounds, and all trials \(i\) with censored[i]=2 are censored above the right reaction time bound. For non-censored trials, the log-PDF is computed, for left censored trials, the log-CDF is computed, and for right censored trials, the log-CCDF is computed:

+
model { 
+  for (i in 1:N) { 
+    // right censored at right_bound
+    if (resp [i] == 1) { 
+      // upper response boundary
+      if (censored[i] == 0) {
+        target += wiener_lcdf_unnorm(left_bound | a, t0, w, v, 
+                                                  sv, sw, st0);
+      } else if (censored[i] == 1) {
+        target += wiener_lpdf(y[i] | a, t0, w, v, sv, sw, st0);
+      } else if (censored[i] == 2) {
+         target += wiener_lccdf_unnorm(right_bound | a, t0, w, v,
+                                                     sv, sw, st0);
+      }
+    } else { 
+      // lower response boundary (mirror drift and // starting point!)
+      if (censored[i] == 0) {
+        target += wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w,
+                                                  neg_v, sv, sw, st0);
+      } else if (censored[i] == 1) {
+        target += wiener_lpdf(y[i] | a, t0, one_minus_w, neg_v,
+                                     sv, sw, st0);
+      } else if (censored[i] == 2) {
+        target += wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w,
+                                                    neg_v, sv, sw, st0);
+      }
+    }
+  }
+}
+

When the data are censored on only one side, omit the case that is not needed.

+

Note that this block can be inserted in the definition of the parallelization function, partial_sum_wiener(), as defined below.

+

Sometimes the response is also missing (i.e., it is known that the reaction time in a trial fell outside the response window, but which response was given is unknown). One method that has been used to model such data has involved inferring the numbers of missing responses of either kind from the observed relative frequencies of the two responses. This approach has the problem that quite specific assumptions on the missing data have to be made (namely, that the proportions of the two kinds of responses are the same for responses within and outside the response window).

+

The following is a more principled approach that uses the cumulative distribution functions and their complements to provide the data-generating distribution of censored data. As before, let \(L\) be the left reaction time bound, and \(U\) the right reaction time bound, and consider decision times without inter-trial variabilities for the sake of simplicity. It follows that the likelihood contribution \(\textit{lik}_l\) for a left-censored data point is given by

+

\[ +\begin{aligned} +\textit{lik}_l(a,w,v) = F_0(L\mid a,w,v) + F_1(L\mid a,w,v), +\end{aligned} +\]

+

whereas the likelihood contribution \(\textit{lik}_r\) due to a right-censored data point is given by

+

\[ +\begin{aligned} +\textit{lik}_r(a,w,v) = \text{CCDF}_0(U\mid a,w,v) + \text{CCDF}_1(U\mid a,w,v). +\end{aligned} +\]

+

See the following code for an example of Stan code implementing this second case of censoring. This model call deals with the problem of unknown responses by computing the probability of choosing the response-1 or response-0 boundary outside the response window. Here, the CDF and/or the CCDF are required, depending upon whether there is only left-censoring, right-censoring, or censoring both to the left and to the right. The following code shows the functions block for a model that is right-censored using the function partial_sum_wiener() to parallelize the execution of a single Stan chain across multiple cores:

+
functions {
+  /**
+   * Partial log-likelihood sum over one slice of trials, for use with
+   * reduce_sum().
+   *
+   * rt_slice holds the reaction times of trials start..end, so it is
+   * indexed relative to start, whereas resp and censored are the full
+   * arrays and are indexed by the absolute trial number i.
+   *
+   * Uncensored trials (censored[i] == 1) add the log density of the
+   * observed response. Every other trial is treated as right-censored
+   * with unknown response and adds the log of the summed CCDFs of both
+   * boundaries evaluated at right_bound.
+   */
+  real partial_sum_wiener(array[] real rt_slice, int start,
+                          int end, real a, real t0, real w,
+                          real v, real sv, real sw, real st,
+                          array[] int resp, real right_bound,
+                          array[] int censored) {
+    // The lower boundary is obtained by mirroring starting point and
+    // drift. These are computed locally because variables of the model
+    // block are not in scope inside a user-defined function.
+    real one_minus_w = 1 - w;
+    real neg_v = -v;
+    real ans = 0;
+    for (i in start:end) {
+      if (censored[i] == 1) {
+        // not censored
+        if (resp[i] == 1) {
+          // upper boundary
+          ans += wiener_lpdf(rt_slice[i - start + 1] | a, t0, w, v,
+                                                     sv, sw, st);
+        } else {
+          // lower boundary (mirror v and w)
+          ans += wiener_lpdf(rt_slice[i - start + 1] | a, t0, one_minus_w,
+                                                     neg_v, sv, sw, st);
+        }
+      } else {
+        // censored: the response is unknown, so add the probabilities of
+        // finishing after right_bound at either boundary (on the log scale)
+        ans += log_sum_exp(
+          wiener_lccdf_unnorm(right_bound | a, t0, w, v, sv, sw, st),
+          wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w,
+                                            neg_v, sv, sw, st));
+      }
+    }
+    return ans;
+  }
+}
+

Combine this block with the model block in the example above by using the function reduce_sum().

+
  target += reduce_sum(partial_sum_wiener, rt, 1,
+    a, t0, w, v, sv, sw, st, resp, right_bound, censored);
+}
+

For more details, see Henrich and Klauer (2026).

+ + + +
+
+
+ + Back to top

References

+
+Henrich, Franziska, Raphael Hartmann, Valentin Pratz, Andreas Voss, and Karl Christoph Klauer. 2024. “The Seven-Parameter Diffusion Model: An Implementation in Stan for Bayesian Analyses.” Behavior Research Methods 56 (4): 3102–16. https://doi.org/10.3758/s13428-023-02179-1. +
+
+Henrich, Franziska, and Karl Christoph Klauer. 2026. “Modeling Truncated and Censored Data with the Diffusion Model in Stan.” Behavior Research Methods 58 (42). https://doi.org/10.3758/s13428-025-02822-z. +
+
+Nicenboim, Bruno, Daniel J Schad, and Shravan Vasishth. 2025. Introduction to Bayesian Data Analysis for Cognitive Science. CRC Press. +
+
+Ratcliff, Roger. 1978. “A Theory of Memory Retrieval.” Psychological Review 85 (2): 59–108. +
+
+Ratcliff, Roger, and Jeffrey N. Rouder. 1998. “Modelling Response Times for Two-Choice Decisions.” Psychological Science 9 (5): 347–56. +
+
+Ulrich, Rolf, and Jeff Miller. 1994. “Effects of Truncation on Reaction Time Analysis.” Journal of Experimental Psychology: General 123 (1): 34–80. +
+
+Wagenmakers, Eric-Jan. 2009. “Methodological and Empirical Developments for the Ratcliff Diffusion Model of Response Times and Accuracy.” European Journal of Cognitive Psychology 21 (5): 641–71. https://doi.org/10.1080/09541440802205067. +
+
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/2_39/stan-users-guide/wiener_diffusion_model.pdf b/docs/2_39/stan-users-guide/wiener_diffusion_model.pdf new file mode 100644 index 000000000..fdec7b5f6 Binary files /dev/null and b/docs/2_39/stan-users-guide/wiener_diffusion_model.pdf differ diff --git a/docs/404.html b/docs/404.html index 71cb3a24b..7f4b1223b 100644 --- a/docs/404.html +++ b/docs/404.html @@ -837,8 +837,35 @@

Page Not Found

}); + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/cmdstan-guide/bib.html b/docs/cmdstan-guide/bib.html index cda8b7cf5..5bed838fb 100644 --- a/docs/cmdstan-guide/bib.html +++ b/docs/cmdstan-guide/bib.html @@ -787,8 +787,35 @@

Re }); + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/cmdstan-guide/command_line_options.html b/docs/cmdstan-guide/command_line_options.html index 81addd6f9..e821bbec5 100644 --- a/docs/cmdstan-guide/command_line_options.html +++ b/docs/cmdstan-guide/command_line_options.html @@ -255,7 +255,7 @@ + @@ -1467,8 +1473,35 @@

Reversing functions + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/binary_distributions.html b/docs/functions-reference/binary_distributions.html index 6db376df1..18a7b19d6 100644 --- a/docs/functions-reference/binary_distributions.html +++ b/docs/functions-reference/binary_distributions.html @@ -255,7 +255,7 @@ + @@ -1291,8 +1297,35 @@

Stan Functions

+ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/bounded_continuous_distributions.html b/docs/functions-reference/bounded_continuous_distributions.html index f69007d73..eda015ead 100644 --- a/docs/functions-reference/bounded_continuous_distributions.html +++ b/docs/functions-reference/bounded_continuous_distributions.html @@ -255,7 +255,7 @@ + @@ -1157,8 +1163,35 @@

Stan functions

+ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/bounded_discrete_distributions.html b/docs/functions-reference/bounded_discrete_distributions.html index 5b142af7e..36da4d4c7 100644 --- a/docs/functions-reference/bounded_discrete_distributions.html +++ b/docs/functions-reference/bounded_discrete_distributions.html @@ -255,7 +255,7 @@ + @@ -1728,8 +1734,35 @@

Stan functions

+ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/circular_distributions.html b/docs/functions-reference/circular_distributions.html index 7e81ff5cb..a2e8bc372 100644 --- a/docs/functions-reference/circular_distributions.html +++ b/docs/functions-reference/circular_distributions.html @@ -289,7 +289,7 @@ + @@ -1208,8 +1214,35 @@

Numerical stability + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/complex-valued_basic_functions.html b/docs/functions-reference/complex-valued_basic_functions.html index 80143c22c..70ee5f82b 100644 --- a/docs/functions-reference/complex-valued_basic_functions.html +++ b/docs/functions-reference/complex-valued_basic_functions.html @@ -289,7 +289,7 @@ + @@ -1504,8 +1510,35 @@

Complex hyperbolic trigonom + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/complex_matrix_operations.html b/docs/functions-reference/complex_matrix_operations.html index 03bb5f7e1..35eaadeb8 100644 --- a/docs/functions-reference/complex_matrix_operations.html +++ b/docs/functions-reference/complex_matrix_operations.html @@ -288,7 +288,7 @@ + @@ -2066,8 +2072,35 @@

Rev + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/compound_arithmetic_and_assignment.html b/docs/functions-reference/compound_arithmetic_and_assignment.html index 8d34e2cbc..1b6ce2cfe 100644 --- a/docs/functions-reference/compound_arithmetic_and_assignment.html +++ b/docs/functions-reference/compound_arithmetic_and_assignment.html @@ -226,7 +226,7 @@ + @@ -1137,8 +1143,35 @@

- \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/continuous_distributions_on_0_1.html b/docs/functions-reference/continuous_distributions_on_0_1.html index 252cce468..df8c1baeb 100644 --- a/docs/functions-reference/continuous_distributions_on_0_1.html +++ b/docs/functions-reference/continuous_distributions_on_0_1.html @@ -255,7 +255,7 @@ + @@ -1208,8 +1214,35 @@

Stan functions

+ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/conventions_for_probability_functions.html b/docs/functions-reference/conventions_for_probability_functions.html index 838febad9..892c0238d 100644 --- a/docs/functions-reference/conventions_for_probability_functions.html +++ b/docs/functions-reference/conventions_for_probability_functions.html @@ -289,7 +289,7 @@ + @@ -1339,8 +1345,35 @@

Return type

+ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/correlation_matrix_distributions.html b/docs/functions-reference/correlation_matrix_distributions.html index b1b16ce4c..2d9c7541a 100644 --- a/docs/functions-reference/correlation_matrix_distributions.html +++ b/docs/functions-reference/correlation_matrix_distributions.html @@ -309,7 +309,7 @@ + @@ -1268,8 +1274,35 @@

Stan functions

+ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/covariance_matrix_distributions.html b/docs/functions-reference/covariance_matrix_distributions.html index 77e397def..26470d87e 100644 --- a/docs/functions-reference/covariance_matrix_distributions.html +++ b/docs/functions-reference/covariance_matrix_distributions.html @@ -255,7 +255,7 @@ + @@ -1272,8 +1278,35 @@

Stan functions

+ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/deprecated_functions.html b/docs/functions-reference/deprecated_functions.html index 2b5f328d0..1a9def5ee 100644 --- a/docs/functions-reference/deprecated_functions.html +++ b/docs/functions-reference/deprecated_functions.html @@ -289,7 +289,7 @@ + @@ -1317,8 +1323,35 @@

Sizes and para + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/distributions_over_unbounded_vectors.html b/docs/functions-reference/distributions_over_unbounded_vectors.html index dc0eee2d7..aa66e4846 100644 --- a/docs/functions-reference/distributions_over_unbounded_vectors.html +++ b/docs/functions-reference/distributions_over_unbounded_vectors.html @@ -275,7 +275,7 @@ + @@ -1584,8 +1590,35 @@

Stan functions

+ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/embedded_laplace.html b/docs/functions-reference/embedded_laplace.html new file mode 100644 index 000000000..e3e23970e --- /dev/null +++ b/docs/functions-reference/embedded_laplace.html @@ -0,0 +1,1517 @@ + + + + + + + + + +Embedded Laplace Approximation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + +
+ + + +
+ + + + +
+

Embedded Laplace Approximation

+

The embedded Laplace approximation can be used to approximate certain marginal and conditional distributions that arise in latent Gaussian models. Embedded Laplace replaces explicit sampling of (high-dimensional) Gaussian latent variables with a local Gaussian approximation. In doing so, it marginalizes out the latent Gaussian variables. Inference can then be performed on the remaining, often low-dimensional, parameters. The embedded Laplace approximation in Stan is best suited for latent Gaussian models when jointly sampling over all model parameters is expensive and the conditional posterior of the Gaussian latent variables is reasonably close to Gaussian.

+

For observed data \(y\), latent Gaussian variables \(\theta\), and hyperparameters \(\phi\), a latent Gaussian model observes the following hierarchical structure: \[\begin{eqnarray} + \phi &\sim& p(\phi), \\ + \theta &\sim& \text{MultiNormal}(0, K(\phi)), \\ + y &\sim& p(y \mid \theta, \phi). +\end{eqnarray}\] In this formulation, \(p(y \mid \theta, \phi)\) is the data model that specifies how observations are generated conditional on \(\theta\) and \(\phi\). \(K(\phi)\) denotes the prior covariance matrix for the latent Gaussian variables \(\theta\) and is parameterized by \(\phi\). The prior on \(\theta\) is centered at 0, however an offset can always be added when specifying the data model \(p(y \mid \theta, \phi)\).

+

Conditioning on observations \(y\) we obtain the joint posterior \(p(\phi, \theta \mid y) \propto p(y \mid \theta, \phi) p(\theta | +\phi) p(\phi)\), where \(p(y \mid \theta, \phi)\) as function of \(\theta\) and \(\phi\) is the likelihood function. To sample from the joint posterior, we can either use a standard method, such as Markov chain Monte Carlo, or we can follow a two-step procedure:

+
    +
  1. sample from the marginal posterior \(p(\phi \mid y)\),
  2. +
  3. sample from the conditional posterior \(p(\theta \mid y, \phi)\).
  4. +
+

In the above procedure, neither the marginal posterior nor the conditional posterior are typically available in closed form and so they must be approximated. The marginal posterior can be written as \(p(\phi \mid y) \propto p(y \mid \phi) p(\phi)\), where \(p(y \mid \phi) = \int p(y \mid \phi, \theta) p(\theta \mid \phi) \text{d}\theta\) is called the marginal likelihood. The Laplace method approximates \(p(y \mid \phi, \theta) p(\theta \mid \phi)\) with a normal distribution centered at the mode, \[ + \theta^* = \underset{\theta}{\text{argmax}} \ \log p(\theta \mid y, \phi), +\] and \(\theta^*\) is obtained using a numerical optimizer. The resulting Gaussian integral can be evaluated analytically to obtain an approximation to the log marginal likelihood \(\log \hat p(y \mid \phi) \approx \log p(y \mid \phi)\). Specifically: \[ + \hat p(y \mid \phi) = \frac{p(\theta^* \mid \phi) p(y \mid \theta^*, \phi)}{\hat p (\theta^* \mid \phi, y)}. +\]

+

Combining this marginal likelihood with the prior in the model block, we can then sample from the marginal posterior \(p(\phi \mid y)\) using one of Stan’s algorithms. The marginal posterior is lower dimensional and likely to have a simpler geometry leading to more efficient inference. On the other hand each marginal likelihood computation is more costly, and the combined change in efficiency depends on the application.

+

To obtain posterior draws for \(\theta\), we sample from the normal approximation to \(p(\theta \mid y, \phi)\) in generated quantities. The process of iteratively sampling from \(p(\phi \mid y)\) (say, with MCMC) and then \(p(\theta \mid y, \phi)\) produces posterior draws from the joint posterior \(p(\theta, \phi \mid y)\).

+

The Laplace approximation is especially useful if \(p(y \mid \phi, \theta)\) as a function of \(\theta\) is log-concave, e.g., in the case of Poisson, binomial, negative-binomial, and Bernoulli likelihoods. (The likelihood of a normal model is also log-concave; however, when the likelihood is normal, marginalization can be performed exactly and does not require an approximation.) Stan’s embedded Laplace approximation is restricted to the case where the prior \(p(\theta \mid \phi)\) is multivariate normal. Furthermore, the likelihood \(p(y \mid \phi, \theta)\) must be computed using only operations which support higher-order derivatives (see the section on specifying the likelihood function).

+

The Laplace approximation can also be useful in generated quantities to marginalize out latent variables even if the sampling had been done using the full joint posterior.

+
+

Approximating the log marginal likelihood \(\log p(y \mid \phi)\)

+

In the model block, we increment target with laplace_marginal, a function that approximates the log marginal likelihood \(\log p(y \mid \phi)\). The signature of the function is:

+ +

+

real laplace_marginal(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)

+

which returns an approximation to the log marginal likelihood \(\log p(y \mid \phi)\).

+

The embedded Laplace functions accept two functors whose user defined arguments are passed in as tuples to laplace_marginal.

+
    +
  1. likelihood_function - user-specified log likelihood whose first argument is the vector of latent Gaussian variables \(\theta\). The subsequent arguments are user defined.
  2. +
+
    +
  • real likelihood_function(vector theta, likelihood_arguments_1, likelihood_arguments_2, ...).
  • +
+
    +
  1. likelihood_arguments - A tuple of arguments whose internal members are passed to the log likelihood function. This tuple does NOT include the latent variable \(\theta\).
  2. +
  3. hessian_block_size - the block size of the Hessian of the log likelihood, \(\partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2\).
  4. +
  5. covariance_function - A function that returns the covariance matrix of the multivariate normal prior on \(\theta\).
  6. +
+
    +
  • matrix covariance_function(covariance_argument_1, covariance_argument_2, ...).
  • +
+
    +
  1. covariance_arguments - A tuple of the arguments whose internal members will be passed to the covariance function.
  2. +
+Available since 2.39 +

Below we go over each argument in more detail.

+
+
+

Specifying the log likelihood function

+

The first step to use the embedded Laplace approximation is to write down a function in the functions block which returns the log likelihood \(\log p(y \mid \theta, \phi)\).

+

There are a few constraints on this function:

+
    +
  1. The function return type must be real.

  2. +
  3. The first argument must be the latent Gaussian variable \(\theta\) and must have type vector.

  4. +
  3. The operations in the function must support higher-order automatic differentiation (AD). Most functions in Stan support higher-order AD. The exceptions are functions with specialized calls for reverse-mode AD, and these are higher-order functions (algebraic solvers, differential equation solvers, and integrators), the marginalization function for hidden Markov models (HMM), and the embedded Laplace approximation itself.

  6. +
+

The base signature of the function is

+
real likelihood_function(vector theta, ...)
+

The ... represents a set of optional variadic arguments. There are no type restrictions for the variadic arguments ... and each argument can be passed as data or parameter.

+

The tuple after likelihood_function contains the arguments that get passed to likelihood_function excluding \(\theta\). For instance, if a user defined likelihood uses a real and a matrix, the likelihood function’s signature would first have a vector and then a real and matrix argument.

+
real likelihood_fun(vector theta, real a, matrix X)
+

The call to the laplace marginal would start with this likelihood and tuple holding the other likelihood arguments. We do not need to pass theta, since it is marginalized out and therefore does not appear explicitly as a passed parameter.

+
real val = laplace_marginal(likelihood_fun, (a, X), hessian_block_size, ...);
+

If the likelihood_function has only one argument, the tuple syntax is (a, ).

+

As always, users should use parameter arguments only when necessary in order to speed up differentiation. In general, we recommend marking data only arguments with the keyword data, for example,

+
real likelihood_function(vector theta, data vector x, ...)
+

In addition to the likelihood function, users must specify the block size of the Hessian, \(\partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2\). The Hessian is often block diagonal and this structure can be taken advantage of for fast computation. For example, if \(y_i\) only depends on \(\theta_i\), then the Hessian is diagonal and hessian_block_size=1,

+
real val = laplace_marginal(likelihood_fun, (a, X), 1, ...);
+

On the other hand, if the Hessian is not block diagonal, we can always set hessian_block_size=n where \(n\) is the size of \(\theta\).

+
+
+

Specifying the covariance function

+

The argument covariance_function returns the prior covariance matrix \(K\). The signature for this function is the same as a standard Stan function. Its return type must be a matrix of size \(n \times n\) where \(n\) is the size of \(\theta\).

+
matrix covariance_function(...)
+

The ... represents a set of optional variadic arguments. There are no type restrictions for the variadic arguments ... and each argument can be passed as data or parameter. The variable \(\phi\) is implicitly defined as the collection of all non-data arguments passed to likelihood_function (excluding \(\theta\)) and covariance_function.

+

The tuple after covariance_function contains the arguments that get passed to covariance_function. For instance, if a user defined covariance function uses a real and a matrix

+
matrix cov_fun(real b, matrix Z)
+

the call to the Laplace marginal would include the covariance function and a tuple holding the covariance function arguments.

+
real val = laplace_marginal(likelihood_fun, (a, X), hessian_block_size, cov_fun, (b, Z));
+

If the covariance_function has only one argument, the tuple syntax is (b, ).

+
+
+

Control parameters

+

It is also possible to specify control parameters, which can help improve the optimization that underlies the Laplace approximation, using laplace_marginal_tol with the following signature:

+ +

+

real laplace_marginal_tol(function likelihood_function, tuple(...), int hessian_block_size, function covariance_function, tuple(...), tuple(vector, real, int, int, int, int) tolerances)

+

The final argument, tolerances, is a tuple with the following elements

+
tuple(vector theta_init, real tol, int max_steps, int solver,
+      int max_steps_linesearch, int allow_fallback)
+

Returns an approximation to the log marginal likelihood \(\log p(y \mid \phi)\) and allows the user to tune the control parameters of the approximation.

+
    +
  • theta_init: the initial guess for a Newton solver when finding the mode of \(p(\theta \mid y, \phi)\). By default, it is a zero-vector.

  • +
  • tol: the tolerance \(\epsilon\) of the optimizer. Specifically, the optimizer stops when \(||\nabla \log p(\theta \mid y, \phi)|| \le \epsilon\). By default, the value is \(\epsilon \approx 1.49 \times 10^{-8}\), which is the square-root of machine precision.

  • +
  • max_steps: the maximum number of steps taken by the optimizer before it gives up (in which case the Metropolis proposal gets rejected). The default is 500 steps.

  • +
  • solver: choice of Newton solver. The optimizer underlying the Laplace approximation does one of three matrix decompositions to compute a Newton step. The problem determines which decomposition is numerically stable. By default (solver=1), the solver attempts a Cholesky decomposition of the negative Hessian of the log likelihood, \(- \partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2\). This operation is legal if the negative Hessian is positive-definite, which will always be true when the likelihood as a function of \(\theta\) is log concave. If solver=2, the solver makes a Cholesky decomposition of the covariance matrix \(K(\phi)\). Since a covariance matrix is always positive-definite, computing its Cholesky decomposition is always a legal operation, at least in theory. In practice, we may not be able to compute the Cholesky decomposition of the negative Hessian nor of the covariance matrix, either because it does not exist or because of numerical issues. In that case, we can use solver=3 which uses a more expensive but less specialized approach to compute a Newton step.

  • +
  • max_steps_linesearch: maximum number of steps in linesearch. The linesearch adjusts the step size to ensure that a Newton step leads to an increase in the objective function (i.e., \(f(\theta) = p(\theta \mid \phi, y)\)). If a standard Newton step does not improve the objective function, the step is adjusted iteratively until the objective function increases or the maximum number of steps in the linesearch is reached. By default, max_steps_linesearch=1000. Setting max_steps_linesearch=0 results in no linesearch.

  • +
  • allow_fallback: If the user-specified solver fails, this flag determines whether to fall back to the next solver. For example, if the user specifies solver=1 but the Cholesky decomposition of the negative Hessian \(- \partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2\) fails, the optimizer will try solver=2 instead. By default, allow_fallback = 1 (TRUE).

  • +
+Available since 2.39 +

The embedded Laplace approximation’s options have a helper callable generate_laplace_options(int theta_size) that will generate the tuple for the user. This can be useful for quickly setting up the control parameters in the transformed data block to reuse within the model.

+
tuple(vector[theta_size], real, int, int, int, int) laplace_ops = generate_laplace_options(theta_size);
+// Modify solver type (4th element of the options tuple)
+laplace_ops.4 = 2;
+// Turn off fallback (6th element of the options tuple)
+laplace_ops.6 = 0;
+ +

+

tuple(vector, real, int, int, int, int) generate_laplace_options(int dimension)

+

Create a default laplace options tuple for a theta_init of size dimension.

+Available since 2.39 + +

+

tuple(vector, real, int, int, int, int) generate_laplace_options(vector theta_init)

+

Create a default Laplace options tuple containing theta_init.

+Available since 2.39 +
+
+

Sample from the approximate conditional \(\hat{p}(\theta \mid y, \phi)\)

+

In generated quantities, it is possible to sample from the Laplace approximation of \(p(\theta \mid \phi, y)\) using laplace_latent_rng. The signature for laplace_latent_rng follows closely the signature for laplace_marginal:

+ +

+

vector laplace_latent_rng(function likelihood_function, tuple(...) likelihood_arguments, int hessian_block_size, function covariance_function, tuple(...) covariance_arguments)

+

Samples from the Laplace approximation to the conditional posterior \(p(\theta \mid y, \phi)\).

+Available since 2.39 +

Once again, it is possible to specify control parameters:

+ +

+

vector laplace_latent_tol_rng(function likelihood_function, tuple(...), int hessian_block_size, function covariance_function, tuple(...), tuple(vector, real, int, int, int, int) tolerances)
Samples from the approximate conditional posterior \(p(\theta \mid y, \phi)\) and allows the user to tune the control parameters of the approximation.

+Available since 2.39 +
+
+

Built-in Laplace marginal likelihood functions

+

Stan provides convenient wrappers for the embedded Laplace approximation when applied to latent Gaussian models with certain likelihoods arising from some common data models. With this wrapper, the likelihood is pre-specified and does not need to be specified by the user. The selection of supported likelihoods is currently narrow and expected to grow. The wrappers exist for the user’s convenience but are not more computationally efficient than specifying log likelihoods in the functions block.

+ + + +
+
+ + Back to top
+ + +
+ + + + + + \ No newline at end of file diff --git a/docs/functions-reference/functions_index.html b/docs/functions-reference/functions_index.html index b89c87236..cf99a8514 100644 --- a/docs/functions-reference/functions_index.html +++ b/docs/functions-reference/functions_index.html @@ -224,7 +224,7 @@ + @@ -506,6 +512,7 @@

On this page

  • U
  • V
  • W
  • +
  • Y
  • Z
  • @@ -2322,6 +2329,15 @@

    G

    (matrix A) : matrix (matrix_operations.html) +

    generate_laplace_options:

    +

    get_imag:

    • @@ -2880,6 +2896,174 @@

      L

      (T x) : R (real-valued_basic_functions.html)
    +

    laplace_latent_bernoulli_logit_rng:

    + +

    laplace_latent_neg_binomial_2_log_rng:

    + +

    laplace_latent_poisson_log_rng:

    + +

    laplace_latent_rng:

    + +

    laplace_latent_rng_tol:

    + +

    laplace_latent_tol_bernoulli_logit_rng:

    + +

    laplace_latent_tol_neg_binomial_2_log_rng:

    + +

    laplace_latent_tol_poisson_log_rng:

    + +

    laplace_marginal:

    + +

    laplace_marginal_bernoulli_logit:

    + +

    laplace_marginal_bernoulli_logit_lpmf:

    + +

    laplace_marginal_bernoulli_logit_lupmf:

    + +

    laplace_marginal_neg_binomial_2_log:

    + +

    laplace_marginal_neg_binomial_2_log_lpmf:

    + +

    laplace_marginal_neg_binomial_2_log_lupmf:

    + +

    laplace_marginal_poisson_log:

    + +

    laplace_marginal_poisson_log_lpmf:

    + +

    laplace_marginal_poisson_log_lupmf:

    + +

    laplace_marginal_tol:

    + +

    laplace_marginal_tol_bernoulli_logit:

    + +

    laplace_marginal_tol_bernoulli_logit_lpmf:

    + +

    laplace_marginal_tol_bernoulli_logit_lupmf:

    + +

    laplace_marginal_tol_neg_binomial_2_log:

    + +

    laplace_marginal_tol_neg_binomial_2_log_lpmf:

    + +

    laplace_marginal_tol_neg_binomial_2_log_lupmf:

    + +

    laplace_marginal_tol_poisson_log:

    + +

    laplace_marginal_tol_poisson_log_lpmf:

    + +

    laplace_marginal_tol_poisson_log_lupmf:

    +

    lbeta:

    +

    trace_dot:

    +

    trace_gen_quad_form:

    • @@ -6784,6 +6974,51 @@

      W

    +
    +

    Y

    +

    yule_simon:

    + +

    yule_simon_cdf:

    + +

    yule_simon_lccdf:

    + +

    yule_simon_lcdf:

    + +

    yule_simon_lpmf:

    + +

    yule_simon_lupmf:

    + +

    yule_simon_rng:

    + +

    Z

    zeros_array:

    @@ -7387,8 +7622,35 @@

    Z

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/hidden_markov_models.html b/docs/functions-reference/hidden_markov_models.html index 639e59fb4..4b055c0fc 100644 --- a/docs/functions-reference/hidden_markov_models.html +++ b/docs/functions-reference/hidden_markov_models.html @@ -30,7 +30,7 @@ - + @@ -255,7 +255,7 @@ + @@ -1132,14 +1138,41 @@

    Stan functions

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/higher-order_functions.html b/docs/functions-reference/higher-order_functions.html index d37d0bdb1..1b73b613a 100644 --- a/docs/functions-reference/higher-order_functions.html +++ b/docs/functions-reference/higher-order_functions.html @@ -309,7 +309,7 @@ + @@ -1630,8 +1636,35 @@

    Rectangular map

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/index.html b/docs/functions-reference/index.html index 273f5b5b9..820f6d50b 100644 --- a/docs/functions-reference/index.html +++ b/docs/functions-reference/index.html @@ -74,7 +74,7 @@ - + @@ -225,7 +225,7 @@ + @@ -487,7 +493,7 @@

    Stan Functions Reference

    -

    Version 2.38

    +

    Version 2.39

    @@ -511,7 +517,7 @@

    Stan Functions Reference

  • the Stan User’s Guide. The Stan user’s guide provides example models and programming techniques for coding statistical models in Stan. It also serves as an example-driven introduction to Bayesian modeling and inference:

  • the Stan Reference Manual. Stan’s modeling language is shared across all of its interfaces. The Stan Language Reference Manual provides a concise definition of the language syntax for all elements in the language together with an overview of the inference algorithms and posterior inference tools.

  • -

    Download the pdf version of this manual.

    +

    Download the pdf version of this manual.

    @@ -2637,8 +2647,35 @@

    Reverse functions

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/mixed_operations.html b/docs/functions-reference/mixed_operations.html index 8101005d9..af4d23e2a 100644 --- a/docs/functions-reference/mixed_operations.html +++ b/docs/functions-reference/mixed_operations.html @@ -255,7 +255,7 @@ + @@ -1338,8 +1344,35 @@

    Mixed Operations

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/multivariate_discrete_distributions.html b/docs/functions-reference/multivariate_discrete_distributions.html index 348a6c101..007d82fcc 100644 --- a/docs/functions-reference/multivariate_discrete_distributions.html +++ b/docs/functions-reference/multivariate_discrete_distributions.html @@ -255,7 +255,7 @@ + @@ -1233,8 +1239,35 @@

    Stan functions

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/positive_continuous_distributions.html b/docs/functions-reference/positive_continuous_distributions.html index c29d8e03e..6c27f5814 100644 --- a/docs/functions-reference/positive_continuous_distributions.html +++ b/docs/functions-reference/positive_continuous_distributions.html @@ -255,7 +255,7 @@ + @@ -1659,8 +1665,35 @@

    Stan functions

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/positive_lower-bounded_distributions.html b/docs/functions-reference/positive_lower-bounded_distributions.html index c658d57df..aa3c8aca2 100644 --- a/docs/functions-reference/positive_lower-bounded_distributions.html +++ b/docs/functions-reference/positive_lower-bounded_distributions.html @@ -275,7 +275,7 @@ + @@ -1412,8 +1418,35 @@

    Tolerance tuning

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/real-valued_basic_functions.html b/docs/functions-reference/real-valued_basic_functions.html index b46001086..96f0b6ebe 100644 --- a/docs/functions-reference/real-valued_basic_functions.html +++ b/docs/functions-reference/real-valued_basic_functions.html @@ -309,7 +309,7 @@ + @@ -2220,8 +2226,35 @@

    Hypergeometric Fu + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/references.html b/docs/functions-reference/references.html index 4458bfda0..8c2cd1b2a 100644 --- a/docs/functions-reference/references.html +++ b/docs/functions-reference/references.html @@ -787,8 +787,35 @@

    Re }); + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/removed_functions.html b/docs/functions-reference/removed_functions.html index 041785972..869196b5c 100644 --- a/docs/functions-reference/removed_functions.html +++ b/docs/functions-reference/removed_functions.html @@ -255,7 +255,7 @@ + @@ -1169,8 +1175,35 @@

    - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/simplex_distributions.html b/docs/functions-reference/simplex_distributions.html index bd3e01b16..8d7aafd24 100644 --- a/docs/functions-reference/simplex_distributions.html +++ b/docs/functions-reference/simplex_distributions.html @@ -289,7 +289,7 @@ + @@ -1216,8 +1222,35 @@

    Stan functions

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/sparse_matrix_operations.html b/docs/functions-reference/sparse_matrix_operations.html index 25534a49a..e7596edd0 100644 --- a/docs/functions-reference/sparse_matrix_operations.html +++ b/docs/functions-reference/sparse_matrix_operations.html @@ -255,7 +255,7 @@ + @@ -1177,8 +1183,35 @@

    Sparse matrix + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/transform_functions.html b/docs/functions-reference/transform_functions.html index 43ab4cbc9..7311ee935 100644 --- a/docs/functions-reference/transform_functions.html +++ b/docs/functions-reference/transform_functions.html @@ -255,7 +255,7 @@ + @@ -1429,8 +1435,35 @@

    Sum-to-zero matrices< + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/unbounded_continuous_distributions.html b/docs/functions-reference/unbounded_continuous_distributions.html index b08557e47..807dfed4c 100644 --- a/docs/functions-reference/unbounded_continuous_distributions.html +++ b/docs/functions-reference/unbounded_continuous_distributions.html @@ -275,7 +275,7 @@ + @@ -1765,8 +1771,35 @@

    Stan functions

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/unbounded_discrete_distributions.html b/docs/functions-reference/unbounded_discrete_distributions.html index a8a99e58b..d06a2783e 100644 --- a/docs/functions-reference/unbounded_discrete_distributions.html +++ b/docs/functions-reference/unbounded_discrete_distributions.html @@ -275,7 +275,7 @@ + @@ -583,6 +589,12 @@

    On this page

  • Distribution statement
  • Stan functions
  • +
  • Yule Simon distribution +
  • @@ -986,6 +998,50 @@

    Stan functions

    R beta_neg_binomial_rng(reals r, reals alpha, reals beta)
    Generate a beta negative binomial variate with parameters r, alpha and beta; may only be used in transformed data and generated quantities blocks. r \(\cdot\) beta \(/\) (alpha\(-1\)) must be less than \(2 ^ {29}\). For a description of argument and return types, see section vectorized function signatures.

    Available since 2.36 +

    + +
    +

    Yule Simon distribution

    +
    +

    Probability mass function

    +

    If \(\alpha \in \mathbb{R}^+\), then for \(n \in \mathbb{N}^+=\{1,2,...\}\), \[\begin{equation*} +\text{YuleSimon}(n|\alpha) = \alpha \, \mathrm{B}(\alpha + 1, n) = \alpha \, \frac{\Gamma(n) \, \Gamma(\alpha + 1)}{\Gamma(n + \alpha + 1)}. +\end{equation*}\]

    +
    +
    +

    Distribution statement

    +

    n ~ yule_simon(alpha)

    +

    Increment target log probability density with yule_simon_lupmf(n | alpha).

    +Available since 2.39 + +

    +
    +
    +

    Stan functions

    + +

    +

    real yule_simon_lpmf(ints n | reals alpha)
    The log Yule Simon probability mass of n given parameter alpha.

    +Available since 2.39 + +

    +

    real yule_simon_lupmf(ints n | reals alpha)
    The log Yule Simon probability mass of n given parameter alpha dropping constant additive terms.

    +Available since 2.39 + +

    +

    real yule_simon_cdf(ints n | reals alpha)
    The Yule Simon cumulative distribution function of n given parameter alpha.

    +Available since 2.39 + +

    +

    real yule_simon_lcdf(ints n | reals alpha)
    The log of the Yule Simon cumulative distribution function of n given parameter alpha.

    +Available since 2.39 + +

    +

    real yule_simon_lccdf(ints n | reals alpha)
    The log of the Yule Simon complementary cumulative distribution function of n given parameter alpha.

    +Available since 2.39 + +

    +

    R yule_simon_rng(reals alpha)
    Generate a Yule Simon variate with parameter alpha; may only be used in transformed data and generated quantities blocks. alpha \(/\) (alpha\(-1\)) must be less than \(2 ^ {29}\). For a description of argument and return types, see section vectorized function signatures.

    +Available since 2.39 @@ -1572,8 +1628,35 @@

    Stan functions

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/functions-reference/void_functions.html b/docs/functions-reference/void_functions.html index 5752c5b39..3cf06665a 100644 --- a/docs/functions-reference/void_functions.html +++ b/docs/functions-reference/void_functions.html @@ -226,7 +226,7 @@ + @@ -1108,8 +1114,35 @@

    Fatal error statemen + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index 8c2fee151..5ba34cc12 100644 --- a/docs/index.html +++ b/docs/index.html @@ -74,7 +74,7 @@ - + @@ -213,7 +213,7 @@

    Stan Documentation

    -

    Version 2.38

    +

    Version 2.39

    @@ -239,14 +239,14 @@

    Stan Documentation

    This is the official documentation for Stan.

    -

    There are also separate installation and getting started guides for CmdStan (pdf), the command-line interface to the Stan inference engine, and the R, Python, and Julia interfaces.

    +

    There are also separate installation and getting started guides for CmdStan (pdf), the command-line interface to the Stan inference engine, and the R, Python, and Julia interfaces.

    Older Versions

    -

    This documentation is for Stan 2.38. Older versions of each of the documents linked above can be found in the table below:

    +

    This documentation is for Stan 2.39. Older versions of each of the documents linked above can be found in the table below:

    @@ -259,146 +259,153 @@

    Older Versions

    + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -1088,8 +1095,35 @@

    Licensing

    }); + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/reference-manual/analysis.html b/docs/reference-manual/analysis.html index 2831fdcf7..70de8e8bb 100644 --- a/docs/reference-manual/analysis.html +++ b/docs/reference-manual/analysis.html @@ -275,7 +275,7 @@ + + + + + + + + + + + + +
    2.39html pdfhtml pdfhtml pdfhtml pdf
    2.38 html pdf html pdf html pdf html pdf
    2.37 html pdf html pdf html pdf html pdf
    2.36 html pdf html pdf html pdf html pdf
    2.35 html pdf html pdf html pdf html pdf
    2.34 html pdf html pdf html pdf html pdf
    2.33 html pdf html pdf html pdf html pdf
    2.32 html pdf html pdf html pdf html pdf
    2.31 html pdf html pdf html pdf html pdf
    2.30 html pdf html pdf html pdf html pdf
    2.29 html pdf html pdf html pdf html pdf
    2.28 html pdf html pdf html pdf html pdf
    2.27 html pdf html pdf html pdf html pdf
    2.26 html pdf html pdf html pdf html pdf
    2.25 html pdf html pdf html pdf html pdf
    2.24 html pdf html pdf html pdf html pdf
    2.23 html pdf html pdf html html pdf
    2.22 html pdf html pdf html pdf
    2.21 html pdf html pdf html pdf
    2.20 html pdf html pdf html pdf
    2.19 html pdf html pdf html pdf
    2.18 html pdf html pdf
    @@ -1511,8 +1517,35 @@

    - \ No newline at end of file + \ No newline at end of file diff --git a/docs/reference-manual/optimization.html b/docs/reference-manual/optimization.html index d0542e3ed..074423a20 100644 --- a/docs/reference-manual/optimization.html +++ b/docs/reference-manual/optimization.html @@ -309,7 +309,7 @@ + + + + + + + +

    @@ -1654,24 +1662,24 @@

    int N = 5;
    +
    int N = 5;

    declares the variable N to be an integer scalar type and at the same time defines it to be the value of the expression 5.

    Assignment typing

    The type of the expression on the right-hand side of the assignment must be assignable to the type of the variable being declared. For example, it is legal to have

    -
    real sum = 0;
    +
    real sum = 0;

    even though 0 is of type int and sum is of type real, because integer-typed scalar expressions can be assigned to real-valued scalar variables. In all other cases, the type of the expression on the right-hand side of the assignment must be identical to the type of the variable being declared.

    Variables of any type may have values assigned to them. For example,

    -
    matrix[3, 2] a = b;
    +
    matrix[3, 2] a = b;

    declares a \(3 \times 2\) matrix variable a and assigns a copy of the value of b to the variable a. The variable b must be of type matrix for the statement to be well formed. For the code to execute successfully, b must be the same shape as a, but this cannot be validated until run time. Because a copy is assigned, subsequent changes to a do not affect b and subsequent changes to b do not affect a.

    Right-hand side expressions

    The right-hand side may be any expression which has a type which is assignable to the variable being declared. For example,

    -
    matrix[3, 2] a = 0.5 * (b + c);
    +
    matrix[3, 2] a = 0.5 * (b + c);

    assigns the matrix variable a to half of the sum of b and c. The only requirement on b and c is that the expression b + c be of type matrix. For example, b could be of type matrix and c of type real, because adding a matrix to a scalar produces a matrix, and then multiplying by a scalar produces another matrix.

    Similarly,

    -
    complex z = 2 + 3i;
    +
    complex z = 2 + 3i;

    assigns the complex number \(2 + 3i\) to the complex scalar z. The right-hand side expression can be a call to a user-defined function, allowing general algorithms to be applied that might not be otherwise expressible as simple expressions (e.g., iterative or recursive algorithms).

    @@ -1685,18 +1693,18 @@

    Decla

    Types for multiple declarations

    The code:

    -
    real x, y;
    +
    real x, y;

    is equivalent to

    -
    real x;
    -real y;
    +
    real x;
    +real y;

    As a result, all declarations on the same line must be of the same type.

    Combining with other features

    The ability to declare multiple variables can be combined with assignments whenever a declare-define is valid, as documented in the section introducing compound declarations and definitions :

    -
    real x = 3, y = 5.6;
    +
    real x = 3, y = 5.6;

    Constrained data types can also be declared together, so long as the constraint for each variable is the same:

    -
    real<lower=0> x, y;
    +
    real<lower=0> x, y;
    @@ -2284,8 +2292,35 @@

    C + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/reference-manual/user-functions.html b/docs/reference-manual/user-functions.html index a9b5cb0f0..2c1e2cbc5 100644 --- a/docs/reference-manual/user-functions.html +++ b/docs/reference-manual/user-functions.html @@ -260,7 +260,7 @@ + + + + @@ -519,7 +525,7 @@ + @@ -1296,8 +1308,35 @@

    Maximum + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/bootstrap.html b/docs/stan-users-guide/bootstrap.html index 1140f65ac..041801f23 100644 --- a/docs/stan-users-guide/bootstrap.html +++ b/docs/stan-users-guide/bootstrap.html @@ -84,7 +84,7 @@ - + @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -1311,14 +1323,41 @@

    Bayesian bo + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/clustering.html b/docs/stan-users-guide/clustering.html index bb3a43bfb..c69c45ec7 100644 --- a/docs/stan-users-guide/clustering.html +++ b/docs/stan-users-guide/clustering.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -1609,8 +1621,35 @@

    - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/complex-numbers.html b/docs/stan-users-guide/complex-numbers.html index 5ab464f1a..09e110b52 100644 --- a/docs/stan-users-guide/complex-numbers.html +++ b/docs/stan-users-guide/complex-numbers.html @@ -289,7 +289,7 @@ + @@ -499,7 +505,7 @@ + @@ -1304,8 +1316,35 @@

    Depende + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/copulas.html b/docs/stan-users-guide/copulas.html index bbd72e525..b90c1957d 100644 --- a/docs/stan-users-guide/copulas.html +++ b/docs/stan-users-guide/copulas.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -1395,8 +1407,35 @@

    Further reading/vie + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/cross-validation.html b/docs/stan-users-guide/cross-validation.html index 78393efbe..c2c63070e 100644 --- a/docs/stan-users-guide/cross-validation.html +++ b/docs/stan-users-guide/cross-validation.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -1474,8 +1486,35 @@

    Ap + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/custom-probability.html b/docs/stan-users-guide/custom-probability.html index 8e724a256..dcd0d5da8 100644 --- a/docs/stan-users-guide/custom-probability.html +++ b/docs/stan-users-guide/custom-probability.html @@ -289,7 +289,7 @@ + @@ -499,7 +505,7 @@ + @@ -1280,8 +1292,35 @@

    - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/dae.html b/docs/stan-users-guide/dae.html index 665b22ee7..6c2b59173 100644 --- a/docs/stan-users-guide/dae.html +++ b/docs/stan-users-guide/dae.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -1312,8 +1324,35 @@

    Maximum + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/decision-analysis.html b/docs/stan-users-guide/decision-analysis.html index 918b21040..dd3672d7c 100644 --- a/docs/stan-users-guide/decision-analysis.html +++ b/docs/stan-users-guide/decision-analysis.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -1326,8 +1338,35 @@

    Continuous choices

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/efficiency-tuning.html b/docs/stan-users-guide/efficiency-tuning.html index df3e37a7e..f89182f93 100644 --- a/docs/stan-users-guide/efficiency-tuning.html +++ b/docs/stan-users-guide/efficiency-tuning.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -2082,8 +2094,35 @@

    Using map-reduce

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/finite-mixtures.html b/docs/stan-users-guide/finite-mixtures.html index ea76dd5bb..b3a240090 100644 --- a/docs/stan-users-guide/finite-mixtures.html +++ b/docs/stan-users-guide/finite-mixtures.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -1613,8 +1625,35 @@

    C + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/floating-point.html b/docs/stan-users-guide/floating-point.html index 4261cedd6..ceb78bfdf 100644 --- a/docs/stan-users-guide/floating-point.html +++ b/docs/stan-users-guide/floating-point.html @@ -31,7 +31,7 @@ - + @@ -255,7 +255,7 @@ + @@ -465,7 +471,7 @@ + @@ -1330,8 +1342,8 @@

    Comparing + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/for-bugs-users.html b/docs/stan-users-guide/for-bugs-users.html index ff54338ba..52a424609 100644 --- a/docs/stan-users-guide/for-bugs-users.html +++ b/docs/stan-users-guide/for-bugs-users.html @@ -308,7 +308,7 @@ + @@ -518,7 +524,7 @@ + @@ -1404,8 +1416,35 @@

    The Stan community

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/gaussian-processes.html b/docs/stan-users-guide/gaussian-processes.html index 7b794b927..4f53b356e 100644 --- a/docs/stan-users-guide/gaussian-processes.html +++ b/docs/stan-users-guide/gaussian-processes.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -625,8 +637,7 @@

    Gaussian Processes

    Gaussian processes are continuous stochastic processes and thus may be interpreted as providing a probability distribution over functions. A probability distribution over continuous functions may be viewed, roughly, as an uncountably infinite collection of random variables, one for each valid input. The generality of the supported functions makes Gaussian priors popular choices for priors in general multivariate (non-linear) regression problems.

    The defining feature of a Gaussian process is that the joint distribution of the function’s value at a finite number of input points is a multivariate normal distribution. This makes it tractable to both fit models from finite amounts of observed data and make predictions for finitely many new data points.

    Unlike a simple multivariate normal distribution, which is parameterized by a mean vector and covariance matrix, a Gaussian process is parameterized by a mean function and covariance function. The mean and covariance functions apply to vectors of inputs and return a mean vector and covariance matrix which provide the mean and covariance of the outputs corresponding to those input points in the functions drawn from the process.

    -

    Gaussian processes can be encoded in Stan by implementing their mean and covariance functions or by using the specialized covariance functions outlined below, and plugging the result into the Gaussian model.
    -This form of model is straightforward and may be used for simulation, model fitting, or posterior predictive inference. A more efficient Stan implementation for the GP with a normally distributed outcome marginalizes over the latent Gaussian process, and applies a Cholesky-factor reparameterization of the Gaussian to compute the likelihood and the posterior predictive distribution analytically.

    +

    Gaussian processes can be encoded in Stan by implementing their mean and covariance functions or by using the specialized covariance functions outlined below, and plugging the result into the Gaussian model. This form of model is straightforward and may be used for simulation, model fitting, or posterior predictive inference. A more efficient Stan implementation for the GP with a normally distributed outcome marginalizes over the latent Gaussian process, and applies a Cholesky-factor reparameterization of the Gaussian to compute the likelihood and the posterior predictive distribution analytically.

    After defining Gaussian processes, this chapter covers the basic implementations for simulation, hyperparameter estimation, and posterior predictive inference for univariate regressions, multivariate regressions, and multivariate logistic regressions. Gaussian processes are general, and by necessity this chapter only touches on some basic models. For more information, see Rasmussen and Williams (2006).

    Note that fitting Gaussian processes as described below using exact inference by computing Cholesky of the covariance matrix scales cubically with the size of data. Due to how Stan autodiff is implemented, Stan is also slower than Gaussian process specialized software. It is likely that Gaussian processes using exact inference by computing Cholesky of the covariance matrix with \(N>1000\) are too slow for practical purposes in Stan. There are many approximations to speed up Gaussian process computation, from which the basis function approaches for 1-3 dimensional \(x\) are easiest to implement in Stan (see, e.g., Riutort-Mayol et al. (2023)).

    @@ -866,17 +877,128 @@

    Poisson GP

    vector[N] eta; } model { - // ... - rho ~ inv_gamma(5, 5); - alpha ~ std_normal(); - a ~ std_normal(); - eta ~ std_normal(); - - y ~ poisson_log(a + f); -}
    + vector[N] f; + { + matrix[N, N] L_K; + matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho); + + // diagonal elements + for (n in 1:N) { + K[n, n] = K[n, n] + delta; + } + + L_K = cholesky_decompose(K); + f = L_K * eta; + } + rho ~ inv_gamma(5, 5); + alpha ~ std_normal(); + a ~ std_normal(); + eta ~ std_normal(); + + y ~ poisson_log(a + f); +} +
    +
    +

    Poisson GP using an embedded Laplace approximation

    +

    For computational reasons, we may want to integrate out the latent variable \(f\), as was done in the normal output model. Unfortunately, exact marginalization over \(f\) is not possible when the outcome model is not normal. Instead, we may perform approximate marginalization with an embedded Laplace approximation (Rasmussen and Williams 2006; Rue, Martino, and Chopin 2009; Margossian et al. 2020). To do so, we first use the function laplace_marginal to approximate the marginal likelihood \(p(y \mid \rho, \alpha, a)\) and sample the hyperparameters with Hamiltonian Monte Carlo sampling. Then, we recover the integrated out \(f\) in the generated quantities block using laplace_latent_rng.

    +

    The embedded Laplace approximation computes a Gaussian approximation of the conditional posterior, \[ + \hat p_\mathcal{L}(f \mid \rho, \alpha, a, y) \approx p(f \mid \rho, \alpha, a, y), +\] where \(\hat p_\mathcal{L}\) is a Gaussian that matches the mode and curvature of \(p(f \mid \rho, \alpha, a, y)\). We then obtain an approximation of the marginal likelihood as follows: \[ + \hat p_\mathcal{L}(y \mid \rho, \alpha, a) + = \frac{p(f^* \mid \alpha, \rho) p(y \mid f^*, a)}{ + \hat p_\mathcal{L}(f^* \mid \rho, \alpha, a, y)}, +\] where \(f^*\) is the mode of \(p(f \mid \rho, \alpha, a, y)\), obtained via numerical optimization.

    +

    To use Stan’s embedded Laplace approximation, we must define the prior covariance function and the log likelihood function in the functions block.

    +
    functions {
    +  // log likelihood function
    +  real ll_function(vector f, real a, array[] int y) {
    +      return poisson_log_lpmf(y | a + f);
    +  }
    +
    +  // covariance function
    +  matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {
    +    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
    +    return add_diag(K, delta);
    +  }
    +
    +}
    +

    The embedded Laplace relies on calculations of the log likelihood’s Hessian, \(\partial^2 \log p(y \mid f, a, \rho, \alpha) / \partial f^2\), and these calculations can be much faster when the Hessian is sparse. In particular, it is expected that the Hessian is block diagonal. In the transformed data block we can specify the block size of the Hessian.

    +
    transformed data {
    +  int hessian_block_size = 1;
    +}
    +

    For example, if \(y_i\) depends only on \(f_i\), then the Hessian of the log likelihood is diagonal and the block size is 1. On the other hand, if the Hessian is not sparse, then we set the hessian block size to \(N\), where \(N\) is the dimension of \(f\). Currently, Stan does not check the block size of the Hessian and so the user is responsible for correctly specifying the block size.

    +

    Finally, we increment target in the model block with the approximation to \(\log p(y \mid \rho, \alpha, a)\).

    +
    model {
    +  rho ~ inv_gamma(5, 5);
    +  alpha ~ std_normal();
    +  a ~ std_normal();
    +
    +  target += laplace_marginal(ll_function, (a, y), hessian_block_size,
    +                             cov_function, (rho, alpha, x, N, delta));
    +}
    +

    Notice that we do not need to construct \(f\) explicitly, since it is marginalized out. Instead, we can recover the latent variables in generated quantities:

    +
    generated quantities {
    +  vector[N] f = laplace_latent_rng(ll_function, (a, y), hessian_block_size,
    +                                   cov_function, (rho, alpha, x, N, delta));
    +}
    +

    Users can set the control parameters of the embedded Laplace approximation via laplace_marginal_tol and laplace_latent_tol_rng. When using these functions, the user must set all the control options and store them in a tuple. These control parameters mostly concern the numerical optimizer used to find the mode \(f^*\) of \(p(f \mid \rho, \alpha, a, y)\).

    +
    transformed data {
    +  tuple(vector[N], real, int, int, int, int) laplace_ops;
    +  laplace_ops.1 = rep_vector(0, N);  // starting point for Laplace optimizer
    +  laplace_ops.2 = 1.49e-8;           // tolerance for optimizer
    +  laplace_ops.3 = 500;               // maximum number of steps for optimizer.
    +  laplace_ops.4 = 1;              // solver type being used.
    +  laplace_ops.5 = 1000;           // max number of steps for linesearch.
    +  laplace_ops.6 = 1;              // allow_fallback (1: TRUE, 0: FALSE)
    +}
    +

    If users want to depart from the defaults for only some of the control parameters, a tuple with the default values (as above) can be created with the helper callable generate_laplace_options(), and the specific control parameter can then be modified,

    +
    transformed data {
    +  tuple(vector[N], real, int, int, int, int) laplace_ops =
    +    generate_laplace_options(N);
    +
    +  laplace_ops.2 = 1e-6; // make tolerance of the optimizer less strict.
    +}
    +

    The tuple laplace_ops is then passed to laplace_marginal_tol and laplace_latent_tol_rng.

    +
    model {
    +// ...
    +
    +  target += laplace_marginal_tol(ll_function, (a, y), hessian_block_size,
    +                                 cov_function, (rho, alpha, x, N, delta),
    +                                 laplace_ops);
    +}
    +
    +generated quantities {
    +  vector[N] f = laplace_latent_tol_rng(ll_function, (a, y), hessian_block_size,
    +                                       cov_function, (rho, alpha, x, N, delta),
    +                                       laplace_ops);
    +}
    +

    Stan also provides support for a limited menu of built-in functions, including the Poisson distribution with a log link and prior mean \(m\). When using such a built-in function, the user does not need to specify a likelihood in the functions block. However, the user must strictly follow the signature of the likelihood: in this case, \(m\) must be a vector of length \(N\) (to allow for different offsets for each observation \(y_i\)) and we must indicate which element of \(f\) each component of \(y\) matches using the variable \(y_\text{index}\). In our example, there is a simple pairing \((y_i, f_i)\); however, we could imagine a scenario where multiple observations \((y_{j1}, y_{j2}, ...)\) are observed for a single \(f_j\).

    +
    transformed data {
    +  // ...
    +  array[n_obs] int y_index;
    +  for (i in 1:n_obs) y_index[i] = i - 1;
    +}
    +
    +// ...
    +
    +transformed parameters {
    +  vector[N] m = rep_vector(a, N);
    +}
    +
    +model {
    +  // ...
    +  target += laplace_marginal_poisson_log_lpmf(y | y_index, m,
    +                                       cov_function, (rho, alpha, x, N, delta));
    +}
    +
    +generated quantities {
    +  vector[N] f = laplace_latent_poisson_log_rng(y, y_index, m,
    +                                   cov_function, (rho, alpha, x, N, delta));
    +}
    +

    As before, we could specify the control parameters for the embedded Laplace approximation using laplace_marginal_tol_poisson_log_lpmf and laplace_latent_tol_poisson_log_rng.

    +

    Marginalization with a Laplace approximation can lead to faster inference, however it also introduces an approximation error. In practice, this error is negligible when using a Poisson likelihood and the approximation works well for log concave likelihoods (Kuss and Rasmussen 2005; Vanhatalo, Pietiläinen, and Vehtari 2010; Cseke and Heskes 2011; Vehtari et al. 2016). Still, users should exercise caution, especially when trying unconventional likelihoods.

    -
    -

    Logistic Gaussian process regression

    +
    +

    Logistic GP regression

    For binary classification problems, the observed outputs \(z_n \in \{ 0,1 \}\) are binary. These outputs are modeled using a Gaussian process with (unobserved) outputs \(y_n\) through the logistic link, \[ z_n \sim \textsf{Bernoulli}(\operatorname{logit}^{-1}(y_n)), @@ -884,16 +1006,51 @@

    data {
    -  // ...
    -  array[N] int<lower=0, upper=1> z;
    -  // ...
    -}
    -// ...
    -model {
    -  // ...
    -  y ~ bernoulli_logit(a + f);
    -}
    +
    data {
    +  // ...
    +  array[N] int<lower=0, upper=1> z;
    +  // ...
    +}
    +// ...
    +model {
    +  // ...
    +  z ~ bernoulli_logit(a + f);
    +}
    +

    +
    +

    Logistic GP regression with an embedded Laplace approximation

    +

    As with the Poisson GP, we cannot marginalize the latent variables exactly; however, we can resort to an embedded Laplace approximation.

    +
    functions {
    +  // log likelihood function
    +  real ll_function(vector f, real a, array[] int z) {
    +      return bernoulli_logit_lpmf(z | a + f);
    +  }
    +
    +  // covariance function
    +  matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {
    +    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
    +    return add_diag(K, delta);
    +  }
    +}
    +
    +// ...
    +
    +transformed data {
    +  int hessian_block_size = 1;
    +}
    +
    +// ...
    +
    +model {
    +  target += laplace_marginal(ll_function, (a, z), hessian_block_size,
    +                             cov_function, (rho, alpha, x, N, delta));
    +}
    +
    +generated quantities {
    +  vector[N] f = laplace_latent_rng(ll_function, (a, z), hessian_block_size,
    +                                   cov_function, (rho, alpha, x, N, delta));
    +}
    +

    While marginalization with a Laplace approximation can lead to faster inference, it also introduces an approximation error. In practice, this error may not be negligible with a Bernoulli likelihood; for more discussion see, e.g. (Vehtari et al. 2016; Margossian et al. 2020).

    @@ -907,40 +1064,40 @@

    \(\rho\) was termed “automatic relevance determination” by Neal (1996), but this is misleading, because the magnitude of the scale of the posterior for each \(\rho_d\) is dependent on the scaling of the input data along dimension \(d\). Moreover, the scale of the parameters \(\rho_d\) measures non-linearity along the \(d\)-th dimension, rather than “relevance” (Piironen and Vehtari 2016).

    A priori, the closer \(\rho_d\) is to zero, the more nonlinear the conditional mean in dimension \(d\) is. A posteriori, the actual dependencies between \(x\) and \(y\) play a role. With one covariate \(x_1\) having a linear effect and another covariate \(x_2\) having a nonlinear effect, it is possible that \(\rho_1 > \rho_2\) even if the predictive relevance of \(x_1\) is higher (Rasmussen and Williams 2006, 80). The collection of \(\rho_d\) (or \(1/\rho_d\)) parameters can also be modeled hierarchically.

    The implementation of automatic relevance determination is a straightforward extension of the one-dimensional case by modifying rho to be an array.

    -
    data {
    -  int<lower=1> N;
    -  int<lower=1> D;
    -  array[N] vector[D] x;
    -  vector[N] y;
    -}
    -transformed data {
    -  real delta = 1e-9;
    -}
    -parameters {
    -  array[D] real<lower=0> rho;
    -  real<lower=0> alpha;
    -  real<lower=0> sigma;
    -  vector[N] eta;
    -}
    -model {
    -  vector[N] f;
    -  {
    -    matrix[N, N] L_K;
    -    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
    -    for (n in 1:N) {
    -      K[n, n] = K[n, n] + delta;
    -    }
    -    L_K = cholesky_decompose(K);
    -    f = L_K * eta;
    -  }
    -
    -  rho ~ inv_gamma(5, 5);
    -  alpha ~ std_normal();
    -  sigma ~ std_normal();
    -  eta ~ std_normal();
    -
    -  y ~ normal(f, sigma);
    -}
    +
    data {
    +  int<lower=1> N;
    +  int<lower=1> D;
    +  array[N] vector[D] x;
    +  vector[N] y;
    +}
    +transformed data {
    +  real delta = 1e-9;
    +}
    +parameters {
    +  array[D] real<lower=0> rho;
    +  real<lower=0> alpha;
    +  real<lower=0> sigma;
    +  vector[N] eta;
    +}
    +model {
    +  vector[N] f;
    +  {
    +    matrix[N, N] L_K;
    +    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
    +    for (n in 1:N) {
    +      K[n, n] = K[n, n] + delta;
    +    }
    +    L_K = cholesky_decompose(K);
    +    f = L_K * eta;
    +  }
    +
    +  rho ~ inv_gamma(5, 5);
    +  alpha ~ std_normal();
    +  sigma ~ std_normal();
    +  eta ~ std_normal();
    +
    +  y ~ normal(f, sigma);
    +}

    Priors for Gaussian process parameters

    @@ -972,18 +1129,18 @@

    Priors p \in \mathbb{Z} \end{align*}\]

    which has an inverse gamma left tail if \(p \leq 0\) and an inverse Gaussian right tail. This has not yet been implemented in Stan’s math library, but it is possible to implement as a user defined function:

    -
    functions {
    -  real generalized_inverse_gaussian_lpdf(real x, int p,
    -                                        real a, real b) {
    -    return p * 0.5 * log(a / b)
    -      - log(2 * modified_bessel_second_kind(p, sqrt(a * b)))
    -      + (p - 1) * log(x)
    -      - (a * x + b / x) * 0.5;
    - }
    -}
    -data {
    -  // ...
    -}
    +
    functions {
    +  real generalized_inverse_gaussian_lpdf(real x, int p,
    +                                        real a, real b) {
    +    return p * 0.5 * log(a / b)
    +      - log(2 * modified_bessel_second_kind(p, sqrt(a * b)))
    +      + (p - 1) * log(x)
    +      - (a * x + b / x) * 0.5;
    + }
    +}
    +data {
    +  // ...
    +}

    If we have high-frequency covariates in our fixed effects, we may wish to further regularize the GP away from high-frequency functions, which means we’ll need to penalize smaller length-scales. Luckily, we have a useful way of thinking about how length-scale affects the frequency of the functions supported by the GP. If we were to repeatedly draw from a zero-mean GP with a length-scale of \(\rho\) in a fixed domain \([0,T]\), we would get a distribution for the number of times each draw of the GP crossed the zero axis. The expectation of this random variable, the number of zero crossings, is \(T / (\pi \rho)\). You can see that as \(\rho\) decreases, the expected number of zero crossings increases as the GP is representing higher-frequency functions. Thus, this is a good statistic to keep in mind when setting a lower bound for our prior on length-scale in the presence of high-frequency covariates. However, this statistic is only valid for one-dimensional inputs.

    @@ -1004,59 +1161,59 @@

    \(y\) and unobserved \(\tilde{y}\).

    -
    data {
    -  int<lower=1> N1;
    -  array[N1] real x1;
    -  vector[N1] y1;
    -  int<lower=1> N2;
    -  array[N2] real x2;
    -}
    -transformed data {
    -  real delta = 1e-9;
    -  int<lower=1> N = N1 + N2;
    -  array[N] real x;
    -  for (n1 in 1:N1) {
    -    x[n1] = x1[n1];
    -  }
    -  for (n2 in 1:N2) {
    -    x[N1 + n2] = x2[n2];
    -  }
    -}
    -parameters {
    -  real<lower=0> rho;
    -  real<lower=0> alpha;
    -  real<lower=0> sigma;
    -  vector[N] eta;
    -}
    -transformed parameters {
    -  vector[N] f;
    -  {
    -    matrix[N, N] L_K;
    -    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
    -
    -    // diagonal elements
    -    for (n in 1:N) {
    -      K[n, n] = K[n, n] + delta;
    -    }
    -
    -    L_K = cholesky_decompose(K);
    -    f = L_K * eta;
    -  }
    -}
    -model {
    -  rho ~ inv_gamma(5, 5);
    -  alpha ~ std_normal();
    -  sigma ~ std_normal();
    -  eta ~ std_normal();
    -
    -  y1 ~ normal(f[1:N1], sigma);
    -}
    -generated quantities {
    -  vector[N2] y2;
    -  for (n2 in 1:N2) {
    -    y2[n2] = normal_rng(f[N1 + n2], sigma);
    -  }
    -}
    +
    data {
    +  int<lower=1> N1;
    +  array[N1] real x1;
    +  vector[N1] y1;
    +  int<lower=1> N2;
    +  array[N2] real x2;
    +}
    +transformed data {
    +  real delta = 1e-9;
    +  int<lower=1> N = N1 + N2;
    +  array[N] real x;
    +  for (n1 in 1:N1) {
    +    x[n1] = x1[n1];
    +  }
    +  for (n2 in 1:N2) {
    +    x[N1 + n2] = x2[n2];
    +  }
    +}
    +parameters {
    +  real<lower=0> rho;
    +  real<lower=0> alpha;
    +  real<lower=0> sigma;
    +  vector[N] eta;
    +}
    +transformed parameters {
    +  vector[N] f;
    +  {
    +    matrix[N, N] L_K;
    +    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
    +
    +    // diagonal elements
    +    for (n in 1:N) {
    +      K[n, n] = K[n, n] + delta;
    +    }
    +
    +    L_K = cholesky_decompose(K);
    +    f = L_K * eta;
    +  }
    +}
    +model {
    +  rho ~ inv_gamma(5, 5);
    +  alpha ~ std_normal();
    +  sigma ~ std_normal();
    +  eta ~ std_normal();
    +
    +  y1 ~ normal(f[1:N1], sigma);
    +}
    +generated quantities {
    +  vector[N2] y2;
    +  for (n2 in 1:N2) {
    +    y2[n2] = normal_rng(f[N1 + n2], sigma);
    +  }
    +}

    The input vectors x1 and x2 are declared as data, as is the observed output vector y1. The unknown output vector y2, which corresponds to input vector x2, is declared in the generated quantities block and will be sampled when the model is executed.

    A transformed data block is used to combine the input vectors x1 and x2 into a single vector x.

    The transformed parameters block declares and defines the combined output vector f, which consists of the concatenation of the conditional mean for known outputs y1 and unknown outputs y2. Thus the combined output vector f is aligned with the combined input vector x. All that is left is to define the univariate normal distribution statement for y1 in the model block.

    @@ -1065,59 +1222,59 @@

    Predictive inference in non-Gaussian GPs

    We can do predictive inference in non-Gaussian GPs in much the same way as we do with Gaussian GPs.

    Consider the following full model for prediction using logistic Gaussian process regression.

    -
    data {
    -  int<lower=1> N1;
    -  array[N1] real x1;
    -  array[N1] int<lower=0, upper=1> z1;
    -  int<lower=1> N2;
    -  array[N2] real x2;
    -}
    -transformed data {
    -  real delta = 1e-9;
    -  int<lower=1> N = N1 + N2;
    -  array[N] real x;
    -  for (n1 in 1:N1) {
    -    x[n1] = x1[n1];
    -  }
    -  for (n2 in 1:N2) {
    -    x[N1 + n2] = x2[n2];
    -  }
    -}
    -parameters {
    -  real<lower=0> rho;
    -  real<lower=0> alpha;
    -  real a;
    -  vector[N] eta;
    -}
    -transformed parameters {
    -  vector[N] f;
    -  {
    -    matrix[N, N] L_K;
    -    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
    -
    -    // diagonal elements
    -    for (n in 1:N) {
    -      K[n, n] = K[n, n] + delta;
    -    }
    -
    -    L_K = cholesky_decompose(K);
    -    f = L_K * eta;
    -  }
    -}
    -model {
    -  rho ~ inv_gamma(5, 5);
    -  alpha ~ std_normal();
    -  a ~ std_normal();
    -  eta ~ std_normal();
    -
    -  z1 ~ bernoulli_logit(a + f[1:N1]);
    -}
    -generated quantities {
    -  array[N2] int z2;
    -  for (n2 in 1:N2) {
    -    z2[n2] = bernoulli_logit_rng(a + f[N1 + n2]);
    -  }
    -}
    +
    data {
    +  int<lower=1> N1;
    +  array[N1] real x1;
    +  array[N1] int<lower=0, upper=1> z1;
    +  int<lower=1> N2;
    +  array[N2] real x2;
    +}
    +transformed data {
    +  real delta = 1e-9;
    +  int<lower=1> N = N1 + N2;
    +  array[N] real x;
    +  for (n1 in 1:N1) {
    +    x[n1] = x1[n1];
    +  }
    +  for (n2 in 1:N2) {
    +    x[N1 + n2] = x2[n2];
    +  }
    +}
    +parameters {
    +  real<lower=0> rho;
    +  real<lower=0> alpha;
    +  real a;
    +  vector[N] eta;
    +}
    +transformed parameters {
    +  vector[N] f;
    +  {
    +    matrix[N, N] L_K;
    +    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
    +
    +    // diagonal elements
    +    for (n in 1:N) {
    +      K[n, n] = K[n, n] + delta;
    +    }
    +
    +    L_K = cholesky_decompose(K);
    +    f = L_K * eta;
    +  }
    +}
    +model {
    +  rho ~ inv_gamma(5, 5);
    +  alpha ~ std_normal();
    +  a ~ std_normal();
    +  eta ~ std_normal();
    +
    +  z1 ~ bernoulli_logit(a + f[1:N1]);
    +}
    +generated quantities {
    +  array[N2] int z2;
    +  for (n2 in 1:N2) {
    +    z2[n2] = bernoulli_logit_rng(a + f[N1 + n2]);
    +  }
    +}

    Analytical form of joint predictive inference

    @@ -1134,89 +1291,89 @@

    \(\sigma^2\) because the indexes of elements in \(x\) and \(\tilde{x}\) are never the same.

    The Stan code below uses the analytic form of the posterior and provides sampling of the resulting multivariate normal through the Cholesky decomposition. The data declaration is the same as for the latent variable example, but we’ve defined a function called gp_pred_rng which will generate a draw from the posterior predictive mean conditioned on observed data y1. The code uses a Cholesky decomposition in triangular solves in order to cut down on the number of matrix-matrix multiplications when computing the conditional mean and the conditional covariance of \(p(\tilde{y})\).

    -
    functions {
    -  vector gp_pred_rng(array[] real x2,
    -                     vector y1,
    -                     array[] real x1,
    -                     real alpha,
    -                     real rho,
    -                     real sigma,
    -                     real delta) {
    -    int N1 = rows(y1);
    -    int N2 = size(x2);
    -    vector[N2] f2;
    -    {
    -      matrix[N1, N1] L_K;
    -      vector[N1] K_div_y1;
    -      matrix[N1, N2] k_x1_x2;
    -      matrix[N1, N2] v_pred;
    -      vector[N2] f2_mu;
    -      matrix[N2, N2] cov_f2;
    -      matrix[N2, N2] diag_delta;
    -      matrix[N1, N1] K;
    -      K = gp_exp_quad_cov(x1, alpha, rho);
    -      for (n in 1:N1) {
    -        K[n, n] = K[n, n] + square(sigma);
    -      }
    -      L_K = cholesky_decompose(K);
    -      K_div_y1 = mdivide_left_tri_low(L_K, y1);
    -      K_div_y1 = mdivide_right_tri_low(K_div_y1', L_K)';
    -      k_x1_x2 = gp_exp_quad_cov(x1, x2, alpha, rho);
    -      f2_mu = (k_x1_x2' * K_div_y1);
    -      v_pred = mdivide_left_tri_low(L_K, k_x1_x2);
    -      cov_f2 = gp_exp_quad_cov(x2, alpha, rho) - v_pred' * v_pred;
    -      diag_delta = diag_matrix(rep_vector(delta, N2));
    -
    -      f2 = multi_normal_rng(f2_mu, cov_f2 + diag_delta);
    -    }
    -    return f2;
    -  }
    -}
    -data {
    -  int<lower=1> N1;
    -  array[N1] real x1;
    -  vector[N1] y1;
    -  int<lower=1> N2;
    -  array[N2] real x2;
    -}
    -transformed data {
    -  vector[N1] mu = rep_vector(0, N1);
    -  real delta = 1e-9;
    -}
    -parameters {
    -  real<lower=0> rho;
    -  real<lower=0> alpha;
    -  real<lower=0> sigma;
    -}
    -model {
    -  matrix[N1, N1] L_K;
    -  {
    -    matrix[N1, N1] K = gp_exp_quad_cov(x1, alpha, rho);
    -    real sq_sigma = square(sigma);
    -
    -    // diagonal elements
    -    for (n1 in 1:N1) {
    -      K[n1, n1] = K[n1, n1] + sq_sigma;
    -    }
    -
    -    L_K = cholesky_decompose(K);
    -  }
    -
    -  rho ~ inv_gamma(5, 5);
    -  alpha ~ std_normal();
    -  sigma ~ std_normal();
    -
    -  y1 ~ multi_normal_cholesky(mu, L_K);
    -}
    -generated quantities {
    -  vector[N2] f2;
    -  vector[N2] y2;
    -
    -  f2 = gp_pred_rng(x2, y1, x1, alpha, rho, sigma, delta);
    -  for (n2 in 1:N2) {
    -    y2[n2] = normal_rng(f2[n2], sigma);
    -  }
    -}
    +
    functions {
    +  vector gp_pred_rng(array[] real x2,
    +                     vector y1,
    +                     array[] real x1,
    +                     real alpha,
    +                     real rho,
    +                     real sigma,
    +                     real delta) {
    +    int N1 = rows(y1);
    +    int N2 = size(x2);
    +    vector[N2] f2;
    +    {
    +      matrix[N1, N1] L_K;
    +      vector[N1] K_div_y1;
    +      matrix[N1, N2] k_x1_x2;
    +      matrix[N1, N2] v_pred;
    +      vector[N2] f2_mu;
    +      matrix[N2, N2] cov_f2;
    +      matrix[N2, N2] diag_delta;
    +      matrix[N1, N1] K;
    +      K = gp_exp_quad_cov(x1, alpha, rho);
    +      for (n in 1:N1) {
    +        K[n, n] = K[n, n] + square(sigma);
    +      }
    +      L_K = cholesky_decompose(K);
    +      K_div_y1 = mdivide_left_tri_low(L_K, y1);
    +      K_div_y1 = mdivide_right_tri_low(K_div_y1', L_K)';
    +      k_x1_x2 = gp_exp_quad_cov(x1, x2, alpha, rho);
    +      f2_mu = (k_x1_x2' * K_div_y1);
    +      v_pred = mdivide_left_tri_low(L_K, k_x1_x2);
    +      cov_f2 = gp_exp_quad_cov(x2, alpha, rho) - v_pred' * v_pred;
    +      diag_delta = diag_matrix(rep_vector(delta, N2));
    +
    +      f2 = multi_normal_rng(f2_mu, cov_f2 + diag_delta);
    +    }
    +    return f2;
    +  }
    +}
    +data {
    +  int<lower=1> N1;
    +  array[N1] real x1;
    +  vector[N1] y1;
    +  int<lower=1> N2;
    +  array[N2] real x2;
    +}
    +transformed data {
    +  vector[N1] mu = rep_vector(0, N1);
    +  real delta = 1e-9;
    +}
    +parameters {
    +  real<lower=0> rho;
    +  real<lower=0> alpha;
    +  real<lower=0> sigma;
    +}
    +model {
    +  matrix[N1, N1] L_K;
    +  {
    +    matrix[N1, N1] K = gp_exp_quad_cov(x1, alpha, rho);
    +    real sq_sigma = square(sigma);
    +
    +    // diagonal elements
    +    for (n1 in 1:N1) {
    +      K[n1, n1] = K[n1, n1] + sq_sigma;
    +    }
    +
    +    L_K = cholesky_decompose(K);
    +  }
    +
    +  rho ~ inv_gamma(5, 5);
    +  alpha ~ std_normal();
    +  sigma ~ std_normal();
    +
    +  y1 ~ multi_normal_cholesky(mu, L_K);
    +}
    +generated quantities {
    +  vector[N2] f2;
    +  vector[N2] y2;
    +
    +  f2 = gp_pred_rng(x2, y1, x1, alpha, rho, sigma, delta);
    +  for (n2 in 1:N2) {
    +    y2[n2] = normal_rng(f2[n2], sigma);
    +  }
    +}

    @@ -1252,50 +1409,50 @@

    \(\textsf{LKJCorr}\) for \(C(\phi)\), but any positive-definite matrix will do.

    -
    data {
    -  int<lower=1> N;
    -  int<lower=1> D;
    -  array[N] real x;
    -  matrix[N, D] y;
    -}
    -transformed data {
    -  real delta = 1e-9;
    -}
    -parameters {
    -  real<lower=0> rho;
    -  vector<lower=0>[D] alpha;
    -  real<lower=0> sigma;
    -  cholesky_factor_corr[D] L_Omega;
    -  matrix[N, D] eta;
    -}
    -model {
    -  matrix[N, D] f;
    -  {
    -    matrix[N, N] K = gp_exp_quad_cov(x, 1.0, rho);
    -    matrix[N, N] L_K;
    -
    -    // diagonal elements
    -    for (n in 1:N) {
    -      K[n, n] = K[n, n] + delta;
    -    }
    -
    -    L_K = cholesky_decompose(K);
    -    f = L_K * eta
    -        * diag_pre_multiply(alpha, L_Omega)';
    -  }
    -
    -  rho ~ inv_gamma(5, 5);
    -  alpha ~ std_normal();
    -  sigma ~ std_normal();
    -  L_Omega ~ lkj_corr_cholesky(3);
    -  to_vector(eta) ~ std_normal();
    -
    -  to_vector(y) ~ normal(to_vector(f), sigma);
    -}
    -generated quantities {
    -  matrix[D, D] Omega;
    -  Omega = L_Omega * L_Omega';
    -}
    +
    data {
    +  int<lower=1> N;
    +  int<lower=1> D;
    +  array[N] real x;
    +  matrix[N, D] y;
    +}
    +transformed data {
    +  real delta = 1e-9;
    +}
    +parameters {
    +  real<lower=0> rho;
    +  vector<lower=0>[D] alpha;
    +  real<lower=0> sigma;
    +  cholesky_factor_corr[D] L_Omega;
    +  matrix[N, D] eta;
    +}
    +model {
    +  matrix[N, D] f;
    +  {
    +    matrix[N, N] K = gp_exp_quad_cov(x, 1.0, rho);
    +    matrix[N, N] L_K;
    +
    +    // diagonal elements
    +    for (n in 1:N) {
    +      K[n, n] = K[n, n] + delta;
    +    }
    +
    +    L_K = cholesky_decompose(K);
    +    f = L_K * eta
    +        * diag_pre_multiply(alpha, L_Omega)';
    +  }
    +
    +  rho ~ inv_gamma(5, 5);
    +  alpha ~ std_normal();
    +  sigma ~ std_normal();
    +  L_Omega ~ lkj_corr_cholesky(3);
    +  to_vector(eta) ~ std_normal();
    +
    +  to_vector(y) ~ normal(to_vector(f), sigma);
    +}
    +generated quantities {
    +  matrix[D, D] Omega;
    +  Omega = L_Omega * L_Omega';
    +}
    @@ -1305,6 +1462,15 @@

    Back to top

    References

    +
    +Cseke, Botond, and Tom Heskes. 2011. “Approximate Marginals in Latent Gaussian Models.” Journal of Machine Learning Research 12. +
    +
    +Kuss, Malte, and Carl E Rasmussen. 2005. “Assessing Approximate Inference for Binary Gaussian Process Classification.” Journal of Machine Learning Research 6: 1679–1704. +
    +
    +Margossian, Charles C, Aki Vehtari, Daniel Simpson, and Raj Agrawal. 2020. “Hamiltonian Monte Carlo Using an Adjoint-Differentiated Laplace Approximation: Bayesian Inference for Latent Gaussian Models and Beyond.” Advances in Neural Information Processing Systems 33. +
    Neal, Radford M. 1996. Bayesian Learning for Neural Networks. Lecture Notes in Statistics 118. New York: Springer.
    @@ -1320,6 +1486,15 @@

    Riutort-Mayol, Gabriel, Paul-Christian Bürkner, Michael R Andersen, Arno Solin, and Aki Vehtari. 2023. “Practical Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming.” Statistics and Computing 33 (1): 17.

    +
    +Rue, Håvard, Sara Martino, and Nicolas Chopin. 2009. “Approximate Bayesian Inference for Latent Gaussian Models by Using Integrated Nested Laplace Approximations.” Journal of the Royal Statistical Society: Series B (Statistical Methodology) 71 (2): 319–92. https://doi.org/10.1111/j.1467-9868.2008.00700.x. +
    +
    +Vanhatalo, Jarno, Ville Pietiläinen, and Aki Vehtari. 2010. “Approximate Inference for Disease Mapping with Sparse Gaussian Processes.” Statistics in Medicine 29 (15): 1580–1607. +
    +
    +Vehtari, Aki, Tommi Mononen, Ville Tolvanen, Tuomas Sivula, and Ole Winther. 2016. “Bayesian Leave-One-Out Cross-Validation Approximations for Gaussian Latent Variable Models.” Journal of Machine Learning Research 17 (103): 1–38. http://jmlr.org/papers/v17/14-540.html. +
    Zhang, Hao. 2004. “Inconsistent Estimation and Asymptotically Equal Interpolations in Model-Based Geostatistics.” Journal of the American Statistical Association 99 (465): 250–61.
    @@ -1904,8 +2079,35 @@

    - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/hyperspherical-models.html b/docs/stan-users-guide/hyperspherical-models.html index e0d154ccf..08c37a052 100644 --- a/docs/stan-users-guide/hyperspherical-models.html +++ b/docs/stan-users-guide/hyperspherical-models.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@

    + + @@ -1263,8 +1275,35 @@

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/img/Figure_DDM.pdf b/docs/stan-users-guide/img/Figure_DDM.pdf new file mode 100644 index 000000000..381d4b538 Binary files /dev/null and b/docs/stan-users-guide/img/Figure_DDM.pdf differ diff --git a/docs/stan-users-guide/img/Figure_DDM.png b/docs/stan-users-guide/img/Figure_DDM.png new file mode 100644 index 000000000..859185e8f Binary files /dev/null and b/docs/stan-users-guide/img/Figure_DDM.png differ diff --git a/docs/stan-users-guide/index.html b/docs/stan-users-guide/index.html index 8f1c647cd..89ccd8743 100644 --- a/docs/stan-users-guide/index.html +++ b/docs/stan-users-guide/index.html @@ -74,7 +74,7 @@ - + @@ -225,7 +225,7 @@ + @@ -435,7 +441,7 @@ + @@ -512,7 +524,7 @@

    Stan User’s Guide

    -

    Version 2.38

    +

    Version 2.39

    @@ -538,7 +550,7 @@

    Stan User’s Guide

  • The appendices provide an introduction to the stanc3 compiler used in the various interfaces to Stan, a style guide, and advice for users of BUGS and JAGS.

  • We recommend working through this guide using the textbooks Bayesian Data Analysis and Statistical Rethinking: A Bayesian Course with Examples in R and Stan as references on the concepts, and using the Stan Reference Manual when necessary to clarify programming issues.

    -

    Download the pdf version of this manual.

    +

    Download the pdf version of this manual.

    @@ -1007,52 +998,52 @@

    Hierarchical regre

    Suppose each binary outcome \(y_n \in \{ 0, 1 \}\) has an associated level, \(ll_n \in \{ 1, \dotsc, L \}\). Each outcome will also have an associated predictor vector \(x_n \in \mathbb{R}^D\). Each level \(l\) gets its own coefficient vector \(\beta_l \in \mathbb{R}^D\). The hierarchical structure involves drawing the coefficients \(\beta_{l,d} \in \mathbb{R}\) from a prior that is also estimated with the data. This hierarchically estimated prior determines the amount of pooling. If the data in each level are similar, strong pooling will be reflected in low hierarchical variance. If the data in the levels are dissimilar, weaker pooling will be reflected in higher hierarchical variance.

    The following model encodes a hierarchical logistic regression model with a hierarchical prior on the regression coefficients.

    -
    data {
    -  int<lower=1> D;
    -  int<lower=0> N;
    -  int<lower=1> L;
    -  array[N] int<lower=0, upper=1> y;
    -  array[N] int<lower=1, upper=L> ll;
    -  array[N] row_vector[D] x;
    -}
    -parameters {
    -  array[D] real mu;
    -  array[D] real<lower=0> sigma;
    -  array[L] vector[D] beta;
    -}
    -model {
    -  for (d in 1:D) {
    -    mu[d] ~ normal(0, 100);
    -    for (l in 1:L) {
    -      beta[l, d] ~ normal(mu[d], sigma[d]);
    -    }
    -  }
    -  for (n in 1:N) {
    -    y[n] ~ bernoulli(inv_logit(x[n] * beta[ll[n]]));
    -  }
    -}
    +
    data {
    +  int<lower=1> D;
    +  int<lower=0> N;
    +  int<lower=1> L;
    +  array[N] int<lower=0, upper=1> y;
    +  array[N] int<lower=1, upper=L> ll;
    +  array[N] row_vector[D] x;
    +}
    +parameters {
    +  array[D] real mu;
    +  array[D] real<lower=0> sigma;
    +  array[L] vector[D] beta;
    +}
    +model {
    +  for (d in 1:D) {
    +    mu[d] ~ normal(0, 100);
    +    for (l in 1:L) {
    +      beta[l, d] ~ normal(mu[d], sigma[d]);
    +    }
    +  }
    +  for (n in 1:N) {
    +    y[n] ~ bernoulli(inv_logit(x[n] * beta[ll[n]]));
    +  }
    +}

    The standard deviation parameter sigma gets an implicit uniform prior on \((0,\infty)\) because of its declaration with a lower-bound constraint of zero. Stan allows improper priors as long as the posterior is proper. Nevertheless, it is usually helpful to have informative or at least weakly informative priors for all parameters; see the regression priors section for recommendations on priors for regression coefficients and scales.

    Optimizing the model

    Where possible, vectorizing distribution statements leads to faster log probability and derivative evaluations. The speed boost is not because loops are eliminated, but because vectorization allows sharing subcomputations in the log probability and gradient calculations and because it reduces the size of the expression tree required for gradient calculations.

    The first optimization vectorizes the for-loop over D as

    -
    mu ~ normal(0, 100);
    -for (l in 1:L) {
    -  beta[l] ~ normal(mu, sigma);
    -}
    +
    mu ~ normal(0, 100);
    +for (l in 1:L) {
    +  beta[l] ~ normal(mu, sigma);
    +}

    The declaration of beta as an array of vectors means that the expression beta[l] denotes a vector. Although beta could have been declared as a matrix, an array of vectors (or a two-dimensional array) is more efficient for accessing rows; see the indexing efficiency section for more information on the efficiency tradeoffs among arrays, vectors, and matrices.

    This model can be further sped up and at the same time made more arithmetically stable by replacing the application of inverse-logit inside the Bernoulli distribution with the logit-parameterized Bernoulli,3

    -
    for (n in 1:N) {
    -  y[n] ~ bernoulli_logit(x[n] * beta[ll[n]]);
    -}
    +
    for (n in 1:N) {
    +  y[n] ~ bernoulli_logit(x[n] * beta[ll[n]]);
    +}

    Unlike in R or BUGS, loops, array access and assignments are fast in Stan because they are translated directly to C++. In most cases, the cost of allocating and assigning to a container is more than made up for by the increased efficiency due to vectorizing the log probability and gradient calculations. Thus the following version is faster than the original formulation as a loop over a distribution statement.

    -
    {
    -  vector[N] x_beta_ll;
    -  for (n in 1:N) {
    -    x_beta_ll[n] = x[n] * beta[ll[n]];
    -  }
    -  y ~ bernoulli_logit(x_beta_ll);
    -}
    +
    {
    +  vector[N] x_beta_ll;
    +  for (n in 1:N) {
    +    x_beta_ll[n] = x[n] * beta[ll[n]];
    +  }
    +  y ~ bernoulli_logit(x_beta_ll);
    +}

    The brackets introduce a new scope for the local variable x_beta_ll; alternatively, the variable may be declared at the top of the model block.

    In some cases, such as the above, the local variable assignment leads to models that are less readable. The recommended practice in such cases is to first develop and debug the more transparent version of the model and only work on optimizations when the simpler formulation has been debugged.

    @@ -1080,14 +1071,14 @@

    Item-response

    Data declaration with missingness

    The data provided for an IRT model may be declared as follows to account for the fact that not every student is required to answer every question.

    -
    data {
    -  int<lower=1> J;                     // number of students
    -  int<lower=1> K;                     // number of questions
    -  int<lower=1> N;                     // number of observations
    -  array[N] int<lower=1, upper=J> jj;  // student for observation n
    -  array[N] int<lower=1, upper=K> kk;  // question for observation n
    -  array[N] int<lower=0, upper=1> y;   // correctness for observation n
    -}
    +
    data {
    +  int<lower=1> J;                     // number of students
    +  int<lower=1> K;                     // number of questions
    +  int<lower=1> N;                     // number of observations
    +  array[N] int<lower=1, upper=J> jj;  // student for observation n
    +  array[N] int<lower=1, upper=K> kk;  // question for observation n
    +  array[N] int<lower=0, upper=1> y;   // correctness for observation n
    +}

    This declares a total of N student-question pairs in the data set, where each n in 1:N indexes a binary observation y[n] of the correctness of the answer of student jj[n] on question kk[n].

    The prior hyperparameters will be hard coded in the rest of this section for simplicity, though they could be coded as data in Stan for more flexibility.

    @@ -1095,21 +1086,21 @@

    1PL (Rasch) model

    The 1PL item-response model, also known as the Rasch model, has one parameter (1P) for questions and uses the logistic link function (L).

    The model parameters are declared as follows.

    -
    parameters {
    -  real delta;            // mean student ability
    -  array[J] real alpha;   // ability of student j - mean ability
    -  array[K] real beta;    // difficulty of question k
    -}
    +
    parameters {
    +  real delta;            // mean student ability
    +  array[J] real alpha;   // ability of student j - mean ability
    +  array[K] real beta;    // difficulty of question k
    +}

    The parameter alpha[j] is the ability coefficient for student j and beta[k] is the difficulty coefficient for question k. The non-standard parameterization used here also includes an intercept term delta, which represents the average student’s response to the average question.4

    The model itself is as follows.

    -
    model {
    -  alpha ~ std_normal();         // informative true prior
    -  beta ~ std_normal();          // informative true prior
    -  delta ~ normal(0.75, 1);      // informative true prior
    -  for (n in 1:N) {
    -    y[n] ~ bernoulli_logit(alpha[jj[n]] - beta[kk[n]] + delta);
    -  }
    -}
    +
    model {
    +  alpha ~ std_normal();         // informative true prior
    +  beta ~ std_normal();          // informative true prior
    +  delta ~ normal(0.75, 1);      // informative true prior
    +  for (n in 1:N) {
    +    y[n] ~ bernoulli_logit(alpha[jj[n]] - beta[kk[n]] + delta);
    +  }
    +}

    This model uses the logit-parameterized Bernoulli distribution, where \[ \texttt{bernoulli}\mathtt{\_}\texttt{logit}\left(y \mid \alpha\right) = @@ -1125,43 +1116,43 @@

    1PL (Rasch) mode

    Multilevel 2PL model

    The simple 1PL model described in the previous section is generalized in this section with the addition of a discrimination parameter to model how noisy a question is and by adding multilevel priors for the question difficulty and discrimination parameters. The model parameters are declared as follows.

    -
    parameters {
    -  real mu_beta;                // mean question difficulty
    -  vector[J] alpha;             // ability for j - mean
    -  vector[K] beta;              // difficulty for k
    -  vector<lower=0>[K] gamma;    // discrimination of k
    -  real<lower=0> sigma_beta;    // scale of difficulties
    -  real<lower=0> sigma_gamma;   // scale of log discrimination
    -}
    +
    parameters {
    +  real mu_beta;                // mean question difficulty
    +  vector[J] alpha;             // ability for j - mean
    +  vector[K] beta;              // difficulty for k
    +  vector<lower=0>[K] gamma;    // discrimination of k
    +  real<lower=0> sigma_beta;    // scale of difficulties
    +  real<lower=0> sigma_gamma;   // scale of log discrimination
    +}

    The parameters should be clearer after the model definition.

    -
    model {
    -  alpha ~ std_normal();
    -  beta ~ normal(0, sigma_beta);
    -  gamma ~ lognormal(0, sigma_gamma);
    -  mu_beta ~ cauchy(0, 5);
    -  sigma_beta ~ cauchy(0, 5);
    -  sigma_gamma ~ cauchy(0, 5);
    -  y ~ bernoulli_logit(gamma[kk] .* (alpha[jj] - (beta[kk] + mu_beta)));
    -}
    +
    model {
    +  alpha ~ std_normal();
    +  beta ~ normal(0, sigma_beta);
    +  gamma ~ lognormal(0, sigma_gamma);
    +  mu_beta ~ cauchy(0, 5);
    +  sigma_beta ~ cauchy(0, 5);
    +  sigma_gamma ~ cauchy(0, 5);
    +  y ~ bernoulli_logit(gamma[kk] .* (alpha[jj] - (beta[kk] + mu_beta)));
    +}

    The std_normal function is used here, defined by \[ \texttt{std}\mathtt{\_}\texttt{normal}(y) = \textsf{normal}\left(y \mid 0, 1\right). \]

    The distribution statement is also vectorized using elementwise multiplication; it is equivalent to

    -
    for (n in 1:N) {
    -  y[n] ~ bernoulli_logit(gamma[kk[n]]
    -                         * (alpha[jj[n]] - (beta[kk[n]] + mu_beta));
    -}
    +
    for (n in 1:N) {
    +  y[n] ~ bernoulli_logit(gamma[kk[n]]
    +                         * (alpha[jj[n]] - (beta[kk[n]] + mu_beta)));
    +}

    The 2PL model is similar to the 1PL model, with the additional parameter gamma[k] modeling how discriminative question k is. If gamma[k] is greater than 1, responses are more attenuated with less chance of getting a question right at random. The parameter gamma[k] is constrained to be positive, which prohibits there being questions that are easier for students of lesser ability; such questions are not unheard of, but they tend to be eliminated from most testing situations where an IRT model would be applied.

    The model is parameterized here with student abilities alpha being given a standard normal prior. This is to identify both the scale and the location of the parameters, both of which would be unidentified otherwise; see the problematic posteriors chapter for further discussion of identifiability. The difficulty and discrimination parameters beta and gamma then have varying scales given hierarchically in this model. They could also be given weakly informative non-hierarchical priors, such as

    -
    beta ~ normal(0, 5);
    -gamma ~ lognormal(0, 2);
    +
    beta ~ normal(0, 5);
    +gamma ~ lognormal(0, 2);

    The point is that the alpha determines the scale and location and beta and gamma are allowed to float.

    The beta parameter is here given a non-centered parameterization, with parameter mu_beta serving as the mean beta location. An alternative would’ve been to take:

    -
    beta ~ normal(mu_beta, sigma_beta);
    +
    beta ~ normal(mu_beta, sigma_beta);

    and

    -
    y[n] ~ bernoulli_logit(gamma[kk[n]] * (alpha[jj[n]] - beta[kk[n]]));
    +
    y[n] ~ bernoulli_logit(gamma[kk[n]] * (alpha[jj[n]] - beta[kk[n]]));

    Non-centered parameterizations tend to be more efficient in hierarchical models; see the reparameterization section for more information on non-centered reparameterizations.

    The intercept term mu_beta can’t itself be modeled hierarchically, so it is given a weakly informative \(\textsf{Cauchy}(0,5)\) prior. Similarly, the scale terms, sigma_beta, and sigma_gamma, are given half-Cauchy priors. As mentioned earlier, the scale and location for alpha are fixed to ensure identifiability. The truncation in the half-Cauchy prior is implicit; explicit truncation is not necessary because the log probability need only be calculated up to a proportion and the scale variables are constrained to \((0,\infty)\) by their declarations.

    @@ -1248,99 +1239,99 @@

    Coding the model in Stan

    The Stan code for the full hierarchical model with multivariate priors on the group-level coefficients and group-level prior means follows its definition.

    -
    data {
    -  int<lower=0> N;              // num individuals
    -  int<lower=1> K;              // num ind predictors
    -  int<lower=1> J;              // num groups
    -  int<lower=1> L;              // num group predictors
    -  array[N] int<lower=1, upper=J> jj;  // group for individual
    -  matrix[N, K] x;              // individual predictors
    -  array[J] row_vector[L] u;    // group predictors
    -  vector[N] y;                 // outcomes
    -}
    -parameters {
    -  corr_matrix[K] Omega;        // prior correlation
    -  vector<lower=0>[K] tau;      // prior scale
    -  matrix[L, K] gamma;          // group coeffs
    -  array[J] vector[K] beta;     // indiv coeffs by group
    -  real<lower=0> sigma;         // prediction error scale
    -}
    -model {
    -  tau ~ cauchy(0, 2.5);
    -  Omega ~ lkj_corr(2);
    -  to_vector(gamma) ~ normal(0, 5);
    -  {
    -    array[J] row_vector[K] u_gamma;
    -    for (j in 1:J) {
    -      u_gamma[j] = u[j] * gamma;
    -    }
    -    beta ~ multi_normal(u_gamma, quad_form_diag(Omega, tau));
    -  }
    -  for (n in 1:N) {
    -    y[n] ~ normal(x[n] * beta[jj[n]], sigma);
    -  }
    -}
    +
    data {
    +  int<lower=0> N;              // num individuals
    +  int<lower=1> K;              // num ind predictors
    +  int<lower=1> J;              // num groups
    +  int<lower=1> L;              // num group predictors
    +  array[N] int<lower=1, upper=J> jj;  // group for individual
    +  matrix[N, K] x;              // individual predictors
    +  array[J] row_vector[L] u;    // group predictors
    +  vector[N] y;                 // outcomes
    +}
    +parameters {
    +  corr_matrix[K] Omega;        // prior correlation
    +  vector<lower=0>[K] tau;      // prior scale
    +  matrix[L, K] gamma;          // group coeffs
    +  array[J] vector[K] beta;     // indiv coeffs by group
    +  real<lower=0> sigma;         // prediction error scale
    +}
    +model {
    +  tau ~ cauchy(0, 2.5);
    +  Omega ~ lkj_corr(2);
    +  to_vector(gamma) ~ normal(0, 5);
    +  {
    +    array[J] row_vector[K] u_gamma;
    +    for (j in 1:J) {
    +      u_gamma[j] = u[j] * gamma;
    +    }
    +    beta ~ multi_normal(u_gamma, quad_form_diag(Omega, tau));
    +  }
    +  for (n in 1:N) {
    +    y[n] ~ normal(x[n] * beta[jj[n]], sigma);
    +  }
    +}

    The hyperprior covariance matrix is defined implicitly through the quadratic form in the code because the correlation matrix Omega and scale vector tau are more natural to inspect in the output; to output Sigma, define it as a transformed parameter. The function quad_form_diag is defined so that quad_form_diag(Sigma, tau) is equivalent to diag_matrix(tau) * Sigma * diag_matrix(tau), where diag_matrix(tau) returns the matrix with tau on the diagonal and zeroes off diagonal; the version using quad_form_diag should be faster. For details on these and other matrix arithmetic operators and functions, see the function reference manual.

    Optimization through vectorization

    The code in the Stan program above can be sped up dramatically by replacing the distribution statement inside the for loop:

    -
    for (n in 1:N) {
    -  y[n] ~ normal(x[n] * beta[jj[n]], sigma);
    -}
    +
    for (n in 1:N) {
    +  y[n] ~ normal(x[n] * beta[jj[n]], sigma);
    +}

    with the vectorized distribution statement:

    -
    {
    -  vector[N] x_beta_jj;
    -  for (n in 1:N) {
    -    x_beta_jj[n] = x[n] * beta[jj[n]];
    -  }
    -  y ~ normal(x_beta_jj, sigma);
    -}
    +
    {
    +  vector[N] x_beta_jj;
    +  for (n in 1:N) {
    +    x_beta_jj[n] = x[n] * beta[jj[n]];
    +  }
    +  y ~ normal(x_beta_jj, sigma);
    +}

    The outer brackets create a local scope in which to define the variable x_beta_jj, which is then filled in a loop and used to define a vectorized distribution statement. The reason this is such a big win is that it allows us to take the log of sigma only once and it greatly reduces the size of the resulting expression graph by packing all of the work into a single distribution function.

    Although it is tempting to redeclare beta and include a revised model block distribution statement,

    -
    parameters {
    -  matrix[J, K] beta;
    -// ...
    -}
    -model {
    -  y ~ normal(rows_dot_product(x, beta[jj]), sigma);
    -  // ...
    -}
    +
    parameters {
    +  matrix[J, K] beta;
    +// ...
    +}
    +model {
    +  y ~ normal(rows_dot_product(x, beta[jj]), sigma);
    +  // ...
    +}

    this fails because it breaks the vectorization for beta,6

    -
    beta ~ multi_normal(...);
    +
    beta ~ multi_normal(...);

    which requires beta to be an array of vectors. Both vectorizations are important, so the best solution is to just use the loop above, because rows_dot_product cannot do much optimization in and of itself because there are no shared computations.

    The code in the Stan program above also builds up an array of vectors for the outcomes and for the multivariate normal, which provides a major speedup by reducing the number of linear systems that need to be solved and differentiated.

    -
    {
    -  matrix[K, K] Sigma_beta;
    -  Sigma_beta = quad_form_diag(Omega, tau);
    -  for (j in 1:J) {
    -    beta[j] ~ multi_normal((u[j] * gamma)', Sigma_beta);
    -  }
    -}
    +
    {
    +  matrix[K, K] Sigma_beta;
    +  Sigma_beta = quad_form_diag(Omega, tau);
    +  for (j in 1:J) {
    +    beta[j] ~ multi_normal((u[j] * gamma)', Sigma_beta);
    +  }
    +}

    In this example, the covariance matrix Sigma_beta is defined as a local variable so as not to have to repeat the quadratic form computation \(J\) times. This vectorization can be combined with the Cholesky-factor optimization in the next section.

    Optimization through Cholesky factorization

    The multivariate normal density and LKJ prior on correlation matrices both require their matrix parameters to be factored. Vectorizing, as in the previous section, ensures this is only done once for each density. An even better solution, both in terms of efficiency and numerical stability, is to parameterize the model directly in terms of Cholesky factors of correlation matrices using the multivariate version of the non-centered parameterization. For the model in the previous section, the program fragment to replace the full matrix prior with an equivalent Cholesky factorized prior is as follows.

    -
    data {
    -  matrix[L, J] u;              // group predictors transposed
    -  // ...
    -}
    -parameters {
    -  matrix[K, J] z;
    -  cholesky_factor_corr[K] L_Omega;
    -  matrix[K, L] gamma;
    -  // ...
    -}
    -transformed parameters {
    -  matrix[K, J] beta;
    -  beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;
    -}
    -model {
    -  to_vector(z) ~ std_normal();
    -  L_Omega ~ lkj_corr_cholesky(2);
    -  // ...
    -}
    +
    data {
    +  matrix[L, J] u;              // group predictors transposed
    +  // ...
    +}
    +parameters {
    +  matrix[K, J] z;
    +  cholesky_factor_corr[K] L_Omega;
    +  matrix[K, L] gamma;
    +  // ...
    +}
    +transformed parameters {
    +  matrix[K, J] beta;
    +  beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;
    +}
    +model {
    +  to_vector(z) ~ std_normal();
    +  L_Omega ~ lkj_corr_cholesky(2);
    +  // ...
    +}

    The data variable u was originally an array of vectors, which is efficient for access; here it is redeclared as a matrix in order to use it in matrix arithmetic. Moreover, it is transposed, along with gamma and beta, to minimize the number of transposition operations. The new parameter L_Omega is the Cholesky factor of the original correlation matrix Omega, so that

    Omega = L_Omega * L_Omega'

    The prior scale vector tau is unchanged, and furthermore, pre-multiplying the Cholesky factor by the scale produces the Cholesky factor of the final covariance matrix,

    @@ -1361,27 +1352,27 @@

    \(\Omega = \Omega_L \, \Omega_L^\top\), and the fact that \(\mathbb{E}(z \, z^\top) = I\) since \(z \sim \mathcal{N}(0, I)\).

    Omitting the remaining data declarations, which are the same as before with the exception of u, the optimized model is as follows.

    -
    parameters {
    -  matrix[K, J] z;
    -  cholesky_factor_corr[K] L_Omega;
    -  vector<lower=0, upper=pi() / 2>[K] tau_unif;  // prior scale
    -  matrix[K, L] gamma;                        // group coeffs
    -  real<lower=0> sigma;                       // prediction error scale
    -}
    -transformed parameters {
    -  vector<lower=0>[K] tau = 2.5 * tan(tau_unif);
    -  matrix[K, J] beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;
    -}
    -model {
    -  vector[N] mu;
    -  for(n in 1:N) {
    -    mu[n] = x[n, ] * beta[, jj[n]];
    -  }
    -  to_vector(z) ~ std_normal();
    -  L_Omega ~ lkj_corr_cholesky(2);
    -  to_vector(gamma) ~ normal(0, 5);
    -  y ~ normal(mu, sigma);
    -}
    +
    parameters {
    +  matrix[K, J] z;
    +  cholesky_factor_corr[K] L_Omega;
    +  vector<lower=0, upper=pi() / 2>[K] tau_unif;  // prior scale
    +  matrix[K, L] gamma;                        // group coeffs
    +  real<lower=0> sigma;                       // prediction error scale
    +}
    +transformed parameters {
    +  vector<lower=0>[K] tau = 2.5 * tan(tau_unif);
    +  matrix[K, J] beta = gamma * u + diag_pre_multiply(tau, L_Omega) * z;
    +}
    +model {
    +  vector[N] mu;
    +  for(n in 1:N) {
    +    mu[n] = x[n, ] * beta[, jj[n]];
    +  }
    +  to_vector(z) ~ std_normal();
    +  L_Omega ~ lkj_corr_cholesky(2);
    +  to_vector(gamma) ~ normal(0, 5);
    +  y ~ normal(mu, sigma);
    +}

    This model also reparameterizes the prior scale tau to avoid potential problems with the heavy tails of the Cauchy distribution. The statement tau_unif ~ uniform(0, pi() / 2) can be omitted from the model block because Stan increments the log posterior for parameters with uniform priors without it.

    @@ -1393,45 +1384,45 @@

    Pre

    Programming predictions

    As a simple example, the following linear regression provides the same setup for estimating the coefficients beta as in our very first example, using y for the N observations and x for the N predictor vectors. The model parameters and model for observations are exactly the same as before.

    To make predictions, we need to be given the number of predictions, N_new, and their predictor matrix, x_new. The predictions themselves are modeled as a parameter y_new. The model statement for the predictions is exactly the same as for the observations, with the new outcome vector y_new and prediction matrix x_new.

    -
    data {
    -  int<lower=1> K;
    -  int<lower=0> N;
    -  matrix[N, K] x;
    -  vector[N] y;
    -
    -  int<lower=0> N_new;
    -  matrix[N_new, K] x_new;
    -}
    -parameters {
    -  vector[K] beta;
    -  real<lower=0> sigma;
    -
    -  vector[N_new] y_new;                  // predictions
    -}
    -model {
    -  y ~ normal(x * beta, sigma);          // observed model
    -
    -  y_new ~ normal(x_new * beta, sigma);  // prediction model
    -}
    +
    data {
    +  int<lower=1> K;
    +  int<lower=0> N;
    +  matrix[N, K] x;
    +  vector[N] y;
    +
    +  int<lower=0> N_new;
    +  matrix[N_new, K] x_new;
    +}
    +parameters {
    +  vector[K] beta;
    +  real<lower=0> sigma;
    +
    +  vector[N_new] y_new;                  // predictions
    +}
    +model {
    +  y ~ normal(x * beta, sigma);          // observed model
    +
    +  y_new ~ normal(x_new * beta, sigma);  // prediction model
    +}

    Predictions as generated quantities

    Where possible, the most efficient way to generate predictions is to use the generated quantities block. This provides proper Monte Carlo (not Markov chain Monte Carlo) inference, which can have a much higher effective sample size per iteration.

    -
    // ...data as above...
    -
    -parameters {
    -  vector[K] beta;
    -  real<lower=0> sigma;
    -}
    -model {
    -  y ~ normal(x * beta, sigma);
    -}
    -generated quantities {
    -  vector[N_new] y_new;
    -  for (n in 1:N_new) {
    -    y_new[n] = normal_rng(x_new[n] * beta, sigma);
    -  }
    -}
    +
    // ...data as above...
    +
    +parameters {
    +  vector[K] beta;
    +  real<lower=0> sigma;
    +}
    +model {
    +  y ~ normal(x * beta, sigma);
    +}
    +generated quantities {
    +  vector[N_new] y_new;
    +  for (n in 1:N_new) {
    +    y_new[n] = normal_rng(x_new[n] * beta, sigma);
    +  }
    +}

    Now the data are just as before, but the parameter y_new is now declared as a generated quantity, and the prediction model is removed from the model and replaced by a pseudo-random draw from a normal distribution.

    Overflow in generated quantities

    @@ -1454,49 +1445,49 @@

    matrix), \(y_n\) is a \(K\)-vector of observations, \(\beta\) is a \((K \times J)\) matrix of regression coefficients (vector \(\beta_k\) holds coefficients for outcome \(k\)), and \(\Sigma\) is the covariance matrix governing the error. As usual, the intercept can be rolled into \(x\) as a column of ones.

    The basic Stan code is straightforward (though see below for more optimized code for use with LKJ priors on correlation).

    -
    data {
    -  int<lower=1> K;
    -  int<lower=1> J;
    -  int<lower=0> N;
    -  array[N] vector[J] x;
    -  array[N] vector[K] y;
    -}
    -parameters {
    -  matrix[K, J] beta;
    -  cov_matrix[K] Sigma;
    -}
    -model {
    -  array[N] vector[K] mu;
    -  for (n in 1:N) {
    -    mu[n] = beta * x[n];
    -  }
    -  y ~ multi_normal(mu, Sigma);
    -}
    +
    data {
    +  int<lower=1> K;
    +  int<lower=1> J;
    +  int<lower=0> N;
    +  array[N] vector[J] x;
    +  array[N] vector[K] y;
    +}
    +parameters {
    +  matrix[K, J] beta;
    +  cov_matrix[K] Sigma;
    +}
    +model {
    +  array[N] vector[K] mu;
    +  for (n in 1:N) {
    +    mu[n] = beta * x[n];
    +  }
    +  y ~ multi_normal(mu, Sigma);
    +}

    For efficiency, the multivariate normal is vectorized by precomputing the array of mean vectors and sharing the same covariance matrix.

    Following the advice in the multivariate hierarchical priors section, we will place a weakly informative normal prior on the regression coefficients, an LKJ prior on the correlations and a half-Cauchy prior on standard deviations. The covariance structure is parameterized in terms of Cholesky factors for efficiency and arithmetic stability.

    -
    // ...
    -parameters {
    -  matrix[K, J] beta;
    -  cholesky_factor_corr[K] L_Omega;
    -  vector<lower=0>[K] L_sigma;
    -}
    -model {
    -  array[N] vector[K] mu;
    -  matrix[K, K] L_Sigma;
    -
    -  for (n in 1:N) {
    -    mu[n] = beta * x[n];
    -
    -  }
    -
    -  L_Sigma = diag_pre_multiply(L_sigma, L_Omega);
    -
    -  to_vector(beta) ~ normal(0, 5);
    -  L_Omega ~ lkj_corr_cholesky(4);
    -  L_sigma ~ cauchy(0, 2.5);
    -
    -  y ~ multi_normal_cholesky(mu, L_Sigma);
    -}
    +
    // ...
    +parameters {
    +  matrix[K, J] beta;
    +  cholesky_factor_corr[K] L_Omega;
    +  vector<lower=0>[K] L_sigma;
    +}
    +model {
    +  array[N] vector[K] mu;
    +  matrix[K, K] L_Sigma;
    +
    +  for (n in 1:N) {
    +    mu[n] = beta * x[n];
    +
    +  }
    +
    +  L_Sigma = diag_pre_multiply(L_sigma, L_Omega);
    +
    +  to_vector(beta) ~ normal(0, 5);
    +  L_Omega ~ lkj_corr_cholesky(4);
    +  L_sigma ~ cauchy(0, 2.5);
    +
    +  y ~ multi_normal_cholesky(mu, L_Sigma);
    +}

    The Cholesky factor of the covariance matrix is then reconstructed as a local variable and used in the model by scaling the Cholesky factor of the correlation matrices. The regression coefficients get a prior all at once by converting the matrix beta to a vector.

    If required, the full correlation or covariance matrices may be reconstructed from their Cholesky factors in the generated quantities block.

    @@ -1513,92 +1504,92 @@

    Unlike in the seemingly unrelated regressions case, here the covariance matrix \(\Sigma\) has unit standard deviations (i.e., it is a correlation matrix). As with ordinary probit and logistic regressions, letting the scale vary causes the model (which is defined only by a cutpoint at 0, not a scale) to be unidentified (see Greene (2011)).

    Multivariate probit regression can be coded in Stan using the trick introduced by Albert and Chib (1993), where the underlying continuous value vectors \(y_n\) are coded as truncated parameters. The key to coding the model in Stan is declaring the latent vector \(z\) in two parts, based on whether the corresponding value of \(y\) is 0 or 1. Otherwise, the model is identical to the seemingly unrelated regression model in the previous section.

    First, we introduce a sum function for two-dimensional arrays of integers; this is going to help us calculate how many total 1 values there are in \(y\).

    -
    functions {
    -  int sum2d(array[,] int a) {
    -    int s = 0;
    -    for (i in 1:size(a)) {
    -      s += sum(a[i]);
    -    }
    -    return s;
    -  }
    -}
    +
    functions {
    +  int sum2d(array[,] int a) {
    +    int s = 0;
    +    for (i in 1:size(a)) {
    +      s += sum(a[i]);
    +    }
    +    return s;
    +  }
    +}

    The function is trivial, but it’s not a built-in for Stan and it’s easier to understand the rest of the model if it’s pulled into its own function so as not to create a distraction.

    The data declaration block is much like for the seemingly unrelated regressions, but the observations y are now integers constrained to be 0 or 1.

    -
    data {
    -  int<lower=1> K;
    -  int<lower=1> D;
    -  int<lower=0> N;
    -  array[N, D] int<lower=0, upper=1> y;
    -  array[N] vector[K] x;
    -}
    +
    data {
    +  int<lower=1> K;
    +  int<lower=1> D;
    +  int<lower=0> N;
    +  array[N, D] int<lower=0, upper=1> y;
    +  array[N] vector[K] x;
    +}

    After declaring the data, there is a rather involved transformed data block whose sole purpose is to sort the data array y into positive and negative components, keeping track of indexes so that z can be easily reassembled in the transformed parameters block.

    -
    transformed data {
    -  int<lower=0> N_pos;
    -  array[sum2d(y)] int<lower=1, upper=N> n_pos;
    -  array[size(n_pos)] int<lower=1, upper=D> d_pos;
    -  int<lower=0> N_neg;
    -  array[(N * D) - size(n_pos)] int<lower=1, upper=N> n_neg;
    -  array[size(n_neg)] int<lower=1, upper=D> d_neg;
    -
    -  N_pos = size(n_pos);
    -  N_neg = size(n_neg);
    -  {
    -    int i;
    -    int j;
    -    i = 1;
    -    j = 1;
    -    for (n in 1:N) {
    -      for (d in 1:D) {
    -        if (y[n, d] == 1) {
    -          n_pos[i] = n;
    -          d_pos[i] = d;
    -          i += 1;
    -        } else {
    -          n_neg[j] = n;
    -          d_neg[j] = d;
    -          j += 1;
    -        }
    -      }
    -    }
    -  }
    -}
    +
    transformed data {
    +  int<lower=0> N_pos;
    +  array[sum2d(y)] int<lower=1, upper=N> n_pos;
    +  array[size(n_pos)] int<lower=1, upper=D> d_pos;
    +  int<lower=0> N_neg;
    +  array[(N * D) - size(n_pos)] int<lower=1, upper=N> n_neg;
    +  array[size(n_neg)] int<lower=1, upper=D> d_neg;
    +
    +  N_pos = size(n_pos);
    +  N_neg = size(n_neg);
    +  {
    +    int i;
    +    int j;
    +    i = 1;
    +    j = 1;
    +    for (n in 1:N) {
    +      for (d in 1:D) {
    +        if (y[n, d] == 1) {
    +          n_pos[i] = n;
    +          d_pos[i] = d;
    +          i += 1;
    +        } else {
    +          n_neg[j] = n;
    +          d_neg[j] = d;
    +          j += 1;
    +        }
    +      }
    +    }
    +  }
    +}

    The variables N_pos and N_neg are set to the number of true (1) and number of false (0) observations in y. The loop then fills in the sequence of indexes for the positive and negative values in four arrays.

    The parameters are declared as follows.

    -
    parameters {
    -  matrix[D, K] beta;
    -  cholesky_factor_corr[D] L_Omega;
    -  vector<lower=0>[N_pos] z_pos;
    -  vector<upper=0>[N_neg] z_neg;
    -}
    +
    parameters {
    +  matrix[D, K] beta;
    +  cholesky_factor_corr[D] L_Omega;
    +  vector<lower=0>[N_pos] z_pos;
    +  vector<upper=0>[N_neg] z_neg;
    +}

    These include the regression coefficients beta and the Cholesky factor of the correlation matrix, L_Omega. This time there is no scaling because the covariance matrix has unit scale (i.e., it is a correlation matrix; see above).

    The critical part of the parameter declaration is that the latent real value \(z\) is broken into positive-constrained and negative-constrained components, whose size was conveniently calculated in the transformed data block. The transformed data block’s real work was to allow the transformed parameter block to reconstruct \(z\).

    -
    transformed parameters {
    -  array[N] vector[D] z;
    -  for (n in 1:N_pos) {
    -    z[n_pos[n], d_pos[n]] = z_pos[n];
    -  }
    -  for (n in 1:N_neg) {
    -    z[n_neg[n], d_neg[n]] = z_neg[n];
    -  }
    -}
    +
    transformed parameters {
    +  array[N] vector[D] z;
    +  for (n in 1:N_pos) {
    +    z[n_pos[n], d_pos[n]] = z_pos[n];
    +  }
    +  for (n in 1:N_neg) {
    +    z[n_neg[n], d_neg[n]] = z_neg[n];
    +  }
    +}

    At this point, the model is simple, pretty much recreating the seemingly unrelated regression.

    -
    model {
    -  L_Omega ~ lkj_corr_cholesky(4);
    -  to_vector(beta) ~ normal(0, 5);
    -  {
    -    array[N] vector[D] beta_x;
    -    for (n in 1:N) {
    -      beta_x[n] = beta * x[n];
    -    }
    -    z ~ multi_normal_cholesky(beta_x, L_Omega);
    -  }
    -}
    +
    model {
    +  L_Omega ~ lkj_corr_cholesky(4);
    +  to_vector(beta) ~ normal(0, 5);
    +  {
    +    array[N] vector[D] beta_x;
    +    for (n in 1:N) {
    +      beta_x[n] = beta * x[n];
    +    }
    +    z ~ multi_normal_cholesky(beta_x, L_Omega);
    +  }
    +}

    This simple form of model is made possible by the Albert and Chib-style constraints on z.

    Finally, the correlation matrix itself can be put back together in the generated quantities block if desired.

    -
    generated quantities {
    -  corr_matrix[D] Omega;
    -  Omega = multiply_lower_tri_self_transpose(L_Omega);
    -}
    +
    generated quantities {
    +  corr_matrix[D] Omega;
    +  Omega = multiply_lower_tri_self_transpose(L_Omega);
    +}

    The same could be done for the seemingly unrelated regressions in the previous section.

    @@ -1622,39 +1613,39 @@

    Prediction

    \,\textrm{d}(\alpha,\beta,\sigma). \]

    To code the posterior predictive inference in Stan, a standard linear regression is combined with a random number in the generated quantities block.

    -
    data {
    -  int<lower=0> N;
    -  vector[N] y;
    -  vector[N] x;
    -  int<lower=0> N_tilde;
    -  vector[N_tilde] x_tilde;
    -}
    -parameters {
    -  real alpha;
    -  real beta;
    -  real<lower=0> sigma;
    -}
    -model {
    -  y ~ normal(alpha + beta * x, sigma);
    -}
    -generated quantities {
    -  vector[N_tilde] y_tilde;
    -  for (n in 1:N_tilde) {
    -    y_tilde[n] = normal_rng(alpha + beta * x_tilde[n], sigma);
    -  }
    -}
    +
    data {
    +  int<lower=0> N;
    +  vector[N] y;
    +  vector[N] x;
    +  int<lower=0> N_tilde;
    +  vector[N_tilde] x_tilde;
    +}
    +parameters {
    +  real alpha;
    +  real beta;
    +  real<lower=0> sigma;
    +}
    +model {
    +  y ~ normal(alpha + beta * x, sigma);
    +}
    +generated quantities {
    +  vector[N_tilde] y_tilde;
    +  for (n in 1:N_tilde) {
    +    y_tilde[n] = normal_rng(alpha + beta * x_tilde[n], sigma);
    +  }
    +}

    Given observed predictors \(x\) and outcomes \(y\), y_tilde will be drawn according to \(p\left(\tilde{y} \mid \tilde{x}, y, x\right)\). This means that, for example, the posterior mean for y_tilde is the estimate of the outcome that minimizes expected square error (conditioned on the data and model).

    Posterior predictive checks

    A good way to investigate the fit of a model to the data, a critical step in Bayesian data analysis, is to generate simulated data according to the parameters of the model. This is carried out with exactly the same procedure as before, only the observed data predictors \(x\) are used in place of new predictors \(\tilde{x}\) for unobserved outcomes. If the model fits the data well, the predictions for \(\tilde{y}\) based on \(x\) should match the observed data \(y\).

    To code posterior predictive checks in Stan requires only a slight modification of the prediction code to use \(x\) and \(N\) in place of \(\tilde{x}\) and \(\tilde{N}\),

    -
    generated quantities {
    -  vector[N] y_tilde;
    -  for (n in 1:N) {
    -    y_tilde[n] = normal_rng(alpha + beta * x[n], sigma);
    -  }
    -}
    +
    generated quantities {
    +  vector[N] y_tilde;
    +  for (n in 1:N) {
    +    y_tilde[n] = normal_rng(alpha + beta * x[n], sigma);
    +  }
    +}

    Gelman et al. (2013) recommend choosing several posterior draws \(\tilde{y}^{(1)}, \dotsc, \tilde{y}^{(M)}\) and plotting each of them alongside the data \(y\) that was actually observed. If the model fits well, the simulated \(\tilde{y}\) will look like the actual data \(y\).

    @@ -2280,8 +2271,35 @@

    Pos + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/reparameterization.html b/docs/stan-users-guide/reparameterization.html index ad073e45d..f88283870 100644 --- a/docs/stan-users-guide/reparameterization.html +++ b/docs/stan-users-guide/reparameterization.html @@ -309,7 +309,7 @@ + @@ -519,7 +525,7 @@ + @@ -1583,8 +1595,35 @@

    + - \ No newline at end of file + \ No newline at end of file diff --git a/docs/stan-users-guide/simulation-based-calibration.html b/docs/stan-users-guide/simulation-based-calibration.html index 2f0477d4d..b3a86f5bf 100644 --- a/docs/stan-users-guide/simulation-based-calibration.html +++ b/docs/stan-users-guide/simulation-based-calibration.html @@ -7,7 +7,7 @@ -Simulation-Based Calibration +Simulation-Based Calibration Checking + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + + + + +
    +

    Wiener diffusion model

    +

    Diffusion models, sometimes also called Wiener diffusion models, are among the most frequently used model families in modeling two-alternative forced-choice tasks (see Wagenmakers (2009) for a review). Diffusion models make it possible to model response times and responses jointly. The basic version of a diffusion model comprises four parameters: the boundary separation, \(a\), the relative starting point, \(w\), the drift rate, \(v\), and the non-decision time, \(t0\) (Ratcliff 1978). In the basic model, it is assumed that the four basic parameters are the same for the whole experiment. As this assumption is very strict and there are examples that suggest that the basic parameters can be different from trial to trial, so-called inter-trial variabilities were introduced and the basic four-parameter model was extended to a seven-parameter model. In the seven-parameter extension of the diffusion model, the following three parameters are added: the inter-trial variability in relative starting point, \(s_w\), the inter-trial variability in drift rate, \(s_v\), and the inter-trial variability in non-decision time, \(s_{t0}\) (Nicenboim, Schad, and Vasishth 2025).

    +

    Data for the diffusion model is two-dimensional: There is one vector for the reaction times, \(y\), and one vector for the given responses, \(\text{resp}\). The reaction times shall be positive, continuous and in seconds, the responses shall be binary.

    +

    As a diffusion model describes the decision process for a decision with exactly two choices, there exist reaction time distributions for each response alternative. This means that the probability density function (\(p\)) splits into one part for one response alternative and one part for the other response alternative. In the following, we will refer to one alternative as the upper response boundary and to the other alternative as the lower response boundary. \(p\) of the lower response boundary can be obtained when inserting \(-v\) and \(1-w\) to \(p\) of the upper response boundary. Let’s call \(p\) for the lower response boundary \(p_0\) and \(p\) for the upper response boundary \(p_1\). Then:

    +

    \[ p_0(a,t0,v,w,s_v,s_w,s_{t0}) = p_1(a,t0,-v,1-w,s_v,s_w,s_{t0}) \]

    +

    Usually, a probability density function integrates to 1. In the case of the diffusion model, only the sum of both parts, \(p_0\) and \(p_1\), integrates to 1; each part on its own integrates to less than 1. Such a density is called defective.

    +
    +
    +
    + +
    +
    +Figure 1: Realization of a Four-Parameter Diffusion Process Modeling the Binary Decision Process. Image from Henrich et al. (2024), distributed under the Creative Commons Attribution 4.0 International License. Note. The parameters are the boundary separation a for two response alternatives, the relative starting point w, the drift rate v, and the non-decision time t0. The decision process is illustrated as a jagged line between the two boundaries. The predicted distributions of the reaction times are depicted as curved lines below and above the response boundaries (blue). +
    +
    +
    +

    In this model it is assumed that the decision process behaves like a random walk and we are interested in the first time that the random walk crosses one of the two decision boundaries. Hence, we are interested in the first-passage time of the decision process. The Stan function wiener_lpdf() returns the logarithm of the first-passage time density function for a diffusion model with up to seven parameters for upper boundary responses, \(\log(p_1)\). As can be seen above, it suffices to implement the density for only one response boundary, as the other can be obtained by mirroring the starting point and drift rate. Any combination of fixed and estimated parameters can be specified. In other words, with this implementation it is not only possible to estimate parameters of the full seven-parameter model, but also to estimate restricted models such as the basic four-parameter model, or a five- or six-parameter model, or even a one-parameter model when fixing the other six parameters.

    +

    For example, it is possible to permit variability in just one or two parameters and to fix the other variabilities to 0, or even to estimate a three-parameter model when fixing more parameters (e.g., fixing the relative starting point at 0.5).

    +

    It is assumed that the reaction time data that correspond to the upper response boundary \(y_\text{upper}\) is distributed according to wiener_lpdf():

    +

    \[ +y_\text{upper} \sim \operatorname{wiener\_lpdf}(a, t0, w, v, s_v, s_w, s_{t0}) +\] and the reaction time data that correspond to the lower response boundary \(y_\text{lower}\) is distributed according to wiener_lpdf() with mirrored starting point and drift rate:

    +

    \[ +y_\text{lower} \sim \operatorname{wiener\_lpdf}(a, t0, 1-w, -v, s_v, s_w, s_{t0}) +\]

    +
    +

    Function call example

    +

    The following example demonstrates a diffusion model call in Stan:

    +
    data {
    +  int<lower=0> N;                       // number of trials
    +  array[N] real rt;                     // response times (in seconds)
    +  array[N] int<lower=0, upper=1> resp;  // responses: 0 = lower, 1 = upper boundary
    +}
    +transformed data {
    +  // Shortest observed response time; used below as the upper bound of t0.
    +  real min_rt = min(rt);
    +}
    +parameters {
    +  real<lower=0> a;                 // boundary separation
    +  real v;                          // drift rate
    +  real<lower=0, upper=1> w;        // relative starting point
    +  real<lower=0, upper=min_rt> t0;  // non-decision time
    +
    +  real<lower=0> sv;                // inter-trial variability in drift rate
    +  // inter-trial variability in starting point; the upper bound keeps
    +  // w - sw / 2 and w + sw / 2 inside (0, 1)
    +  real<lower=0, upper=fmin(2 * w, 2 * (1 - w))> sw;
    +  real<lower=0> st0;               // inter-trial variability in non-decision time
    +}
    +transformed parameters {
    +  // Mirrored starting point and drift rate for lower-boundary responses.
    +  real one_minus_w = 1 - w;
    +  real neg_v = -v;
    +}
    +model {
    +  // priors
    +  a ~ normal(1, 1);
    +  w ~ normal(0.5, 0.1);
    +  v ~ normal(2, 3);
    +  t0 ~ normal(0.435, 0.12);
    +
    +  sv ~ normal(1, 3);
    +  st0 ~ normal(0.183, 0.09);
    +  sw ~ beta(1, 3);
    +
    +  // likelihood (diffusion model): wiener_lpdf() is the density for the
    +  // upper boundary, so lower-boundary responses use the mirrored arguments
    +  for (i in 1:N) {
    +    if (resp[i] == 1) {
    +      // upper boundary
    +      target += wiener_lpdf(rt[i] | a, t0, w, v, sv, sw, st0);
    +    } else {
    +      // lower boundary: mirror drift and starting point
    +      target += wiener_lpdf(rt[i] | a, t0, one_minus_w, neg_v,
    +                            sv, sw, st0);
    +    }
    +  }
    +}
    +
    +

    The data block

    +

    The data should consist of at least three variables:

    +
      +
    1. The number of trials N,
    2. +
    3. the response, coded as 0 = “lower bound” and 1 = “upper bound”, and
    4. +
    5. the reaction times in seconds (not milliseconds).
    6. +
    +

    Note that two different ways of coding responses are commonly used: First, in response coding, the boundaries correspond to the two response alternatives. Second, in accuracy coding, the boundaries correspond to correct (upper bound) and wrong (lower bound) responses. This means, depending on the coding you choose, the bounds mentioned in the second variable above differ and the response variable will have a different form.

    +

    Most often, an experimenter wants to find out whether an experimental manipulation influences the model parameters. As there exist psychological interpretations for each diffusion model parameter, the experimenter can draw conclusions from differing parameters. Therefore, a separate diffusion model is usually estimated for each experimental group to enable a comparison of the parameters between the groups. The manipulation can vary between different subjects, like an experimental group and a control group (so-called between-subject manipulations). However, it can also vary within the same subject by presenting stimuli from different experimental groups (so-called within-subject manipulations). Depending on the experimental design, one would typically also provide the number of conditions and the condition associated with each trial as a vector. Then, one model for each condition will be computed. This means that the parameters also have to be defined for each condition.

    +

    In a hierarchical setting, the data block would also specify the number of participants and the participant associated with each trial as a vector. It is also possible to hand over a precision value in the data block.

    +
    +
    +

    The parameters block

    +

    The model arguments of the wiener_lpdf() function that are not fixed to a certain value are defined as parameters in the parameters block. In this block, it is also possible to insert restrictions on the parameters. Note that the MCMC algorithm iteratively searches for the next parameter set. If the suggested sample falls outside the internally defined parameter ranges, the program will throw an error, which causes the algorithm to restart the current iteration. Since this slows down the sampling process, it is advisable to include the parameter ranges in the definition of the parameters in the parameters block to improve the sampling process (see table below for the parameter ranges). In addition, the parameter space is further constrained by the following conditions:

    +
      +
    1. The non-decision time \(t_0\) has to be smaller than or equal to the observed reaction time: \(t_0 \leq y\).
    2. +
    3. The varying relative starting point \(w\) has to be in the interval (0,1) and thus,
    4. +
    +

    \[ +\begin{aligned} +&w + \frac{s_w}{2} < 1 \text{, and} \\ +&0 < w-\frac{s_w}{2} +\end{aligned} +\]

    +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ParameterRangeParameterRange
    \(a\)(0, \(\infty\))\(y\)(0, \(\infty\))
    \(v\)(-\(\infty\), \(\infty\))\(s_v\)[0, \(\infty\))
    \(w\)(0,1)\(s_w\)[0,min(2w, 2(1-w)))
    \(t_0\)[0,\(\infty\))\(s_{t0}\)[0,\(\infty\))
    +
    +
    +

    The model block

    +

    In the model block, the priors and likelihood are defined for the upper and the lower response boundary. Different kinds of priors can be specified here. Generally, the regularization induced by mildly informative priors can help both statistically and computationally.

    +

    In the second part of the model block, the data generating distribution is applied to all responses. The drift rate \(v\) and relative starting point \(w\) have to be mirrored for responses at the lower boundary.

    +

    For more details regarding the application of the diffusion model in Stan, see Henrich et al. (2024).

    +
    +

    +
    +

    Truncated and censored data

    +

    Truncation and censoring frequently occur in psychological data collection. For reaction time data, truncated and censored data regularly arise in psychological studies as a consequence of using response windows or deadlines. These are sometimes introduced in the analysis of data to exclude reaction times that appear too short or too long, but they are also sometimes already built into the study procedures to push participants to respond within a specific temporal window.

    +

    Depending on the implementation of the response window, two different types of data arise: truncated data or censored data. Since the effects of truncation or censoring on summary statistics such as mean, median, standard deviation, and skewness are regularly too large to ignore (Ulrich and Miller 1994), data analysts are well advised to account for these effects.

    +

    As described in the Truncated or Censored Data chapter, the cumulative distribution function (\(F\)) and its complement (\(\text{CCDF}\)) are needed to model truncated and censored data.

    +

    As explained above, \(p\) is defined defectively, meaning that only the sum of \(p\)s for both response alternatives integrates to 1. For the same reason, \(F\) and \(\text{CCDF}\) are also implemented defectively. Analogously, only the sum of the \(F\)s and \(\text{CCDF}\)s for both response alternatives asymptotes above at 1.

    +

    In the case of the diffusion model, \(F\) asymptotes above at the probability \(PROB\) to hit the corresponding response boundary: (for simplicity, we omit the inter-trial variabilities in the following)

    +

    \[ +\begin{aligned} +F_1(\infty\mid a,w,v) &= \text{PROB}(a,w,v) \text{ and} \\ +F_0(\infty\mid a,w,v) &= F_1(\infty\mid a,1-w,-v) = \text{PROB}(a,1-w,-v) +\end{aligned} +\]

    +
    +

    Modeling truncated data with the diffusion model

    +

    Data are called truncated when there is no information available for analysis from trials with values larger (or smaller) than a right (or left) reaction-time bound. In reaction time experiments, reaction time data are truncated if trials with reaction times outside the response window are excluded from the analysis. Not even a count of those omitted trials is kept.

    +

    Let \(L\) denote the left reaction-time bound and \(U\) denote the right reaction-time bound of a response window.

    +

    Then, the density of truncated data for both response boundaries 0 and 1, here denoted as \(\text{resp}\in\{0,1\}\), can be formulated as follows:

    +

    \[ +\begin{aligned} +&p_{\text{resp}}(y \mid L<X\leq U, a, w, v) = \\ &\frac{p_{\text{resp}}(y \mid a, w, v)\cdot \mathbb{I}_{\{L<y\leq U\}}} +{\bigl(F_0(U \mid a, w, v)+F_1(U \mid a, w, v)\bigr) - +\bigl(F_0(L\mid a, w, v)+F_1(L\mid a, w, v)\bigr)} +\end{aligned} +\]

    +

    The density of left truncated data can be formulated as follows. \[ +\begin{aligned} +p_{\text{resp}}(y \mid L<X, a, w, v) = \frac{p_{\text{resp}}(y \mid a, w, v)\cdot \mathbb{I}_{\{L<y\}}} +{1-\bigl(F_0(L \mid a, w, v)+F_1(L \mid a, w, v)\bigr)}, +\end{aligned} +\]

    +

    The density of right truncated data can be formulated as follows.

    +

    \[ +\begin{aligned} +p_{\text{resp}}(y \mid X\leq U, a, w, v) = \frac{p_{\text{resp}}(y \mid a, w, v)\cdot \mathbb{I}_{\{y\leq U\}}}{F_0(U \mid a, w, v)+F_1(U \mid a, w, v)} +\end{aligned} +\]

    +

    As the functions are implemented defectively, a truncated diffusion model cannot be calculated with the truncation functor \(T[,]\) as it would usually be done in Stan. This means the function call: y ~ wiener(...)T[L,U] does not work the way it is supposed to. When the truncation functor is called in Stan, Stan searches for a CDF implementation internally. In the case of the diffusion model, Stan would find the CDF, but is not aware of its defective implementation and calculates the computations as if it were a non-defective CDF. This causes misleading and incorrect results.

    +

    To implement the truncated model, write out the function shown above on the log-scale with left_bound = L and right_bound = U, where wiener_lcdf_unnorm() calls the logarithmized CDF of the diffusion model at the response-1-boundary:

    +
    model {
    +  // Log of the truncation normalizer: the summed (defective) CDF mass of
    +  // both response boundaries at right_bound minus the same mass at
    +  // left_bound. It is identical for every trial, so it is computed once.
    +  real log_denom = log_diff_exp(
    +    log_sum_exp(
    +      wiener_lcdf_unnorm(right_bound | a, t0, w, v, sv, sw, st0),
    +      wiener_lcdf_unnorm(right_bound | a, t0, one_minus_w, neg_v,
    +                                       sv, sw, st0)),
    +    log_sum_exp(
    +      wiener_lcdf_unnorm(left_bound | a, t0, w, v, sv, sw, st0),
    +      wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w, neg_v,
    +                                      sv, sw, st0)));
    +  // likelihood: untruncated log density of each observed trial
    +  for (i in 1:N) {
    +    if (resp[i] == 1) {
    +      // response-1 boundary
    +      target += wiener_lpdf(rt[i] | a, t0, w, v, sv, sw, st0);
    +    } else {
    +      // response-0 boundary (mirror v and w)
    +      target += wiener_lpdf(rt[i] | a, t0, one_minus_w, neg_v,
    +                            sv, sw, st0);
    +    }
    +  } // end for
    +  // divide every trial's density by the normalizer (subtract on log scale)
    +  target += -N * log_denom;
    +}
    +

    For details of how to call a truncated model within the parallelization routine of reduce_sum or with truncation on only one side, see Henrich and Klauer (2026).

    +
    +
    +

    Modeling censored data with the diffusion model

    +

    Data are censored when observations that are above or below a right or left boundary value are reported as occurrences of the event \((y > +U)\), for \(U\) the right bound, or as occurrences of the event \((y \leq +L)\), for \(L\) the left bound, respectively. Like for truncated data, the range of the possible values is restricted, but the number of observations that fall outside the boundaries is kept, whereas in truncation, no count would be kept.

    +

    For the censored model, we distinguish two cases. In the first case, the responses of the censored trials are known, but the reaction times are not known. In the second case, neither the responses nor the reaction times of the censored trials are known. Note that the second case differs from a truncated model in the fact that the number of censored trials is still known. Consider first the case where the response is known even for censored data.

    +

    To model such data in Stan, the left and right reaction time bounds, left_bound and right_bound, respectively, are handed over in the data block, as well as a vector censored that tracks whether a trial is censored (= 1) or not (= 0), and counts of trials censored at the left reaction time bound and counts of trials censored at the right reaction time bound for each response in {0,1}. There are four such count variables: N_cens_left_0, N_cens_left_1, N_cens_right_0, N_cens_right_1:

    +
    model {
    +  // Observed (non-censored) trials contribute the first-passage time
    +  // density of the boundary that was hit; censored trials are skipped
    +  // here and enter through the count variables below.
    +  for (i in 1:N) {
    +    if (censored[i] != 0) {
    +      continue;
    +    }
    +    if (resp[i] == 1) {
    +      target += wiener_lupdf(y[i] | a, t0, w, v, sv, sw, st0);
    +    } else if (resp[i] == 0) {
    +      target += wiener_lupdf(y[i] | a, t0, one_minus_w, neg_v,
    +                             sv, sw, st0);
    +    }
    +  }
    +
    +  // censored trials with response = 0 (mirrored w and v)
    +  target += N_cens_left_0
    +            * wiener_lcdf_unnorm(left_bound | a, t0, one_minus_w, neg_v,
    +                                 sv, sw, st0);
    +  target += N_cens_right_0
    +            * wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w, neg_v,
    +                                  sv, sw, st0);
    +
    +  // censored trials with response = 1
    +  target += N_cens_left_1
    +            * wiener_lcdf_unnorm(left_bound | a, t0, w, v, sv, sw, st0);
    +  target += N_cens_right_1
    +            * wiener_lccdf_unnorm(right_bound | a, t0, w, v, sv, sw, st0);
    +}
    +

    When data are censored at only one side, meaning that the reaction time constraint only exists for one of the two boundaries, omit the lines for the other side in the code. A both sided reaction time window would be, for example, when only reaction times are accepted that occur between 0.2 and 0.8 seconds. A one sided reaction time constraint would be, for example, when all reaction times below 0.8 seconds are accepted.

    +

    When data consist of many conditions (as explained in the beginning), it is sometimes more convenient to loop over all trials instead of using count variables as described above, using the following notation and code. A vector containing the information whether a trial is censored or not, here censored, needs to be handed over in the data block. This vector splits the data into three bins: all trials \(i\) with censored[i]=0 are censored below the left reaction time bound, all trials \(i\) with censored[i]=1 fall between the reaction time bounds, and all trials \(i\) with censored[i]=2 are censored above the right reaction time bound. For non-censored trials, the log-PDF is computed, for left censored trials, the log-CDF is computed, and for right censored trials, the log-CCDF is computed:

    +
    model {
    +  for (i in 1:N) {
    +    // Starting point and drift rate as seen from the boundary that was
    +    // hit: unchanged for the upper boundary (resp == 1), mirrored otherwise.
    +    real w_i = (resp[i] == 1) ? w : one_minus_w;
    +    real v_i = (resp[i] == 1) ? v : neg_v;
    +
    +    if (censored[i] == 0) {
    +      // left-censored: probability mass below left_bound
    +      target += wiener_lcdf_unnorm(left_bound | a, t0, w_i, v_i,
    +                                   sv, sw, st0);
    +    } else if (censored[i] == 1) {
    +      // observed: density at the recorded reaction time
    +      target += wiener_lpdf(y[i] | a, t0, w_i, v_i, sv, sw, st0);
    +    } else if (censored[i] == 2) {
    +      // right-censored: probability mass above right_bound
    +      target += wiener_lccdf_unnorm(right_bound | a, t0, w_i, v_i,
    +                                    sv, sw, st0);
    +    }
    +  }
    +}
    +

    When the data are censored on only one side, omit the case that is not needed.

    +

    Note that this block can be inserted in the definition of the parallelization function, partial_sum_wiener(), as defined below.

    +

    Sometimes also the response is missing (i.e., it is known that the reaction time in a trial fell outside the response window, but which response was given is unknown). One method that has been used to model such data has involved inferring the numbers of missing responses of either kind from the observed relative frequencies of the two responses. This approach has the problem that quite specific assumptions on the missing data have to be made (namely, that the proportions of the two kinds of responses are the same for responses within and outside the response window).

    +

    The following is a more principled approach that uses the cumulative distribution functions and their complements to provide the data-generating distribution of censored data. As before, let \(L\) be the left reaction time bound, and \(U\) the right reaction time bound, and consider decision times without inter-trial variabilities for the sake of simplicity. It follows that the likelihood contribution \(\textit{lik}_l\) for a left-censored data point is given by

    +

    \[ +\begin{aligned} +\textit{lik}_l(a,w,v) = F_0(L\mid a,w,v) + F_1(L\mid a,w,v), +\end{aligned} +\]

    +

    whereas the likelihood contribution \(\textit{lik}_r\) due to a right-censored data point is given by

    +

    \[ +\begin{aligned} +\textit{lik}_r(a,w,v) = \text{CCDF}_0(U\mid a,w,v) + \text{CCDF}_1(U\mid a,w,v). +\end{aligned} +\]

    +

    See the following code for an example of Stan code implementing this second case of censoring. This model call deals with the problem of unknown responses by computing the probability of choosing the response-1 or response-0 boundary outside the response window. Here, the CDF and/or the CCDF are required, depending upon whether there is only left-censoring, right-censoring, or censoring both to the left and to the right. The following code shows the functions block for a model that is right-censored using the function partial_sum_wiener() to parallelize the execution of a single Stan chain across multiple cores:

    +
    functions {
    +  // Partial log-likelihood of trials start:end, written for reduce_sum().
    +  //
    +  // rt_slice holds only the reaction times of this slice and is therefore
    +  // indexed relative to start; resp and censored are indexed with the
    +  // absolute trial number i.
    +  // censored[i] == 1 marks an observed trial. Any other value is treated
    +  // as right-censored at right_bound with unknown response, so the
    +  // complementary CDFs of both boundaries are summed.
    +  // Returns the summed log-likelihood contribution of the slice.
    +  real partial_sum_wiener(array[] real rt_slice, int start,
    +                          int end, real a, real t0, real w,
    +                          real v, real sv, real sw, real st,
    +                          array[] int resp, real right_bound,
    +                          array[] int censored) {
    +    // Variables from the transformed parameters block are not visible
    +    // inside a function, so the mirrored arguments are computed here.
    +    real one_minus_w = 1 - w;
    +    real neg_v = -v;
    +    real ans = 0;
    +    for (i in start:end) {
    +      if (censored[i] == 1) {
    +        // not censored
    +        if (resp[i] == 1) {
    +          // upper boundary
    +          ans += wiener_lpdf(rt_slice[i + 1 - start] | a, t0, w, v,
    +                                                       sv, sw, st);
    +        } else {
    +          // lower boundary (mirror v and w)
    +          ans += wiener_lpdf(rt_slice[i + 1 - start] | a, t0, one_minus_w,
    +                                                       neg_v, sv, sw, st);
    +        }
    +      } else {
    +        // censored, response unknown
    +        ans += log_sum_exp(
    +          wiener_lccdf_unnorm(right_bound | a, t0, w, v, sv, sw, st),
    +          wiener_lccdf_unnorm(right_bound | a, t0, one_minus_w,
    +                                            neg_v, sv, sw, st));
    +      }
    +    }
    +    return ans;
    +  }
    +}
    +

    Combine this block with the model block in the example above by using the function reduce_sum().

    +
      target += reduce_sum(partial_sum_wiener, rt, 1,  // third argument: grainsize
    +    // remaining arguments are forwarded unchanged to partial_sum_wiener()
    +    a, t0, w, v, sv, sw, st0, resp, right_bound, censored);
    +}
    +

    For more details, see Henrich and Klauer (2026).

    + + + +
    +
    +
    + + Back to top

    References

    +
    +Henrich, Franziska, Raphael Hartmann, Valentin Pratz, Andreas Voss, and Karl Christoph Klauer. 2024. “The Seven-Parameter Diffusion Model: An Implementation in Stan for Bayesian Analyses.” Behavior Research Methods 56 (4): 3102–16. https://doi.org/10.3758/s13428-023-02179-1. +
    +
    +Henrich, Franziska, and Karl Christoph Klauer. 2026. “Modeling Truncated and Censored Data with the Diffusion Model in Stan.” Behavior Research Methods 58 (42). https://doi.org/10.3758/s13428-025-02822-z. +
    +
    +Nicenboim, Bruno, Daniel J Schad, and Shravan Vasishth. 2025. Introduction to Bayesian Data Analysis for Cognitive Science. CRC Press. +
    +
    +Ratcliff, Roger. 1978. “A Theory of Memory Retrieval.” Psychological Review 85 (2): 59–108. +
    +
    +Ratcliff, Roger, and Jeffrey N. Rouder. 1998. “Modelling Response Times for Two-Choice Decisions.” Psychological Science 9 (5): 347–56. +
    +
    +Ulrich, Rolf, and Jeff Miller. 1994. “Effects of Truncation on Reaction Time Analysis.” Journal of Experimental Psychology: General 123 (1): 34–80. +
    +
    +Wagenmakers, Eric-Jan. 2009. “Methodological and Empirical Developments for the Ratcliff Diffusion Model of Response Times and Accuracy.” European Journal of Cognitive Psychology 21 (5): 641–71. https://doi.org/10.1080/09541440802205067. +
    +

    + + + + + + + + + \ No newline at end of file diff --git a/docs/stan-users-guide/wiener_diffusion_model.pdf b/docs/stan-users-guide/wiener_diffusion_model.pdf new file mode 100644 index 000000000..fdec7b5f6 Binary files /dev/null and b/docs/stan-users-guide/wiener_diffusion_model.pdf differ diff --git a/src/functions-reference/functions_index.qmd b/src/functions-reference/functions_index.qmd index 6862dc8e5..cc9e2b6b5 100644 --- a/src/functions-reference/functions_index.qmd +++ b/src/functions-reference/functions_index.qmd @@ -4164,6 +4164,11 @@ pagetitle: Alphabetical Index -
    [`(matrix A) : real`](matrix_operations.qmd#index-entry-932fc0bd0a556886a853004f8ad6bc3432760b62) (matrix_operations.html)
    +**trace_dot**: + + -
    [`(matrix A, matrix B) : real`](matrix_operations.qmd#index-entry-b067f1dc33e4e60522d490bc635d4305d0e9e7fb) (matrix_operations.html)
    + + **trace_gen_quad_form**: -
    [`(matrix D ,matrix A, matrix B) : real`](matrix_operations.qmd#index-entry-725c7c07b916a247f645ce441212b4a82778b096) (matrix_operations.html)
    diff --git a/src/index.qmd b/src/index.qmd index 94832b434..50aa0f62e 100644 --- a/src/index.qmd +++ b/src/index.qmd @@ -28,6 +28,7 @@ can be found in the table below: | Version | Stan Reference Manual | Stan Users Guide | CmdStan Guide | Stan Functions Reference | |---------|------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------| +| 2.39 | [html](https://mc-stan.org/docs/2_39/reference-manual/) [pdf](https://mc-stan.org/docs/2_39/reference-manual-2_39.pdf) | [html](https://mc-stan.org/docs/2_39/stan-users-guide/) [pdf](https://mc-stan.org/docs/2_39/stan-users-guide-2_39.pdf) | [html](https://mc-stan.org/docs/2_39/cmdstan-guide/) [pdf](https://mc-stan.org/docs/2_39/cmdstan-guide-2_39.pdf) | [html](https://mc-stan.org/docs/2_39/functions-reference/) [pdf](https://mc-stan.org/docs/2_39/functions-reference-2_39.pdf) | | 2.38 | [html](https://mc-stan.org/docs/2_38/reference-manual/) [pdf](https://mc-stan.org/docs/2_38/reference-manual-2_38.pdf) | [html](https://mc-stan.org/docs/2_38/stan-users-guide/) [pdf](https://mc-stan.org/docs/2_38/stan-users-guide-2_38.pdf) | [html](https://mc-stan.org/docs/2_38/cmdstan-guide/) [pdf](https://mc-stan.org/docs/2_38/cmdstan-guide-2_38.pdf) | [html](https://mc-stan.org/docs/2_38/functions-reference/) [pdf](https://mc-stan.org/docs/2_38/functions-reference-2_38.pdf) | | 2.37 | [html](https://mc-stan.org/docs/2_37/reference-manual/) [pdf](https://mc-stan.org/docs/2_37/reference-manual-2_37.pdf) | [html](https://mc-stan.org/docs/2_37/stan-users-guide/) 
[pdf](https://mc-stan.org/docs/2_37/stan-users-guide-2_37.pdf) | [html](https://mc-stan.org/docs/2_37/cmdstan-guide/) [pdf](https://mc-stan.org/docs/2_37/cmdstan-guide-2_37.pdf) | [html](https://mc-stan.org/docs/2_37/functions-reference/) [pdf](https://mc-stan.org/docs/2_37/functions-reference-2_37.pdf) | | 2.36 | [html](https://mc-stan.org/docs/2_36/reference-manual/) [pdf](https://mc-stan.org/docs/2_36/reference-manual-2_36.pdf) | [html](https://mc-stan.org/docs/2_36/stan-users-guide/) [pdf](https://mc-stan.org/docs/2_36/stan-users-guide-2_36.pdf) | [html](https://mc-stan.org/docs/2_36/cmdstan-guide/) [pdf](https://mc-stan.org/docs/2_36/cmdstan-guide-2_36.pdf) | [html](https://mc-stan.org/docs/2_36/functions-reference/) [pdf](https://mc-stan.org/docs/2_36/functions-reference-2_36.pdf) |