diff --git a/e2e-tests/src/lib.rs b/e2e-tests/src/lib.rs index 083c08e7..6da9303e 100644 --- a/e2e-tests/src/lib.rs +++ b/e2e-tests/src/lib.rs @@ -97,10 +97,24 @@ pub struct LdkServerHandle { client: LdkServerClient, } +pub struct LdkServerConfig { + pub metrics_auth: Option<(String, String)>, +} + +impl Default for LdkServerConfig { + fn default() -> Self { + Self { metrics_auth: None } + } +} + impl LdkServerHandle { /// Starts a new ldk-server instance against the given bitcoind. /// Waits until the server is ready to accept requests. pub async fn start(bitcoind: &TestBitcoind) -> Self { + Self::start_with_config(bitcoind, LdkServerConfig::default()).await + } + + pub async fn start_with_config(bitcoind: &TestBitcoind, config: LdkServerConfig) -> Self { #[allow(deprecated)] let storage_dir = tempfile::tempdir().unwrap().into_path(); let rest_port = find_available_port(); @@ -111,6 +125,12 @@ impl LdkServerHandle { let exchange_name = format!("e2e_test_exchange_{rest_port}"); + let metrics_auth_config = if let Some((user, pass)) = config.metrics_auth { + format!("username = \"{}\"\npassword = \"{}\"", user, pass) + } else { + String::new() + }; + let config_content = format!( r#"[node] network = "regtest" @@ -140,7 +160,13 @@ max_client_to_self_delay = 1024 min_payment_size_msat = 0 max_payment_size_msat = 1000000000 client_trusts_lsp = true + +[metrics] +enabled = true +poll_metrics_interval = 1 +{} "#, + metrics_auth_config, storage_dir = storage_dir.display(), ); diff --git a/e2e-tests/tests/e2e.rs b/e2e-tests/tests/e2e.rs index 77d0adc0..b3ac51f9 100644 --- a/e2e-tests/tests/e2e.rs +++ b/e2e-tests/tests/e2e.rs @@ -12,7 +12,8 @@ use std::time::Duration; use e2e_tests::{ find_available_port, mine_and_sync, run_cli, run_cli_raw, setup_funded_channel, - wait_for_onchain_balance, LdkServerHandle, RabbitMqEventConsumer, TestBitcoind, + wait_for_onchain_balance, LdkServerConfig, LdkServerHandle, RabbitMqEventConsumer, + TestBitcoind, }; use hex_conservative::{DisplayHex, FromHex}; use ldk_node::bitcoin::hashes::{sha256, Hash}; @@ -995,3 +996,132 @@ async fn test_hodl_invoice_fail() { events_a.iter().map(|e| &e.event).collect::>() ); } + +#[tokio::test] +async fn test_metrics_endpoint() { + let bitcoind = TestBitcoind::new(); + + // Test with metrics enabled + let server_a = LdkServerHandle::start(&bitcoind).await; + let server_b = LdkServerHandle::start(&bitcoind).await; + + let client = server_a.client(); + let metrics_result = client.get_metrics().await; + + assert!(metrics_result.is_ok(), "Expected metrics to succeed when enabled"); + let metrics = metrics_result.unwrap(); + + // Verify initial state + assert!(metrics.contains("ldk_server_total_peers_count 0")); + assert!(metrics.contains("ldk_server_total_payments_count 0")); + assert!(metrics.contains("ldk_server_total_successful_payments_count 0")); + assert!(metrics.contains("ldk_server_total_pending_payments_count 0")); + assert!(metrics.contains("ldk_server_total_failed_payments_count 0")); + assert!(metrics.contains("ldk_server_total_channels_count 0")); + assert!(metrics.contains("ldk_server_total_public_channels_count 0")); + assert!(metrics.contains("ldk_server_total_private_channels_count 0")); + assert!(metrics.contains("ldk_server_total_onchain_balance_sats 0")); + assert!(metrics.contains("ldk_server_spendable_onchain_balance_sats 0")); + assert!(metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0")); + assert!(metrics.contains("ldk_server_total_lightning_balance_sats 0")); + + // Set up channel and make a payment to trigger metrics update + setup_funded_channel(&bitcoind, &server_a, &server_b, 100_000).await; + + // Poll for channel, peer and balance metrics. + let timeout = Duration::from_secs(10); + let start = std::time::Instant::now(); + loop { + let metrics = client.get_metrics().await.unwrap(); + if metrics.contains("ldk_server_total_peers_count 1") + && metrics.contains("ldk_server_total_channels_count 1") + && metrics.contains("ldk_server_total_public_channels_count 1") + && metrics.contains("ldk_server_total_payments_count 2") + && !metrics.contains("ldk_server_total_lightning_balance_sats 0") + && !metrics.contains("ldk_server_total_onchain_balance_sats 0") + && !metrics.contains("ldk_server_spendable_onchain_balance_sats 0") + && !metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0") + { + break; + } + + if start.elapsed() > timeout { + let current_metrics = client.get_metrics().await.unwrap(); + panic!( + "Timed out waiting for channel, peer and balance metrics to update. Current metrics:\n{}", + current_metrics + ); + } + tokio::time::sleep(Duration::from_secs(1)).await; + } + + let invoice_resp = server_b + .client() + .bolt11_receive(Bolt11ReceiveRequest { + amount_msat: Some(10_000_000), + description: Some(Bolt11InvoiceDescription { + kind: Some(bolt11_invoice_description::Kind::Direct("metrics test".to_string())), + }), + expiry_secs: 3600, + }) + .await + .unwrap(); + + run_cli(&server_a, &["bolt11-send", &invoice_resp.invoice]); + + // Wait to receive the PaymentSuccessful event and update metrics + let timeout = Duration::from_secs(30); + let start = std::time::Instant::now(); + loop { + let metrics = client.get_metrics().await.unwrap(); + if metrics.contains("ldk_server_total_successful_payments_count 1") + && !metrics.contains("ldk_server_total_lightning_balance_sats 0") + && !metrics.contains("ldk_server_total_onchain_balance_sats 0") + && !metrics.contains("ldk_server_spendable_onchain_balance_sats 0") + && !metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0") + { + break; + } + if start.elapsed() > timeout { + panic!("Timed out waiting for payment metrics to update"); + } + tokio::time::sleep(Duration::from_millis(500)).await; + } +} + +#[tokio::test] +async fn test_metrics_endpoint_with_auth() { + let bitcoind = TestBitcoind::new(); + + let username = "admin"; + let password = "password123"; + + let config = + LdkServerConfig { metrics_auth: Some((username.to_string(), password.to_string())) }; + + let server = LdkServerHandle::start_with_config(&bitcoind, config).await; + let client = server.client(); + + // Should fail because auth is provided in the config + let result = client.get_metrics().await; + assert!(result.is_err(), "Expected failure without credentials"); + + // Request has the correct auth, so it should succeed + let result = client.get_metrics_with_auth(Some(username), Some(password)).await; + + assert!(result.is_ok(), "Expected success with correct credentials"); + let metrics = result.unwrap(); + + assert!(metrics.contains("ldk_server_total_peers_count 0")); + assert!(metrics.contains("ldk_server_total_payments_count 0")); + assert!(metrics.contains("ldk_server_total_successful_payments_count 0")); + assert!(metrics.contains("ldk_server_total_pending_payments_count 0")); + assert!(metrics.contains("ldk_server_total_failed_payments_count 0")); + assert!(metrics.contains("ldk_server_total_channels_count 0")); + assert!(metrics.contains("ldk_server_total_public_channels_count 0")); + assert!(metrics.contains("ldk_server_total_private_channels_count 0")); + assert!(metrics.contains("ldk_server_total_onchain_balance_sats 0")); + assert!(metrics.contains("ldk_server_spendable_onchain_balance_sats 0")); + assert!(metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0")); + assert!(metrics.contains("ldk_server_total_lightning_balance_sats 0")); +} diff --git a/ldk-server-client/src/client.rs b/ldk-server-client/src/client.rs index b68d2e6a..e40e6c9e 100644 --- a/ldk-server-client/src/client.rs +++ b/ldk-server-client/src/client.rs @@ -40,13 +40,15 @@ use ldk_server_protos::endpoints::{ BOLT11_RECEIVE_VIA_JIT_CHANNEL_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH, CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DECODE_INVOICE_PATH, DECODE_OFFER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH, FORCE_CLOSE_CHANNEL_PATH, - GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH, GRAPH_GET_CHANNEL_PATH, - GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH, - LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, ONCHAIN_RECEIVE_PATH, - ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH, - SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH, + GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH, + GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, + LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, + ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, + SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, + VERIFY_SIGNATURE_PATH, }; use ldk_server_protos::error::{ErrorCode, ErrorResponse}; +use prost::bytes::Bytes; use prost::Message; use reqwest::header::CONTENT_TYPE; use reqwest::{Certificate, Client}; @@ -70,6 +72,11 @@ pub struct LdkServerClient { api_key: String, } +enum RequestType { + Get, + Post, +} + impl LdkServerClient { /// Constructs a [`LdkServerClient`] using `base_url` as the ldk-server endpoint. /// @@ -115,6 +122,27 @@ impl LdkServerClient { self.post_request(&request, &url).await } + /// Retrieve the node metrics in Prometheus format. + pub async fn get_metrics(&self) -> Result { + self.get_metrics_with_auth(None, None).await + } + + /// Retrieve the node metrics in Prometheus format using Basic Auth. + pub async fn get_metrics_with_auth( + &self, username: Option<&str>, password: Option<&str>, + ) -> Result { + let url = format!("https://{}/{GET_METRICS_PATH}", self.base_url); + let payload = + self.make_request(&url, RequestType::Get, None, false, username, password).await?; + + String::from_utf8(payload.to_vec()).map_err(|e| { + LdkServerError::new( + InternalError, + format!("Failed to decode metrics response as string: {}", e), + ) + }) + } + /// Retrieves an overview of all known balances. /// For API contract/usage, refer to docs for [`GetBalancesRequest`] and [`GetBalancesResponse`]. pub async fn get_balances( @@ -450,31 +478,60 @@ impl LdkServerClient { &self, request: &Rq, url: &str, ) -> Result { let request_body = request.encode_to_vec(); - let auth_header = self.compute_auth_header(&request_body); - let response_raw = self - .client - .post(url) - .header(CONTENT_TYPE, APPLICATION_OCTET_STREAM) - .header("X-Auth", auth_header) - .body(request_body) - .send() - .await - .map_err(|e| { - LdkServerError::new(InternalError, format!("HTTP request failed: {}", e)) - })?; + let payload = + self.make_request(url, RequestType::Post, Some(request_body), true, None, None).await?; + Rs::decode(&payload[..]).map_err(|e| { + LdkServerError::new(InternalError, format!("Failed to decode success response: {}", e)) + }) + } + + async fn make_request( + &self, url: &str, request_type: RequestType, body: Option>, + hmac_authenticated: bool, metrics_username: Option<&str>, metrics_password: Option<&str>, + ) -> Result { + let builder = match request_type { + RequestType::Get => self.client.get(url), + RequestType::Post => self.client.post(url), + }; + + let builder = if hmac_authenticated { + let body_for_auth = body.as_deref().unwrap_or(&[]); + let auth_header = self.compute_auth_header(body_for_auth); + builder.header("X-Auth", auth_header) + } else { + builder + }; + + let builder = if let Some(body_content) = body { + builder.header(CONTENT_TYPE, APPLICATION_OCTET_STREAM).body(body_content) + } else { + builder + }; + + let builder = if let (Some(username), Some(password)) = (metrics_username, metrics_password) + { + builder.basic_auth(username, Some(password)) + } else { + builder + }; + + let response_raw = builder.send().await.map_err(|e| { + LdkServerError::new(InternalError, format!("HTTP request failed: {}", e)) + })?; + self.handle_response(response_raw).await + } + + async fn handle_response( + &self, response_raw: reqwest::Response, + ) -> Result { let status = response_raw.status(); let payload = response_raw.bytes().await.map_err(|e| { LdkServerError::new(InternalError, format!("Failed to read response body: {}", e)) })?; if status.is_success() { - Ok(Rs::decode(&payload[..]).map_err(|e| { - LdkServerError::new( - InternalError, - format!("Failed to decode success response: {}", e), - ) - })?) + Ok(payload) } else { let error_response = ErrorResponse::decode(&payload[..]).map_err(|e| { LdkServerError::new( diff --git a/ldk-server-protos/src/endpoints.rs b/ldk-server-protos/src/endpoints.rs index a9ab7b49..83cc196f 100644 --- a/ldk-server-protos/src/endpoints.rs +++ b/ldk-server-protos/src/endpoints.rs @@ -45,3 +45,4 @@ pub const GRAPH_LIST_NODES_PATH: &str = "GraphListNodes"; pub const GRAPH_GET_NODE_PATH: &str = "GraphGetNode"; pub const DECODE_INVOICE_PATH: &str = "DecodeInvoice"; pub const DECODE_OFFER_PATH: &str = "DecodeOffer"; +pub const GET_METRICS_PATH: &str = "metrics"; diff --git a/ldk-server/ldk-server-config.toml b/ldk-server/ldk-server-config.toml index 48b6c7b1..cd9a19d9 100644 --- a/ldk-server/ldk-server-config.toml +++ b/ldk-server/ldk-server-config.toml @@ -88,3 +88,11 @@ client_trusts_lsp = false ## A token we may require to be sent by the clients. ## If set, only requests matching this token will be accepted. (uncomment and set if required) # require_token = "" + +# Metrics settings +[metrics] +enabled = false +poll_metrics_interval = 60 # The polling interval for metrics in seconds. Defaults to 60secs if unset and metrics enabled. +# The auth details below are optional, but uncommenting the fields means enabling basic auth, so valid fields must be supplied. +#username = "" # The username required to access the metrics endpoint (Basic Auth). +#password = "" # The password required to access the metrics endpoint (Basic Auth). diff --git a/ldk-server/src/main.rs b/ldk-server/src/main.rs index 3de2a408..c73745d2 100644 --- a/ldk-server/src/main.rs +++ b/ldk-server/src/main.rs @@ -16,8 +16,10 @@ use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use base64::prelude::BASE64_STANDARD; +use base64::Engine; use clap::Parser; use hex::DisplayHex; use hyper::server::conn::http1; @@ -50,6 +52,7 @@ use crate::io::persist::{ use crate::service::NodeService; use crate::util::config::{load_config, ArgsConfig, ChainSource}; use crate::util::logger::ServerLogger; +use crate::util::metrics::Metrics; use crate::util::proto_adapter::{forwarded_payment_to_proto, payment_to_proto}; use crate::util::systemd; use crate::util::tls::get_or_generate_tls_config; @@ -273,6 +276,37 @@ fn main() { } }; let event_node = Arc::clone(&node); + + let metrics: Option> = if config_file.metrics_enabled { + let poll_metrics_interval = Duration::from_secs(config_file.poll_metrics_interval.unwrap_or(60)); + let metrics_node = Arc::clone(&node); + let mut interval = tokio::time::interval(poll_metrics_interval); + let metrics = Arc::new(Metrics::new()); + let metrics_bg = Arc::clone(&metrics); + + // Initialize metrics that are event-driven to ensure they start with correct values from persistence + metrics.initialize_payment_metrics(&metrics_node); + + runtime.spawn(async move { + loop { + interval.tick().await; + metrics_bg.update_all_pollable_metrics(&metrics_node); + } + }); + Some(metrics) + } else { + None + }; + + let metrics_auth_header = if let (Some(username), Some(password)) = + (config_file.metrics_username.as_ref(), config_file.metrics_password.as_ref()) + { + let auth = format!("{}:{}", username, password); + Some(format!("Basic {}", BASE64_STANDARD.encode(auth))) + } else { + None + }; + let rest_svc_listener = TcpListener::bind(config_file.rest_service_addr) .await .expect("Failed to bind listening port"); @@ -313,7 +347,24 @@ fn main() { if let Err(e) = event_node.event_handled() { error!("Failed to mark event as handled: {e}"); } + + if let Some(metrics) = &metrics { + metrics.update_channels_count(false); + } }, + Event::ChannelClosed { channel_id, counterparty_node_id, .. } => { + info!( + "CHANNEL_CLOSED: {} from counterparty {:?}", + channel_id, counterparty_node_id + ); + if let Err(e) = event_node.event_handled() { + error!("Failed to mark event as handled: {e}"); + } + + if let Some(metrics) = &metrics { + metrics.update_channels_count(true); + } + } Event::PaymentReceived { payment_id, payment_hash, amount_msat, .. } => { info!( "PAYMENT_RECEIVED: with id {:?}, hash {}, amount_msat {}", @@ -328,6 +379,10 @@ fn main() { &event_node, Arc::clone(&event_publisher), Arc::clone(&paginated_store)).await; + + if let Some(metrics) = &metrics { + metrics.update_all_balances(&event_node); + } }, Event::PaymentSuccessful {payment_id, ..} => { let payment_id = payment_id.expect("PaymentId expected for ldk-server >=0.1"); @@ -339,6 +394,11 @@ fn main() { &event_node, Arc::clone(&event_publisher), Arc::clone(&paginated_store)).await; + + if let Some(metrics) = &metrics { + metrics.update_payments_count(true); + metrics.update_all_balances(&event_node); + } }, Event::PaymentFailed {payment_id, ..} => { let payment_id = payment_id.expect("PaymentId expected for ldk-server >=0.1"); @@ -350,6 +410,10 @@ fn main() { &event_node, Arc::clone(&event_publisher), Arc::clone(&paginated_store)).await; + + if let Some(metrics) = &metrics { + metrics.update_payments_count(false); + } }, Event::PaymentClaimable {payment_id, ..} => { publish_event_and_upsert_payment(&payment_id, @@ -435,7 +499,13 @@ fn main() { res = rest_svc_listener.accept() => { match res { Ok((stream, _)) => { - let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone()); + let node_service = NodeService::new( + Arc::clone(&node), + Arc::clone(&paginated_store), + api_key.clone(), + metrics.clone(), + metrics_auth_header.clone(), + ); let acceptor = tls_acceptor.clone(); runtime.spawn(async move { match acceptor.accept(stream).await { diff --git a/ldk-server/src/service.rs b/ldk-server/src/service.rs index 8ba37f65..679cb894 100644 --- a/ldk-server/src/service.rs +++ b/ldk-server/src/service.rs @@ -24,11 +24,12 @@ use ldk_server_protos::endpoints::{ BOLT11_RECEIVE_VIA_JIT_CHANNEL_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH, CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DECODE_INVOICE_PATH, DECODE_OFFER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH, FORCE_CLOSE_CHANNEL_PATH, - GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH, GRAPH_GET_CHANNEL_PATH, - GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH, - LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, ONCHAIN_RECEIVE_PATH, - ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH, - SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH, + GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH, + GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, + LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, + ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, + SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, + VERIFY_SIGNATURE_PATH, }; use prost::Message; @@ -72,6 +73,7 @@ use crate::api::unified_send::handle_unified_send_request; use crate::api::update_channel_config::handle_update_channel_config_request; use crate::api::verify_signature::handle_verify_signature_request; use crate::io::persist::paginated_kv_store::PaginatedKVStore; +use crate::util::metrics::Metrics; use crate::util::proto_adapter::to_error_response; // Maximum request body size: 10 MB @@ -83,13 +85,16 @@ pub struct NodeService { node: Arc, paginated_kv_store: Arc, api_key: String, + metrics: Option>, + metrics_auth_header: Option, } impl NodeService { pub(crate) fn new( node: Arc, paginated_kv_store: Arc, api_key: String, + metrics: Option>, metrics_auth_header: Option, ) -> Self { - Self { node, paginated_kv_store, api_key } + Self { node, paginated_kv_store, api_key, metrics, metrics_auth_header } } } @@ -173,6 +178,42 @@ impl Service> for NodeService { type Future = Pin> + Send>>; fn call(&self, req: Request) -> Self::Future { + // Handle metrics endpoint separately to bypass auth and return plain text + if req.method() == hyper::Method::GET + && req.uri().path().len() > 1 + && &req.uri().path()[1..] == GET_METRICS_PATH + { + if let Some(expected_header) = &self.metrics_auth_header { + let auth_header = req.headers().get("Authorization").and_then(|h| h.to_str().ok()); + if auth_header != Some(expected_header) { + return Box::pin(async move { + Ok(Response::builder() + .status(StatusCode::UNAUTHORIZED) + .header("WWW-Authenticate", "Basic realm=\"metrics\"") + .body(Full::new(Bytes::from("Unauthorized"))) + .unwrap()) + }); + } + } + + if let Some(metrics) = &self.metrics { + let metrics = Arc::clone(metrics); + return Box::pin(async move { + Ok(Response::builder() + .header("Content-Type", "text/plain") + .body(Full::new(Bytes::from(metrics.gather_metrics()))) + .unwrap()) + }); + } else { + return Box::pin(async move { + Ok(Response::builder() + .status(StatusCode::NOT_FOUND) + .body(Full::new(Bytes::from("Not Found"))) + .unwrap()) + }); + } + } + // Extract auth params from headers (validation happens after body is read) let auth_params = match extract_auth_params(&req) { Ok(params) => params, diff --git a/ldk-server/src/util/config.rs b/ldk-server/src/util/config.rs index 2a6976d3..736673b1 100644 --- a/ldk-server/src/util/config.rs +++ b/ldk-server/src/util/config.rs @@ -60,6 +60,10 @@ pub struct Config { pub log_level: LevelFilter, pub log_file_path: Option, pub pathfinding_scores_source_url: Option, + pub metrics_enabled: bool, + pub poll_metrics_interval: Option, + pub metrics_username: Option, + pub metrics_password: Option, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -105,6 +109,10 @@ struct ConfigBuilder { log_level: Option, log_file_path: Option, pathfinding_scores_source_url: Option, + metrics_enabled: Option, + poll_metrics_interval: Option, + metrics_username: Option, + metrics_password: Option, } impl ConfigBuilder { @@ -164,6 +172,14 @@ impl ConfigBuilder { hosts: tls.hosts.unwrap_or_default(), }); } + + if let Some(metrics) = toml.metrics { + self.metrics_enabled = metrics.enabled.or(self.metrics_enabled); + self.poll_metrics_interval = + metrics.poll_metrics_interval.or(self.poll_metrics_interval); + self.metrics_username = metrics.username.or(self.metrics_username.clone()); + self.metrics_password = metrics.password.or(self.metrics_password.clone()); + } } fn merge_args(&mut self, args: &ArgsConfig) { @@ -206,6 +222,22 @@ impl ConfigBuilder { if let Some(pathfinding_scores_source_url) = &args.pathfinding_scores_source_url { self.pathfinding_scores_source_url = Some(pathfinding_scores_source_url.clone()); } + + if args.metrics_enabled { + self.metrics_enabled = Some(true); + } + + if let Some(poll_metrics_interval) = &args.poll_metrics_interval { + self.poll_metrics_interval = Some(*poll_metrics_interval); + } + + if let Some(metrics_username) = &args.metrics_username { + self.metrics_username = Some(metrics_username.clone()); + } + + if let Some(metrics_password) = &args.metrics_password { + self.metrics_password = Some(metrics_password.clone()); + } } fn build(self) -> io::Result { @@ -364,6 +396,20 @@ impl ConfigBuilder { let pathfinding_scores_source_url = self.pathfinding_scores_source_url; + let metrics_enabled = self.metrics_enabled.unwrap_or(false); + + let poll_metrics_interval = self.poll_metrics_interval; + + let metrics_username = self.metrics_username; + let metrics_password = self.metrics_password; + + if self.metrics_enabled.unwrap_or(false) + && (metrics_username.is_some() != metrics_password.is_some()) + { + return Err(io::Error::new(io::ErrorKind::InvalidInput, + "Both `metrics.username` and `metrics.password` must be set if authentication is used for metrics.")); + } + Ok(Config { network, listening_addrs, @@ -381,6 +427,10 @@ impl ConfigBuilder { log_level, log_file_path: self.log_file_path, pathfinding_scores_source_url, + metrics_enabled, + poll_metrics_interval, + metrics_username, + metrics_password, }) } } @@ -397,6 +447,7 @@ pub struct TomlConfig { liquidity: Option, log: Option, tls: Option, + metrics: Option, } #[derive(Deserialize, Serialize)] @@ -456,6 +507,14 @@ struct TomlTlsConfig { hosts: Option>, } +#[derive(Deserialize, Serialize)] +struct MetricsTomlConfig { + enabled: Option, + poll_metrics_interval: Option, + username: Option, + password: Option, +} + #[derive(Deserialize, Serialize)] struct LiquidityConfig { lsps2_client: Option, @@ -614,6 +673,35 @@ pub struct ArgsConfig { help = "The external scores source that is merged into the local scoring system to improve routing." )] pathfinding_scores_source_url: Option, + + #[arg( + long, + env = "LDK_SERVER_METRICS_ENABLED", + help = "The option to enable the metrics endpoint. WARNING: This endpoint is unauthenticated." + )] + metrics_enabled: bool, + + #[arg( + long, + env = "LDK_SERVER_POLL_METRICS_INTERVAL", + help = "The polling interval for metrics in seconds. Required when + metrics is enabled, but defaults to 60secs if unset." + )] + poll_metrics_interval: Option, + + #[arg( + long, + env = "LDK_SERVER_METRICS_USERNAME", + help = "The username required to access the metrics endpoint (Basic Auth)." + )] + metrics_username: Option, + + #[arg( + long, + env = "LDK_SERVER_METRICS_PASSWORD", + help = "The password required to access the metrics endpoint (Basic Auth)." + )] + metrics_password: Option, } pub fn load_config(args: &ArgsConfig) -> io::Result { @@ -747,6 +835,10 @@ mod tests { storage_dir_path: Some(String::from("/tmp_cli")), node_alias: Some(String::from("LDK Server CLI")), pathfinding_scores_source_url: Some(String::from("https://example.com/")), + metrics_enabled: false, + poll_metrics_interval: None, + metrics_username: None, + metrics_password: None, } } @@ -763,6 +855,10 @@ mod tests { bitcoind_rpc_password: None, storage_dir_path: None, pathfinding_scores_source_url: None, + metrics_enabled: false, + poll_metrics_interval: None, + metrics_username: None, + metrics_password: None, } } @@ -839,6 +935,10 @@ mod tests { log_level: LevelFilter::Trace, log_file_path: Some("/var/log/ldk-server.log".to_string()), pathfinding_scores_source_url: None, + metrics_enabled: false, + poll_metrics_interval: None, + metrics_username: None, + metrics_password: None, }; assert_eq!(config.listening_addrs, expected.listening_addrs); @@ -857,6 +957,7 @@ mod tests { assert_eq!(config.log_level, expected.log_level); assert_eq!(config.log_file_path, expected.log_file_path); assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url); + assert_eq!(config.metrics_enabled, expected.metrics_enabled); // Test case where only electrum is set @@ -1162,6 +1263,10 @@ mod tests { log_level: LevelFilter::Trace, log_file_path: Some("/var/log/ldk-server.log".to_string()), pathfinding_scores_source_url: Some("https://example.com/".to_string()), + metrics_enabled: false, + poll_metrics_interval: None, + metrics_username: None, + metrics_password: None, }; assert_eq!(config.listening_addrs, expected.listening_addrs); @@ -1175,6 +1280,7 @@ mod tests { assert_eq!(config.rabbitmq_exchange_name, expected.rabbitmq_exchange_name); assert!(config.lsps2_service_config.is_none()); assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url); + assert_eq!(config.metrics_enabled, expected.metrics_enabled); } #[test] @@ -1273,6 +1379,10 @@ mod tests { log_level: LevelFilter::Trace, log_file_path: Some("/var/log/ldk-server.log".to_string()), pathfinding_scores_source_url: Some("https://example.com/".to_string()), + metrics_enabled: false, + poll_metrics_interval: None, + metrics_username: None, + metrics_password: None, }; assert_eq!(config.listening_addrs, expected.listening_addrs); @@ -1288,6 +1398,7 @@ mod tests { #[cfg(feature = "experimental-lsps2-support")] assert_eq!(config.lsps2_service_config.is_some(), expected.lsps2_service_config.is_some()); assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url); + assert_eq!(config.metrics_enabled, expected.metrics_enabled); } #[test] @@ -1340,4 +1451,97 @@ mod tests { SocketAddr::from_str(DEFAULT_REST_SERVICE_ADDRESS).unwrap() ); } + + #[test] + fn test_metrics_enabled_config() { + let storage_path = std::env::temp_dir(); + let config_file_name = "test_metrics_enabled.toml"; + + let toml_config = r#" + [node] + network = "regtest" + rest_service_address = "127.0.0.1:3002" + + [bitcoind] + rpc_address = "127.0.0.1:8332" + rpc_user = "user" + rpc_password = "password" + + [metrics] + enabled = true + username = "admin" + password = "password123" + + [rabbitmq] + connection_string = "rabbitmq_connection_string" + exchange_name = "rabbitmq_exchange_name" + + [liquidity.lsps2_service] + advertise_service = false + channel_opening_fee_ppm = 1000 # 0.1% fee + channel_over_provisioning_ppm = 500000 # 50% extra capacity + min_channel_opening_fee_msat = 10000000 # 10,000 satoshis + min_channel_lifetime = 4320 # ~30 days + max_client_to_self_delay = 1440 # ~10 days + min_payment_size_msat = 10000000 # 10,000 satoshis + max_payment_size_msat = 25000000000 # 0.25 BTC + client_trusts_lsp = true + "#; + + fs::write(storage_path.join(config_file_name), toml_config).unwrap(); + let mut args_config = empty_args_config(); + args_config.config_file = + Some(storage_path.join(config_file_name).to_string_lossy().to_string()); + + let config = load_config(&args_config).unwrap(); + assert!(config.metrics_enabled); + assert!(config.metrics_username.is_some()); + assert!(config.metrics_password.is_some()); + } + + #[test] + fn test_metrics_enabled_fails_with_invalid_auth() { + let storage_path = std::env::temp_dir(); + let config_file_name = "test_metrics_enabled_with_auth.toml"; + + let toml_config = r#" + [node] + network = "regtest" + rest_service_address = "127.0.0.1:3002" + + [bitcoind] + rpc_address = "127.0.0.1:8332" + rpc_user = "user" + rpc_password = "password" + + [metrics] + enabled = true + username = "admin" + + [rabbitmq] + connection_string = "rabbitmq_connection_string" + exchange_name = "rabbitmq_exchange_name" + + [liquidity.lsps2_service] + advertise_service = false + channel_opening_fee_ppm = 1000 # 0.1% fee + channel_over_provisioning_ppm = 500000 # 50% extra capacity + min_channel_opening_fee_msat = 10000000 # 10,000 satoshis + min_channel_lifetime = 4320 # ~30 days + max_client_to_self_delay = 1440 # ~10 days + min_payment_size_msat = 10000000 # 10,000 satoshis + max_payment_size_msat = 25000000000 # 0.25 BTC + client_trusts_lsp = true + "#; + + fs::write(storage_path.join(config_file_name), toml_config).unwrap(); + let mut args_config = empty_args_config(); + args_config.config_file = + Some(storage_path.join(config_file_name).to_string_lossy().to_string()); + + let result = load_config(&args_config); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::InvalidInput); + } } diff --git a/ldk-server/src/util/metrics.rs b/ldk-server/src/util/metrics.rs new file mode 100644 index 00000000..40424d34 --- /dev/null +++ b/ldk-server/src/util/metrics.rs @@ -0,0 +1,352 @@ +// This file is Copyright its original authors, visible in version control +// history. +// +// This file is licensed under the Apache License, Version 2.0 or the MIT license +// , at your option. +// You may not use this file except in accordance with one or both of these +// licenses. + +//! This module provides metrics for monitoring the LDK Server node in a Prometheus-compatible format. +//! +//! The `Metrics` struct holds atomic counters and gauges for various aspects of the node's +//! operation, such as peer connections, channels and payments statuses, and balances. +//! +//! The metrics are updated through two main mechanisms: +//! 1. **Periodic Polling**: The `update_all_pollable_metrics` function is called at a regular +//! interval (`poll_metrics_interval`) configurable via the config file but defaults to 60secs if unset, to perform a full recount of metrics like peer count, +//! payments count, and channels metrics. +//! 2. **Event-Driven Updates**: For metrics that can change frequently and where a full recount +//! would be inefficient (e.g., total_successful_payments_count, balances), a hybrid approach is used. +//! - `initialize_payment_metrics` is called once at startup to get the accurate persisted state. +//! - `update_payments_count` is called incrementally whenever a relevant event (like +//! `PaymentSuccessful` or `PaymentFailed`) occurs. +//! - `update_all_balances` is called when we receive a `PaymentSuccessful` event to update all balance metrics. +//! - `update_channels_count` is called when we receive a `ChannelReady` or `ChannelClosed` event to update the channels metrics. +//! +//! The `gather_metrics` function collects all current metric values and formats them into the +//! plain-text format that Prometheus scrapers expect. This output is exposed via an +//! unauthenticated `/metrics` HTTP endpoint on the rest service address. + +use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; + +use ldk_node::payment::PaymentStatus; +use ldk_node::Node; + +/// Holds all the metrics that are tracked for LDK Server. +/// +/// These metrics are exposed in a Prometheus-compatible format. The values are stored +/// in atomic types to allow for safe concurrent access. +pub struct Metrics { + pub total_peers_count: AtomicI64, + pub total_payments_count: AtomicI64, + pub total_successful_payments_count: AtomicI64, + pub total_pending_payments_count: AtomicI64, + pub total_failed_payments_count: AtomicI64, + pub total_channels_count: AtomicI64, + pub total_public_channels_count: AtomicI64, + pub total_private_channels_count: AtomicI64, + pub total_onchain_balance_sats: AtomicU64, + pub spendable_onchain_balance_sats: AtomicU64, + pub total_anchor_channels_reserve_sats: AtomicU64, + pub total_lightning_balance_sats: AtomicU64, +} + +impl Metrics { + pub fn new() -> Self { + Self { + total_peers_count: AtomicI64::new(0), + total_payments_count: AtomicI64::new(0), + total_successful_payments_count: AtomicI64::new(0), + total_pending_payments_count: AtomicI64::new(0), + total_failed_payments_count: AtomicI64::new(0), + total_channels_count: AtomicI64::new(0), + total_public_channels_count: AtomicI64::new(0), + total_private_channels_count: AtomicI64::new(0), + total_onchain_balance_sats: AtomicU64::new(0), + spendable_onchain_balance_sats: AtomicU64::new(0), + total_anchor_channels_reserve_sats: AtomicU64::new(0), + total_lightning_balance_sats: AtomicU64::new(0), + } + } + + fn update_peer_count(&self, node: &Node) { + let total_peers_count = node.list_peers().len() as i64; + self.total_peers_count.store(total_peers_count, Ordering::Relaxed); + } + + pub fn update_payments_count(&self, is_successful: bool) { + if is_successful { + self.total_successful_payments_count.fetch_add(1, Ordering::Relaxed); + } else { + self.total_failed_payments_count.fetch_add(1, Ordering::Relaxed); + } + } + + pub fn update_channels_count(&self, is_closed: bool) { + if is_closed { + self.total_channels_count.fetch_sub(1, Ordering::Relaxed); + } else { + self.total_channels_count.fetch_add(1, Ordering::Relaxed); + } + } + + pub fn initialize_payment_metrics(&self, node: &Node) { + let mut successful_payments_count = 0; + let mut failed_payments_count = 0; + let mut pending_payments_count = 0; + + for payment_details in node.list_payments() { + match payment_details.status { + PaymentStatus::Succeeded => successful_payments_count += 1, + PaymentStatus::Failed => failed_payments_count += 1, + PaymentStatus::Pending => pending_payments_count += 1, + } + } + self.total_successful_payments_count.store(successful_payments_count, Ordering::Relaxed); + self.total_failed_payments_count.store(failed_payments_count, Ordering::Relaxed); + self.total_pending_payments_count.store(pending_payments_count, Ordering::Relaxed); + + let channels_count = node.list_channels().len() as i64; + self.total_channels_count.store(channels_count, Ordering::Relaxed); + + self.update_all_balances(node); + } + + pub fn update_all_balances(&self, node: &Node) { + let all_balances = node.list_balances(); + self.total_onchain_balance_sats + .store(all_balances.total_onchain_balance_sats, Ordering::Relaxed); + + self.spendable_onchain_balance_sats + .store(all_balances.spendable_onchain_balance_sats, Ordering::Relaxed); + + self.total_anchor_channels_reserve_sats + .store(all_balances.total_anchor_channels_reserve_sats, Ordering::Relaxed); + + self.total_lightning_balance_sats + .store(all_balances.total_lightning_balance_sats, Ordering::Relaxed); + } + + pub fn update_all_pollable_metrics(&self, node: &Node) { + let all_payments = node.list_payments(); + let all_channels = node.list_channels(); + + let payments_count = all_payments.len() as i64; + self.total_payments_count.store(payments_count, Ordering::Relaxed); + + let pending_payments_count = all_payments + .iter() + .filter(|payment_details| payment_details.status == PaymentStatus::Pending) + .count() as i64; + self.total_pending_payments_count.store(pending_payments_count, Ordering::Relaxed); + + let public_channels_count = + all_channels.iter().filter(|channel_details| channel_details.is_announced).count() + as i64; + self.total_public_channels_count.store(public_channels_count, Ordering::Relaxed); + + let private_channels_count = + all_channels.iter().filter(|channel_details| !channel_details.is_announced).count() + as i64; + self.total_private_channels_count.store(private_channels_count, Ordering::Relaxed); + + self.update_peer_count(node); + self.update_all_balances(node); + } + + /// Gathers all metrics and formats them into the Prometheus text-based format. + /// + /// This function is called by the `/metrics` endpoint to provide the current state + /// of all tracked metrics to a Prometheus scraper. The format is a series of lines, + /// each containing a metric name, and its value, preceded by + /// HELP and TYPE lines as per the Prometheus exposition format specification. + pub fn gather_metrics(&self) -> String { + let mut buffer = String::new(); + + fn format_metric( + buffer: &mut String, name: &str, help: &str, metric_type: &str, + value: impl std::fmt::Display, + ) { + use std::fmt::Write; + let _ = writeln!(buffer, "# HELP {} {}", name, help); + let _ = writeln!(buffer, "# TYPE {} {}", name, metric_type); + let _ = writeln!(buffer, "{} {}", name, value); + } + + format_metric( + &mut buffer, + "ldk_server_total_peers_count", + "Total number of peers", + "gauge", + self.total_peers_count.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_payments_count", + "Total number of payments", + "counter", + self.total_payments_count.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_pending_payments_count", + "Total number of pending payments", + "gauge", + self.total_pending_payments_count.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_successful_payments_count", + "Total number of successful payments", + "counter", + self.total_successful_payments_count.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_failed_payments_count", + "Total number of failed payments", + "counter", + self.total_failed_payments_count.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_channels_count", + "Total number of channels", + "gauge", + self.total_channels_count.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_public_channels_count", + "Total number of public channels", + "gauge", + self.total_public_channels_count.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_private_channels_count", + "Total number of private channels", + "gauge", + self.total_private_channels_count.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_onchain_balance_sats", + "Total onchain balance in sats", + "gauge", + self.total_onchain_balance_sats.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_spendable_onchain_balance_sats", + "Spendable onchain balance in sats", + "gauge", + self.spendable_onchain_balance_sats.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_anchor_channels_reserve_sats", + "Total anchor channels reserve in sats", + "gauge", + self.total_anchor_channels_reserve_sats.load(Ordering::Relaxed), + ); + + format_metric( + &mut buffer, + "ldk_server_total_lightning_balance_sats", + "Total lightning balance in sats", + "gauge", + self.total_lightning_balance_sats.load(Ordering::Relaxed), + ); + + buffer + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_initial_metrics_values() { + let metrics = Metrics::new(); + let result = metrics.gather_metrics(); + + // Check that all metrics are present and empty + assert!(result.contains("ldk_server_total_peers_count 0")); + assert!(result.contains("ldk_server_total_payments_count 0")); + assert!(result.contains("ldk_server_total_successful_payments_count 0")); + assert!(result.contains("ldk_server_total_pending_payments_count 0")); + assert!(result.contains("ldk_server_total_failed_payments_count 0")); + assert!(result.contains("ldk_server_total_channels_count 0")); + assert!(result.contains("ldk_server_total_public_channels_count 0")); + assert!(result.contains("ldk_server_total_private_channels_count 0")); + assert!(result.contains("ldk_server_total_onchain_balance_sats 0")); + assert!(result.contains("ldk_server_spendable_onchain_balance_sats 0")); + assert!(result.contains("ldk_server_total_anchor_channels_reserve_sats 0")); + assert!(result.contains("ldk_server_total_lightning_balance_sats 0")); + } + + #[test] + fn test_update_payments_count() { + let metrics = Metrics::new(); + + metrics.total_successful_payments_count.store(10, Ordering::Relaxed); + metrics.total_failed_payments_count.store(5, Ordering::Relaxed); + + metrics.update_payments_count(true); + metrics.update_payments_count(false); + + assert_eq!(metrics.total_successful_payments_count.load(Ordering::Relaxed), 11); + assert_eq!(metrics.total_failed_payments_count.load(Ordering::Relaxed), 6); + } + + #[test] + fn test_metrics_update_and_gather() { + let metrics = Metrics::new(); + + // Manually update metrics to simulate node activity + metrics.total_peers_count.store(5, Ordering::Relaxed); + metrics.total_payments_count.store(10, Ordering::Relaxed); + metrics.total_pending_payments_count.store(1, Ordering::Relaxed); + metrics.total_successful_payments_count.store(8, Ordering::Relaxed); + metrics.total_failed_payments_count.store(2, Ordering::Relaxed); + metrics.total_channels_count.store(3, Ordering::Relaxed); + metrics.total_public_channels_count.store(1, Ordering::Relaxed); + metrics.total_private_channels_count.store(2, Ordering::Relaxed); + metrics.total_onchain_balance_sats.store(100_000, Ordering::Relaxed); + metrics.spendable_onchain_balance_sats.store(50_000, Ordering::Relaxed); + metrics.total_anchor_channels_reserve_sats.store(1_000, Ordering::Relaxed); + metrics.total_lightning_balance_sats.store(250_000, Ordering::Relaxed); + + let result = metrics.gather_metrics(); + + // Check that output contains updated values and correct Prometheus format + assert!(result.contains("# HELP ldk_server_total_peers_count Total number of peers")); + assert!(result.contains("# TYPE ldk_server_total_peers_count gauge")); + assert!(result.contains("ldk_server_total_peers_count 5")); + + assert!(result.contains("ldk_server_total_payments_count 10")); + assert!(result.contains("ldk_server_total_pending_payments_count 1")); + assert!(result.contains("ldk_server_total_successful_payments_count 8")); + assert!(result.contains("ldk_server_total_failed_payments_count 2")); + assert!(result.contains("ldk_server_total_channels_count 3")); + assert!(result.contains("ldk_server_total_public_channels_count 1")); + assert!(result.contains("ldk_server_total_private_channels_count 2")); + assert!(result.contains("ldk_server_total_onchain_balance_sats 100000")); + assert!(result.contains("ldk_server_spendable_onchain_balance_sats 50000")); + assert!(result.contains("ldk_server_total_anchor_channels_reserve_sats 1000")); + assert!(result.contains("ldk_server_total_lightning_balance_sats 250000")); + } +} diff --git a/ldk-server/src/util/mod.rs b/ldk-server/src/util/mod.rs index 5d74de43..a57dbd00 100644 --- a/ldk-server/src/util/mod.rs +++ b/ldk-server/src/util/mod.rs @@ -9,6 +9,7 @@ pub(crate) mod config; pub(crate) mod logger; +pub(crate) mod metrics; pub(crate) mod proto_adapter; pub(crate) mod systemd; pub(crate) mod tls;