Skip to content

Commit 4fe626e

Browse files
committed
feat: Add usage snapshots API for point-in-time app and service metrics
Introduces new V3 API endpoints for capturing point-in-time usage data:

App Usage Snapshots (/v3/app_usage/snapshots):
- Creates snapshots of all running processes across the platform
- Captures instance counts, memory allocation, and buildpack info
- Data organized by organization and space in paginated chunks

Service Usage Snapshots (/v3/service_usage/snapshots):
- Creates snapshots of all service instances across the platform
- Captures service plan, offering, and broker information
- Supports both managed and user-provided service instances

Both snapshot types:
- Are admin-only operations that run asynchronously via pollable jobs
- Include a checkpoint reference (GUID) to the most recent usage event
- Support automatic cleanup of old and stale snapshots via daily jobs
- Expose Prometheus metrics for generation duration and failure tracking
1 parent 4646541 commit 4fe626e

File tree

47 files changed

+3560
-3
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+3560
-3
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
require 'presenters/v3/app_usage_snapshot_presenter'
2+
require 'presenters/v3/app_usage_snapshot_chunk_presenter'
3+
require 'messages/app_usage_snapshots_create_message'
4+
require 'messages/app_usage_snapshots_list_message'
5+
require 'fetchers/app_usage_snapshot_list_fetcher'
6+
require 'jobs/runtime/app_usage_snapshot_generator_job'
7+
8+
# Controller for app usage snapshots: listing, showing, creating, and paging
# through the chunks of a completed snapshot.
class AppUsageSnapshotsController < ApplicationController
  # Lists snapshots. Callers without global read permission get an empty,
  # correctly paginated list instead of an error.
  def index
    message = AppUsageSnapshotsListMessage.from_params(query_params)
    unprocessable!(message.errors.full_messages) unless message.valid?

    dataset =
      if permission_queryer.can_read_globally?
        AppUsageSnapshotListFetcher.fetch_all(message, AppUsageSnapshot.dataset)
      else
        # A dataset that can never match any row.
        AppUsageSnapshot.where(guid: [])
      end

    page = SequelPaginator.new.get_page(dataset, message.try(:pagination_options))

    render status: :ok, json: Presenters::V3::PaginatedListPresenter.new(
      presenter: Presenters::V3::AppUsageSnapshotPresenter,
      paginated_result: page,
      path: '/v3/app_usage/snapshots',
      message: message
    )
  end

  # Renders a single snapshot identified by the :guid param.
  def show
    snapshot = find_readable_snapshot!

    render status: :ok, json: Presenters::V3::AppUsageSnapshotPresenter.new(snapshot)
  end

  # Creates a zeroed placeholder snapshot and enqueues the job that fills it in.
  # Responds with :accepted and a Location header pointing at the pollable job.
  def create
    message = AppUsageSnapshotsCreateMessage.new(hashed_params[:body])
    unprocessable!(message.errors.full_messages) unless message.valid?

    unauthorized! unless permission_queryer.can_write_globally?

    # Any snapshot without completed_at counts as "generation in progress".
    # NOTE(review): this check and the create below are separate statements, so two
    # concurrent requests could presumably both pass it - confirm whether that matters.
    if AppUsageSnapshot.where(completed_at: nil).first
      raise CloudController::Errors::ApiError.new_from_details('AppUsageSnapshotGenerationInProgress')
    end

    snapshot = AppUsageSnapshot.create(
      checkpoint_event_guid: nil,
      created_at: Time.now.utc,
      completed_at: nil,
      instance_count: 0,
      organization_count: 0,
      space_count: 0,
      app_count: 0,
      chunk_count: 0
    )

    pollable_job = enqueue_generator_for(snapshot)

    head :accepted, 'Location' => url_builder.build_url(path: "/v3/jobs/#{pollable_job.guid}")
  end

  # Lists the chunks of a completed snapshot; rejects the request while the
  # snapshot is still being generated.
  def chunks
    snapshot = find_readable_snapshot!

    unprocessable!('Snapshot is still processing') unless snapshot.complete?

    # NOTE(review): unlike #index, the pagination params are not run through a
    # list message's validation here - confirm this is intended.
    page = SequelPaginator.new.get_page(
      snapshot.app_usage_snapshot_chunks_dataset,
      PaginationOptions.from_params(query_params)
    )

    render status: :ok, json: Presenters::V3::PaginatedListPresenter.new(
      presenter: Presenters::V3::AppUsageSnapshotChunkPresenter,
      paginated_result: page,
      path: "/v3/app_usage/snapshots/#{snapshot.guid}/chunks"
    )
  end

  private

  # Looks up the snapshot named by the :guid param. Reports "not found" both when
  # the caller lacks global read permission and when no such snapshot exists.
  def find_readable_snapshot!
    snapshot_not_found! unless permission_queryer.can_read_globally?

    snapshot = AppUsageSnapshot.first(guid: hashed_params[:guid])
    snapshot_not_found! unless snapshot

    snapshot
  end

  # Enqueues the generator job on the generic queue and returns the pollable job.
  # If enqueueing fails, the placeholder snapshot is destroyed before re-raising so
  # it does not linger as an "in progress" snapshot.
  def enqueue_generator_for(snapshot)
    job = Jobs::Runtime::AppUsageSnapshotGeneratorJob.new(snapshot.guid)
    Jobs::Enqueuer.new(queue: Jobs::Queues.generic).enqueue_pollable(job)
  rescue StandardError
    snapshot.destroy
    raise
  end

  def snapshot_not_found!
    resource_not_found!(:app_usage_snapshot)
  end
end
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
require 'presenters/v3/service_usage_snapshot_presenter'
2+
require 'presenters/v3/service_usage_snapshot_chunk_presenter'
3+
require 'messages/service_usage_snapshots_create_message'
4+
require 'messages/service_usage_snapshots_list_message'
5+
require 'fetchers/service_usage_snapshot_list_fetcher'
6+
require 'jobs/runtime/service_usage_snapshot_generator_job'
7+
8+
# Controller for service usage snapshots: listing, showing, creating, and paging
# through the chunks of a completed snapshot.
class ServiceUsageSnapshotsController < ApplicationController
  # Lists snapshots. Callers without global read permission get an empty,
  # correctly paginated list instead of an error.
  def index
    message = ServiceUsageSnapshotsListMessage.from_params(query_params)
    unprocessable!(message.errors.full_messages) unless message.valid?

    dataset =
      if permission_queryer.can_read_globally?
        ServiceUsageSnapshotListFetcher.fetch_all(message, ServiceUsageSnapshot.dataset)
      else
        # A dataset that can never match any row.
        ServiceUsageSnapshot.where(guid: [])
      end

    page = SequelPaginator.new.get_page(dataset, message.try(:pagination_options))

    render status: :ok, json: Presenters::V3::PaginatedListPresenter.new(
      presenter: Presenters::V3::ServiceUsageSnapshotPresenter,
      paginated_result: page,
      path: '/v3/service_usage/snapshots',
      message: message
    )
  end

  # Renders a single snapshot identified by the :guid param.
  def show
    snapshot = find_readable_snapshot!

    render status: :ok, json: Presenters::V3::ServiceUsageSnapshotPresenter.new(snapshot)
  end

  # Creates a zeroed placeholder snapshot and enqueues the job that fills it in.
  # Responds with :accepted and a Location header pointing at the pollable job.
  def create
    message = ServiceUsageSnapshotsCreateMessage.new(hashed_params[:body])
    unprocessable!(message.errors.full_messages) unless message.valid?

    unauthorized! unless permission_queryer.can_write_globally?

    # Any snapshot without completed_at counts as "generation in progress".
    # NOTE(review): this check and the create below are separate statements, so two
    # concurrent requests could presumably both pass it - confirm whether that matters.
    if ServiceUsageSnapshot.where(completed_at: nil).first
      raise CloudController::Errors::ApiError.new_from_details('ServiceUsageSnapshotGenerationInProgress')
    end

    snapshot = ServiceUsageSnapshot.create(
      checkpoint_event_guid: nil,
      created_at: Time.now.utc,
      completed_at: nil,
      service_instance_count: 0,
      organization_count: 0,
      space_count: 0,
      chunk_count: 0
    )

    pollable_job = enqueue_generator_for(snapshot)

    head :accepted, 'Location' => url_builder.build_url(path: "/v3/jobs/#{pollable_job.guid}")
  end

  # Lists the chunks of a completed snapshot; rejects the request while the
  # snapshot is still being generated.
  def chunks
    snapshot = find_readable_snapshot!

    unprocessable!('Snapshot is still processing') unless snapshot.complete?

    # NOTE(review): unlike #index, the pagination params are not run through a
    # list message's validation here - confirm this is intended.
    page = SequelPaginator.new.get_page(
      snapshot.service_usage_snapshot_chunks_dataset,
      PaginationOptions.from_params(query_params)
    )

    render status: :ok, json: Presenters::V3::PaginatedListPresenter.new(
      presenter: Presenters::V3::ServiceUsageSnapshotChunkPresenter,
      paginated_result: page,
      path: "/v3/service_usage/snapshots/#{snapshot.guid}/chunks"
    )
  end

  private

  # Looks up the snapshot named by the :guid param. Reports "not found" both when
  # the caller lacks global read permission and when no such snapshot exists.
  def find_readable_snapshot!
    snapshot_not_found! unless permission_queryer.can_read_globally?

    snapshot = ServiceUsageSnapshot.first(guid: hashed_params[:guid])
    snapshot_not_found! unless snapshot

    snapshot
  end

  # Enqueues the generator job on the generic queue and returns the pollable job.
  # If enqueueing fails, the placeholder snapshot is destroyed before re-raising so
  # it does not linger as an "in progress" snapshot.
  def enqueue_generator_for(snapshot)
    job = Jobs::Runtime::ServiceUsageSnapshotGeneratorJob.new(snapshot.guid)
    Jobs::Enqueuer.new(queue: Jobs::Queues.generic).enqueue_pollable(job)
  rescue StandardError
    snapshot.destroy
    raise
  end

  def snapshot_not_found!
    resource_not_found!(:service_usage_snapshot)
  end
end
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
require 'fetchers/base_list_fetcher'
2+
3+
module VCAP::CloudController
  # List fetcher for AppUsageSnapshot records, built on BaseListFetcher.
  class AppUsageSnapshotListFetcher < BaseListFetcher
    # Returns +dataset+ narrowed by whatever filtering BaseListFetcher applies
    # for the given list +message+.
    def self.fetch_all(message, dataset)
      filter(message, dataset)
    end

    # Delegates to the inherited filter, supplying AppUsageSnapshot as the model class.
    def self.filter(message, dataset)
      super(message, dataset, AppUsageSnapshot)
    end
    private_class_method :filter
  end
end
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
require 'fetchers/base_list_fetcher'
2+
3+
module VCAP::CloudController
  # List fetcher for ServiceUsageSnapshot records, built on BaseListFetcher.
  class ServiceUsageSnapshotListFetcher < BaseListFetcher
    # Returns +dataset+ narrowed by whatever filtering BaseListFetcher applies
    # for the given list +message+.
    def self.fetch_all(message, dataset)
      filter(message, dataset)
    end

    # Delegates to the inherited filter, supplying ServiceUsageSnapshot as the model class.
    def self.filter(message, dataset)
      super(message, dataset, ServiceUsageSnapshot)
    end
    private_class_method :filter
  end
end
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
module VCAP::CloudController
  module Jobs
    module Runtime
      # Background job that prunes AppUsageSnapshot rows:
      #   * completed snapshots created more than +cutoff_age_in_days+ days ago, and
      #   * snapshots that never completed and were created more than
      #     STALE_IN_PROGRESS_TIMEOUT_IN_HOURS hours ago.
      class AppUsageSnapshotCleanup < VCAP::CloudController::Jobs::CCJob
        # Age (in hours) after which a snapshot with no completed_at is treated as abandoned.
        # NOTE(review): a generation run that legitimately takes longer than this would have
        # its snapshot deleted underneath it - confirm the limit suits large platforms.
        STALE_IN_PROGRESS_TIMEOUT_IN_HOURS = 1

        attr_accessor :cutoff_age_in_days

        # @param cutoff_age_in_days [Integer] retention period for completed snapshots, in days
        def initialize(cutoff_age_in_days)
          @cutoff_age_in_days = cutoff_age_in_days
        end

        # Deletes old completed and stale in-progress snapshots and logs how many
        # rows of each kind were removed.
        def perform
          logger = Steno.logger('cc.background')
          logger.info("Cleaning up usage snapshots older than #{cutoff_age_in_days} days")

          # Sample the clock once so both cutoffs are relative to the same instant.
          now = Time.now.utc
          cutoff_time = now - cutoff_age_in_days.days
          stale_timeout = now - STALE_IN_PROGRESS_TIMEOUT_IN_HOURS.hours

          # Dataset#delete returns the number of rows it removed, so the logged figures
          # are the real deletion counts rather than a separate COUNT taken beforehand.
          # NOTE(review): a dataset-level delete does not load model instances; chunk rows
          # are presumably removed by a database-level cascade - verify against the schema.
          completed_count = AppUsageSnapshot.where(
            Sequel.lit('created_at < ? AND completed_at IS NOT NULL', cutoff_time)
          ).delete

          stale_count = AppUsageSnapshot.where(
            Sequel.lit('created_at < ? AND completed_at IS NULL', stale_timeout)
          ).delete

          logger.info("Deleted #{completed_count} old completed snapshots and #{stale_count} stale in-progress snapshots")
        end

        def job_name_in_configuration
          :app_usage_snapshot_cleanup
        end

        def max_attempts
          1
        end
      end
    end
  end
end
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
require 'repositories/app_usage_snapshot_repository'
2+
3+
module VCAP::CloudController
  module Jobs
    module Runtime
      # Background job that populates an existing AppUsageSnapshot row, identified
      # by guid, through AppUsageSnapshotRepository.
      class AppUsageSnapshotGeneratorJob < VCAP::CloudController::Jobs::CCJob
        attr_reader :resource_guid

        # @param snapshot_guid [String] guid of the snapshot row to populate
        def initialize(snapshot_guid)
          @resource_guid = snapshot_guid
        end

        # Loads the snapshot and asks the repository to populate it. Any StandardError
        # is logged with its backtrace and re-raised.
        #
        # NOTE(review): nothing in this job removes or marks the snapshot on failure; unless
        # populate_snapshot! does so, the row keeps a nil completed_at until a cleanup job
        # deletes it - confirm this is the intended failure behaviour.
        def perform
          log = Steno.logger('cc.background')
          log.info("Starting usage snapshot generation for snapshot #{resource_guid}")

          record = AppUsageSnapshot.first(guid: resource_guid)
          raise "Snapshot not found: #{resource_guid}" if record.nil?

          Repositories::AppUsageSnapshotRepository.new.populate_snapshot!(record)

          log.info("Usage snapshot #{record.guid} completed: #{record.instance_count} instances")
        rescue StandardError => e
          trace = e.backtrace.join("\n")
          log.error("Usage snapshot generation failed: #{e.message}\n#{trace}")
          raise
        end

        def job_name_in_configuration
          :app_usage_snapshot_generator
        end

        # A single attempt only.
        def max_attempts
          1
        end

        def resource_type
          'app_usage_snapshot'
        end

        def display_name
          'app_usage_snapshot.generate'
        end
      end
    end
  end
end
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
module VCAP::CloudController
  module Jobs
    module Runtime
      # Background job that prunes ServiceUsageSnapshot rows:
      #   * completed snapshots created more than +cutoff_age_in_days+ days ago, and
      #   * snapshots that never completed and were created more than
      #     STALE_IN_PROGRESS_TIMEOUT_IN_HOURS hours ago.
      class ServiceUsageSnapshotCleanup < VCAP::CloudController::Jobs::CCJob
        # Age (in hours) after which a snapshot with no completed_at is treated as abandoned.
        # NOTE(review): a generation run that legitimately takes longer than this would have
        # its snapshot deleted underneath it - confirm the limit suits large platforms.
        STALE_IN_PROGRESS_TIMEOUT_IN_HOURS = 1

        attr_accessor :cutoff_age_in_days

        # @param cutoff_age_in_days [Integer] retention period for completed snapshots, in days
        def initialize(cutoff_age_in_days)
          @cutoff_age_in_days = cutoff_age_in_days
        end

        # Deletes old completed and stale in-progress snapshots and logs how many
        # rows of each kind were removed.
        def perform
          logger = Steno.logger('cc.background')
          logger.info("Cleaning up service usage snapshots older than #{cutoff_age_in_days} days")

          # Sample the clock once so both cutoffs are relative to the same instant.
          now = Time.now.utc
          cutoff_time = now - cutoff_age_in_days.days
          stale_timeout = now - STALE_IN_PROGRESS_TIMEOUT_IN_HOURS.hours

          # Dataset#delete returns the number of rows it removed, so the logged figures
          # are the real deletion counts rather than a separate COUNT taken beforehand.
          # NOTE(review): a dataset-level delete does not load model instances; chunk rows
          # are presumably removed by a database-level cascade - verify against the schema.
          completed_count = ServiceUsageSnapshot.where(
            Sequel.lit('created_at < ? AND completed_at IS NOT NULL', cutoff_time)
          ).delete

          stale_count = ServiceUsageSnapshot.where(
            Sequel.lit('created_at < ? AND completed_at IS NULL', stale_timeout)
          ).delete

          logger.info("Deleted #{completed_count} old completed snapshots and #{stale_count} stale in-progress snapshots")
        end

        def job_name_in_configuration
          :service_usage_snapshot_cleanup
        end

        def max_attempts
          1
        end
      end
    end
  end
end

0 commit comments

Comments
 (0)