Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package datadog.trace.common.metrics;

import static datadog.trace.api.ProtocolVersion.V0_4;
import static datadog.trace.bootstrap.instrumentation.api.Tags.SPAN_KIND;
import static datadog.trace.bootstrap.instrumentation.api.Tags.SPAN_KIND_CLIENT;
import static java.util.concurrent.TimeUnit.MICROSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;

Expand Down Expand Up @@ -52,6 +54,7 @@ static List<CoreSpan<?>> generateTrace(int len) {
final List<CoreSpan<?>> trace = new ArrayList<>();
for (int i = 0; i < len; i++) {
SimpleSpan span = new SimpleSpan("", "", "", "", true, true, false, 0, 10, -1);
span.setTag(SPAN_KIND, SPAN_KIND_CLIENT);
span.setTag("peer.hostname", Strings.random(10));
trace.add(span);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package datadog.trace.common.metrics;

import static datadog.trace.bootstrap.instrumentation.api.Tags.SPAN_KIND;
import static datadog.trace.bootstrap.instrumentation.api.Tags.SPAN_KIND_CLIENT;
import static java.util.concurrent.TimeUnit.MICROSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;

import datadog.communication.ddagent.DDAgentFeaturesDiscovery;
import datadog.trace.api.WellKnownTags;
import datadog.trace.common.writer.Writer;
import datadog.trace.core.CoreSpan;
import datadog.trace.core.CoreTracer;
import datadog.trace.core.DDSpan;
import datadog.trace.core.monitor.HealthMetrics;
import datadog.trace.util.Strings;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

/**
 * Companion to {@link ConflatingMetricsAggregatorBenchmark} that publishes real {@link DDSpan}
 * instances to the aggregator instead of the lightweight {@code SimpleSpan} mock. The intent is
 * for the JIT to see the production {@link CoreSpan#isKind} path (cached span.kind ordinal +
 * bit-test) rather than the groovy mock's dispatch.
 */
@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(MICROSECONDS)
@Warmup(iterations = 1, time = 30, timeUnit = SECONDS)
@Measurement(iterations = 3, time = 30, timeUnit = SECONDS)
@Fork(1)
public class ConflatingMetricsAggregatorDDSpanBenchmark {

  /** Peer tag that is both advertised by the fixed feature discovery and set on every span. */
  private static final String PEER_HOSTNAME_TAG = "peer.hostname";

  /** Tracer used only to mint spans; finished traces go to a writer that discards them. */
  private static final CoreTracer TRACER =
      CoreTracer.builder().writer(new NoopWriter()).strictTraceWrites(false).build();

  private final DDAgentFeaturesDiscovery featuresDiscovery =
      new ConflatingMetricsAggregatorBenchmark.FixedAgentFeaturesDiscovery(
          Collections.singleton(PEER_HOSTNAME_TAG), Collections.emptySet());

  private final ConflatingMetricsAggregator aggregator =
      new ConflatingMetricsAggregator(
          new WellKnownTags("", "", "", "", "", ""),
          Collections.emptySet(),
          featuresDiscovery,
          HealthMetrics.NO_OP,
          new ConflatingMetricsAggregatorBenchmark.NullSink(),
          2048,
          2048,
          false);

  /** The trace that is published on every benchmark invocation. */
  private final List<CoreSpan<?>> spans = generateTrace(64);

  /**
   * Builds {@code len} finished client spans, each carrying a random ten-character {@code
   * peer.hostname} tag.
   *
   * @param len number of spans to create; a non-positive value yields an empty list
   * @return the finished spans, in creation order
   */
  static List<CoreSpan<?>> generateTrace(int len) {
    final List<CoreSpan<?>> result = new ArrayList<>();
    for (int remaining = len; remaining > 0; remaining--) {
      final DDSpan clientSpan = (DDSpan) TRACER.startSpan("benchmark", "op");
      clientSpan.setTag(SPAN_KIND, SPAN_KIND_CLIENT);
      clientSpan.setTag(PEER_HOSTNAME_TAG, Strings.random(10));
      // Finish with an explicit duration so the wall clock is not consulted and every fork
      // observes the same value.
      clientSpan.finishWithDuration(10);
      result.add(clientSpan);
    }
    return result;
  }

  /** {@link Writer} whose operations do nothing; {@link #flush()} always reports success. */
  static class NoopWriter implements Writer {
    @Override
    public void start() {}

    @Override
    public void write(List<DDSpan> trace) {}

    @Override
    public void incrementDropCounts(int spanCount) {}

    @Override
    public boolean flush() {
      return true;
    }

    @Override
    public void close() {}
  }

  /** Publishes the pre-built trace and hands the result to JMH so the call is not eliminated. */
  @Benchmark
  public void benchmark(Blackhole blackhole) {
    blackhole.consume(aggregator.publish(spans));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,27 @@ public AggregateMetric recordDurations(int count, AtomicLongArray durations) {
return this;
}

/**
 * Folds a single hit into this aggregate.
 *
 * <p>{@code tagAndDuration} holds the duration in nanoseconds, optionally with the {@link
 * #TOP_LEVEL_TAG} and/or {@link #ERROR_TAG} bits OR-ed in. Each flag bit that is present is
 * cleared before the remaining value is recorded and added to the running duration.
 *
 * @param tagAndDuration duration nanos plus optional flag bits
 * @return this aggregate
 */
public AggregateMetric recordOneDuration(long tagAndDuration) {
  long nanos = tagAndDuration;
  ++hitCount;

  final boolean topLevel = (nanos & TOP_LEVEL_TAG) == TOP_LEVEL_TAG;
  if (topLevel) {
    // the flag is known to be fully set here, so masking it out equals toggling it off
    nanos &= ~TOP_LEVEL_TAG;
    ++topLevelCount;
  }

  // the error flag is tested on the value with the top-level flag already removed
  final boolean failed = (nanos & ERROR_TAG) == ERROR_TAG;
  if (!failed) {
    okLatencies.accept(nanos);
  } else {
    nanos &= ~ERROR_TAG;
    errorLatencies.accept(nanos);
    ++errorCount;
  }

  duration += nanos;
  return this;
}

/** Returns the current value of the {@code errorCount} counter. */
public int getErrorCount() {
  return this.errorCount;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
package datadog.trace.common.metrics;

import static datadog.trace.api.Functions.UTF8_ENCODE;
import static datadog.trace.common.metrics.ConflatingMetricsAggregator.PEER_TAGS_CACHE;
import static datadog.trace.common.metrics.ConflatingMetricsAggregator.PEER_TAGS_CACHE_ADDER;
import static datadog.trace.common.metrics.ConflatingMetricsAggregator.SERVICE_NAMES;
import static datadog.trace.common.metrics.ConflatingMetricsAggregator.SPAN_KINDS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

import datadog.trace.api.Pair;
import datadog.trace.api.cache.DDCache;
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
import datadog.trace.common.metrics.SignalItem.StopSignal;
import datadog.trace.core.monitor.HealthMetrics;
import datadog.trace.core.util.LRUCache;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import org.jctools.queues.MessagePassingQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -21,11 +31,8 @@ final class Aggregator implements Runnable {

private static final Logger log = LoggerFactory.getLogger(Aggregator.class);

// Queue that processed Batch instances are offered back to so they can be reused.
private final MessagePassingQueue<Batch> batchPool;
// Queue of InboxItem instances (signals, batches, span snapshots) handled by accept(InboxItem).
private final MessagePassingQueue<InboxItem> inbox;
// LRU cache holding one AggregateMetric per MetricKey; entries are created on first use.
private final LRUCache<MetricKey, AggregateMetric> aggregates;
// Batches keyed by MetricKey; an entry is removed only while it still maps to the batch being
// processed.
private final ConcurrentMap<MetricKey, Batch> pending;
// Key set kept in step with the aggregates: keys are removed when their aggregate expires or is
// expunged.
private final Set<MetricKey> commonKeys;
// Destination for aggregated metrics. NOTE(review): its use is outside the visible lines.
private final MetricWriter writer;
// the reporting interval controls how much history will be buffered
// when the agent is unresponsive (only 10 pending requests will be
Expand All @@ -41,20 +48,14 @@ final class Aggregator implements Runnable {

Aggregator(
MetricWriter writer,
MessagePassingQueue<Batch> batchPool,
MessagePassingQueue<InboxItem> inbox,
ConcurrentMap<MetricKey, Batch> pending,
final Set<MetricKey> commonKeys,
int maxAggregates,
long reportingInterval,
TimeUnit reportingIntervalTimeUnit,
HealthMetrics healthMetrics) {
this(
writer,
batchPool,
inbox,
pending,
commonKeys,
maxAggregates,
reportingInterval,
reportingIntervalTimeUnit,
Expand All @@ -64,30 +65,37 @@ final class Aggregator implements Runnable {

/**
 * Full constructor: stores the supplied collaborators, creates the aggregate cache and converts
 * the reporting interval to nanoseconds.
 *
 * <p>NOTE(review): this span looks like it was captured from a rendered diff without +/- markers,
 * so removed and added lines appear interleaved (two argument lists follow {@code new
 * LRUCache<>(}, and some assigned fields may no longer exist). Confirm against the real file.
 *
 * @param writer stored in {@code this.writer}
 * @param inbox stored in {@code this.inbox}
 * @param maxAggregates passed to the cache both as {@code maxAggregates * 4 / 3} and unscaled
 *     (presumably initial capacity and maximum size; verify against {@code LRUCache})
 * @param reportingInterval reporting interval, in units of {@code reportingIntervalTimeUnit}
 * @param reportingIntervalTimeUnit unit used to convert {@code reportingInterval} to nanoseconds
 * @param sleepMillis stored in {@code this.sleepMillis}
 * @param healthMetrics handed to the cache's expiry listener
 */
Aggregator(
MetricWriter writer,
MessagePassingQueue<Batch> batchPool,
MessagePassingQueue<InboxItem> inbox,
ConcurrentMap<MetricKey, Batch> pending,
final Set<MetricKey> commonKeys,
int maxAggregates,
long reportingInterval,
TimeUnit reportingIntervalTimeUnit,
long sleepMillis,
HealthMetrics healthMetrics) {
this.writer = writer;
this.batchPool = batchPool;
this.inbox = inbox;
this.commonKeys = commonKeys;
this.aggregates =
new LRUCache<>(
new CommonKeyCleaner(commonKeys, healthMetrics),
maxAggregates * 4 / 3,
0.75f,
maxAggregates);
this.pending = pending;
new AggregateExpiry(healthMetrics), maxAggregates * 4 / 3, 0.75f, maxAggregates);
// keep the interval in nanoseconds
this.reportingIntervalNanos = reportingIntervalTimeUnit.toNanos(reportingInterval);
this.sleepMillis = sleepMillis;
}

/**
 * Expiry listener for the aggregate cache: notifies {@link HealthMetrics} when an expired entry
 * still has a positive hit count.
 */
private static final class AggregateExpiry
    implements LRUCache.ExpiryListener<MetricKey, AggregateMetric> {
  private final HealthMetrics healthMetrics;

  AggregateExpiry(HealthMetrics healthMetrics) {
    this.healthMetrics = healthMetrics;
  }

  @Override
  public void accept(Map.Entry<MetricKey, AggregateMetric> expired) {
    final AggregateMetric metric = expired.getValue();
    if (metric.getHitCount() <= 0) {
      // nothing was recorded on this aggregate, so nothing is reported
      return;
    }
    healthMetrics.onStatsAggregateDropped();
  }
}

/** Removes every entry from the aggregate cache. */
public void clearAggregates() {
  aggregates.clear();
}
Expand Down Expand Up @@ -129,20 +137,54 @@ public void accept(InboxItem item) {
} else {
signal.ignore();
}
} else if (item instanceof Batch && !stopped) {
Batch batch = (Batch) item;
MetricKey key = batch.getKey();
// important that it is still *this* batch pending, must not remove otherwise
pending.remove(key, batch);
} else if (item instanceof SpanSnapshot && !stopped) {
SpanSnapshot snapshot = (SpanSnapshot) item;
MetricKey key = buildMetricKey(snapshot);
AggregateMetric aggregate = aggregates.computeIfAbsent(key, k -> new AggregateMetric());
batch.contributeTo(aggregate);
aggregate.recordOneDuration(snapshot.tagAndDuration);
dirty = true;
// return the batch for reuse
batchPool.offer(batch);
}
}
}

/**
 * Builds the {@link MetricKey} for a span snapshot. The service name, span kind and peer tags are
 * resolved through the shared caches; all other components are copied from the snapshot as-is.
 */
private static MetricKey buildMetricKey(SpanSnapshot snapshot) {
  // resolved in the same order in which the constructor arguments are listed
  final UTF8BytesString service = SERVICE_NAMES.computeIfAbsent(snapshot.serviceName, UTF8_ENCODE);
  final UTF8BytesString kind =
      SPAN_KINDS.computeIfAbsent(snapshot.spanKind, UTF8BytesString::create);
  final List<UTF8BytesString> peerTags = materializePeerTags(snapshot.peerTagPairs);

  return new MetricKey(
      snapshot.resourceName,
      service,
      snapshot.operationName,
      snapshot.serviceNameSource,
      snapshot.spanType,
      snapshot.httpStatusCode,
      snapshot.synthetic,
      snapshot.traceRoot,
      kind,
      peerTags,
      snapshot.httpMethod,
      snapshot.httpEndpoint,
      snapshot.grpcStatusCode);
}

/**
 * Turns a flat array of alternating peer-tag names and values into encoded peer tags.
 *
 * @param pairs {@code [name0, value0, name1, value1, ...]}; may be {@code null} or empty
 * @return an empty list for no input, a singleton list for exactly one pair, otherwise a list with
 *     one encoded tag per pair in input order
 */
private static List<UTF8BytesString> materializePeerTags(String[] pairs) {
  final int length = pairs == null ? 0 : pairs.length;
  if (length == 0) {
    return Collections.emptyList();
  }
  if (length == 2) {
    // exactly one name/value pair: a singleton list is returned instead of an ArrayList
    return Collections.singletonList(encodePeerTag(pairs[0], pairs[1]));
  }
  final List<UTF8BytesString> encoded = new ArrayList<>(length / 2);
  int index = 0;
  while (index < length) {
    encoded.add(encodePeerTag(pairs[index], pairs[index + 1]));
    index += 2;
  }
  return encoded;
}

/**
 * Encodes one peer tag. The per-name entry of {@code PEER_TAGS_CACHE} supplies a value cache and
 * the function used to create missing entries; the result is that cache's entry for {@code
 * value}.
 */
private static UTF8BytesString encodePeerTag(String name, String value) {
  final Pair<DDCache<String, UTF8BytesString>, Function<String, UTF8BytesString>> perName =
      PEER_TAGS_CACHE.computeIfAbsent(name, PEER_TAGS_CACHE_ADDER);
  final DDCache<String, UTF8BytesString> valueCache = perName.getLeft();
  final Function<String, UTF8BytesString> creator = perName.getRight();
  return valueCache.computeIfAbsent(value, creator);
}

private void report(long when, SignalItem signal) {
boolean skipped = true;
if (dirty) {
Expand Down Expand Up @@ -177,32 +219,11 @@ private void expungeStaleAggregates() {
AggregateMetric metric = pair.getValue();
if (metric.getHitCount() == 0) {
it.remove();
commonKeys.remove(pair.getKey());
}
}
}

/** Returns {@link System#currentTimeMillis()} converted to nanoseconds. */
private long wallClockTime() {
  final long nowMillis = System.currentTimeMillis();
  return MILLISECONDS.toNanos(nowMillis);
}

/**
 * Expiry listener that removes the expired key from the common-key set and notifies {@link
 * HealthMetrics} when the expired aggregate still has a positive hit count.
 */
private static final class CommonKeyCleaner
    implements LRUCache.ExpiryListener<MetricKey, AggregateMetric> {

  private final Set<MetricKey> commonKeys;
  private final HealthMetrics healthMetrics;

  private CommonKeyCleaner(Set<MetricKey> commonKeys, HealthMetrics healthMetrics) {
    this.commonKeys = commonKeys;
    this.healthMetrics = healthMetrics;
  }

  @Override
  public void accept(Map.Entry<MetricKey, AggregateMetric> expired) {
    // the key is always removed, whether or not the aggregate recorded any hits
    final MetricKey expiredKey = expired.getKey();
    commonKeys.remove(expiredKey);

    final boolean hadHits = expired.getValue().getHitCount() > 0;
    if (hadHits) {
      healthMetrics.onStatsAggregateDropped();
    }
  }
}
}
Loading