Skip to content

Commit

Permalink
feat(api-gateway): support monitoring slowest requests with trimmed m…
Browse files Browse the repository at this point in the history
…ean (#444)

Closes #443

---

_By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license_
  • Loading branch information
mattserrano authored Oct 20, 2023
1 parent 241a2dd commit d273f84
Show file tree
Hide file tree
Showing 9 changed files with 1,407 additions and 43 deletions.
328 changes: 328 additions & 0 deletions API.md

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions lib/common/metric/MetricStatistic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export enum MetricStatistic {
* trimmed mean; calculates the average after removing the 0.01% of data points with the highest values
*/
TM9999 = "tm99.99",

/**
* trimmed mean; calculates the average after removing the 1% lowest data points and the 1% highest data points
*/
Expand Down Expand Up @@ -94,6 +95,23 @@ export enum MetricStatistic {
*/
TM70_BOTH = "TM(30%:70%)",

/**
* trimmed mean; calculates the average of only the highest 5% of data points
* (i.e. after removing the 95% lowest data points)
*/
TM95_TOP = "TM(95%:100%)",
/**
* trimmed mean; calculates the average of only the highest 1% of data points
* (i.e. after removing the 99% lowest data points)
*/
TM99_TOP = "TM(99%:100%)",
/**
* trimmed mean; calculates the average of only the highest 0.1% of data points
* (i.e. after removing the 99.9% lowest data points)
*/
TM999_TOP = "TM(99.9%:100%)",
/**
* trimmed mean; calculates the average of only the highest 0.01% of data points
* (i.e. after removing the 99.99% lowest data points)
*/
TM9999_TOP = "TM(99.99%:100%)",

/**
* winsorized mean; calculates the average while treating the 50% of the highest values to be equal to the value at the 50th percentile
*/
Expand Down
16 changes: 16 additions & 0 deletions lib/common/monitoring/alarms/LatencyAlarmFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ export enum LatencyType {
TM99 = "TM99",
TM999 = "TM999",
TM9999 = "TM9999",
TM95_TOP = "TM(95%:100%)",
TM99_TOP = "TM(99%:100%)",
TM999_TOP = "TM(99.9%:100%)",
TM9999_TOP = "TM(99.99%:100%)",
AVERAGE = "Average",
}

Expand Down Expand Up @@ -58,6 +62,14 @@ export function getLatencyTypeStatistic(latencyType: LatencyType) {
return MetricStatistic.TM999;
case LatencyType.TM9999:
return MetricStatistic.TM9999;
case LatencyType.TM95_TOP:
return MetricStatistic.TM95_TOP;
case LatencyType.TM99_TOP:
return MetricStatistic.TM99_TOP;
case LatencyType.TM999_TOP:
return MetricStatistic.TM999_TOP;
case LatencyType.TM9999_TOP:
return MetricStatistic.TM9999_TOP;
case LatencyType.AVERAGE:
return MetricStatistic.AVERAGE;
default:
Expand Down Expand Up @@ -98,6 +110,10 @@ export function getLatencyTypeLabel(latencyType: LatencyType) {
return latencyType.replace("999", "99.9") + averageSuffix;
case LatencyType.P9999:
case LatencyType.TM9999:
// NOTE: the *_TOP values below ("TM(95%:100%)", "TM(99.99%:100%)", ...) contain
// no literal "9999" substring, so the replace() in this branch leaves them
// unchanged; they share this branch only to get the suffix appended.
case LatencyType.TM95_TOP:
case LatencyType.TM99_TOP:
case LatencyType.TM999_TOP:
case LatencyType.TM9999_TOP:
// we need proper decimal here
return latencyType.replace("9999", "99.99") + averageSuffix;
case LatencyType.AVERAGE:
Expand Down
8 changes: 8 additions & 0 deletions lib/monitoring/aws-apigateway/ApiGatewayMonitoring.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ export interface ApiGatewayMonitoringOptions extends BaseMonitoringProps {
readonly addLatencyTM99Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM9999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM95OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM99OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM999OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM9999OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyAverageAlarm?: Record<string, LatencyThreshold>;

readonly addLowTpsAlarm?: Record<string, LowTpsThreshold>;
Expand Down Expand Up @@ -175,6 +179,10 @@ export class ApiGatewayMonitoring extends Monitoring {
[LatencyType.TM99]: props.addLatencyTM99Alarm,
[LatencyType.TM999]: props.addLatencyTM999Alarm,
[LatencyType.TM9999]: props.addLatencyTM9999Alarm,
// Outlier (top-trimmed mean) alarms: each latency type must map to the
// identically-numbered prop, otherwise a user-supplied threshold is silently
// ignored or attached to the wrong statistic.
[LatencyType.TM95_TOP]: props.addLatencyTM95OutlierAlarm,
[LatencyType.TM99_TOP]: props.addLatencyTM99OutlierAlarm,
[LatencyType.TM999_TOP]: props.addLatencyTM999OutlierAlarm,
[LatencyType.TM9999_TOP]: props.addLatencyTM9999OutlierAlarm,
[LatencyType.AVERAGE]: props.addLatencyAverageAlarm,
};

Expand Down
34 changes: 34 additions & 0 deletions lib/monitoring/aws-apigatewayv2/ApiGatewayV2HttpApiMonitoring.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,20 @@ export interface ApiGatewayV2MonitoringOptions extends BaseMonitoringProps {
readonly addLatencyP999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyP9999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyP100Alarm?: Record<string, LatencyThreshold>;

readonly addLatencyTM50Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM70Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM90Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM95Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM99Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM9999Alarm?: Record<string, LatencyThreshold>;

readonly addLatencyTM95OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM99OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM999OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM9999OutlierAlarm?: Record<string, LatencyThreshold>;

readonly addLatencyAverageAlarm?: Record<string, LatencyThreshold>;

readonly addIntegrationLatencyP50Alarm?: Record<string, LatencyThreshold>;
Expand All @@ -75,13 +82,32 @@ export interface ApiGatewayV2MonitoringOptions extends BaseMonitoringProps {
readonly addIntegrationLatencyP999Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyP9999Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyP100Alarm?: Record<string, LatencyThreshold>;

readonly addIntegrationLatencyTM50Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM70Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM90Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM95Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM99Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM999Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM9999Alarm?: Record<string, LatencyThreshold>;

readonly addIntegrationLatencyTM95OutlierAlarm?: Record<
string,
LatencyThreshold
>;
readonly addIntegrationLatencyTM99OutlierAlarm?: Record<
string,
LatencyThreshold
>;
readonly addIntegrationLatencyTM999OutlierAlarm?: Record<
string,
LatencyThreshold
>;
readonly addIntegrationLatencyTM9999OutlierAlarm?: Record<
string,
LatencyThreshold
>;

readonly addIntegrationLatencyAverageAlarm?: Record<string, LatencyThreshold>;

readonly addLowTpsAlarm?: Record<string, LowTpsThreshold>;
Expand Down Expand Up @@ -200,6 +226,10 @@ export class ApiGatewayV2HttpApiMonitoring extends Monitoring {
[LatencyType.TM99]: props.addLatencyTM99Alarm,
[LatencyType.TM999]: props.addLatencyTM999Alarm,
[LatencyType.TM9999]: props.addLatencyTM9999Alarm,
// Outlier (top-trimmed mean) alarms: each latency type must map to the
// identically-numbered prop, otherwise a user-supplied threshold is silently
// ignored or attached to the wrong statistic.
[LatencyType.TM95_TOP]: props.addLatencyTM95OutlierAlarm,
[LatencyType.TM99_TOP]: props.addLatencyTM99OutlierAlarm,
[LatencyType.TM999_TOP]: props.addLatencyTM999OutlierAlarm,
[LatencyType.TM9999_TOP]: props.addLatencyTM9999OutlierAlarm,
[LatencyType.AVERAGE]: props.addLatencyAverageAlarm,
};

Expand All @@ -219,6 +249,10 @@ export class ApiGatewayV2HttpApiMonitoring extends Monitoring {
[LatencyType.TM99]: props.addIntegrationLatencyTM99Alarm,
[LatencyType.TM999]: props.addIntegrationLatencyTM999Alarm,
[LatencyType.TM9999]: props.addIntegrationLatencyTM9999Alarm,
[LatencyType.TM95_TOP]: props.addIntegrationLatencyTM95OutlierAlarm,
[LatencyType.TM99_TOP]: props.addIntegrationLatencyTM99OutlierAlarm,
[LatencyType.TM999_TOP]: props.addIntegrationLatencyTM999OutlierAlarm,
[LatencyType.TM9999_TOP]: props.addIntegrationLatencyTM9999OutlierAlarm,
[LatencyType.AVERAGE]: props.addIntegrationLatencyAverageAlarm,
};

Expand Down
52 changes: 50 additions & 2 deletions test/monitoring/aws-apigateway/ApiGatewayMonitoring.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,30 @@ test("snapshot test: all alarms", () => {
datapointsToAlarm: 29999,
},
},
addLatencyTM95OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM99OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM999OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM9999OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyAverageAlarm: {
Warning: {
maxLatency: Duration.millis(20),
Expand All @@ -170,7 +194,7 @@ test("snapshot test: all alarms", () => {
});

addMonitoringDashboardsToStack(stack, monitoring);
expect(numAlarmsCreated).toStrictEqual(22);
expect(numAlarmsCreated).toStrictEqual(26);
expect(Template.fromStack(stack)).toMatchSnapshot();
});

Expand Down Expand Up @@ -304,6 +328,30 @@ test("snapshot test: all alarms using interface", () => {
datapointsToAlarm: 29999,
},
},
addLatencyTM95OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM99OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM999OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM9999OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyAverageAlarm: {
Warning: {
maxLatency: Duration.millis(20),
Expand All @@ -324,6 +372,6 @@ test("snapshot test: all alarms using interface", () => {
});

addMonitoringDashboardsToStack(stack, monitoring);
expect(numAlarmsCreated).toStrictEqual(22);
expect(numAlarmsCreated).toStrictEqual(26);
expect(Template.fromStack(stack)).toMatchSnapshot();
});
Loading

0 comments on commit d273f84

Please sign in to comment.