feat: add monitor_uptime_ratio and monitor_response_time_seconds Prometheus metrics (#5506)
Co-authored-by: François HONORE <francois.honore@i-carre.net> Co-authored-by: Frank Elsinga <frank@elsinga.de> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
680f0f4584
commit
f71787eac1
@ -1044,7 +1044,10 @@ class Monitor extends BeanModel {
|
|||||||
await R.store(bean);
|
await R.store(bean);
|
||||||
|
|
||||||
log.debug("monitor", `[${this.name}] prometheus.update`);
|
log.debug("monitor", `[${this.name}] prometheus.update`);
|
||||||
this.prometheus?.update(bean, tlsInfo);
|
const data24h = uptimeCalculator.get24Hour();
|
||||||
|
const data30d = uptimeCalculator.get30Day();
|
||||||
|
const data1y = uptimeCalculator.get1Year();
|
||||||
|
this.prometheus?.update(bean, tlsInfo, { data24h, data30d, data1y });
|
||||||
|
|
||||||
previousBeat = bean;
|
previousBeat = bean;
|
||||||
|
|
||||||
@ -1952,7 +1955,7 @@ class Monitor extends BeanModel {
|
|||||||
*/
|
*/
|
||||||
async handleTlsInfo(tlsInfo) {
|
async handleTlsInfo(tlsInfo) {
|
||||||
await this.updateTlsInfo(tlsInfo);
|
await this.updateTlsInfo(tlsInfo);
|
||||||
this.prometheus?.update(null, tlsInfo);
|
this.prometheus?.update(null, tlsInfo, null);
|
||||||
|
|
||||||
if (!this.getIgnoreTls() && this.isEnabledExpiryNotification()) {
|
if (!this.getIgnoreTls() && this.isEnabledExpiryNotification()) {
|
||||||
log.debug("monitor", `[${this.name}] call checkCertExpiryNotifications`);
|
log.debug("monitor", `[${this.name}] call checkCertExpiryNotifications`);
|
||||||
|
|||||||
@ -4,6 +4,8 @@ const { R } = require("redbean-node");
|
|||||||
|
|
||||||
let monitorCertDaysRemaining = null;
|
let monitorCertDaysRemaining = null;
|
||||||
let monitorCertIsValid = null;
|
let monitorCertIsValid = null;
|
||||||
|
let monitorUptimeRatio = null;
|
||||||
|
let monitorAverageResponseTimeSeconds = null;
|
||||||
let monitorResponseTime = null;
|
let monitorResponseTime = null;
|
||||||
let monitorStatus = null;
|
let monitorStatus = null;
|
||||||
|
|
||||||
@ -69,6 +71,18 @@ class Prometheus {
|
|||||||
labelNames: commonLabels,
|
labelNames: commonLabels,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
monitorUptimeRatio = new PrometheusClient.Gauge({
|
||||||
|
name: "monitor_uptime_ratio",
|
||||||
|
help: "Uptime ratio calculated over sliding window specified by the 'window' label. (0.0 - 1.0)",
|
||||||
|
labelNames: [...commonLabels, "window"],
|
||||||
|
});
|
||||||
|
|
||||||
|
monitorAverageResponseTimeSeconds = new PrometheusClient.Gauge({
|
||||||
|
name: "monitor_response_time_seconds",
|
||||||
|
help: "Average response time in seconds calculated over sliding window specified by the 'window' label",
|
||||||
|
labelNames: [...commonLabels, "window"],
|
||||||
|
});
|
||||||
|
|
||||||
monitorResponseTime = new PrometheusClient.Gauge({
|
monitorResponseTime = new PrometheusClient.Gauge({
|
||||||
name: "monitor_response_time",
|
name: "monitor_response_time",
|
||||||
help: "Monitor Response Time (ms)",
|
help: "Monitor Response Time (ms)",
|
||||||
@ -130,11 +144,13 @@ class Prometheus {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the metrics page
|
* Update the metrics page
|
||||||
|
* @typedef {import("./uptime-calculator").UptimeDataResult} UptimeDataResult
|
||||||
* @param {object} heartbeat Heartbeat details
|
* @param {object} heartbeat Heartbeat details
|
||||||
* @param {object} tlsInfo TLS details
|
* @param {object} tlsInfo TLS details
|
||||||
|
* @param {{data24h: UptimeDataResult, data30d: UptimeDataResult, data1y: UptimeDataResult} | null} uptime the uptime ratio and average response time over the 24-hour, 30-day and 1-year windows, or null to leave those metrics untouched
|
||||||
* @returns {void}
|
* @returns {void}
|
||||||
*/
|
*/
|
||||||
update(heartbeat, tlsInfo) {
|
update(heartbeat, tlsInfo, uptime) {
|
||||||
if (typeof tlsInfo !== "undefined") {
|
if (typeof tlsInfo !== "undefined") {
|
||||||
try {
|
try {
|
||||||
let isValid;
|
let isValid;
|
||||||
@ -145,8 +161,7 @@ class Prometheus {
|
|||||||
}
|
}
|
||||||
monitorCertIsValid.set(this.monitorLabelValues, isValid);
|
monitorCertIsValid.set(this.monitorLabelValues, isValid);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
log.error("prometheus", "Caught error");
|
log.error("prometheus", "Caught error", e);
|
||||||
log.error("prometheus", e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -154,8 +169,49 @@ class Prometheus {
|
|||||||
monitorCertDaysRemaining.set(this.monitorLabelValues, tlsInfo.certInfo.daysRemaining);
|
monitorCertDaysRemaining.set(this.monitorLabelValues, tlsInfo.certInfo.daysRemaining);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
log.error("prometheus", "Caught error");
|
log.error("prometheus", "Caught error", e);
|
||||||
log.error("prometheus", e);
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (uptime) {
|
||||||
|
try {
|
||||||
|
monitorAverageResponseTimeSeconds.set(
|
||||||
|
{ ...this.monitorLabelValues, window: "1d" },
|
||||||
|
uptime.data24h.avgPing / 1000
|
||||||
|
);
|
||||||
|
} catch (e) {
|
||||||
|
log.error("prometheus", "Caught error", e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
monitorAverageResponseTimeSeconds.set(
|
||||||
|
{ ...this.monitorLabelValues, window: "30d" },
|
||||||
|
uptime.data30d.avgPing / 1000
|
||||||
|
);
|
||||||
|
} catch (e) {
|
||||||
|
log.error("prometheus", "Caught error", e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
monitorAverageResponseTimeSeconds.set(
|
||||||
|
{ ...this.monitorLabelValues, window: "365d" },
|
||||||
|
uptime.data1y.avgPing / 1000
|
||||||
|
);
|
||||||
|
} catch (e) {
|
||||||
|
log.error("prometheus", "Caught error", e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
monitorUptimeRatio.set({ ...this.monitorLabelValues, window: "1d" }, uptime.data24h.uptime);
|
||||||
|
} catch (e) {
|
||||||
|
log.error("prometheus", "Caught error", e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
monitorUptimeRatio.set({ ...this.monitorLabelValues, window: "30d" }, uptime.data30d.uptime);
|
||||||
|
} catch (e) {
|
||||||
|
log.error("prometheus", "Caught error", e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
monitorUptimeRatio.set({ ...this.monitorLabelValues, window: "365d" }, uptime.data1y.uptime);
|
||||||
|
} catch (e) {
|
||||||
|
log.error("prometheus", "Caught error", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -189,6 +245,8 @@ class Prometheus {
|
|||||||
try {
|
try {
|
||||||
monitorCertDaysRemaining.remove(this.monitorLabelValues);
|
monitorCertDaysRemaining.remove(this.monitorLabelValues);
|
||||||
monitorCertIsValid.remove(this.monitorLabelValues);
|
monitorCertIsValid.remove(this.monitorLabelValues);
|
||||||
|
monitorUptimeRatio.remove(this.monitorLabelValues);
|
||||||
|
monitorAverageResponseTimeSeconds.remove(this.monitorLabelValues);
|
||||||
monitorResponseTime.remove(this.monitorLabelValues);
|
monitorResponseTime.remove(this.monitorLabelValues);
|
||||||
monitorStatus.remove(this.monitorLabelValues);
|
monitorStatus.remove(this.monitorLabelValues);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|||||||
@ -206,7 +206,7 @@ class UptimeCalculator {
|
|||||||
* @param {number} status status
|
* @param {number} status status
|
||||||
* @param {number} ping Ping
|
* @param {number} ping Ping
|
||||||
* @param {dayjs.Dayjs} date Date (Only for migration)
|
* @param {dayjs.Dayjs} date Date (Only for migration)
|
||||||
* @returns {dayjs.Dayjs} date
|
* @returns {Promise<dayjs.Dayjs>} date
|
||||||
* @throws {Error} Invalid status
|
* @throws {Error} Invalid status
|
||||||
*/
|
*/
|
||||||
async update(status, ping = 0, date) {
|
async update(status, ping = 0, date) {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user