Skip to content

Commit dab46b1

Browse files
authored
alert on error by default, add configuration to control behaviour (#6)
1 parent 8c21366 commit dab46b1

File tree

6 files changed

+103
-28
lines changed

6 files changed

+103
-28
lines changed

README.md

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Example:
1919

2020
```toml
2121
reminder_interval = "3h"
22+
alert_on_error = false
2223

2324
[alerts]
2425
[alerts.test-alert-1]
@@ -39,6 +40,7 @@ reminder_interval = "3h"
3940
dbs = ["my-mysql-db"]
4041
destinations = ["slacks.my-slack"]
4142
interval = "1h30m"
43+
alert_on_error = true
4244

4345

4446
[dbs]
@@ -60,16 +62,17 @@ reminder_interval = "3h"
6062

6163

6264
Configuration is defined in [TOML](https://github.com/toml-lang/toml) format.
63-
- `reminder_interval` - Default interval for all alerts after which alert will be triggered again. Set to 0 if you don't need reminders. Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Examples: "300s", "1.5h" or "2h45m". Default value is 0.
64-
- `alerts.{alert-name}.reminder_interval` - Same as above, but for individual alert.
65+
- `reminder_interval` - Optional. Default interval for all alerts after which alert will be triggered again. Set to 0 if you don't need reminders. Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Examples: "300s", "1.5h" or "2h45m". Default value is 0.
66+
- `alert_on_error` - Optional. Defines behavior when an error is encountered (connection errors, SQL syntax errors, timeouts, etc.). If set to true, any error will trigger an alert. If set to false, the error will be logged and ignored. Can be overridden for an individual alert with `alerts.{alert-name}.alert_on_error`. Default value is true.
67+
- `alerts.{alert-name}.reminder_interval` - Optional. Same as the top-level `reminder_interval`, but for an individual alert.
6568
- `alerts.{alert-name}.query` - Required. SQL query to execute at regular interval.
6669
- `alerts.{alert-name}.message` - Optional. Message to pass with alert.
6770
- `alerts.{alert-name}.dbs` - Required. List of database references. Provided query will be executed against each database and trigger an individual alert for each database. All databases must be defined in `dbs` section of the configuration. Example: `["my-postgres-db", "my-mysql-db"]`.
68-
- `alerts.{alert-name}.destinations` - List of destination references to report the alert. Each destination should be defined in related section. Example: `["slacks.channel-1-webhook", "slacks.channel-2-webhook"]`
69-
- `alerts.{alert-name}.interval` - An interval between consecutive query execution. For simplicity it does not take in account time required to execute the query. For example if query execution time is 5s and interval is 10s then interval between two consecutive queries will be 15s. Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Examples: "300s", "1.5h" or "2h45m"
70-
- `dbs.{db-name}.driver` - SQL driver. Supported drivers: mysql, postgres.
71-
- `dbs.{db-name}.connection` - Connection string for the database. Documentation for databases: [postgres](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [mysql](https://github.com/go-sql-driver/mysql#dsn-data-source-name).
72-
- `slacks.{destination-name}.webhook_url` - Webhook URL for slack integration. More information how to set it up in [official slack documentation](https://api.slack.com/messaging/webhooks).
71+
- `alerts.{alert-name}.destinations` - Required. List of destination references to report the alert. Each destination should be defined in related section. Example: `["slacks.channel-1-webhook", "slacks.channel-2-webhook"]`
72+
- `alerts.{alert-name}.interval` - Required. The interval between consecutive query executions. For simplicity, it does not take into account the time required to execute the query. For example, if the query execution time is 5s and the interval is 10s, then the time between the starts of two consecutive queries will be 15s. Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Examples: "300s", "1.5h" or "2h45m".
73+
- `dbs.{db-name}.driver` - Required. SQL driver. Supported drivers: mysql, postgres.
74+
- `dbs.{db-name}.connection` - Required. Connection string for the database. Documentation for databases: [postgres](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [mysql](https://github.com/go-sql-driver/mysql#dsn-data-source-name).
75+
- `slacks.{destination-name}.webhook_url` - Required. Webhook URL for the Slack integration. More information on how to set it up is available in the [official slack documentation](https://api.slack.com/messaging/webhooks).
7376

7477
### Environment substitution.
7578

app/alert/alert.go

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ type Alert struct {
1919
Interval time.Duration
2020
ReminderInterval time.Duration
2121
AlertingAt time.Time
22+
LastAlertSentAt time.Time
23+
AlertOnError bool
2224
}
2325

2426
type Destination interface {
@@ -84,46 +86,51 @@ func (a *Alert) Check() {
8486
msg, err := a.ExecQuery()
8587
if err != nil {
8688
LogError(fmt.Errorf("couldn't check conditions for %s: %s", a.Name, err))
89+
if a.AlertOnError {
90+
a.SendAlert(err.Error())
91+
}
8792
return
8893
}
8994
if msg == "" {
90-
log.Printf("INFO: %s is OK", a.Name)
91-
if !a.AlertingAt.IsZero() {
92-
log.Printf("INFO: %s is resolved", a.Name)
93-
a.Resolve(fmt.Sprintf("Resolved after %s", humanizeDuration(time.Now().Sub(a.AlertingAt))))
94-
a.AlertingAt = time.Time{}
95-
}
95+
a.Resolve()
9696
return
9797
}
98-
log.Printf("INFO: %s is alerting", a.Name)
99-
if a.AlertingAt.IsZero() {
100-
a.AlertingAt = time.Now()
101-
a.SendAlert(msg)
102-
} else if a.ReminderInterval > 0 && time.Now().Sub(a.AlertingAt) > a.ReminderInterval {
103-
a.SendAlert(msg)
104-
}
98+
a.SendAlert(msg)
10599

106100
}
107101

108102
func (a *Alert) SendAlert(msg string) {
109-
if !a.AlertingAt.IsZero() {
103+
log.Printf("INFO: %s is alerting", a.Name)
104+
if a.AlertingAt.IsZero() {
110105
a.AlertingAt = time.Now()
111106
}
112-
for i := range a.Destinations {
113-
err := a.Destinations[i].SendAlert(a, msg)
114-
if err != nil {
115-
LogError(fmt.Errorf("couldn't send alert %s to %s", a.Name, a.Destinations[i].Name()))
107+
if a.LastAlertSentAt.IsZero() || (a.ReminderInterval > 0 && time.Since(a.LastAlertSentAt) > a.ReminderInterval) {
108+
for i := range a.Destinations {
109+
err := a.Destinations[i].SendAlert(a, msg)
110+
if err != nil {
111+
LogError(fmt.Errorf("couldn't send alert %s to %s", a.Name, a.Destinations[i].Name()))
112+
}
116113
}
114+
a.LastAlertSentAt = time.Now()
117115
}
118116
}
119117

120-
func (a *Alert) Resolve(msg string) {
118+
func (a *Alert) Resolve() {
119+
log.Printf("INFO: %s is OK", a.Name)
120+
if a.AlertingAt.IsZero() {
121+
return
122+
}
123+
alertDuration := humanizeDuration(time.Now().Sub(a.AlertingAt))
124+
resolveMessage := fmt.Sprintf("Resolved after %s", alertDuration)
121125
for i := range a.Destinations {
122-
err := a.Destinations[i].ResolveAlert(a, msg)
126+
err := a.Destinations[i].ResolveAlert(a, resolveMessage)
123127
if err != nil {
124128
LogError(fmt.Errorf("error during resolver alert %s to %s", a.Name, a.Destinations))
125129
}
126130
}
131+
a.AlertingAt = time.Time{}
132+
a.LastAlertSentAt = time.Time{}
133+
log.Printf("INFO: %s is resolved after %s", a.Name, alertDuration)
127134
}
128135

129136
func LogError(err error) {

app/alert/alert_test.go

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package alert
33
import (
44
"strings"
55
"testing"
6+
"time"
67

78
_ "github.com/go-sql-driver/mysql"
89
_ "github.com/lib/pq"
@@ -64,7 +65,7 @@ func TestAlert_ExecQuery_Mysql(t *testing.T) {
6465
func TestAlert_Check(t *testing.T) {
6566
d := new(MockDestination)
6667
d.On("SendAlert", mock.Anything, mock.Anything).Return(nil)
67-
mysql := sqltest.GetTestMysql()
68+
mysql := sqltest.GetTestPostgres()
6869
a := Alert{
6970
Name: "my_alert",
7071
Source: NewSource(mysql, "test"),
@@ -75,10 +76,55 @@ func TestAlert_Check(t *testing.T) {
7576
SELECT 2 as col1, 'string2' as col2`,
7677
}
7778
a.Check()
79+
assert.NotEqual(t, a.LastAlertSentAt, time.Time{})
80+
assert.NotEqual(t, a.AlertingAt, time.Time{})
81+
d.AssertNumberOfCalls(t, "SendAlert", 1)
82+
a.Check()
83+
a.Check()
84+
d.AssertNumberOfCalls(t, "SendAlert", 1)
85+
}
86+
87+
func TestAlert_Check_Error(t *testing.T) {
88+
d := new(MockDestination)
89+
d.On("SendAlert", mock.Anything, mock.Anything).Return(nil)
90+
mysql := sqltest.GetTestPostgres()
91+
a := Alert{
92+
Name: "my_alert",
93+
Source: NewSource(mysql, "test"),
94+
Destinations: []Destination{d},
95+
AlertOnError: false,
96+
Query: `
97+
SELECT 1 as col1, 'string' as col2 FROM`, // invalid query
98+
}
99+
a.Check()
100+
d.AssertNumberOfCalls(t, "SendAlert", 0)
101+
a.AlertOnError = true
102+
a.Check()
103+
assert.NotEqual(t, a.LastAlertSentAt, time.Time{})
104+
assert.NotEqual(t, a.AlertingAt, time.Time{})
78105
d.AssertNumberOfCalls(t, "SendAlert", 1)
106+
}
107+
108+
109+
func TestAlert_Reminder(t *testing.T) {
110+
d := new(MockDestination)
111+
d.On("SendAlert", mock.Anything, mock.Anything).Return(nil)
112+
mysql := sqltest.GetTestPostgres()
113+
a := Alert{
114+
Name: "my_alert",
115+
Source: NewSource(mysql, "test"),
116+
Destinations: []Destination{d},
117+
ReminderInterval: time.Millisecond,
118+
Query: `
119+
SELECT 1 as col1, 'string' as col2`,
120+
}
79121
a.Check()
122+
d.AssertNumberOfCalls(t, "SendAlert", 1)
80123
a.Check()
81124
d.AssertNumberOfCalls(t, "SendAlert", 1)
125+
time.Sleep(time.Millisecond)
126+
a.Check()
127+
d.AssertNumberOfCalls(t, "SendAlert", 2)
82128
}
83129

84130
func assertTables(t *testing.T, actual string, expected string) {

app/config/config.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type tomlAlert struct {
3131
Interval tomlDuration `toml:"interval"`
3232
ReminderInterval tomlDuration `toml:"reminder_interval"`
3333
Message string `toml:"message"`
34+
AlertOnError *bool `toml:"alert_on_error"` // can hold 3 possible values: true/false/not-specified
3435
}
3536

3637
type tomlDuration struct {
@@ -62,6 +63,7 @@ type tomlConfig struct {
6263
PagerDuty map[string]tomlPagerDuty `toml:"pager_duties"`
6364
Message string `toml:"message"`
6465
ReminderInterval tomlDuration `toml:"reminder_interval"`
66+
AlertOnError *bool `toml:"alert_on_error"` // can hold 3 possible values: true/false/not-specified
6567
}
6668

6769
func loadTOMLConfig(path string) (tomlConfig, error) {
@@ -102,6 +104,16 @@ func prepareAlerts(config tomlConfig) ([]alert.Alert, error) {
102104
}
103105

104106
for alertName, alertConfig := range config.Alerts {
107+
var alertOnError bool
108+
if alertConfig.AlertOnError == nil {
109+
if config.AlertOnError == nil {
110+
alertOnError = true
111+
} else {
112+
alertOnError = *config.AlertOnError
113+
}
114+
} else {
115+
alertOnError = *alertConfig.AlertOnError
116+
}
105117
reminderInterval := alertConfig.ReminderInterval.Duration
106118
if reminderInterval == 0 {
107119
reminderInterval = config.ReminderInterval.Duration
@@ -130,6 +142,7 @@ func prepareAlerts(config tomlConfig) ([]alert.Alert, error) {
130142
Interval: alertConfig.Interval.Duration,
131143
ReminderInterval: reminderInterval,
132144
Destinations: alertDestinations,
145+
AlertOnError: alertOnError,
133146
})
134147
}
135148
}

app/config/config_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ func TestMain(m *testing.M) {
2222
func Test_loadTOMLConfig(t *testing.T) {
2323
config, err := loadTOMLConfig("example.toml")
2424
require.NoError(t, err)
25+
falseB := false
26+
trueB := true
2527
expected := tomlConfig{
28+
AlertOnError: &trueB,
2629
ReminderInterval: tomlDuration{Duration: 3 * time.Hour},
2730
Alerts: map[string]tomlAlert{
2831
"test-alert-1": {
@@ -40,6 +43,7 @@ func Test_loadTOMLConfig(t *testing.T) {
4043
Destinations: []string{"slacks.my-slack"},
4144
Interval: tomlDuration{Duration: time.Minute * 90},
4245
ReminderInterval: tomlDuration{Duration: 0},
46+
AlertOnError: &falseB,
4347
},
4448
},
4549
DB: map[string]tomlDB{

app/config/example.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
reminder_interval = "3h"
2+
alert_on_error = true
23

34
[alerts]
45
[alerts.test-alert-1]
@@ -19,6 +20,7 @@ reminder_interval = "3h"
1920
dbs = ["my-mysql-db"]
2021
destinations = ["slacks.my-slack"]
2122
interval = "1h30m"
23+
alert_on_error = false
2224

2325

2426
[dbs]

0 commit comments

Comments
 (0)