summaryrefslogtreecommitdiffstats
path: root/lass/2configs/monitoring
diff options
context:
space:
mode:
Diffstat (limited to 'lass/2configs/monitoring')
-rw-r--r-- lass/2configs/monitoring/client.nix 94
-rw-r--r-- lass/2configs/monitoring/server.nix 66
2 files changed, 160 insertions, 0 deletions
diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix
new file mode 100644
index 000000000..e879d6960
--- /dev/null
+++ b/lass/2configs/monitoring/client.nix
@@ -0,0 +1,94 @@
+{pkgs, config, ...}:
+with import <stockholm/lib>;
+{
+  # Telegraf agent: gathers the inputs listed below and writes them to InfluxDB on prism.
+  # Every plugin is declared in the [[outputs.*]] / [[inputs.*]] array-of-tables TOML form.
+  lass.telegraf = {
+    enable = true;
+    interval = "1s";
+    outputs = ''
+      [[outputs.influxdb]]
+      urls = ["http://prism:8086"]
+      database = "telegraf_db"
+      user_agent = "telegraf"
+    '';
+    inputs = [
+      ''
+        [[inputs.cpu]]
+        percpu = false
+        totalcpu = true
+        drop = ["cpu_time"]
+      ''
+      ''
+        [[inputs.mem]]
+      ''
+      ''
+        [[inputs.ping]]
+        urls = ["8.8.8.8"]
+      ''
+      ''
+        [[inputs.net]]
+      ''
+      ''
+        [[inputs.dns_query]]
+        servers = ["8.8.8.8"]
+      ''
+    ];
+  };
+  # Extra packages on the telegraf unit's PATH (iputils presumably for the ping input - confirm).
+  systemd.services.telegraf.path = [
+    pkgs.iputils pkgs.lm_sensors
+  ];
+
+  services.collectd = { # collectd agent; the network plugin below sends its values to prism:25826
+    enable = true;
+    autoLoadPlugin = true;
+    extraConfig = ''
+      Hostname "${config.krebs.build.host.name}"
+      # Interval must precede every LoadPlugin: a plugin keeps the interval in effect when it loads.
+      Interval 30.0
+      LoadPlugin load
+      LoadPlugin disk
+      LoadPlugin memory
+      LoadPlugin interface
+      <Plugin "interface">
+        Interface "*Link"
+        Interface "lo"
+        Interface "vboxnet*"
+        Interface "virbr*"
+        IgnoreSelected true
+      </Plugin>
+
+      LoadPlugin df
+      <Plugin "df">
+        MountPoint "/nix/store"
+        FSType "tmpfs"
+        FSType "binfmt_misc"
+        FSType "debugfs"
+        FSType "mqueue"
+        FSType "hugetlbfs"
+        FSType "systemd-1"
+        FSType "cgroup"
+        FSType "securityfs"
+        FSType "ramfs"
+        FSType "proc"
+        FSType "devpts"
+        FSType "devtmpfs"
+        MountPoint "/var/lib/docker/devicemapper"
+        IgnoreSelected true
+      </Plugin>
+
+      LoadPlugin cpu
+      <Plugin cpu>
+        ReportByCpu true
+        ReportByState true
+        ValuesPercentage true
+      </Plugin>
+
+      LoadPlugin network
+      <Plugin "network">
+        Server "prism" "25826"
+      </Plugin>
+    '';
+  };
+}
diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix
new file mode 100644
index 000000000..2e1c15ca1
--- /dev/null
+++ b/lass/2configs/monitoring/server.nix
@@ -0,0 +1,66 @@
+{pkgs, config, ...}:
+with import <stockholm/lib>;
+{
+  # InfluxDB: HTTP bound to :8086, admin bound to :8083, internal monitoring off.
+  # The host name reported in the meta section comes from krebs.build.host.name.
+  services.influxdb = {
+    enable = true;
+    extraConfig = {
+      meta = { hostname = config.krebs.build.host.name; };
+      # meta.logging-enabled = true;
+      http = { bind-address = ":8086"; };
+      admin = { bind-address = ":8083"; };
+      monitoring.enabled = false;
+      # monitoring.write-interval = "24h";
+      # collectd input on port 25826, stored in the collectd_db database.
+      collectd = [{
+        enabled = true;
+        typesdb = "${pkgs.collectd}/share/collectd/types.db";
+        database = "collectd_db";
+        port = 25826;
+      }];
+    };
+  };
+
+  # Kapacitor alerting. The handler script reads the alert JSON on stdin, pulls
+  # out .message with jq and announces it in #krebs-bots through irc-announce.
+  # Alarm test2 turns critical when the per-host mean of usage_user exceeds 90.
+  lass.kapacitor = let
+    announceOnIrc = pkgs.writeDash "echo_irc" ''
+      set -euf
+      data="$(${pkgs.jq}/bin/jq -r .message)"
+      export LOGNAME=prism-alarm
+      ${pkgs.irc-announce}/bin/irc-announce \
+      irc.freenode.org 6667 prism-alarm \#krebs-bots "$data" >/dev/null
+    '';
+  in {
+    enable = true;
+    alarms.test2 = ''
+      batch
+      |query(${"'''"}
+      SELECT mean("usage_user") AS mean
+      FROM "${config.lass.kapacitor.check_db}"."default"."cpu"
+      ${"'''"})
+      .every(3m)
+      .period(1m)
+      .groupBy('host')
+      |alert()
+      .crit(lambda: "mean" > 90)
+      // Whenever we get an alert write it to a file.
+      .log('/tmp/alerts.log')
+      .exec('${announceOnIrc}')
+    '';
+  };
+
+  # Accept on retiolum: tcp/8086 (InfluxDB HTTP), tcp/3000 (presumably Grafana - confirm), udp/25826 (collectd).
+  krebs.iptables.tables.filter.INPUT.rules = map (r: {
+    predicate = "-p ${r.proto} -i retiolum --dport ${toString r.port}";
+    target = "ACCEPT";
+  }) [ { proto = "tcp"; port = 8086; } { proto = "tcp"; port = 3000; } { proto = "udp"; port = 25826; } ];
+  # Grafana: bound to all addresses with anonymous access enabled.
+  # <secrets/grafana_security.nix> should look like { adminUser = ""; adminPassword = ""; }
+  services.grafana.enable = true;
+  services.grafana.addr = "0.0.0.0";
+  services.grafana.auth.anonymous.enable = true;
+  services.grafana.security = import <secrets/grafana_security.nix>;
+}