diff --git a/playbooks/openshift-apps/monitor_gating.yml b/playbooks/openshift-apps/monitor_gating.yml index 407d7619b9..a2534210f5 100644 --- a/playbooks/openshift-apps/monitor_gating.yml +++ b/playbooks/openshift-apps/monitor_gating.yml @@ -80,7 +80,37 @@ - role: openshift/start-build app: monitor-gating buildname: monitor-gating-build objectname: monitor-gating-build tags: - build + + - role: openshift/object + app: monitor-gating + template: dashboard_config.yml + objectname: dashboard_config.yml + + - role: openshift/object + app: monitor-gating + template: dashboard_configmap.yml + objectname: dashboard_configmap.yml + + - role: openshift/object + app: monitor-gating + template: dashboard_service.yml + objectname: dashboard_service.yml + + - role: openshift/object + app: monitor-gating + template: dashboard_route.yml + objectname: dashboard_route.yml + + - role: openshift/object + app: monitor-gating + template: dashboard_serviceaccount.yml + objectname: dashboard_serviceaccount.yml + + - role: openshift/object + app: monitor-gating + template: dashboard_deploymentconfig.yml + objectname: dashboard_deploymentconfig.yml diff --git a/roles/openshift-apps/monitor-gating/templates/dashboard_anitya.json.j2 b/roles/openshift-apps/monitor-gating/templates/dashboard_anitya.json.j2 new file mode 100644 index 0000000000..4580515884 --- /dev/null +++ b/roles/openshift-apps/monitor-gating/templates/dashboard_anitya.json.j2 @@ -0,0 +1,282 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 2, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Datanommer", + "description": "", + "fieldConfig": { + "defaults": { 
"custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "format": "time_series", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT\n date(\"timestamp\") AS time,\n count(*)\nFROM\n messages\nWHERE\n topic like '%project.add%' AND\n $__timeFilter(\"timestamp\")\ngroup by\n time", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "New projects per day", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Datanommer", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + 
}, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "format": "time_series", + "group": [], + "metricColumn": "none", + "rawQuery": true, + "rawSql": "SELECT\n date(\"timestamp\") AS time,\n count(*)\nFROM\n messages\nWHERE\n topic like '%version.update%' AND\n $__timeFilter(\"timestamp\")\ngroup by\n time", + "refId": "A", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "column" + } + ] + ], + "timeColumn": "time", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Updates per day", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Anitya", + "uid": "8Zi9LU5Mz", + "version": 
5 + } diff --git a/roles/openshift-apps/monitor-gating/templates/dashboard_config.yml b/roles/openshift-apps/monitor-gating/templates/dashboard_config.yml new file mode 100644 index 0000000000..de19ab1f85 --- /dev/null +++ b/roles/openshift-apps/monitor-gating/templates/dashboard_config.yml @@ -0,0 +1,13 @@ +{%- macro load_file(filename) %}{% include filename %}{%- endmacro -%} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards + labels: + app: monitor-dashboard +data: + dashboard_anitya.json: |- + {{ load_file('dashboard_anitya.json.j2') | indent(6) }} + dashboard_fedora_coreos_updates.json: |- + {{ load_file('dashboard_fedora_coreos_updates.json.j2') | indent(6) }} diff --git a/roles/openshift-apps/monitor-gating/templates/dashboard_configmap.yml b/roles/openshift-apps/monitor-gating/templates/dashboard_configmap.yml new file mode 100644 index 0000000000..9db2b7f9e0 --- /dev/null +++ b/roles/openshift-apps/monitor-gating/templates/dashboard_configmap.yml @@ -0,0 +1,33 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasources + labels: + app: monitor-dashboard +data: + datasource.yml: |- + apiVersion: 1 + datasources: + - name: Datanommer + type: postgres + url: db-datanommer01.iad2.fedoraproject.org:5432 + database: datanommer + user: datanommer_ro + isDefault: true + secureJsonData: + password: {{ datanommer_ro_password | to_json }} + jsonData: + sslmode: "disable" # disable/require/verify-ca/verify-full + maxOpenConns: 0 # Grafana v5.4+ + maxIdleConns: 2 # Grafana v5.4+ + connMaxLifetime: 14400 # Grafana v5.4+ + postgresVersion: 1200 # 903=9.3, 904=9.4, 905=9.5, 906=9.6, 1000=10 + timescaledb: false + - name: Promscale + type: prometheus + url: https://promscale.apps.ocp.ci.centos.org/ + access: proxy + basicAuth: false + withCredentials: false + isDefault: false diff --git a/roles/openshift-apps/monitor-gating/templates/dashboard_deploymentconfig.yml 
b/roles/openshift-apps/monitor-gating/templates/dashboard_deploymentconfig.yml new file mode 100644 index 0000000000..cd22d2f1ef --- /dev/null +++ b/roles/openshift-apps/monitor-gating/templates/dashboard_deploymentconfig.yml @@ -0,0 +1,85 @@ +apiVersion: apps.openshift.io/v1 +kind: DeploymentConfig +metadata: + labels: + app: monitor-dashboard + name: monitor-dashboard +spec: + replicas: 1 + selector: + app: monitor-dashboard + deploymentconfig: monitor-dashboard + template: + metadata: + labels: + app: monitor-dashboard + deploymentconfig: monitor-dashboard + spec: + containers: + - args: + - "-provider=openshift" + - "-openshift-service-account=monitor-dashboard" + - "-upstream=http://localhost:3000" + - "-client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token" + - "-cookie-secret=9876543210" # NOTE(review): hard-coded secret committed in plain text; move to a vaulted variable + - "-tls-cert=/etc/tls/private/tls.crt" + - "-tls-key=/etc/tls/private/tls.key" + - "-pass-basic-auth=false" + - "-skip-auth-regex=(public|api/snapshots/.+|dashboard/snapshot/.+)" + image: registry.hub.docker.com/openshift/oauth-proxy:latest + name: oauth-proxy + ports: + - containerPort: 8443 + volumeMounts: + - mountPath: /etc/tls/private + name: grafana-tls + - env: + - name: GF_INSTALL_PLUGINS + value: "grafana-clock-panel,simpod-json-datasource,grafana-googlesheets-datasource" + - name: GF_AUTH_BASIC_ENABLED + value: 'true' + - name: GF_AUTH_PROXY_ENABLED + value: 'true' + - name: GF_AUTH_PROXY_HEADER_NAME + value: X-Forwarded-User + - name: GF_AUTH_PROXY_HEADER_PROPERTY + value: username + - name: GF_AUTH_PROXY_AUTO_SIGN_UP + value: 'true' + - name: GF_AUTH_DISABLE_LOGIN_FORM + value: 'false' + - name: GF_SECURITY_ADMIN_PASSWORD + value: 'nbusr123' # NOTE(review): hard-coded admin password committed in plain text; move to a vaulted variable + - name: GF_SECURITY_ADMIN_USER + value: 'admin' + - name: GF_USERS_ALLOW_SIGN_UP + value: 'false' + - name: 'GF_USERS_AUTO_ASSIGN_ORG_ROLE' + value: 'Admin' + - name: 'V' + value: '13' + image: registry.hub.docker.com/grafana/grafana:latest + name: dashboard + ports: + - containerPort: 3000 
+ volumeMounts: + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources +# - mountPath: /etc/grafana/provisioning/dashboards +# name: grafana-dashboards-provision + - mountPath: /var/lib/grafana/dashboards + name: grafana-dashboards + serviceAccountName: monitor-dashboard + volumes: + - name: grafana-tls + secret: + secretName: grafana-tls + - configMap: + name: grafana-datasources + name: grafana-datasources +# - configMap: +# name: grafana-dashboards-provision +# name: grafana-dashboards-provision + - configMap: + name: grafana-dashboards + name: grafana-dashboards diff --git a/roles/openshift-apps/monitor-gating/templates/dashboard_fedora_coreos_updates.json.j2 b/roles/openshift-apps/monitor-gating/templates/dashboard_fedora_coreos_updates.json.j2 new file mode 100644 index 0000000000..a354ccb4b3 --- /dev/null +++ b/roles/openshift-apps/monitor-gating/templates/dashboard_fedora_coreos_updates.json.j2 @@ -0,0 +1,497 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "annotations": { + "list": [ ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(os_version) (zincati_identity_os_info)", + "format": 
"time_series", + "intervalFactor": 2, + "legendFormat":{{ '"{{os_version}}"' }}, + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "OS versions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "zincati_identity_rollout_wariness != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": {{ '"{{instance}}"' }}, + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Static rollout wariness", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + 
] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Agent identity", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, (time() - zincati_update_agent_last_refresh_timestamp)[15m:])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": {{ '"{{instance}}"' }}, + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Agent refresh period (p99)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false 
+ }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (kind) (rate(zincati_cincinnati_update_checks_errors_total[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": {{'"kind: {{kind}}"'}}, + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Cincinnati client error-rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (os_version) ((zincati_cincinnati_booted_release_is_deadend) + on (instance) group_left(os_version) (0*zincati_identity_os_info))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": {{ '"{{os_version}}"' }}, + "refId": "A" + } + ], + "thresholds": [ ], + 
"timeFrom": null, + "timeShift": null, + "title": "Deadends detected", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Agent details", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ ], + "templating": { + "list": [ + { + "current": { + "text": "Promscale", + "value": "Promscale" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Fedora CoreOS updates (Zincati)", + "version": 0 + } diff --git a/roles/openshift-apps/monitor-gating/templates/dashboard_route.yml b/roles/openshift-apps/monitor-gating/templates/dashboard_route.yml new file mode 100644 index 0000000000..c141041e9b --- /dev/null +++ b/roles/openshift-apps/monitor-gating/templates/dashboard_route.yml @@ -0,0 +1,16 @@ +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: monitor-dashboard + labels: + app: monitor-dashboard +spec: + # host: not set; the previous commented-out value was waiverdb's. TODO confirm the intended hostname + port: + targetPort: web + to: + kind: Service + name: monitor-dashboard + tls: + termination: reencrypt + 
insecureEdgeTerminationPolicy: Redirect diff --git a/roles/openshift-apps/monitor-gating/templates/dashboard_service.yml b/roles/openshift-apps/monitor-gating/templates/dashboard_service.yml new file mode 100644 index 0000000000..45033e9e55 --- /dev/null +++ b/roles/openshift-apps/monitor-gating/templates/dashboard_service.yml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: monitor-dashboard + labels: + app: monitor-dashboard + annotations: + service.alpha.openshift.io/serving-cert-secret-name: grafana-tls +spec: + selector: + app: monitor-dashboard + ports: + - name: web + port: 8443 + targetPort: 8443 diff --git a/roles/openshift-apps/monitor-gating/templates/dashboard_serviceaccount.yml b/roles/openshift-apps/monitor-gating/templates/dashboard_serviceaccount.yml new file mode 100644 index 0000000000..d7b0d83396 --- /dev/null +++ b/roles/openshift-apps/monitor-gating/templates/dashboard_serviceaccount.yml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: monitor-dashboard + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"monitor-dashboard"}}'