Add a FAQ page for the monitoring search

Signed-off-by: Pierre-Yves Chibon <pingou@pingoured.fr>
2021-03-25 10:41:06 +01:00 · 2021-03-25 10:41:06 +01:00 · 2361325be6
commit 2361325be6
parent 79a92bb2f3
4 changed files with 102 additions and 6 deletions
--- a/docs/monitoring_metrics/faq.rst
+++ b/docs/monitoring_metrics/faq.rst
@ -0,0 +1,67 @@
+Frequently Asked Questions
+==========================
+
+Here is a list of questions and answers that should help you get started with
+monitoring with zabbix and prometheus.
+
+How do I access zabbix?
+-----------------------
+
+How do I access zabbix when I'm a community member?
+---------------------------------------------------
+
+How do I access Prometheus?
+---------------------------
+
+How do I access Prometheus when I'm a community member?
+-------------------------------------------------------
+
+Do you have a 5-minute guide on how to use prometheus?
+-------------------------------------------------------
+
+In other words, do you have some how-tos/links I should read to understand/get
+started with prometheus?
+
+How do I get basic HW (disk, cpu, memory, network...) monitoring for a host?
+----------------------------------------------------------------------------
+
+How do I monitor a list of services?
+------------------------------------
+  - pagure.io and src.fp.o have two different lists of services to monitor;
+    they partly overlap but aren't exactly the same, how can I monitor them?
+
+
+How do I get alerted for a service not running?
+-----------------------------------------------
+
+How can I tune the alerts?
+--------------------------
+
+As in, who gets alerted? When? How?
+
+How do I ask for the service to be restarted <X> times before being alerted?
+----------------------------------------------------------------------------
+
+
+How do I monitor rabbitmq queues?
+---------------------------------
+
+How do we alert about checks not passing to people outside of our teams?
+------------------------------------------------------------------------
+  -> the OSCI team is interested in having notifications/monitoring for the CI
+     queues in rabbitmq
+
+How can we chain a prometheus instance to ours?
+-----------------------------------------------
+This allows consolidating, in a single instance, the monitoring coming from
+different instances.
+
+Can we monitor rabbitmq queues in prometheus?
+---------------------------------------------
+
+How can I monitor the performance of my application?
+-----------------------------------------------------
+
+Number of requests served? Number of 500 errors? Number of DB connections?
+
+
--- a/docs/monitoring_metrics/index.rst
+++ b/docs/monitoring_metrics/index.rst
@ -27,3 +27,10 @@ In process we want to be able to answer the questions posed in the latest mailin
 -  Can we get zabbix to pull from prometheus?
 -  Can zabbix handle our number of machines?
 -  How flexible is the alerting?
+
+
+.. toctree::
+    :maxdepth: 1
+
+    faq
+
--- a/scripts/migration.sql
+++ b/scripts/migration.sql
@ -112,3 +112,27 @@ ALTER TABLE user_messages ADD CO

 ALTER TABLE package_messages DROP CONSTRAINT package_messages_msg_fkey;
 ALTER TABLE package_messages ADD CONSTRAINT package_messages_msg_fkey FOREIGN KEY (msg) REFERENCES messages2(id);
+
+--------------------------------------------------------------------------------
+-- Drop the messages_010g .. messages_220g tables in one statement.
+DROP TABLE messages_010g, messages_020g, messages_030g, messages_040g, messages_050g,
+        messages_060g, messages_070g, messages_080g, messages_090g, messages_100g,
+        messages_110g, messages_120g, messages_130g, messages_140g, messages_150g,
+        messages_160g, messages_170g, messages_180g, messages_190g, messages_200g,
+        messages_210g, messages_220g;
+
+
+-- 1. Drop the original primary key constraint of messages3
+ALTER TABLE messages3 DROP CONSTRAINT messages3_pkey;
+
+-- 2. Rename an existing index (optional). NOTE(review): the target name users_pkey does not match messages3 -- confirm
+ALTER INDEX uniq_1483a5e93414710b RENAME TO users_pkey;
+
+-- 3. Create the new primary key from an existing index. NOTE(review): USING INDEX expects an index name; confirm "id" is one
+ALTER TABLE messages3 ADD PRIMARY KEY USING INDEX id;
+
+-- 4. Create a unique index on the id column (optional)
+CREATE UNIQUE INDEX messages3_idx_id ON messages3 (id);
+
+-- 5. Drop the sequence generator if it is no longer needed. NOTE(review): users_id_seq does not match messages3 -- confirm
+DROP SEQUENCE users_id_seq;
--- a/scripts/test_datagrepper_perfs.py
+++ b/scripts/test_datagrepper_perfs.py
@ -222,9 +222,7 @@ class TestAPI:

            # time per requests mean (avg)
            if self.pass_requests != 0 or self.fail_requests != 0:
-                divided_by = self.pass_requests
-                if self.pass_requests == 0:
-                    divided_by = self.fail_requests
+                divided_by = self.pass_requests or self.fail_requests
                self.tpr_mean = self.sum_response_time / divided_by
            # requests per second
            if self.start_time == 0:
@ -341,10 +339,10 @@ def main():
    print("Tests started at %s." % time.asctime())

    for env_name, base_url in [
-        ("datagrepper-timescalebd/aws", "http://datagrepper-timescale.arc.fedorainfracloud.org/datagrepper"),
-        ("datagrepper-test/aws", "http://datagrepper-test.arc.fedorainfracloud.org/datagrepper"),
+        # ("datagrepper-timescalebd/aws", "http://datagrepper-timescale.arc.fedorainfracloud.org/datagrepper"),
+        # ("datagrepper-test/aws", "http://datagrepper-test.arc.fedorainfracloud.org/datagrepper"),
        ("datagrepper-prod/aws", "http://datagrepper-adam.arc.fedorainfracloud.org/datagrepper"),
-        ("datagrepper-prod/openshift", "https://datagrepper-monitor-dashboard.app.os.fedoraproject.org"),
+        # ("datagrepper-prod/openshift", "https://datagrepper-monitor-dashboard.app.os.fedoraproject.org"),
    ]:
        for name in [
            "test_filter_by_topic",