diff --git a/docs/monitoring_metrics/faq.rst b/docs/monitoring_metrics/faq.rst new file mode 100644 index 0000000..a5a68d2 --- /dev/null +++ b/docs/monitoring_metrics/faq.rst @@ -0,0 +1,67 @@ +Frequently Asked Questions +========================== + +Here is a list of questions and answers that should help you get started with +monitoring with zabbix and prometheus. + +How do I access zabbix? +----------------------- + +How do I access zabbix when I'm a community member? +--------------------------------------------------- + +How do I access Prometheus? +--------------------------- + +How do I access Prometheus when I'm a community member? +------------------------------------------------------- + +Do you have a 5-minute guide on how to use prometheus? +------------------------------------------------------- + +In other words, do you have some how-tos/links I should read to understand/get +started with prometheus? + +How do I get basic HW (disk, cpu, memory, network...) monitoring for a host? +---------------------------------------------------------------------------- + +How do I monitor a list of services? +------------------------------------ + - pagure.io and src.fp.o have two different lists of services to monitor; + they partly overlap but aren't exactly the same. How can I monitor them? + + +How do I get alerted for a service not running? +----------------------------------------------- + +How can I tune the alerts? +-------------------------- + +As in, who gets alerted? When? How? + +How do I ask for the service to be restarted N times before being alerted? +---------------------------------------------------------------------------- + + +How do I monitor rabbitmq queues? +--------------------------------- + +How do we alert about checks not passing to people outside of our teams? 
+------------------------------------------------------------------------ + -> the OSCI team is interested in having notifications/monitoring for the CI + queues in rabbitmq + +How can we chain a prometheus instance to ours? +----------------------------------------------- +This allows us to consolidate in a single instance monitoring coming from different +instances + +Can we monitor rabbitmq queues in prometheus? +--------------------------------------------- + +How can I monitor the performance of my application? +----------------------------------------------------- + +Number of requests served? Number of 500 errors? Number of DB connections? + + diff --git a/docs/monitoring_metrics/index.rst b/docs/monitoring_metrics/index.rst index c5cdeb2..d1ae92a 100644 --- a/docs/monitoring_metrics/index.rst +++ b/docs/monitoring_metrics/index.rst @@ -27,3 +27,10 @@ In process we want to be able to answer the questions posed in the latest mailin - Can we get zabbix to pull from prometheus? - Can zabbix handle our number of machines? - How flexible is the alerting? + + +.. toctree:: + :maxdepth: 1 + + faq + diff --git a/scripts/migration.sql b/scripts/migration.sql index 9daba79..d469a64 100644 --- a/scripts/migration.sql +++ b/scripts/migration.sql @@ -112,3 +112,27 @@ ALTER TABLE user_messages ADD CO ALTER TABLE package_messages DROP CONSTRAINT package_messages_msg_fkey; ALTER TABLE package_messages ADD CONSTRAINT package_messages_msg_fkey FOREIGN KEY (msg) REFERENCES messages2(id); + +-------------------------------------------------------------------------------- + +DROP TABLE messages_010g , messages_020g, messages_030g, messages_040g, messages_050g, + messages_060g, messages_070g, messages_080g, messages_090g, messages_100g, + messages_110g, messages_120g, messages_130g, messages_140g, messages_150g, + messages_160g, messages_170g, messages_180g, messages_190g, messages_200g, + messages_210g, messages_220g + + +-- 1. 
Dropping the original primary key +ALTER TABLE messages3 DROP CONSTRAINT messages3_pkey; + +-- 2. Renaming existing index for another_id (optional) +ALTER INDEX uniq_1483a5e93414710b RENAME TO users_pkey + +-- 3. Creating new primary key using existing index for another_id +ALTER TABLE messages3 ADD PRIMARY KEY USING INDEX id + +-- 4. Creating index for old id column (optional) +CREATE UNIQUE INDEX messages3_idx_id ON messages3 (id) + +-- 5. You can drop the original sequence generator if you won't need it +DROP SEQUENCE users_id_seq diff --git a/scripts/test_datagrepper_perfs.py b/scripts/test_datagrepper_perfs.py index 1645312..15bd92e 100644 --- a/scripts/test_datagrepper_perfs.py +++ b/scripts/test_datagrepper_perfs.py @@ -222,9 +222,7 @@ class TestAPI: # time per requests mean (avg) if self.pass_requests != 0 or self.fail_requests != 0: - divided_by = self.pass_requests - if self.pass_requests == 0: - divided_by = self.fail_requests + divided_by = self.pass_requests or self.fail_requests self.tpr_mean = self.sum_response_time / divided_by # requests per second if self.start_time == 0: @@ -341,10 +339,10 @@ def main(): print("Tests started at %s." % time.asctime()) for env_name, base_url in [ - ("datagrepper-timescalebd/aws", "http://datagrepper-timescale.arc.fedorainfracloud.org/datagrepper"), - ("datagrepper-test/aws", "http://datagrepper-test.arc.fedorainfracloud.org/datagrepper"), + # ("datagrepper-timescalebd/aws", "http://datagrepper-timescale.arc.fedorainfracloud.org/datagrepper"), + # ("datagrepper-test/aws", "http://datagrepper-test.arc.fedorainfracloud.org/datagrepper"), ("datagrepper-prod/aws", "http://datagrepper-adam.arc.fedorainfracloud.org/datagrepper"), - ("datagrepper-prod/openshift", "https://datagrepper-monitor-dashboard.app.os.fedoraproject.org"), + # ("datagrepper-prod/openshift", "https://datagrepper-monitor-dashboard.app.os.fedoraproject.org"), ]: for name in [ "test_filter_by_topic",