diff --git a/library/nagios b/library/nagios new file mode 100644 index 0000000000..b5afbcf546 --- /dev/null +++ b/library/nagios @@ -0,0 +1,782 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# This file is largely copied from the Nagios module included in the +# Func project. Original copyright follows: +# +# func-nagios - Schedule downtime and enables/disable notifications +# Copyright 2011, Red Hat, Inc. +# Tim Bielawa +# +# This software may be freely redistributed under the terms of the GNU +# general public license version 2. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +DOCUMENTATION = ''' +--- +module: nagios +short_description: Perform common tasks in Nagios related to downtime and notifications. +description: + - "The M(nagios) module has two basic functions: scheduling downtime and toggling alerts for services or hosts." + - All actions require the I(host) parameter to be given explicitly. In playbooks you can use the C($inventory_hostname) variable to refer to the host the playbook is currently running on. + - You can specify multiple services at once by separating them with commas, e.g., C(services=httpd,nfs,puppet). + - When specifying what service to handle there is a special service value, I(host), which will handle alerts/downtime for the I(host itself), e.g., C(service=host). This keyword may not be given with other services at the same time. I(Setting alerts/downtime for a host does not affect alerts/downtime for any of the services running on it.) To schedule downtime for all services on a particular host use the keyword "all", e.g., C(service=all). + - When using the M(nagios) module you will need to specify your Nagios server using the C(delegate_to) parameter. +version_added: 0.7 +options: + action: + description: + - Action to take. 
+ required: true + default: null + choices: [ "downtime", "enable_alerts", "disable_alerts", "silence", "unsilence" ] + host: + description: + - Host to operate on in Nagios. + required: true + default: null + cmdfile: + description: + - Path to the nagios I(command file) (FIFO pipe). + - Only required if auto-detection fails. + required: false + default: auto-detected + author: + description: + - Author to leave downtime comments as. + - Only usable with the C(downtime) action. + required: false + default: Ansible + minutes: + description: + - Minutes to schedule downtime for. + - Only usable with the C(downtime) action. + required: false + default: 30 + services: + description: + - What to manage downtime/alerts for. Separate multiple services with commas. + - C(service) is an alias for C(services). + - B(Required) option when using the C(downtime), C(enable_alerts), and C(disable_alerts) actions. + aliases: [ "service" ] + required: true + default: null +author: Tim Bielawa +requirements: [ "Nagios" ] +examples: + - description: set 30 minutes of apache downtime + code: "nagios: action=downtime minutes=30 service=httpd host=$inventory_hostname" + - description: schedule an hour of HOST downtime + code: "nagios: action=downtime minutes=60 service=host host=$inventory_hostname" + - description: schedule downtime for ALL services on HOST + code: "nagios: action=downtime minutes=45 service=all host=$inventory_hostname" + - description: schedule downtime for a few services + code: "nagios: action=downtime services=frob,foobar,qeuz host=$inventory_hostname" + - description: enable SMART disk alerts + code: "nagios: action=enable_alerts service=smart host=$inventory_hostname" + - description: "two services at once: disable httpd and nfs alerts" + code: "nagios: action=disable_alerts service=httpd,nfs host=$inventory_hostname" + - description: disable HOST alerts + code: "nagios: action=disable_alerts service=host host=$inventory_hostname" + - description: silence ALL 
def which_cmdfile(locations=None):
    """
    Locate the Nagios external command file (FIFO pipe).

    Each candidate ``nagios.cfg`` is scanned in order for a
    ``command_file=<path>`` directive; the value from the first file
    that defines one is returned.

    locations - Optional list of nagios.cfg paths to search. When
                omitted, a built-in list of well-known install
                locations is used.

    Returns the configured command file path as a string, or None when
    no candidate file exists, is readable, and defines `command_file`.
    """

    if locations is None:
        locations = [
            # rhel
            '/etc/nagios/nagios.cfg',
            # debian
            '/etc/nagios3/nagios.cfg',
            # older debian
            '/etc/nagios2/nagios.cfg',
            # bsd, solaris
            '/usr/local/etc/nagios/nagios.cfg',
            # groundwork it monitoring
            '/usr/local/groundwork/nagios/etc/nagios.cfg',
            # open monitoring distribution
            '/omd/sites/oppy/tmp/nagios/nagios.cfg',
            # ???
            '/usr/local/nagios/etc/nagios.cfg',
            '/usr/local/nagios/nagios.cfg',
            '/opt/nagios/etc/nagios.cfg',
            '/opt/nagios/nagios.cfg'
        ]

    for path in locations:
        if not os.path.exists(path):
            continue

        try:
            cfg = open(path)
        except IOError:
            # Present but unreadable (permissions, directory, ...):
            # auto-detection is best-effort, so try the next candidate.
            continue

        # try/finally (rather than `with`) keeps this loadable on very
        # old Python 2 interpreters while still closing the handle.
        try:
            for line in cfg:
                # Split on the FIRST '=' only so a path that itself
                # contains '=' is not truncated, and require an exact
                # key match so e.g. 'command_file_foo' or a line with no
                # '=' at all is skipped instead of raising IndexError.
                parts = line.split('=', 1)
                if len(parts) == 2 and parts[0].strip() == 'command_file':
                    return parts[1].strip()
        finally:
            cfg.close()

    return None
def main():
    """
    Module entry point: parse arguments, validate them, run the action.

    `AnsibleModule` is not imported here; it is injected into this file
    by Ansible's module_common include at the bottom of the module.

    Required args per action:
      downtime                = (minutes, service, host)
      (un)silence             = (host)
      (enable/disable)_alerts = (service, host)

    AnsibleModule verifies `action` and `host`; 'minutes' and 'service'
    are verified manually below because they are only required for
    some actions.
    """
    ACTION_CHOICES = [
        'downtime',
        'silence',
        'unsilence',
        'enable_alerts',
        'disable_alerts'
    ]

    module = AnsibleModule(
        argument_spec=dict(
            action=dict(required=True, default=None, choices=ACTION_CHOICES),
            author=dict(default='Ansible'),
            host=dict(required=True, default=None),
            minutes=dict(default=30),
            cmdfile=dict(default=which_cmdfile()),
            services=dict(default=None, aliases=['service']),
        )
    )

    action = module.params['action']
    minutes = module.params['minutes']
    services = module.params['services']
    cmdfile = module.params['cmdfile']

    ##################################################################
    if action == 'downtime':
        # Make sure there's an actual service selected
        if not services:
            module.fail_json(msg='no service selected to set downtime for')
        # Make sure minutes is a number. Only the conversion errors are
        # caught: a bare `except:` would also swallow the SystemExit
        # that fail_json itself uses to terminate the module.
        try:
            int(minutes)
        except (TypeError, ValueError):
            module.fail_json(msg='invalid entry for minutes')

    ##################################################################
    if action in ['enable_alerts', 'disable_alerts']:
        if not services:
            module.fail_json(msg='a service is required when setting alerts')

    ##################################################################
    if not cmdfile:
        # fail_json takes keyword arguments only; the message must be
        # passed as msg=, otherwise the call itself raises TypeError.
        module.fail_json(msg='unable to locate nagios.cfg')

    ##################################################################
    ansible_nagios = Nagios(module, **module.params)
    ansible_nagios.act()
    ##################################################################
class Nagios(object):
    """
    Perform common tasks in Nagios related to downtime and
    notifications.

    Every action is carried out by writing one or more "external
    command" lines to the Nagios command file. The complete set of
    external commands Nagios handles is documented on their website:

    http://old.nagios.org/developerinfo/externalcommands/commandlist.php

    Note that in the case of `schedule_svc_downtime`,
    `enable_svc_notifications`, and `disable_svc_notifications`, the
    service argument should be passed as a list.
    """

    def __init__(self, module, **kwargs):
        """
        module - Object providing `fail_json` / `exit_json` (the
                 AnsibleModule instance created by the caller).
        kwargs - Must contain 'action', 'author', 'host', 'minutes',
                 'cmdfile' and 'services'.

        'services' is kept as-is when it is None or one of the special
        keywords 'host' / 'all'; otherwise it is split on commas into
        a list of service names.
        """
        self.module = module
        self.action = kwargs['action']
        self.author = kwargs['author']
        self.host = kwargs['host']
        self.minutes = int(kwargs['minutes'])
        self.cmdfile = kwargs['cmdfile']

        services = kwargs['services']
        if services is None or services in ('host', 'all'):
            self.services = services
        else:
            # Strip stray whitespace and drop empty entries so that
            # "httpd, nfs" or a trailing comma does not produce
            # commands for services named " nfs" or "".
            self.services = [s.strip() for s in services.split(',')
                             if s.strip()]

        # Every command successfully written, without trailing newline.
        self.command_results = []

    def _now(self):
        """
        The time in seconds since 12:00:00AM Jan 1, 1970
        """

        return int(time.time())

    def _write_command(self, cmd):
        """
        Write the given command to the Nagios command file.

        Returns True when the command was written (it is then also
        recorded in `self.command_results`). On an IOError the module's
        `fail_json` is invoked; False is returned should that call
        ever return.
        """

        try:
            fp = open(self.cmdfile, 'w')
            # Nested try/finally (rather than `with`) keeps this usable
            # on very old Python 2 interpreters while guaranteeing the
            # handle is closed even when the write fails.
            try:
                fp.write(cmd)
                fp.flush()
            finally:
                fp.close()
        except IOError:
            self.module.fail_json(msg='unable to write to nagios command file',
                                  cmdfile=self.cmdfile)
            return False

        self.command_results.append(cmd.strip())
        # Callers fold this into their own return value; without an
        # explicit True they would always report failure.
        return True

    def _fmt_dt_str(self, cmd, host, duration, author=None,
                    comment="Scheduling downtime", start=None,
                    svc=None, fixed=1, trigger=0):
        """
        Format an external-command downtime string.

        cmd - Nagios command ID
        host - Host schedule downtime on
        duration - Minutes to schedule downtime for
        author - Name to file the downtime as
        comment - Reason for running this command (upgrade, reboot, etc)
        start - Start of downtime in seconds since 12:00AM Jan 1 1970
          Default is to use the entry time (now)
        svc - Service to schedule downtime for, omit when for host downtime
        fixed - Start now if 1, start when a problem is detected if 0
        trigger - Optional ID of event to start downtime from. Leave as 0 for
          fixed downtime.

        Syntax: [submitted] COMMAND;<host_name>;[<service_description>]
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """

        entry_time = self._now()
        if start is None:
            start = entry_time

        hdr = "[%s] %s;%s;" % (entry_time, cmd, host)
        # Nagios expects the duration in seconds, callers pass minutes.
        duration_s = (duration * 60)
        end = start + duration_s

        if not author:
            author = self.author

        dt_args = [str(start), str(end), str(fixed), str(trigger),
                   str(duration_s), author, comment]
        if svc is not None:
            # Service downtime carries the service name as first field;
            # host downtime omits it.
            dt_args.insert(0, svc)

        return hdr + ";".join(dt_args) + "\n"

    def _fmt_notif_str(self, cmd, host, svc=None):
        """
        Format an external-command notification string.

        cmd - Nagios command ID.
        host - Host to en/disable notifications on.
        svc - Service to en/disable notifications for. A value is not
          required for host-level commands.

        Syntax: [submitted] COMMAND;<host_name>[;<service_description>]
        """

        entry_time = self._now()
        if svc is not None:
            notif_str = "[%s] %s;%s;%s\n" % (entry_time, cmd, host, svc)
        else:
            # Host-level command if no svc specified
            notif_str = "[%s] %s;%s\n" % (entry_time, cmd, host)

        return notif_str

    def schedule_svc_downtime(self, host, services=None, minutes=30):
        """
        This command is used to schedule downtime for a particular
        service.

        During the specified downtime, Nagios will not send
        notifications out about the service.

        services - List of service names; one command is written per
          service. None/empty writes nothing.

        Syntax: SCHEDULE_SVC_DOWNTIME;<host_name>;<service_description>;
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """

        cmd = "SCHEDULE_SVC_DOWNTIME"
        for service in (services or []):
            dt_cmd_str = self._fmt_dt_str(cmd, host, minutes, svc=service)
            self._write_command(dt_cmd_str)

    def schedule_host_downtime(self, host, minutes=30):
        """
        This command is used to schedule downtime for a particular
        host.

        During the specified downtime, Nagios will not send
        notifications out about the host.

        Syntax: SCHEDULE_HOST_DOWNTIME;<host_name>;<start_time>;<end_time>;
        <fixed>;<trigger_id>;<duration>;<author>;<comment>
        """

        cmd = "SCHEDULE_HOST_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, host, minutes)
        self._write_command(dt_cmd_str)

    def schedule_host_svc_downtime(self, host, minutes=30):
        """
        This command is used to schedule downtime for
        all services associated with a particular host.

        During the specified downtime, Nagios will not send
        notifications out about the services.

        Syntax: SCHEDULE_HOST_SVC_DOWNTIME;<host_name>;<start_time>;
        <end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment>
        """

        cmd = "SCHEDULE_HOST_SVC_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, host, minutes)
        self._write_command(dt_cmd_str)

    def schedule_hostgroup_host_downtime(self, hostgroup, minutes=30):
        """
        This command is used to schedule downtime for all hosts in a
        particular hostgroup.

        During the specified downtime, Nagios will not send
        notifications out about the hosts.

        Syntax: SCHEDULE_HOSTGROUP_HOST_DOWNTIME;<hostgroup_name>;
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """

        cmd = "SCHEDULE_HOSTGROUP_HOST_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, hostgroup, minutes)
        self._write_command(dt_cmd_str)

    def schedule_hostgroup_svc_downtime(self, hostgroup, minutes=30):
        """
        This command is used to schedule downtime for all services in
        a particular hostgroup.

        During the specified downtime, Nagios will not send
        notifications out about the services.

        Note that scheduling downtime for services does not
        automatically schedule downtime for the hosts those services
        are associated with.

        Syntax: SCHEDULE_HOSTGROUP_SVC_DOWNTIME;<hostgroup_name>;
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """

        cmd = "SCHEDULE_HOSTGROUP_SVC_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, hostgroup, minutes)
        self._write_command(dt_cmd_str)

    def schedule_servicegroup_host_downtime(self, servicegroup, minutes=30):
        """
        This command is used to schedule downtime for all hosts in a
        particular servicegroup.

        During the specified downtime, Nagios will not send
        notifications out about the hosts.

        Syntax: SCHEDULE_SERVICEGROUP_HOST_DOWNTIME;<servicegroup_name>;
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """

        cmd = "SCHEDULE_SERVICEGROUP_HOST_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, servicegroup, minutes)
        self._write_command(dt_cmd_str)

    def schedule_servicegroup_svc_downtime(self, servicegroup, minutes=30):
        """
        This command is used to schedule downtime for all services in
        a particular servicegroup.

        During the specified downtime, Nagios will not send
        notifications out about the services.

        Note that scheduling downtime for services does not
        automatically schedule downtime for the hosts those services
        are associated with.

        Syntax: SCHEDULE_SERVICEGROUP_SVC_DOWNTIME;<servicegroup_name>;
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """

        cmd = "SCHEDULE_SERVICEGROUP_SVC_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, servicegroup, minutes)
        self._write_command(dt_cmd_str)

    def disable_host_svc_notifications(self, host):
        """
        This command is used to prevent notifications from being sent
        out for all services on the specified host.

        Note that this command does not disable notifications from
        being sent out about the host.

        Syntax: DISABLE_HOST_SVC_NOTIFICATIONS;<host_name>
        """

        cmd = "DISABLE_HOST_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, host)
        self._write_command(notif_str)

    def disable_host_notifications(self, host):
        """
        This command is used to prevent notifications from being sent
        out for the specified host.

        Note that this command does not disable notifications for
        services associated with this host.

        Syntax: DISABLE_HOST_NOTIFICATIONS;<host_name>
        """

        cmd = "DISABLE_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, host)
        self._write_command(notif_str)

    def disable_svc_notifications(self, host, services=None):
        """
        This command is used to prevent notifications from being sent
        out for the specified service.

        Note that this command does not disable notifications from
        being sent out about the host.

        services - List of service names; one command is written per
          service. None/empty writes nothing.

        Syntax: DISABLE_SVC_NOTIFICATIONS;<host_name>;<service_description>
        """

        cmd = "DISABLE_SVC_NOTIFICATIONS"
        for service in (services or []):
            notif_str = self._fmt_notif_str(cmd, host, svc=service)
            self._write_command(notif_str)

    def disable_servicegroup_host_notifications(self, servicegroup):
        """
        This command is used to prevent notifications from being sent
        out for all hosts in the specified servicegroup.

        Note that this command does not disable notifications for
        services associated with hosts in this service group.

        Syntax: DISABLE_SERVICEGROUP_HOST_NOTIFICATIONS;<servicegroup_name>
        """

        cmd = "DISABLE_SERVICEGROUP_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, servicegroup)
        self._write_command(notif_str)

    def disable_servicegroup_svc_notifications(self, servicegroup):
        """
        This command is used to prevent notifications from being sent
        out for all services in the specified servicegroup.

        Note that this does not prevent notifications from being sent
        out about the hosts in this servicegroup.

        Syntax: DISABLE_SERVICEGROUP_SVC_NOTIFICATIONS;<servicegroup_name>
        """

        cmd = "DISABLE_SERVICEGROUP_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, servicegroup)
        self._write_command(notif_str)

    def disable_hostgroup_host_notifications(self, hostgroup):
        """
        Disables notifications for all hosts in a particular
        hostgroup.

        Note that this does not disable notifications for the services
        associated with the hosts in the hostgroup - see the
        DISABLE_HOSTGROUP_SVC_NOTIFICATIONS command for that.

        Syntax: DISABLE_HOSTGROUP_HOST_NOTIFICATIONS;<hostgroup_name>
        """

        cmd = "DISABLE_HOSTGROUP_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, hostgroup)
        self._write_command(notif_str)

    def disable_hostgroup_svc_notifications(self, hostgroup):
        """
        Disables notifications for all services associated with hosts
        in a particular hostgroup.

        Note that this does not disable notifications for the hosts in
        the hostgroup - see the DISABLE_HOSTGROUP_HOST_NOTIFICATIONS
        command for that.

        Syntax: DISABLE_HOSTGROUP_SVC_NOTIFICATIONS;<hostgroup_name>
        """

        cmd = "DISABLE_HOSTGROUP_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, hostgroup)
        self._write_command(notif_str)

    def enable_host_notifications(self, host):
        """
        Enables notifications for a particular host.

        Note that this command does not enable notifications for
        services associated with this host.

        Syntax: ENABLE_HOST_NOTIFICATIONS;<host_name>
        """

        cmd = "ENABLE_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, host)
        self._write_command(notif_str)

    def enable_host_svc_notifications(self, host):
        """
        Enables notifications for all services on the specified host.

        Note that this does not enable notifications for the host.

        Returns the command string written, or a "Fail: ..." message
        when the command file could not be written.

        Syntax: ENABLE_HOST_SVC_NOTIFICATIONS;<host_name>
        """

        cmd = "ENABLE_HOST_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, host)
        nagios_return = self._write_command(notif_str)

        if nagios_return:
            return notif_str
        else:
            return "Fail: could not write to the command file"

    def enable_svc_notifications(self, host, services=None):
        """
        Enables notifications for a particular service.

        Note that this does not enable notifications for the host.

        services - List of service names; one command is written per
          service. None/empty writes nothing.

        Returns the list of command strings written, or a "Fail: ..."
        message when any write failed.

        Syntax: ENABLE_SVC_NOTIFICATIONS;<host_name>;<service_description>
        """

        cmd = "ENABLE_SVC_NOTIFICATIONS"
        nagios_return = True
        return_str_list = []
        for service in (services or []):
            notif_str = self._fmt_notif_str(cmd, host, svc=service)
            nagios_return = self._write_command(notif_str) and nagios_return
            return_str_list.append(notif_str)

        if nagios_return:
            return return_str_list
        else:
            return "Fail: could not write to the command file"

    def enable_hostgroup_host_notifications(self, hostgroup):
        """
        Enables notifications for all hosts in a particular hostgroup.

        Note that this command does not enable notifications for
        services associated with the hosts in this hostgroup.

        Returns the command string written, or a "Fail: ..." message
        when the command file could not be written.

        Syntax: ENABLE_HOSTGROUP_HOST_NOTIFICATIONS;<hostgroup_name>
        """

        cmd = "ENABLE_HOSTGROUP_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, hostgroup)
        nagios_return = self._write_command(notif_str)

        if nagios_return:
            return notif_str
        else:
            return "Fail: could not write to the command file"

    def enable_hostgroup_svc_notifications(self, hostgroup):
        """
        Enables notifications for all services that are associated
        with hosts in a particular hostgroup.

        Note that this does not enable notifications for the hosts in
        this hostgroup.

        Returns the command string written, or a "Fail: ..." message
        when the command file could not be written.

        Syntax: ENABLE_HOSTGROUP_SVC_NOTIFICATIONS;<hostgroup_name>
        """

        cmd = "ENABLE_HOSTGROUP_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, hostgroup)
        nagios_return = self._write_command(notif_str)

        if nagios_return:
            return notif_str
        else:
            return "Fail: could not write to the command file"

    def enable_servicegroup_host_notifications(self, servicegroup):
        """
        Enables notifications for all hosts that have services that
        are members of a particular servicegroup.

        Note that this command does not enable notifications for
        services associated with the hosts in this servicegroup.

        Returns the command string written, or a "Fail: ..." message
        when the command file could not be written.

        Syntax: ENABLE_SERVICEGROUP_HOST_NOTIFICATIONS;<servicegroup_name>
        """

        cmd = "ENABLE_SERVICEGROUP_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, servicegroup)
        nagios_return = self._write_command(notif_str)

        if nagios_return:
            return notif_str
        else:
            return "Fail: could not write to the command file"

    def enable_servicegroup_svc_notifications(self, servicegroup):
        """
        Enables notifications for all services that are members of a
        particular servicegroup.

        Note that this does not enable notifications for the hosts in
        this servicegroup.

        Returns the command string written, or a "Fail: ..." message
        when the command file could not be written.

        Syntax: ENABLE_SERVICEGROUP_SVC_NOTIFICATIONS;<servicegroup_name>
        """

        cmd = "ENABLE_SERVICEGROUP_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, servicegroup)
        nagios_return = self._write_command(notif_str)

        if nagios_return:
            return notif_str
        else:
            return "Fail: could not write to the command file"

    def silence_host(self, host):
        """
        This command is used to prevent notifications from being sent
        out for the host and all services on the specified host.

        This is equivalent to calling disable_host_svc_notifications
        and disable_host_notifications.

        Returns the list of command strings written, or a "Fail: ..."
        message when any write failed.

        Syntax: DISABLE_HOST_SVC_NOTIFICATIONS;<host_name>
        Syntax: DISABLE_HOST_NOTIFICATIONS;<host_name>
        """

        cmd = [
            "DISABLE_HOST_SVC_NOTIFICATIONS",
            "DISABLE_HOST_NOTIFICATIONS"
        ]
        nagios_return = True
        return_str_list = []
        for c in cmd:
            notif_str = self._fmt_notif_str(c, host)
            nagios_return = self._write_command(notif_str) and nagios_return
            return_str_list.append(notif_str)

        if nagios_return:
            return return_str_list
        else:
            return "Fail: could not write to the command file"

    def unsilence_host(self, host):
        """
        This command is used to enable notifications for the host and
        all services on the specified host.

        This is equivalent to calling enable_host_svc_notifications
        and enable_host_notifications.

        Returns the list of command strings written, or a "Fail: ..."
        message when any write failed.

        Syntax: ENABLE_HOST_SVC_NOTIFICATIONS;<host_name>
        Syntax: ENABLE_HOST_NOTIFICATIONS;<host_name>
        """

        cmd = [
            "ENABLE_HOST_SVC_NOTIFICATIONS",
            "ENABLE_HOST_NOTIFICATIONS"
        ]
        nagios_return = True
        return_str_list = []
        for c in cmd:
            notif_str = self._fmt_notif_str(c, host)
            nagios_return = self._write_command(notif_str) and nagios_return
            return_str_list.append(notif_str)

        if nagios_return:
            return return_str_list
        else:
            return "Fail: could not write to the command file"

    def act(self):
        """
        Dispatch on `self.action` / `self.services`, write the matching
        Nagios command(s), then report the commands written through
        the module's `exit_json`.

        An unrecognised action is reported through `fail_json`.
        """
        # host or service downtime?
        if self.action == 'downtime':
            if self.services == 'host':
                self.schedule_host_downtime(self.host, self.minutes)
            elif self.services == 'all':
                self.schedule_host_svc_downtime(self.host, self.minutes)
            else:
                self.schedule_svc_downtime(self.host,
                                           services=self.services,
                                           minutes=self.minutes)

        # toggle the host AND service alerts
        elif self.action == 'silence':
            self.silence_host(self.host)

        elif self.action == 'unsilence':
            self.unsilence_host(self.host)

        # toggle host/svc alerts
        elif self.action == 'enable_alerts':
            if self.services == 'host':
                self.enable_host_notifications(self.host)
            elif self.services == 'all':
                # 'all' is kept as a plain string by __init__; passing
                # it to the per-service method would iterate over its
                # characters, so use the all-services command instead.
                self.enable_host_svc_notifications(self.host)
            else:
                self.enable_svc_notifications(self.host,
                                              services=self.services)

        elif self.action == 'disable_alerts':
            if self.services == 'host':
                self.disable_host_notifications(self.host)
            elif self.services == 'all':
                # Same reasoning as for enable_alerts above.
                self.disable_host_svc_notifications(self.host)
            else:
                self.disable_svc_notifications(self.host,
                                               services=self.services)
        # wtf?
        else:
            self.module.fail_json(msg="unknown action specified: '%s'" %
                                  self.action)

        self.module.exit_json(nagios_commands=self.command_results,
                              changed=True)