#!/bin/sh

# ########################################################################
# This program is part of Percona Monitoring Plugins
# License: GPL License (see COPYING)
# Authors:
#  Baron Schwartz
# ########################################################################

# ########################################################################
# Redirect STDERR to STDOUT; Nagios doesn't handle STDERR.
# ########################################################################
exec 2>&1

# ########################################################################
# Set up constants, etc.
# These are the exit statuses used by the dispatch code at the end of this
# file, which picks one according to the first word of the status line
# printed by main (OK, WARN, CRIT or UNK).
# ########################################################################
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
# Not referenced anywhere else in the visible file.
STATE_DEPENDENT=4

# ########################################################################
# Run the program.
#
# Options:
#   -w WARN     warning threshold, in percent (default 90)
#   -c CRIT     critical threshold, in percent (default 95)
#   -d          skip the largest-process check
#   --version   print the version line from the embedded documentation
#   --help      print the usage paragraph from the embedded documentation
#
# Prints exactly one status line on STDOUT.  The line begins with a single
# status word (OK, WARN, CRIT or UNK); the dispatch code at the end of this
# file maps that first word to the exit status.  When memory statistics were
# parsed, the line ends with " | memory_used=PCT;WARN;CRIT;0;100".
#
# The status word is the more severe of the two checks (overall memory used,
# and the largest process's virtual size relative to total memory).  If the
# used-memory percentage cannot be obtained as a plain integer, the result is
# "UNK cannot find memory statistics" rather than a misleading OK.
#
# Relies on get_total_memory, get_used_memory_pct, get_largest_process,
# pct_of and is_not_sourced, plus the external commands free, ps and mktemp.
# ########################################################################
main() {

   # Get options
   for o; do
      case "${o}" in
         -w)        shift; OPT_WARN="${1}"; shift; ;;
         -d)        shift; OPT_DISABLELP=1; ;;
         -c)        shift; OPT_CRIT="${1}"; shift; ;;
         --version) grep -A2 '^=head1 VERSION' "$0" | tail -n1; exit 0 ;;
         --help)    perl -00 -ne 'm/^  Usage:/ && print' "$0"; exit 0 ;;
         -*)        echo "Unknown option ${o}.  Try --help."; exit 1; ;;
      esac
   done
   OPT_WARN=${OPT_WARN:-90}
   OPT_CRIT=${OPT_CRIT:-95}
   if is_not_sourced; then
      if [ -n "$1" ]; then
         echo "WARN spurious command-line options: $*"
         exit 1
      fi
   fi

   NOTE="UNK cannot find memory statistics"
   TEMP=$(mktemp -t "${0##*/}.XXXXXX") || exit $?
   # The temp file name is expanded now, so the trap removes this exact file.
   trap "rm -f '${TEMP}' >/dev/null 2>&1" EXIT
   IS_CRIT=""
   if free -m > "${TEMP}" ; then
      MEM_TOT=$(get_total_memory "${TEMP}")
      USD_PCT=$(get_used_memory_pct "${TEMP}")

      # Anything other than a plain non-negative integer means the output of
      # free could not be parsed; treat it as missing so NOTE stays UNK.
      case "${USD_PCT}" in
         *[!0-9]*) USD_PCT="" ;;
      esac

      if [ -n "${USD_PCT}" ]; then
         # Track the severity separately and prepend it once at the end, so
         # the line never carries two status words (e.g. "CRIT OK Memory...").
         SEVERITY="OK"
         if [ "${USD_PCT}" -ge "${OPT_CRIT}" ]; then
            SEVERITY="CRIT"
            IS_CRIT=1
         elif [ "${USD_PCT}" -ge "${OPT_WARN}" ]; then
            SEVERITY="WARN"
         fi
         NOTE="Memory ${USD_PCT}% used"

         if [ -z "${OPT_DISABLELP}" ]; then
            # TEMP is reused: the free output has already been consumed.
            if ps -eo pid,vsz,ucomm > "${TEMP}" ; then
               BIGGEST=$(get_largest_process "${TEMP}")
               BIG_PID=$(printf '%s\n' "${BIGGEST}" | awk '{print $1}')
               BIG_VSZ=$(printf '%s\n' "${BIGGEST}" | awk '{print $2}')
               BIG_COMMAND=$(printf '%s\n' "${BIGGEST}" | awk '{print $3}')
               PID_PCT=$(pct_of "${BIG_VSZ:-0}" "${MEM_TOT:-0}")
               NOTE="$NOTE. Largest process: ${BIG_COMMAND} (${BIG_PID}) = ${BIG_VSZ}MB (${PID_PCT}%)"
               if [ "${PID_PCT:-0}" -ge "${OPT_CRIT}" ]; then
                  SEVERITY="CRIT"
               elif [ -z "${IS_CRIT}" ] && [ "${PID_PCT:-0}" -ge "${OPT_WARN}" ]; then
                  # The IS_CRIT guard prevents a mere warning from downgrading
                  # a critical that was triggered by overall memory used.
                  SEVERITY="WARN"
               fi
            fi
         fi

         # Build the common perf data output for graph trending
         PERFDATA="memory_used=${USD_PCT};${OPT_WARN};${OPT_CRIT};0;100"
         NOTE="${SEVERITY} ${NOTE} | ${PERFDATA}"
      fi
   fi

   printf '%s\n' "${NOTE}"
}

# ########################################################################
# Print the total amount of memory found in saved 'free -m' output.
#   $1 - path of a file holding that output, for example:
#              total       used       free     shared    buffers     cached
# Mem:         16046      15212        834          0       4020       7402
# -/+ buffers/cache:       3788      12258
# Swap:         1983          0       1983
# For every line containing "Mem:" the second field is printed, so the sample
# above yields 16046.  In that sample 3788 of the 16046 are really in use
# (about 23.6%) and 12258 are available (about 76.4%).
# ########################################################################
get_total_memory() {
   awk '{
      if ($0 ~ /Mem:/) {
         print $2
      }
   }' "$1"
}

# ########################################################################
# Get the used memory (excluding buffers/cache) from the output of 'free -m',
# printed as an integer percentage of total memory (fraction truncated).
#   $1 - path of a file holding the output of 'free -m'
# Two layouts are understood:
#   * With a "-/+ buffers/cache:" line: its "used" column (field 3) is the
#     figure that excludes buffers and cache, so it takes precedence.
#   * Without that line: the "used" column of the "Mem:" line is taken as is.
# Prints nothing when there is no "Mem:" line or the total is not a positive
# number, so the caller can tell that the statistics were not found.
# ########################################################################
get_used_memory_pct() {
   awk '
      /Mem:/   { tot = $2; used = $3; }
      /cache:/ { used = $3; }
      END {
         # "tot + 0" forces a numeric test and avoids dividing by zero.
         if (tot + 0 > 0) {
            printf "%d\n", used / tot * 100;
         }
      }' "$1"
}

# ########################################################################
# Find the process with the largest virtual size in saved 'ps' output.
#   $1 - path of a file holding the output of: ps -eo pid,vsz,ucomm
# Prints one line, "PID SIZE COMMAND", where SIZE is the vsz column divided
# by 1024 (kiB to MiB) with two decimals.  Lines whose first field contains
# anything but digits (such as the header) are ignored, and on a tie the
# earliest line wins.  With no usable line the output is "0 0.00 ".
# Roughly equivalent to: ps -eo pid,vsz,ucomm | sort -n -k2,2 | tail -n1
# ########################################################################
get_largest_process(){
   awk '
      BEGIN {
         top_pid  = 0;
         top_kib  = 0;
         top_name = "";
      }
      {
         if ($1 !~ /[^0-9]/ && $2 > top_kib) {
            top_pid  = $1;
            top_kib  = $2;
            top_name = $3;
         }
      }
      END {
         printf "%d %.2f %s\n", top_pid, top_kib / 1024, top_name;
      }' "$1"
}

# ########################################################################
# Print what percent $1 is of $2, as an integer (fraction truncated).
# Prints 0 when $2 is not greater than zero, or when it is missing.
# ########################################################################
pct_of() {
   echo "$@" | awk '{
      pct = ($2 > 0) ? $1 / $2 * 100 : 0;
      printf "%d\n", pct;
   }'
}

# ########################################################################
# Determine whether this program is being executed directly, or sourced/included
# from another file.
#
# Takes no arguments and prints nothing.  Returns success (0) when either
#   * the basename of $0 is exactly "pmp-check-unix-memory", or
#   * the basename of $0 is "bash" and "$_" is equal to "$0";
# otherwise returns non-zero.  Because the first test compares against a
# literal name, running the script under a different file name makes it fail.
#
# NOTE(review): "$_" is a special parameter maintained by the shell, so the
# second test depends on which shell is running and on the command executed
# immediately before it.  Confirm it still detects the intended case before
# reordering or restructuring this line.
# ########################################################################
is_not_sourced() {
   [ "${0##*/}" = "pmp-check-unix-memory" ] || [ "${0##*/}" = "bash" -a "$_" = "$0" ]
}

# ########################################################################
# Entry point: run main unless this file was included from another file, so
# that the functions above can be loaded and tested without executing.
# The first word of the line printed by main selects the exit status; any
# output that does not start with OK, WARN or CRIT (including UNK) exits
# with STATE_UNKNOWN.
# ########################################################################
if is_not_sourced; then
   OUTPUT=$(main "$@")
   case "${OUTPUT}" in
      OK*)   EXITSTATUS=$STATE_OK ;;
      WARN*) EXITSTATUS=$STATE_WARNING ;;
      CRIT*) EXITSTATUS=$STATE_CRITICAL ;;
      *)     EXITSTATUS=$STATE_UNKNOWN ;;
   esac
   echo "${OUTPUT}"
   exit $EXITSTATUS
fi

# ############################################################################
# Documentation
# ############################################################################
: <<'DOCUMENTATION'
=pod

=head1 NAME

pmp-check-unix-memory - Alert on low memory, or when a process uses too much
memory.

=head1 SYNOPSIS

  Usage: pmp-check-unix-memory [OPTIONS]
  Options:
    -c CRIT     Critical threshold; default 95%.
    -d          Disable single process memory check; default enabled.
    -w WARN     Warning threshold; default 90%.
    --help      Print help and exit.
    --version   Print version and exit.
  Options must be given as --option value, not --option=value or -Ovalue.
  Use perldoc to read embedded documentation with more details.

=head1 DESCRIPTION

This Nagios plugin examines the output of C<free> and C<ps> to look for two
conditions.  The first is when the system is running out of memory, and the
second is when any single process uses too much memory, measured as the
process's virtual size relative to total memory.  The second check can be
disabled with the C<-d> option.  Both checks are compared to the same
thresholds.

For example, on a system with 144GB of memory, with the default options, the
plugin might produce the following warning:

  WARN Memory: 94% used; largest process: mysqld (16675) = 133149.54MB (91%)

Both the total used memory (94%) and the virtual size of mysqld (91%) are over
the warning threshold.  If one of these tests evaluates to a higher severity
than the other, the more serious result prevails.

The plugin is tested on GNU/Linux, but may also work on other Unix-like
operating systems, if the output of C<ps> and C<free> can be recognized.

=head1 PRIVILEGES

This plugin does not access MySQL.

This plugin executes the following UNIX commands that may need special privileges:

=over

=item *

free

=item *

ps

=back

=head1 COPYRIGHT, LICENSE, AND WARRANTY

This program is copyright 2012-2013 Baron Schwartz, 2012-2013 Percona Inc.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2.  You should have received a copy of the GNU General
Public License along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 VERSION

Percona Monitoring Plugins pmp-check-unix-memory 1.0.3

=cut

DOCUMENTATION
