diff --git a/checkmk-agent.nix b/checkmk-agent.nix new file mode 100644 index 0000000..481f8a0 --- /dev/null +++ b/checkmk-agent.nix @@ -0,0 +1,22 @@ +{ lib, pkgs, ... }: + +let + agentcmd = "${ (pkgs.callPackage ./checkmk-agent/default.nix { }) }/bin/check_mk_agent.sshwrapper"; +in +{ + users.users.mon = { + isNormalUser = true; + extraGroups = [ "wheel" ]; + openssh.authorizedKeys.keys = [ + ''restrict,command="${ agentcmd }" ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGHkivi9Ye/Uj4ZQxrEfarSaz0iLF/XXhY/crNsLoDMu checkmk'' + ]; + }; + security.sudo.extraRules = [ { + users = [ "mon" ]; + commands = [ { + command = agentcmd; + options = [ "NOPASSWD" ]; + } ]; + } + ]; +} diff --git a/checkmk-agent/agent.tar.gz b/checkmk-agent/agent.tar.gz new file mode 100644 index 0000000..a2de381 Binary files /dev/null and b/checkmk-agent/agent.tar.gz differ diff --git a/checkmk-agent/default.nix b/checkmk-agent/default.nix new file mode 100644 index 0000000..aa37d4e --- /dev/null +++ b/checkmk-agent/default.nix @@ -0,0 +1,26 @@ +{ stdenv, lib, makeWrapper }: + +stdenv.mkDerivation rec { + name = "checkmk-agent"; + src = ./agent.tar.gz; + phases = [ "unpackPhase" "installPhase" "fixupPhase" "wrapPhase" ]; + buildInputs = [ ]; + nativeBuildInputs = [ makeWrapper ]; + + installPhase = '' + mkdir -p $out/bin + install -m 0755 check_mk_agent.linux $out/bin/check_mk_agent + substitute sshwrapper $out/bin/check_mk_agent.sshwrapper \ + --replace SUDOCOMMAND "$out/bin/check_mk_agent" + chmod 0755 $out/bin/check_mk_agent.sshwrapper + ''; + wrapPhase = '' + wrapProgram $out/bin/check_mk_agent --suffix PATH : /run/current-system/sw/bin + ''; + + meta = with lib; { + description = "checkmk-agent"; + platforms = platforms.all; + }; +} + diff --git a/checkmk-agent/files/check_mk_agent.linux b/checkmk-agent/files/check_mk_agent.linux new file mode 100644 index 0000000..1ed035c --- /dev/null +++ b/checkmk-agent/files/check_mk_agent.linux @@ -0,0 +1,2294 @@ +#!/bin/bash +# Copyright (C) 2019 Checkmk GmbH - License: GNU General Public License v2 +# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and +# conditions defined in the file COPYING, which is part of this source code package. + +# +# BEGIN COMMON AGENT CODE +# + +usage() { + cat </dev/null 2>&1 +} + +get_file_atime() { + stat -c %X "${1}" 2>/dev/null || + stat -f %a "${1}" 2>/dev/null || + perl -e 'if (! -f $ARGV[0]){die "0000000"};$atime=(stat($ARGV[0]))[8];print $atime."\n";' "${1}" +} + +get_file_mtime() { + stat -c %Y "${1}" 2>/dev/null || + stat -f %m "${1}" 2>/dev/null || + perl -e 'if (! -f $ARGV[0]){die "0000000"};$mtime=(stat($ARGV[0]))[9];print $mtime."\n";' "${1}" +} + +is_valid_plugin() { + # test if a file is executable and does not have certain + # extensions (remnants from distro upgrades). + case "${1:?No plugin defined}" in + *.dpkg-new | *.dpkg-old | *.dpkg-temp | *.dpkg-tmp) return 1 ;; + *) [ -f "${1}" ] && [ -x "${1}" ] ;; + esac +} + +set_up_process_commandline_arguments() { + while [ -n "${1}" ]; do + case "${1}" in + -d | --debug) + set -xv + DISABLE_STDERR=false + shift + ;; + + -p | --profile) + LOG_SECTION_TIME=true + # disable caching to get the whole execution time + DISABLE_CACHING=true + shift + ;; + + --force-inventory) + export MK_FORCE_INVENTORY=true + shift + ;; + + -h | --help) + usage + exit 1 + ;; + + *) + shift + ;; + esac + done +} + +set_up_get_epoch() { + # On some systems date +%s returns a literal %s + if date +%s | grep "^[0-9].*$" >/dev/null 2>&1; then + get_epoch() { date +%s; } + else + # do not check whether perl is even present. + # in weird cases we may be fine without get_epoch. + get_epoch() { perl -e 'print($^T."\n");'; } + fi +} + +set_up_current_shell() { + # Note the current shell may not be the same as what is specified in the + # shebang, e.g. when reconfigured in the xinetd/systemd/whateverd config file + CURRENT_SHELL="$(ps -o args= -p $$ | cut -d' ' -f1)" +} + +# +# END COMMON AGENT CODE +# + +set_variable_defaults() { + # some 'booleans' + [ "${MK_RUN_SYNC_PARTS}" = "false" ] || MK_RUN_SYNC_PARTS=true + [ "${MK_RUN_ASYNC_PARTS}" = "false" ] || MK_RUN_ASYNC_PARTS=true + + # WATCH OUT: These 5 lines are searched for and replaced by the + # agent bakery! + # TODO: CMK-8339 (proper configuration) + : "${MK_LIBDIR:="/usr/lib/check_mk_agent"}" + : "${MK_CONFDIR:="/etc/check_mk"}" + : "${MK_VARDIR:="/var/lib/check_mk_agent"}" + : "${MK_LOGDIR:="/var/log/check_mk_agent"}" + : "${MK_BIN:="/usr/bin"}" + + export MK_LIBDIR + export MK_CONFDIR + export MK_VARDIR + export MK_LOGDIR + export MK_BIN + + # Optionally set a tempdir for all subsequent calls + #export TMPDIR= + + # All executables in PLUGINSDIR will simply be executed and their + # ouput appended to the output of the agent. Plugins define their own + # sections and must output headers with '<<<' and '>>>' + PLUGINSDIR=${MK_LIBDIR}/plugins + + # All executables in LOCALDIR will by executabled and their + # output inserted into the section <<>>. Please + # refer to online documentation for details about local checks. + LOCALDIR=${MK_LIBDIR}/local + + # All files in SPOOLDIR will simply appended to the agent + # output if they are not outdated (see below) + SPOOLDIR=${MK_VARDIR}/spool +} + +set_up_path() { + _PATH="${1}" + # Make sure that locally installed binaries are found + # Only add binaries if they are not already in the path! If you append to path in a loop the process will + # eventually each the 128k size limit for the environment and become a zombie process. See execve manpage. + [ "${_PATH#*"/usr/local/bin"}" != "${_PATH}" ] || _PATH="${_PATH}:/usr/local/bin" + [ -n "${MK_BIN}" ] && { [ "${_PATH#*"${MK_BIN}"}" != "${_PATH}" ] || _PATH="${_PATH}:${MK_BIN}"; } + [ -d "/var/qmail/bin" ] && { [ "${_PATH#*"/var/qmail/bin"}" != "${_PATH}" ] || _PATH="${_PATH}:/var/qmail/bin"; } + echo "${_PATH}" + unset _PATH +} + +set_up_remote() { + # Provide information about the remote host. That helps when data + # is being sent only once to each remote host. + REMOTE="${REMOTE_HOST:-"${REMOTE_ADDR:-"${SSH_CLIENT%% *}"}"}" + + # If none of the above are set *and* we are configured to, try to read it from stdin + [ -z "${REMOTE}" ] && [ "${MK_READ_REMOTE}" = "true" ] && read -r REMOTE + + export REMOTE +} + +announce_remote() { + # let RTCs know about this remote + [ -d "${MK_VARDIR}/rtc_remotes" ] || mkdir "${MK_VARDIR}/rtc_remotes" + [ -n "${REMOTE}" ] && [ "${REMOTE}" != "push-connection" ] && touch "${MK_VARDIR}/rtc_remotes/${REMOTE}" +} + +# +# BEGIN COMMON AGENT CODE +# + +# SC2089: Quotes/backslashes will be treated literally. Use an array. +# shellcheck disable=SC2089 +MK_DEFINE_LOG_SECTION_TIME='_log_section_time() { "$@"; }' +finalize_profiling() { :; } + +set_up_profiling() { + + PROFILING_CONFIG="${MK_CONFDIR}/profiling.cfg" + if [ -e "${PROFILING_CONFIG}" ]; then + # Config vars: + # LOG_SECTION_TIME=true/false + # DISABLE_CACHING=true/false + + # If LOG_SECTION_TIME=true via profiling.cfg do NOT disable caching in order + # to get the real execution time during operation. + # shellcheck disable=SC1090 + . "${PROFILING_CONFIG}" + fi + + PROFILING_LOGFILE_DIR="${MK_LOGDIR}/profiling/$(date +%Y%m%d_%H%M%S)" + + if ${LOG_SECTION_TIME:-false}; then + mkdir -p "${PROFILING_LOGFILE_DIR}" + agent_start="$(perl -MTime::HiRes=time -le 'print time()')" + + # SC2016: Expressions don't expand in single quotes, use double quotes for that. + # SC2089: Quotes/backslashes will be treated literally. Use an array. + # shellcheck disable=SC2016,SC2089 + MK_DEFINE_LOG_SECTION_TIME='_log_section_time() { + section_func="$@" + + base_name=$(echo "${section_func}" | sed "s/[^A-Za-z0-9.-]/_/g") + profiling_logfile="'"${PROFILING_LOGFILE_DIR}"'/${base_name}.log" + + start="$(perl -MTime::HiRes=time -le "print time()")" + { time ${section_func}; } 2>> "${profiling_logfile}" + echo "runtime $(perl -MTime::HiRes=time -le "print time() - ${start}")" >> "${profiling_logfile}" + }' + + finalize_profiling() { + pro_log_file="${PROFILING_LOGFILE_DIR}/profiling_check_mk_agent.log" + agent_end="$(perl -MTime::HiRes=time -le 'print time()')" + echo "runtime $(echo "${agent_end} - ${agent_start}" | bc)" >>"${pro_log_file}" + } + fi + + eval "${MK_DEFINE_LOG_SECTION_TIME}" + # SC2090: Quotes/backslashes in this variable will not be respected. + # shellcheck disable=SC2090 + export MK_DEFINE_LOG_SECTION_TIME +} + +unset_locale() { + # eliminate localized outputs where possible + # The locale logic here is used to make the Python encoding detection work (see CMK-2778). + unset -v LANG LC_ALL + if inpath locale && inpath paste; then + # match C.UTF-8 at the beginning, but not e.g. es_EC.UTF-8! + case "$(locale -a | paste -sd ' ' -)" in + *' C.UTF-8'* | 'C.UTF-8'*) LC_ALL="C.UTF-8" ;; + *' C.utf8'* | 'C.utf8'*) LC_ALL="C.utf8" ;; + esac + fi + LC_ALL="${LC_ALL:-C}" + export LC_ALL +} + +# +# END COMMON AGENT CODE +# + +read_python_version() { + if inpath "${1}"; then + version=$(${1} -c 'import sys; print("%s.%s"%(sys.version_info[0], sys.version_info[1]))') + + major=${version%%.*} + minor=${version##*.} + + if [ "${major}" -eq "${2}" ] && [ "${minor}" -ge "${3}" ]; then + echo "${1}" + return 0 + fi + fi + return 1 +} + +detect_python() { + PYTHON3=$(read_python_version python3 3 4 || read_python_version python 3 4) + PYTHON2=$(read_python_version python2 2 6 || read_python_version python 2 6) + if [ -f "${MK_CONFDIR}/python_path.cfg" ]; then + # shellcheck source=/dev/null + . "${MK_CONFDIR}/python_path.cfg" + fi + export PYTHON2 PYTHON3 + + if [ -z "${PYTHON2}" ] && [ -z "${PYTHON3}" ]; then + NO_PYTHON=true + elif [ -n "${PYTHON3}" ] && [ "$( + ${PYTHON3} -c 'pass' >/dev/null 2>&1 + echo $? + )" -eq 127 ]; then + WRONG_PYTHON_COMMAND=true + elif [ -z "${PYTHON3}" ] && [ "$( + ${PYTHON2} -c 'pass' >/dev/null 2>&1 + echo $? + )" -eq 127 ]; then + WRONG_PYTHON_COMMAND=true + fi +} + +detect_container_environment() { + if [ -f /.dockerenv ]; then + IS_DOCKERIZED=1 + elif grep container=lxc /proc/1/environ >/dev/null 2>&1; then + # Works in lxc environment e.g. on Ubuntu bionic, but does not + # seem to work in proxmox (see CMK-1561) + IS_LXC_CONTAINER=1 + elif grep 'lxcfs /proc/cpuinfo fuse.lxcfs' /proc/mounts >/dev/null 2>&1; then + # Seems to work in proxmox + IS_LXC_CONTAINER=1 + else + unset IS_DOCKERIZED + unset IS_LXC_CONTAINER + fi + + if [ -n "${IS_DOCKERIZED}" ] || [ -n "${IS_LXC_CONTAINER}" ]; then + if [ "$(stat -fc'%t' /sys/fs/cgroup)" = "63677270" ]; then + IS_CGROUP_V2=1 + CGROUP_SECTION_SUFFIX="_cgroupv2" + else + unset IS_CGROUP_V2 + unset CGROUP_SECTION_SUFFIX + fi + fi +} + +# Prefer (relatively) new /usr/bin/timeout from coreutils against +# our shipped waitmax. waitmax is statically linked and crashes on +# some Ubuntu versions recently. +if inpath timeout; then + waitmax() { + timeout "$@" + } +fi + +encryption_panic() { + echo "<<>>" + echo "EncryptionPanic: true" + exit 1 +} + +set_up_encryption() { + # shellcheck source=agents/cfg_examples/encryption.cfg + [ -f "${MK_CONFDIR}/encryption.cfg" ] && { + . "${MK_CONFDIR}/encryption.cfg" || encryption_panic + } + define_optionally_encrypt "${ENCRYPTED:-"no"}" +} + +hex_decode() { + # We might not have xxd available, so we have to do it manually. + # Be aware that this implementation is very slow and should not be used for large data. + local hex="$1" + for ((i = 0; i < ${#hex}; i += 2)); do + printf '%b' "\x${hex:i:2}" + done +} + +parse_kdf_output() { + local kdf_output="$1" + salt_hex=$(echo "$kdf_output" | grep -oP "(?<=salt=)[0-9A-F]+") + key_hex=$(echo "$kdf_output" | grep -oP "(?<=key=)[0-9A-F]+") + iv_hex=$(echo "$kdf_output" | grep -oP "(?<=iv =)[0-9A-F]+") + # Make sure this rather brittle grepping worked. For example, some openssl update might decide + # to remove that odd-looking space behind 'iv'. + # Note that the expected LENGTHS ARE DOUBLED because the values are hex encoded. + if [ ${#salt_hex} -ne 16 ] || [ ${#key_hex} -ne 64 ] || [ ${#iv_hex} -ne 32 ]; then + encryption_panic + fi + echo "$salt_hex" "$key_hex" "$iv_hex" +} + +encrypt_then_mac() { + # Encrypt the input data, calculate a MAC over IV and ciphertext, then + # print mac and ciphertext. + local salt_hex="$1" + local key_hex="$2" + local iv_hex="$3" + local ciphertext_b64 + + # We need the ciphertext twice: for the mac and for the output. But we can only store it in + # encoded form because it can contain null bytes. + ciphertext_b64=$(openssl enc -aes-256-cbc -K "$key_hex" -iv "$iv_hex" | openssl enc -base64) + + ( + hex_decode "$iv_hex" + echo "$ciphertext_b64" | openssl enc -base64 -d + ) | openssl dgst -sha256 -mac HMAC -macopt hexkey:"$key_hex" -binary + + echo "$ciphertext_b64" | openssl enc -base64 -d +} + +define_optionally_encrypt() { + # if things fail, make sure we don't accidentally send unencrypted data + unset optionally_encrypt + + if [ "${1}" != "no" ]; then + OPENSSL_VERSION=$(openssl version | awk '{print $2}' | awk -F . '{print (($1 * 100) + $2) * 100+ $3}') + # + # Encryption scheme for version 04 and 05: + # + # salt <- random_salt() + # key, IV <- version_specific_kdf( salt, password ) + # + # ciphertext <- aes_256_cbc_encrypt( key, IV, message ) + # mac <- hmac_sha256( key, iv:ciphertext ) + # + # // The output blob is formed as: + # // - 2 bytes version + # // - optional: rtc timestamp + # // - 8 bytes salt + # // - 32 bytes MAC + # // - the ciphertext + # result <- [version:salt:mac:ciphertext] + + if [ "${OPENSSL_VERSION}" -ge 10101 ]; then + optionally_encrypt() { + # version: 05 + # kdf: pbkdf2, 600.000 iterations + + local salt_hex key_hex iv_hex + read -r salt_hex key_hex iv_hex <<<"$( + parse_kdf_output "$(openssl enc -aes-256-cbc -md sha256 -pbkdf2 -iter 600000 -k "${1}" -P)" + )" + + printf "05" + printf "%s" "${2}" + hex_decode "$salt_hex" + encrypt_then_mac "$salt_hex" "$key_hex" "$iv_hex" + } + elif [ "${OPENSSL_VERSION}" -ge 10000 ]; then + optionally_encrypt() { + # version: 04 + # kdf: openssl custom kdf based on sha256 + + local salt_hex key_hex iv_hex + read -r salt_hex key_hex iv_hex <<<"$( + parse_kdf_output "$(openssl enc -aes-256-cbc -md sha256 -k "${1}" -P)" + )" + + printf "04" + printf "%s" "${2}" + hex_decode "$salt_hex" + encrypt_then_mac "$salt_hex" "$key_hex" "$iv_hex" + } + else + optionally_encrypt() { + printf "00%s" "${2}" + openssl enc -aes-256-cbc -md md5 -k "${1}" -nosalt + } + fi + else + optionally_encrypt() { + [ -n "${2}" ] && printf "99%s" "${2}" + cat + } + fi + +} + +set_up_disabled_sections() { + if [ -f "${MK_CONFDIR}/exclude_sections.cfg" ]; then + # shellcheck source=agents/cfg_examples/exclude_sections.cfg + . "${MK_CONFDIR}/exclude_sections.cfg" + fi +} + +export_utility_functions() { + # At the time of writing of this function, the linux agent exports + # some helper functions, so I consolidate those exports here. + # I am not sure whether this is a good idea, though. + # Their API is unstable. + export -f run_mrpe + export -f waitmax + export -f run_cached +} + +section_checkmk() { + cat <>> +Version: 2.3.0p12 +AgentOS: linux +Hostname: $(uname -n) +AgentDirectory: ${MK_CONFDIR} +DataDirectory: ${MK_VARDIR} +SpoolDirectory: ${SPOOLDIR} +PluginsDirectory: ${PLUGINSDIR} +LocalDirectory: ${LOCALDIR} +HERE + + # try to find only_from configuration + if [ -n "${REMOTE_HOST}" ]; then # xinetd + sed -n "/^service[[:space:]]*check-mk-agent/,/}/s/^[[:space:]]*only_from[[:space:]]*=[[:space:]]*\(.*\)/OnlyFrom: \1/p" /etc/xinetd.d/* | head -n1 + elif inpath systemctl; then # systemd + sed -n '/^IPAddressAllow/s/IPAddressAllow=/OnlyFrom: /p' "/usr/lib/systemd/system/check-mk-agent.socket" 2>/dev/null + # NOTE: The above line just reads back the socket file we deployed ourselves. Systemd units can be altered by + # other user defined unit files, so this *may* not be correct. A better way of doing this seemed to be querying + # systemctl itself about the 'effective' property: + # + # systemctl show --property IPAddressAllow "check-mk-agent.socket" | sed 's/IPAddressAllow=/OnlyFrom: /' + # + # However this ("successfully") reports an empty list or '[unprintable]' on older systemd versions :-( + fi + + # + # OS based labels are created from these variables + # + + echo "OSType: linux" + while read -r line; do + raw_line="${line//\"/}" + case $line in + ID=*) echo "OSPlatform: ${raw_line##*=}" ;; + NAME=*) echo "OSName: ${raw_line##*=}" ;; + VERSION_ID=*) echo "OSVersion: ${raw_line##*=}" ;; + esac + done <<<"$(cat /etc/os-release 2>/dev/null)" + + # + # BEGIN COMMON AGENT CODE + # + + if [ -n "${NO_PYTHON}" ]; then + python_fail_msg="No suitable python installation found." + elif [ -n "${WRONG_PYTHON_COMMAND}" ]; then + python_fail_msg="Configured python command not found." + fi + + cat </dev/null >&2 || return + + printf "<<>>\n" + cmk-agent-ctl status --json --no-query-remote +} + +section_checkmk_agent_plugins() { + printf "<<>>\n" + printf "pluginsdir %s\n" "${PLUGINSDIR}" + printf "localdir %s\n" "${LOCALDIR}" + for script in \ + "${PLUGINSDIR}"/* \ + "${PLUGINSDIR}"/[1-9]*/* \ + "${LOCALDIR}"/* \ + "${LOCALDIR}"/[1-9]*/*; do + if is_valid_plugin "${script}"; then + script_version=$(grep -e '^__version__' -e '^CMK_VERSION' "${script}" || echo 'CMK_VERSION="unversioned"') + printf "%s:%s\n" "${script}" "${script_version}" + fi + done +} + +section_checkmk_failed_plugin() { + ${MK_RUN_SYNC_PARTS} || return + echo "<<>>" + echo "FailedPythonPlugins: ${1}" +} + +# +# END COMMON AGENT CODE +# + +# +# CHECK SECTIONS +# + +section_labels() { + echo '<<>>' + + if [ -n "${IS_DOCKERIZED}" ] || [ -n "${IS_LXC_CONTAINER}" ]; then + echo '{"cmk/device_type":"container"}' + elif grep "hypervisor" /proc/cpuinfo >/dev/null 2>&1; then + echo '{"cmk/device_type":"vm"}' + fi +} + +section_mem() { + if [ -n "${IS_DOCKERIZED}" ]; then + echo "<<>>" + if [ -n "${IS_CGROUP_V2}" ]; then + cat /sys/fs/cgroup/memory.stat + echo "memory.current $(cat /sys/fs/cgroup/memory.current)" + echo "memory.max $(cat /sys/fs/cgroup/memory.max)" + else + cat /sys/fs/cgroup/memory/memory.stat + echo "usage_in_bytes $(cat /sys/fs/cgroup/memory/memory.usage_in_bytes)" + echo "limit_in_bytes $(cat /sys/fs/cgroup/memory/memory.limit_in_bytes)" + fi + grep -F 'MemTotal:' /proc/meminfo + elif [ -n "${IS_LXC_CONTAINER}" ]; then + echo '<<>>' + grep -v -E '^Swap:|^Mem:|total:|^Vmalloc|^Committed' >>' + grep -v -E '^Swap:|^Mem:|total:' >>' + echo "$(cat /proc/loadavg) ${NUM_CPUS}" + if [ -f "/proc/sys/kernel/threads-max" ]; then + cat /proc/sys/kernel/threads-max + fi + else + if [ -n "${IS_DOCKERIZED}" ]; then + echo "<<>>" + else + echo "<<>>" + fi + if [ -n "${IS_CGROUP_V2}" ]; then + echo "uptime $(cat /proc/uptime)" + echo "num_cpus ${NUM_CPUS}" + cat /sys/fs/cgroup/cpu.stat + else + grep "^cpu " /proc/stat + echo "num_cpus ${NUM_CPUS}" + cat /sys/fs/cgroup/cpuacct/cpuacct.stat + fi + fi +} + +section_uptime() { + echo '<<>>' + if [ -z "${IS_DOCKERIZED}" ]; then + cat /proc/uptime + else + echo "$(($(get_epoch) - $(stat -c %Z /dev/pts)))" + fi +} + +# Print out Partitions / Filesystems. (-P gives non-wrapped POSIXed output) +# Heads up: NFS-mounts are generally supressed to avoid agent hangs. +# If hard NFS mounts are configured or you have too large nfs retry/timeout +# settings, accessing those mounts from the agent would leave you with +# thousands of agent processes and, ultimately, a dead monitored system. +# These should generally be monitored on the NFS server, not on the clients. +section_df() { + if [ -n "${IS_DOCKERIZED}" ]; then + return + fi + + # The exclusion list is getting a bit of a problem. + # -l should hide any remote FS but seems to be all but working. + excludefs="-x smbfs -x cifs -x iso9660 -x udf -x nfsv4 -x nfs -x mvfs -x prl_fs -x squashfs -x devtmpfs -x autofs -x beegfs" + if [ -z "${IS_LXC_CONTAINER}" ]; then + excludefs="${excludefs} -x zfs" + fi + + echo '<<>>' + # We really *need* word splitting below! + # shellcheck disable=SC2086 + df -PTlk ${excludefs} | sed 1d + + # df inodes information + echo '<<>>' + echo '[df_inodes_start]' + # We really *need* word splitting below! + # shellcheck disable=SC2086 + df -PTli ${excludefs} | sed 1d + echo '[df_inodes_end]' + + if inpath lsblk; then + echo "[df_lsblk_start]" + lsblk --list --paths --output NAME,UUID + echo "[df_lsblk_end]" + fi +} + +section_systemd() { + if inpath systemctl; then + echo '<<>>' + # use plain to force ASCII output that is simpler to parse + echo "[list-unit-files]" + systemctl list-unit-files --full --no-legend --no-pager --plain --type service --type socket | tr -s ' ' + echo "[status]" + systemctl status --all --type service --type socket --no-pager --lines 0 | tr -s ' ' + echo "[all]" + systemctl --all --type service --type socket --full --no-legend --no-pager --plain | sed '/^$/q' | tr -s ' ' + fi +} + +section_zfs() { + if inpath zfs; then + echo '<<>>' + zfs get -t filesystem,volume -Hp name,quota,used,avail,mountpoint,type 2>/dev/null + echo '<<>>' + echo '[df]' + df -PTlk -t zfs | sed 1d + fi +} + +section_nfs_mounts() { + proc_mounts_file=${1} + + if inpath waitmax; then + STAT_VERSION=$(stat --version | head -1 | cut -d" " -f4) + STAT_BROKE="5.3.0" + + json_templ() { + echo '{"mountpoint": "'"${1}"'", "source": "'"${2}"'", "state": "ok", "usage": {"total_blocks": %b, "free_blocks_su": %f, "free_blocks": %a, "blocksize": %s}}' + } + json_templ_empty() { + echo '{"mountpoint": "'"${1}"'", "source": "'"${2}"'", "state": "hanging", "usage": {"total_blocks": 0, "free_blocks_su": 0, "free_blocks": 0, "blocksize": 0}}' + } + + echo '<<>>' + + sed -n '/ nfs4\? /s/\([^ ]*\) \([^ ]*\) .*/\1 \2/p' <"${proc_mounts_file}" | + while read -r MD MP; do + MD="$(printf "%s" "${MD}" | sed 's/\\040/ /g')" + MP="$(printf "%s" "${MP}" | sed 's/\\040/ /g')" + if [ "${STAT_VERSION}" != "${STAT_BROKE}" ]; then + waitmax -s 9 5 stat -f -c "$(json_templ "${MP}" "${MD}")" "${MP}" || + json_templ_empty "${MP}" "${MD}" + else + (waitmax -s 9 5 stat -f -c "$(json_templ "${MP}" "${MD}")" "${MP}" && + printf '\n') || json_templ_empty "${MP}" "${MD}" + fi + done + + echo '<<>>' + sed -n -e '/ cifs /s/.*\ \([^ ]*\)\ cifs\ .*/\1/p' <"${proc_mounts_file}" | + while read -r MP; do + MP="$(printf "%s" "${MP}" | sed 's/\\040/ /g')" + if [ ! -r "${MP}" ]; then + echo "${MP} Permission denied" + elif [ "${STAT_VERSION}" != "${STAT_BROKE}" ]; then + waitmax -s 9 2 stat -f -c "${MP} ok %b %f %a %s" "${MP}" || + echo "${MP} hanging 0 0 0 0" + else + waitmax -s 9 2 stat -f -c "${MP} ok %b %f %a %s" "${MP}" && + printf '\n' || echo "${MP} hanging 0 0 0 0" + fi + done + fi +} + +section_mounts() { + echo '<<>>' + grep ^/dev >>' + echo "[time]" + get_epoch + echo "[processes]" + CGROUP="" + if [ -e /sys/fs/cgroup ]; then + CGROUP="cgroup:512," + fi + echo "[header] $(ps ax -ww -o "${CGROUP}"user:32,vsz,rss,cputime,etime,pid,command | tr -s ' ')" + fi +} + +section_lnx_if() { + if inpath ip; then + echo '<<>>' + echo "[start_iplink]" + ip address + echo "[end_iplink]" + fi + + echo '<<>>' + sed 1,2d /proc/net/dev + sed -e 1,2d /proc/net/dev | cut -d':' -f1 | sort | while read -r eth; do + echo "[${eth}]" + if inpath ethtool; then + ethtool "${eth}" | grep -E '(Speed|Duplex|Link detected|Auto-negotiation):' + else + # If interface down we get "Invalid argument" + speed=$(cat "/sys/class/net/${eth}/speed" 2>/dev/null) + if [ -n "${speed}" ] && [ "${speed}" -ge 0 ]; then + echo "Speed: ${speed}Mb/s" + fi + fi + echo "Address: $(cat "/sys/class/net/${eth}/address")" + done +} + +section_bonding_interfaces() { + ( + cd /proc/net/bonding 2>/dev/null || return + echo '<<>>' + head -v -n 1000 ./* + ) +} + +section_vswitch_bonding() { + if inpath ovs-appctl; then + BONDS=$(ovs-appctl bond/list) + COL=$(echo "${BONDS}" | awk '{for(i=1;i<=NF;i++) {if($i == "bond") printf("%d", i)} exit 0}') + echo '<<>>' + for bond in $(echo "${BONDS}" | sed -e 1d | cut -f"${COL}"); do + echo "[${bond}]" + ovs-appctl bond/show "${bond}" + done + fi +} + +section_tcp() { + if inpath waitmax; then + echo '<<>>' + if OUTPUT=$(waitmax 5 cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | awk ' /:/ { c[$4]++; } END { for (x in c) { print x, c[x]; } }'); then + echo "${OUTPUT}" + elif inpath ss; then + ss -ant | grep -v ^State | awk ' /:/ { c[$1]++; } END { for (x in c) { print x, c[x]; } }' | + sed -e 's/^ESTAB/01/g;s/^SYN-SENT/02/g;s/^SYN-RECV/03/g;s/^FIN-WAIT-1/04/g;s/^FIN-WAIT-2/05/g;s/^TIME-WAIT/06/g;s/^CLOSED/07/g;s/^CLOSE-WAIT/08/g;s/^LAST-ACK/09/g;s/^LISTEN/0A/g;s/^CLOSING/0B/g;' + fi + fi +} + +section_multipathing() { + if inpath multipath; then + echo '<<>>' + multipath -l + fi +} + +section_diskstat() { + if [ -z "${IS_DOCKERIZED}" ]; then + echo '<<>>' + get_epoch + grep -E ' (x?[shv]d[a-z]*[0-9]*|cciss/c[0-9]+d[0-9]+|emcpower[a-z]+|dm-[0-9]+|VxVM.*|mmcblk.*|dasd[a-z]*|bcache[0-9]+|nvme[0-9]+n[0-9]+) ' >>" + echo "[time]" + get_epoch + if [ -n "${IS_CGROUP_V2}" ]; then + echo "[io.stat]" + cat "/sys/fs/cgroup/io.stat" + else + for F in io_service_bytes io_serviced; do + echo "[${F}]" + cat "/sys/fs/cgroup/blkio/blkio.throttle.${F}" + done + fi + echo "[names]" + for F in /sys/block/*; do + echo "${F##*/} $(cat "${F}/dev")" + done + fi +} + +section_chrony() { + if inpath chronyc; then + # Force successful exit code. Otherwise section will be missing if daemon not running + # + # The "| cat" has been added for some kind of regression in RedHat 7.5. The + # SELinux rules shipped with that release were denying the chronyc call + # without cat. + _run_cached_internal "chrony" 30 120 200 20 "echo '<<>>'; waitmax 5 chronyc -n tracking | cat || true" + fi +} + +section_kernel() { + if [ -z "${IS_DOCKERIZED}" ] && [ -z "${IS_LXC_CONTAINER}" ]; then + echo '<<>>' + get_epoch + cat /proc/vmstat /proc/stat + fi +} + +section_ipmitool() { + if inpath ipmitool; then + _run_cached_internal "ipmi" 300 300 900 600 "echo '<<>>'; waitmax 300 ipmitool sensor list | grep -v 'command failed' | grep -v -E '^[^ ]+ na ' | grep -v ' discrete '" + # readable discrete sensor states + _run_cached_internal "ipmi_discrete" 300 300 900 600 "echo '<<>>'; waitmax 300 ipmitool sdr elist compact" + fi +} + +section_ipmisensors() { + inpath ipmi-sensors && ls /dev/ipmi* >/dev/null || return + ${MK_RUN_SYNC_PARTS} && echo '<<>>' + # Newer ipmi-sensors version have new output format; Legacy format can be used + if ipmi-sensors --help | grep -q legacy-output; then + IPMI_FORMAT="--legacy-output" + else + IPMI_FORMAT="" + fi + if ipmi-sensors --help | grep -q " \-\-groups"; then + IPMI_GROUP_OPT="-g" + else + IPMI_GROUP_OPT="-t" + fi + + # At least with ipmi-sensors 0.7.16 this group is Power_Unit instead of "Power Unit" + _run_cached_internal "ipmi_sensors" 300 300 900 600 "echo '<<>>'; for class in Temperature Power_Unit Fan; do + ipmi-sensors ${IPMI_FORMAT} --sdr-cache-directory /var/cache ${IPMI_GROUP_OPT} \"\${class}\" | sed -e 's/ /_/g' -e 's/:_\?/ /g' -e 's@ \([^(]*\)_(\([^)]*\))@ \2_\1@' + # In case of a timeout immediately leave loop. + if [ $? = 255 ]; then break; fi + done" +} + +section_md() { + echo '<<>>' + cat /proc/mdstat +} + +section_dm_raid() { + if inpath dmraid && DMSTATUS=$(waitmax 3 dmraid -r); then + echo '<<>>' + + # Output name and status + waitmax 20 dmraid -s | grep -e ^name -e ^status + + # Output disk names of the RAID disks + DISKS=$(echo "${DMSTATUS}" | cut -f1 -d":") + + for disk in ${DISKS}; do + device=$(cat /sys/block/"$(basename "${disk}")"/device/model) + status=$(echo "${DMSTATUS}" | grep "^${disk}") + echo "${status} Model: ${device}" + done + fi +} + +section_cfggen() { + if inpath cfggen; then + echo '<<>>' + cfggen 0 DISPLAY | + grep -E '(Target ID|State|Volume ID|Status of volume)[[:space:]]*:' | + sed -e 's/ *//g' -e 's/:/ /' + fi +} + +section_storcli() { + if inpath storcli; then + _storcli() { storcli "$@"; } + elif inpath storcli64; then + _storcli() { storcli64 "$@"; } + else + return 1 + fi + + echo '<<>>' + _storcli /call/eall/sall show all + + echo '<<>>' + _storcli /call/vall show all + + echo '<<>>' + _storcli /call/cv show all + + # exit successfully, because storcli was in the path. + return 0 +} + +section_megaraid() { + section_storcli && return + + if inpath MegaCli; then + MegaCli_bin="MegaCli" + elif inpath MegaCli64; then + MegaCli_bin="MegaCli64" + elif inpath megacli; then + MegaCli_bin="megacli" + else + return 1 + fi + + echo '<<>>' + for part in $(${MegaCli_bin} -EncInfo -aALL -NoLog >>' + ${MegaCli_bin} -LDInfo -Lall -aALL -NoLog >>' + ${MegaCli_bin} -AdpBbuCmd -GetBbuStatus -aALL -NoLog >>' + tw_cli "/${C}" show all | grep -E 'Model =|Firmware|Serial' + echo '<<<3ware_disks>>>' + tw_cli "/${C}" show drivestatus | grep -E 'p[0-9]' | sed "s/^/${C}\//" + echo '<<<3ware_units>>>' + tw_cli "/${C}" show unitstatus | grep -E 'u[0-9]' | sed "s/^/${C}\//" + done + fi +} + +section_areca_raid() { + if inpath cli64; then + _run_cached_internal "arc_raid_status" 300 300 900 600 "echo '<<>>'; cli64 rsf info | tail -n +3 | head -n -2" + fi +} + +section_vbox_guest() { + echo '<<>>' + if inpath VBoxControl && lsmod | grep vboxguest >/dev/null 2>&1; then + (VBoxControl -nologo guestproperty enumerate || echo "ERROR") | cut -d, -f1,2 + fi +} + +section_openvpn() { + if [ -e /etc/openvpn/openvpn-status.log ]; then + echo '<<>>' + sed -n -e '/CLIENT LIST/,/ROUTING TABLE/p' >>' + for var in GPUErrors GPUCoreTemp; do + DISPLAY=:0 waitmax 2 nvidia-settings -t -q ${var} | sed "s/^/${var}: /" + done + fi +} + +section_drbd() { + if [ -z "${IS_DOCKERIZED}" ] && [ -z "${IS_LXC_CONTAINER}" ] && [ -e /proc/drbd ]; then + echo '<<>>' + cat /proc/drbd + cat /sys/kernel/debug/drbd/resources/*/connections/*/0/proc_drbd 2>/dev/null + fi +} + +section_heartbeat() { + if [ -S /var/run/heartbeat/crm/cib_ro ] || [ -S /var/run/crm/cib_ro ] || pgrep "^(crmd|pacemaker-contr)$" >/dev/null 2>&1; then + echo '<<>>' + TZ=UTC crm_mon -1 -r | grep -v ^$ | sed 's/^ //; /^\sResource Group:/,$ s/^\s//; s/^\s/_/g' + fi + + if inpath cl_status; then + echo '<<>>' + cl_status rscstatus + + echo '<<>>' + for NODE in $(cl_status listnodes); do + if [ "${NODE}" != "$(uname -n | tr '[:upper:]' '[:lower:]')" ]; then + STATUS=$(cl_status nodestatus "${NODE}") + printf "%s %s" "${NODE}" "${STATUS}" + for LINK in $(cl_status listhblinks "${NODE}" 2>/dev/null); do + printf " %s %s" "${LINK}" "$(cl_status hblinkstatus "${NODE}" "${LINK}")" + done + echo + fi + done + fi +} + +## Postfix mailqueue monitoring +## Determine the number of mails and their size in several postfix mail queues +read_postfix_queue_dirs() { + postfix_queue_dir=${1} + if [ -n "${postfix_queue_dir}" ]; then + echo '<<>>' + echo "[[[${2}]]]" + for queue in deferred active; do + count=$(find "${postfix_queue_dir}/${queue}" -type f | wc -l) + size=$(du -s "${postfix_queue_dir}/${queue}" | awk '{print $1 }') + if [ -z "${size}" ]; then + size=0 + fi + if [ -z "${count}" ]; then + echo "Mail queue is empty" + else + echo "QUEUE_${queue} ${size} ${count}" + fi + done + fi +} + +## Postfix status monitoring +read_postfix_master_pid() { + postfix_queue_dir=${1} + postfix_instance_name=${2} + echo "<<>>" + if [ -e "${postfix_queue_dir}/pid/master.pid" ]; then + if [ -r "${postfix_queue_dir}/pid/master.pid" ]; then + postfix_pid=$(sed 's/ //g' <"${postfix_queue_dir}/pid/master.pid") # handle possible spaces in output + if ps -p "${postfix_pid}" -o cmd= | grep -q ".*postfix/\(s\?bin/\)\?/\?master.*"; then + echo "${postfix_instance_name}:the Postfix mail system is running:PID:${postfix_pid}" + else + echo "${postfix_instance_name}:PID file exists but instance is not running!" + fi + else + echo "${postfix_instance_name}:PID file exists but is not readable" + fi + else + echo "${postfix_instance_name}:the Postfix mail system is not running" + fi +} + +## Postfix mailqueue monitoring +## Determine the number of mails and their size in several postfix mail queue +section_mailqueue() { + if inpath postconf; then + # Check if multi_instance_directories exists in main.cf and is not empty + # always takes the last entry, multiple entries possible + multi_instances_dirs=$(postconf -c /etc/postfix 2>/dev/null | grep ^multi_instance_directories | sed 's/.*=[[:space:]]*//g') + if [ -n "${multi_instances_dirs}" ]; then + for queue_dir in ${multi_instances_dirs}; do + if [ -n "${queue_dir}" ]; then + postfix_queue_dir=$(postconf -c "${queue_dir}" 2>/dev/null | grep ^queue_directory | sed 's/.*=[[:space:]]*//g') + read_postfix_queue_dirs "${postfix_queue_dir}" "${queue_dir}" + postfix_instance_name=$(postconf -c "${queue_dir}" -h multi_instance_name 2>/dev/null) + read_postfix_master_pid "${postfix_queue_dir}" "${postfix_instance_name}" + fi + done + fi + # Always check for the default queue. It can exist even if multiple instances are configured + read_postfix_queue_dirs "$(postconf -h queue_directory 2>/dev/null)" "default" + read_postfix_master_pid "$(postconf -h queue_directory 2>/dev/null)" "default" + + elif [ -x /usr/sbin/ssmtp ]; then + echo '<<>>' + mailq 2>&1 | sed 's/^[^:]*: \(.*\)/\1/' | tail -n 6 + fi + + # Check status of qmail mailqueue + if inpath qmail-qstat; then + echo "<<>>" + qmail-qstat + fi + + # Nullmailer queue monitoring + if inpath nullmailer-send; then + echo '<<>>' + if [ -d /var/spool/nullmailer/queue ]; then + COUNT=$(find /var/spool/nullmailer/queue -type f | wc -l) + SIZE=$(du -s /var/spool/nullmailer/queue | awk '{print $1 }') + echo "${SIZE} ${COUNT} deferred" + fi + if [ -d /var/spool/nullmailer/failed ]; then + COUNT=$(find /var/spool/nullmailer/failed -type f | wc -l) + SIZE=$(du -s /var/spool/nullmailer/failed | awk '{print $1 }') + echo "${SIZE} ${COUNT} failed" + fi + fi +} + +section_omd() { + if inpath omd; then + # 60 is _probably_ the agents polling interval. Why would you use that?? + _run_cached_internal "omd_status" 60 60 180 120 "echo '<<>>'; omd status --bare || true" + + ${MK_RUN_SYNC_PARTS} || return + + echo '<<>>' + get_epoch + for statefile in /omd/sites/*/var/log/mknotifyd.state; do + if [ -e "${statefile}" ]; then + site=${statefile%/var/log*} + site=${site#/omd/sites/} + echo "[${site}]" + grep -v '^#' <"${statefile}" + fi + done + + echo '<<>>' + for statsfile in /omd/sites/*/var/log/apache/stats; do + if [ -e "${statsfile}" ]; then + site=${statsfile%/var/log*} + site=${site#/omd/sites/} + echo "[${site}]" + cat "${statsfile}" + : >"${statsfile}" + # prevent next section to fail caused by a missing newline at the end of the statsfile + echo + fi + done + + _du_no_errors() { + if [ -e "${1}" ]; then + output=$(du -bs "$1") && printf "%s\n" "${output}" + else + printf "0 %s\n" "${1}" + fi + } + + echo '<<>>' + for sitedir in /omd/sites/*; do + site=${sitedir#/omd/sites/} + echo "[site ${site}]" + _du_no_errors "$sitedir" + _du_no_errors "$sitedir/var/log" + _du_no_errors "$sitedir/var/check_mk/rrd" + _du_no_errors "$sitedir/var/pnp4nagios/" + _du_no_errors "$sitedir/tmp/" + _du_no_errors "$sitedir/local/" + _du_no_errors "$sitedir/var/check_mk/agents/" + _du_no_errors "$sitedir/var/mkeventd/history/" + _du_no_errors "$sitedir/var/check_mk/core/" + _du_no_errors "$sitedir/var/check_mk/inventory_archive/" + done + + echo '<<>>' + echo '[versions]' + echo 'version;number;edition;demo' + for versiondir in /omd/versions/*; do + version=${versiondir#/omd/versions/} + + # filter out special directory 'default' + if [ "${version}" = "default" ]; then + continue + fi + + number=${version} + demo="0" + if [ "${version##*.}" = "demo" ]; then + number=${version%.demo} + demo="1" + fi + edition=${number##*.} + number=${number%.*} + echo "${version};${number};${edition};${demo}" + done + echo '[sites]' + echo 'site;used_version;autostart' + for sitedir in /omd/sites/*; do + site=${sitedir#/omd/sites/} + used_version=$(readlink "${sitedir}"/version) + used_version=${used_version##*/} + autostart="0" + if grep -q "CONFIG_AUTOSTART[[:blank:]]*=[[:blank:]]*'on'" "${sitedir}"/etc/omd/site.conf; then + autostart="1" + fi + echo "${site};${used_version};${autostart}" + done + fi +} + +section_zpool() { + if inpath zpool; then + echo "<<>>" + zpool status -x + echo "<<>>" + zpool list + fi +} + +section_veritas_cluster() { + if [ -x /opt/VRTSvcs/bin/haclus ]; then + echo "<<>>" + vcshost=$(hostname | cut -d. -f1) + waitmax -s 9 2 /opt/VRTSvcs/bin/haclus -display -localclus | grep -e ClusterName -e ClusState + waitmax -s 9 2 /opt/VRTSvcs/bin/hasys -display -attribute SysState + waitmax -s 9 2 /opt/VRTSvcs/bin/hagrp -display -sys "${vcshost}" -attribute State -localclus + waitmax -s 9 2 /opt/VRTSvcs/bin/hares -display -sys "${vcshost}" -attribute State -localclus + waitmax -s 9 2 /opt/VRTSvcs/bin/hagrp -display -attribute TFrozen -attribute Frozen + fi +} + +section_omd_core() { + ( + cd /omd/sites || return + + # The files within a site are site-user writable! Therefore we must not use them! + # The version files are only root writable so we can use them instead. + site_version() { + printf "%s" "$(realpath "${1}/version" | sed 's|.*/||')" + } + + site_cmd() { + # DO NOT ACCESS /omd/sites/${site}/bin/cmd directly. + # bin might point anywhere -> priv escalation. + printf "/omd/versions/%s/bin/%s" "$(site_version "${1}")" "${2}" + } + + site_lib() { + printf "/omd/versions/%s/lib" "$(site_version "${1}")" + } + + waitmax_for_unixcat_with_site_ld_library_path() { + LD_LIBRARY_PATH="$(site_lib "${2}"):${LD_LIBRARY_PATH}" waitmax "${1}" "$(site_cmd "${2}" unixcat)" "/omd/sites/${2}/tmp/run/${3}" + } + + echo '<<>>' + for site in *; do + if [ -S "/omd/sites/${site}/tmp/run/live" ]; then + echo "[${site}]" + echo "GET status" | + waitmax_for_unixcat_with_site_ld_library_path 3 "${site}" "live" + fi + done + + echo '<<>>' + for site in *; do + echo "[${site}]" + for PEM_PATH in "/omd/sites/${site}/etc/ssl/ca.pem" "/omd/sites/${site}/etc/ssl/sites/${site}.pem"; do + if [ -f "${PEM_PATH}" ]; then + CERT_DATE=$(openssl x509 -enddate -noout -in "${PEM_PATH}" | sed 's/notAfter=//') + echo "${PEM_PATH}|$(date --date="${CERT_DATE}" --utc +%s)" + fi + done + done + + echo '<<>>' + for site in *; do + if [ -S "/omd/sites/${site}/tmp/run/mkeventd/status" ]; then + echo "[\"${site}\"]" + (echo "GET status" && echo "OutputFormat: json") | + waitmax_for_unixcat_with_site_ld_library_path 3 "${site}" "mkeventd/status" + fi + done + + echo '<<>>' + for site in *; do + if [ -S "/omd/sites/${site}/tmp/run/live" ]; then + echo "[${site}]" + waitmax_for_unixcat_with_site_ld_library_path 5 "${site}" "live" < 0 +Filter: custom_variable_names < _REALNAME +LimitString + waitmax_for_unixcat_with_site_ld_library_path 5 "${site}" "live" < 0 +Stats: host_scheduled_downtime_depth > 0 +StatsOr: 2 +Stats: scheduled_downtime_depth = 0 +Stats: host_scheduled_downtime_depth = 0 +Stats: host_state != 0 +StatsAnd: 3 +Stats: state = 1 +Stats: scheduled_downtime_depth = 0 +Stats: host_scheduled_downtime_depth = 0 +Stats: host_state = 0 +Stats: host_has_been_checked = 1 +StatsAnd: 5 +Stats: state = 3 +Stats: scheduled_downtime_depth = 0 +Stats: host_scheduled_downtime_depth = 0 +Stats: host_state = 0 +Stats: host_has_been_checked = 1 +StatsAnd: 5 +Stats: state = 2 +Stats: scheduled_downtime_depth = 0 +Stats: host_scheduled_downtime_depth = 0 +Stats: host_state = 0 +Stats: host_has_been_checked = 1 +StatsAnd: 5 +Filter: host_custom_variable_names < _REALNAME +LimitString + fi + done + ) +} + +section_mkbackup() { + if ls /omd/sites/*/var/check_mk/backup/*.state >/dev/null 2>&1; then + echo "<<>>" + for F in /omd/sites/*/var/check_mk/backup/*.state; do + SITE=${F#/*/*/*} + SITE=${SITE%%/*} + + JOB_IDENT=${F%.state} + JOB_IDENT=${JOB_IDENT##*/} + + if [ "${JOB_IDENT}" != "restore" ]; then + echo "[[[site:${SITE}:${JOB_IDENT}]]]" + cat "${F}" + echo + fi + done + fi + + # Collect states of configured CMA backup jobs + if inpath mkbackup && ls /var/lib/mkbackup/*.state >/dev/null 2>&1; then + echo "<<>>" + for F in /var/lib/mkbackup/*.state; do + JOB_IDENT=${F%.state} + JOB_IDENT=${JOB_IDENT##*/} + + if [ "${JOB_IDENT}" != "restore" ]; then + echo "[[[system:${JOB_IDENT}]]]" + cat "${F}" + echo + fi + done + fi +} + +section_thermal() { + if [ -z "${IS_DOCKERIZED}" ] && [ -z "${IS_LXC_CONTAINER}" ] && ls /sys/class/thermal/thermal_zone* >/dev/null 2>&1; then + echo '<<>>' + for F in /sys/class/thermal/thermal_zone*; do + line="${F##*/}" + if [ ! -e "${F}/mode" ]; then + line="${line}|-" + else + line="${line}|$(cat "${F}"/mode)" + fi + + line="${line}|$(cat "${F}/type")|$(cat "${F}/temp")" + + for G in "${F}"/trip_point_*_temp; do + line="${line}|$(cat "$G")|$(cat "${G/%temp/type}")" + done + + echo "$line" + done + fi +} + +section_libelle() { + if inpath trd; then + echo "<<>>" + trd -s + fi +} + +section_http_accelerator() { + if inpath varnishstat; then + echo "<<>>" + varnishstat -1 + fi +} + +section_proxmox() { + if inpath pvecm; then + echo "<<>>" + pvecm status + echo "<<>>" + pvecm nodes + fi +} + +section_haproxy() { + for HAPROXY_SOCK in /run/haproxy/admin.sock /var/lib/haproxy/stats; do + if [ -r "${HAPROXY_SOCK}" ] && inpath socat; then + echo "<<>>" + echo "show stat" | socat - "UNIX-CONNECT:${HAPROXY_SOCK}" + fi + done +} + +# +# BEGIN COMMON AGENT CODE +# + +section_job() { + # Get statistics about monitored jobs. + + _cat_files() { + # read file names from stdin and write like `head -n -0 -v file` + while read -r file; do + printf "==> %s <==\n" "${file##./}" + cat "${file}" + done + } + + ( + cd "${MK_VARDIR}/job" 2>/dev/null || return + printf "<<>>\n" + for user in *; do + ( + cd "${user}" 2>/dev/null || return # return from subshell only + # This folder is owned (and thus writable) by the user that ran the jobs. + # The agent (root) must not read files that are not owned by the user. + # This prevents symlink or hardlink attacks. + find -L . -type f -user "${user}" | _cat_files + ) + done + ) +} + +section_fileinfo() { + # fileinfo check: put patterns for files into /etc/check_mk/fileinfo.cfg + perl -e ' + use File::Glob "bsd_glob"; + my @patterns = (); + foreach (bsd_glob("$ARGV[0]/fileinfo.cfg"), bsd_glob("$ARGV[0]/fileinfo.d/*")) { + open my $handle, "<", $_ or next; + while (<$handle>) { + chomp; + next if /^\s*(#|$)/; + my $pattern = $_; + $pattern =~ s/\$DATE:(.*?)\$/substr(`date +"$1"`, 0, -1)/eg; + push @patterns, $pattern; + } + warn "error while reading $_: $!\n" if $!; + close $handle; + } + exit if ! @patterns; + + my $file_stats = ""; + foreach (@patterns) { + foreach (bsd_glob("$_")) { + if (! -f) { + $file_stats .= "$_|missing\n" if ! -d; + } elsif (my @infos = stat) { + $file_stats .= "$_|ok|$infos[7]|$infos[9]\n"; + } else { + $file_stats .= "$_|stat failed: $!\n"; + } + } + } + + print "<<>>\n", time, "\n[[[header]]]\nname|status|size|time\n[[[content]]]\n$file_stats"; + ' -- "${MK_CONFDIR}" +} + +# +# END COMMON AGENT CODE +# + +# ntpq helper function + +get_ntpq() { + inpath ntpq || return 1 + _run_cached_internal "ntp" 30 120 200 20 "echo '<<>>'; waitmax 5 ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/' || true" +} + +section_timesyncd() { + if [ -n "${IS_DOCKERIZED}" ] || [ -n "${IS_LXC_CONTAINER}" ]; then + return 0 + fi + inpath systemctl || return 1 + inpath timedatectl || return 1 + systemctl is-enabled systemd-timesyncd.service >/dev/null 2>&1 || return 1 + # debian 10.8 uses ConditionFileIsExecutable to "disable" systemd-timedatectl when ntp is installed. + # The service is still enabled, but does not start timesyncd as the condition is not met. + (inpath ntpd || inpath openntpd || inpath chronyd || inpath VBoxService) && return 1 # we check the same condition as the systemd condition + timedatectl timesync-status >/dev/null 2>&1 || return 1 + + ${MK_RUN_SYNC_PARTS} || return 0 + + echo "<<>>" + timedatectl timesync-status + # /run/systemd/timesync/synchronized is a more reliable/the correct file to look at for > systemd v250 + if [ -f "/run/systemd/timesync/synchronized" ]; then + get_file_mtime /run/systemd/timesync/synchronized | awk '{print "[[["$1"]]]"}' + else + get_file_mtime /var/lib/systemd/timesync/clock | awk '{print "[[["$1"]]]"}' + fi + + echo "<<>>" + timedatectl show-timesync | awk '/NTPMessage/{print $0}' + timedatectl show | awk '/Timezone/{print $0}' + return 0 # intended not to execute section_ntp even in the case where get_file_mtime fails +} + +section_ntp() { + if [ -n "${IS_DOCKERIZED}" ] || [ -n "${IS_LXC_CONTAINER}" ]; then + return 0 + fi + # First we try to identify if we're beholden to systemd + if inpath systemctl; then + # shellcheck disable=SC2016 + if [ "$(systemctl | awk '/ntp.service|ntpd.service|ntpsec.service/{print $3; exit}')" = "active" ]; then + # remove heading, make first column space separated + get_ntpq + return + fi + fi + + # If we get to this point, we attempt via classic ntp daemons (ntpq required) + if inpath ntpq; then + # Try to determine status via /etc/init.d + # This might also be appropriate for AIX, Solaris and others + for _ntp_daemon in ntp ntpd openntpd; do + # Check for a service script + if [ -x /etc/init.d/"${_ntp_daemon}" ]; then + # If the status returns 0, we assume we have a running service + if /etc/init.d/"${_ntp_daemon}" status >/dev/null 2>&1; then + get_ntpq + return + fi + fi + done + unset -v _ntp_daemon + + # For other systems such as Slackware + if [ -x "/etc/rc.d/rc.ntpd" ]; then + get_ntpq + return + fi + fi +} + +run_real_time_checks() { + RTC_PLUGINS="" + # shellcheck source=agents/cfg_examples/real_time_checks.cfg + . "${MK_CONFDIR}/real_time_checks.cfg" 2>/dev/null || return + + if [ -z "${RTC_SECRET}" ]; then + define_optionally_encrypt "no" + else + inpath openssl || { + echo "ERROR: openssl command is missing, but encryption is requested. Not sending real-time data." >&2 + return + } + define_optionally_encrypt "yes" + fi + + for trigger in "${MK_VARDIR}/rtc_remotes/"?*; do + # no such file => no expansion of ?* => nothing to do + # braces are needed so run_real_time_checks_for_remote can be forked away + # otherwise async execution of check plugins is held off by activated realtime checks. + [ -e "${trigger}" ] && { run_real_time_checks_for_remote "${trigger}" "${RTC_SECRET}" >/dev/null & } + done +} + +_rt_pidfile_is_mine() { + [ "$(cat "${1}" 2>/dev/null)" = "$$" ] +} + +_rt_pidfile_is_alive() { + [ "$(("$(get_epoch)" - "$(get_file_atime "${1}")"))" -le "${RTC_TIMEOUT}" ] +} + +_rt_timestamp() { + get_epoch | tr -d '\n' +} + +_rt_sendudp() { + # concatenate the output of all commands to a single udp packet + dd bs=9999 iflag=fullblock 2>/dev/null >"/dev/udp/${1}/${2}" +} + +# Implements Real-Time Check feature of the Checkmk agent which can send +# some section data in 1 second resolution. Useful for fast notifications and +# detailed graphing (if you configure your RRDs to this resolution). +# 2 bytes: protocol version +# 10 bytes: timestamp +# rest: encrypted data +# Be aware of maximum packet size. +run_real_time_checks_for_remote() { + pidfile="${1}" + secret="${2}" + remote="${pidfile##*/rtc_remotes/}" + + # have I already started for this remote? + _rt_pidfile_is_mine "${pidfile}" && return + + echo $$ >"${pidfile}" + + while true; do + _rt_pidfile_is_mine "${pidfile}" || return + _rt_pidfile_is_alive "${pidfile}" || { + rm "${pidfile}" + return + } + + for section in ${RTC_SECTIONS}; do + section_"${section}" | + optionally_encrypt "${secret}" "$(_rt_timestamp)" | + _rt_sendudp "${remote}" "${RTC_PORT}" + done + + # Plugins + cd "${PLUGINSDIR}" || continue + + for script in ${RTC_PLUGINS}; do + is_valid_plugin "${script}" || continue + plugin_interpreter=$(get_plugin_interpreter "${script}") || continue + + "${plugin_interpreter}" "${script}" | + optionally_encrypt "${secret}" "$(_rt_timestamp)" | + _rt_sendudp "${remote}" "${RTC_PORT}" + done + + sleep 1 + done +} + +# +# BEGIN COMMON AGENT CODE +# + +run_cached() { + # Compatibility wrapper for plugins that might use run_cached. + # We should have never exposed this as quasi API. + NAME="${1}" + MAXAGE="${2}" + REFRESH_INTERVAL="${3}" + shift 3 + + OUTPUT_TIMEOUT=$((MAXAGE * 3)) + CREATION_TIMEOUT=$((MAXAGE * 2)) + + _run_cached_internal "${NAME}" "${REFRESH_INTERVAL}" "${MAXAGE}" "${OUTPUT_TIMEOUT}" "${CREATION_TIMEOUT}" "$@" +} + +_run_cached_internal() { + # Run a command asynchronous by use of a cache file. + # Usage: _run_cached_internal NAME REFRESH_INTERVAL MAXAGE OUTPUT_TIMEOUT OUTPUT_TIMEOUT CREATION_TIMEOUT [COMMAND ...] + # Note that while multiple COMMAND arguments are considered, they are evaluated in a string. + # This means that extra escaping is required. + # For example: + # To run a cat command every two minutes, considering the created data valid for one three minutes, + # send the created data for four minutes and allowing the command to run for 12 minutes, you'll have to call + # + # _run_cached_interal "my_file_content" 120 180 240 720 "cat \"My File\"" + # + # Mind the escaping... + + NAME="${1}" + # name of the section (also used as cache file name) + + REFRESH_INTERVAL="${2}" + # threshold in seconds when the cache file needs to be regenerated + + MAXAGE="${3}" + # maximum cache livetime in seconds + + OUTPUT_TIMEOUT="${4}" + # threshold in seconds for how long the cache file will be output (regardless of whether it is outdated) + + CREATION_TIMEOUT="${5}" + # threshold in seconds for how long the process is allowed to be running before it is killed (see below for details) + + shift 5 + # $* is now the command to run + + if ${DISABLE_CACHING:-false}; then + # We need the re-splitting to be compatible with the caching case, so: + # shellcheck disable=SC2068 + $@ + return + fi + + [ -d "${MK_VARDIR}/cache" ] || mkdir -p "${MK_VARDIR}/cache" + CACHEFILE="${MK_VARDIR}/cache/${NAME}.cache" + FAIL_REPORT_FILE="${SPOOLDIR}/${NAME}.cachefail" + + NOW="$(get_epoch)" + MTIME="$(get_file_mtime "${CACHEFILE}" 2>/dev/null)" || MTIME=0 + + if ${MK_RUN_SYNC_PARTS}; then + if [ -s "${CACHEFILE}" ] && [ $((NOW - MTIME)) -le "${OUTPUT_TIMEOUT}" ]; then + # Output the file (if it is not too outdated) + CACHE_INFO="cached(${MTIME},${MAXAGE})" + # prefix or insert cache info, unless already present. + # WATCH OUT: AIX does not allow us to pass this as a single '-e' option! + if [ "${NAME%%_*}" = "local" ] || [ "${NAME%%_*}" = "mrpe" ]; then + sed -e '/^<<<.*>>>/{p;d;}' -e '/^cached([0-9]*,[0-9]*) /{p;d;}' -e "s/^/${CACHE_INFO} /" "${CACHEFILE}" + else + sed -e '/^<<<.*\(:cached(\).*>>>/{p;d;}' -e 's/^<<<\([^>]*\)>>>$/<<<\1:'"${CACHE_INFO}"'>>>/' "${CACHEFILE}" + fi + fi + + fi + + if ${MK_RUN_ASYNC_PARTS}; then + # Kill the process if it is running too long (cache file not accessed for more than CREATION_TIMEOUT seconds). + # If killing succeeds, remove CACHFILE.new.PID. + # Write info about the timed out process and the kill attempt to the SPOOLDIR. + # It will be reported to the server in the next (synchronous) agent execution. + # The file will be deleted as soon as the plugin/local check is functional again. + # Do not output the file here, it will interrupt the local and mrpe sections, as well as any other + # partially cached section. + for cfile in "${CACHEFILE}.new."*; do + [ -e "${cfile}" ] || break # no match + TRYING_SINCE="$(get_file_atime "${cfile}")" + [ -n "${TRYING_SINCE}" ] || break # race condition: file vanished + if [ $((NOW - TRYING_SINCE)) -ge "${CREATION_TIMEOUT}" ]; then + { + printf "<<>>\n" + pid="${cfile##*.new.}" + printf "timeout|%s|%s|%s\n" "${NAME}" "${CREATION_TIMEOUT}" "${pid}" + kill -9 "${pid}" >/dev/null 2>&1 && sleep 2 # TODO: what about child processes? + if [ -n "$(ps -o args= -p "${pid}")" ]; then + printf "killfailed|%s|%s|%s\n" "${NAME}" "${CREATION_TIMEOUT}" "${pid}" + else + rm -f "${cfile}" + fi + } >"${FAIL_REPORT_FILE}" 2>&1 + fi + done + + # This does the right thing, regardless whether the pattern matches! + _cfile_in_use() { + for cfile in "${CACHEFILE}.new."*; do + printf "%s\n" "${cfile}" + break + done + } + + # Time to refresh cache file and new job not yet running? + if [ $((NOW - MTIME)) -gt "${REFRESH_INTERVAL}" ] && [ ! -e "$(_cfile_in_use)" ]; then + # Start it. If the command fails the output is thrown away + cat </dev/null 2>&1 & +eval '${MK_DEFINE_LOG_SECTION_TIME}' +exec > "${CACHEFILE}.new.\$\$" || exit 1 +$* && mv -f "${CACHEFILE}.new.\$\$" "${CACHEFILE}" && rm -f "${FAIL_REPORT_FILE}" || rm -f "${CACHEFILE}.new.\$\$" +HERE + fi + + fi + + unset NAME MAXAGE CREATION_TIMEOUT REFRESH_INTERVAL CACHEFILE NOW MTIME CACHE_INFO TRYING_SINCE OUTPUT_TIMEOUT +} + +run_local_checks() { + cd "${LOCALDIR}" || return + + if ${MK_RUN_SYNC_PARTS}; then + echo '<<>>' + for script in ./*; do + if is_valid_plugin "${script}"; then + _log_section_time "${script}" + fi + done + fi + + # Call some local checks only every X'th second + for script in [1-9]*/*; do + if is_valid_plugin "${script}"; then + interval="${script%/*}" + _run_cached_internal "local_${script##*/}" "${interval}" "${interval}" $((interval * 3)) $((interval * 2)) "_log_section_time '${script}'" + fi + done +} + +run_spooler() { + ( + cd "${SPOOLDIR}" 2>/dev/null || return + + now=$(get_epoch) + + for file in *; do + [ "${file}" != "*" ] || return + + # If prefixed with a number, then that is the maximum age in seconds. + # If the file is older than that, it is ignored. + maxage="${file%%[^0-9]*}" + if [ "${maxage}" ]; then + mtime=$(get_file_mtime "${file}") + [ $((now - mtime)) -le "${maxage}" ] || continue + fi + + cat "${file}" + done + ) +} + +get_plugin_interpreter() { + # Return the interpreter (or "") for the plugin file (or fail). + # We return the interpreter instead of wrapping the call, so we don't + # have to export the function (which is not portable). + + # normalize input + agent_plugin="${1#./}" + + extension="${agent_plugin##*.}" + filename="${agent_plugin%.*}" + + # Execute all non python plugins with ./foo + if [ "${extension}" != "py" ]; then + return 0 + fi + + if [ "${filename#"${filename%??}"}" != "_2" ]; then + if [ -n "${NO_PYTHON}" ] || [ -n "${WRONG_PYTHON_COMMAND}" ]; then + section_checkmk_failed_plugin "${agent_plugin}" + return 1 + fi + + if [ -n "${PYTHON3}" ]; then + echo "${PYTHON3}" + return 0 + fi + + if [ ! -e "${filename}_2.py" ]; then + section_checkmk_failed_plugin "${agent_plugin} (Missing Python 3 installation)" + return 1 + fi + + # no python3 found, but python2 plugin file present + return 1 + fi + + if [ -x "${filename%??}.py" ] && [ -n "${PYTHON3}" ]; then + return 1 + fi + + if [ -n "${PYTHON2}" ]; then + echo "${PYTHON2}" + return 0 + fi + + section_checkmk_failed_plugin "${agent_plugin} (missing Python 2 installation)" + return 1 +} + +run_plugins() { + cd "${PLUGINSDIR}" || return + + if ${MK_RUN_SYNC_PARTS}; then + for script in ./*; do + if is_valid_plugin "${script}"; then + if plugin_interpreter=$(get_plugin_interpreter "${script}"); then + # SC2086: We don't want to quote, interpreter is "nothing" if empty, not "''" + # shellcheck disable=SC2086 + _log_section_time ${plugin_interpreter} "${script}" + fi + fi + done + fi + + # Call some plugins only every X'th second + for script in [1-9]*/*; do + if is_valid_plugin "${script}"; then + if plugin_interpreter=$(get_plugin_interpreter "${script}"); then + interval="${script%/*}" + # shellcheck disable=SC2086 + _run_cached_internal "plugins_${script##*/}" "${interval}" "${interval}" $((interval * 3)) $((interval * 2)) _log_section_time ${plugin_interpreter} "${script}" + fi + fi + done +} + +_non_comment_lines() { + grep -Ev '^[[:space:]]*($|#)' "${1}" +} + +_mrpe_get_interval() { + echo "${1}" | grep -E '^\([^)]*\)' | sed -n 's/^.*interval=\([^:)]*\).*$/\1/p' +} + +_mrpe_normalize_spaces() { + # watch out: + # * [:blank:] does not include \t on AIX + # * [:space:] does include \n on Linux + tr -s '\t' ' ' +} + +run_remote_plugins() { + configfile="${1}" + prefix="${2}" + [ -f "${configfile}" ] || return + + _non_comment_lines "${configfile}" | _mrpe_normalize_spaces | while read -r descr rest; do + interval="$(_mrpe_get_interval "${rest}")" + cmdline="${rest#\(*\) }" + + if [ -n "${prefix}" ]; then + cmdline="${prefix} '${cmdline}'" + fi + + if [ -z "${interval}" ]; then + ${MK_RUN_SYNC_PARTS} && run_mrpe "${descr}" "${cmdline}" + else + # Sourcing the agent here is not very performant, but we need 'run_mrpe', and not all shells support exporting of functions. + _run_cached_internal "mrpe_${descr}" "${interval}" "${interval}" $((interval * 3)) $((interval * 2)) "MK_SOURCE_AGENT=yes . '${0}'; run_mrpe \"${descr}\" \"${cmdline}\"" + fi + + done +} + +run_mrpe() { + descr="${1}" + shift + + PLUGIN="${1%% *}" + OUTPUT="$(eval "${MK_DEFINE_LOG_SECTION_TIME}; _log_section_time $*")" + STATUS="$?" + + printf "<<>>\n" + printf "(%s) %s %s %s" "${PLUGIN##*/}" "${descr}" "${STATUS}" "${OUTPUT}" | tr \\n \\1 + printf "\n" + + unset descr PLUGIN OUTPUT STATUS +} + +# +# END COMMON AGENT CODE +# + +run_runas_executor() { + [ -f "${MK_CONFDIR}/runas.cfg" ] || return + + _non_comment_lines "${MK_CONFDIR}/runas.cfg" | while read -r type user configfile; do + prefix="" + if [ "${user}" != "-" ]; then + prefix="su ${user} -c" + fi + + # mrpe includes + if [ "${type}" = "mrpe" ]; then + run_remote_plugins "${configfile}" "${prefix}" + + # local and plugin includes + elif [ "${type}" = "local" ] || [ "${type}" = "plugin" ]; then + if ${MK_RUN_SYNC_PARTS}; then + if [ "${type}" = "local" ]; then + echo "<<>>" + fi + + find "${configfile}" -executable -type f | + while read -r filename; do + if [ -n "${prefix}" ]; then + # SC2086: We don't want to quote since prefix should not be treated as a + # single string but as multiple arguments passed to _log_section_time + # shellcheck disable=SC2086 + _log_section_time ${prefix} "${filename}" + else + _log_section_time "${filename}" + fi + done + fi + fi + done +} + +run_purely_synchronous_sections() { + _log_section_time section_checkmk + + _log_section_time section_cmk_agent_ctl_status + + [ -z "${MK_SKIP_CHECKMK_AGENT_PLUGINS}" ] && _log_section_time section_checkmk_agent_plugins + + [ -z "${MK_SKIP_LABELS}" ] && _log_section_time section_labels + + [ -z "${MK_SKIP_DF}" ] && _log_section_time section_df + + [ -z "${MK_SKIP_SYSTEMD}" ] && _log_section_time section_systemd + + # Filesystem usage for ZFS + [ -z "${MK_SKIP_ZFS}" ] && _log_section_time section_zfs + + # Check NFS mounts by accessing them with stat -f (System + # call statfs()). If this lasts more then 2 seconds we + # consider it as hanging. We need waitmax. + [ -z "${MK_SKIP_NFS_MOUNTS}" ] && _log_section_time section_nfs_mounts "/proc/mounts" + + # Check mount options. Filesystems may switch to 'ro' in case + # of a read error. + [ -z "${MK_SKIP_MOUNTS}" ] && _log_section_time section_mounts + + [ -z "${MK_SKIP_PS}" ] && _log_section_time section_ps + + # Memory usage + [ -z "${MK_SKIP_MEM}" ] && _log_section_time section_mem + + # Load and number of processes + [ -z "${MK_SKIP_CPU}" ] && _log_section_time section_cpu + + # Uptime + [ -z "${MK_SKIP_UPTIME}" ] && _log_section_time section_uptime + + # New variant: Information about speed and state in one section + [ -z "${MK_SKIP_LNX_IF}" ] && _log_section_time section_lnx_if + + # Current state of bonding interfaces + [ -z "${MK_SKIP_BONDING_IF}" ] && _log_section_time section_bonding_interfaces + + # Same for Open vSwitch bonding + [ -z "${MK_SKIP_VSWITCH_BONDING}" ] && _log_section_time section_vswitch_bonding + + # Number of TCP connections in the various states + [ -z "${MK_SKIP_TCP}" ] && _log_section_time section_tcp + + # Linux Multipathing + [ -z "${MK_SKIP_MULTIPATHING}" ] && _log_section_time section_multipathing + + # Performancecounter Platten + [ -z "${MK_SKIP_DISKSTAT}" ] && _log_section_time section_diskstat + + # Performancecounter Kernel + [ -z "${MK_SKIP_KERNEL}" ] && _log_section_time section_kernel + + # RAID status of Linux software RAID + [ -z "${MK_SKIP_MD}" ] && _log_section_time section_md + + # RAID status of Linux RAID via device mapper + [ -z "${MK_SKIP_DM_RAID}" ] && _log_section_time section_dm_raid + + # RAID status of LSI controllers via cfggen + [ -z "${MK_SKIP_CFGGEN}" ] && _log_section_time section_cfggen + + # RAID status of LSI MegaRAID controller via StorCLI or MegaCli. You can download that tool from: + # https://docs.broadcom.com/docs/007.2007.0000.0000_Unified_StorCLI.zip + [ -z "${MK_SKIP_MEGARAID}" ] && _log_section_time section_megaraid + + # RAID status of 3WARE disk controller (by Radoslaw Bak) + [ -z "${MK_SKIP_THREE_WARE_RAID}" ] && _log_section_time section_3ware_raid + + # VirtualBox Guests. Section must always been output. Otherwise the + # check would not be executed in case no guest additions are installed. + # And that is something the check wants to detect + [ -z "${MK_SKIP_VBOX_GUEST}" ] && _log_section_time section_vbox_guest + + # OpenVPN Clients. Currently we assume that the configuration # is in + # /etc/openvpn. We might find a safer way to find the configuration later. + [ -z "${MK_SKIP_OPENVPN}" ] && _log_section_time section_openvpn + + [ -z "${MK_SKIP_NVIDIA}" ] && _log_section_time section_nvidia + + [ -z "${MK_SKIP_DRBD}" ] && _log_section_time section_drbd + + # Heartbeat monitoring + # Different handling for heartbeat clusters with and without CRM + # for the resource state + [ -z "${MK_SKIP_HEARTBEAT}" ] && _log_section_time section_heartbeat + + [ -z "${MK_SKIP_MAILQUEUE}" ] && _log_section_time section_mailqueue + + ## Check status of OMD sites and Checkmk Notification spooler + # Welcome the ZFS check on Linux + # We do not endorse running ZFS on linux if your vendor doesnt support it ;) + # check zpool status + [ -z "${MK_SKIP_ZPOOL}" ] && _log_section_time section_zpool + + # Veritas Cluster Server + # Software is always installed in /opt/VRTSvcs. + # Secure mode must be off to allow root to execute commands + [ -z "${MK_SKIP_VERITAS}" ] && _log_section_time section_veritas_cluster + + ## Fileinfo-Check: put patterns for files into /etc/check_mk/fileinfo.cfg + [ -z "${MK_SKIP_FILEINFO}" ] && _log_section_time section_fileinfo + + # Get stats about OMD monitoring cores running on this machine. + # Since cd is a shell builtin the check does not affect the performance + # on non-OMD machines. + [ -z "${MK_SKIP_OMD_CORES}" ] && _log_section_time section_omd_core + + # Collect states of configured Checkmk site backup jobs + _log_section_time section_mkbackup + + # Get statistics about monitored jobs. Below the job directory there + # is a sub directory per user that ran a job. That directory must be + # owned by the user so that a symlink or hardlink attack for reading + # arbitrary files can be avoided. + [ -z "${MK_SKIP_JOB}" ] && _log_section_time section_job + + # Gather thermal information provided e.g. by acpi + # At the moment only supporting thermal sensors + [ -z "${MK_SKIP_THERMAL}" ] && _log_section_time section_thermal + + # Libelle Business Shadow + [ -z "${MK_SKIP_LIBELLE}" ] && _log_section_time section_libelle + + # HTTP Accelerator Statistics + [ -z "${MK_SKIP_HTTP_ACCELERATOR}" ] && _log_section_time section_http_accelerator + + # Proxmox Cluster + [ -z "${MK_SKIP_PROXMOX}" ] && _log_section_time section_proxmox + + [ -z "${MK_SKIP_HAPROXY}" ] && _log_section_time section_haproxy +} + +run_partially_asynchronous_sections() { + # Time synchronization with Chrony + [ -z "${MK_SKIP_CHRONY}" ] && _log_section_time section_chrony + + # Hardware sensors via IPMI (need ipmitool) + [ -z "${MK_SKIP_IPMITOOL}" ] && _log_section_time section_ipmitool + + # IPMI data via ipmi-sensors (of freeipmi). Please make sure, that if you + # have installed freeipmi that IPMI is really support by your hardware. + [ -z "${MK_SKIP_IPMISENSORS}" ] && _log_section_time section_ipmisensors + + # RAID controllers from areca (Taiwan) + # cli64 can be found at ftp://ftp.areca.com.tw/RaidCards/AP_Drivers/Linux/CLI/ + [ -z "${MK_SKIP_ARECA}" ] && _log_section_time section_areca_raid + + ## Check status of OMD sites and Checkmk Notification spooler + [ -z "${MK_SKIP_OMD}" ] && _log_section_time section_omd + + if [ -z "${MK_SKIP_TIMESYNCHRONISATION}" ]; then + _log_section_time section_timesyncd || _log_section_time section_ntp + fi +} + +main_setup() { + + set_up_remote + + # close stdin + exec /dev/null + fi + + set_up_get_epoch + + set_up_current_shell + + set_variable_defaults + + announce_remote # needs MK_VARDIR! + + PATH="$(set_up_path "${PATH}")" + + set_up_profiling + unset_locale + detect_python + detect_container_environment + set_up_encryption + set_up_disabled_sections + + export_utility_functions +} + +main_sync_parts() { + + run_purely_synchronous_sections + + _log_section_time run_spooler + +} + +main_mixed_parts() { + + run_partially_asynchronous_sections + + run_remote_plugins "${MK_CONFDIR}/mrpe.cfg" "" + + run_runas_executor + + run_local_checks + + run_plugins +} + +main_async_parts() { + # Start new liveupdate process in background + # Starting a new live update process will terminate the old one automatically after + # max. 1 sec. + run_real_time_checks +} + +main_finalize_sync() { + finalize_profiling +} + +# +# BEGIN COMMON AGENT CODE +# + +main() { + + while true; do + + main_setup "$@" + + ( + + ${MK_RUN_SYNC_PARTS} && main_sync_parts + + (${MK_RUN_ASYNC_PARTS} || ${MK_RUN_SYNC_PARTS}) && main_mixed_parts + + ${MK_RUN_ASYNC_PARTS} && main_async_parts + + ${MK_RUN_SYNC_PARTS} && main_finalize_sync + + ) | { if ${MK_RUN_SYNC_PARTS}; then optionally_encrypt "${PASSPHRASE}" ""; else cat; fi; } + + [ "${MK_LOOP_INTERVAL}" -gt 0 ] 2>/dev/null || return 0 + + sleep "${MK_LOOP_INTERVAL}" + + done + +} + +[ -z "${MK_SOURCE_AGENT}" ] && main "$@" diff --git a/checkmk-agent/files/sshwrapper b/checkmk-agent/files/sshwrapper new file mode 100644 index 0000000..f543770 --- /dev/null +++ b/checkmk-agent/files/sshwrapper @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail +# The agent does not need stdin +exec