由于公司业务服务器分布比较广,如果按照之前的监控架构的话,就是每个IDC增加一个nagios监控,想来这样子也有好处,可以互相监控,但是由于每个IDC部署一个nagios,无疑增加了监控人员的查看难度,所以就研究了一下分布式nagios监控。
一,分角色
监控中心服务器,分布式服务器,被监控服务器
监控中心服务器:通过NSCA获取分布式监控服务器的相关状态,呈现相关服务器状态和发出报警等;
分布式服务器:通过对被监控服务器状态采集并且把被监控服务器的状态通过send_nsca发送给监控中心服务器。
被监控服务器:被监控服务器就是生产环境服务器。
二,详细部署
1,被监控服务器
tar -zxvf nagios-plugins-1.4.15.tar.gz
cd nagios-plugins-1.4.15
./configure
make
make install
chown nagios.nagios /usr/local/nagios
chown -R nagios.nagios /usr/local/nagios/libexec
cd ..
ls
tar -zxvf nrpe-2.12.tar.gz
pwd
ls
cd nrpe-2.12
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
vi /usr/local/nagios/etc/nrpe.cfg
将allowed_hosts=127.0.0.1
修改成你的nagios分布式服务器的ip
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d #启动nrpe
netstat -anl|grep 5666 #测试监听端口
2,安装分布式服务器
useradd nagios
passwd nagios
groupadd nagcmd
usermod -a -G nagcmd nagios
usermod -a -G nagcmd apache #创建Nagios用户 创建组 把用户加入组 并加入apache(-a 表示追加,不会覆盖用户已有的附加组)
tar -zxvf nagios-3.2.3.tar.gz
cd nagios-3.2.3
./configure --with-command-group=nagcmd
make all
make install
make install-init
make install-config
make install-commandmode
tar -zxvf nagios-plugins-1.4.15.tar.gz
cd nagios-plugins-1.4.15
./configure --with-nagios-user=nagios --with-nagios-group=nagcmd
make
make install
chkconfig --add nagios
chkconfig nagios on
tar -zxvf nrpe-2.12.tar.gz
cd nrpe-2.12
./configure
make all
make install-plugin
/usr/local/nagios/libexec/check_nrpe -H 192.168.20.100 #测试被监控服务器是否连通,正常情况下会返回被监控端的NRPE版本
vi /usr/local/nagios/etc/objects/commands.cfg
#check nrpe
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
} #添加nrpe外部检测命令
tar -zxvf nsca-2.7.2.tar.gz
cd nsca-2.7.2
./configure
make all
cp sample-config/send_nsca.cfg /usr/local/nagios/etc/
cp src/send_nsca /usr/local/nagios/bin/
cd /usr/local/nagios/etc/
chown nagios.nagios send_nsca.cfg
cd /usr/local/nagios/bin/
chown nagios.nagios send_nsca
vi /usr/local/nagios/libexec/submit_check_result #创建脚本
#!/bin/sh
# submit_check_result: forward one service check result to the central
# monitoring server by piping it into send_nsca.
#
# Arguments:
# $1 = host_name (Short name of host that the service is
# associated with)
# $2 = svc_description (Description of the service)
# $3 = state_string (A string representing the status of
# the given service - "OK", "WARNING", "CRITICAL"
# or "UNKNOWN")
# $4 = plugin_output (A text string that should be used
# as the plugin output for the service checks)
#
# Convert the state string to the corresponding plugin return code.
# UNKNOWN is 3 in the Nagios plugin API; any unrecognized state string is
# reported as UNKNOWN as well, instead of -1, which lies outside the valid
# 0-3 range.
case "$3" in
  OK)
    return_code=0
    ;;
  WARNING)
    return_code=1
    ;;
  CRITICAL)
    return_code=2
    ;;
  *)
    return_code=3
    ;;
esac

# Pipe the tab-separated service check info into the send_nsca program,
# which in turn transmits the data to the nsca daemon on the central
# monitoring server. 192.168.20.195 is the central monitoring server's
# address; change it to match your environment.
/bin/printf "%s\t%s\t%s\t%s\n" "$1" "$2" "$return_code" "$4" \
  | /usr/local/nagios/bin/send_nsca 192.168.20.195 -c /usr/local/nagios/etc/send_nsca.cfg
chmod +x /usr/local/nagios/libexec/submit_check_result
chown nagios.nagios /usr/local/nagios/libexec/submit_check_result
vi /usr/local/nagios/etc/objects/commands.cfg #增加如下检测命令
define command{
command_name submit_check_result
command_line /usr/local/nagios/libexec/submit_check_result $HOSTNAME$ '$SERVICEDESC$' $SERVICESTATE$ '$SERVICEOUTPUT$'
}
vi /usr/local/nagios/etc/nagios.cfg
enable_notifications=0 #禁用告警
obsess_over_services=1 #开启被动监控
ocsp_command=submit_check_result #定义每次执行完检查后执行的命令
obsess_over_hosts=1 #开启主机被动监控
ochp_command=submit_check_result #指定每次执行完主机检查后执行的命令
vi /usr/local/nagios/etc/send_nsca.cfg
password=urgamer #设置密码,此处设置的密码要和监控中心服务器一致
配置被监控的服务器,此处的配置在分布式监控服务器上进行
cd /usr/local/nagios/etc/objects/
vi hosts.cfg
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name urg-test01
alias linux-test01
address 192.168.20.100
}
vi services.cfg
define service{
use local-service ; Name of service template to use
host_name urg-test01
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use local-service ; Name of service template to use
host_name urg-test01
service_description Root Partition
check_command check_nrpe!check_local_disk!20%!10%!/
}
define service{
use local-service ; Name of service template to use
host_name urg-test01
service_description Current Users
check_command check_nrpe!check_local_users!20!50
}
define service{
use local-service ; Name of service template to use
host_name urg-test01
service_description Total Processes
check_command check_nrpe!check_local_procs!250!400!RSZDT
}
vi /usr/local/nagios/etc/nagios.cfg #添加以下两行配置
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/services.cfg
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查配置文件
service nagios start #启动nagios
3,安装监控中心服务器
首先确认监控中心服务器已经安装了apache且禁用了SELinux
useradd nagios
passwd nagios
groupadd nagcmd
usermod -a -G nagcmd nagios
usermod -a -G nagcmd apache #创建Nagios用户 创建组 把用户加入组 并加入apache(-a 表示追加,不会覆盖用户已有的附加组)
tar -zxvf nagios-3.2.3.tar.gz
cd nagios-3.2.3
./configure --with-command-group=nagcmd
make all
make install
make install-init
make install-config
make install-commandmode
make install-webconf
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
tar xzf nagios-plugins-1.4.11.tar.gz
cd nagios-plugins-1.4.11
./configure --with-nagios-user=nagios --with-nagios-group=nagcmd
make
make install
chkconfig --add nagios
chkconfig nagios on
tar -zxvf nsca-2.7.2.tar.gz
cd nsca-2.7.2
./configure
make all
cp /usr/local/src/nsca-2.7.2/src/nsca /usr/local/nagios/bin/
chown nagios:nagios /usr/local/nagios/bin/nsca
cp /usr/local/src/nsca-2.7.2/sample-config/nsca.cfg /usr/local/nagios/etc
chown nagios:nagios /usr/local/nagios/etc/nsca.cfg
vi /usr/local/nagios/etc/nsca.cfg
password=urgamer #此处和分布式监控服务器密码一致
vi /usr/local/nagios/etc/nagios.cfg
check_external_commands=1 # 配置nagios检查外部命令
accept_passive_service_checks=1 # 配置接受被动服务检测的结果
accept_passive_host_checks=1 #配置接受被动主机检测的结果
cfg_dir=/usr/local/nagios/etc/monitor #加载下面创建的monitor目录中的配置文件,否则monitor.cfg不会被读取
cd /usr/local/nagios/etc/
mkdir monitor
cd monitor
vi monitor.cfg
define host{
use linux-server
host_name urg-test01
address 192.168.20.100
passive_checks_enabled 1
active_checks_enabled 0
}
define service{
use local-service
host_name urg-test01
service_description Root Partition
check_command check_local_disk!30%!10!/
check_freshness 1
freshness_threshold 450
passive_checks_enabled 1
active_checks_enabled 0
}
/usr/local/nagios/bin/nsca -d -c /usr/local/nagios/etc/nsca.cfg
service nagios restart
此时重新打开浏览器就会显示新加的服务器。