二,配置nagios
因为/usr/local/nagios-plus没有关于内存,cpu及uptime的监控,所以需要将相应的监控脚本拷贝到nagios-plus目录下,并设置相应的配置文件。
1)编辑/usr/local/nagios/etc/nrpe.cfg添加
command[check_mem]=/usr/local/nagios/libexec/check_mem -w 20 -c 15
command[check_cpu.sh]=/usr/local/nagios/libexec/check_cpu.sh -w 20 -c 15
command[check_uptime.sh]=/usr/local/nagios/libexec/check_uptime.sh -w 20 -c 15
2) 编辑/usr/local/nagios/etc/objects/commands.cfg
#nrpe set
define command{
command_name check_nrpe ; 定义的命令名就叫作check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
#说明:
#$USER1$/check_nrpe会通过引用resource.cfg里定义的宏$USER1$获得/usr/local/nagios/libexec/check_nrpe这个绝对路径。
#-H $HOSTADDRESS$ 用来获得指定被监测主机的IP地址,$HOSTADDRESS$变量会
#通过定义主机名查找到host段中的IP地址。
#-c $ARG1$ 用来指定被监测主机上NRPE守护进程运行着的NRPE命令名。
define command{
command_name check_mem
command_line $USER1$/check_mem -w $ARG1$ -c $ARG2$
}
define command{
command_name check_cpu.sh
command_line $USER1$/check_cpu.sh -w $ARG1$ -c $ARG2$
}
define command{
command_name check_uptime.sh
command_line $USER1$/check_uptime.sh -w $ARG1$ -c $ARG2$
}
修改nagios主配文件/usr/local/nagios/etc/nagios.cfg
取消cfg_file=/usr/local/nagios/etc/objects/host.cfg这一行的注释,此条用于指定nagios的对象配置文件路径。
配置host.cfg文件
define host{
host_name Nagios-Server
alias Nagios Server
address 127.0.0.1
check_command check-host-alive
check_interval 5
max_check_attempts 5
retry_interval 1
check_period 24x7
process_perf_data 0
retain_nonstatus_information 0
contact_groups admins
notification_interval 30
notification_period 24x7
notification_options d,u,r
}
define host{
host_name Cacti-Server
alias Cacti Server
address 10.2.5.130
check_command check-host-alive
check_interval 5
max_check_attempts 5
retry_interval 1
check_period 24x7
process_perf_data 0
retain_nonstatus_information 0
contact_groups admins
notification_interval 30
notification_period 24x7
notification_options d,u,r
}
配置services.cfg文件,监控的项目有:查看主机存活(网络ping),ssh服务,内存使用量,硬盘或目录使用率,当前用户登录数,cpu使用率,系统负载,系统运行时间,进程数等
若有多个主机,复制此配置文件只将host_name更改一下即可。
define service {
host_name Nagios-Server
service_description check-host-alive
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check-host-alive
}
define service {
host_name Nagios-Server
service_description check-mem
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_mem
}
define service {
host_name Nagios-Server
service_description check-disk
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_sda3
}
define service {
host_name Nagios-Server
service_description check-users
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_users
}
define service {
host_name Nagios-Server
service_description check-load
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_load
}
define service {
host_name Nagios-Server
service_description check-cpu
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_cpu.sh
}
define service {
host_name Nagios-Server
service_description check-uptime
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_uptime.sh
}
define service {
host_name Nagios-Server
service_description check-total-procs
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_total_procs
}
define service{
use local-service ; Name of service template to use
host_name Nagios-Server
service_description SSH
check_command check_ssh
notifications_enabled 0
}
配置文件检查命令:
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
重启nagios服务:
/etc/init.d/nagios restart
/etc/init.d/httpd restart
验证:
再次打开http://ip/nagios确认配置已经生效。
配置服务开机启动
chkconfig --add nagios
chkconfig nagios on
chkconfig --level 35 httpd on
注:用nagios监控windows机器,需要在被监控的机器上安装NSClient软件。
被监控端的安装配置
首先为被监控端准备nagios-plugins和nrpe的安装包。
添加用户nagios:
useradd nagios
我的安装包放在/usr目录下面;
cd /usr
tar xzvf nagios-plugins-XXX.tar.gz
cd nagios-plugins-XXX
./configure --prefix=/usr/local/nagios
make
make install
./configure --with-nagios-user=nagios --with-nagios-group=nagios --prefix=/usr/local/nagios &&make && make install
chown nagios.nagios /usr/local/nagios
chown -R nagios.nagios /usr/local/nagios/libexec
再在被监控主机上安装NRPE扩展插件程序:
tar -zxvf nrpe-xxx.tar.gz
cd nrpe-xxx
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
make install-xinetd
相关文件的修改
vi /etc/xinetd.d/nrpe
only_from= 127.0.0.1 10.2.5.130
(注意,10.2.5.130是监控服务器的IP,也就是允许10.2.5.130来监控本机的服务)
vi /etc/services
nrpe 5666/tcp # NRPE(添加一行)
service xinetd restart
如果配置没有问题的话,输入如下命令。查看配置是否配好
netstat -at | grep nrpe
tcp 0 0 *:nrpe *:* LISTEN
找到NRPE运行的执行程序
ll /usr/local/nagios/bin/nrpe
-rwxrwxr-x 1 nagios nagios 93420 Oct 7 01:16 /usr/local/nagios/bin/nrpe
了解NRPE的执行程序的使用方式
/usr/local/nagios/bin/nrpe -h
测试NRPE本地是否正常启动
/usr/local/nagios/libexec/check_nrpe -H localhost
如果正确的话,会返回NRPE的版本号
将NRPE的启动命令加入系统启动脚本中
echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >> /etc/rc.d/rc.local
到此,被监控主机要做的工作已经完成了。