This kill switch stops machines from running when the cooling systems go down. It runs from /etc/cron.hourly and has been tested on SL6.6 on Dell R410 and Dell R710 servers. It requires Dell's OpenManage software to be installed.
# cat omreportkillswitch.sh
#!/bin/bash
#
# Kill switch using the chassis temperature reported by Dell OpenManage.
#
# Reads the chassis temperature via `omreport chassis temps` and:
#   - stops condor when the reading is above CONDOR_STOP_TEMP_C
#   - halts the node when the reading is above SHUTDOWN_TEMP_C
#
# If the temperature cannot be read or parsed, nothing is stopped; an error
# is written to stderr and the script returns a non-zero status.

readonly OMREPORT=/opt/dell/srvadmin/bin/omreport
readonly CONDOR_STOP_TEMP_C=28   # WARNING threshold: stop condor above this
readonly SHUTDOWN_TEMP_C=32      # CRITICAL threshold: shut down above this

#######################################
# Extract the whole-degree value of the first "Reading" line.
# Arguments: none (reads `omreport chassis temps` output on stdin)
# Outputs:   the integer part of the third whitespace-separated field of
#            the first line containing "Reading"; nothing if no such line
#######################################
first_reading_celsius() {
  # Only the first probe is used so that several "Reading" lines cannot
  # produce a multi-line value that would break the numeric comparisons.
  # NOTE(review): assumes the first probe listed is the ambient/inlet
  # sensor -- confirm on hardware that reports more than one probe.
  awk '/Reading/ { split($3, parts, "."); print parts[1]; exit }'
}

#######################################
# Read the temperature and apply the two kill-switch thresholds.
# Globals:   OMREPORT, CONDOR_STOP_TEMP_C, SHUTDOWN_TEMP_C (read)
# Outputs:   diagnostics on stderr when the temperature is unavailable
# Returns:   0 when a reading was evaluated, 1 otherwise
#######################################
main() {
  local report temp

  if ! report=$("$OMREPORT" chassis temps); then
    printf 'killswitch: failed to run %s chassis temps\n' "$OMREPORT" >&2
    return 1
  fi

  temp=$(first_reading_celsius <<<"$report")
  if [[ ! "$temp" =~ ^-?[0-9]+$ ]]; then
    printf 'killswitch: no usable temperature reading (got "%s")\n' \
      "$temp" >&2
    return 1
  fi

  # disable condor killswitch
  if [ "$temp" -gt "$CONDOR_STOP_TEMP_C" ]; then
    # WARNING temps over 28C stop condor
    /etc/init.d/condor stop
  fi

  # shutdown node killswitch
  if [ "$temp" -gt "$SHUTDOWN_TEMP_C" ]; then
    # CRITICAL temps over 32C shutdown node
    /sbin/shutdown -h now
  fi

  return 0
}

main "$@"
No comments:
Post a Comment