# aptitude update && aptitude install heartbeat # mkdir -p /var/run/heartbeat/rsctmp/send_arp
# tar cvf /var/lib/dokuwiki/data/media/openqrm_had.tar /etc/ha.d/authkeys /etc/ha.d/ha.cf /etc/ha.d/haresources /etc/ha.d/resource.d/drbddisk /etc/ha.d/resource.d/fsck_jfs /etc/ha.d/resource.d/myopenqrm /etc/ha.d/resource.d/mydrbd /etc/ha.d/resource.d/killpid
(echo -ne "auth 1\n1 sha1 "; dd if=/dev/urandom bs=512 count=1 | openssl sha1) > /etc/ha.d/authkeys
chmod 0600 /etc/ha.d/authkeys
Die so erstellte Datei muss auf beide Knoten kopiert werden.
rbopenqrm01 \
mac::bond0::00:19:19:19:19:19 \
IPaddr2::192.168.1.100/24/bond0:0 \
mydrbd \
drbddisk::data \
LVM::data \
mount_all_dev \
check_lsof \
mysql \
ntp \
squid \
apache2 \
nagios-nrpe-server
# Derive a default PID-file path from the script's own name unless the
# caller already set PIDDATEI ("PID file").
if [ -z "${PIDDATEI}" ] ; then
  SCRIPT="$(basename "${0}")"
  # Everything before the FIRST dot, e.g. "myscript.sh" -> "myscript".
  # Parameter expansion replaces the old rev|sed|rev pipeline (same result,
  # no three extra processes).
  PIDNAME="${SCRIPT%%.*}"
  PIDDATEI="/var/run/${PIDNAME}.pid"
fi
##------------------------------------------------------------------
# Create the PID file containing this process' PID ($$).
# Historic behaviour kept: if the PID file already exists, another instance
# is assumed to be running and the whole script exits with status 0.
# On success/failure a German status message is printed to stdout.
pid_beginn()
{
  if [ -e "${PIDDATEI}" ] ; then
    exit 0
  else
    # Quote the path so it also works when PIDDATEI contains spaces.
    echo "$$" > "${PIDDATEI}" && echo "${PIDDATEI} wurde angelegt" || echo "${HOSTNAME}:${SCRIPT} konnte ${PIDDATEI} nicht anlegen..."
  fi
}
##------------------------------------------------------------------
# Report whether the script is considered running, judged solely by the
# existence of the PID file (the stored PID is NOT verified against /proc,
# so a stale file still reports "running").
pid_status()
{
  if [ -e "${PIDDATEI}" ] ; then
    # Quoted $(cat ...) argument: robust against spaces in the path.
    echo "${SCRIPT} läuft schon mit der PID $(cat "${PIDDATEI}") ..."
  else
    echo "${SCRIPT} ist gestoppt ..."
  fi
}
##------------------------------------------------------------------
# Remove the PID file (if present); prints an error message when the
# file cannot be deleted.  No-op when the file does not exist.
pid_ende()
{
  if [ -e "${PIDDATEI}" ] ; then
    # Quoted path: robust against spaces in PIDDATEI.
    rm -fv "${PIDDATEI}" || echo "${HOSTNAME}:${SCRIPT} PID-File (${PIDDATEI}) kann nicht geloescht werden..."
  fi
}
##------------------------------------------------------------------
#!/bin/bash
#set -x
# Heartbeat R1 resource script that sets a given MAC address on a network
# interface.  Invoked by heartbeat as: <script> <interface> <mac> {start|stop|status}
# (matches the "mac::bond0::..." entry style in haresources).
# Source the shared PID-file helper functions so the script is LSB-conformant.
. /opt/pidfile.cfg
# Heartbeat passes the action (start/stop/status) as the LAST argument;
# extract it with awk's $NF.
KOMMANDO="$(echo "${@}" | awk '{print $NF}')"
case ${KOMMANDO} in
start)
pid_beginn
# Save the interface's current (original) MAC address — read from the
# matching /sys/.../address file — so it can be restored on "stop".
# NOTE(review): this overwrites the PID that pid_beginn just wrote into
# the same file; apparently intentional, the file doubles as MAC storage.
cat $(find /sys/ -type f | fgrep 'address' | fgrep "${1}") > ${PIDDATEI}
# Take the interface ($1) down, set the cluster MAC ($2), bring it back up.
ifdown ${1}
ifconfig ${1} hw ether ${2}
ifup ${1}
;;
status)
pid_status
;;
stop)
if [ -e "${PIDDATEI}" ] ; then
# Restore the original MAC address saved at "start", then re-up the interface.
ifdown ${1}
ifconfig ${1} hw ether $(cat ${PIDDATEI})
ifup ${1}
fi
pid_ende
;;
esac
exit 0
#!/bin/bash
#
# This script is intended to be used as resource script by heartbeat
#
# Copyright 2003-2008 LINBIT Information Technologies
# Philipp Reisner, Lars Ellenberg
#
###
DEFAULTFILE="/etc/default/drbd"
DRBDADM="/sbin/drbdadm"
# Pull in local overrides (e.g. alternative DRBDADM path) if present.
if [ -f $DEFAULTFILE ]; then
. $DEFAULTFILE
fi
# Usage: drbddisk [resource] {start|stop|status}
# With one argument only the action is given and ALL resources are meant.
if [ "$#" -eq 2 ]; then
RES="$1"
CMD="$2"
else
RES="all"
CMD="$1"
fi
## EXIT CODES
# since this is a "legacy heartbeat R1 resource agent" script,
# exit codes actually do not matter that much as long as we conform to
# http://wiki.linux-ha.org/HeartbeatResourceAgent
# but it does not hurt to conform to lsb init-script exit codes,
# where we can.
# http://refspecs.linux-foundation.org/LSB_3.1.0/
# LSB-Core-generic/LSB-Core-generic/iniscrptact.html
####
case "$CMD" in
start)
# try several times, in case heartbeat deadtime
# was smaller than drbd ping time
try=6
while true; do
$DRBDADM primary $RES && break
let "--try" || exit 1 # LSB generic error
sleep 1
done
;;
stop)
$DRBDADM secondary $RES
ex=$?
case $ex in
0)
exit 0
;;
11)
# see drbdadm_main.c adm_generic and m_system
# as well as drbdsetup.c:
# in fact a role change was attempted, but failed.
echo >&2 "$DRBDADM secondary $RES: exit code $ex, mapping to 1"
exit 1 # LSB generic error
;;
*)
# other error, may be syntax error in config file,
# anything else: to not confuse heartbeat further,
# and avoid reboot due to "failed stop recovery",
# pretend that we succeeded in stopping this.
echo >&2 "$DRBDADM secondary $RES: exit code $ex, mapping to 0"
exit 0
;;
esac
;;
status)
if [ "$RES" = "all" ]; then
echo "A resource name is required for status inquiries."
exit 10
fi
# "drbdadm role" prints e.g. "Primary/Secondary"; keep only our own
# (local) role, i.e. the part before the slash.
ST=$( $DRBDADM role $RES )
STATE=${ST%/*}
case $STATE in
Primary)
echo "running (Primary)"
exit 0 # LSB status "service is OK"
;;
Secondary|Unconfigured)
echo "stopped ($STATE)" ;;
"")
echo "stopped" ;;
*)
# unexpected. whatever...
echo "stopped ($ST)" ;;
esac
exit 3 # LSB status "service is not running"
;;
*)
echo "Usage: drbddisk [resource] {start|stop|status}"
exit 1
;;
esac
exit 0
#!/bin/bash
# Heartbeat resource script: fsck and mount every "noauto" filesystem in
# /etc/fstab that belongs to the HA volume group on start, and unmount
# them (in reverse mount order) on stop.
VOLGRUPPE="data"
# Source the shared PID-file helper functions so the script is LSB-conformant.
. /opt/pidfile.cfg
case ${1} in
start)
pid_beginn
# Candidate devices: every noauto fstab entry whose device field matches
# /dev/mapper/<VG>-*, /dev/<VG>/*, or a LABEL=... spec (the latter is
# resolved to a device path via "blkid -L <label>").  gsub strips quotes.
for mountdev in $(fgrep noauto /etc/fstab | awk '{print $1}' | while read L1BEZ REST; do echo "${L1BEZ}" | egrep "^/dev/mapper/${VOLGRUPPE}-" | awk '{gsub("[\"]","");print $1}'; echo "${L1BEZ}" | egrep "^/dev/${VOLGRUPPE}/" | awk '{gsub("[\"]","");print $1}'; echo "${L1BEZ}" | awk -F'=' '/^LABEL=/ {gsub("[\"]","");print $2}' | while read L2BEZ; do blkid -L ${L2BEZ}; done; done)
do
# "blkid -o udev" emits KEY=VALUE lines (ID_FS_TYPE, ID_FS_LABEL, ...);
# export each one into the environment of this shell.
for umgebung in $(blkid -o udev ${mountdev})
do
export ${umgebung}
done
# Only fsck/mount when blkid could determine a filesystem type;
# mount only happens if the fsck succeeded (&&).
if [ -n "${ID_FS_TYPE}" ] ; then
fsck -t ${ID_FS_TYPE} -y ${mountdev} && mount -v ${mountdev}
fi
done
;;
status)
pid_status
;;
stop)
# Unmount in reverse order of mounting: tac reverses /etc/mtab so the
# most recently mounted filesystems are unmounted first.
for mountpoint in $(tac /etc/mtab | egrep "^/dev/mapper/${VOLGRUPPE}-" | awk '{print $2}')
do
umount -v ${mountpoint}
done
pid_ende
;;
esac
exit 0
#!/bin/sh
# Heartbeat resource script: on "stop", wait until no process holds open
# files below the HA mount points any more (so the filesystems can be
# unmounted), nudging known offenders (collectd) along with a TERM signal.
HAVOLGR="lager" # HA volume group
# Build an egrep alternation of all HA mount points, taken from the noauto
# fstab entries of the HA volume group (LABEL=, /dev/mapper/<VG>-, /dev/<VG>/).
# Each mount point is prefixed with a space (to match whole lsof fields) and
# the list is joined with "|"; the trailing "|" is stripped.
MPOINTS="$(fgrep 'noauto' /etc/fstab | egrep "^LABEL=|^/dev/mapper/${HAVOLGR}-|^/dev/${HAVOLGR}/" | awk '{print $2}' | awk '{print " "$1}' | tr -s '\n' '|' | sed 's/|$//' | head -n1)"
case $1 in
start)
# Nothing to do on start.
;;
status)
# Show which processes currently hold files open below the HA mount points.
lsof | egrep "${MPOINTS}";echo
;;
stop)
# Loop until lsof reports no more open file handles on the HA mount points.
while [ -n "$(lsof | egrep "${MPOINTS}")" ]
do
echo "Warte bis die noch offenen Datei-Haendler geschlossen sind..."
sleep 4
# collectd is a known blocker: collect its PIDs and send them SIGTERM.
RPIDS="$(lsof | fgrep '/collectd/' | awk '{print $2}' | sort | uniq)";
if [ -n "${RPIDS}" ] ; then
kill ${RPIDS};
fi;
sleep 4 ;
done
;;
esac
exit 0
# /etc/init.d/heartbeat standby && tail -f /var/log/syslog May 21 11:40:58 rbopenqrm01 heartbeat: [1893]: info: rbopenqrm02 wants to go standby [all] May 21 11:40:58 rbopenqrm01 ipfail: [2172]: debug: Other side is unstable. May 21 11:40:58 rbopenqrm01 kernel: [ 5357.069101] block drbd1: peer( Primary -> Secondary ) May 21 11:40:59 rbopenqrm01 heartbeat: [1893]: info: standby: acquire [all] resources from rbopenqrm02 May 21 11:40:59 rbopenqrm01 heartbeat: [3830]: info: acquire all HA resources (standby). May 21 11:40:59 rbopenqrm01 ResourceManager[3844]: info: Acquiring resource group: rbopenqrm01 IPaddr::10.10.5.80/24/br0 mydrbd drbddisk::data LVM::data Filesystem::/dev/data/mysql::/var/lib/mysql::jfs Filesystem::/dev/data/etcmysql::/etc/mysql::jfs mysql May 21 11:40:59 rbopenqrm01 IPaddr[3871]: INFO: Resource is stopped May 21 11:40:59 rbopenqrm01 ResourceManager[3844]: info: Running /etc/ha.d/resource.d/IPaddr 10.10.5.80/24/br0 start May 21 11:40:59 rbopenqrm01 IPaddr[3951]: INFO: Using calculated netmask for 10.10.5.80: 255.255.255.0 May 21 11:40:59 rbopenqrm01 IPaddr[3951]: INFO: eval ifconfig br0:0 10.10.5.80 netmask 255.255.255.0 broadcast 10.10.5.255 May 21 11:40:59 rbopenqrm01 IPaddr[3927]: INFO: Success May 21 11:40:59 rbopenqrm01 ResourceManager[3844]: info: Running /etc/ha.d/resource.d/mydrbd start May 21 11:40:59 rbopenqrm01 mountd[3767]: Caught signal 15, un-registering and exiting. 
May 21 11:40:59 rbopenqrm01 kernel: [ 5357.466126] nfsd: last server has exited, flushing export cache May 21 11:40:59 rbopenqrm01 ResourceManager[3844]: info: Running /etc/ha.d/resource.d/drbddisk data start May 21 11:40:59 rbopenqrm01 kernel: [ 5357.492064] block drbd1: role( Secondary -> Primary ) May 21 11:40:59 rbopenqrm01 LVM[4119]: INFO: LVM Volume data is offline May 21 11:40:59 rbopenqrm01 LVM[4113]: INFO: Resource is stopped May 21 11:40:59 rbopenqrm01 ResourceManager[3844]: info: Running /etc/ha.d/resource.d/LVM data start May 21 11:40:59 rbopenqrm01 LVM[4171]: INFO: Activating volume group data May 21 11:40:59 rbopenqrm01 LVM[4171]: INFO: File descriptor 4 (socket:[5467]) leaked on vgscan invocation. Parent PID 4171: /bin/sh File descriptor 5 (socket:[5468]) leaked on vgscan invocation. Parent PID 4171: /bin/sh File descriptor 6 (/proc/loadavg) leaked on vgscan invocation. Parent PID 4171: /bin/sh File descriptor 7 (socket:[5861]) leaked on vgscan invocation. Parent PID 4171: /bin/sh File descriptor 8 (socket:[5479]) leaked on vgscan invocation. Parent PID 4171: /bin/sh File descriptor 9 (socket:[5474]) leaked on vgscan invocation. Parent PID 4171: /bin/sh File descriptor 10 (socket:[5867]) leaked on vgscan invocation. Parent PID 4171: /bin/sh File descriptor 11 (socket:[5476]) leaked on vgscan invocation. Parent PID 4171: /bin/sh File descriptor 12 (socket:[5481]) leaked on vgscan invocation. Parent PID 4171: /bin/sh File des criptor 13 (pipe:[9664]) leaked on vgscan invocation. Parent PID 4171: /bin/sh Reading all physical volumes. This may take a while... Found volume group "data" using metadata type lvm2 May 21 11:40:59 rbopenqrm01 LVM[4171]: INFO: File descriptor 4 (socket:[5467]) leaked on vgchange invocation. Parent PID 4171: /bin/sh File descriptor 5 (socket:[5468]) leaked on vgchange invocation. Parent PID 4171: /bin/sh File descriptor 6 (/proc/loadavg) leaked on vgchange invocation. 
Parent PID 4171: /bin/sh File descriptor 7 (socket:[5861]) leaked on vgchange invocation. Parent PID 4171: /bin/sh File descriptor 8 (socket:[5479]) leaked on vgchange invocation. Parent PID 4171: /bin/sh File descriptor 9 (socket:[5474]) leaked on vgchange invocation. Parent PID 4171: /bin/sh File descriptor 10 (socket:[5867]) leaked on vgchange invocation. Parent PID 4171: /bin/sh File descriptor 11 (socket:[5476]) leaked on vgchange invocation. Parent PID 4171: /bin/sh File descriptor 12 (socket:[5481]) leaked on vgchange invocation. Parent PID 4171: /bin/sh File descriptor 13 (pipe:[9664]) leaked on vgchange invocation. Parent PID 4171: /bin/sh 2 logical volume(s) in volume group "data" now active May 21 11:40:59 rbopenqrm01 LVM[4165]: INFO: Success May 21 11:40:59 rbopenqrm01 Filesystem[4233]: INFO: Resource is stopped May 21 11:40:59 rbopenqrm01 ResourceManager[3844]: info: Running /etc/ha.d/resource.d/Filesystem /dev/data/mysql /var/lib/mysql jfs start May 21 11:40:59 rbopenqrm01 Filesystem[4303]: INFO: Running start for /dev/data/mysql on /var/lib/mysql May 21 11:40:59 rbopenqrm01 Filesystem[4297]: INFO: Success May 21 11:40:59 rbopenqrm01 Filesystem[4375]: INFO: Resource is stopped May 21 11:40:59 rbopenqrm01 ResourceManager[3844]: info: Running /etc/ha.d/resource.d/Filesystem /dev/data/etcmysql /etc/mysql jfs start May 21 11:40:59 rbopenqrm01 Filesystem[4444]: INFO: Running start for /dev/data/etcmysql on /etc/mysql May 21 11:41:00 rbopenqrm01 Filesystem[4438]: INFO: Success May 21 11:41:00 rbopenqrm01 ResourceManager[3844]: info: Running /etc/init.d/mysql start May 21 11:41:00 rbopenqrm01 kernel: [ 5358.319849] type=1505 audit(1274434860.115:10): operation="profile_replace" pid=4529 name="/usr/sbin/mysqld" May 21 11:41:01 rbopenqrm01 /etc/mysql/debian-start[4544]: Upgrading MySQL tables if necessary. May 21 11:41:01 rbopenqrm01 heartbeat: [3830]: info: all HA resource acquisition completed (standby). 
May 21 11:41:01 rbopenqrm01 heartbeat: [1893]: info: Standby resource acquisition done [all]. May 21 11:41:01 rbopenqrm01 /etc/mysql/debian-start[4547]: /usr/bin/mysql_upgrade: the '--basedir' option is always ignored May 21 11:41:01 rbopenqrm01 /etc/mysql/debian-start[4547]: Looking for 'mysql' as: /usr/bin/mysql May 21 11:41:01 rbopenqrm01 /etc/mysql/debian-start[4547]: Looking for 'mysqlcheck' as: /usr/bin/mysqlcheck May 21 11:41:01 rbopenqrm01 /etc/mysql/debian-start[4547]: This installation of MySQL is already upgraded to 5.1.41, use --force if you still need to run mysql_upgrade May 21 11:41:01 rbopenqrm01 /etc/mysql/debian-start[4554]: Checking for insecure root accounts. May 21 11:41:01 rbopenqrm01 /etc/mysql/debian-start[4558]: Triggering myisam-recover for all MyISAM tables May 21 11:41:02 rbopenqrm01 heartbeat: [1893]: info: remote resource transition completed.
Bei einem sauberen Schwenk sollte hier "remote resource transition completed." stehen.
Pacemaker ⇒ Pacemaker und Corosync
Pacemaker, welches ursprünglich aus dem Entwicklungsstrang von HA-Linux (Heartbeat) entkoppelt wurde, erledigt hier einen wesentlich besseren Job als Heartbeat. Vor allem lassen sich nicht nur komplette Knoten, sondern auch einzelne Services überwachen und bei Bedarf auf demselben oder einem anderen Knoten starten.
DRBD is frequently found in system configurations using the Linux-HA cluster manager ("Heartbeat"). Heartbeat has been superseded by the Pacemaker cluster manager and the latter should be used whenever possible — please see Chapter 8, Integrating DRBD with Pacemaker clusters for more information. Nonetheless, this chapter outlines Heartbeat configurations and is intended for users who must maintain existing legacy Heartbeat systems for policy reasons.