#!/bin/sh
#
# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#
#ident	"@(#)bootcluster	1.50	04/09/03 SMI"

#
# Startup script for clusters.
#

#
# Marker files used by the daemon start-up checks further down in this
# script: each file is removed before its daemon is launched and must
# exist once the launch command has returned, otherwise a crash dump is
# forced.
# NOTE(review): presumably each daemon creates its own marker file once
# it is ready -- confirm against the daemon sources.
#
FAILFASTD_FILE=/etc/cluster/.failfastd
CLEXECD_FILE=/etc/cluster/.clexecd

#
# Core file handling: make sure the core directory exists, then set the
# core file name pattern of this shell ($$) to a path below that
# directory.
# NOTE(review): the pattern is presumably inherited by the commands
# started from this script -- confirm with coreadm(1M).
#
SCCOREDIR=/var/tmp/SUNWscu/core
[ -d ${SCCOREDIR} ] || mkdir -p ${SCCOREDIR}
/usr/bin/coreadm -p ${SCCOREDIR}/core.%n.%f.%p.%t $$

#
# Report core files that are already present, on stdout and through
# syslog (kern.err).  ${CORES_FILES} is deliberately left unquoted in
# the echo so that the listing is printed on a single line.
#
CORES_FILES=`ls ${SCCOREDIR}`
if [ -n "${CORES_FILES}" ]
then
	SCCOREMSG="Core files in ${SCCOREDIR}: "
	echo "${SCCOREMSG}"${CORES_FILES}
	/usr/bin/logger -p kern.err "${SCCOREMSG}${CORES_FILES}"
fi



#
# Initialize DID so that DID names can be used for the quorum devices.
# This works before the ORB has been started because scdidadm knows
# how to fall back to the CCR file interfaces when the ORB is not
# available.
#
if [ -c /dev/did/admin ] && [ -x /usr/cluster/bin/scdidadm ]
then
	/usr/cluster/bin/scdidadm -u -i
fi

HALT_MESSAGE="Please reboot in non cluster mode(boot -x) and Repair"

#
# cluster_boot_fatal message
#
# Print "message" on stdout, log it through syslog at kern.err, print
# the recovery hint held in HALT_MESSAGE and halt the node.
#
cluster_boot_fatal()
{
	echo "$1"
	/usr/bin/logger -p kern.err "$1"
	echo $HALT_MESSAGE
	/usr/sbin/halt
}

#
# Find out whether we are booting as part of a cluster and, if so,
# bring up the cluster framework.  "clustered" records the outcome
# (1 = cluster mode, 0 = non cluster mode) for the rest of the script.
#
if /usr/sbin/clinfo > /dev/null 2>&1
then
	#
	# Check the infrastructure file.  Whatever the check writes to
	# stderr is kept in a file so that it can be displayed afterwards.
	#
	chkinfr_err=/etc/cluster/chkinfr.err
	if /usr/cluster/lib/sc/chkinfr 2>${chkinfr_err}
	then
		# Check passed: show its stderr output if there is any,
		# otherwise remove the empty file.
		if [ -s ${chkinfr_err} ]
		then
			/usr/bin/cat ${chkinfr_err}
		else
			/usr/bin/rm -f ${chkinfr_err}
		fi
	else
		/usr/bin/cat ${chkinfr_err}
		cluster_boot_fatal "UNRECOVERABLE ERROR: /etc/cluster/ccr/infrastructure file is corrupted"
	fi
	clustered=1
	echo "Booting as part of a cluster"

	#
	# Plumb the IPv6 loopback address (::1) on lo0 unless lo0 already
	# has an inet6 interface.  Link local addresses get configured on
	# the transport adapters, so some applications (such as ping)
	# might try to connect to ::1 when talking to "localhost".  There
	# are no /etc/hostname6.<adp> files for the transport adapters,
	# hence the loopback plumb has to be done here.
	#
	/sbin/ifconfig lo0 inet6 >/dev/null 2>&1 ||
		/sbin/ifconfig lo0 inet6 plumb ::1 up

	#
	# Load cl_comm up front.  This avoids a modload() deadlock when
	# clconfig -c calls ORB::initialize(), which tries to modload the
	# transport module.
	#
	cl_module=misc/cl_comm
	/usr/sbin/modload -p $cl_module ||
		cluster_boot_fatal "UNRECOVERABLE ERROR: Sun Cluster boot: Could not load module $cl_module"

	#
	# Start the ORB, the transport and the CCR.
	#
	/usr/cluster/lib/sc/clconfig -c ||
		cluster_boot_fatal "UNRECOVERABLE ERROR: Sun Cluster boot: Could not initialize cluster framework"

	#
	# A node which is not in "installmode" automatically gets its
	# vote count reset to one whenever it boots back into the
	# cluster.  A failure is only reported, it is not fatal.
	#
	/usr/cluster/bin/scconf -c -q ifnotinstallmode,reset,node=`/sbin/uname -n`
	vote_rc=$?
	if [ "$vote_rc" != "0" ]
	then
		echo "Sun Cluster boot: reset vote returns $vote_rc"
		/usr/bin/logger -p kern.err "Sun Cluster boot: reset vote returns $vote_rc"
	fi
else
	clustered=0
	echo "Not booting as part of a cluster"
fi

#
# Run the DID initialization once more, now that a current copy of
# the CCR is available.
#
did_admin_dev=/dev/did/admin
scdidadm_cmd=/usr/cluster/bin/scdidadm
if [ -c ${did_admin_dev} ]
then
	if [ -x ${scdidadm_cmd} ]
	then
		${scdidadm_cmd} -u -i
	fi
fi

#
# start_cluster_daemon name path markerfile
#
# Launch one cluster daemon and check that it came up.
#
#	name		daemon name used in the messages
#	path		full path of the daemon executable
#	markerfile	file which is removed before the launch and has
#			to exist after the launch command returned
#
# A missing (or non executable) daemon binary and a non-zero exit
# status of the launch command are both reported as UNRECOVERABLE ERROR
# on stdout and through syslog (kern.err), followed by the recovery
# hint in HALT_MESSAGE and a halt of the node.  A marker file which is
# still missing after the launch forces a crash dump.
#
start_cluster_daemon()
{
	scd_name=$1
	scd_path=$2
	scd_marker=$3

	if [ ! -x ${scd_path} ]
	then
		scd_msg="UNRECOVERABLE ERROR: Sun Cluster boot: ${scd_path} not found"
		echo "${scd_msg}"
		/usr/bin/logger -p kern.err "${scd_msg}"
		echo $HALT_MESSAGE
		/usr/sbin/halt
		return
	fi

	# Get rid of a stale marker file before the launch.
	[ -f ${scd_marker} ] && /bin/rm -f ${scd_marker}

	${scd_path}
	if [ $? -ne 0 ]
	then
		scd_msg="UNRECOVERABLE ERROR: Sun Cluster boot: ${scd_name} not started"
		echo "${scd_msg}"
		/usr/bin/logger -p kern.err "${scd_msg}"
		echo $HALT_MESSAGE
		/usr/sbin/halt
	fi

	if [ ! -f ${scd_marker} ]
	then
		echo "Sun Cluster boot: ${scd_name} start problem"
		# force a crash dump
		/usr/sbin/uadmin 5 1
	fi
}

#
# The daemons are only started when booting as part of a cluster.
#
if [ "$clustered" = 1 ]
then
	#
	# failfastd has to be started before clexecd is launched.
	#
	start_cluster_daemon failfastd /usr/cluster/lib/sc/failfastd \
	    "${FAILFASTD_FILE}"

	#
	# clexecd is the HA mounter daemon; it has to be started before
	# global mounts are enabled.
	#
	start_cluster_daemon clexecd /usr/cluster/lib/sc/clexecd \
	    "${CLEXECD_FILE}"
fi
