#! /usr/bin/ksh
#
# ident	"@(#)hwrreconfig.sh	1.9	04/02/19 SMI"
#
# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

#
# hwrreconfig - reconfiguration program for Hardware RAID devices
#
# Input:
#       Environment variables:
#               ${CLUSTNAME}
#               ${CURRSTEP}
#               ${ALLNODES}
#               ${CURRNODES}
#               ${LOCALNODEID}
#               ${RECONF_SCRIPTS}
#
# Action:	Run reconfiguration programs based on current ucmm step
#
# Output:	Return 0 if success
#		Return 1 if failure
#		Return 200 if result is to be ignored
#               Return 205 if another reconfiguration is needed
#

#
# set some flags (NOTE: they are not set implicitly in subroutines)
#
# set -x # print commands as executed
# set -e # execute ERR trap on command error
# set -u # treat unset variables as an error
set -f # disable file name generation

#
# Cluster Events
#
# PUB_EVENT is the command used to publish substep events.
# TIMESTAMP is the command used to get the timestamp data that is
# published with each event.
#
PUB_EVENT="/usr/cluster/lib/ucmm/ucmm_substep_event"
TIMESTAMP="/usr/cluster/lib/ucmm/ucmm_timestamp"

#
# Record the starting hrtime and time of day; they are used later to
# compute and publish the event times.
#
# An external program supplies the timestamps in nanosecond resolution
# because the event publication messages use that data type.
#
# NOTE: a failure here is deliberately not reported, since availability
# matters more than event execution times. Arithmetic is done on these
# variables later, so each one falls back to 0 when its command fails:
# a null value would make the math abort the script and cause node
# death.
#
BEGIN_HRTIME=$(${TIMESTAMP} -h) || BEGIN_HRTIME=0
BEGIN_TOD=$(${TIMESTAMP} -t) || BEGIN_TOD=0

#
# Cluster Event Severities
#
# NOTE: these values must be in sync with the cl_eventdefs.h file
#
CL_EVENT_SEV_INFO=0
CL_EVENT_SEV_WARNING=1
CL_EVENT_SEV_ERROR=2
CL_EVENT_SEV_CRITICAL=3
CL_EVENT_SEV_FATAL=4

#
# Local variables
#
pre="SUNWschwr.hwrreconfig";
#
# RECONF_SCRIPTS must receive its default before any path is derived
# from it; otherwise an unset RECONF_SCRIPTS leaves cfgmatch pointing
# at "/cfgmatch".
#
RECONF_SCRIPTS="${RECONF_SCRIPTS:-/usr/cluster/lib/ucmm}";
cfgmatch="${RECONF_SCRIPTS}/cfgmatch";
cfgfile="/dev/null";
hatimerun="/usr/cluster/bin/hatimerun";
run_reserve="/usr/cluster/lib/sc/reserve";

#
# Include common files
#
# The path is quoted so that a RECONF_SCRIPTS value containing
# whitespace is still passed to the dot command as a single word.
#
include=.;
${include} "${RECONF_SCRIPTS}/ucmm_reconf.common";


#
#	hw_reserve - make sure that device fencing has completed
#
#	Runs "${run_reserve} -l". When that command fails, its exit
#	status is returned unchanged and no status is set. When it
#	succeeds, the hwraid status is set to OK with an empty message
#	and 0 is returned.
#
hw_reserve() {
	if ${run_reserve} -l; then
		set_hwraid_status OK ""
		return 0
	else
		# $? still holds the exit status of the reserve command
		return $?
	fi
}

#############################################################
# set_hwraid_status
#   Sets resource status
#       Parameter 1: <status> (not validated), UNKNOWN when
#                omitted or empty:
#                OK,  DEGRADED,  FAULTED, UNKNOWN, or OFFLINE.
#       Parameter 2: <message>, empty when omitted
#
#   The resource name and resource group name are looked up in
#   hwraid.conf with ${cfgmatch}. If either lookup yields an
#   empty string, the status is left untouched.
#
#   All of the work runs in a background subshell, so the caller
#   does not wait for it and does not see its outcome.
#############################################################

set_hwraid_status()
{
    (
	typeset new_status="${1:-UNKNOWN}"
	typeset text=${2:-""}
	typeset resource=""
	typeset rgroup=""
	# rt_name is not referenced anywhere else in this function
	typeset rt_name="SUNW.rac_hwraid:*"
	typeset name_key=hwraid.resource_name
	typeset group_key=hwraid.resource_group_name
	typeset hwraid_conf=/usr/cluster/lib/ucmm/rt/rac_hwraid/etc/hwraid.conf

	resource=$(${cfgmatch} ${name_key} ${hwraid_conf})

	# an empty resource name means the resource is not configured
	if [ -n "${resource}" ]; then
		rgroup=$(${cfgmatch} ${group_key} ${hwraid_conf})

		# without a group name the status cannot be updated
		if [ -n "${rgroup}" ]; then
			${SCHA_RS_SETSTATUS} -R ${resource} -G ${rgroup} \
				-s ${new_status} -m "${text}"
		fi
	fi

	return 0
    ) &
}

#
# turns on tracing for all functions
#
# typeset -tf $(typeset +f)

#
#	main switch statement, execute appropriate reconfiguration step
#
# status is this program's eventual exit code; PUB_SUBSTEP is the
# substep name attached to the event that is published afterwards.
status=0
PUB_SUBSTEP="unknown"

case ${CURRSTEP} in
cmmstart)
	# nothing to run for this step, only name the substep
	PUB_SUBSTEP="hwrstart"
	;;

cmmstep1)
	PUB_SUBSTEP="hwrstep1"
	# keep the exit status of hw_reserve only when it fails
	if hw_reserve; then
		:
	else
		status=$?
	fi
	;;

validate)
	PUB_SUBSTEP="validate"
	status=0
	;;

*)
	# unrecognised step: log it and ask for the result to be ignored
	scds_syslog -p error -t "${pre}" -m \
		"Unknown step: %s" "${CURRSTEP}"
	status=200
	;;
esac

#
# calculate execution time
#
# NOTE: if an error occurs, do not report the error as availability is
# more important than event execution times. Since we do math on these
# variables later, if the command fails we need to ensure that they
# are not null, else the math will cause the script to exit causing
# node death.
#
# A value of 0 (or a null value) on either side means that timestamp
# could not be obtained. Subtracting in that case would publish a
# meaningless duration (negative, or the raw hrtime value), so the
# duration is reported as 0 instead.
#
END_HRTIME=$(${TIMESTAMP} -h) || END_HRTIME=0;
if [ "${BEGIN_HRTIME:-0}" = "0" ] || [ "${END_HRTIME:-0}" = "0" ]; then
	DURATION=0;
else
	(( DURATION = END_HRTIME - BEGIN_HRTIME ));
fi

#
# publish the event
#
# Non-fatal return codes (0, 200 and 205) are published as
# informational severity events; every other return code is published
# as an error severity event.
#
if (( status == 0 || status == 200 || status == 205 )); then
	event_sev=${CL_EVENT_SEV_INFO};
else
	event_sev=${CL_EVENT_SEV_ERROR};
fi

#
# Every option argument is quoted. An empty or unset CURRSTEP (which is
# what the "Unknown step" branch has to cope with) would otherwise
# disappear from the command line and -N would take the following "-n"
# as its argument. The timestamp and duration fall back to 0 if they
# are null for the same reason.
#
${PUB_EVENT} -s "${event_sev}" \
	-N "${CURRSTEP}" \
	-n "${PUB_SUBSTEP}" \
	-t "${BEGIN_TOD:-0}" -d "${DURATION:-0}" -r "${status}";

#
# a fatal return code also marks the resource as faulted
#
if [ "${event_sev}" = "${CL_EVENT_SEV_ERROR}" ]; then
	set_hwraid_status "FAULTED" "Error in step ${PUB_SUBSTEP}";
fi

exit "${status}";
