#! /usr/bin/ksh
#
# ident	"@(#)udlmreconfig.sh	1.13	03/04/15 SMI"
#
# Copyright 1997-2003 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

#
# udlmreconfig - reconfiguration program for Oracle Unix DLM
#
# Input:
#	Environment variables:
#		${CLUSTNAME}
#		${CURRSTEP}
#		${CURRNODES}
#		${LOCALNODEID}
#		${RECONF_SCRIPTS}
#		${LKMGR_LOG}
#
# Action:       Run Reconfiguration Programs based on current ucmm step
#
# Output:       Return 0 if success
#               Return 1 if failure
#               Return 200 if result is to be ignored
#               Return 205 if another reconfiguration is needed
#

#
# set some flags (NOTE: they are not set implicitly in subroutines)
#
# set -x # print commands as executed
# set -e # execute ERR trap on command error
# set -u # treat unset variables as an error
set -f # disable file name generation
# NOTE: with file name generation off, the unquoted expansions used
# throughout this script are still subject to field splitting, but are
# never expanded as glob patterns.

#
# Cluster Events
#
#
# command used to publish substep events
#
PUB_EVENT="/usr/cluster/lib/ucmm/ucmm_substep_event";
#
# command used to get timestamp data to be published
#
TIMESTAMP="/usr/cluster/lib/ucmm/ucmm_timestamp";
#
# get beginning timeofday and hrtime for calculating event times later
#
# we use an external program to get timestamps in nanosecond resolution
# as the format of our event publication messages is in this data type.
#
# NOTE: if an error occurs, do not report the error as availability is
# more important than event execution times. Since we do math on these
# variables later, if the command fails we need to ensure that they
# are not null, else the math will cause the script to exit causing
# node death.
#
# NOTE(review): the "|| ...=0" fallback is taken only when the timestamp
# command exits non-zero.  A run that exits 0 but prints nothing would
# still leave the variable empty.
#
BEGIN_HRTIME=$(${TIMESTAMP} -h) || BEGIN_HRTIME=0;
BEGIN_TOD=$(${TIMESTAMP} -t) || BEGIN_TOD=0;
#
# Cluster Event Severities
#
# NOTE: these values must be in sync with the cl_eventdefs.h file
#
# Only the INFO and ERROR severities are referenced by the code visible
# in this file.
#
CL_EVENT_SEV_INFO=0;
CL_EVENT_SEV_WARNING=1;
CL_EVENT_SEV_ERROR=2;
CL_EVENT_SEV_CRITICAL=3;
CL_EVENT_SEV_FATAL=4;

#
# Local variables
#
# pre			prefix of the tag passed to scds_syslog -t
# udlmctlbin		directory holding udlmctl and udlmctl_32/udlmctl_64
# udlm_cfgfile		UDLM configuration file; passed to cfgmatch and to
#			lkmgr -c
# udlmbin		directory holding the lkmgr binary
# udlmctl		control program run for the abort and step commands
# default_lkmgr_log	value used for LKMGR_LOG when it is unset, empty or
#			not a directory
# num_dlmds		value passed to lkmgr -n
# status_message	text set by the validation functions on failure and
#			used for the FAULTED resource status at exit
#
# Each *_dbg_flag is assigned twice: first the debug option, then an
# empty string that overrides it, so debugging is off as written.
# Presumably the second assignment is commented out to turn debugging
# on - TODO confirm.
#
pre="SUNWudlm.udlmreconfig";
udlmctlbin="/opt/SUNWudlm/bin";
udlm_cfgfile="/opt/SUNWudlm/etc/udlm.conf";
udlmbin="/opt/SUNWcluster/bin";
udlmctl="${udlmctlbin}/udlmctl";
default_lkmgr_log="/var/cluster/ucmm";
num_dlmds=2;
udlm_dbg_flag="-d 0xfff";
udlm_dbg_flag="";
udlmctl_dbg_flag="-d 0xfff";
udlmctl_dbg_flag="";
status_message="";

#
# Include common files
#
# ${include} expands to "." so the last line below sources
# ucmm_reconf.common into this shell.  RECONF_SCRIPTS keeps the value
# from the environment and falls back to /usr/cluster/lib/ucmm when it
# is unset or empty.
#
# NOTE(review): names used but not defined in this file (scds_syslog,
# log_info, SCHA_RS_SETSTATUS) presumably come from the sourced file -
# TODO confirm.
#
include=".";
RECONF_SCRIPTS="${RECONF_SCRIPTS:-/usr/cluster/lib/ucmm}";
${include} ${RECONF_SCRIPTS}/ucmm_reconf.common;

#
# udlm_cfg_match - look up one key in the UDLM configuration file
#
#	Parameter 1: key handed to the cfgmatch helper
#
#	Runs ${RECONF_SCRIPTS}/cfgmatch with the key and ${udlm_cfgfile}.
#	Whatever cfgmatch writes to stdout is the result; its exit status
#	is passed straight back to the caller.
#
udlm_cfg_match() {

	${RECONF_SCRIPTS}/cfgmatch $1 ${udlm_cfgfile};
	return $?;

}

#
# udlmstart_cmd - start the Unix DLM (lkmgr) on this node
#
#	Return: status of udlminit if the installation check fails,
#		1 if a root-owned lkmgr process already exists,
#		the priocntl exit status if launching lkmgr fails,
#		0 otherwise.
#
#	Exports LKMGR_LOG and sets the global variable cpus.
#
udlmstart_cmd() {

	udlminit || return $?;

	# Use the default log location when LKMGR_LOG is unset, empty, or
	# does not name an existing directory.  The default itself is not
	# checked; it is assumed to always exist.
	if [[ -z "${LKMGR_LOG}" || ! -d ${LKMGR_LOG} ]]; then
		LKMGR_LOG=${default_lkmgr_log};
	fi
	export LKMGR_LOG;

	# The DLM should not be running yet, error if it is
	if /bin/pgrep -u 0 lkmgr >/dev/null 2>&1; then
		scds_syslog -p error -t "${pre}.udlmstart_cmd" -m \
			"Unix DLM already running";
		return 1;
	fi

	# start the Unix DLM.  We can't do it before this point because
	# clustd has to determine the nodeid.
	log_info "${pre}.udlmstart_cmd" "Starting the Unix DLM.";

	# Number of on-line processors decides how lkmgr is scheduled.
	cpus=$(/usr/sbin/psrinfo | /bin/grep on-line | /bin/wc -l);

	if (( cpus == 1 )); then
		# Single processor: fixed TS class and priority values.
		/usr/bin/priocntl -c TS -p 59 -m 59 \
		    -e ${udlmbin}/lkmgr -n ${num_dlmds} -c ${udlm_cfgfile} \
		    -i ${LOCALNODEID} ${udlm_dbg_flag} > /dev/console 2>&1 \
		    || return $?;
	else
		# Otherwise class and priority come from the UDLM
		# configuration file.
		/usr/bin/priocntl \
		    -c $(udlm_cfg_match udlm.schedclass) \
		    -p $(udlm_cfg_match udlm.schedpriority) \
		    -e ${udlmbin}/lkmgr -n ${num_dlmds} -c ${udlm_cfgfile} \
		    -i ${LOCALNODEID} ${udlm_dbg_flag} > /dev/console 2>&1 \
		    || return $?;
	fi

	return 0;

}


#
# udlmabort_cmd - abort the Unix DLM (used for the cmmabort and cmmstop
# steps)
#
#	Return: status of udlminit if the installation check fails,
#		200 if "udlmctl abort" fails,
#		0 otherwise (including when no lkmgr process is found).
#
#	The resource status is set to OFFLINE before the abort is
#	attempted and again after it completes.
#
udlmabort_cmd() {

	udlminit || return $?;

	set_udlm_status OFFLINE "abort step started";

	# Only ask for an abort when a root-owned lkmgr process exists.
	if /bin/pgrep -u 0 lkmgr >/dev/null 2>&1; then
		${udlmctl} ${udlmctl_dbg_flag} abort ${CLUSTNAME} \
		    $(udlm_cfg_match udlm.abort_timeout) || {
			scds_syslog -p error -t "${pre}.udlmabort_cmd" -m \
				"Unix DLM abort failed";
			return 200;
		}
	fi

	set_udlm_status OFFLINE "abort step completed";

	return 0;

}

#
# udlmstep1_cmd - run "udlmctl step1" against the running Unix DLM
#
#	Return: status of udlminit if the installation check fails,
#		1 if no root-owned lkmgr process is found,
#		the udlmctl exit status if step1 fails,
#		0 otherwise.
#
#	The resource status is set to DEGRADED before anything else is
#	checked.
#
udlmstep1_cmd() {

	udlminit || return $?;

	set_udlm_status DEGRADED "reconfiguration in progress";

	# The Unix DLM is "lkmgr" before initialization, "dlmmon" after
	#    initialization, and multiple dlmmon processes during fork.
	/bin/pgrep -u 0 lkmgr >/dev/null 2>&1 || {
		scds_syslog -p error -t "${pre}.udlmstep1_cmd" -m \
			"Unix DLM no longer running";
		return 1;
	}

	${udlmctl} ${udlmctl_dbg_flag} step1 ${CLUSTNAME} \
	    $(udlm_cfg_match udlm.step1_timeout) || return $?;

	return 0;

}

#
# udlminit - check that the programs needed by every step are present
#
#	Both ${udlmbin}/lkmgr and ${udlmctl} must be executable and must
#	not be directories.  The first one that fails the check is
#	reported through scds_syslog.
#
#	Return: 0 if both programs pass, 1 otherwise.
#
udlminit() {

	[[ ! -d ${udlmbin}/lkmgr && -x ${udlmbin}/lkmgr ]] || {
		scds_syslog -p error -t "${pre}.udlminit" -m \
			"Oracle UDLM package is not properly installed. %s not found." \
			"${udlmbin}/lkmgr";
		return 1;
	}

	[[ ! -d ${udlmctl} && -x ${udlmctl} ]] || {
		scds_syslog -p error -t "${pre}.udlminit" -m \
			"%s not found." "${udlmctl}";
		return 1;
	}

	return 0;

}
#
# create_udlmctl_link - make udlmctl a link to the udlmctl_32 or
# udlmctl_64 program that matches the Oracle lock manager binary lkmgr.
#
# The word size of ${udlmbin}/lkmgr is taken from the output of file(1)
# and left in the global variable lkmgr_arch ("32" or "64"), where
# check_lkmgr_arch reads it afterwards.
#
# If the lkmgr binary does not exist on the system, this indicates that
# the Oracle UDLM package is not installed: no link is created and an
# error is returned.
#
# On failure status_message is set to the text to report.
#
# Return: 0 if udlmctl already is, or now is, the matching program
#         1 if lkmgr is missing or the link could not be created
#
create_udlmctl_link()
{

	typeset rc=0

	if [[ -d ${udlmbin}/lkmgr || ! -x ${udlmbin}/lkmgr ]]; then
		scds_syslog -p notice -t "${pre}.validate" -m \
			"Oracle UDLM package is not installed. %s not found." \
			"${udlmbin}/lkmgr";
		status_message="Validation error. Unable to locate lock manager ${udlmbin}/lkmgr";

		return 1;
	fi

	#
	# The third field of the file(1) output is expected to look like
	# "32-bit" or "64-bit"; keep its first two characters.  awk's
	# substr() is 1-based: a start position of 0 yields only one
	# character under a POSIX awk, so position 1 is used.
	#
	lkmgr_arch=$(LC_MESSAGES=C /bin/file ${udlmbin}/lkmgr | \
		/bin/awk '/(32|64)-/ {print substr($3,1,2);}');

	#
	# Test using [ file1 -ef file2 ] to check whether file1 is the
	# same file as file2.
	#
	if [[ ${udlmctlbin}/udlmctl_${lkmgr_arch} -ef ${udlmctlbin}/udlmctl ]]; then
		# udlmctl and udlmctl_${lkmgr_arch} files are identical.
		# No need to link the file
		return 0;
	fi

	#
	# An existing udlmctl that is some other file has to be
	# replaced; -f requests that explicitly for ln implementations
	# that otherwise refuse to overwrite an existing target.
	#
	/bin/ln -f ${udlmctlbin}/udlmctl_${lkmgr_arch} ${udlmctlbin}/udlmctl
	rc=$?

	if [[ ${rc} -ne 0 || -d ${udlmctl} || ! -x ${udlmctl} ]]; then
		scds_syslog -p error -t "${pre}.validate" -m \
			"Error in creating udlmctl link: %s. Error (%s)" "${udlmctl}" "${rc}";
		status_message="Validation error. Unable to to create  ${udlmctlbin}/udlmctl";
		return 1;
	fi

	return 0;
}

#
# udlm_validate - validation step
#
#	Verifies the existence of the Oracle lkmgr binary and creates the
#	udlmctl link; only if that succeeds is the lkmgr architecture
#	checked.
#
#	Return: 0 if both checks pass, otherwise the status of the first
#		check that failed.
#
udlm_validate() {

	create_udlmctl_link || return $?;

	check_lkmgr_arch || return $?;

	return 0;

}


#
# check_lkmgr_arch - check that the lkmgr word size can run on this node
#
#	Reads the global lkmgr_arch (set by create_udlmctl_link).
#
#	A value of "32" is always accepted.  Any other value is accepted
#	only when /bin/isainfo is an executable non-directory and
#	"isainfo -b" reports the same number.
#
#	Return: 0 if accepted; 1 otherwise, with the error logged and
#		status_message set.
#
check_lkmgr_arch() {

	if [[ ${lkmgr_arch} = "32" ]]; then
		return 0;
	fi

	# isainfo is run only when it is usable; without it nothing but
	# a 32-bit lkmgr is accepted.
	if [[ -d /bin/isainfo || ! -x /bin/isainfo ]] ||
	    [[ ${lkmgr_arch} -ne $(/bin/isainfo -b) ]]; then
		scds_syslog -p error -t "${pre}.check_lkmgr_arch" -m \
		    "Oracle UDLM package wrong instruction set architecture.";
		status_message="Validation error. Oracle UDLM package wrong instruction set architecture.";
		return 1;
	fi

	return 0;
}

#############################################################
# set_udlm_status
#   Sets the status of the UDLM resource
#       Parameter 1: <status> (not validated; UNKNOWN if empty)
#                OK,  DEGRADED,  FAULTED, UNKNOWN, or OFFLINE.
#       Parameter 2: <message> (may be empty)
#
#   The resource and resource group names are read from the
#   UDLM configuration file.  If either one is missing the
#   status is left untouched.
#
#   Always returns 0.
#############################################################
set_udlm_status()
{
	typeset new_state="${1:-UNKNOWN}";
	typeset text=${2:-""};
	typeset resource="";
	typeset rgroup="";

	# No resource name entry in udlm.conf: the UDLM resource is
	# not configured, so there is no status to update.
	resource=$(udlm_cfg_match udlm.resource_name);
	[ -n "${resource}" ] || return 0;

	# Same when the resource group name cannot be found.
	rgroup=$(udlm_cfg_match udlm.resource_group_name);
	[ -n "${rgroup}" ] || return 0;

	${SCHA_RS_SETSTATUS} -R ${resource} -G ${rgroup} -s ${new_state} -m "${text}";

	return 0;
}


#
# turns on tracing for all functions
#
# typeset -tf $(typeset +f)

#
#       main switch statement, execute appropriate reconfiguration step
#
status=0;
PUB_SUBSTEP="unknown";

#
# udlm_run_step - common body of the steps driven purely by udlmctl
#
#	Parameter 1: udlmctl step name (step2 ... step5).  It also
#		selects the udlm.<step>_timeout key looked up in the
#		UDLM configuration file.
#
#	Return: status of udlminit if the installation check fails,
#		otherwise the exit status of udlmctl.
#
udlm_run_step() {

	udlminit || return $?;

	${udlmctl} ${udlmctl_dbg_flag} ${1} ${CLUSTNAME} \
	    $(udlm_cfg_match udlm.${1}_timeout) || return $?;

	return 0;

}

#
# Map the current ucmm step onto the UDLM substep to run.  PUB_SUBSTEP
# records the substep name for the event published afterwards; status
# keeps the exit status of the substep.
#
case ${CURRSTEP} in

	cmmstart)
		PUB_SUBSTEP="udlmstart";
		udlmstart_cmd || status=$?;
		;;

	cmmabort|cmmstop)
		PUB_SUBSTEP="udlmabort";
		udlmabort_cmd || status=$?;
		;;

	cmmstep2)
		PUB_SUBSTEP="udlmstep1";
		udlmstep1_cmd || status=$?;
		;;

	cmmstep4)
		PUB_SUBSTEP="udlmstep2";
		udlm_run_step step2 || status=$?;
		;;

	cmmstep5)
		PUB_SUBSTEP="udlmstep3";
		udlm_run_step step3 || status=$?;
		;;

	cmmstep6)
		PUB_SUBSTEP="udlmstep4";
		udlm_run_step step4 || status=$?;
		;;

	cmmstep7)
		PUB_SUBSTEP="udlmstep5";
		udlm_run_step step5 || status=$?;

		# last UDLM step: report the resource as OK on success
		if (( status == 0 )); then
			set_udlm_status OK "";
		fi
		;;

	validate)
		PUB_SUBSTEP="validate";
		udlm_validate || status=$?;
		;;

	*)
		# not a step this script handles: log it and return the
		# "result is to be ignored" code
		scds_syslog -p error -t "${pre}" -m \
			"Unknown step: %s" "${CURRSTEP}";
		status=200;
		;;

esac

#
# calculate execution time
#
# NOTE: if an error occurs, do not report the error as availability is
# more important than event execution times. Since we do math on these
# variables later, if the command fails we need to ensure that they
# are not null, else the math will cause the script to exit causing
# node death.
#
END_HRTIME=$(${TIMESTAMP} -h) || END_HRTIME=0;
(( DURATION = END_HRTIME - BEGIN_HRTIME ));

#
# publish the event
#
# Return codes 0, 200 and 205 are non-fatal and are published with
# informational severity; every other code is fatal and is published
# with error severity.
#
event_sev=${CL_EVENT_SEV_ERROR};
step_failed=1;
if (( status == 0 || status == 200 || status == 205 )); then
	event_sev=${CL_EVENT_SEV_INFO};
	step_failed=0;
fi

${PUB_EVENT} -s ${event_sev} \
	-N ${CURRSTEP} \
	-n "${PUB_SUBSTEP}" \
	-t ${BEGIN_TOD} -d ${DURATION} -r ${status};

#
# after a fatal return code mark the resource FAULTED, using the
# message left by the failing function if there is one
#
if (( step_failed )); then
	set_udlm_status "FAULTED" "${status_message:-Error in step ${PUB_SUBSTEP}}";
fi

exit ${status};
