#!/bin/ksh -p
#
#pragma ident   "@(#)reconf_ener.sh 1.30     01/11/02  SMI"
#Copyright (C) 1997-1998 Sun Microsystems, Inc.
#All rights reserved.
#

#
# reconf_ener - Energizer Cluster Reconfiguration Programs
#
# reconf_ener provides a framework for adding the "reconfiguration
# programs" required by the PDD modules and by other applications
# involved in the Cluster Framework.
#

# defined for i18n
# TEXTDOMAIN / TEXTDOMAINDIR select the message catalog that the
# gettext(1) calls further down in this script look messages up in.
TEXTDOMAIN=reconf_ener; export TEXTDOMAIN
TEXTDOMAINDIR=/opt/SUNWcluster/locale; export TEXTDOMAINDIR
#TEXTDOMAINDIR=${ROOT}/opt/SUNWcluster/locale; export TEXTDOMAINDIR

# Prefix of the message identifiers handed to log_info (for example
# "$pre.4340"); it should be used only by the reconf framework.
pre="SUNWcluster.reconf"
# RECONF_DIR holds the rc?.d directories (rcI.d, rcK.d, rcA.d, rcR.d,
# rcS.d, rc<N>.d, rcP.d) whose programs execute_reconf runs.
export RECONF_DIR=/opt/SUNWcluster/etc/reconf/conf.d
# Directory of the helper files sourced below.  The value ends in "/",
# so the sourced paths contain "//", which is harmless.
export RECONF_SCRIPTS=/opt/SUNWcluster/etc/reconf/scripts/
# Names of components whose exit status 200 was ignored are appended to
# this file by check_execution_error; check_module_configured consults
# it, and cmmstart_cmd / cmmabort_cmd remove it.
reconf_error_file=/opt/SUNWcluster/etc/reconf/.reconf_ener_error

# INCLUDE is the "." (source) command; the three files are read into
# this shell.
# NOTE(review): log_info, log_trace, log_trace_end, enmatch,
# start_networks, stop_networks, reserve_all_shared_devs and
# release_all_shared_devs are used below but not defined in this file,
# so they presumably come from these files -- confirm.
INCLUDE=.
${INCLUDE} ${RECONF_SCRIPTS}/reconf_ener.disks
${INCLUDE} ${RECONF_SCRIPTS}/reconf_ener.common
${INCLUDE} ${RECONF_SCRIPTS}/reconf_ener.networks

#
# This Error Number needs to be consistent with other
# components, a common include file should be used
# by energizer components.
# A reconf program exiting with this status asks for a reconfiguration
# (see check_execution_error); execute_reconf exits with it as well.
RECONF_REQUIRED_ERR=205


######################################################################
# init - Initialize the Environment for execution of reconf programs.#
######################################################################
#
# Reads the globals clustname, CVM, VxVM, SDS, OPS and CCD, which the
# top level of this script sets before calling init.
#
# Exports (through "set -a") the installation paths, the CDB/CCD file
# names, tmpdir, admindir, logfile, numnodes, net_if_type, vm, udlm,
# ccd, MODNOTCONFIG, the CCM file names, the flag-file names and
# CMMSEQNUM.  It also fills the MODULES array with the names of the
# modules that must NOT be run and creates tmpdir when it is missing.
#
# Exits with status 1 when none of the cvm / vxvm / sds bits is set in
# cluster.pdbapps.  Arguments passed to init are ignored.
#
function init
{
	# All variables assigned from here up to "set +a" are
	# automatically exported.
	set -a

	# required paths / common directories for execution
	LD_LIBRARY_PATH=/opt/SUNWcluster/lib
	CLUSTERBIN=/opt/SUNWcluster/bin
	CLUSTERETC=/etc/opt/SUNWcluster
	CLUSTERVAR=/var/opt/SUNWcluster
	SSACLI=${CLUSTERBIN}/scssa
	PATH=/usr/sbin:/usr/bin/:${CLUSTERBIN}:/usr/ccs/bin/:/bin/:/sbin

	# export CDB and LOGfile path.
	cdbfile=${CLUSTERETC}/conf/${clustname}.cdb
	ccdfile=${CLUSTERETC}/conf/ccd.database.init
	cdbfilter=${CLUSTERETC}/conf/cdb.filter
	tmpdir=$(enmatch env.tmpdir)
	# Apply the fallback right away: the flag-file names further down
	# are derived from tmpdir, and with an empty tmpdir they would
	# otherwise end up directly under "/".
	if [ -z "${tmpdir}" ]; then
		tmpdir=${CLUSTERVAR}
	fi
	admindir=${CLUSTERVAR}/admindir
	logfile=${CLUSTERVAR}/scadmin.log
	# needs to be in sync with scadmin

	# export variable w.r.t nodes/network interface which are
	# statically determined from CDB/CCD file.
	numnodes=$(enmatch cmm.nodes)
	net_if_type=$(enmatch cluster.net_if_type)

	# set core SC applications available.(pdbapps)
	# Convert the Bit Mapping into Modules. At a Later
	# stage we can use module names only and remove bit mapping.
	# can do a ccdmatch to get the modules listed , instead
	# of doing multiple Appmatches.

	# Assume All modules configured.
	MODNOTCONFIG=""

	# The first volume manager bit that appmatch reports as "1" wins,
	# tried in the order cvm, vxvm, sds.
	pdbapps=$(enmatch cluster.pdbapps)
	vm=$(${CLUSTERBIN}/appmatch ${pdbapps} ${CVM})
	if [ "${vm}" = "1" ]; then
		vm=cvm
	else
		MODNOTCONFIG="${MODNOTCONFIG} cvm"
		vm=$(${CLUSTERBIN}/appmatch ${pdbapps} ${VxVM})
		if [[ "${vm}" = "1" ]]; then
			vm=vxvm
		else
			vm=$(${CLUSTERBIN}/appmatch ${pdbapps} ${SDS})
			if [[ "${vm}" = "1" ]]; then
				vm=sds
				# If the volume manager is SDS,
				# we don't need to use the TC for
				# failure fencing, so we turn off
				# the tcmon module.  We don't require
				# or support the use of a quorum device
				# in SDS configurations, either.
				MODNOTCONFIG="${MODNOTCONFIG} tcmon quorum"
			else
				log_info "$pre.4340" \
				    "Volume manager is improperly configured"
				exit 1
			fi
		fi
	fi

	#
	# If TC is not configured (for any node), don't run it
	#
	integer i=0;
	while [ $i -lt $numnodes ]; do
		if [ "$(enmatch cluster.node.${i}.tc_ssp.port)" ]; then
			break
		fi
		((i+=1))
	done
	if [ $i -eq $numnodes ]; then
		# may list tcmon a second time (SDS case above); the
		# duplicate is harmless for the lookup in MODULES.
		MODNOTCONFIG="${MODNOTCONFIG} tcmon"
	fi

	udlm=$(${CLUSTERBIN}/appmatch ${pdbapps} ${OPS})
	if [ ${udlm} -eq 0 ]; then
		MODNOTCONFIG="${MODNOTCONFIG} udlm"
	fi

	ccd=$(${CLUSTERBIN}/appmatch ${pdbapps} ${CCD})
	if [ ${ccd} -eq 0 ]; then
		MODNOTCONFIG="${MODNOTCONFIG} ccd"
	fi

	# Keep the list in an array as well, so that
	# check_module_configured can walk it element by element.
	set -A MODULES ${MODNOTCONFIG}

	# Create static files pathnames used in synchronization.
	# of various reconf programs
	# export CCM selected files.
	ccm_selected_net_file=$(${CLUSTERBIN}/cdbmatch ccm.script.net.file ${cdbfile})
	ccm_top_net_file=$(${CLUSTERBIN}/cdbmatch ccm.script.topnet.file ${cdbfile})

	# export variables used by cluster applications.
	DOINGSTOPFLAG=${tmpdir}/didstopprogs
		# hooks for execution of user cluster applications
	RESERVEDCTLS=${tmpdir}/ssa_is_reserved
		# control disk reservation
	FORCESTARTFLAG=${tmpdir}/do_not_use_ssa_reservations
		# ok to continue without majority quorum?
	ISRUNNINGFLAG=${tmpdir}/cluster_is_running
		# is cluster is already online
	CMMSEQNUM=$(${CLUSTERBIN}/cdbmatch cmm.sequencefile ${cdbfile})
	if [ -z "$CMMSEQNUM" ]; then
		CMMSEQNUM=/var/opt/SUNWcluster/cmm/reconf-seqnum.${clustname};
	fi

	# create the temporary file location if it is missing
	if [ ! -d "${tmpdir}" ]; then
		mkdir -p "${tmpdir}"
	fi
	set +a
}

#####################################################################
# validate_env_vars:                                                #
#  Publishes currnodes, localnodeid, seqnum and allnodes.  Each one #
#  is copied from the matching upper-case environment variable      #
#  (CURRNODES, LOCALNODEID, SEQNUM, ALLNODES) when that variable is #
#  non-empty; otherwise the value is asked from clustm.             #
#  The four variables are exported (set -a is active meanwhile).    #
#####################################################################
function validate_env_vars
{
	set -a

	# current cluster members
	if [[ -n "${CURRNODES}" ]]; then
		currnodes=${CURRNODES}
	else
		currnodes=$(${CLUSTERBIN}/clustm getcurrmembers $clustname)
	fi

	# id of the local node
	if [[ -n "${LOCALNODEID}" ]]; then
		localnodeid=${LOCALNODEID}
	else
		localnodeid=$(${CLUSTERBIN}/clustm getlocalnodeid $clustname)
	fi

	# reconfiguration sequence number
	if [[ -n "${SEQNUM}" ]]; then
		seqnum=${SEQNUM}
	else
		seqnum=$(${CLUSTERBIN}/clustm getseqnum $clustname)
	fi

	# all nodes
	if [[ -n "${ALLNODES}" ]]; then
		allnodes=${ALLNODES}
	else
		allnodes=$(${CLUSTERBIN}/clustm getallnodes ${clustname})
	fi

	set +a
}

##########################################################
# Usage:  Echo the correct usage of the program.         #
##########################################################
# Prints the usage text for the public subcommands on standard output
# and terminates the script with exit status 2.  The program name shown
# is taken from the global "prog", which the top level of this script
# sets to $0.
#
# The message text is looked up with gettext, so the strings below are
# message identifiers and must not be reformatted.
#
# NOTE(review): the option parser at the bottom of this file also
# accepts -n, which is not mentioned in this text.
_usage() {
	# public entry points
	lmsg=`gettext "\
Usage: 	%s [-a] [-f] startnode cluster_name\n\
	%s [-a]      stopnode  cluster_name\n\n\
	The [startnode|stopnode] subcommands are invoked\n\
	manually by the system administrator or automatically\n\
	from /etc/rc3.d scripts via the 'scadmin' script.\n\n\
	The reldisks subcommand is invoked manually by the\n\
	system administrator, usually by the \'scadmin\' script."`	 	
	printf "${lmsg}\n" "${prog}" "${prog}" 
        exit 2

	# Everything below this point is never executed because of the
	# "exit 2" above; it is kept as a description of the internal
	# subcommands.
        # The following are not public entrypoints
	lmsg=`gettext "\
	%s [cmmstart|cmmstop|cmmabort]	cluster_name\n\
	%s [cmmstep0-9|cmmreturn]	cluster_name\n\
	%s reldisks cluster_name\n\
	%s resdisks cluster_name\n\n\
	The [cmmstart|cmmstepN|cmmstop|cmmabort|cmmreturn]\n\
	commands are invoked by the cluster membership\n\
	monitor during cluster state transitions"`
	printf "${lmsg}\n" "${prog}" "${prog}" "${prog}" "${prog}"
        exit 2
}

###################################################################
# Reconfiguration FrameWork for executing reconf programs         #
###################################################################
#
# check_execution_error <component> <exit status>
#
# Decides what the exit status of one reconf program means:
#   0      - success; a "completed successfully" line is printed.
#   200    - status ignored; the component name is appended to
#            ${reconf_error_file}.
#   ${RECONF_REQUIRED_ERR}
#          - the program asks for a reconfiguration.  In cmmabort and
#            stopnode this is fatal (exit 1).  In cmmstart, cmmreturn
#            and startnode nothing more happens.  In every other step
#            the global reconf_required is set to 1.
#   other  - failure; the script exits with status 1.
# Returns 0 in all non-fatal cases.
#
function check_execution_error
{
	typeset rc_name="$1"
	typeset rc_status="$2"

	# success
	if [[ "${rc_status}" = "0" ]]; then
		#log_info "$pre.4050"  \
		#        "$1 completed successfully in ${CURRSTEP}";
		# for now store at pdbadmin.log
		lmsg=`gettext "%s %s.1050 %s completed successfully in %s"`
		printf "${lmsg}\n" "$(/bin/date)" "${pre}" "${rc_name}" "${CURRSTEP}"
		return 0
	fi

	# status ignore
	if [[ "${rc_status}" = "200" ]]; then
		log_info "$pre.5001" "${rc_name} errors status ignored in ${CURRSTEP}"
		echo ${rc_name} >> ${reconf_error_file} 2>/dev/null
		return 0
	fi

	# anything that is not a reconfiguration request is a failure
	if [[ "${rc_status}" != "${RECONF_REQUIRED_ERR}" ]]; then
		log_info "$pre.4051" "${rc_name} exited with ${rc_status} in ${CURRSTEP}"
		# to make sure that it is logged in syslog.
		sleep 1
		exit 1
	fi

	log_info "$pre.5002" "${rc_name} requests reconfiguration in ${CURRSTEP}"
	case "${CURRSTEP}" in
	cmmabort|stopnode)
		log_info "$pre.4051" "${rc_name} exited with 205 in ${CURRSTEP}"
		sleep 1
		exit 1
		;;
	cmmstart|cmmreturn|startnode)
		# the request is only logged in these steps
		;;
	*)
		# Reconfigure Status
		# Means that after the step completes, send
		# 205 to cmm so that it can reconfigure.
		reconf_required=1
		;;
	esac
	return 0
}

##############################################################
#  Check if the module needs to be executed.                 #
#  return 1 if module needs to be  executed.                 #
#  return 0 if module is not configured to be executed OR    #
#           is in reconfiguration error file                 #
##############################################################
#
# check_module_configured <module name>
#
# Note the inverted convention: status 0 means "skip this module".
# An empty module name is treated as "skip".
#
# The name is compared as a whole word against the MODULES array and
# against each line of ${reconf_error_file}.  The former grep lookup
# matched substrings / regular expressions (so "ccd" was found in a
# line "ccd_extra") and copied the matching lines to standard output.
#
function check_module_configured
{
	typeset -i i=0
	typeset modline

	# errexit is switched off here, as before, and is not restored
	# by this function.
	set +e

	modname=$1
	if [ -z "${modname}" ]; then
		return 0
	fi

	# Walk the list of modules that are not configured; the list
	# ends at the first empty element.
	while [ -n "${MODULES[i]}" ]
	do
		if [ "${MODULES[i]}" = "${modname}" ]; then
			# name found in not configured list.
			return 0
		fi
		((i+=1))
	done

	# A module recorded in the error file (one name per line, written
	# by check_execution_error) is skipped as well.  A missing or
	# unreadable file means that nothing has been recorded.
	if [ -r "${reconf_error_file}" ]; then
		while read -r modline
		do
			if [ "${modline}" = "${modname}" ]; then
				return 0
			fi
		done < "${reconf_error_file}"
	fi

	return 1
}

########################################################################
#								       #
# Execute Reconfiguration Programs:                                    #
#                                                                      #
# Parameter 1: Directory From which we have to execute the             #
#              Reconfiguration Programs.                               #
# Variables  :                                                         #
#   reconf_prog:                                                       #
#        Reconfiguring Programs for StepN                              #
#   current_reconf_prog:                                               #
#        Reconfiguration Programs for Current executionsequence        #
#   cur_seqno,seqno: Execution sequence Numbers.                       #
#   count: count is the number of programs for concurrent execution.   #
#                                                                      #
# Programs are the entries named ??_<component> in the directory.      #
# Entries that share the two-character sequence prefix of the first    #
# runnable entry of a group are started concurrently; a group of one   #
# is run in the foreground.  Components rejected by                    #
# check_module_configured are skipped.  Every exit status is passed    #
# to check_execution_error, which may terminate the script.            #
#                                                                      #
# Returns 0 when the directory does not exist or all programs were     #
# handled.  Exits with ${RECONF_REQUIRED_ERR} when a program asked for #
# a reconfiguration, and with 1 when the directory cannot be entered.  #
# errexit (set -e) is enabled again on every return path.              #
#                                                                      #
# Side effects: changes the working directory and overwrites the       #
# globals prog, component, CURDIR, reconf_required and lmsg.           #
#                                                                      #
########################################################################
function execute_reconf
{
	integer i
	integer count

	reconfdir=$1
	# Exit statuses are examined explicitly below.
	set +e

	#
	# check for the existence of the script file.
	#
	if [ ! -d "${reconfdir}" ]
	then
		# No actions for this step.  Restore errexit exactly as
		# the normal return path at the end of this function does.
		set -e
		return 0
	fi

	#
	# Get the complete listing of the actions to be executed for
	# step N.  Do not go on when the directory cannot be entered:
	# the ??_* programs of whatever directory is current would be
	# run instead.
	#
	cd "${reconfdir}" || {
		echo "execute_reconf: cannot change directory to ${reconfdir}" >&2
		exit 1
	}
	CURDIR=${PWD}
	set -A reconf_prog $(/bin/ls ??_* 2>/dev/null)

	reconf_required=0

	# set the index to start of the array
	i=0
	while [ -n "${reconf_prog[i]}" ]
	do
		prog=${reconf_prog[i]}
		#
		# extract sequence number and module name.
		#
		component=${prog#*_}
		cur_seqno=${prog%%_*}
		count=1
		current_reconf_prog=${reconf_prog[i]}
		((i+=1))
		check_module_configured ${component}
		if [ $? -eq 0 ]; then
			# not configured or recorded in the error file
			continue
		fi

		# collect the items with the same sequence number.
		while [ -n "${reconf_prog[i]}" ]
		do
			prog=${reconf_prog[i]}
			seqno=${prog%%_*}
			component=${prog#*_}
			if [ ${cur_seqno} -eq ${seqno} ]
			then
				# check for error in reconf_error_file.
				check_module_configured ${component}
				if [ $? -ne 0 ]; then
					current_reconf_prog="${current_reconf_prog} ${reconf_prog[i]}"
					((count+=1))
				fi
			else
				# No more reconf programs with same seqno.
				break
			fi
			((i+=1))
		done

		# Now execute the reconf programs.
		if [ ${count} -eq 1 ]
		then
			component=${current_reconf_prog#*_}
			lmsg=`gettext "%s %s.1340 %s started in %s"`
			printf "${lmsg}\n" "$(/bin/date)" "$pre" \
			       "${component}" "${CURRSTEP}"
			eval ${CURDIR}/${current_reconf_prog}
			error=$?
			check_execution_error ${component} ${error}
		else
			# Multiple reconfiguration programs to be
			# executed.
			set -A execute_items ${current_reconf_prog}
			integer execute=0
			while [ -n "${execute_items[execute]}" ]
			do
				prog=${execute_items[execute]}
				component=${prog#*_}
				# A background program reports a failure by
				# leaving its exit status in this file; remove
				# any stale copy first.
				rm -rf ${tmpdir}/${component}.${CURRSTEP}
				# Same message as in the single-program
				# branch (it used to carry a stray "\n\"
				# that split the log entry in two).
				lmsg=`gettext "%s %s.1340 %s started in %s"`
				printf "${lmsg}\n" "$(/bin/date)" "$pre" \
				       "${component}" "${CURRSTEP}"
				(eval ${CURDIR}/${prog} ||  \
				    echo $? > ${tmpdir}/${component}.${CURRSTEP}) &
				((execute+=1))
			done
			# wait for the result
			wait
			# check for the results using the status files.
			execute=0
			while [ -n "${execute_items[execute]}" ]
			do
				prog=${execute_items[execute]}
				component=${prog#*_}
				error=0
				if [ -f ${tmpdir}/${component}.${CURRSTEP} ]
				then
					error=$(cat ${tmpdir}/${component}.${CURRSTEP})
				fi
				check_execution_error ${component} ${error}
				((execute+=1))
			done
		fi
		#
		# if reconfiguration was required by the component
		# then do not execute any more components in the step
		# and exit with RECONF_REQUIRED_ERR;
		#
		if [ ${reconf_required} -eq 1 ]; then
			exit ${RECONF_REQUIRED_ERR}
		fi

	done
	set -e
}

############################################################
#  Handle All CMM-Transactions requested by clustd         #
############################################################
# join the localnode to the cluster.
#
# startnode_cmd
#
# Refuses to start (exit 1) when the running flag exists and clustm
# still answers for this cluster.  Otherwise it runs the rcI.d
# programs, creates the running flag, removes stale files, starts the
# networks and launches clustd.  Unless async is 1 it then waits until
# clustm reports the state "end".  Exits with 1 when the running flag
# has disappeared by then.
#
# Messages for the interactive user are written to file descriptor 3.
#
function startnode_cmd
{
	log_trace  startnode

	#
	# sanity checks for a well configured Node.
	# It could also include <timeout-Calculation>.
	#
	if [ -f ${ISRUNNINGFLAG} ]; then
		# check if clustd is really running
		# The exit is issued from this shell.  It used to sit
		# inside a "( ... )" subshell, which terminated the
		# script only as a side effect of errexit.
		if ${CLUSTERBIN}/timed_run -q 3 ${CLUSTERBIN}/clustm getstate \
			${clustname} >/dev/null 2>&1; then
			lmsg="`gettext ' This node is already running as part of the %s \
cluster'`"
			printf "${lmsg}\n" "${clustname}" >&3
			/bin/rm -f ${tmpdir}/startcluster
			exit 1
		fi
	fi
	#
	#  But if they are installed by packages then it must be
	#  Ok.
	execute_reconf ${RECONF_DIR}/rcI.d/

	#
	# Make Sure that the node is present, in the database.
	#
	# NOTE(review): the loop only determines the index i of this
	# host.  When no entry matches, i equals numnodes and the start
	# continues anyway -- confirm whether that should be fatal.
	#
	nodename=$(/bin/uname -n)
	integer i=0;
	while [ $i -lt $numnodes ]; do
		if [ "$(enmatch cluster.node.$i.hostname)" = "${nodename}" ]; then
			break;
		fi
		((i+=1))
	done

	touch ${ISRUNNINGFLAG}
	# single-line message (it used to contain a literal newline)
	log_info "$pre.1150" "Starting Sun Cluster: node $i ($nodename) joining the ${clustname} cluster"
	lmsg="`gettext 'Starting Sun Cluster software - joining the %s \
cluster '`"
	printf "${lmsg}\n" "${clustname}" >&3

	# Clean Up Stale Files.
	if [ -f ${ccm_selected_net_file} ]; then
		/bin/rm -f ${ccm_selected_net_file}
	fi
	/usr/bin/rm -f ${CMMSEQNUM}
	/usr/bin/rm -f ${tmpdir}/nodelock.$i

	log_trace start_networks
	start_networks
	log_trace_end start_networks

	# stdout/err are redirected by caller to the logfile
	# newcluster is "-n" when the -n option was given, else empty.
	${CLUSTERBIN}/clustd ${newcluster} -f ${cdbfile}

	/bin/rm -f ${DOINGSTOPFLAG}

	# unless the '-a' flag was specified, wait for the node to do
	# the first reconfiguration.
	if [ "${async}" != 1 ]; then
		while [ -f  ${ISRUNNINGFLAG} ]; do
			# check if clustd is still running
			state=$(${CLUSTERBIN}/timed_run -q 20 ${CLUSTERBIN}/clustm getstate ${clustname} 2>/dev/null) ||\
				 exit 1
			if [ "$state" = "end" ]; then
				break
			else
				sleep 3
			fi
		done
	fi
	# The flag is removed by cmmabort_cmd; its absence means that
	# the node did not stay up.
	if [ ! -f  ${ISRUNNINGFLAG} ]; then
		exit 1
	fi
	log_trace_end startnode
}

# pdb cluster graceful shutdown
#
# stopnode_cmd
#
# Exits with 1 when the running flag is absent.  Otherwise it exports
# the membership information obtained from clustm, creates
# ${DOINGSTOPFLAG}, runs the rcK.d programs and asks clustm to stop
# this node.  Unless async is 1 it waits until the running flag has
# disappeared.  Messages for the interactive user go to descriptor 3.
function stopnode_cmd
{
	log_trace stopnode

	# Nothing to stop when the node is not running.
	[ -f ${ISRUNNINGFLAG} ] || {
		lmsg="`gettext 'The Sun Cluster software is not currently \
running on this node'`"
		printf "${lmsg}\n" >&3
		exit 1  # for bugID 1166404
	}

	lmsg="`gettext 'Stopping the Sun Cluster software - leaving the \
%s cluster'`"
	printf "${lmsg}\n" "${clustname}" >&3 

	# membership view, exported for the rcK.d programs
	set -a
	currnodes=$(${CLUSTERBIN}/clustm getcurrmembers ${clustname})
	localnodeid=$(${CLUSTERBIN}/clustm getlocalnodeid ${clustname})
	allnodes=$(${CLUSTERBIN}/clustm getallnodes ${clustname})
	set +a

	touch ${DOINGSTOPFLAG}

	# handle rcK.d stopnode transaction.
	execute_reconf ${RECONF_DIR}/rcK.d/

	${CLUSTERBIN}/clustm stop ${clustname} this

	# unless the '-a' flag was specified, wait for the node to shutdown
	until [ "${async}" = 1 ] || [ ! -f ${ISRUNNINGFLAG} ]; do
		sleep 1
	done

	# this node was the only member
	if [ "$currnodes" = "$localnodeid" ]; then
		lmsg="`gettext 'The %s cluster has no active hosts.'`"
		printf "${lmsg}\n" "${clustname}" >&3
	fi
	#log_info "$pre.1070" "${clustname} cluster is stopped on this node"
	log_trace_end stopnode
}

########################################################################
# called from cluster membership monitor "abort" and "stop" transitions
########################################################################
#
# cmmabort_cmd
#
# Sets CURRSTEP to cmmabort, discards the reconf error file, runs the
# rcA.d programs, stops the networks and removes the running flag.
#
function cmmabort_cmd
{
	log_trace cmmabort_cmd
	log_info "${pre}.1200" "Reconfiguration step abort started"

	validate_env_vars
	CURRSTEP=cmmabort
	export CURRSTEP

	# start with an empty list of ignored components
	rm -rf ${reconf_error_file}

	# Handle rcA.d Transactions
	execute_reconf ${RECONF_DIR}/rcA.d/
	sleep 2

	log_trace stop_networks
	stop_networks
	log_trace_end stop_networks

	# the node is no longer part of the cluster
	/bin/rm -rf ${ISRUNNINGFLAG}

	log_info "${pre}.1201" "Reconfiguration step abort completed"
	log_trace_end cmmabort_cmd
}

#
# cmmreturn_cmd
#
# Handles the "return" transition: refreshes the membership variables
# and runs the rcR.d programs.
#
# As in the other cmm*_cmd functions, the "started" message is logged
# with id ${pre}.1200, the "completed" message with id ${pre}.1201, and
# the trace opened by log_trace is closed with log_trace_end.
#
function cmmreturn_cmd
{
	log_trace cmmreturn_cmd

	log_info "${pre}.1200" \
		"Reconfiguration step return started"

	validate_env_vars
	# Handle rcR.d
	execute_reconf ${RECONF_DIR}/rcR.d/

	log_info "${pre}.1201" \
		"Reconfiguration step return completed"

	log_trace_end cmmreturn_cmd
}

#
# cmmstart_cmd
#
# Handles the "start" transition: refreshes the membership variables,
# discards the reconf error file and runs the rcS.d programs.
#
function cmmstart_cmd
{
	log_trace cmmstart_cmd

	validate_env_vars

	# start with an empty list of ignored components
	rm -rf ${reconf_error_file}

	log_info "${pre}.1200" "Reconfiguration step start started"

	# Handle rcS.d transactions
	execute_reconf ${RECONF_DIR}/rcS.d/

	log_info "${pre}.1201" "Reconfiguration step start completed"
	log_trace_end cmmstart_cmd
}

#
# cmmstep1_cmd
#
# Handles reconfiguration step 1: logs the host names of the current
# members, removes ${ccm_top_net_file} when this node is the only
# member, runs the rc1.d programs and finally kills the process
# recorded in ${admindir}/reserve.pid (removing ${admindir} with it).
#
function cmmstep1_cmd
{
	validate_env_vars

	log_info "${pre}.1200" "Reconfiguration Step 1 started"

	# Translate the ids of the current members into host names.
	set -A nodes $(echo ${currnodes})
	integer i=0
	names=""
	until [ -z ${nodes[i]} ]
	do
		thisname=$(enmatch cluster.node.${nodes[i]}.hostname)
		names="$names ${thisname}"
		((i+=1))
	done
	log_info "$pre.1120" "${clustname} reconfiguration ${seqnum} started on ${names}"

	if [ "${currnodes}" = "${localnodeid}" ]
	then
		# the cluster has only one node. We can no longer 
		# tell which  nets are active.
		/usr/bin/rm -f ${ccm_top_net_file}
	fi

	execute_reconf ${RECONF_DIR}/rc1.d/

	if [ -f ${admindir}/reserve.pid ]
	then
		pid=$(cat ${admindir}/reserve.pid)
		# the "||" branch only keeps a failing kill from
		# terminating the script under errexit
		kill -KILL ${pid} || echo $? > /dev/null 2>&1
		/bin/rm -fr ${admindir}
	fi

	log_info "${pre}.1201" "Reconfiguration Step 1 completed"
}

#
# common_cmmstep <cmmstepN>
#
# Handles a numbered reconfiguration step: runs the programs of
# ${RECONF_DIR}/rc<N>.d/ and, when N equals the value of
# cmm.transition.steps, the post-reconfiguration step (cmmend_step).
#
function common_cmmstep
{
	log_trace  common_cmmstep

	step=$1
	validate_env_vars

	# The step number is what remains after the "cmmstep" prefix.
	CMMPREFIX=cmmstep
	step_number=${step##${CMMPREFIX}}

	log_info "${pre}.1200" "Reconfiguration Step ${step_number} started"

	# number of steps, taken from the CDB
	last_step=$(enmatch cmm.transition.steps)

	reconfdir=${RECONF_DIR}/rc${step_number}.d/
	execute_reconf ${reconfdir}

	log_info "${pre}.1201" "Reconfiguration Step ${step_number} completed"

	# the last step is followed by the post-reconfiguration programs
	if [ $step_number -eq $last_step ]; then
		cmmend_step
	fi
	log_trace_end common_cmmstep
}

#
# cmmend_step
#
# Runs the post-reconfiguration programs of ${RECONF_DIR}/rcP.d/.
# The membership variables are refreshed with errexit switched off;
# errexit is on again when execute_reconf is called.
#
function cmmend_step
{
	set +e
	validate_env_vars
	set -e

	# Handle rcP.d (post reconfiguration script).
	reconfdir="${RECONF_DIR}/rcP.d/"
	execute_reconf ${reconfdir}
}

#
#  End of All Functions 
#
# get program options
# getopt(1) normalizes the command line: recognized options (-a, -f,
# -n) come first, followed by "--" and the remaining operands.
# NOTE(review): "$?" below is read after "set --"; whether it still
# carries the exit status of getopt depends on the shell -- confirm for
# the ksh in use.
set -- $(getopt afn $*)
if [ $? != 0 ]; then
        _usage
fi

# Consume the options.  Each arm shifts the option it handled; the loop
# stops at "--", leaving the subcommand in $1 and the cluster name
# in $2.
#   -a  async=1       : startnode/stopnode do not wait for completion
#   -f  forcestart=1  : not read anywhere else in this file
#   -n  newcluster=-n : passed to clustd by startnode_cmd; scluster=1
#                       makes the code below create
#                       ${tmpdir}/startcluster
scluster=0
for i in  $*; do
        case $i in
        -a) async=1; shift ;;
        -f) forcestart=1; shift ;;
        -n) newcluster="-n"; scluster=1; shift ;;
        --) shift; break;;
        esac
done
 
# prog is the name shown by _usage; cmd selects the branch of the
# dispatch below.  clustname and CURRSTEP are exported for the reconf
# programs.
prog=$0
cmd=$1
export clustname=$2
export CURRSTEP=$cmd

# Cluster Application Bit Assignment for cluster.pdbapps vector in cdb file.
# (handed to appmatch by init)
OPS=0
CVM=3
VxVM=4
SDS=5
CCD=7

# From here on any unchecked command failure terminates the script.
# init runs before the subcommand is validated, i.e. also for an
# unknown or missing subcommand.
set -e
init $*

if [ $scluster -eq  1 ]; then
	/bin/touch ${tmpdir}/startcluster
fi

#
# Dispatch the call.
#
# We are carefully passing file descriptor 2 from "startnode" to clustd
# and then from clustd to the "cmm???" calls.
# file descriptor #3 is the original stdout if you need to send
# messages to the interactive user.
#
# Every branch duplicates the original stdout onto descriptor 3 and
# then appends stdout and stderr to ${logfile}.
# NOTE(review): cmmstep0 matches none of the patterns below and ends
# in _usage, although the usage text mentions cmmstep0-9 -- confirm.
#
case ${cmd} in
        # interactive commands
        startnode)              startnode_cmd    3>&1 1>>${logfile} 2>&1 ;;
        stopnode)               stopnode_cmd     3>&1 1>>${logfile} 2>&1 ;;
 
        # async commands - not interactive
        cmmstop | cmmabort)     cmmabort_cmd     3>&1 1>>${logfile} 2>&1 ;;

	# these subcommands map directly onto a function named <cmd>_cmd
	cmmstart | cmmreturn | cmmstep1) \
				eval ${cmd}_cmd  3>&1 1>>${logfile} 2>&1 ;;
	# all remaining numbered steps share one handler
	cmmstep[2-9]| cmmstep[0-9][0-9])
                                common_cmmstep $cmd 3>&1 1>>${logfile} 2>&1 ;;
        # disk reservation helpers (not defined in this file)
        resdisks)               reserve_all_shared_devs 3>&1 1>>${logfile} 2>&1
;;
        reldisks)               release_all_shared_devs 3>&1 1>>${logfile} 2>&1
;;
 
        *)                      _usage                              ;;
esac
exit 0
