#! /bin/ksh
#
#pragma ident "@(#)nshttp_probe.shi 1.9     01/03/28 SMI"
#
#
#	Copyright 12/20/96 Sun Microsystems, Inc.  All Rights Reserved.
#

# Usage: nshttp_probe <instance name>
# Started up in the background via pmfd in nshttp_fm_start
# during reconfiguration.

#
# PATH hook for the framework binaries: the probe is not called in the
# context of the methods.  As written, the assignment simply gives PATH its
# own current value.
#
PATH=$PATH

# The instance to probe is the first command-line argument.
INST_NAME="$1"

#
#	Copyright 11/18/96 Sun Microsystems, Inc.  All Rights Reserved.
#
#
#pragma ident "@(#)ds_boiler	1.1 97/06/12 SMI"
#
#ident "@(#)ds_boiler		1.7	96/11/18 SMI"
#
# common boiler for HA Internet Pro data services
#
#


# Script identity and syslog settings.
ARGV0=$(basename $0)
LOGGER=logger
HA_SLOGFACILITY=$(haget -f syslog_facility)
HA_SLOGTAG=hadf
prog_path=$(dirname $0)

# Pull in the common ha-services utilities (looked up via PATH).
. ds_utilities

# Put the ha-service specific clust_progs directory on PATH.  When the
# script's own directory already contains "/clust_progs", only that
# directory is prepended; otherwise its ../clust_progs sibling is added too.
case "$prog_path" in
*/clust_progs*)
	PATH=${prog_path}:${PATH}
	;;
*)
	PATH=${prog_path}:${prog_path}/../clust_progs:${PATH}
	;;
esac

# Same treatment for the ha-service specific fault_progs directory.
case "$prog_path" in
*/fault_progs*)
	PATH=${prog_path}:${PATH}
	;;
*)
	PATH=${prog_path}:${prog_path}/../fault_progs:${PATH}
	;;
esac

#
# Host names of the local node and of every other physical host, kept for
# use by a subsequent hactl command.
#
REMOTEHOSTS=
LOCALHOST=$(uname -n) || {
	logerr `gettext "Cannot obtain name of local host"`
	exit 1
}

# Every physical host other than this one is a remote node.
PHYS_HOSTS="$(haget -f all_physical_hosts)"
for i in $PHYS_HOSTS; do
	[ "$i" = "$LOCALHOST" ] && continue
	REMOTEHOSTS="$REMOTEHOSTS $i"
done
#! /bin/sh 
#
#	Copyright 12/20/96 Sun Microsystems, Inc.  All Rights Reserved.
#
#
#pragma ident "@(#)do_service	1.8 01/06/08 SMI"
#
#ident "@(#)do_service		1.13	96/12/20 SMI"
#
#

# First two positional arguments: comma-separated lists (going by the
# variable names, the mastered and the not-mastered logical hosts).
# Note that $1 is also read as INST_NAME earlier in this file.
ARG_MASTERED="$1"
ARG_NOT_MASTERED="$2"
SYSLOG_PREFIX="SUNWcluster.ha.nshttp"

# Turn each comma-separated list into a space-separated sh word list.
MASTERED="$(echo $ARG_MASTERED | tr ',' ' ')"
NOT_MASTERED="$(echo $ARG_NOT_MASTERED | tr ',' ' ')"

# source_env logs its own error message on failure, so there is no need to
# log another one here; just exit.
source_env NSHTTP || exit 1

#
# Seconds to wait for SIGTERM to stop a process.
# This should be in the config file.
#
STOP_TIMEOUT=15
#
# bundle_do_svc <action>
#
# Is called for each instance.  The instance is described by the _INST_*
# variables, which are expected to be set before the call.
#
# <action> is one of:
#	start			start the instance under pmf
#	stop | abort		remove the instance from pmf, then stop it
#	fm_start		start the fault probe under pmf
#	fm_stop			stop the fault probe
#	fm_check_this_host_ok	one-shot check that the instance accepts
#				connections on its logical host and port
# Any other action does nothing.
#
# This function never returns to its caller: it ends the shell with exit
# status 0 on success (or when there is nothing to do) and 1 on failure.
# It sets the globals action, prefix, HTTP_START and HTTP_STOP, plus
# HTTP_START_PARAM, MAINT and HTTPPROBEFILE in the branches that use them.
#
bundle_do_svc ()
{
	typeset method_timeout
	typeset wait_time
	typeset kill_status
	typeset probe_status
	typeset refused_status

	action=$1

	HTTP_START=${_INST_BASE_DIR}/start
	HTTP_STOP=${_INST_BASE_DIR}/stop

	prefix="$SYSLOG_PREFIX.$action"

	case $action in

	'start')

		# First do some error checking.

		if [ ! -x $HTTP_START ]; then
			logerr "$prefix.4000" \
				`gettext "<$HTTP_START> is not executable."`
			exit 1
		fi

		# Note that we're using pmf to start/stop, but not to probe.

		# When magnus.conf contains "Security on", the start script
		# is fed the keyPass file on stdin (HTTP_START_PARAM=0), so
		# that file must exist.  Only grep's exit status matters;
		# the matched line is discarded rather than printed.
		grep "Security on" ${_INST_BASE_DIR}/config/magnus.conf > /dev/null
		if [ $? -eq 0 ] ;
		then
			if [ ! -f  ${_INST_BASE_DIR}/keyPass ] ; then
				logerr "$prefix.4011" \
					`gettext "Password File not found"`
				exit 1
			fi
			HTTP_START_PARAM=0
		else
			HTTP_START_PARAM=1
		fi

		# _INST_RETRY = "n": plain pmf registration.  Anything else:
		# registration with retry count, interval and action program.
		if [ "${_INST_RETRY}" = "n" ]; then

			if [ $HTTP_START_PARAM -eq 1 ]; then
				pmfadm -c ${_INST_NAME} $HTTP_START
			else
				pmfadm -c ${_INST_NAME} /bin/sh -c \
					"$HTTP_START < ${_INST_BASE_DIR}/keyPass "
			fi

		else
			if [ $HTTP_START_PARAM -eq 1 ]; then
				pmfadm -c ${_INST_NAME} \
				       -n ${_INST_RETRY_TIMES} \
				       -t ${_INST_RETRY_INTERVAL} \
				       -a ${_INST_PROBE_CALLBACK_1} \
				       $HTTP_START
			else
				pmfadm -c ${_INST_NAME} \
				       -n ${_INST_RETRY_TIMES} \
				       -t ${_INST_RETRY_INTERVAL} \
				       -a ${_INST_PROBE_CALLBACK_1} \
				       /bin/sh -c \
					"$HTTP_START < ${_INST_BASE_DIR}/keyPass "
			fi
		fi

		# $? is the status of whichever pmfadm ran above.
		if [ $? -ne 0 ]; then
			logerr "$prefix.4001" \
				`gettext "pmfadm failed to start HTTP instance ${_INST_NAME}"`
			exit 1
		else
			lognotice "$prefix.2000" \
				`gettext "Started HTTP instance ${_INST_NAME}"`

		fi
	;;

	'stop' | 'abort')

		# delete from queue, but don't kill
		pmfadm -s ${_INST_NAME}
		if [ $? -ne 0 ]; then
			logerr "$prefix.4002" \
				`gettext "pmfadm failed to delete ${_INST_NAME} from queue"`
			exit 1
		fi

		# use Netscape's stop script to stop nshttp instance
		if [ ! -x $HTTP_STOP ]; then
			logerr "$prefix.4003" `gettext "<$HTTP_STOP> is not executable"`
			exit 1
		fi

		# Give the stop script the registered stop-method timeout
		# minus 5 seconds.
		method_timeout=`hareg -q nshttp -T stop`
		wait_time=`expr $method_timeout - 5`

		hatimerun -t $wait_time $HTTP_STOP

		ha_svc_not_running ${_INST_NAME}

		if [[ $? -ne 0 ]]; then
			# Now kill any processes left out
			pmfadm -s ${_INST_NAME} KILL
			# Save the status right away: by the time the message
			# below is built, $? would hold the result of the
			# test, not of pmfadm.
			kill_status=$?
			if [ $kill_status -ne 0 ]; then
				logerr "$prefix.4009" \
					`gettext "pmfadm failed to kill ${_INST_NAME}'s process and its sub-processes (if any) : pmfadm returned $kill_status"`
				exit 1
			fi
		fi

	;;

	'fm_start')

		# XXX
		need_to_run_probe ${_INST_LOGICAL_HOST} ${LOCALHOST}

		if [ $? -ne 0 ]; then
			exit 0
		fi

		# pmf starts nshttp_probe
		# nshttp_probe runs until nshttp_fm_stop kills it.
		# Don't start probe if diskset is in maintenance mode.

		# If this HTTP instance's diskset is in maint mode, exit now.
		MAINT=`haget -f is_maint -h ${_INST_LOGICAL_HOST}`
		if [ "$MAINT" = "1" ]; then
			exit 0
		fi

		pmfadm -c ${_INST_NAME}.probe ${_INST_PROBE_PROG_1} \
		    ${_INST_NAME}

		if [ $? -ne 0 ]; then
			logerr "$prefix.4005" \
				`gettext "pmfadm failed to start HTTP probe for instance ${_INST_NAME}"`
			exit 1
		else
			lognotice "$prefix.2002" \
				`gettext "Started HTTP probe instance ${_INST_NAME}.probe"`
		fi
	;;

	'fm_stop')

		# If probe not running, do nothing
		ha_svc_not_running ${_INST_NAME}.probe && exit 0

		# pmf kills nshttp_probe: TERM first, waiting up to
		# STOP_TIMEOUT, then KILL if that did not succeed.
		pmfadm -s ${_INST_NAME}.probe -w ${STOP_TIMEOUT} TERM || \
			pmfadm -s ${_INST_NAME}.probe KILL
		if [ $? -ne 0 ]; then
			logerr "$prefix.4006" \
				`gettext "pmfadm failed to stop HTTP probe instance ${_INST_NAME}.probe"`
			exit 1
		else
			lognotice "$prefix.2003" \
				`gettext "Stopped HTTP probe instance ${_INST_NAME}.probe"`
		fi
	;;

	'fm_check_this_host_ok')

		# If the HA-HTTP logical host for this instance
		# is not currently mastered by this machine, exit now.

		is_member "${_INST_LOGICAL_HOST}" "$MASTERED"
		if [ $? -ne 0 ]; then
			exit 0
		fi

		# Otherwise, probe Web service now.
		# If dead, request will time out in
		# ${_INST_PROBE_TIMEOUT_1} secs.

		HTTPPROBEFILE=/var/opt/SUNWcluster/run/${_INST_NAME}.probe.$$

		hatimerun -t ${_INST_PROBE_TIMEOUT_1} \
		    /usr/bin/telnet ${_INST_LOGICAL_HOST} \
		    ${_INST_PORT} <<EOF > $HTTPPROBEFILE 2>&1

EOF
		probe_status=$?
		grep refused $HTTPPROBEFILE > /dev/null 2>&1
		refused_status=$?

		# The scratch file is named after this process id, so it is
		# removed once both results are known; otherwise every check
		# would leave a file behind.
		rm -f $HTTPPROBEFILE

		if [ $probe_status -eq 99 ]; then
			# timeout
			logerr "$prefix.4007" \
				`gettext "This server is supposed to be providing HTTP service for instance <${_INST_NAME}>, but request timed out"`
			exit 1
		fi
		if [ $refused_status -eq 0 ]; then
			logerr "$prefix.4008" \
				`gettext "This server is supposed to be providing HTTP service for instance <${_INST_NAME}>, but isn't"`
			exit 1
		fi
	;;

	esac

	exit 0
}
#include_boiler

#
# Probe main body: read the probe settings for instance $INST_NAME, then
# loop forever testing the instance with a telnet connection to its logical
# host and port.  A locally mastered instance is restarted on failure; a
# failing instance on a sibling gets one grace interval and is then taken
# over if PROBE_1_TAKEOVER is 'y'.
#
set_inst_name ${INST_NAME}

prefix="SUNWcluster.ha.nshttp.probe"

if [ -z "$INST_NAME" ]; then
	logerr "$prefix.4009" `gettext "Usage: $ARGV0 <instance>"`
	exit 1
fi


MASTERED_LOGICAL_HOSTS="`haget -f mastered`"

HTTP_PORT=`get_config_param $INST_NAME PORT`
# required parameter
if [ -z "$HTTP_PORT" ]; then
	logerr "$prefix.4010" \
	    `gettext "HTTP_PORT value not set for instance $INST_NAME"`
	exit 1
fi

# parser requires this to be set
HTTP_HOST=`get_config_param $INST_NAME LOGICAL_HOST`

HTTP_PROBE_INTERVAL=`get_config_param $INST_NAME PROBE_1_INTERVAL`
# The parser requires this to be set, but doesn't check for negative
# values.  An empty value is still guarded against here: it would make the
# numeric test below a syntax error and leave the loop's sleep without an
# argument, so the probe would run back to back.
if [ -z "$HTTP_PROBE_INTERVAL" ]; then
	lognotice "$prefix.2010" \
	    `gettext "INTERVAL value not set for instance $INST_NAME; using 60 seconds"`
	HTTP_PROBE_INTERVAL=60
elif [ "$HTTP_PROBE_INTERVAL" -lt 0 ]; then
	lognotice "$prefix.2004" \
	    `gettext "INTERVAL value is negative for instance $INST_NAME; using 60 seconds"`
	HTTP_PROBE_INTERVAL=60
fi

HTTP_PROBE_TIMEOUT=`get_config_param $INST_NAME PROBE_1_TIMEOUT`
# optional parameter, parser doesn't check for <= 0 values
if [ -z "$HTTP_PROBE_TIMEOUT" ]; then
	lognotice "$prefix.2005" \
	    `gettext "TIMEOUT value not set for instance $INST_NAME; using 60 seconds"`
	HTTP_PROBE_TIMEOUT=60
fi
# what timeout value is too low?
if [ $HTTP_PROBE_TIMEOUT -le 0 ]; then
	lognotice "$prefix.2006" \
	    `gettext "TIMEOUT is <= zero for instance $INST_NAME; resetting to 60 seconds"`
	HTTP_PROBE_TIMEOUT=60
fi

HTTP_TAKEOVER=`get_config_param $INST_NAME PROBE_1_TAKEOVER`
# optional parameter
if [ -z "$HTTP_TAKEOVER" ]; then
	lognotice "$prefix.2007" \
	   `gettext "TAKEOVER value not set for instance $INST_NAME; using 'y'"`
	HTTP_TAKEOVER=y
fi

# LOCAL is decided once, before the loop starts.
LOCAL=no
is_member "$HTTP_HOST" "$MASTERED_LOGICAL_HOSTS"
if [ $? -eq 0 ]; then
	# HTTP_HOST is running locally
	LOCAL=yes
fi

# HTTPGRACE: 1 once a sibling failure has been given its grace interval.
# RETRY:     number of local restarts attempted so far.
# FAIL:      1 while the most recent probe failed.
HTTPGRACE=0
HTTPPROBEFILE=/var/opt/SUNWcluster/run/${INST_NAME}.nshttp_probe
RETRY=0
FAIL=0

while : ; do

	# Take a nap here, instead of at the end of loop
	# At start-up, this gives more time to server to initialize itself

	sleep $HTTP_PROBE_INTERVAL

	hatimerun -t $HTTP_PROBE_TIMEOUT /usr/bin/telnet $HTTP_HOST $HTTP_PORT \
	    <<EOF > $HTTPPROBEFILE 2>&1

EOF
	telnet_result=$?
	grep refused $HTTPPROBEFILE > /dev/null 2>&1
	# Failure: telnet reported "refused", or hatimerun exited 99 or 98.
	# NOTE(review): 99 is handled as a timeout in bundle_do_svc; 98 is
	# presumably another hatimerun failure -- confirm in hatimerun(1M).
	if [ $? -eq 0 -o $telnet_result -eq 99 -o $telnet_result -eq 98 ]; then
		FAIL=1
		# If running locally, restart it.
		# If it fails to restart successfully, the sibling
		# will eventually take over if TAKEOVER set to 'y'.
		if [ "$LOCAL" = "yes" ]; then
			logerr "$prefix.5001" \
			    `gettext "nshttp instance $INST_NAME failed locally"`
			RETRY=`expr $RETRY + 1`
			logerr "$prefix.5002" \
			    `gettext "restarting nshttp instance $INST_NAME; restart number $RETRY"`
			nshttp_svc_start "$MASTERED_LOGICAL_HOSTS" ""
		else
			logerr "$prefix.5003" \
			    `gettext "nshttp instance $INST_NAME failed on sibling"`
			# give sibling chance to restart nshttp
			# before doing a takeover
			if [ $HTTPGRACE -eq 0 ]; then
				HTTPGRACE=1
				continue
			fi
			if [ "$HTTP_TAKEOVER" = "y" ]; then
				# $REMOTEHOSTS set in ds_boiler
				# hactl is not yet available
				# TODO:	clean this up
				# Get "current master"
				# (CURRENT_MASTER is not referenced again in
				# this file.)
				CURRENT_MASTER="`haget -f master -h $HTTP_HOST`"
				#hactl -t -s nshttp -p $REMOTEHOST
				pmfadm -c ${INST_NAME}.hactl hactl -t -s nshttp \
					-l $HTTP_HOST

				if [ $? -ne 0 ]; then
					logerr "$prefix.4014" \
					    `gettext "pmfadm failed to launch hactl for ${INST_NAME}"`
					exit 1
				else
					lognotice "$prefix.2008" \
					    `gettext "Launched hactl for ${INST_NAME}"`
				fi
			fi
		fi
	else
		# Probe succeeded: report the recovery once, then reset state.
		if [ $FAIL -eq 1 ]; then
			lognotice "$prefix.2009" \
			    `gettext "HTTP instance ${INST_NAME} is up and running"`
		fi
		FAIL=0
		HTTPGRACE=0
	fi

done
