#!/bin/ksh
#
#pragma ident   "@(#)nfs_probe_loghost.sh 1.19 01/11/05"
# Copyright (c) 1997,2001 by Sun Microsystems, Inc.
# All rights reserved.
#
##########################################################################
# nfs_probe_loghost.sh                                                   #
#                                                                        #
# This file contains routines for fault monitoring that handle the list  #
# of logical hosts.                                                      #
# It is invoked by nfs_fm_start.sh with the following parameters:        #
# 	<LogicalHost> to be probed.					 #
#	<isremote> boolean to indicate if it is local/remote probe       #
#	<OnceOnly> boolean to indicate if we need to probe only once     #
#	<timeout>  timeout used to start the probe                       #
# Note: OnceOnly is supposed to be used only for HA_FM_CHECK method.        #
##########################################################################

######################nfs_probe_one_common_routines#########################
# All the Routines are related to nfs_probe_one_common_routines            #
############################################################################
function cleanup
{
	# Purpose: run ${FMBIN}/nfs_umount_all for ${LOGHOST} before the probe
	# terminates (presumably this unmounts the probe mount points; the
	# helper itself is not visible in this file).
	#
	# Globals read: FMBIN, LOGHOST.
	# Returns: always 0; a failure of nfs_umount_all is ignored (best effort).
	#
	# This routine deliberately does NOT exit.  Every caller follows it
	# with its own "exit <code>" (exit1setstatus, the signal trap and the
	# main program), and exiting here with 0 would hide those codes.
	${FMBIN}/nfs_umount_all ${LOGHOST}
	return 0
}

#########################################################################
#  Function  nosmbak.						        #
#								        #
#  Checks /var/statmon/sm.bak and decides whether NFSD/LOCKD needs to   #
#  be probed or not.                                                    #
#  This is normally done for the local case.                            #
#  For the remote case the check is done through pmfadm.                #
#########################################################################
nosmbak()
{
    # Purpose: decide whether lockd/statd recovery is still in progress.
    #
    # Globals read: TMP, FM_VALID_IPNAMES, ISREMOTE.
    # Returns:
    #   0 - local probe (the /var/statmon/sm.bak inspection is disabled,
    #       see below), or remote probe and pmfadm -q reports that the
    #       "nfs_lockd_statd_recover" tag is not registered on the host.
    #   1 - remote probe and the tag was still registered on every one of
    #       the ${nattempts} attempts.
    # Does not return (exit1setstatus) when ${TMP} cannot be written.
    typeset nattempts sleeptime host attempt RC

    nattempts=8
    sleeptime=6

    # Sanity check that the temporary file can be written: copy a file
    # known to be non-empty and verify that the copy has a size.
    /bin/cp /bin/sh "${TMP}" 2>/dev/null
    if [ ! -s "${TMP}" ]; then
        exit1setstatus
    fi

    # The remote query is sent to the first valid logical ipname.
    host=`echo ${FM_VALID_IPNAMES} | /usr/bin/awk ' { print $1 } '`

    attempt=0
    while [ ${attempt} -lt ${nattempts} ]
    do
        if [ ${ISREMOTE} -eq 0 ]; then
            # XXX: the local inspection of /var/statmon/sm.bak was disabled
            # in the original code ("Need to check if this is required"),
            # so the local case never reports a problem.
            RC=0
        else
            pmfadm -q "nfs_lockd_statd_recover" -h ${host} >/dev/null 2>&1
            RC=$?
            if [ ${RC} -ne 0 ]; then
                # The tag is not registered (or the query failed); we
                # assume that the network is ok.
                return 0
            fi
            # The tag exists.  lockd/statd recovery may go away within
            # 30 seconds, so wait and check again.
        fi

        sleep ${sleeptime}
        attempt=$((attempt + 1))
    done

    # Retries exhausted.
    # NOTE(review): the local case keeps its historical behaviour of
    # sleeping through all attempts and then returning 0 - confirm whether
    # the delay is still wanted.
    if [ ${ISREMOTE} -eq 0 ]; then
        return 0
    fi
    return 1
}

#########################################################################
# function : setsmbak							#
#     Sets the  HA_FM_NFS_SMBAK variable depending on statmon status    #
#########################################################################
setsmbak()
{
    # Translate the result of nosmbak into the exported flag
    # HA_FM_NFS_SMBAK: 0 when nosmbak succeeds, 1 otherwise.
    nosmbak
    case $? in
        0)  HA_FM_NFS_SMBAK=0 ;;
        *)  HA_FM_NFS_SMBAK=1 ;;
    esac
    export HA_FM_NFS_SMBAK
}

#########################################################################
# function : nfs_probe_one_common					#
#      This is the main function which handles the probing, either      #
#      local or remote.                                                 #
#########################################################################
function nfs_probe_one_common
{
	# Endless probe loop for one logical host.  The loop is only left
	# through "exit 1", when the cluster state cannot be queried.
	# Globals read: ISREMOTE, FMBIN, TAKEOVER, clustname, prog.
	typeset probe_status clusterstate pause

	while true
	do
		# Local probe only: make sure our own lockd/statd are healthy
		# first.  The failed command has already displayed enough
		# messages; local restart is assumed to repair the daemons,
		# so just wait and start the cycle again.
		if [ ${ISREMOTE} -eq 0 ]; then
			${FMBIN}/nfs_check_my_lockd_statd || { sleep 60; continue; }
		fi

		try_command 0 nfs_fault_monitor
		probe_status=$?

		# A remote probe that ended in TAKEOVER means hactl has been
		# issued, so wait here before doing anything else.
		if [[ ${ISREMOTE} -eq 1 && ${probe_status} -eq ${TAKEOVER} ]]; then
			sleep 120
		fi

		# Give up when the cluster state cannot be obtained.
		clusterstate=`/opt/SUNWcluster/bin/clustm getstate \
				${clustname} 2>/dev/null` || exit 1

		# We got here because of an intermittent error, which could
		# be the file system being full.
		${FMBIN}/filesystem_full -c ${prog} -w

		# Sleep for some duration, hoping that things will recover.
		if [ ${ISREMOTE} -eq 0 ]; then
			pause=300
		else
			pause=5
		fi
		sleep ${pause}
	done
}

#########################################################################
# nfs_fault_monitor:							#
#	Main Function which does the Fault monitoring			#
#########################################################################
function nfs_fault_monitor
{
    # Run one pass of ${FMBIN}/nfs_mon for ${LOGHOST} and hand back its
    # exit status.
    #
    # $1 - retry flag; when it is 1, ONCESW is forced to "-1" for the
    #      duration of this call and restored afterwards, which preserves
    #      the semantics of the ONCEONLY flag.
    # Globals read: fmdir, FMBIN, clustname, LOGHOST, ISREMOTE, ONCESW.
    # The stderr of nfs_mon is appended to ${fmdir}/nfs_mon/${LOGHOST}.
    typeset status retry saved_once mondir logfile

    retry=$1
    mondir=${fmdir}/nfs_mon
    logfile=${mondir}/${LOGHOST}

    mkdir -p ${mondir}
    chmod 755 ${mondir}

    if [ ${retry} -eq 1 ]; then
	saved_once=${ONCESW}
	ONCESW="-1"
    fi

    # Never write through a symbolic link; keep one generation of a
    # previous regular log file as <name>.bak.
    [ -h ${logfile} ] && rm ${logfile}
    [ -f ${logfile} ] && mv ${logfile} ${logfile}.bak 2>/dev/null

    ${FMBIN}/nfs_mon ${ONCESW} ${clustname} ${LOGHOST} ${ISREMOTE} \
		2>>${logfile}
    status=$?

    if [ ${retry} -eq 1 ]; then
	ONCESW=${saved_once}
    fi

    return ${status}
}

##########################################################################
# Usage: setstatus identifier                                            #
# Record status for the hastat_nfs command.                              #
#	setstatus:							 #
#	csetstatus_unknown()						 #
#	exit1setstatus()						 #
# All Status Related Routines.						 #
##########################################################################
setstatus()
{
    # Record the status word $1 in ${STATUSFILE} for the hastat_nfs
    # command.
    # Globals written: STATUS (always), DIDSETSTATUS (always 1),
    # EXITCODE (set to 1 only when the file could not be written).
    STATUS="$1"

    # Refuse to write through a symbolic link: drop the link first.
    [ -h ${STATUSFILE} ] && {
        log_error "${STATUSFILE} is a symbolic link - deleting it"
        rm -f ${STATUSFILE}
    }

    # The write counts as successful only when echo succeeded and the
    # resulting file is non-empty.
    if echo "${STATUS}" > ${STATUSFILE} && [ -s ${STATUSFILE} ]; then
        :
    else
        log_error "Could not write file ${STATUSFILE}"
        # Delete the file and let query routine map absent file to Unknown
        /bin/rm -f ${STATUSFILE}
        EXITCODE=1
    fi

    DIDSETSTATUS=1
}


csetstatus_unknown()
{
    # Usage: csetstatus_unknown
    # Conditionally set the status to "Unknown": a previously recorded
    # NotOk or Degraded is left alone, on the assumption that whatever
    # code set those values had better information.
    case "${STATUS}" in
        NotOk|Degraded)
            ;;
        *)
            setstatus Unknown
            ;;
    esac
}

exit1setstatus()
{
    # Fatal-error exit path: record status "Unknown", run the cleanup
    # routine and terminate the probe.  The final "exit 1" is reached
    # only when cleanup returns to its caller.
    setstatus "Unknown"
    cleanup
    exit 1
}


########################################################################
# diagcomm  					                       #
#								       #
# Noop if ISREMOTE is FALSE (just return).			       #
# Calls net_diagnose_comm.  Case on the answer:	                       #
#   UNSURE: we return.						       #
#   MYNETOKAY: we return normally.				       #
#   REMOTEOKAY: we return normally.				       #
#   TAKEOVER: net_diagnose_comm started takeover.                      #
# So Postcondition is MYNETOKAY | REMOTEOKAY			       #
########################################################################
diagcomm()
{
    # Diagnose the network when probing a remote host.
    #
    # Returns:
    #   0           - local probe (no-op), or net_diagnose_comm answered
    #                 REMOTEOKAY, MYNETOKAY or UNSURE.
    #   ${TAKEOVER} - net_diagnose_comm asked for a takeover; status is set
    #                 to NotOk and hactl has been started through pmfadm.
    #   other       - any other status of net_diagnose_comm, passed through
    #                 after conditionally setting the status to Unknown.
    typeset RC

    # Nothing to diagnose for a local probe.
    [ ${ISREMOTE} -eq 0 ] && return 0

    #
    # We need to find the exact reason why NFS fault monitoring failed
    # with TAKEOVER.
    # XXX: hactl cannot simply be called here: when all networks are up
    # we want to retry NFS for a grace period, whereas hactl would do a
    # takeover right away, which is not what we want.
    # net_diagnose_comm is called so that a network failure showing that
    # the remote net is not okay leads to an immediate takeover instead
    # of waiting for the grace period to expire.
    #
    ${FMBIN}/net_diagnose_comm ${LOGHOST}
    RC=$?

    if [ ${RC} -eq ${REMOTEOKAY} ] || [ ${RC} -eq ${MYNETOKAY} ]; then
        return 0
    fi

    if [ ${RC} -eq ${UNSURE} ]; then
        log_info "${pre}.1950" \
	"net_diagnose_comm returned UNSURE so inhibiting takeover"
        csetstatus_unknown
        return 0
    fi

    if [ ${RC} -eq ${TAKEOVER} ]; then
        setstatus NotOk
        # Network diagnosis indicated that this node has problems and
        # that the remote node is better off, so call hactl.  hactl
        # needs to be issued through pmfadm.
        ${CLUSTERBIN}/pmfadm -c "hactl_nfs_${LOGHOST}" \
            ${CLUSTERBIN}/hactl -t -s nfs -l ${LOGHOST} -L soft

        # Report TAKEOVER to the caller so that probe_local/probe_remote
        # waits before commencing the next monitoring cycle; there is no
        # guarantee that hactl will succeed.
        return ${TAKEOVER}
    fi

    # We are not sure what is happening, so leave the main routine and
    # get back to nfs_probe_local/nfs_probe_remote.
    csetstatus_unknown
    return ${RC}
}


##########################################################################
# suppress_rpcbind_return                                                #
#									 #
# Usage: suppress_rpcbind_return					 #
# Checks if we're configured to suppress takeover for rpcbind problems   #
# AND if other host's rpcbind isn't responding.  If both are true,	 #
# log msg that we're suppressing takeover and return 1.			 #
##########################################################################
suppress_rpcbind_return()
{
    # Returns 1 (after logging and setting status NotOk) when this is a
    # remote probe, HA_FM_NFS_SUPPRESSTAKEOVER_RPCBIND is 1 and rpcbind on
    # the first valid logical ipname does not answer a UDP rpc probe.
    # Returns 0 in every other case.
    # Side effect: the global UPRELOHOST is set to that first ipname.
    UPRELOHOST=`echo ${FM_VALID_IPNAMES} | /usr/bin/awk ' { print $1 } '`

    # Not a remote probe, or suppression not configured: nothing to do.
    [ ${ISREMOTE} -eq 1 -a ${HA_FM_NFS_SUPPRESSTAKEOVER_RPCBIND} -eq 1 ] || return 0

    # rpcbind answers: no reason to suppress anything.
    net_rpcprobe -T udp ${UPRELOHOST} ${RPCPROG_rpcbind} && return 0

    log_info "${pre}.1800" \
	"rpcbind for host ${UPRELOHOST} is not responding and configured to suppress takeover"
    setstatus NotOk
    return 1
}

##########################################################################
# Function : have_expected_problem                                       #
# First, checks if the communication path via loghost is still	         #
# working.  If not, then we don't really have an NFS problem,		 #
# so return 0.  The next outer iteration will use a different hostname   #
# and will have the opportunity to find any NFS problems.		 #
#									 #
# Then, checks if the target machine has one or more of the 		 #
# following conditions:							 #
#   (1) if ISREMOTE==1, check whether my local lockd or statd are	 #
#       broken.								 #
#   (2) cannot use the name service on my local host.			 #
#       If the probing host's name service				 #
#       is working okay and the target's is not, then a takeover	 #
#       gives better availability to clients.  Therefore, the		 #
#       real test of the name service is whether this host		 #
#       (the probing host) is getting name service response, not whether #
#       the target is.							 #
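# Return status, as implemented:                                         #
#   0        - FM_VALID_IPNAMES is empty, or every check below passed.   #
#   diagcomm - status of diagcomm when a logical ipname is not working.  #
#   non-zero - status of nfs_check_my_lockd_statd when it fails          #
#              (ISREMOTE==1 only).                                       #
#   99       - checknameservice returned 99.                             #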
##########################################################################
have_expected_problem()
{
	# Purpose: look for a cause other than NFS itself behind a failed
	# probe.
	#
	# Globals read: FM_VALID_IPNAMES, ISREMOTE, pre.
	# Returns:
	#   0     - FM_VALID_IPNAMES is empty (logged), or none of the checks
	#           below found anything.
	#   $?    - status of diagcomm, as soon as one logical ipname is
	#           found not working.
	#   != 0  - status of nfs_check_my_lockd_statd when it fails
	#           (remote probe only).
	#   99    - checknameservice returned 99.
	typeset i rval

	# The list normally holds several names, so it must be tested as one
	# quoted word; the unquoted echo folds a whitespace-only value to "".
	if [ -z "`echo ${FM_VALID_IPNAMES}`" ] ; then
		log_error "${pre}.4501" "FM_VALID_IPNAMES not defined"
		return  0
	fi

	# If any of the valid ipnames is not working then it may not be an
	# NFS problem, so let diagcomm judge the network and report its
	# verdict.
	for i in ${FM_VALID_IPNAMES}
	do
		check_if_logical_ip_ok  ${i}
		rval=$?
		if [ ${rval} -eq 1 ]; then
			# ipname not working. the caller will use the
			# remaining set of ipnames.
			diagcomm 0
			return $?
		fi
	done

	# Remote probe: are my own lockd/statd broken?
	if [ ${ISREMOTE} -eq 1 ]; then
		nfs_check_my_lockd_statd
		rval=$?
		if [ ${rval} -ne 0 ]; then
			return ${rval}
		fi
	fi

	# Name service as seen from this (the probing) host.  Only status 99
	# is treated as a failure; any other status falls through to 0.
	checknameservice
	rval=$?
	if [ ${rval} -eq 99 ]; then
		log_error "${pre}.4600" \
			"name service not responding or taking too long"
		return ${rval}
	fi
	return 0
}

############################################################################
# Function try_command                                                     #
# Usage: try_command suppressTakeoverBool command     			   #
#									   #
# Tries the command,  with the hostname for the		                   #
# service inserted in the arg list as the very first arg.		   #
# Thus, try_command is a packaging up of a control structure               #
# that we need to use for several different commands.                      #
#							                   #
# The suppressTakeoverBool parameter is a boolean that		           #
# says whether or not we are configured to suppress takeovers		   #
# when this particular command fails.					   #
#									   #
# The command should be written to exit with the following exit codes:     #
#     0 for success,						           #
#     99 for takeover                                                      #
#     other non-zero for failure and takeover inappropriate.               #
#								           #
# An example of the other non-zero is a mount that fails with              #
# 'access denied'.							   #
#									   #
# What try_command then does depends on the exit status		           #
# of the command: 						           #
#									   #
# If zero, just return 0.						   #
# If not zero and not 99, log error and return the value.		   #
#									   #
# Otherwise, try_command does some diagnosis of what might be		   #
# the underlying problem, including calling have_expected_problem	   #
# and diagcomm.							           #
# If those potential problems check out as okay, then try_command          #
# sleeps some to let local restart on the remote host restart its          #
# daemons.  try_command then retries the command.                          #
# IMPORTANT: the fact that the retry of the command is done takes care of  #
# the following tricky situation:					   #
# The remote file system may be (1) under lockfs -h,			   #
# in which case nfsd doesn't respond at all for it, or (2) undergoing      #
# fsck.  In both these cases, a remote mount command will get              #
# "access denied" or estale.  Our mount probe will treat those             #
# error returns as not being grounds for takeover.                         #
#									   #
# To handle a multi-homed host, which might have some network interfaces   #
# working and some not, try_command tries the command at		   #
# least twice, with an intervening call to uprelohostname, before          #
# doing a takeover.  (The logic to decide whether a some working           #
# network interfaces and some not working should cause a takeover          #
# is elsewhere, in the networking fault probes.)                           #
#									   #
# The code path is simpler when ISREMOTE is FALSE, because we aren't       #
# trying to decide whether to do takeover or assisted suicide, all         #
# we are doing is logging problems.					   #
############################################################################
try_command()
{
    # Usage: try_command suppressTakeoverBool command [args...]
    #
    # Runs "command <is_retry> args..." (is_retry is 0 for the first
    # attempt and 1 for every retry) and interprets its exit status.
    #
    # Returns:
    #   0           - the command succeeded (first try or a retry), or a
    #                 local probe for which have_expected_problem returned 0.
    #   1           - local probe with a problem, or remote probe whose
    #                 grace period expired while takeover is suppressed.
    #   ${TAKEOVER} - grace period expired and hactl takeover was issued.
    #   other       - a non-takeover status of the command, or the non-zero
    #                 status of have_expected_problem (remote probe).
    # Exits 1 when the cluster state cannot be queried.
    #
    # Globals written (left global as before): SUPPRESSTAKEOVER, RC, START,
    # NOW, ELAPSED, LOWBOUND, clusterstate, EXITCODE.
    typeset CMD CMDPROG CMDARGS
    typeset IS_RETRY

    IS_RETRY=0

    # Gather the parameters.
    SUPPRESSTAKEOVER="$1"
    shift
    CMDPROG=$1
    shift
    CMDARGS="$*"

    # First attempt; the command's stdout is discarded.
    CMD="${CMDPROG} ${IS_RETRY} ${CMDARGS}"
    ${CMD} > /dev/null
    RC=$?
    if [ ${RC} -eq 0 ]; then
	# Success: return so that the main loop can carry on.
        return 0
    fi

    if [ ${RC} -ne ${FAILEDTAKEOVER} ]; then
	# The command failed, but not with the takeover status.
        log_info "${pre}.5650" \
	"nfs_mon ${LOGHOST} failed with exit status that is not for takeover: ${RC}"
        csetstatus_unknown
        return ${RC}
    fi

    # Everything below handles the takeover status only.  START anchors
    # the grace period measured in the retry loop.
    START=`fdl_timesecs`

    # Local probing: nothing else is attempted here, the services are
    # left to be restarted locally.
    if [ ${ISREMOTE} -eq 0 ]; then
	# A lockfs problem should have been resolved in nfs_mon itself.
        have_expected_problem
	RC=$?
	if [ ${RC} -eq 0 ]; then
            # Returning 0 here means that the hastat command will show
            # no problems.
            return 0
        fi
        setstatus NotOk
        EXITCODE=1
        # Since he's supposed to be local, don't bother to retry.
        log_error "${pre}.4651" \
		"Logical Host ${LOGHOST} is having some problems"
        return 1
    fi

    log_info "${pre}.4702" \
	"command exited with status that suggests takeover: ${CMD}"

    # It would be tempting to call the remote host here and ask him
    # to restart his services.  Alas, we cannot do that because he
    # may be in the middle of cluster transition, such that he doesn't
    # want to be running his services.  On the remote end, we cannot
    # get enough locking to ensure that if we attempted to restart
    # his services, that he is not in the middle of cluster transition.
    # The following loop runs until the grace period (the larger of
    # HA_FM_NFS_LOCALRESTARTGRACESECS and HA_FM_MIRROR_SECS) has elapsed,
    # as measured from where START was latched above.
    while : ; do
	have_expected_problem
	RC=$?
	if [ ${RC} -ne 0 ]; then
            # have_expected_problem found another cause (or diagcomm has
            # already dealt with it): hand that status to the caller
            # instead of retrying.
            return ${RC}
        fi

	# Diagnose the communication, refresh the list of working ipnames
	# and run the rpcbind suppression check.
	# NOTE(review): the statuses of diagcomm and suppress_rpcbind_return
	# are not acted upon here - confirm that this is intended.
	diagcomm 0
	get_all_valid_logical_ipnames
	suppress_rpcbind_return

	# Give up when the cluster state cannot be obtained.
        clusterstate=`${CLUSTERBIN}/clustm getstate \
			${clustname} 2>/dev/null`
	if [ $? -ne 0 ]; then
		exit 1
	fi

	# Retry the command, flagged as a retry.
    	IS_RETRY=1
	CMD="${CMDPROG} ${IS_RETRY} ${CMDARGS}"
	${CMD} >/dev/null
        RC=$?

	case ${RC} in

		0) 	return 0;;

		${FAILEDTAKEOVER})
            		log_info "${pre}.5671" \
				"command failed again with status that suggests takeover: ${CMD}"
			;;

		*)
            		log_info "${pre}.5670" "command failed again with status not for takeover: ${CMD}"
			csetstatus_unknown
			return ${RC};;
	esac

	# We have got a takeover situation, but we keep retrying until
	# the grace time expires. HA_FM_MIRROR_SECS says how much extra time
	# a data service fault monitor must allow, over and above its other
	# time-outs, for mirroring to mask a disk fault.  The motivation is
	# that Solaris disk drivers take some time to notice that a disk is
	# bad. Mirroring will mask a single disk fault, but because of the
	# time consumed by the disk driver, it takes some elapsed time to
	# mask the fault. If a data service is too eager to time-out, then
	# there won't be enough time for the mirroring recovery code to come
	# into play.
	# Here the larger of the two values is used as the lower bound.
	#
        NOW=`fdl_timesecs`
        ELAPSED=$((NOW - START))
        LOWBOUND=${HA_FM_NFS_LOCALRESTARTGRACESECS}
        if [ -n "${HA_FM_MIRROR_SECS}" ] ; then
            if [ ${HA_FM_MIRROR_SECS} -gt ${LOWBOUND} ]; then
                LOWBOUND=${HA_FM_MIRROR_SECS}
            fi
        fi
        if [ ${ELAPSED} -gt ${LOWBOUND} ]; then
            log_info "${pre}.1500" \
		"Just exceeded HA_FM_NFS_LOCALRESTARTGRACESECS (${HA_FM_NFS_LOCALRESTARTGRACESECS}) and HA_FM_MIRROR_SECS (${HA_FM_MIRROR_SECS})"
            break
        fi

    done

    # The grace period is over and the command still fails.  When the
    # caller asked for takeover to be suppressed, only record the failure.
    if [ ${SUPPRESSTAKEOVER} -eq 1 ]; then
        setstatus NotOk
        EXITCODE=1
        return 1
    fi

    log_info "${pre}.1571" \
	"Since command still failed, calling hactl takeover"

    setstatus NotOk
    EXITCODE=1

    # hactl needs to be issued through pmfadm: when nfs_fm_stop is called
    # as part of switchover, we kill all the processes associated with
    # loghost.
    ${CLUSTERBIN}/pmfadm -c "hactl_nfs_${LOGHOST}" \
    	${CLUSTERBIN}/hactl -t -s nfs -l ${LOGHOST} -L soft

    return ${TAKEOVER}
}

###################################################################
# Function: get_all_logical_ipnames                               #
#   - Gets all the logical ipnames associated with Logical Host   #
###################################################################
function get_all_logical_ipnames
{
	# Build the global IPNAMES: for ${LOGHOST}, query its LOGHOST record
	# with scccd, take the comma-separated id list found in the fifth
	# ':'-separated field, and look up each id's LOGIP record, whose
	# fourth field is the ipname.  Every name is appended with a leading
	# blank, so a non-empty IPNAMES starts with a space.
	typeset record idlist id name

	record=`${CLUSTERBIN}/scccd -f ${ccdfile} ${clustname} \
			LOGHOST query lname ${LOGHOST}`

	# Extract the id list and turn the commas into blanks.
	idlist=`echo ${record} | /usr/bin/awk -F: '{ print $5}' | \
			/usr/bin/tr ',' ' '`

	IPNAMES=""
	for id in ${idlist}
	do
		name=`${CLUSTERBIN}/scccd -f ${ccdfile} ${clustname} \
			LOGIP query logif  ${id} | \
			/usr/bin/awk -F: '{ print $4}'`
		IPNAMES="${IPNAMES} ${name}"
	done
}

########################################################################
# Function: check_if_logical_ip_ok   ${ipname}                         #
#                                                                      #
#       returns 0 - if logical ipname is Ok.                           #
#       returns 1 - if logical ipname is not working                   #
########################################################################
function check_if_logical_ip_ok
{
  # $1 - logical ipname to check.
  # Returns 0 when the name (or, failing that, its looked-up address)
  # answers ping; returns 1 when the lookup fails or the address does not
  # answer.  Side effect: the global IPADDR holds the lookuphost output.
  typeset host
  host=$1

  # If ping by name works then the ipname is ok.
  /usr/sbin/ping -n ${host} 10 >/dev/null 2>&1 && return 0

  # Ping can fail because it could not map the name to an address, so
  # resolve the name ourselves and ping the address instead.
  IPADDR=`${CLUSTERBIN}/lookuphost ${host}` || {
	log_info "${pre}.5200" \
			"lookup of hostname ${host} failed"
	# assume that the logical ip is not working
	return 1
  }

  # The -n switch is to avoid name service lookups for gateway info printout.
  # Using a value of less than 20 gives spurious timeouts.
  sleep 5
  /usr/sbin/ping -n ${IPADDR} 20 >/dev/null 2>&1 || return 1

  # everything is fine
  return 0
}

######################################################################
# Function : get_all_valid_logical_ipnames                           #
#     1. Gets all the Valid and working logical IP names for         #
# 	 a given logical host.                                       #
#         FM_VALID_IPNAMES=                                             #
######################################################################
function get_all_valid_logical_ipnames
{
   # Rebuild the global FM_VALID_IPNAMES from IPNAMES, keeping only the
   # names for which check_if_logical_ip_ok returns 0.  Each kept name is
   # appended with a leading blank.
   typeset candidate

   FM_VALID_IPNAMES=""
   for candidate in ${IPNAMES}
   do
	if check_if_logical_ip_ok  ${candidate}; then
		FM_VALID_IPNAMES="${FM_VALID_IPNAMES} ${candidate}"
	fi
   done
}

############################################################################
# End of Routines related to nfs_probe_one_common                          #
############################################################################


############################################################################
# Main Program of nfs_probe_loghost                                        #
############################################################################
# INCLUDE holds the "." (source) command; it is used below to pull in the
# shared utility file.
INCLUDE=.
# From here on everything written to stderr by this script is discarded.
exec 2>/dev/null
PROG=$(/bin/basename $0)
# Prefix of the message identifiers passed to log_info/log_error.
pre="SUNWcluster.nfs_probe_loghost"
# Positional parameters: <LogicalHost> <isremote> <OnceOnly> <timeout>.
LOGHOST=$1
ISREMOTE=$2
ONCEONLY=$3
TIMEOUT=$4

CLUSTERBIN=/opt/SUNWcluster/bin/
FMBIN=/opt/SUNWcluster/ha/nfs/
PATH=${CLUSTERBIN}:${FMBIN}:/usr/bin/:/bin/:/sbin/:/usr/sbin/:

# Source the common NFS utility routines.  Presumably this is where
# initnfsenv, log_info, log_error, checknameservice and fdl_timesecs come
# from - they are not defined in this file.
${INCLUDE} ${FMBIN}/nfs_common_util 

# On HUP, INT, QUIT or TERM: run cleanup, then exit with status 1.
ECH_TRAPSIGNALS="1 2 3 15"
trap "cleanup ; trap 0 ; exit 1" $ECH_TRAPSIGNALS
# NOTE(review): presumably initnfsenv sets tmpdir, hanfsdir, fmdir,
# FMSTATUSDIR, clustname and ccdfile; none of them is assigned in this
# file - confirm against nfs_common_util.
initnfsenv

# Exit statuses exchanged between the probe commands and try_command.
FAILEDTAKEOVER=99
FAILEDBENIGN=98
FAILEDUSEMP=97
EXITCODE=0
DIDSETSTATUS=0
# Statuses of net_diagnose_comm as interpreted by diagcomm.  Note that 97
# and 98 are also used by FAILEDUSEMP and FAILEDBENIGN above.
UNSURE=1
MYNETOKAY=97
REMOTEOKAY=98
TAKEOVER=99
HACTLISSUED=100
# Extra switch handed to nfs_mon; empty unless OnceOnly is requested.
ONCESW=""

#
# XXX: The following things need to be evaluated from CCD.
# enmatch cdbfile <nfsfmdir>
#
# Make sure the temporary directory exists; give up if it cannot be
# created.
if [ ! -d ${tmpdir} ]; then
    mkdir -p ${tmpdir} >/dev/null 2>&1
    if [ ! -d ${tmpdir} ]; then
        cleanup
        exit 1
    fi
fi

# Per-process scratch file names.
# NOTE(review): TMPERR is identical to TMP - confirm that this is intended.
TMP=${tmpdir}/${PROG}.$$
TMPERR=${tmpdir}/${PROG}.$$ 

# Initialize all the environment needed.
HA_NFS_DIR=${hanfsdir}; export HA_NFS_DIR
HA_NFS_MOUNT_DIR=${hanfsdir}/.nfs_probe_mountpoints; export HA_NFS_MOUNT_DIR
MPDIR=${hanfsdir}/.nfs_probe_mountpoints

# OnceOnly mode uses its own mount point directory and passes "-1" to
# nfs_mon.
if [ $ONCEONLY -eq 1 ]; then
    MPDIR=${MPDIR}.checkme
    ONCESW="-1"
fi

# The status word for this logical host is kept in ${STATUSDIR}/status.
STATUSDIR=${FMSTATUSDIR}/${LOGHOST}
STATUSFILE=${STATUSDIR}/status

# Create the mount point directory.
if [ ! -d ${MPDIR} ]; then
    mkdir -p ${MPDIR} > /dev/null 2>&1
    if [ ! -d ${MPDIR} ]; then
        log_info "${pre}.4602" \
		"mkdir -p ${MPDIR} failed"
        cleanup
        exit 1
    fi
fi

# Create the status directory.

if [ ! -d ${STATUSDIR} ]; then
    mkdir -p ${STATUSDIR} > /dev/null 2>&1
    if [ ! -d ${STATUSDIR} ]; then
        log_info "$pre.4600" \
                "mkdir -p ${STATUSDIR}  failed"
        cleanup
        exit 1
    fi
fi

#
# change permissions to 755
#
chmod 755 ${STATUSDIR}

# Start with a fresh STATUSFILE.
/bin/rm -rf ${STATUSFILE}

# Default HA_FM_NFS_LOCKFILE to 1 when it is unset or empty.
if [ -z "${HA_FM_NFS_LOCKFILE}" ]; then
    HA_FM_NFS_LOCKFILE=1
fi

setstatus Ok

# Fill IPNAMES with every logical ipname of ${LOGHOST}.
get_all_logical_ipnames

#
# Store the list of all valid logical ipnames, so that it can be
# passed to nfs_mon.
# Getting the initial list is important: a logical ip may be bad on
# both the local and the remote node, and we might need to take care
# of that by ignoring the logical IP.
#
FM_VALID_IPNAMES=""
export FM_VALID_IPNAMES

get_all_valid_logical_ipnames
INIT_FM_VALID_IPNAMES=${FM_VALID_IPNAMES}

# Enter the probe loop.  nfs_probe_one_common loops forever and only ends
# through an exit, so the lines after it run only if it ever returns.
nfs_probe_one_common

# All is done with fault monitoring, hence exit with EXITCODE.
# Main cleanup routine.
cleanup
exit $EXITCODE

