#!/bin/ksh

#pragma ident "@(#)pdbnfs.sh   1.39     01/08/06 SMI"
#
# 	Copyright (C) 1996-2001 Sun Microsystems, Inc.
#

#
#	SC/NFS takeover and switchover program
#
#	Usage : scnfs <cluster name> [ mount | unmount | share | unshare <logical-host-names>
#                                   | startd <currnodes> <localnodeid> <ccdfile> <logical-host-names> | killd ]
#
function log_trace
{
  # Emit a function-entry ("+") trace line on fd 3, but only when the
  # TRACE_SC environment variable is set.  ${cmd} is the sub-command
  # name established by main().
  [ -z "${TRACE_SC}" ] && return
  echo "# + ${cmd}: $*" >&3
}

function log_trace_end
{
  # Emit a function-exit ("-") trace line on fd 3, but only when the
  # TRACE_SC environment variable is set.  ${cmd} is the sub-command
  # name established by main().
  [ -z "${TRACE_SC}" ] && return
  echo "# - ${cmd}: $*" >&3
}

function usage
{
  # Report an invalid invocation: print the offending sub-command and a
  # usage synopsis, then exit with status 2.  ${cmd} and ${prog} are
  # globals set by main().  (Fixed typo: "commnd" -> "command".)
  print "Invalid command: ${cmd}"
  print	" Usage : ${prog} <cluster name> [ mount | umount <logical-host-names> \\ "
  print "                                        | startd <currnodes> <localnodeid> | killd ]"
  exit 2
}

function init
{
	log_trace init

	# Well-known SC configuration locations.
	myetc=/etc/opt/SUNWcluster
	myvar=/var/opt/SUNWcluster

	# Pull per-cluster settings out of the cluster database; fall back
	# to built-in defaults when the cdb has no entry for them.
	cdbfile=${myetc}/conf/${clustname}.cdb
	tmpdir=$(cdbmatch env.tmpdir ${cdbfile})
	myhanfs=$(cdbmatch cluster.hanfsdir ${cdbfile})
	haadmindir=$(cdbmatch cluster.haadmindir ${cdbfile})
	: ${haadmindir:="/"}
	: ${myhanfs:=${myetc}/conf/hanfs}

	# Snapshot the currently mounted file systems.  The file is
	# consumed by mount_cmd_per_loghost() / unmount_cmd_per_loghost()
	# and removed again by main().
	sc_mounted_fs=${tmpdir}/mounted
	/bin/rm -f ${sc_mounted_fs}
	/usr/sbin/mount > ${sc_mounted_fs}

	log_trace_end init
}

getpid()  {
        pid=`/usr/bin/ps -e | /usr/bin/grep -w $1 | \
			/usr/bin/sed -e 's/^  *//' -e 's/ .*//'`
}

# Search the /etc/init.d/nfs.server command file for the line that starts
# up the nfsd, in case the system administrator has customized that line,
# for example, by raising the number of daemon threads or by using the other
# nfsd switches, such as what network protocols to use. If there are multiple
# nfsd lines in the nfs.server command file,use the first.Thus, if the system
# administrator has increased the number of servers in /etc/init.d/nfs.server,
# we'll use that value.
# According to SMCC NFS Server Performance and Tuning Guide:
#  "The default setting, 16, in Solaris 2.4 software environments
#  results in poor NFS response times.  Scale the setting with
#  the number of processors and networks.  Increase the number of
#  NFS server threads by editing the invocation of nfsd in
#  /etc/init.d/nfs.server."
# That is, the manual recommends to the administrator that the way to tune the
# nfsd is to edit the /etc/init.d/nfs.server script, thus, the reconfiguration
# tries to honor any such edits.
restart_nfsd()  {

	# Restart nfsd using the (possibly administrator-tuned) invocation
	# found in /etc/init.d/nfs.server; when several nfsd lines exist
	# the first one wins.  Falls back to the stock "nfsd -a 16".
	typeset nfsdcmd

	nfsdcmd=$(/bin/egrep '^[^#]*/usr/lib/nfs/nfsd' /etc/init.d/nfs.server \
		2>/dev/null | head -1)
	: ${nfsdcmd:="/usr/lib/nfs/nfsd -a 16"}	# stock default invocation

	# ${nfsdcmd} is left unquoted on purpose: it must word-split into
	# the command plus its arguments.
	/bin/priocntl -c ${DFLT_CLASS} -e ${nfsdcmd}
	if [ $? -ne 0 ]; then
		log_info "${pre}.2703" "Failed to start ${nfsdcmd}"
	fi
}

kill_daemons() {

  log_trace kill_daemons

  typeset pid

  # lockd: prefer a graceful shutdown (lm_shutdown cleans up kernel
  # lock-manager state); fall back to SIGKILL only if that fails.
  getpid lockd
  if [ -n "${pid}" ]; then
    lm_shutdown -n
    if [ $? -ne 0 ]; then
      log_info "${pre}.2702" "Could not kill lockd gracefully"
      kill -9 ${pid}
    fi
  fi

  # statd, mountd and nfsd have no graceful path; SIGKILL each one that
  # is running.  ${pid} stays unquoted on purpose: getpid may return
  # several PIDs and they must word-split into separate kill arguments.
  getpid statd
  [ -n "${pid}" ] && kill -9 ${pid}

  # statd.args is rebuilt by prep_for_statd() before the next start
  /bin/rm -f ${myvar}/statd.args

  getpid mountd
  [ -n "${pid}" ] && kill -9 ${pid}

  getpid nfsd
  [ -n "${pid}" ] && kill -9 ${pid}

  log_trace_end kill_daemons
}

# The function does
#    - exports file systems which are configured for ha-nfs
#    - prepares arguments required to start statd
#    - Starts mountd, nfsd, lockd and statd
# 
start_daemons()   {

  log_trace start_daemons
  typeset retval 

  # Prepare a file which will have arguments for statd.
  # This file is used while restarting the statd daemon.
  prep_for_statd

  retval=0
  if [[ -f ${myvar}/statd.args ]]; then
    /bin/priocntl -c ${DFLT_CLASS} -e \
     /usr/lib/nfs/statd $(/bin/cat ${myvar}/statd.args) || retval=$?
    
    if [ "${retval}" -ne 0 ]; then
      log_info "${pre}.3010" "Could not start statd"
      # If statd could not be started, then lockd should not be started either
      return
    fi
  fi
  
  # lockd needs a grace period of 90 sec.  The default grace period is 45.
  /bin/priocntl -c ${DFLT_CLASS} -e /usr/lib/nfs/lockd -g 90
  if [ $? -ne 0 ]; then
    log_info "${pre}.3020" "Could not start lockd"
    # Without lockd there is no point in starting mountd/nfsd
    return
  fi

  # Need to restart mountd and nfsd. Here's why we killed them before.
  # The client-server connection may be a tcp connection. We are about
  # to failover a logical host and its ip addresses to another server.
  # If we later need to move the logical host and logical ip addresses back,
  # due to switchover/failover, the tcp connection state that is left over
  # from before the switchover can have the tcp sequence numbers way out of
  # alignment, because the client has continued to advance the sequence
  # numbers while using the logical ip address on a different physical host.
  # When the logical ip address is moved back to this physical host, the
  # client's sequence numbers are way ahead of what this physical host thinks
  # they should be.
  # To work around this, we do the following: we kill and restart all the
  # daemons, which has the side effect of closing and cleaning up all the
  # tcp connections.  Note that it is only necessary to kill and restart
  # the daemons for the switchover case. In the failover case
  # there is no leftover tcp connection state pertaining to the logical
  # ip addresses to confuse us.


  # Need to truncate /etc/rmtab in case it is too large. See BugID# 4309231.
  if [ -f /etc/rmtab ]; then
    > /etc/rmtab
  fi

  /bin/priocntl -c ${DFLT_CLASS} -e /usr/lib/nfs/mountd
  if [ $? -ne 0 ]; then
    log_info "${pre}.2704" "Failed to start /usr/lib/nfs/mountd"
  fi

  # /etc/init.d/nfs.server cleans up the sharetab and so can 
  # not be used from cmmstep7_cmd(). So start nfsd separately.
  restart_nfsd

  log_trace_end start_daemons

}  # end of start_daemons()

# Mounts all file systems listed in the file $1; each line is
# "<device> <mountpoint> <fstype> <options>" as written by
# mount_cmd_per_loghost().  Devices that failed fsck (i.e. absent from
# the global ${fs_good_list}) are skipped.  Exits the whole script when
# a mount fails and the file system is not already mounted.
function mount_all_fs
{
	log_trace mount_all_fs

	typeset fsfile dev_to_mount mount_pt fstype mntoptions
	typeset rc
	
	fsfile=$1

	# Only proceed when a file name was passed.  (The original code
	# used 'break' on the empty case, which is a no-op outside a loop;
	# an if-guard expresses the real intent.)
	if [[ -n ${fsfile} ]]; then
		while read dev_to_mount mount_pt fstype mntoptions
		do
			# skip this device if fsck failed on it (see do_fsck())
			echo ${fs_good_list} | /bin/grep -w `basename ${dev_to_mount}` > /dev/null
			if  [ $? -ne 0 ]; then
			  continue
			fi

			# create the mount point if it does not exist
			if [ ! -d "${mount_pt}" ]; then
				/bin/mkdir -p ${mount_pt}
			fi
	
			log_info "${pre}.3041" "mount ${mount_pt}"
			# Capture mount's diagnostics (stdout and stderr) so they
			# can be logged on failure.  (Previously all output was
			# redirected to /dev/null inside the substitution, so
			# ${mount_res} was always empty and the log line below
			# carried no detail.)
			mount_res=`/usr/sbin/mount -F ${fstype} -o "${mntoptions}" ${dev_to_mount} \
				${mount_pt} 2>&1`
			rc=$?
			if [ ${rc} -ne 0 ]; then
			   # not an error if the file system is already mounted
			   /usr/sbin/mount | /usr/bin/grep -w "^${mount_pt}"
			   if [ $? -ne 0 ]; then
				log_info "${pre}.3040" \
				   "mount -F ${fstype} -o \"${mntoptions}\" ${dev_to_mount} ${mount_pt} failed. "
				log_info "${pre}.3040" "The following was reported by mount : $mount_res"
				log_info "${pre}.3040" "Check /var/opt/SUNWcluster/ccd/ccd.log for more info"

				# Dump the processes holding the mount point into
				# ccd.log to help diagnose the failed mount.
				{
				mount_pids=`/usr/sbin/fuser ${mount_pt} 2>/dev/null | xargs -L 10`
				if [ "${mount_pids}" != "" ]; then
					print "==== `/bin/date '+%b %d %H:%M:%S'` ===" 
					print "==== Following processes being opened on ${mount_pt} ==="
					ps -fp "${mount_pids}"
					if [ -x /usr/proc/bin/ptree ]; then
						for mount_pid in ${mount_pids}
						do
							/usr/proc/bin/ptree $mount_pid
						done
					fi
				fi
				} >> ${myvar}/ccd/ccd.log 2>&1
				exit 1
			   fi
			   # else everything is ok.
			fi
		done < ${fsfile}
	fi
	log_info "${pre}.3041" "mounting finished"
	log_trace_end mount_all_fs

}


# Runs fsck over the device lists prepared by mount_cmd_per_loghost().
# The ufs devices are first checked non-interactively in one parallel
# pass; if that fails they are re-checked one at a time.  vxfs devices
# are always checked individually.  Devices that pass are accumulated
# in the global ${fs_good_list}, which mount_all_fs() consults; an
# individual-device failure exits the script.
function do_fsck
{
	log_trace do_fsck

	typeset dev_special

	# this list is used by mount_all_fs() to check if fsck succeeded
	fs_good_list=""

	# fsck all ufs file systems non-interactively.  (The original code
	# used 'break' on the empty-list branch, which is a no-op outside
	# a loop; an if-guard expresses the real intent.)
	if [ -n "${ufslist}" ]; then
		# fsck might exit with one of two non zero codes, which are not errors
		# 33 means file system already mounted; 40 means for root id
		log_info "${pre}.3045" "fsck ${ufslist}"
		/usr/sbin/fsck -F ufs -o p ${ufslist} > /dev/null 
		case $? in
			0 | 33 | 40 )   # everything went OK
				fs_good_list="${ufslist}"
				;;

			# now do fsck on individual devices
			* ) 
			for dev_special in ${ufslist}
			do
				log_info "${pre}.3050" "fsck of ${dev_special}"
				/usr/sbin/fsck -F ufs -y ${dev_special} > /dev/null 
				if [ $? -ne 0 ]; then
					log_info "${pre}.3050" "fsck -F ufs -y ${dev_special} failed"
					exit 1
				else
					fs_good_list="${fs_good_list} ${dev_special}"
				fi
			done
			;;
		esac
		log_info "${pre}.3061" "fsck (ufs) complete"
	fi

	# fsck all vxfs file systems individually
	if [ -n "${vxfslist}" ]; then
		for dev_special in ${vxfslist}
		do
			log_info "${pre}.3050" "fsck of ${dev_special}"
			# fsck_vxfs might exit with non zero code, which is not error
			# 33 means file system is already mounted
			/usr/sbin/fsck -F vxfs -y ${dev_special} > /dev/null
			case $? in
				0 | 33 )    # everything went OK
					fs_good_list="${fs_good_list} ${dev_special}"
				;;

			* ) 
				log_info "${pre}.3060" "fsck -F vxfs -y ${dev_special} failed"
				exit 1
				;;
			esac
		done
		log_info "${pre}.3061" "fsck (vxfs) complete"	
	fi
	log_trace_end do_fsck
}

# 
# For a given logical host, this function fscks and mounts all ufs/vxfs
# file systems of that logical host which are listed in its
# <vfstab.loghost> file.
# Usage: mount_cmd_per_loghost <loghost>
#
function mount_cmd_per_loghost
{
	log_trace mount_cmd_per_loghost

	typeset loghost dev_to_mount dev_to_fsck mount_pt fstype 
	typeset fspass automnt mntoptions fs_to_mount

	loghost=$1
	fs_to_mount=${tmpdir}/fs_to_mount.${loghost}
	/bin/rm -f ${fs_to_mount}

	# Use this function's own argument.  (This used to read ${lname},
	# which only worked because ksh's dynamic scoping leaked the
	# caller's loop variable into this function.)
	vfstabfile=${myhanfs}/vfstab.${loghost}

	# These variables are used by do_fsck()
	ufslist=""
	vxfslist=""

	# Sort the filesystem table by the mount point and do the mounts 
	# in that order. This ensures that directories higher in the 
	# hierarchy get mounted first, before directories which are 
	# lower in the hierarchy and which have their mount points living 
	# in the higher directories.
	# Read each line from the sorted file system table and gather
	# the list of filesystems for which we have to run fsck in parallel.
	# NB: ksh runs the last stage of a pipeline in the current shell,
	# so ufslist/vxfslist assignments inside the loop survive it.
	/usr/bin/sort -b +2 ${vfstabfile} | \
	while read dev_to_mount dev_to_fsck mount_pt fstype fspass automnt mntoptions
	do
		# skip comments, blank lines and placeholder entries
		case ${dev_to_mount} in
			'#'* | ' '* | '-' | "" ) continue ;;
			* ) ;;
		esac

		# if file system is already mounted, continue
		/bin/grep "[ 	]${dev_to_mount}[ 	]" ${sc_mounted_fs} > /dev/null 2>&1
		if [ $? -eq 0 ]; then
			continue
		fi

		if [ "${mntoptions}" = "-" ]; then
			mntoptions=""
		fi

		if [ "${fstype}" = "ufs" ]; then
			ufslist="${ufslist} ${dev_to_fsck}"
		elif [ "${fstype}" = "vxfs" ]; then
			vxfslist="${vxfslist} ${dev_to_fsck}"
		else
			# we support only ufs and vxfs file systems
			log_info "${pre}.3070" "Invalid file system type $fstype for ${dev_to_mount}"
			continue
		fi

		# save the line in a temp file
		# this file is used by mount_all_fs() to mount all file systems
		print "${dev_to_mount} ${mount_pt} ${fstype} ${mntoptions}" >> ${fs_to_mount}

	done 

	# fsck all file systems 
	do_fsck

	# mount all file systems
	mount_all_fs ${fs_to_mount}
	/bin/rm -f ${fs_to_mount}

	log_trace_end mount_cmd_per_loghost

}

function share_cmd 
{
	# Share the exported directories of every logical host named in $*.
	# Hosts whose dfstab.<loghost> file is missing, unreadable or empty
	# have nothing to export and are skipped.
	log_trace share_cmd
	typeset lnamelist lname dfstabfile
   
	lnamelist=$*
	for lname in ${lnamelist}
	do
		dfstabfile="${myhanfs}/dfstab.${lname}"
		if [[ -r "${dfstabfile}" && -s "${dfstabfile}" ]]; then
			share_cmd_per_loghost ${lname}
		fi
	done

	log_trace_end share_cmd
}


function share_cmd_per_loghost
{
	log_trace share_cmd_per_loghost
	
	typeset lname dfstabfile line vfstabfile

	lname=$1
	dfstabfile="${myhanfs}/dfstab.${lname}"
	vfstabfile="${myhanfs}/vfstab.${lname}"

	# share_to_mountp is used to build a temporary dfstab file.
	# For each directory path in the dfstab file, share_to_mountp
	# checks if a prefix of the path is present (as a file system)
	# in the vfstab file. If so, the share_to_mountp command checks if
	# the file system is mounted. For more explanation of what
	# the command does, see comments in share_to_mountp.c
	/opt/SUNWcluster/bin/share_to_mountp ${vfstabfile} ${dfstabfile} \
		> ${myvar}/dfstab.new  2> /dev/null
	if [ $? -ne 0 ]; then
		log_info "${pre}.3080" "share_to_mountp failed for ${lname} logical host"
	fi

	# fix the /etc/dfs/sharetab if it is corrupted.
	# It is not possible to make this script a function 
	# in this file because it is called recursively.
	/opt/SUNWcluster/bin/nfs_fix_sharetab

	# Run every share command from the generated file.  The file is
	# redirected onto the loop itself.  (The original used
	# "exec < file", which permanently reassigned the whole script's
	# stdin as a hidden side effect of calling this function.)
	while read line	
	do
		eval ${line} < /dev/null > /dev/null 2>&1
		if [ $? -ne 0 ]; then
			log_info "${pre}.3090" "${line} command failed"
		fi

	done < ${myvar}/dfstab.new
	
	rm -f ${myvar}/dfstab.new

	log_trace_end share_cmd_per_loghost

}  # end of share_cmd_per_loghost()

# Function does three things in preparation to start statd
#   - prepares -a option arguments; list of logical hosts
#   - prepares -p option arguments; list of directories
#   - removes certain NFS client files from statmon directories
# For details see comments inside this function
function prep_for_statd
{
	log_trace prep_for_statd

	# NOTE(review): 'mount_list' is declared here but unused; the code
	# below assigns 'mountlist', 'mountdir' and 'cur_dir' without
	# typeset, so those leak into the global scope — confirm nothing
	# else relies on them.
	typeset aopts popts localhost dglist
	typeset dg r statd_dir union_log_ips mount_list currhosts remove_lock_hosts
	typeset m n loghost_row loghost_nodelist loghost_dglist loghost_iflist
	typeset i iprow ipaddr
	typeset found

	# hostname of the node this script is running on
	localhost=`cdbmatch cluster.node.${localnodeid}.hostname ${cdbfile}`

	# hostnames of all nodes currently in the cluster membership
	currhosts=""
	for r in ${currnodes}
	do
		currhosts="${currhosts} $(cdbmatch cluster.node.${r}.hostname ${cdbfile})"
	done

	# variable which will have -a options args for statd
	# This option would list all logical hosts which are being served
	# by this statd daemon. This list includes 
	#   - local host
	#   - logical addresses for all logical hosts mastered by this node
	# Note that statd will do local host all by itself. So no need
	# to pass localhost as an argument.
	aopts=""

	# variable which will have -p options args for statd
	# This option will list all directories where statd has to maintain
	# the lock files, whenever an NFS client acquires a file lock.
	# statd synchronises all directories
	popts="" 

	# The nfs fault probes will cause each sc server to look like a
	# locking client of the other. If a node is down, we don't want 
	# statd to suffer a long timeout trying to contact it. We figure out 
	# whether a node is down or not by doing ping. If there is no
	# reply from the node, then either the node is down or this node's
	# public network interface adapter has goofed up. Right now, assume 
	# that some external agent will monitor the network adapter. So we 
	# are doing nothing for that case here. If the node is down we will 
	# remove its locks from statmon directory.
	# The following list is used to remove lock files from statmon directory 
	# The list will contain following host names
	#   - Local host name
	#   - logical IPs for all logical hosts mastered by this node
	remove_lock_hosts="${localhost}"

	# Make a list of logical IPs on the local host. This is actually not 
	# a list of IP addresses. This is a list of logical host names. The 
	# name "logical_ips" is used to avoid confusion with the concept of 
	# the HA framework's logical host, which is an entity by itself
	# consisting of multiple resources. There could be multiple logical
	# IPs on a node.
	union_log_ips=""
	dglist=""
	
	for m in ${m_loghosts}; do
	  # Fetch the CCD row for this logical host; fields are picked out
	  # of the colon-separated record with ksh %/# pattern trims.
	  # NOTE(review): the field positions assumed below (dglist,
	  # nodelist, iflist) should be confirmed against the scccd
	  # LOGHOST record format.
	  loghost_row=$(scccd -f ${ccdfile} ${clustname} LOGHOST query lname ${m})
	  
	  loghost_dglist=${loghost_row%:*:*}
	  loghost_dglist=${loghost_dglist#*:*:*:}
	  dglist="${dglist} ${loghost_dglist}"

	  loghost_nodelist=${loghost_row%:*:*:*}
	  loghost_nodelist=${loghost_nodelist#*:*:}
	  loghost_nodelist=$(print ${loghost_nodelist} | tr ',' ' ')

	  loghost_iflist=${loghost_row%:*}
	  loghost_iflist=${loghost_iflist#*:*:*:*:}
	  loghost_iflist=$(print ${loghost_iflist} | tr ',' ' ')

	  # each logical interface maps to one logical IP (host name)
	  for i in ${loghost_iflist}; do
	    iprow=$(scccd -f ${ccdfile} ${clustname} LOGIP query logif ${i})

	    ipaddr=${iprow%:*}
	    ipaddr=${ipaddr#*:*:*:}

	    union_log_ips="${union_log_ips} ${ipaddr}"
	    remove_lock_hosts="${remove_lock_hosts} ${ipaddr}"
	  done

	  for n in ${loghost_nodelist}; do
	    # avoid duplicate entries in remove_lock_hosts list

	    let found=0
	    for r in ${remove_lock_hosts}; do
	      if [[ "${n}" = "${r}" ]]; then
		let found=1
		break
	      fi
	    done
	    if (( found == 0 )); then
			remove_lock_hosts="${remove_lock_hosts} ${n}"
	    fi
	  done
	done
	
	# Now, prepare the -a option arguments
	for r in ${union_log_ips}; do
		aopts="${aopts} -a ${r}"
	done   

        #
        # Since we now have only one Administrative File System
        # where the NFS mount will go.
        # prep_for_statd should use "haget -f "
        # for now use "$haadmindir loghost"
        mountlist=$(/usr/sbin/mount | cut -d" " -f1)
        for m in ${m_loghosts}; do
                # Construct popts list
                # Check if the HA administrative File System for this
                # logical host got mounted.
                # If it is mounted then inform statd, else log an error

                # The cd/pwd round trip canonicalizes the path (takes
                # care of // etc. in the path) before the mountlist match.
		mountdir="${haadmindir}/${m}"
                statd_dir="${haadmindir}/${m}/.statmon"
                cur_dir=`pwd`
                if [ -d ${mountdir} ]; then
                        cd $mountdir
                        mountdir=`pwd`
                fi
                cd ${cur_dir}
                echo ${mountlist} | /bin/fgrep "${mountdir}" > /dev/null 2>&1
                if [ $? -eq 0 ]; then
                	if [ ! -d ${statd_dir} ]; then
						mkdir -p ${statd_dir}
					fi
                	if [ ! -d ${statd_dir}/statmon/sm ]; then
						mkdir -p ${statd_dir}/statmon/sm
					fi
                	if [ ! -d ${statd_dir}/statmon/sm.bak ]; then
						mkdir -p ${statd_dir}/statmon/sm.bak
					fi
                        # List of directories to be used while restarting statd
                        # Every directory entry has to be prefixed with "-p"
                        popts="${popts} -p ${statd_dir}"
                else
                        log_info "${pre}.3100" "HA administrative file system ${mountdir} is not mounted for logical host ${m}"
                fi

                for r in ${remove_lock_hosts}
                do
                # checking existence of a file through the shell built-in
                # is faster than executing rm
                if [[ -f ${statd_dir}/statmon/sm/${r} || \
                        -f ${statd_dir}/statmon/sm.bak/${r} ]]; then
                        /bin/rm -f ${statd_dir}/statmon/sm/${r} \
                                ${statd_dir}/statmon/sm.bak/${r}
                fi
                done  # remove_lock_hosts
        done  # m_loghosts

	# Now remove lock files in system's statmon directory 
	for r in ${remove_lock_hosts}; do
	  if [[ -f /var/statmon/sm/${r} || -f /var/statmon/sm.bak/${r} ]]; then
	    /bin/rm -f /var/statmon/sm/${r} /var/statmon/sm.bak/${r}
	  fi
	done 

	# statd.args will be used by reconf_ener while starting statd
	/bin/rm -f ${myvar}/statd.args
	echo "${aopts} ${popts}" > ${myvar}/statd.args

	log_trace_end prep_for_statd

}    # end of prep_for_statd()


function mount_cmd
{
	# Mount the HA file systems of every logical host named in $*.
	# Hosts without a vfstab.<loghost> file have no HA file systems
	# and are skipped.
	# NB: keep the loop variable named 'lname' — mount_cmd_per_loghost()
	# historically reads ${lname} via ksh dynamic scoping.
	log_trace mount_cmd

	typeset lname lnamelist

	lnamelist=$*
	for lname in ${lnamelist}
	do
		vfstabfile=${myhanfs}/vfstab.${lname}
		if [ -f "$vfstabfile" ]; then
			mount_cmd_per_loghost ${lname}
		fi
	done

	log_trace_end mount_cmd

}    # end of mount_cmd()

function unshare_cmd
{
	# Unshare the exported directories of every logical host named in
	# $*.  Hosts whose dfstab.<loghost> file is missing, unreadable or
	# empty have nothing exported and are skipped.
	log_trace unshare_cmd
	typeset lnamelist lname dfstabfile
   
	lnamelist=$*
	for lname in ${lnamelist}
	do
		dfstabfile="${myhanfs}/dfstab.${lname}"
		if [[ -r "${dfstabfile}" && -s "${dfstabfile}" ]]; then
			unshare_cmd_per_loghost ${dfstabfile}
		fi
	done

	log_trace_end unshare_cmd
}

# Unshares all file systems listed in the dfstab for the logical host.
# The return codes are ignored since unshare does not return a
# unique error code.
function unshare_cmd_per_loghost
{
	log_trace unshare_cmd_per_loghost
 
	typeset dfstabfile line

	dfstabfile=$1

	# fix the /etc/dfs/sharetab if it is corrupted.
	# It is not possible to make this script a function
	# in this file because it is called recursively.
	# Use the full path, consistent with share_cmd_per_loghost(),
	# so the call does not depend on PATH.
	/opt/SUNWcluster/bin/nfs_fix_sharetab
	
	# strip comments, then unshare the last field (the directory path)
	# of every remaining non-empty line
	/bin/sed 's/^[ 	]*#.*//' ${dfstabfile} | /bin/nawk 'NF != 0 {print $NF}' | \
	while read line
	do
		/usr/sbin/unshare ${line}
	done

	log_trace_end unshare_cmd_per_loghost

}  # end of unshare_cmd_per_loghost()

# Unmounts every mounted file system listed in vfstab.<loghost>, in
# reverse mount-point order.  Exits the script with status 1 if any
# file system could not be umounted.
function unmount_cmd_per_loghost
{
	log_trace unmount_cmd_per_loghost

	typeset lname dev_special dev_to_fsck mount_pt fstype 
	typeset fspass automnt mntoptions
	typeset pid_list pid n retval
	typeset failed

	lname=$1
	vfstabfile=${myhanfs}/vfstab.${lname}
	dfstabfile=${myhanfs}/dfstab.${lname}

	# NB: ksh runs the last pipeline stage in the current shell, so the
	# 'failed' flag set inside the loop survives it.
	let failed=0
	/bin/sort -r -b +2 ${vfstabfile} | \
	while read dev_special dev_to_fsck mount_pt fstype fspass automnt mntoptions
	do
		# NOTE(review): unlike mount_cmd_per_loghost(), this pattern
		# is ' ' (a single space) rather than ' '* and has no "" arm;
		# blank lines are instead skipped by the -z check below —
		# confirm that is intended.
		case ${dev_special} in
			'#'* | ' ' | '-' ) continue ;;
			* ) ;;
		esac

		# The mount list has to be checked within the loop to avoid the
		# scenario of locking the undesired file system.
		# If the argument passed to the lockfs command is not a mounted
		# file system, then it would lock the underlying file system
		# which is holding that directory. This might, sometimes, lock
		# the root file system, which is a catastrophic action.
		# This is a typical case when a diskgroup with the same name is 
		# configured on two different pairs of nodes and the mount point
		# is in root. Although it is a valid configuration, the disaster
		# has to be avoided. It is done by checking the mount list just 
		# before doing lockfs.
		/usr/sbin/mount | /bin/grep "^${mount_pt} " | /bin/grep " ${dev_special} " > /dev/null 2>&1

		if [ $? -eq 0 ]; then
			#
			# In case there are empty lines in vfstab file
			#
			if [ -z "${mount_pt}" ]; then
				continue
			fi
			if [ "${fstype}" = "ufs" ]; then
				#
				# Hard locked ufs filesystems can get umounted
				# even when busy; sort of forced umount.
				#
				/usr/sbin/lockfs -h ${mount_pt}
				if [ $? -ne 0 ]; then
					log_info "${pre}.3110" "lockfs ${mount_pt} failed"
				fi
			elif [ "${fstype}" = "vxfs" ]; then
				#
				# VXFS does not support forced umount.
				# Thus we need to make the filesystem as
				# quiescent as good as we can.
				#
				# Gather all the processes that use the mount point and/or
				# a file under the mount point.  fuser -c reports (and
				# with -k kills) the processes using the mount point.
				#
				maxretry=5
				retrycnt=0
				while [ ${maxretry} -ge ${retrycnt} ]
					do
					pid_list=`/usr/sbin/fuser -ck ${mount_pt} 2>/dev/null`
					if [ -z "${pid_list}" ]; then
						break
					fi
					#
					# Loop over all processes with open references to
					# this filesystem; give each killed process up to
					# ~10 seconds to actually die (kill -s 0 merely
					# probes for existence).
					#
					for pid in ${pid_list}; do
						kill -s 0 ${pid} 2>/dev/null
						retval=$?
						#
						# zero indicates process still alive.
						#
						let n=0
						while (( ${retval} == 0 && n < 10)); do
							log_info "${pre}.4013" " process ${pid} still alive \(${n}\)"
							sleep 1
							kill -s 0 ${pid} 2>/dev/null
							retval=$?
							let n=n+1
						done
					done
					let retrycnt=retrycnt+1
				done
			fi
			#
			# At this place we assume that all processes
			# are gone. The retry logic around umount()
			# will give us some extra buffer for slowly
			# dying processes.
			#
			let umount_success=0
			let maxretry=4
			let retrycnt=0
			while [ ${maxretry} -ge ${retrycnt} ]
				do
				/usr/sbin/umount ${mount_pt}
				if [ $? -eq 0 ]; then
					umount_success=1
					break
				fi
				log_info "${pre}.4011" "umounting ${mount_pt} failed; retrying"
				let retrycnt=retrycnt+1
				sleep 1
			done
			#
			# Check for final return code and log for diags.
			#
			if [ $umount_success -eq 0 ]; then
				let failed=1
				log_info "${pre}.4010" "umount of ${mount_pt} failed"
			else
				log_info "${pre}.4012" "umount of ${mount_pt} succeeded"
			fi
		fi
	done

	# Any single failed umount makes the whole operation fail.
	if (( failed == 1 )); then
		exit 1
	fi

	log_trace_end unmount_cmd_per_loghost

}    # end of unmount_cmd_per_loghost()

function unmount_cmd
{
	# Unmount the HA file systems of every logical host named in $*,
	# skipping hosts that have no vfstab.<loghost> file.  The per-host
	# helper sorts the vfstab in reverse mount-point order so nested
	# mounts come off before their parents.
	log_trace unmount_cmd

	typeset lnamelist lname 

	lnamelist=$*

	for lname in ${lnamelist}
	do
		vfstabfile=${myhanfs}/vfstab.${lname}
		if [ -f "${vfstabfile}" ]; then
			unmount_cmd_per_loghost ${lname}
		fi
	done

	log_trace_end unmount_cmd
	
}    # end of unmount_cmd()


#
# main()
#

# Need at least a cluster name and a sub-command.
if [[ $# -lt 2 ]]; then
	usage
fi

# reconf_ener has done the set -e, which is inherited by this script.
# The following set will avoid reconf_ener from exiting whenever a
# command in this script exits with non zero.
set +e 

prog=$0
clustname=$1
cmd=$2

PATH=/sbin:/usr/sbin:/bin:/opt/SUNWcluster/bin

# Following string is used to log the information
pre="SUNWcluster.scnfs"


# Scheduling class for the NFS daemons: defaults to TS (timesharing)
# but may be overridden via the dflt_sched_class file.
CONF_FILE=/etc/opt/SUNWcluster/conf/dflt_sched_class
DFLT_CLASS=TS

if [ -f ${CONF_FILE} ] ; then
        DFLT_CLASS=`cat ${CONF_FILE}`
fi


init
case ${cmd} in
  mount )
	shift 2
	mount_cmd $* ;;
  unmount | umount )
	# Accept both spellings: the file header and usage() advertise
	# "umount" while only "unmount" was historically accepted.
	shift 2
	unmount_cmd $* ;;
  share )
	shift 2
	share_cmd $* ;;
  unshare )
	shift 2
	unshare_cmd $* ;;
  startd )
	currnodes=$3
	localnodeid=$4
	ccdfile=$5
	shift 5
	m_loghosts=$*
	start_daemons ;;
  killd )
	kill_daemons ;;
  * ) usage ;;
esac

# Cleanup files created in init()
/bin/rm -f ${sc_mounted_fs}

exit 0

