#!/bin/sh
#
#	Resource Agent for managing the DLM controld process.
#
# Copyright (c) 2009 Novell, Inc
#                    All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#

#######################################################################
# Initialization:

# Locate and source the OCF shell function library. The rest of this agent
# uses ocf_* helpers and OCF_* return codes that are not defined in this
# file, so they are expected to come from here. The location may be
# overridden by pre-setting OCF_FUNCTIONS in the environment.
# Expansions are quoted so that an install root containing whitespace or
# glob characters is handled as a single path.
: "${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}"
. "${OCF_FUNCTIONS}"

# The requested action defaults to the first command-line argument.
: "${__OCF_ACTION=$1}"

#######################################################################

# If a controld agent exists in the heartbeat provider directory, hand the
# request over to it and exit with whatever status it reports; nothing
# below this point runs in that case.
# The path is quoted (OCF_ROOT may contain whitespace) and the complete
# argument list is forwarded unchanged.
if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then
    ocf_log info "Using heartbeat controld agent"
    "$OCF_ROOT/resource.d/heartbeat/controld" "$@"
    exit $?
fi

# Print the agent's OCF meta-data (XML) on stdout.
# The document is static, so the here-doc delimiter is quoted to make it
# explicit that nothing inside is subject to shell expansion.
meta_data() {
	cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="controld" version="0.9">
<version>1.0</version>

<longdesc lang="en">
This Resource Agent can control the dlm_controld services needed by cluster-aware file systems.
It assumes that dlm_controld is in your default PATH.
In most cases, it should be run as an anonymous clone.
</longdesc>
<shortdesc lang="en">DLM Agent for cluster file systems</shortdesc>

<parameters>

<parameter name="args" unique="1">
<longdesc lang="en">
Any additional options to start the dlm_controld service with
</longdesc>
<shortdesc lang="en">DLM Options</shortdesc>
<content type="string" default="-s 0" />
</parameter>

<parameter name="configdir" unique="1">
<longdesc lang="en">
The location where configfs is or should be mounted
</longdesc>
<shortdesc lang="en">Location of configfs</shortdesc>
<content type="string" default="/sys/kernel/config" />
</parameter>

<parameter name="daemon" unique="1">
<longdesc lang="en">
The daemon to start - supports gfs_controld(.pcmk) and dlm_controld(.pcmk)
</longdesc>
<shortdesc lang="en">The daemon to start</shortdesc>
<content type="string" default="dlm_controld.pcmk" />
</parameter>

<parameter name="allow_stonith_disabled">
<longdesc lang="en">
Allow DLM start-up even if STONITH/fencing is disabled in the cluster.

Setting this option to true will cause cluster malfunction and hangs on
fail-over for DLM clients that require fencing (such as GFS2, OCFS2, and
cLVM2).

This option is advanced use only.
</longdesc>
<shortdesc lang="en">Allow start-up even without STONITH/fencing</shortdesc>
<content type="string" default="false" />
</parameter>

</parameters>

<actions>
<action name="start"        timeout="90" />
<action name="stop"         timeout="100" />
<action name="monitor"      timeout="20" interval="10" depth="0" start-delay="0" />
<action name="meta-data"    timeout="5" />
<action name="validate-all"   timeout="30" />
</actions>
</resource-agent>
END
}

#######################################################################

# Directory listed by check_uncontrolled_locks() to detect leftover DLM lockspaces.
DLM_SYSFS_DIR='/sys/kernel/dlm'

# Print a short usage summary (supported actions) on stdout.
controld_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		"" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}

# Self-fence this node when DLM lockspaces exist although the daemon is not
# running successfully.
#
# Called by controld_monitor() whenever its result is not $OCF_SUCCESS.
# If $DLM_SYSFS_DIR can be listed and is not empty, an error is logged, a
# reboot of the node named by "crm_node -n" is requested via stonith_admin,
# and the agent exits.
#
# Globals:  DLM_SYSFS_DIR (read), OCF_ERR_GENERIC (read)
# Returns:  0 if the directory is missing, unreadable or empty;
#           otherwise does not return (exits with $OCF_ERR_GENERIC).
check_uncontrolled_locks()
{
    local lockspaces

    # Capture stdout only, so a diagnostic printed by ls can never be taken
    # for a lockspace entry. A failed listing means there is nothing to check.
    lockspaces=$(ls -- "$DLM_SYSFS_DIR" 2>/dev/null) || return 0
    [ -n "$lockspaces" ] || return 0

    ocf_log err "Uncontrolled lockspace exists, system must reboot. Executing suicide fencing"
    stonith_admin --reboot="$(crm_node -n)" --tag controld

    exit $OCF_ERR_GENERIC
}

# Start the control daemon and wait until the DLM is ready.
#
# Steps:
#   1. Return immediately if controld_monitor() already reports success.
#   2. Make sure $OCF_RESKEY_configdir exists (loading the configfs module if
#      needed) and mount configfs there when no configfs mount is present.
#   3. Make sure $OCF_RESKEY_configdir/dlm exists (loading the dlm module if
#      needed).
#   4. Refuse to start when the stonith-enabled cluster property is false,
#      unless OCF_RESKEY_allow_stonith_disabled is true.
#   5. Launch $OCF_RESKEY_daemon with $OCF_RESKEY_args and poll once per
#      second until the monitor succeeds and a non-empty comms/*/addr_list
#      is found below $OCF_RESKEY_configdir/dlm/cluster.
#      The loop has no timeout of its own.
#
# Globals:  OCF_RESKEY_configdir, OCF_RESKEY_daemon, OCF_RESKEY_args,
#           OCF_RESKEY_allow_stonith_disabled, OCF_* return codes (all read)
# Returns:  $OCF_SUCCESS        already running, or started and ready
#           $OCF_ERR_INSTALLED  configfs or its dlm directory is unavailable
#           $OCF_ERR_CONFIGURED stonith is disabled and that is not allowed
#           $OCF_NOT_RUNNING    the daemon vanished while waiting for it
#           $OCF_ERR_GENERIC    any other monitor failure
controld_start() {
    local rc
    local addr_list
    local stonith_enabled

    controld_monitor; rc=$?

    case $rc in
      "$OCF_SUCCESS")     return $OCF_SUCCESS;;
      "$OCF_NOT_RUNNING") ;;
      *) return $OCF_ERR_GENERIC;;
    esac

    if [ ! -e "$OCF_RESKEY_configdir" ]; then
        modprobe configfs
        if [ ! -e "$OCF_RESKEY_configdir" ]; then
            ocf_log err "$OCF_RESKEY_configdir not available"
            return $OCF_ERR_INSTALLED
        fi
    fi

    if ! mount | grep -q "type configfs"; then
        mount -t configfs none "$OCF_RESKEY_configdir"
    fi

    if [ ! -e "$OCF_RESKEY_configdir/dlm" ]; then
        modprobe dlm
        if [ ! -e "$OCF_RESKEY_configdir/dlm" ]; then
            ocf_log err "$OCF_RESKEY_configdir/dlm not available"
            return $OCF_ERR_INSTALLED
        fi
    fi

    # The cluster property is only queried when the override is not set.
    if ! ocf_is_true "$OCF_RESKEY_allow_stonith_disabled"; then
        stonith_enabled=$(crm_attribute --type=crm_config --name=stonith-enabled --query --quiet --default=true)
        if ! ocf_is_true "$stonith_enabled"; then
            ocf_log err "The cluster property stonith-enabled may not be deactivated to use the DLM"
            return $OCF_ERR_CONFIGURED
        fi
    fi

    # OCF_RESKEY_args is deliberately left unquoted: it carries a list of
    # separate command-line options (e.g. "-s 0").
    "${OCF_RESKEY_daemon}" $OCF_RESKEY_args

    while true
    do
        sleep 1

        controld_monitor; rc=$?
        case $rc in
          "$OCF_SUCCESS")
            # Probe below the configured configfs location rather than a
            # fixed /sys/kernel/config, so a non-default configdir works.
            addr_list="$(cat "$OCF_RESKEY_configdir"/dlm/cluster/comms/*/addr_list 2>/dev/null)"
            if [ $? -eq 0 ] && [ -n "$addr_list" ]; then
                return $OCF_SUCCESS
            fi
            ;;
          "$OCF_NOT_RUNNING")
            return $OCF_NOT_RUNNING
            ;;
          *)
            return $OCF_ERR_GENERIC
            ;;
        esac

        ocf_log debug "Waiting for ${OCF_RESKEY_daemon} to be ready"
    done
}

# Stop the control daemon and wait until it has gone away.
#
# Sends SIGTERM to every process named $OCF_RESKEY_daemon and then polls
# controld_monitor() once per second for as long as it keeps reporting
# $OCF_SUCCESS. Note that controld_monitor() itself may terminate the agent
# through check_uncontrolled_locks().
#
# Globals:  OCF_RESKEY_daemon (read), OCF_* return codes (read)
# Returns:  $OCF_SUCCESS     the daemon was not running, or went away
#           $OCF_ERR_GENERIC killall failed to signal the daemon
#           otherwise the first non-success status reported by the monitor
#           while waiting
controld_stop() {
    local rc

    controld_monitor; rc=$?
    if [ "$rc" -eq "$OCF_NOT_RUNNING" ]; then
        return $OCF_SUCCESS
    fi

    if ! killall -TERM "${OCF_RESKEY_daemon}"; then
        return $OCF_ERR_GENERIC
    fi

    # Sleep only between polls; once the monitor stops reporting success
    # there is no reason to delay the result by another second.
    while :; do
        controld_monitor; rc=$?
        [ "$rc" -eq "$OCF_SUCCESS" ] || break
        sleep 1
    done

    if [ "$rc" -eq "$OCF_NOT_RUNNING" ]; then
        rc=$OCF_SUCCESS
    fi

    return $rc
}

# Report the state of the control daemon.
#
# Probes for a process named $OCF_RESKEY_daemon with "killall -0". When one
# is found, the DLM state is inspected as well:
#   - "dlm_tool status -v" reporting stateful_merge_wait=1 is an error;
#   - if no stateful_merge_wait value is reported, "dlm_tool ls" showing
#     "wait fencing" is an error unless the "stonith_admin -H" output
#     contains "wishes to".
# Whenever the result is not $OCF_SUCCESS, check_uncontrolled_locks() is
# invoked, which may fence the node and terminate the agent.
#
# Globals:  OCF_RESKEY_daemon (read), OCF_* return codes (read)
# Returns:  $OCF_SUCCESS, $OCF_NOT_RUNNING or $OCF_ERR_GENERIC
controld_monitor() {
    local rc
    local smw

    killall -0 "${OCF_RESKEY_daemon}" >/dev/null 2>&1 ; rc=$?

    case $rc in
      0) smw=$(dlm_tool status -v | grep "stateful_merge_wait=" | cut -d= -f2)
         if [ -n "$smw" ] && [ "$smw" -eq 1 ]; then
             ocf_log err "DLM status is: stateful_merge_wait"
             rc=$OCF_ERR_GENERIC
         elif [ -z "$smw" ] && dlm_tool ls | grep -q "wait fencing" && \
              ! stonith_admin -H '*' -V | grep -q "wishes to"; then
             ocf_log err "DLM status is: wait fencing"
             rc=$OCF_ERR_GENERIC
         else
             rc=$OCF_SUCCESS
         fi
         ;;
      1) rc=$OCF_NOT_RUNNING;;
      *) rc=$OCF_ERR_GENERIC;;
    esac

    # If the DLM is not running successfully but lockspace bits are left
    # over, this node must fence itself.
    if [ "$rc" -ne "$OCF_SUCCESS" ]; then
        check_uncontrolled_locks
    fi

    return $rc
}

# Check the prerequisites of this agent.
#
# Verifies (through check_binary) that killall and the configured daemon are
# available, rejects a globally unique clone configuration by exiting with
# $OCF_ERR_CONFIGURED, and creates /var/run/cluster when it does not exist.
#
# Returns:  $OCF_SUCCESS (configuration errors exit instead of returning)
controld_validate() {
    check_binary killall
    check_binary ${OCF_RESKEY_daemon}

    case ${OCF_RESKEY_CRM_meta_globally_unique} in
        yes|Yes|true|True|1)
            ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute"
            exit $OCF_ERR_CONFIGURED
            ;;
    esac

    if [ ! -d /var/run/cluster ]; then
        mkdir /var/run/cluster
    fi

    return $OCF_SUCCESS
}

# Defaults for parameters the caller's environment did not provide.
: "${OCF_RESKEY_sctp=false}"
: "${OCF_RESKEY_configdir=/sys/kernel/config}"
: "${OCF_RESKEY_CRM_meta_globally_unique:=false}"

# With HA_quorum_type=pcmk the daemon names carry a ".pcmk" suffix.
if [ "$HA_quorum_type" = "pcmk" ]; then
    daemon_ext=".pcmk"
else
    daemon_ext=""
fi

# The default daemon and its options follow the resource name: anything
# containing "gfs" (case-insensitive) gets gfs_controld, everything else
# gets dlm_controld.
case "$OCF_RESOURCE_INSTANCE" in
    *[gG][fF][sS]*)
	: "${OCF_RESKEY_args=-g 0}"
	: "${OCF_RESKEY_daemon=gfs_controld${daemon_ext}}"
	;;
    *)
	: "${OCF_RESKEY_args=-s 0}"
	: "${OCF_RESKEY_daemon=dlm_controld${daemon_ext}}"
	;;
esac

# Dispatch the requested action. The informational actions exit right away;
# for all others the agent exits with the status of the last function run.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        controld_usage
        exit $OCF_SUCCESS
        ;;
    start)
        controld_validate
        controld_start
        ;;
    stop)
        controld_stop
        ;;
    monitor)
        controld_validate
        controld_monitor
        ;;
    validate-all)
        controld_validate
        ;;
    *)
        controld_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

exit $?

