From c4989f093913556355ee9f94318ca092c79de2d0 Mon Sep 17 00:00:00 2001 From: Jared Morrow Date: Wed, 16 Jan 2013 10:47:23 -0700 Subject: [PATCH] Change simplenode template files to work on more platforms When developing Riak, we have found bugs and other issues due to the number of platforms we support. Here is an overview of the changes: - Fix command-line syntax for commands to work on *BSD / Sun - Add chkconfig and getpid to nodetool - Replace platform specific 'kill' commands with a nodetool getpid method - Fix RUNNER_USER settings to work on *BSD --- priv/templates/simplenode.erl.script | 24 +++-- priv/templates/simplenode.nodetool | 58 ++++++++++-- priv/templates/simplenode.reltool.config | 2 + priv/templates/simplenode.runner | 115 +++++++++++++++++------ 4 files changed, 156 insertions(+), 43 deletions(-) mode change 100644 => 100755 priv/templates/simplenode.nodetool diff --git a/priv/templates/simplenode.erl.script b/priv/templates/simplenode.erl.script index 6f65e3f..7919d69 100644 --- a/priv/templates/simplenode.erl.script +++ b/priv/templates/simplenode.erl.script @@ -1,13 +1,23 @@ #!/bin/sh -## This script replaces the default "erl" in erts-VSN/bin. This is necessary -## as escript depends on erl and in turn, erl depends on having access to a -## bootscript (start.boot). Note that this script is ONLY invoked as a side-effect -## of running escript -- the embedded node bypasses erl and uses erlexec directly -## (as it should). +# /bin/sh on Solaris is not a POSIX compatible shell, but /usr/bin/ksh is. +if [ `uname -s` = 'SunOS' -a "${POSIX_SHELL}" != "true" ]; then + POSIX_SHELL="true" + export POSIX_SHELL + exec /usr/bin/ksh $0 "$@" +fi + +# clear it so if we invoke other scripts, they run as ksh as well +unset POSIX_SHELL + +## This script replaces the default "erl" in erts-VSN/bin. This is +## necessary as escript depends on erl and in turn, erl depends on +## having access to a bootscript (start.boot). Note that this script +## is ONLY invoked as a side-effect of running escript -- the embedded +## node bypasses erl and uses erlexec directly (as it should). ## -## Note that this script makes the assumption that there is a start_clean.boot -## file available in $ROOTDIR/release/VSN. +## Note that this script makes the assumption that there is a +## start_clean.boot file available in $ROOTDIR/release/VSN. # Determine the abspath of where this script is executing from. ERTS_BIN_DIR=$(cd ${0%/*} && pwd) diff --git a/priv/templates/simplenode.nodetool b/priv/templates/simplenode.nodetool old mode 100644 new mode 100755 index eb08fa4..54ee6d6 --- a/priv/templates/simplenode.nodetool +++ b/priv/templates/simplenode.nodetool @@ -1,3 +1,4 @@ +#!/usr/bin/env escript %% -*- mode: erlang;erlang-indent-level: 4;indent-tabs-mode: nil -*- %% ex: ft=erlang ts=4 sw=4 et %% ------------------------------------------------------------------- @@ -5,25 +6,53 @@ %% nodetool: Helper Script for interacting with live nodes %% %% ------------------------------------------------------------------- +-mode(compile). main(Args) -> ok = start_epmd(), %% Extract the args {RestArgs, TargetNode} = process_args(Args, [], undefined), + %% any commands that don't need a running node + case RestArgs of + ["chkconfig", File] -> + case file:consult(File) of + {ok, _} -> + io:format("ok\n"), + halt(0); + {error, {Line, Mod, Term}} -> + io:format(standard_error, ["Error on line ", + file:format_error({Line, Mod, Term}), "\n"], []), + halt(1); + {error, R} -> + io:format(standard_error, ["Error reading config file: ", + file:format_error(R), "\n"], []), + halt(1) + end; + _ -> + ok + end, + %% See if the node is currently running -- if it's not, we'll bail - case {net_kernel:hidden_connect_node(TargetNode), net_adm:ping(TargetNode)} of + case {net_kernel:hidden_connect_node(TargetNode), + net_adm:ping(TargetNode)} of {true, pong} -> ok; + {false,pong} -> + io:format("Failed to connect to node ~p .\n", [TargetNode]), + halt(1); {_, pang} -> io:format("Node ~p not responding to pings.\n", [TargetNode]), halt(1) end, case RestArgs of + ["getpid"] -> + io:format("~p\n", + [list_to_integer(rpc:call(TargetNode, os, getpid, []))]); ["ping"] -> - %% If we got this far, the node already responsed to a ping, so just dump - %% a "pong" + %% If we got this far, the node already responsed to a + %% ping, so just dump a "pong" io:format("pong\n"); ["stop"] -> io:format("~p\n", [rpc:call(TargetNode, init, stop, [], 60000)]); @@ -32,7 +61,9 @@ main(Args) -> ["reboot"] -> io:format("~p\n", [rpc:call(TargetNode, init, reboot, [], 60000)]); ["rpc", Module, Function | RpcArgs] -> - case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function), + case rpc:call(TargetNode, + list_to_atom(Module), + list_to_atom(Function), [RpcArgs], 60000) of ok -> ok; @@ -42,8 +73,23 @@ main(Args) -> _ -> halt(1) end; + ["rpc_infinity", Module, Function | RpcArgs] -> + case rpc:call(TargetNode, + list_to_atom(Module), + list_to_atom(Function), + [RpcArgs], infinity) of + ok -> + ok; + {badrpc, Reason} -> + io:format("RPC to ~p failed: ~p\n", [TargetNode, Reason]), + halt(1); + _ -> + halt(1) + end; ["rpcterms", Module, Function, ArgsAsString] -> - case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function), + case rpc:call(TargetNode, + list_to_atom(Module), + list_to_atom(Function), consult(ArgsAsString), 60000) of {badrpc, Reason} -> io:format("RPC to ~p failed: ~p\n", [TargetNode, Reason]), @@ -53,7 +99,7 @@ main(Args) -> end; Other -> io:format("Other: ~p\n", [Other]), - io:format("Usage: nodetool {ping|stop|restart|reboot}\n") + io:format("Usage: nodetool {chkconfig|getpid|ping|stop|restart|reboot|rpc|rpc_infinity|rpcterms}\n") end, net_kernel:stop(). diff --git a/priv/templates/simplenode.reltool.config b/priv/templates/simplenode.reltool.config index b580c2a..4189329 100644 --- a/priv/templates/simplenode.reltool.config +++ b/priv/templates/simplenode.reltool.config @@ -1,3 +1,5 @@ +%% -*- mode: erlang -*- +%% ex: ft=erlang {sys, [ {lib_dirs, []}, {erts, [{mod_cond, derived}, {app_file, strip}]}, diff --git a/priv/templates/simplenode.runner b/priv/templates/simplenode.runner index 43d90bc..8c55e3d 100755 --- a/priv/templates/simplenode.runner +++ b/priv/templates/simplenode.runner @@ -2,6 +2,19 @@ # -*- tab-width:4;indent-tabs-mode:nil -*- # ex: ts=4 sw=4 et +# /bin/sh on Solaris is not a POSIX compatible shell, but /usr/bin/ksh is. +if [ `uname -s` = 'SunOS' -a "${POSIX_SHELL}" != "true" ]; then + POSIX_SHELL="true" + export POSIX_SHELL + # To support 'whoami' add /usr/ucb to path + PATH=/usr/ucb:$PATH + export PATH + exec /usr/bin/ksh $0 "$@" +fi + +# clear it so if we invoke other scripts, they run as ksh +unset POSIX_SHELL + RUNNER_SCRIPT_DIR=$(cd ${0%/*} && pwd) CALLER_DIR=$PWD @@ -11,10 +24,17 @@ RUNNER_ETC_DIR=$RUNNER_BASE_DIR/etc # Note the trailing slash on $PIPE_DIR/ PIPE_DIR=/tmp/$RUNNER_BASE_DIR/ RUNNER_USER= +WHOAMI=$(whoami) # Make sure this script is running as the appropriate user -if [ ! -z "$RUNNER_USER" ] && [ `whoami` != "$RUNNER_USER" ]; then - exec sudo -u $RUNNER_USER -i $0 $@ +if ([ "$RUNNER_USER" ] && [ "x$WHOAMI" != "x$RUNNER_USER" ]); then + type sudo > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "sudo doesn't appear to be installed and your EUID isn't $RUNNER_USER" 1>&2 + exit 1 + fi + echo "Attempting to restart script through sudo -H -u $RUNNER_USER" >&2 + exec sudo -H -u $RUNNER_USER -i $RUNNER_SCRIPT_DIR/$RUNNER_SCRIPT $@ fi # Identify the script name @@ -25,7 +45,8 @@ START_ERL=`cat $RUNNER_BASE_DIR/releases/start_erl.data` ERTS_VSN=${START_ERL% *} APP_VSN=${START_ERL#* } -# Use $CWD/vm.args if exists, otherwise releases/APP_VSN/vm.args, or else etc/vm.args +# Use $CWD/vm.args if exists, otherwise releases/APP_VSN/vm.args, or +# else etc/vm.args if [ -e "$CALLER_DIR/vm.args" ]; then VMARGS_PATH=$CALLER_DIR/vm.args USE_DIR=$CALLER_DIR @@ -54,7 +75,7 @@ else fi # Extract the target node name from node.args -NAME_ARG=`egrep '^-s?name' $VMARGS_PATH` +NAME_ARG=`egrep '^\-s?name' $VMARGS_PATH` if [ -z "$NAME_ARG" ]; then echo "vm.args needs to have either -name or -sname parameter." exit 1 @@ -64,12 +85,13 @@ fi REMSH_TYPE=`echo $NAME_ARG | awk '{print $1}'` REMSH_NAME=`echo $NAME_ARG | awk '{print $2}'` -# Note the `date +%s`, used to allow multiple remsh to the same node transparently +# Note the `date +%s`, used to allow multiple remsh to the same node +# transparently REMSH_NAME_ARG="$REMSH_TYPE remsh`date +%s`@`echo $REMSH_NAME | awk -F@ '{print $2}'`" REMSH_REMSH_ARG="-remsh $REMSH_NAME" # Extract the target cookie -COOKIE_ARG=`grep '^-setcookie' $VMARGS_PATH` +COOKIE_ARG=`grep '^\-setcookie' $VMARGS_PATH` if [ -z "$COOKIE_ARG" ]; then echo "vm.args needs to have a -setcookie parameter." exit 1 @@ -81,7 +103,6 @@ cd $USE_DIR # Make sure log directory exists mkdir -p $USE_DIR/log - # Add ERTS bin dir to our path ERTS_PATH=$RUNNER_BASE_DIR/erts-$ERTS_VSN/bin @@ -91,11 +112,35 @@ NODETOOL="$ERTS_PATH/escript $ERTS_PATH/nodetool $NAME_ARG $COOKIE_ARG" # Setup remote shell command to control node REMSH="$ERTS_PATH/erl $REMSH_NAME_ARG $REMSH_REMSH_ARG $COOKIE_ARG" +# Common functions + +# Ping node without allowing nodetool to take stdin +ping_node() { + $NODETOOL ping < /dev/null +} + +# Set the PID global variable, return 1 on error +get_pid() { + PID=`$NODETOOL getpid < /dev/null` + ES=$? + if [ "$ES" -ne 0 ]; then + echo "Node is not running!" + return 1 + fi + + # don't allow empty or init pid's + if [ -z $PID ] || [ "$PID" -le 1 ]; then + return 1 + fi + + return 0 +} + # Check the first argument for instructions case "$1" in start|start_boot) # Make sure there is not already a node running - RES=`$NODETOOL ping` + RES=`ping_node` if [ "$RES" = "pong" ]; then echo "Node is already running!" exit 1 @@ -122,27 +167,28 @@ case "$1" in stop) # Wait for the node to completely stop... case `uname -s` in - Linux|Darwin|FreeBSD|DragonFly|NetBSD|OpenBSD) - # PID COMMAND - PID=`ps ax -o pid= -o command=|\ - grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $1}'` - ;; - SunOS) - # PID COMMAND - PID=`ps -ef -o pid= -o args=|\ - grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $1}'` - ;; - CYGWIN*) - # UID PID PPID TTY STIME COMMAND - PID=`ps -efW|grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $2}'` - ;; + Darwin) + # Make sure we explicitly set this because iTerm.app doesn't for + # some reason. + COMMAND_MODE=unix2003 esac + + # Get the PID from nodetool + get_pid + GPR=$? + if [ "$GPR" -ne 0 ] || [ -z $PID ]; then + exit $GPR + fi + + # Tell nodetool to initiate a stop $NODETOOL stop ES=$? if [ "$ES" -ne 0 ]; then exit $ES fi - while `kill -0 $PID 2>/dev/null`; + + # Wait for the node to completely stop... + while `kill -s 0 $PID 2>/dev/null` do sleep 1 done @@ -168,7 +214,7 @@ case "$1" in ping) ## See if the VM is alive - $NODETOOL ping + ping_node ES=$? if [ "$ES" -ne 0 ]; then exit $ES @@ -176,8 +222,8 @@ case "$1" in ;; attach) - # Make sure a node IS running - RES=`$NODETOOL ping` + # Make sure a node is running + ping_node ES=$? if [ "$ES" -ne 0 ]; then echo "Node is not running!" @@ -189,8 +235,8 @@ case "$1" in ;; remote_console) - # Make sure a node IS running - RES=`$NODETOOL ping` + # Make sure a node is running + ping_node ES=$? if [ "$ES" -ne 0 ]; then echo "Node is not running!" @@ -210,7 +256,7 @@ case "$1" in fi # Make sure a node IS running - RES=`$NODETOOL ping` + ping_node ES=$? if [ "$ES" -ne 0 ]; then echo "Node is not running!" @@ -283,8 +329,17 @@ case "$1" in # Start the VM exec $CMD -- ${1+"$@"} ;; + getpid) + # Get the PID from nodetool + get_pid + ES=$? + if [ "$ES" -ne 0 ] || [ -z $PID ]; then + exit $ES + fi + echo $PID + ;; *) - echo "Usage: $SCRIPT {start|start_boot |foreground|stop|restart|reboot|ping|console|console_clean|console_boot |attach|remote_console|upgrade}" + echo "Usage: $SCRIPT {start|start_boot |foreground|stop|restart|reboot|ping|console|getpid||console_clean|console_boot |attach|remote_console|upgrade}" exit 1 ;; esac