#!/bin/sh
#
# Copy the contents of the user-owned job sandbox back into the
# condor-owned sandbox ("$SANDBOX.condor") by running tar through glexec,
# then move the condor-owned sandbox back to the original sandbox path.
#
# Usage:
#   $0 GLEXEC PROXY SANDBOX MAX_GLEXEC_ERRORS MIN_GLEXEC_RETRY_DELAY
#
#   GLEXEC                  command used to run the copy-out helper
#   PROXY                   proxy file name, relative to the sandbox
#   SANDBOX                 sandbox path; "$SANDBOX.condor" receives the
#                           files and is finally renamed to SANDBOX
#   MAX_GLEXEC_ERRORS       stop retrying once glexec has failed with
#                           status 202 or 203 more than this many times
#   MIN_GLEXEC_RETRY_DELAY  base retry delay, in seconds
#
# Exit status: 0 when glexec, the receiving tar and the final mv all
# succeeded; 1 otherwise (including a wrong argument count).

if [ "$#" -ne 5 ]; then
  echo "usage: $0 GLEXEC PROXY SANDBOX MAX_GLEXEC_ERRORS MIN_GLEXEC_RETRY_DELAY" >&2
  exit 1
fi

GLEXEC="$1"
PROXY="$2"
SANDBOX="$3"
MAX_GLEXEC_ERRORS="$4"
MIN_GLEXEC_RETRY_DELAY="$5"

# glexec is handed the resolved path of the shell (presumably because it
# wants a real binary rather than a symlink -- TODO confirm). Fall back to
# /bin/sh itself if readlink is missing or fails, so SH is never empty.
SH=$(readlink -f /bin/sh 2>/dev/null)
if [ -z "$SH" ]; then
  SH=/bin/sh
fi

# Use the condor-owned copy of the user proxy as the client credential.
GLEXEC_CLIENT_CERT="$SANDBOX.condor/$PROXY"
export GLEXEC_CLIENT_CERT

# prevent glexec from creating a proxy in /tmp that is not cleaned up
GLEXEC_TARGET_PROXY=/dev/null
export GLEXEC_TARGET_PROXY

# The glexec side of the pipeline runs in a subshell and the pipeline's own
# status is that of the receiving tar, so glexec's status is handed over
# through this file.
# NOTE(review): the file lives in the directory tar extracts into; a sandbox
# entry with the same name could collide with it -- verify whether that
# matters for the caller.
rc_file="$SANDBOX.condor/_condor_glexec_rc"

glexec_errors=0
while :; do
  # Tar up the contents of the user-owned sandbox and pipe them back to a
  # tar running as the condor user. The user's copy of the proxy is removed
  # first so it does not stomp on the condor-owned copy. The sandbox and
  # proxy names are passed as positional parameters ($1, $2) instead of
  # being spliced into the command text, so quotes, '$' or backticks in
  # them cannot alter the command that is run.
  {
    "$GLEXEC" "$SH" -c \
      'rm -f "$1/$2" && tar -C "$1" -c . && rm -rf "$1"' \
      sh "$SANDBOX" "$PROXY"
    echo $? > "$rc_file"
  } | tar -C "$SANDBOX.condor" -x
  tar_rc=$?

  glexec_rc=$(cat "$rc_file")
  rm -f "$rc_file"

  # A missing, empty or non-numeric status is treated as a failure; without
  # this the numeric tests below would error out and evaluate as false.
  case "$glexec_rc" in
    '' | *[!0-9]*) glexec_rc=2 ;;
  esac

  if [ "$glexec_rc" -eq 0 ]; then
    break
  elif [ "$glexec_rc" -ne 202 ] && [ "$glexec_rc" -ne 203 ]; then
    # Either a non-transient glexec error, or an error from the command
    # we are asking glexec to run. Try to finish cleaning up
    # and then exit non-zero at the end.
    break
  fi

  # This _could_ be a transient issue, such as a communication error with
  # GUMS, so retry up to some limit.
  glexec_errors=$((glexec_errors + 1))
  if [ "$glexec_errors" -gt "$MAX_GLEXEC_ERRORS" ]; then
    break
  fi

  # Randomized backoff whose range widens with the number of failures:
  # sleep for MIN_GLEXEC_RETRY_DELAY *
  #           min(100, 1 + random_number_in_range(0, glexec_errors - 1)).
  # od prints the 4 random bytes as one unsigned decimal number; if that
  # fails for any reason, fall back to 0 (i.e. a multiplier of 1).
  rand=$(od -An -N4 -tu4 /dev/urandom 2>/dev/null | tr -d ' \t\n')
  case "$rand" in
    '' | *[!0-9]*) rand=0 ;;
  esac
  delay_rand=$((1 + rand % glexec_errors))
  if [ "$delay_rand" -gt 100 ]; then
    # Cap rather than wrap: a modulo here would turn 100 into a 0s delay.
    delay_rand=100
  fi
  delay=$(($MIN_GLEXEC_RETRY_DELAY * delay_rand))

  echo "Glexec exited with status $glexec_rc; retrying in $delay seconds." >&2
  sleep "$delay"
done

# Move the condor-owned sandbox back in place. This is attempted even when
# the copy-out failed; the failure is still reported through the exit
# status below.
mv -f "$SANDBOX.condor" "$SANDBOX"
mv_rc=$?

if [ "$mv_rc" -ne 0 ] || [ "$tar_rc" -ne 0 ] || [ "$glexec_rc" -ne 0 ]; then
  exit 1
fi
exit 0