| #! /usr/bin/env python |
| |
| # Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer; |
| # redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution; |
| # neither the name of the copyright holders nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| # |
| # Authors: Steve Reinhardt |
| # Ali Saidi |
| |
# Important!
# This script expects a simple "$ " shell prompt.  If your login shell is
# something other than plain sh (whose default prompt is already "$ "),
# add something like the following to your bashrc/bash_profile script:
#if [ "$OAR_USER" = "xxxx" ]; then
#    PS1='$ '
#fi
| |
| |
| import sys |
| import os |
| import re |
| import time |
| import optparse |
| |
| import pexpect |
| |
# Name used as a prefix on every diagnostic this script prints.
progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that options
# belonging to the wrapped command are left alone and end up in 'cmd'.
optparser.disable_interspersed_args()
optparser.add_option(
    '-e', dest='stderr_file',
    help='command stderr output file')
optparser.add_option(
    '-o', dest='stdout_file',
    help='command stdout output file')
optparser.add_option(
    '-l', dest='save_log', action='store_true',
    help='save oarsub output log file')
optparser.add_option(
    '-N', dest='job_name',
    help='oarsub job name')
optparser.add_option(
    '-q', dest='dest_queue',
    help='oarsub destination queue')
# Both timeouts are handed to pexpect later on; defaults are 30 minutes
# for the queue wait and 600 minutes for the command itself.
optparser.add_option(
    '--qwait', dest='oarsub_timeout', type='int', default=30*60,
    help='oarsub queue wait timeout')
optparser.add_option(
    '-t', dest='cmd_timeout', type='int', default=600*60,
    help='command execution timeout')

# 'cmd' is the list of leftover arguments: the command to run and its
# own arguments.
(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)
| |
| # If we want to do this, need to add check here to make sure cmd[0] is |
| # a valid PBS job name, else oarsub will die on us. |
| # |
| #if not options.job_name: |
| # options.job_name = cmd[0] |
| |
# Working directory the job is started from; it is reused on the remote
# side, so it has to be a path that is valid there as well.
cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount: swap the
# leading /.automount/ for /n/.
_automount_prefix = '/.automount/'
if cwd.startswith(_automount_prefix):
    cwd = '/n/' + cwd[len(_automount_prefix):]

# Refuse to run from anywhere outside the shared pool filesystem.
if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)
| |
# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """Interactive shell session driven through pexpect.

    The constructor spawns the given command, waits for a '$ ' prompt and
    then switches the prompt to the fixed string matched by `prompt_re`.

    Relies on two module-level names: `progname` (prefix for diagnostics)
    and `options.oarsub_timeout` (how long to wait for the first prompt).

    Instance attributes:
      full_output   -- text accumulated from every successful expect()
      quick_timeout -- timeout used for short housekeeping commands
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn `cmd` and prepare the shell on the other end.

        Exits the script with status 1 if the command cannot be spawned
        or if no prompt shows up within options.oarsub_timeout.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            sys.stderr.write("%s: %s\n" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of expect that updates full_output too.

        Returns whatever pexpect.spawn.expect returns (the index of the
        matched pattern).  If the underlying expect raises, full_output
        is left untouched.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command with
        trailing whitespace stripped.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status.  `timeout` applies to the command itself; the follow-up
        'echo $?' uses quick_timeout.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the far side."""
        # Use this session, not whatever the module-level 'shell' is.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0

    # Don't actually try to close it.. just wait until it closes by itself
    # We can't actually kill the pid which is what it's trying to do, and if
    # we call wait we could be in an unfortunate situation of it printing
    # input right as we call wait, so the input is never read and the
    # process never ends
    def safe_close(self):
        """Give the child about ten seconds to exit, then close.

        The wait is bounded by `count`.  close() is called without force;
        NOTE(review): pexpect's close() may raise if the child is still
        alive at that point -- confirm that is acceptable to callers.
        """
        count = 0
        while self.isalive() and count < 10:
            time.sleep(1)
            count += 1
        self.close(force=False)
| |
# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    # Collect the oarsub invocation piece by piece, then glue the
    # pieces together with single spaces.
    oarsub_parts = ['oarsub -I']
    if options.job_name:
        oarsub_parts.append('-n "%s"' % options.job_name)
    if options.dest_queue:
        oarsub_parts.append('-q ' + options.dest_queue)
    oarsub_parts.append('-d %s' % cwd)
    shell_cmd = ' '.join(oarsub_parts)

# Constructing the Shell spawns shell_cmd and waits for its prompt.
shell = Shell(shell_cmd)
| |
# Run the requested command inside the job's shell, then tear the job
# down, optionally save the session log, and exit with the command's
# status (3 if the command timed out).

# Pre-set so the cleanup code below can always read 'status', even when
# an exception fires before the first command reports one.
status = 1
output = None

try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # Poll every 5 seconds, giving up after 90 seconds.
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %s seconds for %s" % (secs_waited, output_dir))

    # run command
    # NOTE(review): the arguments are joined unquoted, so the remote
    # shell re-parses them; the redirections below depend on that.
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd),
                                            options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.') # oarsub/ssh termination escape sequence
        shell.safe_close()
        # Drop the leftover output of the 'cd' above so it is not
        # printed as if it came from the command.
        output = None
        status = 3
    if output:
        print(output)
finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        try:
            shell.expect('Disconnected from OAR job .*')
        except (pexpect.TIMEOUT, pexpect.EOF):
            # Best effort only: a missing farewell message must not hide
            # the command's own result or skip the log below.
            pass
        shell.safe_close()

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)
del shell

sys.exit(status)