Andrea Mondelli | 3d54f6d | 2019-01-10 10:12:41 -0500 | [diff] [blame] | 1 | #! /usr/bin/env python2.7 |
Steve Reinhardt | b15a7aa | 2005-09-22 15:27:42 -0400 | [diff] [blame] | 2 | |
Ali Saidi | c648044 | 2007-10-10 23:24:16 -0400 | [diff] [blame] | 3 | # Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan |
Steve Reinhardt | b15a7aa | 2005-09-22 15:27:42 -0400 | [diff] [blame] | 4 | # All rights reserved. |
| 5 | # |
| 6 | # Redistribution and use in source and binary forms, with or without |
| 7 | # modification, are permitted provided that the following conditions are |
| 8 | # met: redistributions of source code must retain the above copyright |
| 9 | # notice, this list of conditions and the following disclaimer; |
| 10 | # redistributions in binary form must reproduce the above copyright |
| 11 | # notice, this list of conditions and the following disclaimer in the |
| 12 | # documentation and/or other materials provided with the distribution; |
| 13 | # neither the name of the copyright holders nor the names of its |
| 14 | # contributors may be used to endorse or promote products derived from |
| 15 | # this software without specific prior written permission. |
| 16 | # |
| 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 21 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
Ali Saidi | cb0cf2d | 2006-05-31 19:26:56 -0400 | [diff] [blame] | 28 | # |
| 29 | # Authors: Steve Reinhardt |
Ali Saidi | c648044 | 2007-10-10 23:24:16 -0400 | [diff] [blame] | 30 | # Ali Saidi |
| 31 | |
# Important!
# This script expects a simple '$ ' shell prompt.  If your login shell is
# not sh (which uses that prompt by default), you'll need to add something
# like the following to your bashrc/bash_profile script:
#if [ "$OAR_USER" = "xxxx" ]; then
#   PS1='$ '
#fi
| 38 | |
Steve Reinhardt | b15a7aa | 2005-09-22 15:27:42 -0400 | [diff] [blame] | 39 | |
| 40 | import sys |
| 41 | import os |
| 42 | import re |
| 43 | import time |
| 44 | import optparse |
| 45 | |
| 46 | import pexpect |
| 47 | |
# Name this script was invoked as; used as a prefix on diagnostics.
progname = os.path.basename(sys.argv[0])

# Command-line interface: qdo's own options come first, and everything
# from the first positional argument onward is the command to run.
usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that flags
# belonging to the command are not consumed as qdo options.
optparser.disable_interspersed_args()

# (flag, add_option keyword arguments), registered in --help order.
# Both timeouts are in seconds: 30 minutes to get a prompt from oarsub
# and 600 minutes for the command itself.
_option_table = [
    ('-e', dict(dest='stderr_file',
                help='command stderr output file')),
    ('-o', dict(dest='stdout_file',
                help='command stdout output file')),
    ('-l', dict(dest='save_log', action='store_true',
                help='save oarsub output log file')),
    ('-N', dict(dest='job_name',
                help='oarsub job name')),
    ('-q', dict(dest='dest_queue',
                help='oarsub destination queue')),
    ('--qwait', dict(dest='oarsub_timeout', type='int',
                     help='oarsub queue wait timeout', default=30*60)),
    ('-t', dict(dest='cmd_timeout', type='int',
                help='command execution timeout', default=600*60)),
]
for _flag, _settings in _option_table:
    optparser.add_option(_flag, **_settings)

(options, cmd) = optparser.parse_args()

# Nothing left after the options means there is no command to run.
if not cmd:
    sys.stderr.write("%s: missing command\n" % progname)
    sys.exit(1)
| 73 | |
Steve Reinhardt | 5540021 | 2005-10-23 22:18:50 -0400 | [diff] [blame] | 74 | # If we want to do this, need to add check here to make sure cmd[0] is |
Ali Saidi | c648044 | 2007-10-10 23:24:16 -0400 | [diff] [blame] | 75 | # a valid PBS job name, else oarsub will die on us. |
Steve Reinhardt | 5540021 | 2005-10-23 22:18:50 -0400 | [diff] [blame] | 76 | # |
| 77 | #if not options.job_name: |
| 78 | # options.job_name = cmd[0] |
Steve Reinhardt | 9c68bde | 2005-10-20 13:25:43 -0400 | [diff] [blame] | 79 | |
# Directory the job is started from; the remote shell is later told to
# change into this same path.
cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount: rewrite the
# leading /.automount/ component back to /n/.
_automount_prefix = '/.automount/'
if cwd.startswith(_automount_prefix):
    cwd = '/n/' + cwd[len(_automount_prefix):]

# Refuse to run from anywhere outside the /n/poolfs tree.
if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs.\n")
    sys.exit(1)
| 89 | |
class Shell(pexpect.spawn):
    """Wrap pexpect.spawn with helpers that assume a Bourne/bash shell.

    The spawned command is expected to land at an interactive shell that
    shows a '$ ' prompt.  All text consumed through expect() is also
    accumulated in self.full_output so the session can be logged later.
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn cmd, wait for its first prompt, then install our own.

        cmd is the command line handed to pexpect.spawn.  The script
        exits with status 1 if the spawn fails, or if no '$ ' prompt
        appears within options.oarsub_timeout seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            print("%s: %s" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands expected to return right away
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        # PROMPT_COMMAND is unset first so it cannot print anything
        # around the fixed prompt we are about to match on.
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of pexpect.spawn.expect that updates full_output too.

        Returns the value of pexpect.spawn.expect (the index of the
        pattern that matched).  pexpect exceptions propagate unchanged,
        and in that case nothing is appended to full_output.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command, with
        trailing whitespace stripped.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status reported by 'echo $?'.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Return True if dirname is a directory as seen by this shell."""
        # Run the test through this instance, not through whichever
        # module-level object happens to be named 'shell'.
        (_output, status) = self.do_command('[ -d %s ]' % dirname,
                                            self.quick_timeout)
        return status == 0

    def safe_close(self, max_wait=10):
        """Wait for the child to exit by itself, then close without force.

        Don't actually try to close it right away: we can't actually
        kill the pid, which is what close() tries to do, and if we call
        wait we could be in an unfortunate situation of it printing
        output right as we call wait, so the output is never read and
        the process never ends.  Instead, poll once a second for up to
        max_wait seconds before calling close(force=False).
        """
        waited = 0
        while self.isalive() and waited < max_wait:
            time.sleep(1)
            # Without this increment the bound above never takes effect.
            waited += 1
        self.close(force=False)
Steve Reinhardt | dc8018a | 2016-02-06 17:21:18 -0800 | [diff] [blame] | 159 | |
# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
_poolfs_link_hack = False

if _poolfs_link_hack and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    # Assemble the oarsub command line piece by piece; the optional
    # job name and queue are only passed when given on our command line.
    _oarsub_parts = ['oarsub -I']
    if options.job_name:
        _oarsub_parts.append('-n "%s"' % options.job_name)
    if options.dest_queue:
        _oarsub_parts.append('-q ' + options.dest_queue)
    _oarsub_parts.append('-d %s' % cwd)
    shell_cmd = ' '.join(_oarsub_parts)

shell = Shell(shell_cmd)
| 177 | |
def _guess_output_dir(cmd):
    """Return the directory the command is expected to write into, or None.

    For a command whose name contains 'cc' or 'g++' this is the directory
    part of the argument following '-o'; for one whose name contains 'm5'
    it is the argument following '-d'.  Anything else yields None.
    """
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            return os.path.dirname(cmd[cmd.index('-o') + 1])
        if 'm5' in cmd[0]:
            return cmd[cmd.index('-d') + 1]
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        pass
    return None


try:
    # chdir to cwd
    (output, status) = shell.do_command('cd ' + cwd)

    if status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    output_dir = _guess_output_dir(cmd)

    if output_dir:
        secs_waited = 0
        # Poll every 5 seconds, giving up after 90 seconds.
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.') # oarsub/ssh termination escape sequence
        shell.safe_close()
        status = 3
        # 'output' still holds the result of the 'cd' step at this point;
        # clear it so it is not printed as if it came from the command.
        output = None
    if output:
        print(output)
finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        shell.expect('Disconnected from OAR job .*')
        shell.safe_close()

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        # 'with' closes the log even if the write fails.
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)
    del shell

sys.exit(status)