blob: 8008a40c4748e029e891e0a70691c95835f3cd48 [file] [log] [blame]
Andrea Mondelli3d54f6d2019-01-10 10:12:41 -05001#! /usr/bin/env python2.7
Steve Reinhardtb15a7aa2005-09-22 15:27:42 -04002
Ali Saidic6480442007-10-10 23:24:16 -04003# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
Steve Reinhardtb15a7aa2005-09-22 15:27:42 -04004# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Ali Saidicb0cf2d2006-05-31 19:26:56 -040028#
29# Authors: Steve Reinhardt
Ali Saidic6480442007-10-10 23:24:16 -040030# Ali Saidi
31
# Important!
# This script expects a simple "$ " prompt. If you are using a shell other
# than sh (which defaults to this prompt), you will need to add something
# like the following to your bashrc/bash_profile script:
#   if [ "$OAR_USER" = "xxxx" ]; then
#       PS1='$ '
#   fi
38
Steve Reinhardtb15a7aa2005-09-22 15:27:42 -040039
40import sys
41import os
42import re
43import time
44import optparse
45
46import pexpect
47
# Name under which this script was invoked; used as a prefix in messages.
progname = os.path.basename(sys.argv[0])

usage = "%prog [options] command [command arguments]"
optparser = optparse.OptionParser(usage=usage)
# Stop option parsing at the first positional argument so that the options
# of the wrapped command are left alone.
optparser.disable_interspersed_args()

# Command-line options, registered in the order they appear in --help.
for _flag, _opt_kwargs in (
        ('-e', dict(dest='stderr_file',
                    help='command stderr output file')),
        ('-o', dict(dest='stdout_file',
                    help='command stdout output file')),
        ('-l', dict(dest='save_log', action='store_true',
                    help='save oarsub output log file')),
        ('-N', dict(dest='job_name',
                    help='oarsub job name')),
        ('-q', dict(dest='dest_queue',
                    help='oarsub destination queue')),
        ('--qwait', dict(dest='oarsub_timeout', type='int',
                         help='oarsub queue wait timeout', default=30*60)),
        ('-t', dict(dest='cmd_timeout', type='int',
                    help='command execution timeout', default=600*60)),
):
    optparser.add_option(_flag, **_opt_kwargs)

# 'cmd' is the list of remaining arguments: the command to run and its
# own arguments.
(options, cmd) = optparser.parse_args()

if not cmd:
    sys.stderr.write("%s: missing command" % progname + "\n")
    sys.exit(1)

# Defaulting the job name to the command name is left disabled: it would
# first need a check that cmd[0] is a valid job name, else oarsub will
# die on us.
#
#if not options.job_name:
#    options.job_name = cmd[0]

cwd = os.getcwd()

# Deal with systems where /n is a symlink to /.automount
if cwd.startswith('/.automount/'):
    cwd = '/n/' + cwd[len('/.automount/'):]

# The job is started in this same directory (see the use of cwd below),
# so only directories under /n/poolfs are accepted.
if not cwd.startswith('/n/poolfs/'):
    sys.stderr.write("Error: current directory must be under /n/poolfs."
                     + "\n")
    sys.exit(1)
89
# The Shell class wraps pexpect.spawn with some handy functions that
# assume the thing on the other end is a Bourne/bash shell.
class Shell(pexpect.spawn):
    """A pexpect-driven interactive shell session.

    The constructor spawns the given command, waits for an initial
    "$ " prompt and then switches the prompt to the fixed string
    "qdo$ " so that later output can be delimited reliably.  Everything
    matched by expect() is accumulated in self.full_output.

    Relies on the module-level names 'progname' and 'options'.
    """

    # Regexp to match the shell prompt.  We change the prompt to
    # something fixed and distinctive to make it easier to match
    # reliably.
    prompt_re = re.compile(r'qdo\$ ')

    def __init__(self, cmd):
        """Spawn 'cmd' and prepare the remote shell for scripted use.

        Exits the script with status 1 if the command cannot be spawned
        or if no prompt shows up within options.oarsub_timeout seconds.
        """
        # initialize base pexpect.spawn object
        try:
            pexpect.spawn.__init__(self, cmd)
        except pexpect.ExceptionPexpect as exc:
            print("%s: %s" % (progname, exc))
            sys.exit(1)
        # full_output accumulates the full output of the session
        self.full_output = ""
        # timeout (seconds) for commands expected to return immediately
        self.quick_timeout = 15
        # wait for a prompt, then change it
        try:
            self.expect(r'\$ ', options.oarsub_timeout)
        except pexpect.TIMEOUT:
            sys.stderr.write("%s: oarsub timed out.\n" % progname)
            self.kill(9)
            self.safe_close()
            sys.exit(1)
        self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')

    def expect(self, regexp, timeout=-1):
        """Version of expect that updates full_output too.

        Returns whatever pexpect.spawn.expect() returns (the index of
        the matched pattern), so this override can be used like the
        base method.  Exceptions from the base method propagate before
        full_output is touched.
        """
        index = pexpect.spawn.expect(self, regexp, timeout)
        self.full_output += self.before + self.after
        return index

    def do_bare_command(self, cmd, timeout=-1):
        """Issue a command and wait for the next prompt.

        Returns a string containing the output of the command, with
        trailing whitespace removed.
        """
        self.sendline(cmd)
        # read back the echo of the command
        self.readline()
        # wait for the next prompt
        self.expect(self.prompt_re, timeout)
        return self.before.rstrip()

    def do_command(self, cmd, timeout=-1):
        """Issue a command, then query its exit status.

        Returns a (string, int) tuple with the command output and the
        status.
        """
        # do the command itself
        output = self.do_bare_command(cmd, timeout)
        # collect status
        status = int(self.do_bare_command("echo $?", self.quick_timeout))
        return (output, status)

    def dir_exists(self, dirname):
        """Check to see if the given directory exists on the remote side.

        'dirname' is interpolated into the shell command unquoted.
        """
        # Use this session rather than the module-level 'shell' object.
        (output, status) = self.do_command('[ -d %s ]' % dirname,
                                           self.quick_timeout)
        return status == 0

    # Don't actually try to close it.. just wait until it closes by itself
    # We can't actually kill the pid which is what it's trying to do, and if
    # we call wait we could be in an unfortunate situation of it printing
    # input right as we call wait, so the input is never read and the
    # process never ends
    def safe_close(self):
        """Give the child up to 10 seconds to exit, then close politely."""
        count = 0
        while self.isalive() and count < 10:
            time.sleep(1)
            # Without this increment the 10-second bound never applies
            # and a child that refuses to die would hang us forever.
            count += 1
        self.close(force=False)
Steve Reinhardtdc8018a2016-02-06 17:21:18 -0800159
# Spawn the interactive pool job.

# Hack to do link on poolfs... disabled for now since
# compiler/linker/library versioning problems between poolfs and
# nodes.  May never work since poolfs is x86-64 and nodes are 32-bit.
if False and len(cmd) > 50:
    shell_cmd = 'ssh -t poolfs /bin/sh -l'
    print("%s: running %s on poolfs" % (progname, cmd[0]))
else:
    shell_cmd = 'oarsub -I'
    if options.job_name:
        shell_cmd += ' -n "%s"' % options.job_name
    if options.dest_queue:
        shell_cmd += ' -q ' + options.dest_queue
    shell_cmd += ' -d %s' % cwd

shell = Shell(shell_cmd)

# Assume failure until the user's command has actually reported a status.
# This keeps 'status' bound in the 'finally' clause below even when an
# exception fires early, and makes such a failure leave a log behind.
status = 1
output = ""

try:
    # chdir to cwd
    (cd_output, cd_status) = shell.do_command('cd ' + cwd)

    if cd_status != 0:
        raise OSError("Can't chdir to %s" % cwd)

    # wacky hack: sometimes scons will create an output directory then
    # fork a job to generate files in that directory, and the job will
    # get run before the directory creation propagates through NFS.
    # This hack looks for a '-o' option indicating an output file and
    # waits for the corresponding directory to appear if necessary.
    try:
        if 'cc' in cmd[0] or 'g++' in cmd[0]:
            output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
        elif 'm5' in cmd[0]:
            output_dir = cmd[cmd.index('-d')+1]
        else:
            output_dir = None
    except (ValueError, IndexError):
        # no big deal if there's no '-o'/'-d' or if it's the final argument
        output_dir = None

    if output_dir:
        # Poll every 5 seconds, for at most 90 seconds in total.
        secs_waited = 0
        while not shell.dir_exists(output_dir) and secs_waited < 90:
            time.sleep(5)
            secs_waited += 5
        if secs_waited > 30:
            print("waited %d seconds for %s" % (secs_waited, output_dir))

    # run command
    if options.stdout_file:
        cmd += ['>', options.stdout_file]
    if options.stderr_file:
        cmd += ['2>', options.stderr_file]
    try:
        (output, status) = shell.do_command(' '.join(cmd),
                                            options.cmd_timeout)
    except pexpect.TIMEOUT:
        sys.stderr.write("%s: command timed out after %d seconds.\n"
                         % (progname, options.cmd_timeout))
        shell.sendline('~.') # oarsub/ssh termination escape sequence
        shell.safe_close()
        # No command output was collected; 'output' keeps its empty
        # default, so nothing stale is printed below.
        status = 3
    if output:
        print(output)
finally:
    # end job
    if shell.isalive():
        shell.sendline('exit')
        try:
            shell.expect('Disconnected from OAR job .*')
        except (pexpect.TIMEOUT, pexpect.EOF):
            # Best-effort wait: a missing disconnect banner must not
            # prevent the session log below from being written.
            sys.stderr.write("%s: did not see oarsub disconnect message.\n"
                             % progname)
        shell.safe_close()

    # if there was an error, log the output even if not requested
    if status != 0 or options.save_log:
        with open('qdo-log.' + str(os.getpid()), 'w') as log:
            log.write(shell.full_output)
del shell

sys.exit(status)
238sys.exit(status)