| import MySQLdb, re, string |
| |
| def statcmp(a, b): |
| v1 = a.split('.') |
| v2 = b.split('.') |
| |
| last = min(len(v1), len(v2)) - 1 |
| for i,j in zip(v1[0:last], v2[0:last]): |
| if i != j: |
| return cmp(i, j) |
| |
| # Special compare for last element. |
| if len(v1) == len(v2): |
| return cmp(v1[last], v2[last]) |
| else: |
| return cmp(len(v1), len(v2)) |
| |
| class RunData: |
| def __init__(self, row): |
| self.run = int(row[0]) |
| self.name = row[1] |
| self.user = row[2] |
| self.project = row[3] |
| |
| class SubData: |
| def __init__(self, row): |
| self.stat = int(row[0]) |
| self.x = int(row[1]) |
| self.y = int(row[2]) |
| self.name = row[3] |
| self.descr = row[4] |
| |
| class Data: |
| def __init__(self, row): |
| if len(row) != 5: |
| raise 'stat db error' |
| self.stat = int(row[0]) |
| self.run = int(row[1]) |
| self.x = int(row[2]) |
| self.y = int(row[3]) |
| self.data = float(row[4]) |
| |
| def __repr__(self): |
| return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, |
| self.run, self.x, self.y, self.data) |
| |
| class StatData(object): |
| def __init__(self, row): |
| self.stat = int(row[0]) |
| self.name = row[1] |
| self.desc = row[2] |
| self.type = row[3] |
| self.prereq = int(row[5]) |
| self.precision = int(row[6]) |
| |
| import flags |
| self.flags = 0 |
| if int(row[4]): self.flags |= flags.printable |
| if int(row[7]): self.flags |= flags.nozero |
| if int(row[8]): self.flags |= flags.nonan |
| if int(row[9]): self.flags |= flags.total |
| if int(row[10]): self.flags |= flags.pdf |
| if int(row[11]): self.flags |= flags.cdf |
| |
| if self.type == 'DIST' or self.type == 'VECTORDIST': |
| self.min = float(row[12]) |
| self.max = float(row[13]) |
| self.bktsize = float(row[14]) |
| self.size = int(row[15]) |
| |
| if self.type == 'FORMULA': |
| self.formula = self.db.allFormulas[self.stat] |
| |
| class Node(object): |
| def __init__(self, name): |
| self.name = name |
| def __str__(self): |
| return name |
| |
| class Database(object): |
| def __init__(self): |
| self.host = 'zizzer.pool' |
| self.user = '' |
| self.passwd = '' |
| self.db = 'm5stats' |
| self.cursor = None |
| |
| self.allStats = [] |
| self.allStatIds = {} |
| self.allStatNames = {} |
| |
| self.allSubData = {} |
| |
| self.allRuns = [] |
| self.allRunIds = {} |
| self.allRunNames = {} |
| |
| self.allBins = [] |
| self.allBinIds = {} |
| self.allBinNames = {} |
| |
| self.allFormulas = {} |
| |
| self.stattop = {} |
| self.statdict = {} |
| self.statlist = [] |
| |
| self.mode = 'sum'; |
| self.runs = None |
| self.bins = None |
| self.ticks = None |
| self.__dict__['get'] = type(self).sum |
| |
| def query(self, sql): |
| self.cursor.execute(sql) |
| |
| def update_dict(self, dict): |
| dict.update(self.stattop) |
| |
| def append(self, stat): |
| statname = re.sub(':', '__', stat.name) |
| path = string.split(statname, '.') |
| pathtop = path[0] |
| fullname = '' |
| |
| x = self |
| while len(path) > 1: |
| name = path.pop(0) |
| if not x.__dict__.has_key(name): |
| x.__dict__[name] = Node(fullname + name) |
| x = x.__dict__[name] |
| fullname = '%s%s.' % (fullname, name) |
| |
| name = path.pop(0) |
| x.__dict__[name] = stat |
| |
| self.stattop[pathtop] = self.__dict__[pathtop] |
| self.statdict[statname] = stat |
| self.statlist.append(statname) |
| |
| def connect(self): |
| # connect |
| self.thedb = MySQLdb.connect(db=self.db, |
| host=self.host, |
| user=self.user, |
| passwd=self.passwd) |
| |
| # create a cursor |
| self.cursor = self.thedb.cursor() |
| |
| self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project |
| from runs''') |
| for result in self.cursor.fetchall(): |
| run = RunData(result); |
| self.allRuns.append(run) |
| self.allRunIds[run.run] = run |
| self.allRunNames[run.name] = run |
| |
| self.query('select * from bins') |
| for id,name in self.cursor.fetchall(): |
| self.allBinIds[int(id)] = name |
| self.allBinNames[name] = int(id) |
| |
| self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') |
| for result in self.cursor.fetchall(): |
| subdata = SubData(result) |
| if self.allSubData.has_key(subdata.stat): |
| self.allSubData[subdata.stat].append(subdata) |
| else: |
| self.allSubData[subdata.stat] = [ subdata ] |
| |
| self.query('select * from formulas') |
| for id,formula in self.cursor.fetchall(): |
| self.allFormulas[int(id)] = formula.tostring() |
| |
| StatData.db = self |
| self.query('select * from stats') |
| import info |
| for result in self.cursor.fetchall(): |
| stat = info.NewStat(StatData(result)) |
| self.append(stat) |
| self.allStats.append(stat) |
| self.allStatIds[stat.stat] = stat |
| self.allStatNames[stat.name] = stat |
| |
| # Name: listbins |
| # Desc: Prints all bins matching regex argument, if no argument |
| # is given all bins are returned |
| def listBins(self, regex='.*'): |
| print '%-50s %-10s' % ('bin name', 'id') |
| print '-' * 61 |
| names = self.allBinNames.keys() |
| names.sort() |
| for name in names: |
| id = self.allBinNames[name] |
| print '%-50s %-10d' % (name, id) |
| |
| # Name: listruns |
| # Desc: Prints all runs matching a given user, if no argument |
| # is given all runs are returned |
| def listRuns(self, user=None): |
| print '%-40s %-10s %-5s' % ('run name', 'user', 'id') |
| print '-' * 62 |
| for run in self.allRuns: |
| if user == None or user == run.user: |
| print '%-40s %-10s %-10d' % (run.name, run.user, run.run) |
| |
| # Name: listTicks |
| # Desc: Prints all samples for a given run |
| def listTicks(self, runs=None): |
| print "tick" |
| print "----------------------------------------" |
| sql = 'select distinct dt_tick from data where dt_stat=1180 and (' |
| if runs != None: |
| first = True |
| for run in runs: |
| if first: |
| # sql += ' where' |
| first = False |
| else: |
| sql += ' or' |
| sql += ' dt_run=%s' % run.run |
| sql += ')' |
| self.query(sql) |
| for r in self.cursor.fetchall(): |
| print r[0] |
| |
| # Name: retTicks |
| # Desc: Prints all samples for a given run |
| def retTicks(self, runs=None): |
| sql = 'select distinct dt_tick from data where dt_stat=1180 and (' |
| if runs != None: |
| first = True |
| for run in runs: |
| if first: |
| first = False |
| else: |
| sql += ' or' |
| sql += ' dt_run=%s' % run.run |
| sql += ')' |
| self.query(sql) |
| ret = [] |
| for r in self.cursor.fetchall(): |
| ret.append(r[0]) |
| return ret |
| |
| # Name: liststats |
| # Desc: Prints all statistics that appear in the database, |
| # the optional argument is a regular expression that can |
| # be used to prune the result set |
| def listStats(self, regex=None): |
| print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') |
| print '-' * 80 |
| |
| rx = None |
| if regex != None: |
| rx = re.compile(regex) |
| |
| stats = [ stat.name for stat in self.allStats ] |
| stats.sort(statcmp) |
| for stat in stats: |
| stat = self.allStatNames[stat] |
| if rx == None or rx.match(stat.name): |
| print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) |
| |
| # Name: liststats |
| # Desc: Prints all statistics that appear in the database, |
| # the optional argument is a regular expression that can |
| # be used to prune the result set |
| def listFormulas(self, regex=None): |
| print '%-60s %s' % ('formula name', 'formula') |
| print '-' * 80 |
| |
| rx = None |
| if regex != None: |
| rx = re.compile(regex) |
| |
| stats = [ stat.name for stat in self.allStats ] |
| stats.sort(statcmp) |
| for stat in stats: |
| stat = self.allStatNames[stat] |
| if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): |
| print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) |
| |
| def getStat(self, stats): |
| if type(stats) is not list: |
| stats = [ stats ] |
| |
| ret = [] |
| for stat in stats: |
| if type(stat) is int: |
| ret.append(self.allStatIds[stat]) |
| |
| if type(stat) is str: |
| rx = re.compile(stat) |
| for stat in self.allStats: |
| if rx.match(stat.name): |
| ret.append(stat) |
| return ret |
| |
| def getBin(self, bins): |
| if type(bins) is not list: |
| bins = [ bins ] |
| |
| ret = [] |
| for bin in bins: |
| if type(bin) is int: |
| ret.append(bin) |
| elif type(bin) is str: |
| ret.append(self.allBinNames[bin]) |
| else: |
| for name,id in self.allBinNames.items(): |
| if bin.match(name): |
| ret.append(id) |
| |
| return ret |
| |
| def getNotBin(self, bin): |
| map = {} |
| for bin in getBin(bin): |
| map[bin] = 1 |
| |
| ret = [] |
| for bin in self.allBinIds.keys(): |
| if not map.has_key(bin): |
| ret.append(bin) |
| |
| return ret |
| |
| ######################################### |
| # get the data |
| # |
| def inner(self, op, stat, bins, ticks, group=False): |
| sql = 'select ' |
| sql += 'dt_stat as stat, ' |
| sql += 'dt_run as run, ' |
| sql += 'dt_x as x, ' |
| sql += 'dt_y as y, ' |
| if group: |
| sql += 'dt_tick as tick, ' |
| sql += '%s(dt_data) as data ' % op |
| sql += 'from data ' |
| sql += 'where ' |
| |
| if isinstance(stat, list): |
| val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) |
| sql += ' (%s)' % val |
| else: |
| sql += ' dt_stat=%d' % stat.stat |
| |
| if self.runs != None and len(self.runs): |
| val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) |
| sql += ' and (%s)' % val |
| |
| if bins != None and len(bins): |
| val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) |
| sql += ' and (%s)' % val |
| |
| if ticks != None and len(ticks): |
| val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) |
| sql += ' and (%s)' % val |
| |
| sql += ' group by dt_stat,dt_run,dt_x,dt_y' |
| if group: |
| sql += ',dt_tick' |
| return sql |
| |
| def outer(self, op_out, op_in, stat, bins, ticks): |
| sql = self.inner(op_in, stat, bins, ticks, True) |
| sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) |
| sql += 'group by stat,run,x,y' |
| return sql |
| |
| # Name: sum |
| # Desc: given a run, a stat and an array of samples and bins, |
| # sum all the bins and then get the standard deviation of the |
| # samples for non-binned runs. This will just return the average |
| # of samples, however a bin array still must be passed |
| def sum(self, stat, bins, ticks): |
| return self.inner('sum', stat, bins, ticks) |
| |
| # Name: avg |
| # Desc: given a run, a stat and an array of samples and bins, |
| # sum all the bins and then average the samples for non-binned |
| # runs this will just return the average of samples, however |
| # a bin array still must be passed |
| def avg(self, stat, bins, ticks): |
| return self.outer('avg', 'sum', stat, bins, ticks) |
| |
| # Name: stdev |
| # Desc: given a run, a stat and an array of samples and bins, |
| # sum all the bins and then get the standard deviation of the |
| # samples for non-binned runs. This will just return the average |
| # of samples, however a bin array still must be passed |
| def stdev(self, stat, bins, ticks): |
| return self.outer('stddev', 'sum', stat, bins, ticks) |
| |
| def __getattribute__(self, attr): |
| if attr != 'get': |
| return super(Database, self).__getattribute__(attr) |
| |
| if self.__dict__['get'] == type(self).sum: |
| return 'sum' |
| elif self.__dict__['get'] == type(self).avg: |
| return 'avg' |
| elif self.__dict__['get'] == type(self).stdev: |
| return 'stdev' |
| else: |
| return '' |
| |
| def __setattr__(self, attr, value): |
| if attr != 'get': |
| super(Database, self).__setattr__(attr, value) |
| return |
| |
| if value == 'sum': |
| self.__dict__['get'] = type(self).sum |
| elif value == 'avg': |
| self.__dict__['get'] = type(self).avg |
| elif value == 'stdev': |
| self.__dict__['get'] = type(self).stdev |
| else: |
| raise AttributeError, "can only set get to: sum | avg | stdev" |
| |
| def data(self, stat, bins=None, ticks=None): |
| if bins is None: |
| bins = self.bins |
| if ticks is None: |
| ticks = self.ticks |
| sql = self.__dict__['get'](self, stat, bins, ticks) |
| self.query(sql) |
| |
| runs = {} |
| for x in self.cursor.fetchall(): |
| data = Data(x) |
| if not runs.has_key(data.run): |
| runs[data.run] = {} |
| if not runs[data.run].has_key(data.x): |
| runs[data.run][data.x] = {} |
| |
| runs[data.run][data.x][data.y] = data.data |
| return runs |