util/stats/dbinit.py - arm/gem5 - Git at Google

 # Copyright (c) 2003-2004 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
 # notice, this list of conditions and the following disclaimer;
 # redistributions in binary form must reproduce the above copyright
 # notice, this list of conditions and the following disclaimer in the
 # documentation and/or other materials provided with the distribution;
 # neither the name of the copyright holders nor the names of its
 # contributors may be used to endorse or promote products derived from
 # this software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # Authors: Nathan Binkert

 import MySQLdb

 class MyDB(object):
     def __init__(self, options):
         self.name = options.db
         self.host = options.host
         self.user = options.user
         self.passwd = options.passwd
         self.mydb = None
         self.cursor = None

     def admin(self):
         self.close()
         self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
                                     passwd=self.passwd)
         self.cursor = self.mydb.cursor()

     def connect(self):
         self.close()
         self.mydb = MySQLdb.connect(db=self.name, host=self.host,
                                     user=self.user, passwd=self.passwd)
         self.cursor = self.mydb.cursor()

     def close(self):
         if self.mydb is not None:
             self.mydb.close()
         self.cursor = None

     def query(self, sql):
         self.cursor.execute(sql)

     def drop(self):
         self.query('DROP DATABASE IF EXISTS %s' % self.name)

     def create(self):
         self.query('CREATE DATABASE %s' % self.name)

     def populate(self):
         #
         # Each run (or simulation) gets its own entry in the runs table to
         # group stats by where they were generated
         #
         # COLUMNS:
         #   'id' is a unique identifier for each run to be used in other
         #       tables.
         #   'name' is the user designated name for the data generated.  It is
         #       configured in the simulator.
         #   'user' identifies the user that generated the data for the given
         #       run.
         #   'project' another name to identify runs for a specific goal
         #   'date' is a timestamp for when the data was generated.  It can be
         #       used to easily expire data that was generated in the past.
         #   'expire' is a timestamp for when the data should be removed from
         #       the database so we don't have years worth of junk.
         #
         # INDEXES:
         #   'run' is indexed so you can find out details of a run if the run
         #       was retreived from the data table.
         #   'name' is indexed so that two all run names are forced to be unique
         #
         self.query('''
         CREATE TABLE runs(
             rn_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
             rn_name	VARCHAR(200)		NOT NULL,
             rn_sample	VARCHAR(32)		NOT NULL,
             rn_user	VARCHAR(32)		NOT NULL,
             rn_project	VARCHAR(100)            NOT NULL,
             rn_date	TIMESTAMP		NOT NULL,
             rn_expire	TIMESTAMP               NOT NULL,
             PRIMARY KEY (rn_id),
             UNIQUE (rn_name,rn_sample)
         ) TYPE=InnoDB''')

         #
         # The stat table gives us all of the data for a particular stat.
         #
         # COLUMNS:
         #   'stat' is a unique identifier for each stat to be used in other
         #       tables for references.
         #   'name' is simply the simulator derived name for a given
         #       statistic.
         #   'descr' is the description of the statistic and what it tells
         #       you.
         #   'type' defines what the stat tells you.  Types are:
         #       SCALAR: A simple scalar statistic that holds one value
         #       VECTOR: An array of statistic values.  Such a something that
         #           is generated per-thread.  Vectors exist to give averages,
         #	     pdfs, cdfs, means, standard deviations, etc across the
         #           stat values.
         #       DIST: Is a distribution of data.  When the statistic value is
         #	     sampled, its value is counted in a particular bucket.
         #           Useful for keeping track of utilization of a resource.
         #           (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
         #       VECTORDIST: Can be used when the distribution needs to be
         #	     factored out into a per-thread distribution of data for
         #	     example.  It can still be summed across threads to find
         #           the total distribution.
         #       VECTOR2D: Can be used when you have a stat that is not only
         #           per-thread, but it is per-something else.  Like
         #           per-message type.
         #       FORMULA: This statistic is a formula, and its data must be
         #	     looked up in the formula table, for indicating how to
         #           present its values.
         #   'subdata' is potentially used by any of the vector types to
         #       give a specific name to all of the data elements within a
         #       stat.
         #   'print' indicates whether this stat should be printed ever.
         #       (Unnamed stats don't usually get printed)
         #   'prereq' only print the stat if the prereq is not zero.
         #   'prec' number of decimal places to print
         #   'nozero' don't print zero values
         #   'nonan' don't print NaN values
         #   'total' for vector type stats, print the total.
         #   'pdf' for vector type stats, print the pdf.
         #   'cdf' for vector type stats, print the cdf.
         #
         #   The Following are for dist type stats:
         #   'min' is the minimum bucket value. Anything less is an underflow.
         #   'max' is the maximum bucket value. Anything more is an overflow.
         #   'bktsize' is the approximate number of entries in each bucket.
         #   'size' is the number of buckets. equal to (min/max)/bktsize.
         #
         # INDEXES:
         #   'stat' is indexed so that you can find out details about a stat
         #       if the stat id was retrieved from the data table.
         #   'name' is indexed so that you can simply look up data about a
         #       named stat.
         #
         self.query('''
         CREATE TABLE stats(
             st_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
             st_name	VARCHAR(255)		NOT NULL,
             st_descr	TEXT			NOT NULL,
             st_type	ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
                 "VECTOR2D", "FORMULA")	NOT NULL,
             st_print	BOOL			NOT NULL,
             st_prereq	SMALLINT UNSIGNED	NOT NULL,
             st_prec	TINYINT			NOT NULL,
             st_nozero	BOOL			NOT NULL,
             st_nonan	BOOL			NOT NULL,
             st_total	BOOL			NOT NULL,
             st_pdf	BOOL			NOT NULL,
             st_cdf	BOOL			NOT NULL,
             st_min	DOUBLE			NOT NULL,
             st_max	DOUBLE			NOT NULL,
             st_bktsize	DOUBLE			NOT NULL,
             st_size	SMALLINT UNSIGNED	NOT NULL,
             PRIMARY KEY (st_id),
             UNIQUE (st_name)
         ) TYPE=InnoDB''')

         #
         # This is the main table of data for stats.
         #
         # COLUMNS:
         #   'stat' refers to the stat field given in the stat table.
         #
         #   'x' referrs to the first dimension of a multi-dimensional stat. For
         #       a vector, x will start at 0 and increase for each vector
         #       element.
         #       For a distribution:
         #       -1: sum (for calculating standard deviation)
         #       -2: sum of squares (for calculating standard deviation)
         #       -3: total number of samples taken (for calculating
         #           standard deviation)
         #       -4: minimum value
         #       -5: maximum value
         #       -6: underflow
         #       -7: overflow
         #   'y' is used by a VECTORDIST and the VECTOR2D to describe the second
         #       dimension.
         #   'run' is the run that the data was generated from.  Details up in
         #       the run table
         #   'tick' is a timestamp generated by the simulator.
         #   'data' is the actual stat value.
         #
         # INDEXES:
         #   'stat' is indexed so that a user can find all of the data for a
         #       particular stat. It is not unique, because that specific stat
         #       can be found in many runs and samples, in addition to
         #       having entries for the mulidimensional cases.
         #   'run' is indexed to allow a user to remove all of the data for a
         #       particular execution run.  It can also be used to allow the
         #       user to print out all of the data for a given run.
         #
         self.query('''
         CREATE TABLE data(
             dt_stat	SMALLINT UNSIGNED	NOT NULL,
             dt_x	SMALLINT		NOT NULL,
             dt_y	SMALLINT		NOT NULL,
             dt_run	SMALLINT UNSIGNED	NOT NULL,
             dt_tick	BIGINT UNSIGNED		NOT NULL,
             dt_data	DOUBLE			NOT NULL,
             INDEX (dt_stat),
             INDEX (dt_run),
             UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick)
         ) TYPE=InnoDB;''')

         #
         # Names and descriptions for multi-dimensional stats (vectors, etc.)
         # are stored here instead of having their own entry in the statistics
         # table. This allows all parts of a single stat to easily share a
         # single id.
         #
         # COLUMNS:
         #   'stat' is the unique stat identifier from the stat table.
         #   'x' is the first dimension for multi-dimensional stats
         #       corresponding to the data table above.
         #   'y' is the second dimension for multi-dimensional stats
         #       corresponding to the data table above.
         #   'name' is the specific subname for the unique stat,x,y combination.
         #   'descr' is the specific description for the uniqe stat,x,y
         #        combination.
         #
         # INDEXES:
         #   'stat' is indexed so you can get the subdata for a specific stat.
         #
         self.query('''
         CREATE TABLE subdata(
             sd_stat	SMALLINT UNSIGNED	NOT NULL,
             sd_x	SMALLINT		NOT NULL,
             sd_y	SMALLINT		NOT NULL,
             sd_name	VARCHAR(255)		NOT NULL,
             sd_descr	TEXT,
             UNIQUE (sd_stat,sd_x,sd_y)
         ) TYPE=InnoDB''')


         #
         # The formula table is maintained separately from the data table
         # because formula data, unlike other stat data cannot be represented
         # there.
         #
         # COLUMNS:
         #   'stat' refers to the stat field generated in the stat table.
         #   'formula' is the actual string representation of the formula
         #       itself.
         #
         # INDEXES:
         #   'stat' is indexed so that you can just look up a formula.
         #
         self.query('''
         CREATE TABLE formulas(
             fm_stat	SMALLINT UNSIGNED	NOT NULL,
             fm_formula	BLOB			NOT NULL,
             PRIMARY KEY(fm_stat)
         ) TYPE=InnoDB''')

         #
         # Each stat used in each formula is kept in this table.  This way, if
         # you want to print out a particular formula, you can simply find out
         # which stats you need by looking in this table.  Additionally, when
         # you remove a stat from the stats table and data table, you remove
         # any references to the formula in this table.  When a formula is no
         # longer referred to, you remove its entry.
         #
         # COLUMNS:
         #   'stat' is the stat id from the stat table above.
         #   'child' is the stat id of a stat that is used for this formula.
         #       There may be many children for any given 'stat' (formula)
         #
         # INDEXES:
         #   'stat' is indexed so you can look up all of the children for a
         #       particular stat.
         #   'child' is indexed so that you can remove an entry when a stat is
         #       removed.
         #
         self.query('''
         CREATE TABLE formula_ref(
             fr_stat	SMALLINT UNSIGNED	NOT NULL,
             fr_run	SMALLINT UNSIGNED	NOT NULL,
             UNIQUE (fr_stat,fr_run),
             INDEX (fr_stat),
             INDEX (fr_run)
         ) TYPE=InnoDB''')

         # COLUMNS:
         #   'event' is the unique event id from the event_desc table
         #   'run' is simulation run id that this event took place in
         #   'tick' is the tick when the event happened
         #
         # INDEXES:
         #   'event' is indexed so you can look up all occurences of a
         #       specific event
         #   'run' is indexed so you can find all events in a run
         #   'tick' is indexed because we want the unique thing anyway
         #   'event,run,tick' is unique combination
         self.query('''
         CREATE TABLE events(
             ev_event	SMALLINT UNSIGNED	NOT NULL,
             ev_run	SMALLINT UNSIGNED	NOT NULL,
             ev_tick	BIGINT   UNSIGNED       NOT NULL,
             INDEX(ev_event),
             INDEX(ev_run),
             INDEX(ev_tick),
             UNIQUE(ev_event,ev_run,ev_tick)
         ) TYPE=InnoDB''')

         # COLUMNS:
         #   'id' is the unique description id
         #   'name' is the name of the event that occurred
         #
         # INDEXES:
         #   'id' is indexed because it is the primary key and is what you use
         #       to look up the descriptions
         #   'name' is indexed so one can find the event based on name
         #
         self.query('''
         CREATE TABLE event_names(
             en_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
             en_name	VARCHAR(255)		NOT NULL,
             PRIMARY KEY (en_id),
             UNIQUE (en_name)
         ) TYPE=InnoDB''')

     def clean(self):
         self.query('''
         DELETE data
         FROM data
         LEFT JOIN runs ON dt_run=rn_id
         WHERE rn_id IS NULL''')

         self.query('''
         DELETE formula_ref
         FROM formula_ref
         LEFT JOIN runs ON fr_run=rn_id
         WHERE rn_id IS NULL''')

         self.query('''
         DELETE formulas
         FROM formulas
         LEFT JOIN formula_ref ON fm_stat=fr_stat
         WHERE fr_stat IS NULL''')

         self.query('''
         DELETE stats
         FROM stats
         LEFT JOIN data ON st_id=dt_stat
         WHERE dt_stat IS NULL''')

         self.query('''
         DELETE subdata
         FROM subdata
         LEFT JOIN data ON sd_stat=dt_stat
         WHERE dt_stat IS NULL''')

         self.query('''
         DELETE events
         FROM events
         LEFT JOIN runs ON ev_run=rn_id
         WHERE rn_id IS NULL''')

         self.query('''
         DELETE event_names
         FROM event_names
         LEFT JOIN events ON en_id=ev_event
         WHERE ev_event IS NULL''')
	# Copyright (c) 2003-2004 The Regents of The University of Michigan
	# All rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions are
	# met: redistributions of source code must retain the above copyright
	# notice, this list of conditions and the following disclaimer;
	# redistributions in binary form must reproduce the above copyright
	# notice, this list of conditions and the following disclaimer in the
	# documentation and/or other materials provided with the distribution;
	# neither the name of the copyright holders nor the names of its
	# contributors may be used to endorse or promote products derived from
	# this software without specific prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	#
	# Authors: Nathan Binkert

	import MySQLdb

	class MyDB(object):
	def __init__(self, options):
	self.name = options.db
	self.host = options.host
	self.user = options.user
	self.passwd = options.passwd
	self.mydb = None
	self.cursor = None

	def admin(self):
	self.close()
	self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
	passwd=self.passwd)
	self.cursor = self.mydb.cursor()

	def connect(self):
	self.close()
	self.mydb = MySQLdb.connect(db=self.name, host=self.host,
	user=self.user, passwd=self.passwd)
	self.cursor = self.mydb.cursor()

	def close(self):
	if self.mydb is not None:
	self.mydb.close()
	self.cursor = None

	def query(self, sql):
	self.cursor.execute(sql)

	def drop(self):
	self.query('DROP DATABASE IF EXISTS %s' % self.name)

	def create(self):
	self.query('CREATE DATABASE %s' % self.name)

	def populate(self):
	#
	# Each run (or simulation) gets its own entry in the runs table to
	# group stats by where they were generated
	#
	# COLUMNS:
	# 'id' is a unique identifier for each run to be used in other
	# tables.
	# 'name' is the user designated name for the data generated. It is
	# configured in the simulator.
	# 'user' identifies the user that generated the data for the given
	# run.
	# 'project' another name to identify runs for a specific goal
	# 'date' is a timestamp for when the data was generated. It can be
	# used to easily expire data that was generated in the past.
	# 'expire' is a timestamp for when the data should be removed from
	# the database so we don't have years worth of junk.
	#
	# INDEXES:
	# 'run' is indexed so you can find out details of a run if the run
	# was retreived from the data table.
	# 'name' is indexed so that two all run names are forced to be unique
	#
	self.query('''
	CREATE TABLE runs(
	rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
	rn_name VARCHAR(200) NOT NULL,
	rn_sample VARCHAR(32) NOT NULL,
	rn_user VARCHAR(32) NOT NULL,
	rn_project VARCHAR(100) NOT NULL,
	rn_date TIMESTAMP NOT NULL,
	rn_expire TIMESTAMP NOT NULL,
	PRIMARY KEY (rn_id),
	UNIQUE (rn_name,rn_sample)
	) TYPE=InnoDB''')

	#
	# The stat table gives us all of the data for a particular stat.
	#
	# COLUMNS:
	# 'stat' is a unique identifier for each stat to be used in other
	# tables for references.
	# 'name' is simply the simulator derived name for a given
	# statistic.
	# 'descr' is the description of the statistic and what it tells
	# you.
	# 'type' defines what the stat tells you. Types are:
	# SCALAR: A simple scalar statistic that holds one value
	# VECTOR: An array of statistic values. Such a something that
	# is generated per-thread. Vectors exist to give averages,
	# pdfs, cdfs, means, standard deviations, etc across the
	# stat values.
	# DIST: Is a distribution of data. When the statistic value is
	# sampled, its value is counted in a particular bucket.
	# Useful for keeping track of utilization of a resource.
	# (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
	# VECTORDIST: Can be used when the distribution needs to be
	# factored out into a per-thread distribution of data for
	# example. It can still be summed across threads to find
	# the total distribution.
	# VECTOR2D: Can be used when you have a stat that is not only
	# per-thread, but it is per-something else. Like
	# per-message type.
	# FORMULA: This statistic is a formula, and its data must be
	# looked up in the formula table, for indicating how to
	# present its values.
	# 'subdata' is potentially used by any of the vector types to
	# give a specific name to all of the data elements within a
	# stat.
	# 'print' indicates whether this stat should be printed ever.
	# (Unnamed stats don't usually get printed)
	# 'prereq' only print the stat if the prereq is not zero.
	# 'prec' number of decimal places to print
	# 'nozero' don't print zero values
	# 'nonan' don't print NaN values
	# 'total' for vector type stats, print the total.
	# 'pdf' for vector type stats, print the pdf.
	# 'cdf' for vector type stats, print the cdf.
	#
	# The Following are for dist type stats:
	# 'min' is the minimum bucket value. Anything less is an underflow.
	# 'max' is the maximum bucket value. Anything more is an overflow.
	# 'bktsize' is the approximate number of entries in each bucket.
	# 'size' is the number of buckets. equal to (min/max)/bktsize.
	#
	# INDEXES:
	# 'stat' is indexed so that you can find out details about a stat
	# if the stat id was retrieved from the data table.
	# 'name' is indexed so that you can simply look up data about a
	# named stat.
	#
	self.query('''
	CREATE TABLE stats(
	st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
	st_name VARCHAR(255) NOT NULL,
	st_descr TEXT NOT NULL,
	st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
	"VECTOR2D", "FORMULA") NOT NULL,
	st_print BOOL NOT NULL,
	st_prereq SMALLINT UNSIGNED NOT NULL,
	st_prec TINYINT NOT NULL,
	st_nozero BOOL NOT NULL,
	st_nonan BOOL NOT NULL,
	st_total BOOL NOT NULL,
	st_pdf BOOL NOT NULL,
	st_cdf BOOL NOT NULL,
	st_min DOUBLE NOT NULL,
	st_max DOUBLE NOT NULL,
	st_bktsize DOUBLE NOT NULL,
	st_size SMALLINT UNSIGNED NOT NULL,
	PRIMARY KEY (st_id),
	UNIQUE (st_name)
	) TYPE=InnoDB''')

	#
	# This is the main table of data for stats.
	#
	# COLUMNS:
	# 'stat' refers to the stat field given in the stat table.
	#
	# 'x' referrs to the first dimension of a multi-dimensional stat. For
	# a vector, x will start at 0 and increase for each vector
	# element.
	# For a distribution:
	# -1: sum (for calculating standard deviation)
	# -2: sum of squares (for calculating standard deviation)
	# -3: total number of samples taken (for calculating
	# standard deviation)
	# -4: minimum value
	# -5: maximum value
	# -6: underflow
	# -7: overflow
	# 'y' is used by a VECTORDIST and the VECTOR2D to describe the second
	# dimension.
	# 'run' is the run that the data was generated from. Details up in
	# the run table
	# 'tick' is a timestamp generated by the simulator.
	# 'data' is the actual stat value.
	#
	# INDEXES:
	# 'stat' is indexed so that a user can find all of the data for a
	# particular stat. It is not unique, because that specific stat
	# can be found in many runs and samples, in addition to
	# having entries for the mulidimensional cases.
	# 'run' is indexed to allow a user to remove all of the data for a
	# particular execution run. It can also be used to allow the
	# user to print out all of the data for a given run.
	#
	self.query('''
	CREATE TABLE data(
	dt_stat SMALLINT UNSIGNED NOT NULL,
	dt_x SMALLINT NOT NULL,
	dt_y SMALLINT NOT NULL,
	dt_run SMALLINT UNSIGNED NOT NULL,
	dt_tick BIGINT UNSIGNED NOT NULL,
	dt_data DOUBLE NOT NULL,
	INDEX (dt_stat),
	INDEX (dt_run),
	UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick)
	) TYPE=InnoDB;''')

	#
	# Names and descriptions for multi-dimensional stats (vectors, etc.)
	# are stored here instead of having their own entry in the statistics
	# table. This allows all parts of a single stat to easily share a
	# single id.
	#
	# COLUMNS:
	# 'stat' is the unique stat identifier from the stat table.
	# 'x' is the first dimension for multi-dimensional stats
	# corresponding to the data table above.
	# 'y' is the second dimension for multi-dimensional stats
	# corresponding to the data table above.
	# 'name' is the specific subname for the unique stat,x,y combination.
	# 'descr' is the specific description for the uniqe stat,x,y
	# combination.
	#
	# INDEXES:
	# 'stat' is indexed so you can get the subdata for a specific stat.
	#
	self.query('''
	CREATE TABLE subdata(
	sd_stat SMALLINT UNSIGNED NOT NULL,
	sd_x SMALLINT NOT NULL,
	sd_y SMALLINT NOT NULL,
	sd_name VARCHAR(255) NOT NULL,
	sd_descr TEXT,
	UNIQUE (sd_stat,sd_x,sd_y)
	) TYPE=InnoDB''')


	#
	# The formula table is maintained separately from the data table
	# because formula data, unlike other stat data cannot be represented
	# there.
	#
	# COLUMNS:
	# 'stat' refers to the stat field generated in the stat table.
	# 'formula' is the actual string representation of the formula
	# itself.
	#
	# INDEXES:
	# 'stat' is indexed so that you can just look up a formula.
	#
	self.query('''
	CREATE TABLE formulas(
	fm_stat SMALLINT UNSIGNED NOT NULL,
	fm_formula BLOB NOT NULL,
	PRIMARY KEY(fm_stat)
	) TYPE=InnoDB''')

	#
	# Each stat used in each formula is kept in this table. This way, if
	# you want to print out a particular formula, you can simply find out
	# which stats you need by looking in this table. Additionally, when
	# you remove a stat from the stats table and data table, you remove
	# any references to the formula in this table. When a formula is no
	# longer referred to, you remove its entry.
	#
	# COLUMNS:
	# 'stat' is the stat id from the stat table above.
	# 'child' is the stat id of a stat that is used for this formula.
	# There may be many children for any given 'stat' (formula)
	#
	# INDEXES:
	# 'stat' is indexed so you can look up all of the children for a
	# particular stat.
	# 'child' is indexed so that you can remove an entry when a stat is
	# removed.
	#
	self.query('''
	CREATE TABLE formula_ref(
	fr_stat SMALLINT UNSIGNED NOT NULL,
	fr_run SMALLINT UNSIGNED NOT NULL,
	UNIQUE (fr_stat,fr_run),
	INDEX (fr_stat),
	INDEX (fr_run)
	) TYPE=InnoDB''')

	# COLUMNS:
	# 'event' is the unique event id from the event_desc table
	# 'run' is simulation run id that this event took place in
	# 'tick' is the tick when the event happened
	#
	# INDEXES:
	# 'event' is indexed so you can look up all occurences of a
	# specific event
	# 'run' is indexed so you can find all events in a run
	# 'tick' is indexed because we want the unique thing anyway
	# 'event,run,tick' is unique combination
	self.query('''
	CREATE TABLE events(
	ev_event SMALLINT UNSIGNED NOT NULL,
	ev_run SMALLINT UNSIGNED NOT NULL,
	ev_tick BIGINT UNSIGNED NOT NULL,
	INDEX(ev_event),
	INDEX(ev_run),
	INDEX(ev_tick),
	UNIQUE(ev_event,ev_run,ev_tick)
	) TYPE=InnoDB''')

	# COLUMNS:
	# 'id' is the unique description id
	# 'name' is the name of the event that occurred
	#
	# INDEXES:
	# 'id' is indexed because it is the primary key and is what you use
	# to look up the descriptions
	# 'name' is indexed so one can find the event based on name
	#
	self.query('''
	CREATE TABLE event_names(
	en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
	en_name VARCHAR(255) NOT NULL,
	PRIMARY KEY (en_id),
	UNIQUE (en_name)
	) TYPE=InnoDB''')

	def clean(self):
	self.query('''
	DELETE data
	FROM data
	LEFT JOIN runs ON dt_run=rn_id
	WHERE rn_id IS NULL''')

	self.query('''
	DELETE formula_ref
	FROM formula_ref
	LEFT JOIN runs ON fr_run=rn_id
	WHERE rn_id IS NULL''')

	self.query('''
	DELETE formulas
	FROM formulas
	LEFT JOIN formula_ref ON fm_stat=fr_stat
	WHERE fr_stat IS NULL''')

	self.query('''
	DELETE stats
	FROM stats
	LEFT JOIN data ON st_id=dt_stat
	WHERE dt_stat IS NULL''')

	self.query('''
	DELETE subdata
	FROM subdata
	LEFT JOIN data ON sd_stat=dt_stat
	WHERE dt_stat IS NULL''')

	self.query('''
	DELETE events
	FROM events
	LEFT JOIN runs ON ev_run=rn_id
	WHERE rn_id IS NULL''')

	self.query('''
	DELETE event_names
	FROM event_names
	LEFT JOIN events ON en_id=ev_event
	WHERE ev_event IS NULL''')