Profiling hooks
- Added profiling hooks to measure synchronization usage and overhead in M5
- Updated README file
diff --git a/README b/README
index 4600ee7..cd4f485 100644
--- a/README
+++ b/README
@@ -5,6 +5,13 @@
Changelog
---------
+
+14-Feb-09
+- Added support for OpenMP in SPARC.
+- Fixed stack guard to work in SPARC64 (stack bias was insufficient).
+- Added optional profiling hooks to measure synchronization use. Compile with -DM5_PROFILING to use M5 profiling syscalls (currently implemented for SPARC only; other architectures fail to compile with this flag).
+- The Makefile now builds test programs linked with both m5threads (test_XXX) and the standard pthread library (test_XXX_p). This is done for debugging purposes, but note that **the _p binaries won't work in M5**.
+
27-Jan-09
- Added support for TLS in SPARC and x86-64 in static binaries. Alpha no longer works due to having unimplemented TLS support.
- Fixed a race condition in rwlocks and condition variables.
diff --git a/profiling_hooks.h b/profiling_hooks.h
new file mode 100644
index 0000000..98e57a1
--- /dev/null
+++ b/profiling_hooks.h
@@ -0,0 +1,66 @@
+/*
+ m5threads, a pthread library for the M5 simulator
+ Copyright (C) 2009, Stanford University
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ Author: Daniel Sanchez
+*/
+
+/* Profiling hooks used by m5threads to measure synchronization usage */
+
+//TODO: Profiling hooks for non-M5 mode
+
+#if defined(M5_PROFILING)
+ /* M5 profiling syscall: number goes in %g1, the sync object's address in %o0, then software trap 0x6d. */
+ /* No trailing ';' (callers supply it). "cc": the trap may alter condition codes; "memory": keep memory accesses from moving across a hook. */
+ #if defined (__sparc)
+ #define m5_prof_syscall(syscall_num, arg) __asm__ __volatile__ ( \
+ "mov " #syscall_num ", %%g1\n\t" \
+ "mov %0, %%o0\n\t" \
+ "ta 0x6d\n\t" \
+ :: "r"(arg) : "g1", "o0", "cc", "memory" \
+ )
+ #else
+ #error "M5 profiling hooks not implemented for your architecture, write them"
+ #endif
+ /* Syscall numbers 1040-1047: one START/END pair per kind of synchronization operation; addr identifies the object. */
+ #define PROFILE_LOCK_START(addr) m5_prof_syscall(1040, addr)
+ #define PROFILE_LOCK_END(addr) m5_prof_syscall(1041, addr)
+
+ #define PROFILE_UNLOCK_START(addr) m5_prof_syscall(1042, addr)
+ #define PROFILE_UNLOCK_END(addr) m5_prof_syscall(1043, addr)
+
+ #define PROFILE_BARRIER_WAIT_START(addr) m5_prof_syscall(1044, addr)
+ #define PROFILE_BARRIER_WAIT_END(addr) m5_prof_syscall(1045, addr)
+
+ #define PROFILE_COND_WAIT_START(addr) m5_prof_syscall(1046, addr)
+ #define PROFILE_COND_WAIT_END(addr) m5_prof_syscall(1047, addr)
+
+#else
+ /* Without M5_PROFILING each hook is a single no-op expression; addr is not evaluated. Use as a statement: HOOK(p); */
+ #define PROFILE_LOCK_START(addr) ((void)0)
+ #define PROFILE_LOCK_END(addr) ((void)0)
+
+ #define PROFILE_UNLOCK_START(addr) ((void)0)
+ #define PROFILE_UNLOCK_END(addr) ((void)0)
+
+ #define PROFILE_BARRIER_WAIT_START(addr) ((void)0)
+ #define PROFILE_BARRIER_WAIT_END(addr) ((void)0)
+
+ #define PROFILE_COND_WAIT_START(addr) ((void)0)
+ #define PROFILE_COND_WAIT_END(addr) ((void)0)
+#endif
+
diff --git a/pthread.c b/pthread.c
index 972e47a..b9b85d9 100644
--- a/pthread.c
+++ b/pthread.c
@@ -45,6 +45,7 @@
#include "pthread_defs.h"
#include "tls_defs.h"
+#include "profiling_hooks.h"
#define restrict
@@ -126,8 +127,9 @@
}
//Set a stack guard size
- //In SPARC/M5, this is needed to avoid out-of-range accesses on register saves...
- //See src/arch/sparc/process.hh -- sets stackBias to 2047
+ //In SPARC, this is actually needed to avoid out-of-range accesses on register saves...
+ //Largest I have seen is 2048 (sparc64)
+ //You could avoid this in theory by compiling with -mno-stack-bias
thread_block_info.stack_guard_size = 2048;
//Total thread block size -- this is what we'll request to mmap
@@ -303,12 +305,16 @@
}
int pthread_mutex_lock (pthread_mutex_t* lock) {
+ PROFILE_LOCK_START(lock);
spin_lock((int*)&lock->PTHREAD_MUTEX_T_COUNT);
+ PROFILE_LOCK_END(lock);
return 0;
}
int pthread_mutex_unlock (pthread_mutex_t* lock) {
+ PROFILE_UNLOCK_START(lock);
spin_unlock((int*)&lock->PTHREAD_MUTEX_T_COUNT);
+ PROFILE_UNLOCK_END(lock);
return 0;
}
@@ -319,6 +325,9 @@
int pthread_mutex_trylock (pthread_mutex_t* mutex) {
int acquired = trylock((int*)&mutex->PTHREAD_MUTEX_T_COUNT);
if (acquired == 1) {
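+ //Profiling is not really accurate here: START/END are only emitted back-to-back after a successful acquire, and failed attempts (EBUSY) are not recorded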
+ PROFILE_LOCK_START(mutex);
+ PROFILE_LOCK_END(mutex);
return 0;
}
return EBUSY;
@@ -339,6 +348,7 @@
}
int pthread_rwlock_rdlock (pthread_rwlock_t* lock) {
+ PROFILE_LOCK_START(lock);
do {
// this is to reduce the contention and a possible live-lock to lock->access_lock
while (1) {
@@ -352,14 +362,17 @@
if ((pthread_t)PTHREAD_RWLOCK_T_WRITER(lock) == -1) {
PTHREAD_RWLOCK_T_READERS(lock)++;
spin_unlock((int*)&(PTHREAD_RWLOCK_T_LOCK(lock)));
+ PROFILE_LOCK_END(lock);
return 0;
}
spin_unlock((int*)&(PTHREAD_RWLOCK_T_LOCK(lock)));
} while (1);
+ PROFILE_LOCK_END(lock);
return 0;
}
int pthread_rwlock_wrlock (pthread_rwlock_t* lock) {
+ PROFILE_LOCK_START(lock);
do {
while (1) {
pthread_t writer = PTHREAD_RWLOCK_T_WRITER(lock);
@@ -376,14 +389,17 @@
if ((pthread_t)PTHREAD_RWLOCK_T_WRITER(lock) == -1 && PTHREAD_RWLOCK_T_READERS(lock) == 0) {
PTHREAD_RWLOCK_T_WRITER(lock) = pthread_self();
spin_unlock((int*)&(PTHREAD_RWLOCK_T_LOCK(lock)));
+ PROFILE_LOCK_END(lock);
return 0;
}
spin_unlock((int*)&(PTHREAD_RWLOCK_T_LOCK(lock)));
} while (1);
+ PROFILE_LOCK_END(lock);
return 0;
}
int pthread_rwlock_unlock (pthread_rwlock_t* lock) {
+ PROFILE_UNLOCK_START(lock);
spin_lock((int*)&(PTHREAD_RWLOCK_T_LOCK(lock)));
if (pthread_self() == PTHREAD_RWLOCK_T_WRITER(lock)) {
// the write lock will be released
@@ -393,6 +409,7 @@
PTHREAD_RWLOCK_T_READERS(lock) = PTHREAD_RWLOCK_T_READERS(lock) - 1;
}
spin_unlock((int*)&(PTHREAD_RWLOCK_T_LOCK(lock)));
+ PROFILE_UNLOCK_END(lock);
return 0;
}
@@ -489,6 +506,7 @@
}
int pthread_cond_wait (pthread_cond_t* cond, pthread_mutex_t* lock) {
+ PROFILE_COND_WAIT_START(cond);
volatile int* thread_count = &(PTHREAD_COND_T_THREAD_COUNT(cond));
volatile int* flag = &(PTHREAD_COND_T_FLAG(cond));
volatile int* count_lock = &(PTHREAD_COND_T_COUNT_LOCK(cond));
@@ -514,6 +532,7 @@
}
spin_unlock(count_lock);
pthread_mutex_lock(lock);
+ PROFILE_COND_WAIT_END(cond);
return 0;
}
@@ -602,6 +621,7 @@
int pthread_barrier_wait (pthread_barrier_t* barrier)
{
+ PROFILE_BARRIER_WAIT_START(barrier);
int const initial_direction = PTHREAD_BARRIER_T_DIRECTION(barrier); //0 == up, 1 == down
if (initial_direction == 0) {
@@ -627,7 +647,7 @@
//spin
direction = PTHREAD_BARRIER_T_DIRECTION(barrier);
}
-
+ PROFILE_BARRIER_WAIT_END(barrier);
return 0;
}
diff --git a/tests/Makefile b/tests/Makefile
index ddd295f..59224b7 100755
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -13,7 +13,7 @@
#CPP := g++
#CFLAGS := -ggdb3 -O3 -D__DEBUG
-CFLAGS := -g -O3
+CFLAGS := -g -O3 -DM5_PROFILING
CPPFLAGS := $(CFLAGS)