Merge Sandia changes with upstream
diff --git a/pthread.c b/pthread.c
index efd4b44..d9316da 100644
--- a/pthread.c
+++ b/pthread.c
@@ -39,6 +39,8 @@
#include "spinlock_alpha.h"
#elif defined(__sparc)
#include "spinlock_sparc.h"
+#elif defined (__arm__)
+ #include "spinlock_arm.h"
#else
#error "spinlock routines not available for your arch!\n"
#endif
@@ -61,9 +63,9 @@
#endif
//Size and alignment requirements of "real" (NPTL/LinuxThreads) thread control block
-#define TCB_SIZE 512
-#define TCB_ALIGN sizeof(double)
-//TODO: Figure out real (NPTL/LinuxThreads) TCB space. 512 bytes should be enough.
+#define NPTL_TCB_SIZE 1184 // sizeof (struct pthread)
+#define NPTL_TCB_ALIGN sizeof(double)
+#define NPTL_TCBHEAD_T_SIZE (sizeof(tcbhead_t))
//Thread control structure
typedef struct {
@@ -133,26 +135,52 @@
thread_block_info.stack_guard_size = 2048;
//Total thread block size -- this is what we'll request to mmap
- size_t sz = sizeof(pthread_tcb_t) + thread_block_info.tls_memsz + TCB_SIZE + thread_block_info.stack_guard_size + CHILD_STACK_SIZE;
+ #if TLS_TCB_AT_TP
+ size_t sz = sizeof(pthread_tcb_t) + thread_block_info.tls_memsz + NPTL_TCBHEAD_T_SIZE + thread_block_info.stack_guard_size + CHILD_STACK_SIZE;
+ #elif TLS_DTV_AT_TP
+ size_t sz = sizeof(pthread_tcb_t) + thread_block_info.tls_memsz + NPTL_TCB_SIZE + NPTL_TCBHEAD_T_SIZE + thread_block_info.stack_guard_size + CHILD_STACK_SIZE;
+ #else
+ #error "TLS_TCB_AT_TP xor TLS_DTV_AT_TP must be defined"
+ #endif
//Note that TCB_SIZE is the "real" TCB size, not ours, which we leave zeroed (but some variables, notably errno, are somewhere inside there)
//Align to multiple of CHILD_STACK_SIZE
sz += CHILD_STACK_SIZE - 1;
thread_block_info.total_size = (sz>>CHILD_STACK_BITS)<<CHILD_STACK_BITS;
-
}
-
//Set up TLS block in current thread
+// @param th_block_addr: beginning of entire thread memory space
static void setup_thread_tls(void* th_block_addr) {
+ size_t tcb_offset = 0;
+ void *tlsblock = NULL;
+ char *tls_start_ptr = NULL;
+
+ #if TLS_DTV_AT_TP
+ th_block_addr += NPTL_TCB_SIZE;
+ #endif
+
/* Compute the (real) TCB offset */
- size_t tcb_offset = roundup(thread_block_info.tls_memsz, TCB_ALIGN);
+ #if TLS_DTV_AT_TP
+ tcb_offset = roundup(NPTL_TCBHEAD_T_SIZE, NPTL_TCB_ALIGN);
+ #elif TLS_TCB_AT_TP
+ tcb_offset = roundup(thread_block_info.tls_memsz, NPTL_TCB_ALIGN);
+ #else
+ #error "TLS_TCB_AT_TP xor TLS_DTV_AT_TP must be defined"
+ #endif
+
/* Align the TLS block. */
- void* tlsblock = (void *) (((uintptr_t) th_block_addr + thread_block_info.tls_align - 1)
+ tlsblock = (void *) (((uintptr_t) th_block_addr + thread_block_info.tls_align - 1)
& ~(thread_block_info.tls_align - 1));
/* Initialize the TLS block. */
- char* tls_start_ptr = ((char *) tlsblock + tcb_offset
- - roundup (thread_block_info.tls_memsz, thread_block_info.tls_align ?: 1));
+ #if TLS_DTV_AT_TP
+ tls_start_ptr = ((char *) tlsblock + tcb_offset);
+ #elif TLS_TCB_AT_TP
+ tls_start_ptr = ((char *) tlsblock + tcb_offset
+ - roundup (thread_block_info.tls_memsz, thread_block_info.tls_align ?: 1));
+ #else
+ #error "TLS_TCB_AT_TP xor TLS_DTV_AT_TP must be defined"
+ #endif
//DEBUG("Init TLS: Copying %d bytes from 0x%llx to 0x%llx\n", filesz, (uint64_t) initimage, (uint64_t) tls_start_ptr);
memcpy (tls_start_ptr, thread_block_info.tls_initimage, thread_block_info.tls_filesz);
@@ -161,7 +189,13 @@
//Note: We don't care about DTV pointers for x86/SPARC -- they're never used in static mode
/* Initialize the thread pointer. */
+ #if TLS_DTV_AT_TP
+ TLS_INIT_TP (tlsblock, 0);
+ #elif TLS_TCB_AT_TP
TLS_INIT_TP ((char *) tlsblock + tcb_offset, 0);
+ #else
+ #error "TLS_TCB_AT_TP xor TLS_DTV_AT_TP must be defined"
+ #endif
}
//Some NPTL definitions
@@ -174,7 +208,7 @@
__libc_multiple_threads = 1; //tell libc we're multithreaded (NPTL-specific)
populate_thread_block_info();
void* ptr = mmap(0, thread_block_info.total_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- setup_thread_tls(ptr);
+ setup_thread_tls(ptr + sizeof(pthread_tcb_t));
}
@@ -211,7 +245,7 @@
tcb->child_finished = 0;
tcb->start_routine = start_routine;
tcb->arg = arg;
- tcb->tls_start_addr = (void*)(((char*)thread_block) + sizeof(pthread_tcb_t)); //right after tcb
+ tcb->tls_start_addr = (void*)(((char*)thread_block) + sizeof(pthread_tcb_t)); //right after m5's tcb
tcb->stack_start_addr = (void*) (((char*) thread_block) + thread_block_size - thread_block_info.stack_guard_size); //end of thread_block
*thread=(pthread_t) thread_block;
diff --git a/spinlock_arm.h b/spinlock_arm.h
new file mode 100644
index 0000000..6f6803e
--- /dev/null
+++ b/spinlock_arm.h
@@ -0,0 +1,70 @@
+/*
+ m5threads, a pthread library for the M5 simulator
+ Copyright (C) 2009, Stanford University
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+
+#ifndef __SPINLOCK_ARM_H__
+#define __SPINLOCK_ARM_H__
+
+/* Spin until *lock reads 0 (free), then atomically set it to 1 with an
+ * ldrex/strex retry loop; the trailing dmb orders the critical section. */
+static __inline__ void spin_lock (volatile int* lock) {
+    unsigned long tmp;
+    __asm__ __volatile__(
+"1: ldrex   %0, [%1]\n"        /* tmp = *lock (exclusive load) */
+"   cmp     %0, #0\n"          /* is the lock free? */
+"   strexeq %0, %2, [%1]\n"    /* if free: try *lock = 1; tmp = 0 on success */
+"   cmpeq   %0, #0\n"          /* if free: did the exclusive store succeed? */
+"   bne     1b\n"              /* lock held or store failed: retry */
+"   dmb\n"
+    : "=&r" (tmp)
+    : "r" (lock), "r" (1)
+    : "cc", "memory");         /* "memory": compiler must not move accesses across the acquire */
+}
+
+/* Release *lock: dmb orders earlier memory accesses before the plain
+ * store of 0 that marks the lock free. */
+static __inline__ void spin_unlock (volatile int* lock) {
+    __asm__ __volatile__(
+"   dmb\n"
+"   str     %1, [%0]\n"
+    :
+    : "r" (lock), "r" (0)
+    : "cc", "memory");   /* "memory": compiler must not sink accesses below the release */
+}
+
+
+/* Make one attempt to take *lock without spinning.
+ * Returns 1 if the lock was acquired, 0 if it was already held (any
+ * nonzero value) or the exclusive store did not succeed. */
+static __inline__ int trylock (volatile int* lock) {
+    unsigned long tmp;
+    __asm__ __volatile__(
+"   ldrex   %0, [%1]\n"        /* tmp = *lock (exclusive load) */
+"   cmp     %0, #0\n"
+"   strexeq %0, %2, [%1]\n"    /* if free: tmp = 0 when stored, 1 when not */
+    : "=&r" (tmp)
+    : "r" (lock), "r" (1)
+    : "cc", "memory");
+    if (tmp != 0)              /* lock was held, or strex failed */
+        return 0;
+    __asm__ __volatile__("dmb" : : : "memory");  /* barrier only once the lock is ours */
+    return 1;
+}
+
+#endif // __SPINLOCK_ARM_H__
diff --git a/tests/Makefile b/tests/Makefile
index 59224b7..4fc059b 100755
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -4,20 +4,27 @@
# 64-bit compiles
#Uncomment to use sparc/alpha cross-compilers
-CC := sparc64-unknown-linux-gnu-gcc
-CPP := sparc64-unknown-linux-gnu-g++
+#CC := sparc64-unknown-linux-gnu-gcc
+#CPP := sparc64-unknown-linux-gnu-g++
#CC := alpha-unknown-linux-gnu-gcc
#CPP := alpha-unknown-linux-gnu-g++
-
+CC := arm-linux-gnueabi-gcc
+CPP := arm-linux-gnueabi-g++
#CC := gcc
#CPP := g++
+# Needed for support of v7 assembly instructions on ARM architecture
+ARM_FLAGS := -march=armv7-a -marm
+
#CFLAGS := -ggdb3 -O3 -D__DEBUG
-CFLAGS := -g -O3 -DM5_PROFILING
+#CFLAGS := -g -O3 -DM5_PROFILING
+CFLAGS := -g -O3 $(ARM_FLAGS)
CPPFLAGS := $(CFLAGS)
+# ARM support for OpenMP not tested (test_omp.o)
TEST_OBJS := test_stackgrow.o test_pthreadbasic.o test_pthread.o test_atomic.o test_barrier.o test_lock.o test_malloc.o test_sieve.o test___thread.o test_omp.o
+#TEST_OBJS := test_stackgrow.o test_pthreadbasic.o test_pthread.o test_atomic.o test_barrier.o test_lock.o test_malloc.o test_sieve.o test___thread.o
TEST_PROGS := $(TEST_OBJS:.o=)
diff --git a/tests/test___thread.cpp b/tests/test___thread.cpp
index d597155..1a4617f 100644
--- a/tests/test___thread.cpp
+++ b/tests/test___thread.cpp
@@ -37,7 +37,7 @@
{
long long int id = (long long int)arg;
int i;
- printf("&local[%d]=%p\n", id, &local);
+ printf("&local[%lld]=%p\n", id, &local);
local += id;
for (i = 0; i < count; i++) {
local++;
@@ -79,13 +79,13 @@
}
long long int local = (long long int)run((void*)0);
- printf("local[0] = %d\n", local);
+ printf("local[0] = %lld\n", local);
for (i = 1 ; i < thread_count; i++) {
int joinResult = pthread_join(threads[i],
(void**)&local);
assert(joinResult == 0);
- printf("local[%d] = %d\n", i, local);
+ printf("local[%d] = %lld\n", i, local);
}
/*struct timeval endTime;
diff --git a/tests/test_atomic.cpp b/tests/test_atomic.cpp
index 5ead4cb..0deaa3a 100644
--- a/tests/test_atomic.cpp
+++ b/tests/test_atomic.cpp
@@ -48,7 +48,7 @@
pthread_mutex_lock(&lock);
int current = next;
- printf("[Iteration %d, Thread %d] Got lock\n", iteration, id);
+ printf("[Iteration %d, Thread %lld] Got lock\n", iteration, id);
intArray[current]++;
//Uncomment this snip for longer-running critical section
@@ -60,7 +60,7 @@
next = id;
- printf("[Iteration %d, Thread %d] Critical section done, previously next=%d, now next=%d\n", iteration, id, current, next);
+ printf("[Iteration %d, Thread %lld] Critical section done, previously next=%d, now next=%d\n", iteration, id, current, next);
pthread_mutex_unlock(&lock);
pthread_barrier_wait(&barrier);
diff --git a/tests/test_barrier.cpp b/tests/test_barrier.cpp
index 68683e5..0e7819b 100644
--- a/tests/test_barrier.cpp
+++ b/tests/test_barrier.cpp
@@ -29,9 +29,9 @@
void* run (void* arg) {
long long int my_id = (long long int) arg;
//A[my_id][0]++;
- printf("%i BEFORE\n", my_id);
+ printf("%lli BEFORE\n", my_id);
pthread_barrier_wait(&barrier);
- printf("%i AFTER\n", my_id);
+ printf("%lli AFTER\n", my_id);
//A[my_id][0]++;
return NULL;
}
diff --git a/tests/test_lock.cpp b/tests/test_lock.cpp
index f8670f6..da39a67 100644
--- a/tests/test_lock.cpp
+++ b/tests/test_lock.cpp
@@ -27,7 +27,7 @@
void* run1(void* arglist)
{
pthread_t id = pthread_self();
- printf("[run1] TID=%d\n", id);
+ printf("[run1] TID=%d\n", (int)id);
printf("[run1] started\n");
diff --git a/tests/test_malloc.cpp b/tests/test_malloc.cpp
index 5d6ca88..3c6c591 100644
--- a/tests/test_malloc.cpp
+++ b/tests/test_malloc.cpp
@@ -50,12 +50,12 @@
int bytes = iteration*(id +1);
void* ptr = malloc(bytes);
ptr_matrix[iteration][id] = ptr;
- printf("[ALLOC %d, Thread %d] Allocated %d bytes, from %x to %x\n", iteration, id, bytes, (uint32)ptr, (uint32)(((char*)ptr) + bytes - 1));
+ printf("[ALLOC %d, Thread %lld] Allocated %d bytes, from %p to %p\n", iteration, id, bytes, ptr, ((char*)ptr) + bytes - 1);
pthread_barrier_wait(&barrier);
int target = (id + iteration) % nthreads;
free(ptr_matrix[iteration][target]);
- printf("[ALLOC %d, Thread %d] Freed %d's allocation, %x\n", iteration, id, target, (uint32)ptr_matrix[iteration][target]);
+ printf("[ALLOC %d, Thread %lld] Freed %d's allocation, %p\n", iteration, id, target, ptr_matrix[iteration][target]);
//free(ptr_matrix[iteration][target]);
return NULL;
}
diff --git a/tests/test_pthreadbasic.cpp b/tests/test_pthreadbasic.cpp
index 85af155..001123c 100644
--- a/tests/test_pthreadbasic.cpp
+++ b/tests/test_pthreadbasic.cpp
@@ -35,7 +35,7 @@
pthread_t pth;
pthread_attr_t attr;
- printf("Main thread initialized. TID=%d\n", pthread_self());
+ printf("Main thread initialized. TID=%d\n", (int)pthread_self());
int result = pthread_attr_init(&attr);
assert(result == 0);
printf("Main thread called pthread_attr_init\n");
@@ -52,11 +52,11 @@
printf("Main thread creating 2nd thread...\n");
result = pthread_create(&pth2, &attr, run, NULL);
- printf("Main thread calling join w/ 1st thread (id=%llx)... (self=%llx)\n", pth, pthread_self());
+ printf("Main thread calling join w/ 1st thread (id=%lx)... (self=%lx)\n", pth, pthread_self());
pthread_join(pth, NULL);
- printf("Main thread calling join w/ 2nd thread (id=%llx)... (self=%llx)\n", pth2, pthread_self());
+ printf("Main thread calling join w/ 2nd thread (id=%lx)... (self=%lx)\n", pth2, pthread_self());
pthread_join(pth2, NULL);
- printf("Main thread has self=%d\n", pthread_self());
+ printf("Main thread has self=%d\n", (int)pthread_self());
printf("Main thread done.\n");
}
diff --git a/tests/test_stackgrow.cpp b/tests/test_stackgrow.cpp
index 5386a32..5f49620 100644
--- a/tests/test_stackgrow.cpp
+++ b/tests/test_stackgrow.cpp
@@ -31,7 +31,7 @@
void func (int* f1) {
int f2;
- printf("Addr frame 1 = %llx, Addr frame 2 = %llx\n", f1, &f2);
+ printf("Addr frame 1 = %p, Addr frame 2 = %p\n", f1, &f2);
if (&f2 > f1) {
printf("Stack grows up (and this threading library needs to be fixed for your arch...)\n");
} else {
diff --git a/tls_defs.h b/tls_defs.h
index 0b51a06..154275b 100644
--- a/tls_defs.h
+++ b/tls_defs.h
@@ -28,8 +28,19 @@
//These are mostly taken verbatim from glibc 2.3.6
//32 for ELF32 binaries, 64 for ELF64
-//TODO: Macro it
+#if defined(__LP64__)
#define __ELF_NATIVE_CLASS 64
+#else
+#define __ELF_NATIVE_CLASS 32
+#endif
+
+//Seems like all non-ARM M5 targets use TLS_TCB_AT_TP (defined in
+// platform-specific 'tls.h')
+#if defined(__arm__)
+#define TLS_DTV_AT_TP 1
+#else
+#define TLS_TCB_AT_TP 1
+#endif
/* Standard ELF types. */
@@ -164,6 +175,47 @@
# define TLS_INIT_TP(descr, secondcall) \
(__thread_self = (__typeof (__thread_self)) (descr), NULL)
+#elif defined (__arm__)
+
+typedef struct
+{
+ void *dtv;
+ void *private;
+} tcbhead_t;
+/* Issue the swi syscall numbered 0xf0005 (built in r7); yields a1 as int. */
+#define INTERNAL_SYSCALL_RAW(name, err, nr, args...)		\
+  ({ unsigned int _sys_result;					\
+     {								\
+       register int _a1 asm ("a1");				\
+       LOAD_ARGS_##nr (args)					\
+       asm volatile ("mov	r7, #0xf0000\n"			\
+		     "add	r7, r7, #0x0005\n"		\
+		     "swi	#0	@ syscall " #name	\
+		     : "=r" (_a1)				\
+		     : "i" (name) ASM_ARGS_##nr			\
+		     : "memory", "r7"); /* r7 is overwritten */	\
+       _sys_result = _a1;					\
+     }								\
+     (int) _sys_result; })
+
+#undef INTERNAL_SYSCALL_ARM
+#define INTERNAL_SYSCALL_ARM(name, err, nr, args...) \
+ INTERNAL_SYSCALL_RAW(__ARM_NR_##name, err, nr, args)
+
+#define LOAD_ARGS_0()
+
+#define ASM_ARGS_0
+
+#define LOAD_ARGS_1(a1) \
+ int _a1tmp = (int) (a1); \
+ LOAD_ARGS_0 () \
+ _a1 = _a1tmp;
+
+#define ASM_ARGS_1 ASM_ARGS_0, "r" (_a1)
+
+# define TLS_INIT_TP(descr, secondcall) \
+ INTERNAL_SYSCALL_ARM(set_tls, 0, 1, (descr))
+
#else
#error "No TLS defs for your architecture"
#endif