blob: 52990e9d48117b868b5391ca2dbd4840514d2f68 [file] [log] [blame]
/**
* Copyright (C) ARM Limited 2010-2014. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <trace/events/sched.h>
#include "gator.h"
/* Sizing for the per-cpu task-name dedup hash map (see emit_pid_name). */
#define TASK_MAP_ENTRIES 1024 /* must be power of 2 */
#define TASK_MAX_COLLISIONS 2
/*
 * Why the previous task stopped running, recorded on every sched_switch.
 * CPU_WAIT_TOTAL is the number of states and sizes the arrays below.
 */
enum {
	STATE_WAIT_ON_OTHER = 0,
	STATE_CONTENTION,
	STATE_WAIT_ON_IO,
	CPU_WAIT_TOTAL
};
/* Per-cpu hash map of (crc32(comm) << 32 | pid) keys already emitted. */
static DEFINE_PER_CPU(uint64_t *, taskname_keys);
/* Set to 1 on this cpu while collect_counters() runs from sched_switch. */
static DEFINE_PER_CPU(int, collecting);
// this array is never read as the cpu wait charts are derived counters
// the files are needed, nonetheless, to show that these counters are available
static ulong cpu_wait_enabled[CPU_WAIT_TOTAL];
static ulong sched_cpu_key[CPU_WAIT_TOTAL];
/*
 * Create the gatorfs directories for the derived CPU-wait counters.
 * The enabled/key files exist only so the host tool can discover these
 * counters; their values are never read back by this module.
 * Returns 0 on success, -1 if a directory could not be created.
 */
static int sched_trace_create_files(struct super_block *sb, struct dentry *root)
{
	static const struct {
		const char *dirname;
		int state;
	} waits[] = {
		{ "Linux_cpu_wait_contention", STATE_CONTENTION },
		{ "Linux_cpu_wait_io", STATE_WAIT_ON_IO },
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(waits); i++) {
		struct dentry *dir = gatorfs_mkdir(sb, root, waits[i].dirname);
		if (!dir) {
			return -1;
		}
		gatorfs_create_ulong(sb, dir, "enabled", &cpu_wait_enabled[waits[i].state]);
		gatorfs_create_ro_ulong(sb, dir, "key", &sched_cpu_key[waits[i].state]);
	}
	return 0;
}
static void emit_pid_name(struct task_struct *task)
{
bool found = false;
char taskcomm[TASK_COMM_LEN + 3];
unsigned long x, cpu = get_physical_cpu();
uint64_t *keys = &(per_cpu(taskname_keys, cpu)[(task->pid & 0xFF) * TASK_MAX_COLLISIONS]);
uint64_t value;
value = gator_chksum_crc32(task->comm);
value = (value << 32) | (uint32_t)task->pid;
// determine if the thread name was emitted already
for (x = 0; x < TASK_MAX_COLLISIONS; x++) {
if (keys[x] == value) {
found = true;
break;
}
}
if (!found) {
// shift values, new value always in front
uint64_t oldv, newv = value;
for (x = 0; x < TASK_MAX_COLLISIONS; x++) {
oldv = keys[x];
keys[x] = newv;
newv = oldv;
}
// emit pid names, cannot use get_task_comm, as it's not exported on all kernel versions
if (strlcpy(taskcomm, task->comm, TASK_COMM_LEN) == TASK_COMM_LEN - 1) {
// append ellipses if task->comm has length of TASK_COMM_LEN - 1
strcat(taskcomm, "...");
}
marshal_thread_name(task->pid, taskcomm);
}
}
/*
 * Read every registered gator event source and marshal one counter
 * frame stamped with @time. @task may be NULL, in which case the
 * per-process (read_proc) sources are skipped. Called from the
 * sched_switch probe with per-cpu 'collecting' set around it.
 */
static void collect_counters(u64 time, struct task_struct *task)
{
	int *buffer, len, cpu = get_physical_cpu();
	long long *buffer64;
	struct gator_interface *gi;

	if (marshal_event_header(time)) {
		list_for_each_entry(gi, &gator_events, list) {
			// A source provides either a 32-bit or a 64-bit reader
			if (gi->read) {
				len = gi->read(&buffer);
				marshal_event(len, buffer);
			} else if (gi->read64) {
				len = gi->read64(&buffer64);
				marshal_event64(len, buffer64);
			}
			if (gi->read_proc && task != NULL) {
				len = gi->read_proc(&buffer64, task);
				marshal_event64(len, buffer64);
			}
		}
		// Only check after writing all counters so that time and corresponding counters appear in the same frame
		buffer_check(cpu, BLOCK_COUNTER_BUF, time);
		// Commit buffers on timeout
		if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) {
			static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF };
			int i;

			for (i = 0; i < ARRAY_SIZE(buftypes); ++i) {
				gator_commit_buffer(cpu, buftypes[i], time);
			}
			// spinlocks are noops on uniprocessor machines and mutexes do not work in sched_switch context in
			// RT-Preempt full, so disable proactive flushing of the annotate frame on uniprocessor machines.
#ifdef CONFIG_SMP
			// Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full
			if (on_primary_core() && spin_trylock(&annotate_lock)) {
				gator_commit_buffer(0, ANNOTATE_BUF, time);
				spin_unlock(&annotate_lock);
			}
#endif
		}
	}
}
// special case used during a suspend of the system
// Emits an all-zero switch record (tgid 0, pid 0) so the capture shows
// the cpu switching to idle rather than the last task still running.
static void trace_sched_insert_idle(void)
{
	marshal_sched_trace_switch(0, 0, 0, 0);
}
// New task created: emit the child's exec cookie and name, then open
// its schedule-trace record so it appears in the capture immediately.
GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
{
	int cookie;
	int cpu = get_physical_cpu();

	cookie = get_exec_cookie(cpu, child);
	emit_pid_name(child);
	marshal_sched_trace_start(child->tgid, child->pid, cookie);
}
// Context-switch probe: samples all counters against the outgoing task
// and emits a switch record for the incoming one, tagged with why the
// outgoing task stopped running (contention / I/O wait / other).
// The tracepoint prototype gained/lost the rq argument in 2.6.35.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
#else
GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
#endif
{
	int cookie;
	int state;
	int cpu = get_physical_cpu();

	per_cpu(in_scheduler_context, cpu) = true;

	// do as much work as possible before disabling interrupts
	cookie = get_exec_cookie(cpu, next);
	emit_pid_name(next);

	// Classify why prev lost the cpu: still runnable means it was
	// preempted (contention); in_iowait means it blocked on I/O.
	if (prev->state == TASK_RUNNING) {
		state = STATE_CONTENTION;
	} else if (prev->in_iowait) {
		state = STATE_WAIT_ON_IO;
	} else {
		state = STATE_WAIT_ON_OTHER;
	}

	// NOTE(review): 'collecting' brackets the sampling window;
	// presumably read elsewhere in the driver — confirm before changing.
	per_cpu(collecting, cpu) = 1;
	collect_counters(gator_get_time(), prev);
	per_cpu(collecting, cpu) = 0;

	marshal_sched_trace_switch(next->tgid, next->pid, cookie, state);
	per_cpu(in_scheduler_context, cpu) = false;
}
// Task fully released by the kernel: close out its schedule-trace record.
GATOR_DEFINE_PROBE(sched_process_free, TP_PROTO(struct task_struct *p))
{
	marshal_sched_trace_exit(p->tgid, p->pid);
}
// Empty IPI callback: scheduled on each cpu purely to force a context
// switch so the freshly registered sched_switch probe fires everywhere
// (see register_scheduler_tracepoints).
static void do_nothing(void *info)
{
	// Intentionally do nothing
	(void)info;
}
/*
 * Attach the fork/switch/free scheduler tracepoints, unwinding any that
 * were already attached if a later registration fails.
 * Returns 0 on success, -1 on failure.
 */
static int register_scheduler_tracepoints(void)
{
	// register tracepoints
	if (GATOR_REGISTER_TRACE(sched_process_fork))
		goto fail_sched_process_fork;
	if (GATOR_REGISTER_TRACE(sched_switch))
		goto fail_sched_switch;
	if (GATOR_REGISTER_TRACE(sched_process_free))
		goto fail_sched_process_free;
	pr_debug("gator: registered tracepoints\n");

	// Now that the scheduler tracepoint is registered, force a context switch
	// on all cpus to capture what is currently running.
	on_each_cpu(do_nothing, NULL, 0);

	return 0;

	// unregister tracepoints on error (reverse order of registration)
fail_sched_process_free:
	GATOR_UNREGISTER_TRACE(sched_switch);
fail_sched_switch:
	GATOR_UNREGISTER_TRACE(sched_process_fork);
fail_sched_process_fork:
	pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
	return -1;
}
static int gator_trace_sched_start(void)
{
int cpu, size;
for_each_present_cpu(cpu) {
size = TASK_MAP_ENTRIES * TASK_MAX_COLLISIONS * sizeof(uint64_t);
per_cpu(taskname_keys, cpu) = (uint64_t *)kmalloc(size, GFP_KERNEL);
if (!per_cpu(taskname_keys, cpu))
return -1;
memset(per_cpu(taskname_keys, cpu), 0, size);
}
return register_scheduler_tracepoints();
}
// Hook invoked when a cpu leaves the capture (name suggests cpu
// offline/hotplug — confirm against the gator core wiring): record the
// cpu as idle so its last task is not shown as still running.
static void gator_trace_sched_offline(void)
{
	trace_sched_insert_idle();
}
// Detach all three scheduler tracepoints registered by
// register_scheduler_tracepoints().
static void unregister_scheduler_tracepoints(void)
{
	GATOR_UNREGISTER_TRACE(sched_process_fork);
	GATOR_UNREGISTER_TRACE(sched_switch);
	GATOR_UNREGISTER_TRACE(sched_process_free);
	pr_debug("gator: unregistered tracepoints\n");
}
/*
 * Stop scheduler tracing: detach the tracepoints and release the
 * per-cpu task-name tables. Pointers are NULLed after kfree so a
 * repeated stop (or a stop after a failed start) cannot double-free.
 */
static void gator_trace_sched_stop(void)
{
	int cpu;

	unregister_scheduler_tracepoints();

	for_each_present_cpu(cpu) {
		kfree(per_cpu(taskname_keys, cpu));	/* kfree(NULL) is a no-op */
		per_cpu(taskname_keys, cpu) = NULL;
	}
}
/*
 * One-time setup: allocate a unique counter key for each CPU-wait
 * state and start with all the derived charts disabled.
 */
static void gator_trace_sched_init(void)
{
	int state;

	for (state = 0; state < CPU_WAIT_TOTAL; state++) {
		sched_cpu_key[state] = gator_events_get_key();
		cpu_wait_enabled[state] = 0;
	}
}