This patch adds a very simple process tracing mechanism to the Linux kernel. The intent is to use this for tracing of early boot processes (usually program and commands run in your RC scripts). This will add a lot of extra printk messages during startup. It is recommended that you increase your kernel log buffer size by setting CONFIG_LOG_BUF_SHIFT to 17 or more. After applying the patch and booting, collect the messages with 'dmesg' and use scripts/procgraph to sort the processes that were seen during the collection interval. See "procgraph -h" for online usage help. fs/exec.c | 1 + kernel/exit.c | 20 +++++ kernel/fork.c | 2 + kernel/kthread.c | 1 + scripts/procgraph | 216 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 240 insertions(+), 0 deletions(-) Signed-off-by: Tim Bird diff --git a/fs/exec.c b/fs/exec.c index cecee50..435389b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1340,6 +1340,7 @@ int do_execve(char * filename, retval = search_binary_handler(bprm,regs); if (retval >= 0) { /* execve success */ + printk("exec: %d -> %s\n", current->pid, filename); security_bprm_free(bprm); acct_update_integrals(current); free_bprm(bprm); diff --git a/kernel/exit.c b/kernel/exit.c index 85a83c8..587e95f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -157,6 +157,8 @@ void release_task(struct task_struct * p) { struct task_struct *leader; int zap_leader; + struct timespec t, p_wall_time, p_idle; + repeat: tracehook_prepare_release_task(p); atomic_dec(&p->user->processes); @@ -195,6 +197,24 @@ repeat: write_unlock_irq(&tasklist_lock); release_thread(p); + + /* write out process times also */ + do_posix_clock_monotonic_gettime(&t); + monotonic_to_bootbased(&t); + p_wall_time = timespec_sub(t, p->real_start_time); + printk("exit: %d - real %lu.%03lu", p->pid, p_wall_time.tv_sec, + p_wall_time.tv_nsec / (NSEC_PER_SEC / 1000)); + cputime_to_timespec(p->utime, &t); + p_idle = timespec_sub(p_wall_time, t); + printk(" user %lu.%03lu", t.tv_sec, + t.tv_nsec / (NSEC_PER_SEC / 1000)); + cputime_to_timespec(p->stime, &t); + printk(" sys %lu.%03lu", t.tv_sec, + t.tv_nsec / (NSEC_PER_SEC / 1000)); + p_idle = timespec_sub(p_idle, t); + printk(" nonrun %lu.%03lu\n", p_idle.tv_sec, + p_idle.tv_nsec / (NSEC_PER_SEC / 1000)); + call_rcu(&p->rcu, delayed_put_task_struct); p = leader; diff --git a/kernel/fork.c b/kernel/fork.c index 7ce2ebe..360b4ac 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1371,6 +1371,8 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } + printk("fork: %d -> %d\n", p->real_parent->pid, p->pid); + tracehook_report_clone(trace, regs, clone_flags, nr, p); /* diff --git a/kernel/kthread.c b/kernel/kthread.c index 96cff2f..073fa98 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -92,6 +92,7 @@ static void create_kthread(struct kthread_create_info *create) /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); + printk("kthread: %d -> %pF\n", pid, create->threadfn); if (pid < 0) { create->result = ERR_PTR(pid); } else { diff --git a/scripts/procgraph b/scripts/procgraph new file mode 100755 index 0000000..e704fa0 --- /dev/null +++ b/scripts/procgraph @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# +# procgraph: Read list of kernel messages with fork, exec and exit +# instrumented, and generate a list of processes. +# +# display list in sorted order +# +# Copyright 2008 Sony Corporation +# +# GPL 2.0 applies. + +import os, sys +import re + +def err_usage(msg): + print "Error:", msg + usage() + +def usage(): + print """Usage: procgraph [] + +This program parses the output from a set of printk message lines which +have timestampes (see the CONFIG_PRINTK_TIME option or +the kernel command line option "time" is specified). + +Options: + -h Show this usage help. + -n Only show lines of output + -s Sort the results by the indicated column. + Allowed column ids are: + 'p' - pid (default) + 'n' - program name + 's' - start time + 'd' - duration + 'a' - active time + 'i' - idle time +""" + sys.exit(1) + +# average lines looks like: +#[ 17.000000] kthread: 1 -> ksoftirqd+0x0/0xd8 +#[ 22.718505] fork: 824 -> 867 +#[ 22.728607] exec: 867 -> /bin/echo +#[ 22.756256] exit: 867 - real 0.038 user 0.007 sys 0.023 nonrun 0.007 + +class proc_class: + def __init__(self, pid): + self.pid = pid + self.ppid = -1 + self.start_time = 0 + self.name = "unknown" + self.exec_time = 0 + self.duration = 99999999 + self.exit_time = 0.0 + self.real = 0.0 + self.user = 0.0 + self.sys = 0.0 + self.nonrun = 0.0 + self.duration = 0.0 + self.active_time = 0.0 + self.idle = 0.0 + +sort_order = "p" +allowed_sort_orders = "psndai" +def cmp_proc(a, b): + global sort_order + + if sort_order == 'p': + return cmp(a.pid, b.pid) + if sort_order == 's': + return cmp(a.start_time, b.start_time) + if sort_order == 'n': + return cmp(os.path.basename(a.name), os.path.basename(b.name)) + if sort_order == 'd': + return cmp(a.duration, b.duration) + if sort_order == 'a': + return cmp(a.active_time, b.active_time) + if sort_order == 'i': + return cmp(a.idle, b.idle) + return cmp(a.pid, b.pid) + +def parse_procs(lines): + pm = {} + kthread_ppid = -1 + kthread_name = "" + for line in lines: + #print line + if not line.startswith('['): + continue + + # split on closing bracket + (time_str, rest) = line[1:].split(']',1) + time = float(time_str) + + m = re.match(" kthread: ([0-9]+) -> (.*)", rest) + if m: + # save name for later use with fork + kthread_ppid = int(m.group(1)) + kthread_name = '['+m.group(2).split("+")[0]+']' + continue + + m = re.match(" fork: ([0-9]+) -> ([0-9]+)", rest) + if m: + pid = int(m.group(2)) + #print "pid=", pid + proc = proc_class(pid) + + proc.ppid = int(m.group(1)) + proc.start_time = time + if kthread_name: + # could double-check kthread_ppid here + proc.name = kthread_name + kthread_name = "" + + if proc.name=="unknown": + try: + proc.name=pm[proc.ppid].name + '*' + except: + pass + pm[pid] = proc + continue + + m = re.match(" exec: ([0-9]+) -> (.+)", rest) + if m: + pid = int(m.group(1)) + #print "pid=", pid + if not pm.has_key(pid): + proc = proc_class(pid) + pm[pid] = proc + + pm[pid].name = m.group(2) + pm[pid].exec_time = time + continue + + m = re.match(" exit: ([0-9]+) - real ([0-9.]+) user ([0-9.]+) sys ([0-9.]+) nonrun ([0-9.]+)", rest) + if m: + pid = int(m.group(1)) + #print "pid=", pid + if not pm.has_key(pid): + proc = proc_class(pid) + pm[pid] = proc + + proc = pm[pid] + proc.exit_time = time + proc.real = float(m.group(2)) + proc.user = float(m.group(3)) + proc.sys = float(m.group(4)) + proc.nonrun = float(m.group(5)) + proc.duration = proc.exit_time - proc.start_time + proc.active_time = proc.user + proc.sys + proc.idle = proc.duration - proc.active_time + continue + + return pm + +def show_title(): + print "PPid Pid Program Start Duration Active Idle" + print "----- ----- ------------------------ -------- -------- -------- --------" + +def show_proc(proc): + print "%5d %5d %24s %8.3f %8.3f %8.3f %8.3f" % \ + (proc.ppid, proc.pid, proc.name, proc.start_time, + proc.duration, proc.active_time, proc.idle) + +def main(): + global sort_order + + filein = "" + output_limit = 0 + + args = sys.argv[1:] + i = 0 + while i < len(args): + arg = args[i] + if arg=="-h": + usage() + if arg=="-s": + sort_order = args[i+1] + if sort_order not in allowed_sort_orders: + err_usage("Invalid sort order '%s' specified" % sort_order) + i += 2 + continue + + if arg=="-n": + try: + output_limit = int(args[i+1]) + except: + err_usage("Invalid count '%s' for option -n" % args[i+1]) + i += 2 + continue + + filein = arg + i += 1 + + if not filein: + err_usage("No input file specified") + + try: + lines = open(filein,"r").readlines() + except: + err_usage("Problem opening input file: %s" % filein) + + pm = parse_procs(lines) + + show_title() + proc_list = pm.values() + proc_list.sort(cmp_proc) + i = 0 + for proc in proc_list: + show_proc(proc) + i += 1 + if output_limit and i>=output_limit: + break + + +main()