diff options
-rw-r--r-- | libc-parts/ChangeLog | 35 | ||||
-rw-r--r-- | libc-parts/profile.c | 332 | ||||
-rw-r--r-- | libc-parts/profile.h | 18 | ||||
-rw-r--r-- | viengoos/ChangeLog | 6 | ||||
-rw-r--r-- | viengoos/ager.c | 6 | ||||
-rw-r--r-- | viengoos/pager.c | 4 |
6 files changed, 275 insertions, 126 deletions
diff --git a/libc-parts/ChangeLog b/libc-parts/ChangeLog index a0fc1f6..692ea9f 100644 --- a/libc-parts/ChangeLog +++ b/libc-parts/ChangeLog @@ -1,3 +1,38 @@ +2008-11-18 Neal H. Walfield <neal@gnu.org> + + * profile.h (profile_start): Take additional parameter name2. + Update users. + (profile_region): Create the id from the function name and the + description. + * profile.c [__gnu_hurd_viengoos__]: Include <s-printf.h>. + [! __gnu_hurd_viengoos__]: Include <pthread.h>. + (THREADS): Define. + (struct site): Rename from this... + (struct call_site): ... to this. Add fields name2 and + nested_calls. + (SIZE): Rename from this... + (CALL_SITES): ... to this. + (sites_hash): Replace this variable... + (sites_hash_buffer): ... and this one... + (init): ... and this one... + (sites): ... and this one... + (used): ... and this one... + (epoch): ... and this one... + (calls): ... and this one... + (total_time): ... and this one... + (extant): ... and this one... + (struct profile_block): ... with this new structure... + (thread_count): ... this variable... + (profile_blocks): ... and this variable. Update users. + (profile_block): New function. + (profile_stats_dump): Improve output. + (profile_end): Increment SITE->NESTED_CALLS. + (profile_start): Take additional parameter name2. Save it in + SITE->NAME2. Call profile_block to get the thread's profile + block. Remove initialization code. + (profile_end): Call profile_block to get the thread's profile + block. + 2008-11-14 Neal H. Walfield <neal@gnu.org> * backtrace.c (backtrace_print): New function. 
diff --git a/libc-parts/profile.c b/libc-parts/profile.c index 2be8c9e..01525e5 100644 --- a/libc-parts/profile.c +++ b/libc-parts/profile.c @@ -32,7 +32,9 @@ #ifdef __gnu_hurd_viengoos__ # include <l4.h> # include <hurd/rm.h> +# include <s-printf.h> #else +# include <pthread.h> # include <sys/time.h> # ifndef s_printf # define s_printf printf @@ -72,140 +74,242 @@ now (void) #endif } - -static struct hurd_ihash sites_hash; -static char sites_hash_buffer[PAGESIZE]; -static int init; - -/* XXX: This implementation assumes that we are single threaded! In - the case of Viengoos, this is essentially true: all relevant - functionality is with the kernel lock held. We also don't - currently support nested calls. */ - -#define SIZE 1000 -struct site +#define CALL_SITES 100 +struct call_site { const char *name; + const char *name2; uint64_t time; uint64_t start; int calls; + int nested_calls; int pending; -} sites[SIZE]; -static int used; - -static uint64_t epoch; -static uint64_t calls; -/* It's nothing but it prevents a divide by zero. */ -static uint64_t total_time = 1; -/* Number of extant profiling calls. We only update total_time if - EXTANT is 0. The result is that the time spent profiling is - correct, and the percent of the time profile that a function has - been is more meaningful. */ -static int extant; +}; -#undef profile_stats_dump -void -profile_stats_dump (void) +/* The number of threads we support. */ +#define THREADS 4 +static int thread_count; + +struct profile_block { - uint64_t n = now (); +#ifdef __gnu_hurd_viengoos__ +#define MYSELF() l4_myself () + l4_thread_id_t tid; +#else +#define MYSELF() pthread_self () + pthread_t tid; +#endif - int width = 0; - int count = 0; + /* When we started profiling. 
*/ + uint64_t epoch; + uint64_t profiled_time; - int i; - for (i = 0; i < used; i ++) - { - if (sites[i].calls) - count ++; - if (sites[i].calls && sites[i].name && strlen (sites[i].name) > width) - width = strlen (sites[i].name); - } + uint64_t calls; + /* Number of extant profiling calls. We only update profiled_time + if EXTANT is 0. The result is that the time spent profiling is + correct, and the percent of the time profile that a function has + been is more meaningful. */ + int extant; - char spaces[width + 1]; - memset (spaces, ' ', sizeof (spaces)); - spaces[width] = 0; + int init_done; - /* Do a simple bubble sort. */ - int order[count]; + struct call_site call_sites[CALL_SITES]; + int call_sites_used; - int j = 0; - for (i = 0; i < used; i ++) - if (sites[i].calls) - order[j ++] = i; + struct hurd_ihash sites_hash; +#ifndef RM_INTERN + char sites_hash_buffer[PAGESIZE]; +#endif +}; - for (i = 0; i < count; i ++) - for (j = 0; j < count - 1; j ++) - if (sites[order[j]].time < sites[order[j + 1]].time) - { - int t = order[j]; - order[j] = order[j + 1]; - order[j + 1] = t; - } +static struct profile_block profile_blocks[THREADS]; - for (j = 0; j < count; j ++) - { - i = order[j]; - if (sites[i].calls) - s_printf ("%s%s: %d calls,\t%lld ms,\t%lld.%d us per call,\t" - "%d%% total time,\t%d%% profiled time\n", - &spaces[strlen (sites[i].name)], sites[i].name, - sites[i].calls, - sites[i].time / 1000, - sites[i].time / sites[i].calls, - (int) ((10 * sites[i].time) / sites[i].calls) % 10, - (int) ((100 * sites[i].time) / (n - epoch)), - (int) ((100 * sites[i].time) / total_time)); - } +/* Return the profile block associated with the calling thread. If + there is none, initialize one. 
*/ +static struct profile_block * +profile_block (void) +{ + int i; + for (i = 0; i < thread_count; i ++) + if (profile_blocks[i].tid == MYSELF()) + return &profile_blocks[i]; - s_printf ("profiled time: %lld ms, calls: %lld\n", - total_time / 1000, calls); - s_printf ("uptime: %lld ms\n", (n - epoch) / 1000); -} + /* New thread. */ -#undef profile_start -void -profile_start (uintptr_t id, const char *name) -{ - if (! init) - { - init = 1; + i = __sync_fetch_and_add (&thread_count, 1); + if (i >= THREADS) + panic ("More threads than profile space available!"); + + struct profile_block *pb = &profile_blocks[i]; + + pb->tid = MYSELF (); - size_t size; - void *buffer; + size_t size; + void *buffer; #ifdef RM_INTERN - size = hurd_ihash_buffer_size (SIZE, false, 0); - /* Round up to a multiple of the page size. */ - size = (size + PAGESIZE - 1) & ~(PAGESIZE - 1); + size = hurd_ihash_buffer_size (CALL_SITES, false, 0); + /* Round up to a multiple of the page size. */ + size = (size + PAGESIZE - 1) & ~(PAGESIZE - 1); - buffer = (void *) zalloc (size); + buffer = (void *) zalloc (size); #else - size = sizeof (sites_hash_buffer); - buffer = sites_hash_buffer; + size = sizeof (pb->sites_hash_buffer); + buffer = pb->sites_hash_buffer; #endif - if (! buffer) - panic ("Failed to allocate memory for object hash!\n"); + if (! buffer) + panic ("Failed to allocate memory for object hash!\n"); - memset (buffer, 0, size); + memset (buffer, 0, size); - hurd_ihash_init_with_buffer (&sites_hash, false, - HURD_IHASH_NO_LOCP, - buffer, size); + hurd_ihash_init_with_buffer (&pb->sites_hash, false, + HURD_IHASH_NO_LOCP, + buffer, size); - epoch = now (); + pb->epoch = now (); + pb->init_done = true; - init = 2; + return pb; +} + +/* XXX: This implementation assumes that we are single threaded! In + the case of Viengoos, this is essentially true: all relevant + functionality is with the kernel lock held. We also don't + currently support nested calls. 
*/ + +#undef profile_stats_dump +void +profile_stats_dump (void) +{ + uint64_t n = now (); + + char digit_whitespace[11]; + memset (digit_whitespace, ' ', sizeof (digit_whitespace)); + digit_whitespace[sizeof (digit_whitespace) - 1] = 0; + + char *dws (uint64_t number) + { + int w = 0; + if (number < 0) + w ++; + + while (number != 0) + { + w ++; + number /= 10; + } + if (w == 0) + w = 1; + + if (w > sizeof (digit_whitespace)) + return ""; + else + return &digit_whitespace[w]; + } + + int t; + for (t = 0; t < thread_count; t ++) + { + struct profile_block *pb = &profile_blocks[t]; + if (! pb->init_done) + continue; + + int width = 0; + int count = 0; + + int i; + for (i = 0; i < pb->call_sites_used; i ++) + { + if (pb->call_sites[i].calls) + count ++; + if (pb->call_sites[i].calls) + { + int w = 0; + if (pb->call_sites[i].name) + w += strlen (pb->call_sites[i].name); + if (pb->call_sites[i].name2) + w += strlen (pb->call_sites[i].name2); + if (pb->call_sites[i].name && pb->call_sites[i].name2) + w += 2; + if (w > width) + width = w; + } + } + + char spaces[width + 1]; + memset (spaces, ' ', sizeof (spaces)); + spaces[width] = 0; + + /* Do a simple bubble sort. */ + int order[count]; + + int j = 0; + for (i = 0; i < pb->call_sites_used; i ++) + if (pb->call_sites[i].calls) + order[j ++] = i; + + for (i = 0; i < count; i ++) + for (j = 0; j < count - 1; j ++) + if (pb->call_sites[order[j]].time + < pb->call_sites[order[j + 1]].time) + { + int t = order[j]; + order[j] = order[j + 1]; + order[j + 1] = t; + } + + s_printf ("Thread: %x\n", pb->tid); + for (j = 0; j < count; j ++) + { + i = order[j]; + if (pb->call_sites[i].calls) + s_printf ("%s%s%s%s%s: %s%d calls (%d nested),\t%s%lld ms," + "\t%lld.%d us/call,\t%d%% total,\t%d%% profiled\n", + &spaces[strlen (pb->call_sites[i].name ?: "") + + strlen (pb->call_sites[i].name2 ?: "") + + (pb->call_sites[i].name + && pb->call_sites[i].name2 ? 
2 : 0)], + pb->call_sites[i].name ?: "", + pb->call_sites[i].name && pb->call_sites[i].name2 + ? "(" : "", + pb->call_sites[i].name2 ?: "", + pb->call_sites[i].name && pb->call_sites[i].name2 + ? ")" : "", + dws (pb->call_sites[i].calls), pb->call_sites[i].calls, + pb->call_sites[i].nested_calls, + dws (pb->call_sites[i].time / 1000), + pb->call_sites[i].time / 1000, + pb->call_sites[i].time / pb->call_sites[i].calls, + (int) ((10 * pb->call_sites[i].time) + / pb->call_sites[i].calls) % 10, + (int) ((100 * pb->call_sites[i].time) / (n - pb->epoch)), + (int) ((100 * pb->call_sites[i].time) + / pb->profiled_time)); + } + + s_printf ("profiled time: %lld ms (%d%%), calls: %lld\n", + pb->profiled_time / 1000, + (int) ((100 * pb->profiled_time) / (n - pb->epoch)), + pb->calls); + s_printf ("total time: %lld ms\n", (n - pb->epoch) / 1000); } - else if (init == 1) +} + +#undef profile_start +void +profile_start (uintptr_t id, const char *name, const char *name2) +{ + struct profile_block *pb = profile_block (); + if (! pb->init_done) return; - struct site *site = hurd_ihash_find (&sites_hash, id); + struct call_site *site = hurd_ihash_find (&pb->sites_hash, id); if (! site) { - site = &sites[used ++]; - if (used == SIZE) + site = &pb->call_sites[pb->call_sites_used ++]; + if (pb->call_sites_used == CALL_SITES) panic ("Out of profile space."); - error_t err = hurd_ihash_add (&sites_hash, id, site); + error_t err = hurd_ihash_add (&pb->sites_hash, id, site); if (err) panic ("Failed to add to hash: %d.", err); @@ -218,10 +322,11 @@ profile_start (uintptr_t id, const char *name) #else site->name = "unknown"; #endif - } + } + site->name2 = name2; } - extant ++; + pb->extant ++; site->pending ++; if (site->pending == 1) @@ -232,10 +337,13 @@ profile_start (uintptr_t id, const char *name) void profile_end (uintptr_t id) { - if (init == 1) + uint64_t n = now (); + + struct profile_block *pb = profile_block (); + if (! 
pb->init_done) return; - struct site *site = hurd_ihash_find (&sites_hash, id); + struct call_site *site = hurd_ihash_find (&pb->sites_hash, id); if (! site) panic ("profile_end called without corresponding profile_begin (%p)!", (void *) id); @@ -243,24 +351,24 @@ profile_end (uintptr_t id) if (! site->pending) panic ("profile_end called but no extant profile_start!"); - extant --; + pb->extant --; site->pending --; if (site->pending == 0) { - uint64_t n = now (); - site->time += n - site->start; - if (extant == 0) - total_time += n - site->start; + if (pb->extant == 0) + pb->profiled_time += n - site->start; site->calls ++; - calls ++; + pb->calls ++; do_debug (5) - if (calls % 100000 == 0) + if (pb->calls % 100000 == 0) profile_stats_dump (); } + else + site->nested_calls ++; } diff --git a/libc-parts/profile.h b/libc-parts/profile.h index 5caf970..911cb83 100644 --- a/libc-parts/profile.h +++ b/libc-parts/profile.h @@ -24,8 +24,8 @@ safe. */ /* Currently, only enable by default for Viengoos. */ -#ifndef RM_INTERN -// #define NPROFILE +#ifdef RM_INTERN +#define NPROFILE #endif #ifndef NPROFILE @@ -33,7 +33,7 @@ /* Start a timer for the profile site ID (this must be unique per site, can be the function's address). NAME is the symbolic name. */ -extern void profile_start (uintptr_t id, const char *name); +extern void profile_start (uintptr_t id, const char *name, const char *name2); /* End the timer for the profile site ID. 
*/ extern void profile_end (uintptr_t id); @@ -42,17 +42,17 @@ extern void profile_stats_dump (void); #else -#define profile_start(id, name) do { } while (0) +#define profile_start(id, name, name2) do { } while (0) #define profile_end(id) do { } while (0) #define profile_stats_dump() do { } while (0) #endif -#define profile_region(__pr_id) \ - { \ - const char *__pr_id_ = (__pr_id); \ - profile_start((uintptr_t) __pr_id_, __pr_id_) +#define profile_region(__pr_name) \ + { \ + uintptr_t __pr_id_ = (uintptr_t) __func__ + (uintptr_t) (__pr_name); \ + profile_start (__pr_id_, __func__, (__pr_name)) #define profile_region_end() \ - profile_end ((uintptr_t) __pr_id_); \ + profile_end (__pr_id_); \ } diff --git a/viengoos/ChangeLog b/viengoos/ChangeLog index d3601b1..af1b916 100644 --- a/viengoos/ChangeLog +++ b/viengoos/ChangeLog @@ -1,3 +1,9 @@ +2008-11-18 Neal H. Walfield <neal@gnu.org> + + * ager.c (ager_loop): Update use of profile_start to match new + API. + * pager.c (pager_collect): Likewise. + 2008-11-17 Neal H. Walfield <neal@gnu.org> * object.h (struct object_desc): Add field maybe_folio_desc. diff --git a/viengoos/ager.c b/viengoos/ager.c index 808a257..2ca73b7 100644 --- a/viengoos/ager.c +++ b/viengoos/ager.c @@ -65,7 +65,7 @@ static void update_stats (void) { ss_mutex_lock (&kernel_lock); - profile_region ("update_stats"); + profile_region (NULL); /* XXX: Update the statistics. We need to average some of the fields including the number of active, inactive, clean and dirty @@ -525,7 +525,7 @@ ager_loop (void) while (frame < frames) { ss_mutex_lock (&kernel_lock); - profile_start ((uintptr_t) &ager_loop, "ager"); + profile_start ((uintptr_t) &ager_loop, "ager", NULL); int count = grab (); if (count == 0) @@ -548,7 +548,7 @@ ager_loop (void) thread, it does not for subsequent threads. Moreover, we would have to access the pages at fault time to ensure that they are mapped, which is just ugly. 
*/ - profile_start ((uintptr_t) &ager_loop + 1, "l4_unmap"); + profile_start ((uintptr_t) &ager_loop + 1, "l4_unmap", NULL); l4_flush_fpages (count, fpages); if (also_unmap) { diff --git a/viengoos/pager.c b/viengoos/pager.c index 245ff33..65ccb5f 100644 --- a/viengoos/pager.c +++ b/viengoos/pager.c @@ -244,7 +244,7 @@ pager_collect (int goal) laundry_list_count (&laundry), PAGER_LOW_WATER_MARK, goal); - profile_start ((uintptr_t) &pager_collect, __FUNCTION__); + profile_start ((uintptr_t) &pager_collect, __FUNCTION__, NULL); /* Find a victim. */ struct activity *victim; @@ -354,7 +354,7 @@ pager_collect (int goal) return false; } - profile_region ("pager_collect(find victim)"); + profile_region ("find victim"); victim = root_activity; do |