-rw-r--r--  libc-parts/ChangeLog  |  35
-rw-r--r--  libc-parts/profile.c  | 332
-rw-r--r--  libc-parts/profile.h  |  18
-rw-r--r--  viengoos/ChangeLog    |   6
-rw-r--r--  viengoos/ager.c       |   6
-rw-r--r--  viengoos/pager.c      |   4
6 files changed, 275 insertions(+), 126 deletions(-)
diff --git a/libc-parts/ChangeLog b/libc-parts/ChangeLog
index a0fc1f6..692ea9f 100644
--- a/libc-parts/ChangeLog
+++ b/libc-parts/ChangeLog
@@ -1,3 +1,38 @@
+2008-11-18 Neal H. Walfield <neal@gnu.org>
+
+ * profile.h (profile_start): Take additional parameter name2.
+ Update users.
+ (profile_region): Create the id from the function name and the
+ description.
+ * profile.c [__gnu_hurd_viengoos__]: Include <s-printf.h>.
+ [! __gnu_hurd_viengoos__]: Include <pthread.h>.
+ (THREADS): Define.
+ (struct site): Rename from this...
+ (struct call_site): ... to this. Add fields name2 and
+ nested_calls.
+ (SIZE): Rename from this...
+ (CALL_SITES): ... to this.
+ (sites_hash): Replace this variable...
+ (sites_hash_buffer): ... and this one...
+ (init): ... and this one...
+ (sites): ... and this one...
+ (used): ... and this one...
+ (epoch): ... and this one...
+ (calls): ... and this one...
+ (total_time): ... and this one...
+ (extant): ... and this one...
+ (struct profile_block): ... with this new structure...
+ (thread_count): ... this variable...
+ (profile_blocks): ... and this variable. Update users.
+ (profile_block): New function.
+ (profile_stats_dump): Improve output.
+ (profile_end): Increment SITE->NESTED_CALLS.
+ (profile_start): Take additional parameter name2. Save it in
+ SITE->NAME2. Call profile_block to get the thread's profile
+ block. Remove initialization code.
+ (profile_end): Call profile_block to get the thread's profile
+ block.
+
2008-11-14 Neal H. Walfield <neal@gnu.org>
* backtrace.c (backtrace_print): New function.
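For reference, a minimal sketch (not part of the patch) of a call site under the new three-argument profile_start. The function foo, the id choice, and the strings are illustrative only; the patch's real call sites are in viengoos/ager.c and viengoos/pager.c below.

    #include <stdint.h>
    #include "profile.h"   /* Declares profile_start and profile_end.  */

    static void
    foo (void)
    {
      /* The address of foo serves as the per-site id, "foo" is the
         primary name, and "inner loop" is the new NAME2 description.  */
      profile_start ((uintptr_t) &foo, "foo", "inner loop");
      /* ... work being timed ... */
      profile_end ((uintptr_t) &foo);
    }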
diff --git a/libc-parts/profile.c b/libc-parts/profile.c
index 2be8c9e..01525e5 100644
--- a/libc-parts/profile.c
+++ b/libc-parts/profile.c
@@ -32,7 +32,9 @@
#ifdef __gnu_hurd_viengoos__
# include <l4.h>
# include <hurd/rm.h>
+# include <s-printf.h>
#else
+# include <pthread.h>
# include <sys/time.h>
# ifndef s_printf
# define s_printf printf
@@ -72,140 +74,242 @@ now (void)
#endif
}
-
-static struct hurd_ihash sites_hash;
-static char sites_hash_buffer[PAGESIZE];
-static int init;
-
-/* XXX: This implementation assumes that we are single threaded! In
- the case of Viengoos, this is essentially true: all relevant
- functionality is with the kernel lock held. We also don't
- currently support nested calls. */
-
-#define SIZE 1000
-struct site
+#define CALL_SITES 100
+struct call_site
{
const char *name;
+ const char *name2;
uint64_t time;
uint64_t start;
int calls;
+ int nested_calls;
int pending;
-} sites[SIZE];
-static int used;
-
-static uint64_t epoch;
-static uint64_t calls;
-/* It's nothing but it prevents a divide by zero. */
-static uint64_t total_time = 1;
-/* Number of extant profiling calls. We only update total_time if
- EXTANT is 0. The result is that the time spent profiling is
- correct, and the percent of the time profile that a function has
- been is more meaningful. */
-static int extant;
+};
-#undef profile_stats_dump
-void
-profile_stats_dump (void)
+/* The number of threads we support. */
+#define THREADS 4
+static int thread_count;
+
+struct profile_block
{
- uint64_t n = now ();
+#ifdef __gnu_hurd_viengoos__
+#define MYSELF() l4_myself ()
+ l4_thread_id_t tid;
+#else
+#define MYSELF() pthread_self ()
+ pthread_t tid;
+#endif
- int width = 0;
- int count = 0;
+ /* When we started profiling. */
+ uint64_t epoch;
+ uint64_t profiled_time;
- int i;
- for (i = 0; i < used; i ++)
- {
- if (sites[i].calls)
- count ++;
- if (sites[i].calls && sites[i].name && strlen (sites[i].name) > width)
- width = strlen (sites[i].name);
- }
+ uint64_t calls;
+ /* Number of extant profiling calls. We only update profiled_time
+ if EXTANT is 0. The result is that the total profiled time is
+ correct, and the percentage of profiled time attributed to a
+ function is more meaningful. */
+ int extant;
- char spaces[width + 1];
- memset (spaces, ' ', sizeof (spaces));
- spaces[width] = 0;
+ int init_done;
- /* Do a simple bubble sort. */
- int order[count];
+ struct call_site call_sites[CALL_SITES];
+ int call_sites_used;
- int j = 0;
- for (i = 0; i < used; i ++)
- if (sites[i].calls)
- order[j ++] = i;
+ struct hurd_ihash sites_hash;
+#ifndef RM_INTERN
+ char sites_hash_buffer[PAGESIZE];
+#endif
+};
- for (i = 0; i < count; i ++)
- for (j = 0; j < count - 1; j ++)
- if (sites[order[j]].time < sites[order[j + 1]].time)
- {
- int t = order[j];
- order[j] = order[j + 1];
- order[j + 1] = t;
- }
+static struct profile_block profile_blocks[THREADS];
- for (j = 0; j < count; j ++)
- {
- i = order[j];
- if (sites[i].calls)
- s_printf ("%s%s: %d calls,\t%lld ms,\t%lld.%d us per call,\t"
- "%d%% total time,\t%d%% profiled time\n",
- &spaces[strlen (sites[i].name)], sites[i].name,
- sites[i].calls,
- sites[i].time / 1000,
- sites[i].time / sites[i].calls,
- (int) ((10 * sites[i].time) / sites[i].calls) % 10,
- (int) ((100 * sites[i].time) / (n - epoch)),
- (int) ((100 * sites[i].time) / total_time));
- }
+/* Return the profile block associated with the calling thread. If
+ there is none, initialize one. */
+static struct profile_block *
+profile_block (void)
+{
+ int i;
+ for (i = 0; i < thread_count; i ++)
+ if (profile_blocks[i].tid == MYSELF())
+ return &profile_blocks[i];
- s_printf ("profiled time: %lld ms, calls: %lld\n",
- total_time / 1000, calls);
- s_printf ("uptime: %lld ms\n", (n - epoch) / 1000);
-}
+ /* New thread. */
-#undef profile_start
-void
-profile_start (uintptr_t id, const char *name)
-{
- if (! init)
- {
- init = 1;
+ i = __sync_fetch_and_add (&thread_count, 1);
+ if (i >= THREADS)
+ panic ("More threads than profile space available!");
+
+ struct profile_block *pb = &profile_blocks[i];
+
+ pb->tid = MYSELF ();
- size_t size;
- void *buffer;
+ size_t size;
+ void *buffer;
#ifdef RM_INTERN
- size = hurd_ihash_buffer_size (SIZE, false, 0);
- /* Round up to a multiple of the page size. */
- size = (size + PAGESIZE - 1) & ~(PAGESIZE - 1);
+ size = hurd_ihash_buffer_size (CALL_SITES, false, 0);
+ /* Round up to a multiple of the page size. */
+ size = (size + PAGESIZE - 1) & ~(PAGESIZE - 1);
- buffer = (void *) zalloc (size);
+ buffer = (void *) zalloc (size);
#else
- size = sizeof (sites_hash_buffer);
- buffer = sites_hash_buffer;
+ size = sizeof (pb->sites_hash_buffer);
+ buffer = pb->sites_hash_buffer;
#endif
- if (! buffer)
- panic ("Failed to allocate memory for object hash!\n");
+ if (! buffer)
+ panic ("Failed to allocate memory for object hash!\n");
- memset (buffer, 0, size);
+ memset (buffer, 0, size);
- hurd_ihash_init_with_buffer (&sites_hash, false,
- HURD_IHASH_NO_LOCP,
- buffer, size);
+ hurd_ihash_init_with_buffer (&pb->sites_hash, false,
+ HURD_IHASH_NO_LOCP,
+ buffer, size);
- epoch = now ();
+ pb->epoch = now ();
+ pb->init_done = true;
- init = 2;
+ return pb;
+}
+
+/* XXX: This implementation assumes that we are single threaded! In
+ the case of Viengoos, this is essentially true: all relevant
+ functionality runs with the kernel lock held. We also don't
+ currently support nested calls. */
+
+#undef profile_stats_dump
+void
+profile_stats_dump (void)
+{
+ uint64_t n = now ();
+
+ char digit_whitespace[11];
+ memset (digit_whitespace, ' ', sizeof (digit_whitespace));
+ digit_whitespace[sizeof (digit_whitespace) - 1] = 0;
+
+ char *dws (uint64_t number)
+ {
+ int w = 0;
+ if (number < 0)
+ w ++;
+
+ while (number != 0)
+ {
+ w ++;
+ number /= 10;
+ }
+ if (w == 0)
+ w = 1;
+
+ if (w > sizeof (digit_whitespace))
+ return "";
+ else
+ return &digit_whitespace[w];
+ }
+
+ int t;
+ for (t = 0; t < thread_count; t ++)
+ {
+ struct profile_block *pb = &profile_blocks[t];
+ if (! pb->init_done)
+ continue;
+
+ int width = 0;
+ int count = 0;
+
+ int i;
+ for (i = 0; i < pb->call_sites_used; i ++)
+ {
+ if (pb->call_sites[i].calls)
+ count ++;
+ if (pb->call_sites[i].calls)
+ {
+ int w = 0;
+ if (pb->call_sites[i].name)
+ w += strlen (pb->call_sites[i].name);
+ if (pb->call_sites[i].name2)
+ w += strlen (pb->call_sites[i].name2);
+ if (pb->call_sites[i].name && pb->call_sites[i].name2)
+ w += 2;
+ if (w > width)
+ width = w;
+ }
+ }
+
+ char spaces[width + 1];
+ memset (spaces, ' ', sizeof (spaces));
+ spaces[width] = 0;
+
+ /* Do a simple bubble sort. */
+ int order[count];
+
+ int j = 0;
+ for (i = 0; i < pb->call_sites_used; i ++)
+ if (pb->call_sites[i].calls)
+ order[j ++] = i;
+
+ for (i = 0; i < count; i ++)
+ for (j = 0; j < count - 1; j ++)
+ if (pb->call_sites[order[j]].time
+ < pb->call_sites[order[j + 1]].time)
+ {
+ int t = order[j];
+ order[j] = order[j + 1];
+ order[j + 1] = t;
+ }
+
+ s_printf ("Thread: %x\n", pb->tid);
+ for (j = 0; j < count; j ++)
+ {
+ i = order[j];
+ if (pb->call_sites[i].calls)
+ s_printf ("%s%s%s%s%s: %s%d calls (%d nested),\t%s%lld ms,"
+ "\t%lld.%d us/call,\t%d%% total,\t%d%% profiled\n",
+ &spaces[strlen (pb->call_sites[i].name ?: "")
+ + strlen (pb->call_sites[i].name2 ?: "")
+ + (pb->call_sites[i].name
+ && pb->call_sites[i].name2 ? 2 : 0)],
+ pb->call_sites[i].name ?: "",
+ pb->call_sites[i].name && pb->call_sites[i].name2
+ ? "(" : "",
+ pb->call_sites[i].name2 ?: "",
+ pb->call_sites[i].name && pb->call_sites[i].name2
+ ? ")" : "",
+ dws (pb->call_sites[i].calls), pb->call_sites[i].calls,
+ pb->call_sites[i].nested_calls,
+ dws (pb->call_sites[i].time / 1000),
+ pb->call_sites[i].time / 1000,
+ pb->call_sites[i].time / pb->call_sites[i].calls,
+ (int) ((10 * pb->call_sites[i].time)
+ / pb->call_sites[i].calls) % 10,
+ (int) ((100 * pb->call_sites[i].time) / (n - pb->epoch)),
+ (int) ((100 * pb->call_sites[i].time)
+ / pb->profiled_time));
+ }
+
+ s_printf ("profiled time: %lld ms (%d%%), calls: %lld\n",
+ pb->profiled_time / 1000,
+ (int) ((100 * pb->profiled_time) / (n - pb->epoch)),
+ pb->calls);
+ s_printf ("total time: %lld ms\n", (n - pb->epoch) / 1000);
}
- else if (init == 1)
+}
+
+#undef profile_start
+void
+profile_start (uintptr_t id, const char *name, const char *name2)
+{
+ struct profile_block *pb = profile_block ();
+ if (! pb->init_done)
return;
- struct site *site = hurd_ihash_find (&sites_hash, id);
+ struct call_site *site = hurd_ihash_find (&pb->sites_hash, id);
if (! site)
{
- site = &sites[used ++];
- if (used == SIZE)
+ site = &pb->call_sites[pb->call_sites_used ++];
+ if (pb->call_sites_used == CALL_SITES)
panic ("Out of profile space.");
- error_t err = hurd_ihash_add (&sites_hash, id, site);
+ error_t err = hurd_ihash_add (&pb->sites_hash, id, site);
if (err)
panic ("Failed to add to hash: %d.", err);
@@ -218,10 +322,11 @@ profile_start (uintptr_t id, const char *name)
#else
site->name = "unknown";
#endif
- }
+ }
+ site->name2 = name2;
}
- extant ++;
+ pb->extant ++;
site->pending ++;
if (site->pending == 1)
@@ -232,10 +337,13 @@ profile_start (uintptr_t id, const char *name)
void
profile_end (uintptr_t id)
{
- if (init == 1)
+ uint64_t n = now ();
+
+ struct profile_block *pb = profile_block ();
+ if (! pb->init_done)
return;
- struct site *site = hurd_ihash_find (&sites_hash, id);
+ struct call_site *site = hurd_ihash_find (&pb->sites_hash, id);
if (! site)
panic ("profile_end called without corresponding profile_begin (%p)!",
(void *) id);
@@ -243,24 +351,24 @@ profile_end (uintptr_t id)
if (! site->pending)
panic ("profile_end called but no extant profile_start!");
- extant --;
+ pb->extant --;
site->pending --;
if (site->pending == 0)
{
- uint64_t n = now ();
-
site->time += n - site->start;
- if (extant == 0)
- total_time += n - site->start;
+ if (pb->extant == 0)
+ pb->profiled_time += n - site->start;
site->calls ++;
- calls ++;
+ pb->calls ++;
do_debug (5)
- if (calls % 100000 == 0)
+ if (pb->calls % 100000 == 0)
profile_stats_dump ();
}
+ else
+ site->nested_calls ++;
}
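The heart of the change is the per-thread lookup in profile_block above: each thread claims a slot in a fixed array on first use, reserving the index with __sync_fetch_and_add, and later calls find the slot by scanning for the caller's thread id. Below is a reduced, pthread-only sketch of that pattern; the names are invented, the hash-table and epoch initialization are omitted, and, like the patch, it assumes registration races are benign.

    #include <pthread.h>
    #include <stdlib.h>

    #define THREADS 4

    struct block
    {
      pthread_t tid;       /* Owning thread.  */
      int init_done;       /* Set once the block is usable.  */
    };

    static struct block blocks[THREADS];
    static int block_count;

    /* Return the calling thread's block, claiming a fresh slot on
       first use.  */
    static struct block *
    my_block (void)
    {
      int i;
      for (i = 0; i < block_count; i ++)
        if (pthread_equal (blocks[i].tid, pthread_self ()))
          return &blocks[i];

      /* New thread: atomically reserve the next slot.  */
      i = __sync_fetch_and_add (&block_count, 1);
      if (i >= THREADS)
        abort ();

      blocks[i].tid = pthread_self ();
      blocks[i].init_done = 1;
      return &blocks[i];
    }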
diff --git a/libc-parts/profile.h b/libc-parts/profile.h
index 5caf970..911cb83 100644
--- a/libc-parts/profile.h
+++ b/libc-parts/profile.h
@@ -24,8 +24,8 @@
safe. */
/* Currently, only enable by default for Viengoos. */
-#ifndef RM_INTERN
-// #define NPROFILE
+#ifdef RM_INTERN
+#define NPROFILE
#endif
#ifndef NPROFILE
@@ -33,7 +33,7 @@
/* Start a timer for the profile site ID (this must be unique per
site, can be the function's address). NAME is the symbolic
name. */
-extern void profile_start (uintptr_t id, const char *name);
+extern void profile_start (uintptr_t id, const char *name, const char *name2);
/* End the timer for the profile site ID. */
extern void profile_end (uintptr_t id);
@@ -42,17 +42,17 @@ extern void profile_stats_dump (void);
#else
-#define profile_start(id, name) do { } while (0)
+#define profile_start(id, name, name2) do { } while (0)
#define profile_end(id) do { } while (0)
#define profile_stats_dump() do { } while (0)
#endif
-#define profile_region(__pr_id) \
- { \
- const char *__pr_id_ = (__pr_id); \
- profile_start((uintptr_t) __pr_id_, __pr_id_)
+#define profile_region(__pr_name) \
+ { \
+ uintptr_t __pr_id_ = (uintptr_t) __func__ + (uintptr_t) (__pr_name); \
+ profile_start (__pr_id_, __func__, (__pr_name))
#define profile_region_end() \
- profile_end ((uintptr_t) __pr_id_); \
+ profile_end (__pr_id_); \
}
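The reworked profile_region macro opens a brace so that __pr_id_ stays local to the region, and builds the id by adding the address of __func__ to the address of the description string; profile_region_end closes the brace. Passing NULL, as update_stats now does in ager.c, makes the function name alone identify the region. An illustrative use (function and description names invented):

    static void
    scan_table (void)
    {
      profile_region ("lookup");    /* Opens a block; the id combines
                                       __func__ and "lookup".  */
      /* ... code being timed ... */
      profile_region_end ();        /* Closes the block opened above.  */
    }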
diff --git a/viengoos/ChangeLog b/viengoos/ChangeLog
index d3601b1..af1b916 100644
--- a/viengoos/ChangeLog
+++ b/viengoos/ChangeLog
@@ -1,3 +1,9 @@
+2008-11-18 Neal H. Walfield <neal@gnu.org>
+
+ * ager.c (update_stats): Update use of profile_region to match new
+ API.
+ (ager_loop): Update use of profile_start to match new API.
+ * pager.c (pager_collect): Likewise.
+
2008-11-17 Neal H. Walfield <neal@gnu.org>
* object.h (struct object_desc): Add field maybe_folio_desc.
diff --git a/viengoos/ager.c b/viengoos/ager.c
index 808a257..2ca73b7 100644
--- a/viengoos/ager.c
+++ b/viengoos/ager.c
@@ -65,7 +65,7 @@ static void
update_stats (void)
{
ss_mutex_lock (&kernel_lock);
- profile_region ("update_stats");
+ profile_region (NULL);
/* XXX: Update the statistics. We need to average some of the
fields including the number of active, inactive, clean and dirty
@@ -525,7 +525,7 @@ ager_loop (void)
while (frame < frames)
{
ss_mutex_lock (&kernel_lock);
- profile_start ((uintptr_t) &ager_loop, "ager");
+ profile_start ((uintptr_t) &ager_loop, "ager", NULL);
int count = grab ();
if (count == 0)
@@ -548,7 +548,7 @@ ager_loop (void)
thread, it does not for subsequent threads. Moreover, we
would have to access the pages at fault time to ensure
that they are mapped, which is just ugly. */
- profile_start ((uintptr_t) &ager_loop + 1, "l4_unmap");
+ profile_start ((uintptr_t) &ager_loop + 1, "l4_unmap", NULL);
l4_flush_fpages (count, fpages);
if (also_unmap)
{
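ager_loop times two different sections from the same function, so it offsets the function address to obtain distinct site ids ((uintptr_t) &ager_loop and (uintptr_t) &ager_loop + 1). A hypothetical sketch of the same pattern, with invented names:

    #include <stdint.h>
    #include "profile.h"

    static void
    worker (void)
    {
      profile_start ((uintptr_t) &worker, "worker", "phase 1");
      /* ... first measured section ... */
      profile_end ((uintptr_t) &worker);

      /* Offsetting the function address yields a second, distinct id.  */
      profile_start ((uintptr_t) &worker + 1, "worker", "phase 2");
      /* ... second measured section ... */
      profile_end ((uintptr_t) &worker + 1);
    }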
diff --git a/viengoos/pager.c b/viengoos/pager.c
index 245ff33..65ccb5f 100644
--- a/viengoos/pager.c
+++ b/viengoos/pager.c
@@ -244,7 +244,7 @@ pager_collect (int goal)
laundry_list_count (&laundry),
PAGER_LOW_WATER_MARK, goal);
- profile_start ((uintptr_t) &pager_collect, __FUNCTION__);
+ profile_start ((uintptr_t) &pager_collect, __FUNCTION__, NULL);
/* Find a victim. */
struct activity *victim;
@@ -354,7 +354,7 @@ pager_collect (int goal)
return false;
}
- profile_region ("pager_collect(find victim)");
+ profile_region ("find victim");
victim = root_activity;
do