[PATCH (3.4) 0/2] value-pairs: Efficient sorting
In the two patches that will follow, a subtle, yet very important change will be made to the core of value-pairs: instead of building a GHashTable during a value_pairs_foreach(), we now build a binary tree, that's sorted at insert time, and traversing through it is much faster. This also buys us sorted keys almost free of charge! And once we have that, the second patch exports a value_pairs_foreach_sorted() function, so that modules who want to override the default alphabetical sort will be able to do so. -- |8]
Using a GTree instead of a GHashTable has the advantage of providing faster access and a sorted key list, both of which are desired features. Signed-off-by: Gergely Nagy <algernon@balabit.hu> --- lib/value-pairs.c | 25 ++++++++++++------------- 1 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/value-pairs.c b/lib/value-pairs.c index 6ecd810..c5649ea 100644 --- a/lib/value-pairs.c +++ b/lib/value-pairs.c @@ -184,7 +184,7 @@ vp_pairs_foreach(gpointer key, gpointer value, gpointer user_data) ValuePairs *vp = ((gpointer *)user_data)[0]; LogMessage *msg = ((gpointer *)user_data)[2]; gint32 seq_num = GPOINTER_TO_INT (((gpointer *)user_data)[3]); - GHashTable *scope_set = ((gpointer *)user_data)[5]; + GTree *scope_set = ((gpointer *)user_data)[5]; ScratchBuffer *sb = scratch_buffer_acquire(); log_template_format((LogTemplate *)value, msg, NULL, LTZ_LOCAL, @@ -196,7 +196,7 @@ vp_pairs_foreach(gpointer key, gpointer value, gpointer user_data) return; } - g_hash_table_insert(scope_set, vp_transform_apply (vp, key), sb_string(sb)->str); + g_tree_insert(scope_set, vp_transform_apply(vp, key), sb_string(sb)->str); g_string_steal(sb_string(sb)); scratch_buffer_release(sb); } @@ -208,7 +208,7 @@ vp_msg_nvpairs_foreach(NVHandle handle, gchar *name, gpointer user_data) { ValuePairs *vp = ((gpointer *)user_data)[0]; - GHashTable *scope_set = ((gpointer *)user_data)[5]; + GTree *scope_set = ((gpointer *)user_data)[5]; gint j; gboolean inc = FALSE; @@ -225,7 +225,7 @@ vp_msg_nvpairs_foreach(NVHandle handle, gchar *name, inc) { /* NOTE: the key is a borrowed reference in the hash, and value is freed */ - g_hash_table_insert(scope_set, vp_transform_apply(vp, name), g_strndup(value, value_len)); + g_tree_insert(scope_set, vp_transform_apply(vp, name), g_strndup(value, value_len)); } return FALSE; @@ -233,7 +233,7 @@ vp_msg_nvpairs_foreach(NVHandle handle, gchar *name, /* runs over a set of ValuePairSpec structs and merges them into the value-pair set */ static void -vp_merge_set(ValuePairs *vp, LogMessage *msg, gint32 seq_num, ValuePairSpec *set, GHashTable *dest) +vp_merge_set(ValuePairs *vp, LogMessage *msg, gint32 seq_num, ValuePairSpec *set, GTree *dest) { gint i; ScratchBuffer *sb = scratch_buffer_acquire(); @@ -273,7 +273,7 @@ vp_merge_set(ValuePairs *vp, LogMessage *msg, gint32 seq_num, ValuePairSpec *set if (!sb_string(sb)->str[0]) continue; - g_hash_table_insert(dest, vp_transform_apply(vp, set[i].name), sb_string(sb)->str); + g_tree_insert(dest, vp_transform_apply(vp, set[i].name), sb_string(sb)->str); g_string_steal(sb_string(sb)); } scratch_buffer_release(sb); @@ -284,12 +284,11 @@ value_pairs_foreach (ValuePairs *vp, VPForeachFunc func, LogMessage *msg, gint32 seq_num, gpointer user_data) { gpointer args[] = { vp, func, msg, GINT_TO_POINTER (seq_num), user_data, NULL }; - GHashTable *scope_set; - - scope_set = g_hash_table_new_full(g_str_hash, g_str_equal, - (GDestroyNotify) g_free, - (GDestroyNotify) g_free); + GTree *scope_set; + scope_set = g_tree_new_full((GCompareDataFunc)g_strcmp0, NULL, + (GDestroyNotify)g_free, + (GDestroyNotify)g_free); args[5] = scope_set; /* @@ -316,9 +315,9 @@ value_pairs_foreach (ValuePairs *vp, VPForeachFunc func, g_hash_table_foreach(vp->vpairs, (GHFunc) vp_pairs_foreach, args); /* Aaand we run it through the callback! */ - g_hash_table_foreach(scope_set, (GHFunc)func, user_data); + g_tree_foreach(scope_set, (GTraverseFunc)func, user_data); - g_hash_table_destroy(scope_set); + g_tree_destroy(scope_set); } -- 1.7.9
Export value_pairs_foreach_sorted(), so that if any module wants to use a custom sort function, they can do that. Signed-off-by: Gergely Nagy <algernon@balabit.hu> --- lib/value-pairs.c | 19 ++++++++++++++----- lib/value-pairs.h | 8 ++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/lib/value-pairs.c b/lib/value-pairs.c index c5649ea..671d4f4 100644 --- a/lib/value-pairs.c +++ b/lib/value-pairs.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2011 BalaBit IT Ltd, Budapest, Hungary - * Copyright (c) 2011 Gergely Nagy <algernon@balabit.hu> + * Copyright (c) 2011-2012 BalaBit IT Ltd, Budapest, Hungary + * Copyright (c) 2011-2012 Gergely Nagy <algernon@balabit.hu> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -280,13 +280,14 @@ vp_merge_set(ValuePairs *vp, LogMessage *msg, gint32 seq_num, ValuePairSpec *set } void -value_pairs_foreach (ValuePairs *vp, VPForeachFunc func, - LogMessage *msg, gint32 seq_num, gpointer user_data) +value_pairs_foreach_sorted (ValuePairs *vp, VPForeachFunc func, + GCompareDataFunc compare_func, + LogMessage *msg, gint32 seq_num, gpointer user_data) { gpointer args[] = { vp, func, msg, GINT_TO_POINTER (seq_num), user_data, NULL }; GTree *scope_set; - scope_set = g_tree_new_full((GCompareDataFunc)g_strcmp0, NULL, + scope_set = g_tree_new_full((GCompareDataFunc)compare_func, NULL, (GDestroyNotify)g_free, (GDestroyNotify)g_free); args[5] = scope_set; @@ -320,6 +321,14 @@ value_pairs_foreach (ValuePairs *vp, VPForeachFunc func, g_tree_destroy(scope_set); } +void +value_pairs_foreach(ValuePairs *vp, VPForeachFunc func, + LogMessage *msg, gint32 seq_num, + gpointer user_data) +{ + value_pairs_foreach_sorted(vp, func, (GCompareDataFunc)g_strcmp0, + msg, seq_num, user_data); +} static void value_pairs_init_set(ValuePairSpec *set) diff --git a/lib/value-pairs.h b/lib/value-pairs.h index 9b77266..1de97ca 100644 --- a/lib/value-pairs.h +++ b/lib/value-pairs.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2011 BalaBit IT Ltd, Budapest, Hungary - * Copyright (c) 2011 Gergely Nagy <algernon@balabit.hu> + * Copyright (c) 2011-2012 BalaBit IT Ltd, Budapest, Hungary + * Copyright (c) 2011-2012 Gergely Nagy <algernon@balabit.hu> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -37,6 +37,10 @@ void value_pairs_add_pair(ValuePairs *vp, GlobalConfig *cfg, const gchar *key, c void value_pairs_add_transforms(ValuePairs *vp, gpointer *vpts); +void value_pairs_foreach_sorted(ValuePairs *vp, VPForeachFunc func, + GCompareDataFunc compare_func, + LogMessage *msg, gint32 seq_num, + gpointer user_data); void value_pairs_foreach(ValuePairs *vp, VPForeachFunc func, LogMessage *msg, gint32 seq_num, gpointer user_data); -- 1.7.9
Gergely Nagy <algernon@balabit.hu> writes:
In the two patches that will follow, a subtle, yet very important change will be made to the core of value-pairs: instead of building a GHashTable during a value_pairs_foreach(), we now build a binary tree, that's sorted at insert time, and traversing through it is much faster.
This also buys us sorted keys almost free of charge!
Just did a quick benchmark, and bombarded both the current value-pairs and the GTree version with logs on 10 connections, outputting JSON to a file. The results are very close, within the margin of error, but the GTree version won by about 300 msg/sec more (which is about 0.7% of an increase). There's one other thing I want to do: the ValuePairs structure currently has a GHashTable for the explicitly specified key=value pairs. I will try to turn that into a pointer array instead. This has the advantage of being a lot more efficient than a hash table, with the downside that we allow dups (but due to the sorting, only the latest entry will be used, but all others will be evaluated too). I believe that such dups are going to be reasonably rare, so the performance hit this causes will be outweighted by the performance gain we get from dropping GHashTable. -- |8]
As hinted at earlier, I changed the internals of value-pairs to use a GPtrArray instead of a GHashTable, because of performance considerations, and other reasons. It turns out, that this is not as big a gain as I thought (my tests suggest that the improvement is well within the margin of error, after a dozen of test runs, each for 2 minutes of constant log sending, the difference is about 30 messages / sec). Nevertheless, I like PtrArrays better, not only due to the minor performance gain, but because I plan to reuse them later on for another feature. -- |8]
Instead of storing the name-value pairs explicitly set by the user in a GHashTable, store them in a GPtrArray. This has the advantage of being both lighter and faster to both insert and traverse, at the cost of making collissions more costy. If there are two entries with the same key, the templates of both will be evaluated, but only the latter will be used, which is a minor performance hit. But not using GHashTable outweights that. Signed-off-by: Gergely Nagy <algernon@balabit.hu> --- lib/value-pairs.c | 44 ++++++++++++++++++++++++++++++++------------ 1 files changed, 32 insertions(+), 12 deletions(-) diff --git a/lib/value-pairs.c b/lib/value-pairs.c index 671d4f4..86649dd 100644 --- a/lib/value-pairs.c +++ b/lib/value-pairs.c @@ -39,10 +39,16 @@ typedef struct gboolean include; } VPPatternSpec; +typedef struct +{ + gchar *name; + gchar *template; +} VPPairConf; + struct _ValuePairs { VPPatternSpec **patterns; - GHashTable *vpairs; + GPtrArray *vpairs; GList *transforms; /* guint32 as CfgFlagHandler only supports 32 bit integers */ @@ -148,11 +154,13 @@ value_pairs_add_glob_pattern(ValuePairs *vp, const gchar *pattern, void value_pairs_add_pair(ValuePairs *vp, GlobalConfig *cfg, const gchar *key, const gchar *value) { - LogTemplate *t = NULL; + VPPairConf *p = g_new(VPPairConf, 1); + + p->name = g_strdup(key); + p->template = log_template_new(cfg, NULL); + log_template_compile(p->template, value, NULL); - t = log_template_new(cfg, NULL); - log_template_compile(t, value, NULL); - g_hash_table_insert(vp->vpairs, g_strdup(key), t); + g_ptr_array_add(vp->vpairs, p); } static gchar * @@ -179,15 +187,16 @@ vp_transform_apply (ValuePairs *vp, gchar *key) /* runs over the name-value pairs requested by the user (e.g. with value_pairs_add_pair) */ static void -vp_pairs_foreach(gpointer key, gpointer value, gpointer user_data) +vp_pairs_foreach(gpointer data, gpointer user_data) { ValuePairs *vp = ((gpointer *)user_data)[0]; LogMessage *msg = ((gpointer *)user_data)[2]; gint32 seq_num = GPOINTER_TO_INT (((gpointer *)user_data)[3]); GTree *scope_set = ((gpointer *)user_data)[5]; ScratchBuffer *sb = scratch_buffer_acquire(); + VPPairConf *vpc = (VPPairConf *)data; - log_template_format((LogTemplate *)value, msg, NULL, LTZ_LOCAL, + log_template_format((LogTemplate *)vpc->template, msg, NULL, LTZ_LOCAL, seq_num, NULL, sb_string(sb)); if (!sb_string(sb)->str[0]) @@ -196,7 +205,8 @@ vp_pairs_foreach(gpointer key, gpointer value, gpointer user_data) return; } - g_tree_insert(scope_set, vp_transform_apply(vp, key), sb_string(sb)->str); + g_tree_insert(scope_set, vp_transform_apply(vp, vpc->name), + sb_string(sb)->str); g_string_steal(sb_string(sb)); scratch_buffer_release(sb); } @@ -313,7 +323,7 @@ value_pairs_foreach_sorted (ValuePairs *vp, VPForeachFunc func, vp_merge_set(vp, msg, seq_num, all_macros, scope_set); /* Merge the explicit key-value pairs too */ - g_hash_table_foreach(vp->vpairs, (GHFunc) vp_pairs_foreach, args); + g_ptr_array_foreach(vp->vpairs, (GFunc)vp_pairs_foreach, args); /* Aaand we run it through the callback! */ g_tree_foreach(scope_set, (GTraverseFunc)func, user_data); @@ -355,6 +365,16 @@ value_pairs_init_set(ValuePairSpec *set) } } +static void +vp_free_pair(gpointer data) +{ + VPPairConf *vpc = (VPPairConf *)data; + + log_template_unref(vpc->template); + g_free(vpc->name); + g_free(vpc); +} + ValuePairs * value_pairs_new(void) { @@ -363,8 +383,8 @@ value_pairs_new(void) GArray *a; vp = g_new0(ValuePairs, 1); - vp->vpairs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, - (GDestroyNotify) log_template_unref); + vp->vpairs = g_ptr_array_sized_new(8); + g_ptr_array_set_free_func(vp->vpairs, vp_free_pair); if (!value_pair_sets_initialized) { @@ -403,7 +423,7 @@ value_pairs_free (ValuePairs *vp) gint i; GList *l; - g_hash_table_destroy(vp->vpairs); + g_ptr_array_free(vp->vpairs, TRUE); for (i = 0; i < vp->patterns_size; i++) { -- 1.7.9
Hi, Applied all patches in the series to 3.4 master. Thanks Gergely. On Fri, 2012-02-10 at 17:35 +0100, Gergely Nagy wrote:
In the two patches that will follow, a subtle, yet very important change will be made to the core of value-pairs: instead of building a GHashTable during a value_pairs_foreach(), we now build a binary tree, that's sorted at insert time, and traversing through it is much faster.
This also buys us sorted keys almost free of charge!
And once we have that, the second patch exports a value_pairs_foreach_sorted() function, so that modules who want to override the default alphabetical sort will be able to do so.
-- |8]
______________________________________________________________________________ Member info: https://lists.balabit.hu/mailman/listinfo/syslog-ng Documentation: http://www.balabit.com/support/documentation/?product=syslog-ng FAQ: http://www.balabit.com/wiki/syslog-ng-faq
-- Bazsi
On Sat, 2012-02-25 at 17:03 +0100, Balazs Scheidler wrote:
Hi,
Applied all patches in the series to 3.4 master. Thanks Gergely.
I've applied some followup patches to make it compileable with old glib versions. Hope I didn't mess up anything. -- Bazsi
Balazs Scheidler <bazsi@balabit.hu> writes:
On Sat, 2012-02-25 at 17:03 +0100, Balazs Scheidler wrote:
Hi,
Applied all patches in the series to 3.4 master. Thanks Gergely.
I've applied some followup patches to make it compileable with old glib versions. Hope I didn't mess up anything.
I've had a look at the followups earlier today, and they seemed to be fine, I would've done pretty much the same if I didn't forget what glib version we're targetting O:) -- |8]
participants (2)
-
Balazs Scheidler
-
Gergely Nagy