[syslog-ng]performance test questions

Balazs Scheidler syslog-ng@lists.balabit.hu
Thu, 20 Feb 2003 18:15:16 +0100


--EVF5PPMfhYS0aIcm
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: inline

On Thu, Feb 20, 2003 at 04:47:53PM +0100, Roberto Nibali wrote:
> Hi,
> 
> >no, your feedback is appreciated.
> 
> Another thing is the rather high performance loss with enabled templates. I 
> understand that in case of templates, more of a message has to be inspected 
> but I'm asking myself if there would be a possibility to add some kind of a 
> caching mechanism?

I'm not really surprised the macro expansion has performance problems. When
I wrote that code only 6-8 macros existed, I've checked my table and it
contains 51 entries. All looked up sequentially. Very bad performance wise.

Attached you'll find a patch which changes this to a faster algorithm. I
have also tested it.

> >http://www.balabit.com/downloads/syslog-ng/1.5/src-snapshot/
> 
> Nice, except the hindering shockwave and html stuff around that on the 
> page. Would it be possible to export it to a txt-only directory (for the 
> links/lynx users)? ;)

Our new webmaster wanted to remove the old 'apache directory index' pages
specifically. You can grab the tree with wget however.

> 
> >Can you check if it works for you?
> 
> I'll compile and beat it tonight.

The new snapshot to be generated tomorrow at 6AM CET will contain the patch
I've attached now.

-- 
Bazsi
PGP info: KeyID 9AF8D0A9 Fingerprint CD27 CFB0 802C 0944 9CFD 804E C82C 8EB1

--EVF5PPMfhYS0aIcm
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: attachment; filename="macro-performance.diff"

Index: macros.c
===================================================================
RCS file: /var/cvs/syslog-ng/syslog-ng/src/macros.c,v
retrieving revision 1.1
diff -u -r1.1 macros.c
--- macros.c	31 Jan 2003 14:26:48 -0000	1.1
+++ macros.c	20 Feb 2003 16:56:31 -0000
@@ -63,6 +63,7 @@
 #include <string.h>
 #include <arpa/inet.h>
 #include <stdio.h>
+#include <assert.h>
 
 #include "format.h"
 #include "cfgfile.h"
@@ -73,10 +74,6 @@
 #define MAX_MACRO_ARGS        32
 #define MAX_EXPANDED_MACRO    2048
 
-struct macro_def {
-	char *name;
-	int id;
-};
 
 #define M_FACILITY 0
 #define M_LEVEL    1
@@ -413,88 +410,148 @@
 	default:
 		break;
 	}
-	if (length < 0 || length > *left) 
+	if (length < 0 || (unsigned int) length > *left) 
 		length = *left;
 
 	*left -= length;
 	*dest += length;
 }
 
+struct macro_def {
+	char *name;
+	int id;
+	int len;
+};
+
+static struct macro_def macros[] = {
+	{ "FACILITY", M_FACILITY },
+	{ "PRIORITY", M_LEVEL },
+	{ "LEVEL", M_LEVEL },
+	{ "TAG", M_TAG },
+
+	{ "DATE", M_DATE },
+	{ "FULLDATE", M_FULLDATE },
+	{ "ISODATE", M_ISODATE },
+	{ "YEAR", M_YEAR },
+	{ "MONTH", M_MONTH },
+	{ "DAY", M_DAY },
+	{ "HOUR", M_HOUR },
+	{ "MIN", M_MIN },
+	{ "SEC", M_SEC },
+	{ "WEEKDAY", M_WEEKDAY },
+	{ "UNIXTIME", M_UNIXTIME },
+	{ "TZOFFSET", M_TZOFFSET },
+	{ "TZ", M_TZ },
+
+	{ "R_DATE", M_DATE_RECVD },
+	{ "R_FULLDATE", M_FULLDATE_RECVD },
+	{ "R_ISODATE", M_ISODATE_RECVD },
+	{ "R_YEAR", M_YEAR_RECVD },
+	{ "R_MONTH", M_MONTH_RECVD },
+	{ "R_DAY", M_DAY_RECVD },
+	{ "R_HOUR", M_HOUR_RECVD },
+	{ "R_MIN", M_MIN_RECVD },
+	{ "R_SEC", M_SEC_RECVD },
+	{ "R_WEEKDAY", M_WEEKDAY_RECVD },
+	{ "R_UNIXTIME", M_UNIXTIME_RECVD },
+	{ "R_TZOFFSET", M_TZOFFSET_RECVD },
+	{ "R_TZ", M_TZ_RECVD },
+
+	{ "S_DATE", M_DATE_STAMP },
+	{ "S_FULLDATE", M_FULLDATE_STAMP },
+	{ "S_ISODATE", M_ISODATE_STAMP },
+	{ "S_YEAR", M_YEAR_STAMP },
+	{ "S_MONTH", M_MONTH_STAMP },
+	{ "S_DAY", M_DAY_STAMP },
+	{ "S_HOUR", M_HOUR_STAMP },
+	{ "S_MIN", M_MIN_STAMP },
+	{ "S_SEC", M_SEC_STAMP },
+	{ "S_WEEKDAY", M_WEEKDAY_STAMP },
+	{ "S_UNIXTIME", M_UNIXTIME_STAMP },
+	{ "S_TZOFFSET", M_TZOFFSET_STAMP },
+	{ "S_TZ", M_TZ_STAMP },
+	
+	{ "HOST_FROM", M_HOST_FROM },
+	{ "FULLHOST_FROM", M_FULLHOST_FROM },
+	{ "HOST", M_HOST },
+	{ "FULLHOST", M_FULLHOST },
+
+	{ "PROGRAM", M_PROGRAM },
+	{ "MSG", M_MESSAGE },
+	{ "MESSAGE", M_MESSAGE },
+	{ "SOURCEIP", M_SOURCE_IP }
+};
+
+static int macro_cache[sizeof(macros) / sizeof(struct macro_def)];
+static int macro_cache_inited = 0;
+
+int 
+cmpmacro(const void *k1, const void *k2)
+{
+	return strcmp(macros[*((int *) k1)].name, macros[*((int *) k2)].name);
+}
+
+void
+cache_macros(void)
+{
+	int i;
+	int num;
+	
+	num = sizeof(macros) / sizeof(struct macro_def);	
+	for (i = 0; i < num; i++) {
+		macro_cache[i] = i;
+		macros[i].len = strlen(macros[i].name);
+	}
+	qsort(macro_cache, num, sizeof(int), cmpmacro);
+}
+
+struct macro_def *
+find_macro(char *template, int template_left)
+{
+	int l, h, m, len, cmp;
+	
+	if (!macro_cache_inited) {
+		cache_macros();
+		macro_cache_inited = 1;
+	}
+	
+	l = 0; 
+	h = sizeof(macros) / sizeof(struct macro_def);
+	while (l <= h) {
+		m = (l + h) >> 1;
+		len = MIN(template_left, macros[macro_cache[m]].len);
+		
+		cmp = strncmp(template, macros[macro_cache[m]].name, len);
+		if (cmp == 0) {
+			return &macros[macro_cache[m]];
+		}
+		else if (cmp < 0) {
+			h = m - 1;
+		}
+		else if (cmp > 0) {
+			l = m + 1;
+		}
+	}
+	return NULL;
+}
+
 struct ol_string *
 expand_macros(struct syslog_config *cfg, struct ol_string *template, int template_escape, struct log_info *msg)
 {
-	static struct macro_def macros[] = {
-		{ "FACILITY", M_FACILITY },
-		{ "PRIORITY", M_LEVEL },
-		{ "LEVEL", M_LEVEL },
-		{ "TAG", M_TAG },
-
-		{ "DATE", M_DATE },
-		{ "FULLDATE", M_FULLDATE },
-		{ "ISODATE", M_ISODATE },
-		{ "YEAR", M_YEAR },
-		{ "MONTH", M_MONTH },
-		{ "DAY", M_DAY },
-		{ "HOUR", M_HOUR },
-		{ "MIN", M_MIN },
-		{ "SEC", M_SEC },
-		{ "WEEKDAY", M_WEEKDAY },		{ "UNIXTIME", M_UNIXTIME },
-		{ "TZOFFSET", M_TZOFFSET },
-		{ "TZ", M_TZ },
-
- 		{ "R_DATE", M_DATE_RECVD },
- 		{ "R_FULLDATE", M_FULLDATE_RECVD },
- 		{ "R_ISODATE", M_ISODATE_RECVD },
- 		{ "R_YEAR", M_YEAR_RECVD },
- 		{ "R_MONTH", M_MONTH_RECVD },
- 		{ "R_DAY", M_DAY_RECVD },
- 		{ "R_HOUR", M_HOUR_RECVD },
- 		{ "R_MIN", M_MIN_RECVD },
- 		{ "R_SEC", M_SEC_RECVD },
- 		{ "R_WEEKDAY", M_WEEKDAY_RECVD },
-		{ "R_UNIXTIME", M_UNIXTIME_RECVD },
-		{ "R_TZOFFSET", M_TZOFFSET_RECVD },
-		{ "R_TZ", M_TZ_RECVD },
- 
- 		{ "S_DATE", M_DATE_STAMP },
- 		{ "S_FULLDATE", M_FULLDATE_STAMP },
- 		{ "S_ISODATE", M_ISODATE_STAMP },
- 		{ "S_YEAR", M_YEAR_STAMP },
- 		{ "S_MONTH", M_MONTH_STAMP },
- 		{ "S_DAY", M_DAY_STAMP },
- 		{ "S_HOUR", M_HOUR_STAMP },
- 		{ "S_MIN", M_MIN_STAMP },
- 		{ "S_SEC", M_SEC_STAMP },
- 		{ "S_WEEKDAY", M_WEEKDAY_STAMP },
- 		{ "S_UNIXTIME", M_UNIXTIME_STAMP },
- 		{ "S_TZOFFSET", M_TZOFFSET_STAMP },
- 		{ "S_TZ", M_TZ_STAMP },
-		
-		{ "HOST_FROM", M_HOST_FROM },
-		{ "FULLHOST_FROM", M_FULLHOST_FROM },
-		{ "HOST", M_HOST },
-		{ "FULLHOST", M_FULLHOST },
-
-		{ "PROGRAM", M_PROGRAM },
-		{ "MSG", M_MESSAGE },
-		{ "MESSAGE", M_MESSAGE },
-		{ "SOURCEIP", M_SOURCE_IP }
-	};
 	char format[cfg->log_msg_size + 1], *format_ptr = format;
 	unsigned int left = sizeof(format) - 1;
-	unsigned int i, j;
+	unsigned int i;
 
 	i = 0;
 	while (left && (i < template->length)) {
 
 		if (template->data[i] == '$') {
 			/* beginning of a macro */
-			for (j = 0; j < (sizeof(macros) / sizeof(struct macro_def)); j++) {
-				if (strncmp(macros[j].name, &template->data[i + 1], strlen(macros[j].name)) == 0) {
-					break;
-				}
-			}
-			if (j == (sizeof(macros) / sizeof(struct macro_def))) {
+			struct macro_def *m;
+			
+			m = find_macro(&template->data[i+1], template->length - i - 1);
+			if (m == NULL) {
+				/* skip unknown macro */
 				i++;
 				while ((template->data[i] >= 'A' && 
 					template->data[i] <= 'Z') ||
@@ -502,8 +559,8 @@
 					i++;
 			}
 			else {
-				i += strlen(macros[j].name) + 1;
-				expand_macro(cfg, macros[j].id, template_escape, &format_ptr, &left, msg);
+				i += m->len + 1;
+				expand_macro(cfg, m->id, template_escape, &format_ptr, &left, msg);
 			}
 		}
 		else {

--EVF5PPMfhYS0aIcm--