[syslog-ng] [patch] Add filters to strip unwanted data from incoming syslog messages.

William Pitcock nenolod at sacredspiral.co.uk
Mon Jul 9 13:51:13 CEST 2007


Hello,

A project that I am involved with needs to be able to strip personally 
identifiable data from its syslogs. The attached patch adds filters that 
provide this functionality.

I believe it would be useful if it is included in upstream syslog-ng. 
Please consider it for inclusion in the next release.

Thanks in advance,
   - William
-------------- next part --------------
diff -urN syslog-ng-2.0.4.orig/doc/examples/syslog-ng-anon.conf syslog-ng-2.0.4/doc/examples/syslog-ng-anon.conf
--- syslog-ng-2.0.4.orig/doc/examples/syslog-ng-anon.conf	1969-12-31 18:00:00.000000000 -0600
+++ syslog-ng-2.0.4/doc/examples/syslog-ng-anon.conf	2007-07-08 23:32:28.000000000 -0500
@@ -0,0 +1,243 @@
+#
+# Configuration file for syslog-ng under Debian.
+# Customized for riseup.net using syslog-ng-anon patch
+# (http://dev.riseup.net/patches/syslog-ng/)
+#
+# see http://www.campin.net/syslog-ng/expanded-syslog-ng.conf
+# for examples.
+#
+# levels: emerg alert crit err warning notice info debug
+#
+
+############################################################
+## global options
+
+options {
+    chain_hostnames(0);
+    time_reopen(10);
+    time_reap(360);
+    sync(0);
+    log_fifo_size(2048);
+    create_dirs(yes);
+    group(adm);
+    perm(0640);
+    dir_perm(0755);
+    use_dns(no);
+};
+
+############################################################
+## universal source
+
+source s_all {
+    internal();
+    unix-stream("/dev/log");
+    file("/proc/kmsg" log_prefix("kernel: "));
+};
+
+############################################################
+## generic destinations
+
+destination df_facility_dot_info   { file("/var/log/$FACILITY.info");   };
+destination df_facility_dot_notice { file("/var/log/$FACILITY.notice"); };
+destination df_facility_dot_warn   { file("/var/log/$FACILITY.warn");   };
+destination df_facility_dot_err    { file("/var/log/$FACILITY.err");    };
+destination df_facility_dot_crit   { file("/var/log/$FACILITY.crit");   };
+
+############################################################
+## generic filters
+
+filter f_strip { strip(ips); };
+filter f_at_least_info   { level(info..emerg);   };
+filter f_at_least_notice { level(notice..emerg); };
+filter f_at_least_warn   { level(warn..emerg);   };
+filter f_at_least_err    { level(err..emerg);    };
+filter f_at_least_crit   { level(crit..emerg);   };
+
+############################################################
+## auth.log
+
+filter f_auth { facility(auth, authpriv); };
+destination df_auth { file("/var/log/auth.log"); };
+log {
+    source(s_all);
+    filter(f_auth);
+    destination(df_auth);
+};
+
+############################################################
+## daemon.log
+
+filter f_daemon { facility(daemon); };
+destination df_daemon { file("/var/log/daemon.log"); };
+log {
+    source(s_all);
+    filter(f_daemon);
+    destination(df_daemon);
+};
+
+############################################################
+## kern.log
+
+filter f_kern { facility(kern); };
+destination df_kern { file("/var/log/kern.log"); };
+log {
+    source(s_all);
+    filter(f_kern);
+    destination(df_kern);
+};
+
+############################################################
+## user.log
+
+filter f_user { facility(user); };
+destination df_user { file("/var/log/user.log"); };
+log {
+    source(s_all);
+    filter(f_user);
+    destination(df_user);
+};
+
+############################################################
+## sympa.log
+
+filter f_sympa { program("^(sympa|bounced|archived|task_manager)"); };
+destination d_sympa { file("/var/log/sympa.log"); };
+log {
+	source(s_all);
+	filter(f_sympa);
+	destination(d_sympa);
+	flags(final);
+};
+
+############################################################
+## wwsympa.log
+
+filter f_wwsympa { program("^wwsympa"); };
+destination d_wwsympa { file("/var/log/wwsympa.log"); };
+log {
+	source(s_all);
+	filter(f_wwsympa);
+	filter(f_strip);
+	destination(d_wwsympa);
+	flags(final);
+};
+
+############################################################
+## ldap.log
+
+filter f_ldap { program("slapd"); };
+destination d_ldap { file("/var/log/ldap.log"); };
+log {
+	source(s_all);
+	filter(f_ldap);
+	destination(d_ldap);
+	flags(final);
+};
+
+############################################################
+## postfix.log
+
+# special source because of chroot jail
+#source s_postfix { unix-stream("/var/spool/postfix/dev/log" keep-alive(yes)); }; 
+filter f_postfix { program("^postfix/"); };
+destination d_postfix { file("/var/log/postfix.log"); };
+log {
+	source(s_all);
+	filter(f_postfix);
+	filter(f_strip);
+	destination(d_postfix);
+	flags(final);
+};
+
+############################################################
+## courier.log
+
+filter f_courier { program("courier|imap|pop"); };
+destination d_courier { file("/var/log/courier.log"); };
+log {
+	source(s_all);
+	filter(f_courier);
+	filter(f_strip);
+	destination(d_courier);
+	flags(final);
+};
+
+############################################################
+## maildrop.log -- maildrop messages go to their own file, not courier.log
+
+filter f_maildrop { program("^maildrop"); };
+destination d_maildrop { file("/var/log/maildrop.log"); };
+log {
+	source(s_all);
+	filter(f_maildrop);
+	destination(d_maildrop);
+	flags(final);
+};
+
+############################################################
+## mail.log
+
+filter f_mail { facility(mail); };
+destination df_mail { file("/var/log/mail.log"); };
+
+log {
+    source(s_all);
+    filter(f_mail);
+    destination(df_mail);
+};
+
+############################################################
+## messages.log
+
+filter f_messages {
+	level(debug,info,notice)
+	and not facility(auth,authpriv,daemon,mail,user,kern);
+};
+destination df_messages { file("/var/log/messages.log"); };
+log {
+    source(s_all);
+    filter(f_messages);
+    destination(df_messages);
+};
+
+############################################################
+## errors.log
+
+filter f_errors {
+	level(warn,err,crit,alert,emerg)
+	and not facility(auth,authpriv,daemon,mail,user,kern);
+};
+destination df_errors { file("/var/log/errors.log"); };
+log {
+	source(s_all);
+	filter(f_errors);
+	destination(df_errors);
+};
+
+############################################################
+## emergencies
+
+filter f_emerg { level(emerg); };
+destination du_all { usertty("*"); };
+log {
+	source(s_all);
+	filter(f_emerg);
+	destination(du_all);
+};
+
+############################################################
+## console messages
+
+filter f_xconsole {
+    facility(daemon,mail)
+    or level(debug,info,notice,warn)
+    or (facility(news)
+    and level(crit,err,notice));
+};
+destination dp_xconsole { pipe("/dev/xconsole"); };
+log {
+    source(s_all);
+    filter(f_xconsole);
+    destination(dp_xconsole);
+};
+
diff -urN syslog-ng-2.0.4.orig/doc/Makefile.am syslog-ng-2.0.4/doc/Makefile.am
--- syslog-ng-2.0.4.orig/doc/Makefile.am	2007-04-19 14:37:16.000000000 -0500
+++ syslog-ng-2.0.4/doc/Makefile.am	2007-07-08 23:34:14.000000000 -0500
@@ -6,8 +6,10 @@
 	reference/syslog-ng.xml \
 	reference/syslog-ng.txt \
 	reference/syslog-ng.xsl \
+	reference/README.syslog-ng-anon \
 	examples/syslog-ng.conf.sample \
-	examples/syslog-ng.conf.solaris
+	examples/syslog-ng.conf.solaris \
+	examples/syslog-ng-anon.conf
 
 man_MANS = man/syslog-ng.8 man/syslog-ng.conf.5
 
diff -urN syslog-ng-2.0.4.orig/doc/Makefile.in syslog-ng-2.0.4/doc/Makefile.in
--- syslog-ng-2.0.4.orig/doc/Makefile.in	2007-05-15 09:40:53.000000000 -0500
+++ syslog-ng-2.0.4/doc/Makefile.in	2007-07-08 23:35:39.000000000 -0500
@@ -135,8 +135,10 @@
 	reference/syslog-ng.xml \
 	reference/syslog-ng.txt \
 	reference/syslog-ng.xsl \
+	reference/README.syslog-ng-anon \
 	examples/syslog-ng.conf.sample \
-	examples/syslog-ng.conf.solaris
+	examples/syslog-ng.conf.solaris \
+	examples/syslog-ng-anon.conf
 
 
 man_MANS = man/syslog-ng.8 man/syslog-ng.conf.5
diff -urN syslog-ng-2.0.4.orig/doc/reference/README.syslog-ng-anon syslog-ng-2.0.4/doc/reference/README.syslog-ng-anon
--- syslog-ng-2.0.4.orig/doc/reference/README.syslog-ng-anon	1969-12-31 18:00:00.000000000 -0600
+++ syslog-ng-2.0.4/doc/reference/README.syslog-ng-anon	2007-07-08 23:32:09.000000000 -0500
@@ -0,0 +1,93 @@
+syslog-ng-anon
+
+ This patch adds the capability to syslog-ng that allows you to strip
+ out any given regexp or all IP addresses from log messages before
+ they are written to disk. The goal is to give the system administrator
+ the means to implement site logging policies, by allowing them easy
+ control over exactly what data they retain in their logfiles,
+ regardless of what a particular daemon might think is best.
+
+Background:
+
+ Data retention has become a hot legal topic for ISPs and other Online
+ Service Providers (OSPs). There are many instances where it is preferable
+ to keep less information on users than is collected by default on many
+ systems. In the United States it is not currently required to retain
+ data on users of a server, but you may be required to provide all data
+ on a user which you have retained. OSPs can protect themselves from legal
+ hassles and added work by choosing what data they wish to retain.
+
+ From "Best Practices for Online Service Providers"
+ (http://www.eff.org/osp):
+
+  As an intermediary, the OSP [Online Service Provider] finds itself in
+  a position to collect and store detailed information about its users
+  and their online activities that may be of great interest to third
+  parties. The USA PATRIOT Act also provides the government with
+  expanded powers to request this information. As a result, OSP owners
+  must deal with requests from law enforcement and lawyers to hand over
+  private user information and logs. Yet, compliance with these demands
+  takes away from an OSP's goal of providing users with reliable,
+  secure network services. In this paper, EFF offers some suggestions,
+  both legal and technical, for best practices that balance the needs
+  of OSPs and their users' privacy and civil liberties.
+ 
+  Rather than scrubbing the information you don't want in logs, this patch
+  ensures that the information is never written to disk. Also, for those 
+  daemons which log through syslog facilities, this patch provides a 
+  convenient single configuration to limit what you wish to log.
+  
+  Here are some related links:
+  
+  Best Practices for Online Service Providers
+  http://www.eff.org/osp
+  http://www.eff.org/osp/20040819_OSPBestPractices.pdf
+  
+  EPIC International Data Retention Page
+  http://www.epic.org/privacy/intl/data_retention.html
+  
+  Working Paper on Usage Log Data Management (from Computer, Freedom, and 
+  Privacy conference) http://cryptome.org/usage-logs.htm
+  
+
+Installing syslog-ng-anon 
+  
+ Applying the patch
+
+  This patch has been tested against the following versions of syslog-ng:
+ 	. version 1.6.7
+ 	. Debian package syslog-ng_1.6.7-2
+
+
+  To use this patch, obtain the source for syslog-ng 
+  (http://www.balabit.com/downloads/syslog-ng/1.6/src/) and the latest
+  syslog-ng-anon patch (http://dev.riseup.net/patches/syslog-ng/). 
+  Uncompress the syslog-ng source and then apply the patch:
+
+  % tar -zxvf syslog-ng.tar.gz
+  % cd syslog-ng
+  % patch -p1 < syslog-ng-anon.diff
+ 
+  Then compile and install syslog-ng as normal.
+
+ Debian package
+
+  Alternately, you can install syslog-ng-anon from this repository:
+  deb http://deb.riseup.net/debian unstable main
+
+ How to use it
+
+  This patch adds the filter "strip". For example:
+
+ 	filter f_strip {strip(<regexp>);};
+
+  This strips every match of the regular expression from the logs to
+  which the filter is applied, replacing each match with a fixed-length
+  string of four dashes ("----").
+
+  In place of a regular expression, you can put "ips", which will replace all
+  internet addresses with 0.0.0.0. For example:
+
+ 	filter f_strip {strip(ips);};
+
+  You can choose the replacement string yourself with replace, e.g.: filter f_replace {replace(<regexp> <replacement>);};
diff -urN syslog-ng-2.0.4.orig/src/cfg-grammar.y syslog-ng-2.0.4/src/cfg-grammar.y
--- syslog-ng-2.0.4.orig/src/cfg-grammar.y	2007-04-20 15:24:08.000000000 -0500
+++ syslog-ng-2.0.4/src/cfg-grammar.y	2007-07-08 23:38:07.000000000 -0500
@@ -107,7 +107,7 @@
 %token KW_USE_TIME_RECVD
 
 /* filter items*/
-%token KW_FACILITY KW_LEVEL KW_HOST KW_MATCH KW_NETMASK
+%token KW_FACILITY KW_LEVEL KW_HOST KW_MATCH KW_NETMASK KW_STRIP KW_REPLACE
 
 /* yes/no switches */
 %token KW_YES KW_NO
@@ -803,6 +803,8 @@
 	| KW_PROGRAM '(' string ')'		{ $$ = filter_prog_new($3); free($3); }
 	| KW_HOST '(' string ')'		{ $$ = filter_host_new($3); free($3); }	
 	| KW_MATCH '(' string ')'		{ $$ = filter_match_new($3); free($3); }
+	| KW_STRIP '(' string ')'		{ $$ = filter_strip_new($3); free($3); }
+	| KW_REPLACE '(' string string ')'	{ $$ = filter_replace_new($3, $4); free($3); free($4); }
 	| KW_FILTER '(' string ')'		{ $$ = filter_call_new($3, configuration); free($3); }
 	| KW_NETMASK '(' string ')'		{ $$ = filter_netmask_new($3); free($3); }
 	;
@@ -908,4 +910,4 @@
   last_reader_options = NULL;
   last_writer_options = NULL;
   last_template = NULL;
-}
\ No newline at end of file
+}
diff -urN syslog-ng-2.0.4.orig/src/cfg-lex.l syslog-ng-2.0.4/src/cfg-lex.l
--- syslog-ng-2.0.4.orig/src/cfg-lex.l	2007-04-19 14:37:16.000000000 -0500
+++ syslog-ng-2.0.4/src/cfg-lex.l	2007-07-08 23:38:51.000000000 -0500
@@ -165,6 +165,8 @@
         { "host",               KW_HOST },
         { "match",		KW_MATCH },
         { "netmask",		KW_NETMASK },
+	{ "strip",		KW_STRIP },
+	{ "replace",		KW_REPLACE },
 
 	/* on/off switches */
 	{ "yes",		KW_YES },
diff -urN syslog-ng-2.0.4.orig/src/filter.c syslog-ng-2.0.4/src/filter.c
--- syslog-ng-2.0.4.orig/src/filter.c	2007-04-29 11:59:54.000000000 -0500
+++ syslog-ng-2.0.4/src/filter.c	2007-07-09 00:29:40.000000000 -0500
@@ -226,6 +226,7 @@
 typedef struct _FilterRE
 {
   FilterExprNode super;
+  GString *replace;   /* replacement text set by filter_replace_new(); filter_re_free() skips it when NULL */
   regex_t regex;
 } FilterRE;
 
@@ -310,6 +311,9 @@
 filter_re_free(FilterExprNode *s)
 {
   FilterRE *self = (FilterRE *) s;
+
+  if (self->replace != NULL)
+    g_string_free(self->replace, TRUE);
   
   regfree(&self->regex);
   g_free(s);
@@ -494,3 +498,88 @@
   self->super.eval = filter_netmask_eval;
   return &self->super;
 }
+
+/* Build a strip() filter: "ips" is replaced with 0.0.0.0, any other regexp with "----". */
+FilterExprNode *
+filter_strip_new(const gchar *re)
+{
+  gboolean strip_ips = (g_ascii_strcasecmp(re, "ips") == 0);
+
+  return filter_replace_new(re, strip_ips ? "0.0.0.0" : "----");
+}
+
+/*
+ * Eval callback for strip()/replace(): rewrites log->msg in place, replacing
+ * every match of self->regex with self->replace.  Always returns TRUE,
+ * regardless of whether anything matched.
+ *
+ * The result is assembled in a growable GString, so long messages are neither
+ * truncated nor read or written out of bounds, and empty matches cannot spin
+ * forever.
+ */
+static gboolean
+filter_replace_eval(FilterExprNode *s, LogMessage *log)
+{
+  FilterRE *self = (FilterRE *) s;
+  const gchar *cursor = log->msg.str;
+  regmatch_t pmatch;
+  GString *result;
+
+  if (regexec(&self->regex, cursor, 1, &pmatch, 0) != 0)
+    return TRUE;
+
+  result = g_string_sized_new(log->msg.len);
+  do
+    {
+      /* copy the text preceding the match, then the replacement */
+      g_string_append_len(result, cursor, pmatch.rm_so);
+      g_string_append_len(result, self->replace->str, self->replace->len);
+      cursor += pmatch.rm_eo;
+
+      if (pmatch.rm_eo == pmatch.rm_so)
+        {
+          /* empty match: step over one character so the loop terminates */
+          if (*cursor == '\0')
+            break;
+          g_string_append_c(result, *cursor);
+          cursor++;
+        }
+    }
+  while (regexec(&self->regex, cursor, 1, &pmatch, REG_NOTBOL) == 0);
+
+  /* copy the unmatched tail; cursor still points into the NUL-terminated message */
+  g_string_append(result, cursor);
+  g_string_assign(&log->msg, result->str);
+  g_string_free(result, TRUE);
+
+  return TRUE;
+}
+
+/*
+ * Create a filter node replacing every match of `re` with `replacement`; "ips"
+ * (case-insensitive) selects a built-in IPv4 pattern accepting '.' or '-'
+ * separators.  Logs an error and returns NULL if `re` does not compile.
+ */
+FilterExprNode *
+filter_replace_new(const gchar *re, const gchar *replacement)
+{
+  FilterRE *self = g_new0(FilterRE, 1);
+  gint regerr;
+  /* the first octet needs its own group so the parentheses stay balanced */
+  if (g_ascii_strcasecmp(re, "ips") == 0)
+    re = "(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])([.-](25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])){3}";
+  regerr = regcomp(&self->regex, re, REG_ICASE | REG_EXTENDED);
+  if (regerr)
+    {
+      gchar errorbuf[256];
+      regerror(regerr, &self->regex, errorbuf, sizeof(errorbuf));
+      msg_error("Error compiling regular expression:",
+                evt_tag_str("re", re), evt_tag_str("error", errorbuf), NULL);
+      g_free(self);
+      return NULL;
+    }
+  self->replace = g_string_new(replacement);
+  self->super.eval = filter_replace_eval;
+  self->super.free_fn = filter_re_free;
+  return &self->super;
+}
diff -urN syslog-ng-2.0.4.orig/src/filter.h syslog-ng-2.0.4/src/filter.h
--- syslog-ng-2.0.4.orig/src/filter.h	2007-04-19 14:37:16.000000000 -0500
+++ syslog-ng-2.0.4/src/filter.h	2007-07-09 00:10:57.000000000 -0500
@@ -54,6 +54,8 @@
 FilterExprNode *filter_match_new(gchar *re);
 FilterExprNode *filter_call_new(gchar *rule, struct _GlobalConfig *cfg);
 FilterExprNode *filter_netmask_new(gchar *cidr);
+FilterExprNode *filter_strip_new(const gchar *re);
+FilterExprNode *filter_replace_new(const gchar *re, const gchar *replacement);
 
 typedef struct _LogFilterRule
 {


More information about the syslog-ng mailing list