[syslog-ng] [patch] Add filters to strip unwanted data from incoming
syslog messages.
William Pitcock
nenolod at sacredspiral.co.uk
Mon Jul 9 13:51:13 CEST 2007
Hello,
A project that I am involved with is required to be able to strip
personally identifiable data from its syslogs. The attached patch
implements this functionality.
I believe it would be useful if it is included in upstream syslog-ng.
Please consider it for inclusion in the next release.
Thanks in advance,
- William
-------------- next part --------------
diff -urN syslog-ng-2.0.4.orig/doc/examples/syslog-ng-anon.conf syslog-ng-2.0.4/doc/examples/syslog-ng-anon.conf
--- syslog-ng-2.0.4.orig/doc/examples/syslog-ng-anon.conf 1969-12-31 18:00:00.000000000 -0600
+++ syslog-ng-2.0.4/doc/examples/syslog-ng-anon.conf 2007-07-08 23:32:28.000000000 -0500
@@ -0,0 +1,243 @@
+#
+# Configuration file for syslog-ng under Debian.
+# Customized for riseup.net using syslog-ng-anon patch
+# (http://dev.riseup.net/patches/syslog-ng/)
+#
+# see http://www.campin.net/syslog-ng/expanded-syslog-ng.conf
+# for examples.
+#
+# levels: emerg alert crit err warning notice info debug
+#
+
+############################################################
+## global options
+
+# Global defaults for all sources and destinations in this file.
+# Per the syslog-ng reference manual: hostname chaining and DNS lookups
+# are disabled, output is written without buffering lines (sync(0)), and
+# missing log directories are created. Created files are group "adm",
+# mode 0640; created directories are mode 0755.
+options {
+ chain_hostnames(0);
+ time_reopen(10);
+ time_reap(360);
+ sync(0);
+ log_fifo_size(2048);
+ create_dirs(yes);
+ group(adm);
+ perm(0640);
+ dir_perm(0755);
+ use_dns(no);
+};
+
+############################################################
+## universal source
+
+# The only source in this file; every log statement below reads from it.
+# It combines syslog-ng's internal() source, the local /dev/log socket and
+# /proc/kmsg (whose messages get the "kernel: " prefix).
+source s_all {
+ internal();
+ unix-stream("/dev/log");
+ file("/proc/kmsg" log_prefix("kernel: "));
+};
+
+############################################################
+## generic destinations
+
+# One file per facility and level; $FACILITY is a macro in the file name.
+# NOTE(review): no log statement in this example references these
+# destinations.
+destination df_facility_dot_info { file("/var/log/$FACILITY.info"); };
+destination df_facility_dot_notice { file("/var/log/$FACILITY.notice"); };
+destination df_facility_dot_warn { file("/var/log/$FACILITY.warn"); };
+destination df_facility_dot_err { file("/var/log/$FACILITY.err"); };
+destination df_facility_dot_crit { file("/var/log/$FACILITY.crit"); };
+
+############################################################
+## generic filters
+
+# f_strip uses the strip() filter added by the syslog-ng-anon patch; the
+# special argument "ips" selects IP address anonymization. It rewrites the
+# message instead of selecting messages, so the log statements below pair
+# it with a selecting filter.
+filter f_strip { strip(ips); };
+# Severity ranges: each filter matches the named level up to emerg.
+# NOTE(review): no log statement in this example references them.
+filter f_at_least_info { level(info..emerg); };
+filter f_at_least_notice { level(notice..emerg); };
+filter f_at_least_warn { level(warn..emerg); };
+filter f_at_least_err { level(err..emerg); };
+filter f_at_least_crit { level(crit..emerg); };
+
+############################################################
+## auth.log
+
+# Messages with facility auth or authpriv go to /var/log/auth.log.
+# They are not passed through f_strip, so they are written unmodified.
+filter f_auth { facility(auth, authpriv); };
+destination df_auth { file("/var/log/auth.log"); };
+log {
+ source(s_all);
+ filter(f_auth);
+ destination(df_auth);
+};
+
+############################################################
+## daemon.log
+
+# Messages with facility daemon go to /var/log/daemon.log (no f_strip).
+filter f_daemon { facility(daemon); };
+destination df_daemon { file("/var/log/daemon.log"); };
+log {
+ source(s_all);
+ filter(f_daemon);
+ destination(df_daemon);
+};
+
+############################################################
+## kern.log
+
+# Messages with facility kern go to /var/log/kern.log (no f_strip).
+filter f_kern { facility(kern); };
+destination df_kern { file("/var/log/kern.log"); };
+log {
+ source(s_all);
+ filter(f_kern);
+ destination(df_kern);
+};
+
+############################################################
+## user.log
+
+# Messages with facility user go to /var/log/user.log (no f_strip).
+filter f_user { facility(user); };
+destination df_user { file("/var/log/user.log"); };
+log {
+ source(s_all);
+ filter(f_user);
+ destination(df_user);
+};
+
+############################################################
+## sympa.log
+
+# Programs whose name starts with sympa, bounced, archived or task_manager
+# are logged to /var/log/sympa.log without f_strip.
+# flags(final): per the syslog-ng manual, messages handled here are not
+# processed by later log statements.
+filter f_sympa { program("^(sympa|bounced|archived|task_manager)"); };
+destination d_sympa { file("/var/log/sympa.log"); };
+log {
+ source(s_all);
+ filter(f_sympa);
+ destination(d_sympa);
+ flags(final);
+};
+
+############################################################
+## wwsympa.log
+
+# Programs whose name starts with wwsympa: IP addresses are anonymized by
+# f_strip before the message is written to /var/log/wwsympa.log.
+# flags(final): per the syslog-ng manual, messages handled here are not
+# processed by later log statements.
+filter f_wwsympa { program("^wwsympa"); };
+destination d_wwsympa { file("/var/log/wwsympa.log"); };
+log {
+ source(s_all);
+ filter(f_wwsympa);
+ filter(f_strip);
+ destination(d_wwsympa);
+ flags(final);
+};
+
+############################################################
+## ldap.log
+
+# Programs whose name contains "slapd" (the pattern is not anchored) are
+# logged to /var/log/ldap.log without f_strip.
+# flags(final): per the syslog-ng manual, messages handled here are not
+# processed by later log statements.
+filter f_ldap { program("slapd"); };
+destination d_ldap { file("/var/log/ldap.log"); };
+log {
+ source(s_all);
+ filter(f_ldap);
+ destination(d_ldap);
+ flags(final);
+};
+
+############################################################
+## postfix.log
+
+# special source because of chroot jail
+# (left commented out, so postfix messages are read from s_all like
+# everything else)
+#source s_postfix { unix-stream("/var/spool/postfix/dev/log" keep-alive(yes)); };
+# Programs whose name starts with "postfix/": IP addresses are anonymized
+# by f_strip before the message is written to /var/log/postfix.log.
+# flags(final): per the syslog-ng manual, messages handled here are not
+# processed by later log statements.
+filter f_postfix { program("^postfix/"); };
+destination d_postfix { file("/var/log/postfix.log"); };
+log {
+ source(s_all);
+ filter(f_postfix);
+ filter(f_strip);
+ destination(d_postfix);
+ flags(final);
+};
+
+############################################################
+## courier.log
+
+# Programs whose name contains "courier", "imap" or "pop" (the pattern is
+# not anchored): IP addresses are anonymized by f_strip before the message
+# is written to /var/log/courier.log.
+# flags(final): per the syslog-ng manual, messages handled here are not
+# processed by later log statements.
+filter f_courier { program("courier|imap|pop"); };
+destination d_courier { file("/var/log/courier.log"); };
+log {
+ source(s_all);
+ filter(f_courier);
+ filter(f_strip);
+ destination(d_courier);
+ flags(final);
+};
+
+############################################################
+## maildrop.log
+
+# Programs whose name starts with "maildrop" are logged to
+# /var/log/maildrop.log without f_strip.
+# flags(final): per the syslog-ng manual, messages handled here are not
+# processed by later log statements.
+filter f_maildrop { program("^maildrop"); };
+destination d_maildrop { file("/var/log/maildrop.log"); };
+log {
+ source(s_all);
+ filter(f_maildrop);
+ destination(d_maildrop);
+ flags(final);
+};
+
+############################################################
+## mail.log
+
+# Remaining messages with facility mail go to /var/log/mail.log (no
+# f_strip). This statement comes after the final-flagged statements above,
+# so messages they handled are presumably not repeated here.
+filter f_mail { facility(mail); };
+destination df_mail { file("/var/log/mail.log"); };
+
+log {
+ source(s_all);
+ filter(f_mail);
+ destination(df_mail);
+};
+
+############################################################
+## messages.log
+
+# Catch-all for low-severity messages: levels debug, info and notice from
+# any facility that does not have its own log file above.
+filter f_messages {
+ level(debug,info,notice)
+ and not facility(auth,authpriv,daemon,mail,user,kern);
+};
+destination df_messages { file("/var/log/messages.log"); };
+log {
+ source(s_all);
+ filter(f_messages);
+ destination(df_messages);
+};
+
+############################################################
+## errors.log
+
+# Catch-all for high-severity messages: levels warn through emerg from
+# any facility that does not have its own log file above.
+filter f_errors {
+ level(warn,err,crit,alert,emerg)
+ and not facility(auth,authpriv,daemon,mail,user,kern);
+};
+destination df_errors { file("/var/log/errors.log"); };
+log {
+ source(s_all);
+ filter(f_errors);
+ destination(df_errors);
+};
+
+############################################################
+## emergencies
+
+# Messages at level emerg are sent to usertty("*"), which per the
+# syslog-ng manual means the terminals of all logged-in users.
+filter f_emerg { level(emerg); };
+destination du_all { usertty("*"); };
+log {
+ source(s_all);
+ filter(f_emerg);
+ destination(du_all);
+};
+
+############################################################
+## console messages
+
+# Messages mirrored to the /dev/xconsole pipe: everything from the daemon
+# and mail facilities, anything at level debug through warn, and news
+# messages at level crit, err or notice.
+filter f_xconsole {
+ facility(daemon,mail)
+ or level(debug,info,notice,warn)
+ or (facility(news)
+ and level(crit,err,notice));
+};
+destination dp_xconsole { pipe("/dev/xconsole"); };
+log {
+ source(s_all);
+ filter(f_xconsole);
+ destination(dp_xconsole);
+};
+
diff -urN syslog-ng-2.0.4.orig/doc/Makefile.am syslog-ng-2.0.4/doc/Makefile.am
--- syslog-ng-2.0.4.orig/doc/Makefile.am 2007-04-19 14:37:16.000000000 -0500
+++ syslog-ng-2.0.4/doc/Makefile.am 2007-07-08 23:34:14.000000000 -0500
@@ -6,8 +6,10 @@
reference/syslog-ng.xml \
reference/syslog-ng.txt \
reference/syslog-ng.xsl \
+ reference/README.syslog-ng-anon \
examples/syslog-ng.conf.sample \
- examples/syslog-ng.conf.solaris
+ examples/syslog-ng.conf.solaris \
+ examples/syslog-ng-anon.conf
man_MANS = man/syslog-ng.8 man/syslog-ng.conf.5
diff -urN syslog-ng-2.0.4.orig/doc/Makefile.in syslog-ng-2.0.4/doc/Makefile.in
--- syslog-ng-2.0.4.orig/doc/Makefile.in 2007-05-15 09:40:53.000000000 -0500
+++ syslog-ng-2.0.4/doc/Makefile.in 2007-07-08 23:35:39.000000000 -0500
@@ -135,8 +135,10 @@
reference/syslog-ng.xml \
reference/syslog-ng.txt \
reference/syslog-ng.xsl \
+ reference/README.syslog-ng-anon \
examples/syslog-ng.conf.sample \
- examples/syslog-ng.conf.solaris
+ examples/syslog-ng.conf.solaris \
+ examples/syslog-ng-anon.conf
man_MANS = man/syslog-ng.8 man/syslog-ng.conf.5
diff -urN syslog-ng-2.0.4.orig/doc/reference/README.syslog-ng-anon syslog-ng-2.0.4/doc/reference/README.syslog-ng-anon
--- syslog-ng-2.0.4.orig/doc/reference/README.syslog-ng-anon 1969-12-31 18:00:00.000000000 -0600
+++ syslog-ng-2.0.4/doc/reference/README.syslog-ng-anon 2007-07-08 23:32:09.000000000 -0500
@@ -0,0 +1,93 @@
+syslog-ng-anon
+
+ This patch adds the capability to syslog-ng that allows you to strip
+ out any given regexp or all IP addresses from log messages before
+ they are written to disk. The goal is to give the system administrator
+ the means to implement site logging policies, by allowing them easy
+ control over exactly what data they retain in their logfiles,
+ regardless of what a particular daemon might think is best.
+
+Background:
+
+ Data retention has become a hot legal topic for ISPs and other Online
+ Service Providers (OSPs). There are many instances where it is preferable
+ to keep less information on users than is collected by default on many
+ systems. In the United States it is not currently required to retain
+ data on users of a server, but you may be required to provide all data
+ on a user which you have retained. OSPs can protect themselves from legal
+ hassles and added work by choosing what data they wish to retain.
+
+ From "Best Practices for Online Service Providers"
+ (http://www.eff.org/osp):
+
+ As an intermediary, the OSP [Online Service Provider] finds itself in
+ a position to collect and store detailed information about its users
+ and their online activities that may be of great interest to third
+ parties. The USA PATRIOT Act also provides the government with
+ expanded powers to request this information. As a result, OSP owners
+ must deal with requests from law enforcement and lawyers to hand over
+ private user information and logs. Yet, compliance with these demands
+ takes away from an OSP's goal of providing users with reliable,
+ secure network services. In this paper, EFF offers some suggestions,
+ both legal and technical, for best practices that balance the needs
+ of OSPs and their users' privacy and civil liberties.
+
+ Rather than scrubbing unwanted information from logs after the fact, this
+ patch ensures that the information is never written to disk. Also, for
+ daemons which log through syslog, this patch provides a single, convenient
+ place to configure what you wish to log.
+
+ Here are some related links:
+
+ Best Practices for Online Service Providers
+ http://www.eff.org/osp
+ http://www.eff.org/osp/20040819_OSPBestPractices.pdf
+
+ EPIC International Data Retention Page
+ http://www.epic.org/privacy/intl/data_retention.html
+
+ Working Paper on Usage Log Data Management (from Computer, Freedom, and
+ Privacy conference) http://cryptome.org/usage-logs.htm
+
+
+Installing syslog-ng-anon
+
+ Applying the patch
+
+ This patch has been tested against the following versions of syslog-ng:
+ . version 1.6.7
+ . Debian package syslog-ng_1.6.7-2
+
+
+ To use this patch, obtain the source for syslog-ng
+ (http://www.balabit.com/downloads/syslog-ng/1.6/src/) and the latest
+ syslog-ng-anon patch (http://dev.riseup.net/patches/syslog-ng/).
+ Uncompress the syslog-ng source and then apply the patch:
+
+ % tar -zxvf syslog-ng.tar.gz
+ % cd syslog-ng
+ % patch -p1 < syslog-ng-anon.diff
+
+ Then compile and install syslog-ng as normal.
+
+ Debian package
+
+ Alternately, you can install syslog-ng-anon from this repository:
+ deb http://deb.riseup.net/debian unstable main
+
+ How to use it
+
+ This patch adds the filter "strip". For example:
+
+ filter f_strip {strip(<regexp>);};
+
+ This strips every match of the regular expression from the logs to
+ which the filter is applied, replacing each match with a fixed-length
+ string of four dashes ("----").
+
+ In place of a regular expression, you can put "ips", which will replace all
+ IPv4 addresses with 0.0.0.0 (IPv6 addresses are not matched). For example:
+
+ filter f_strip {strip(ips);};
+
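+ You can choose a different replacement string by using "replace", which
+ takes the regular expression followed by the replacement. For example:
+
+ filter f_replace {replace("<regexp>" "<replacement>");};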
diff -urN syslog-ng-2.0.4.orig/src/cfg-grammar.y syslog-ng-2.0.4/src/cfg-grammar.y
--- syslog-ng-2.0.4.orig/src/cfg-grammar.y 2007-04-20 15:24:08.000000000 -0500
+++ syslog-ng-2.0.4/src/cfg-grammar.y 2007-07-08 23:38:07.000000000 -0500
@@ -107,7 +107,7 @@
%token KW_USE_TIME_RECVD
/* filter items*/
-%token KW_FACILITY KW_LEVEL KW_HOST KW_MATCH KW_NETMASK
+%token KW_FACILITY KW_LEVEL KW_HOST KW_MATCH KW_NETMASK KW_STRIP KW_REPLACE
/* yes/no switches */
%token KW_YES KW_NO
@@ -803,6 +803,8 @@
| KW_PROGRAM '(' string ')' { $$ = filter_prog_new($3); free($3); }
| KW_HOST '(' string ')' { $$ = filter_host_new($3); free($3); }
| KW_MATCH '(' string ')' { $$ = filter_match_new($3); free($3); }
+ | KW_STRIP '(' string ')' { $$ = filter_strip_new($3); free($3); }
+ | KW_REPLACE '(' string string ')' { $$ = filter_replace_new($3, $4); free($3); free($4); }
| KW_FILTER '(' string ')' { $$ = filter_call_new($3, configuration); free($3); }
| KW_NETMASK '(' string ')' { $$ = filter_netmask_new($3); free($3); }
;
@@ -908,4 +910,4 @@
last_reader_options = NULL;
last_writer_options = NULL;
last_template = NULL;
-}
\ No newline at end of file
+}
diff -urN syslog-ng-2.0.4.orig/src/cfg-lex.l syslog-ng-2.0.4/src/cfg-lex.l
--- syslog-ng-2.0.4.orig/src/cfg-lex.l 2007-04-19 14:37:16.000000000 -0500
+++ syslog-ng-2.0.4/src/cfg-lex.l 2007-07-08 23:38:51.000000000 -0500
@@ -165,6 +165,8 @@
{ "host", KW_HOST },
{ "match", KW_MATCH },
{ "netmask", KW_NETMASK },
+ { "strip", KW_STRIP },
+ { "replace", KW_REPLACE },
/* on/off switches */
{ "yes", KW_YES },
diff -urN syslog-ng-2.0.4.orig/src/filter.c syslog-ng-2.0.4/src/filter.c
--- syslog-ng-2.0.4.orig/src/filter.c 2007-04-29 11:59:54.000000000 -0500
+++ syslog-ng-2.0.4/src/filter.c 2007-07-09 00:29:40.000000000 -0500
@@ -226,6 +226,7 @@
typedef struct _FilterRE
{
FilterExprNode super;
+ GString *replace;
regex_t regex;
} FilterRE;
@@ -310,6 +311,9 @@
filter_re_free(FilterExprNode *s)
{
FilterRE *self = (FilterRE *) s;
+
+ if (self->replace != NULL)
+ g_string_free(self->replace, TRUE);
regfree(&self->regex);
g_free(s);
@@ -494,3 +498,88 @@
self->super.eval = filter_netmask_eval;
return &self->super;
}
+
+/*
+ * filter_strip_new:
+ * @re: regular expression to strip, or "ips" (case-insensitive)
+ *
+ * Convenience wrapper around filter_replace_new() which supplies a fixed
+ * placeholder: "0.0.0.0" when @re is "ips", "----" for anything else.
+ *
+ * Returns whatever filter_replace_new() returns for that pair.
+ */
+FilterExprNode *
+filter_strip_new(const gchar *re)
+{
+  const gchar *placeholder = "----";
+
+  if (!g_ascii_strcasecmp(re, "ips"))
+    placeholder = "0.0.0.0";
+
+  return filter_replace_new(re, placeholder);
+}
+
+/* Initial capacity of the rewritten message; the buffer grows on demand. */
+#define NEW_MSG_SIZE 2048
+
+/*
+ * filter_replace_eval:
+ * @s: the filter node, a FilterRE created by filter_replace_new()
+ * @log: the message to rewrite; log->msg is modified in place
+ *
+ * Replaces every match of the node's regular expression in the message
+ * text with the node's replacement string.
+ *
+ * The result is assembled in a growable GString, so it is always
+ * NUL-terminated and is never truncated, however many matches there are
+ * or however long the replacement is.
+ *
+ * Returns TRUE in every case: this node rewrites messages, it never
+ * rejects them.
+ */
+static gboolean
+filter_replace_eval(FilterExprNode *s, LogMessage *log)
+{
+  FilterRE *self = (FilterRE *) s;
+  const gchar *cursor = log->msg.str;
+  GString *rewritten;
+  regmatch_t pmatch;
+
+  /* no match at all: leave the message untouched and skip the allocation */
+  if (regexec(&self->regex, cursor, 1, &pmatch, 0) != 0)
+    return TRUE;
+
+  rewritten = g_string_sized_new(NEW_MSG_SIZE);
+  do
+    {
+      /* copy the text which precedes the match, then the replacement */
+      g_string_append_len(rewritten, cursor, pmatch.rm_so);
+      g_string_append_len(rewritten, self->replace->str, self->replace->len);
+      cursor += pmatch.rm_eo;
+
+      if (pmatch.rm_eo == pmatch.rm_so)
+        {
+          /* An empty match consumes no input.  Copy one character through
+           * so that the scan always makes progress; stop at the end of
+           * the message. */
+          if (*cursor == '\0')
+            break;
+          g_string_append_c(rewritten, *cursor);
+          cursor++;
+        }
+
+      /* cursor no longer points at the start of the message, hence
+       * REG_NOTBOL for every search after the first */
+    }
+  while (regexec(&self->regex, cursor, 1, &pmatch, REG_NOTBOL) == 0);
+
+  /* copy whatever follows the last match */
+  g_string_append(rewritten, cursor);
+
+  /* cursor pointed into log->msg until here, so only overwrite it now */
+  g_string_assign(&log->msg, rewritten->str);
+  g_string_free(rewritten, TRUE);
+
+  return TRUE;
+}
+
+FilterExprNode *
+filter_replace_new(const gchar *re, const gchar *replacement)
+{
+ FilterRE *self = g_new0(FilterRE, 1);
+ gint regerr;
+
+ if (!g_ascii_strcasecmp(re, "ips"))
+ re = "25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])([\\.\\-](25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])){3}";
+
+ regerr = regcomp(&self->regex, re, REG_ICASE | REG_EXTENDED);
+ if (regerr)
+ {
+ gchar errorbuf[256];
+ regerror(regerr, &self->regex, errorbuf, sizeof(errorbuf));
+ msg_error("Error compiling regular expression:",
+ evt_tag_str("re", re),
+ evt_tag_str("error", errorbuf),
+ NULL);
+ g_free(self);
+ return NULL;
+ }
+
+ self->replace = g_string_new(replacement);
+ self->super.eval = filter_replace_eval;
+ self->super.free_fn = filter_re_free;
+
+ return &self->super;
+}
diff -urN syslog-ng-2.0.4.orig/src/filter.h syslog-ng-2.0.4/src/filter.h
--- syslog-ng-2.0.4.orig/src/filter.h 2007-04-19 14:37:16.000000000 -0500
+++ syslog-ng-2.0.4/src/filter.h 2007-07-09 00:10:57.000000000 -0500
@@ -54,6 +54,8 @@
FilterExprNode *filter_match_new(gchar *re);
FilterExprNode *filter_call_new(gchar *rule, struct _GlobalConfig *cfg);
FilterExprNode *filter_netmask_new(gchar *cidr);
+/* Replace every match of re with a fixed placeholder: "0.0.0.0" when re is
+ * "ips" (the built-in IP address pattern), "----" otherwise. */
+FilterExprNode *filter_strip_new(const gchar *re);
+/* Replace every match of re (or of the built-in IP address pattern when re
+ * is "ips") with replacement; returns NULL if re does not compile. */
+FilterExprNode *filter_replace_new(const gchar *re, const gchar *replacement);
typedef struct _LogFilterRule
{
More information about the syslog-ng
mailing list