[syslog-ng] Filtering duplicate messages

chris packham chris.packham at alliedtelesis.co.nz
Thu May 29 03:04:09 CEST 2008


Actually the new message does hit the templates, just not the filters.

On Thu, 2008-05-29 at 12:57 +1200, chris packham wrote:
> Here's a rough cut of my patch. It still needs a bit of cleanup and I
> have yet to implement the time-based behaviour. I just wanted to get
> some
> feedback.
> 
> One particular thing I wanted to clarify is where in the log message
> pipeline would best to attach this behaviour to? I've currently
> attached it to the LogWriter. The disadvantage of that is that the new
> message I generate does not get passed through the filters or
templates
> (and we've just done a bunch of wasted processing on a message that
> gets dropped)
> 
> The behaviour implemented is as follows.
> 
> As a log message is output to a destination it is remembered
> as the last message seen.
> 
> When a new message is to be output it is checked against the
> last message seen. If its contents (message text, including
> hostname and pid if present) are the same the message is dropped
> and a couter incremented. If the message is different to the
> last message seen a new log message indicating the value of the
> last message counter will be inserted into the queue and the new
> message will be processed.
> 
> TODO: Implement a time based flush of last seen messages
> 
> ---
>  src/cfg-grammar.y |    3 +-
>  src/cfg-lex.l     |    3 +-
>  src/logmsg.c      |   26 +++++++++++++++++++++++
>  src/logmsg.h      |    2 +
>  src/logwriter.c   |   59
> ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  src/logwriter.h   |    9 ++++++++
>  6 files changed, 99 insertions(+), 3 deletions(-)
> 
> diff --git a/src/cfg-grammar.y b/src/cfg-grammar.y
> index 39ff172..386c231 100644
> --- a/src/cfg-grammar.y
> +++ b/src/cfg-grammar.y
> @@ -62,7 +62,7 @@ gint last_addr_family = AF_INET;
>  %token  KW_USER KW_DOOR KW_SUN_STREAMS KW_PROGRAM
>  
>  /* option items */
> -%token KW_FSYNC KW_MARK_FREQ KW_STATS_FREQ KW_FLUSH_LINES
> KW_FLUSH_TIMEOUT KW_LOG_MSG_SIZE KW_FILE_TEMPLATE KW_PROTO_TEMPLATE
> +%token KW_FSYNC KW_MARK_FREQ KW_STATS_FREQ KW_FLUSH_LINES KW_SUPPRESS
> KW_FLUSH_TIMEOUT KW_LOG_MSG_SIZE KW_FILE_TEMPLATE KW_PROTO_TEMPLATE
>  
>  %token KW_CHAIN_HOSTNAMES KW_NORMALIZE_HOSTNAMES KW_KEEP_HOSTNAME
> KW_CHECK_HOSTNAME KW_BAD_HOSTNAME
>  %token KW_KEEP_TIMESTAMP
> @@ -671,6 +671,7 @@ dest_writer_option
>  	: KW_FLAGS '(' dest_writer_options_flags ')' {
> last_writer_options->options = $3; }
>  	| KW_LOG_FIFO_SIZE '(' NUMBER ')'	{
> last_writer_options->fifo_size = $3; }
>  	| KW_FLUSH_LINES '(' NUMBER ')'		{
> last_writer_options->flush_lines = $3; }
> +	| KW_SUPPRESS '(' yesno ')'		{
> last_writer_options->suppress= $3; }
>  	| KW_FLUSH_TIMEOUT '(' NUMBER ')'	{
> last_writer_options->flush_timeout = $3; }
>  	| KW_TEMPLATE '(' string ')'       	{ 
>  	                                         
> last_writer_options->template = cfg_lookup_template(configuration,
$3);
> diff --git a/src/cfg-lex.l b/src/cfg-lex.l
> index 5c5f54f..4eb0c63 100644
> --- a/src/cfg-lex.l
> +++ b/src/cfg-lex.l
> @@ -6,7 +6,7 @@
>  
> 
>
/***************************************************************************
>   *
> - * Copyright (c) 1999 Balzs Scheidler
> + * Copyright (c) 1999 Bal�zs Scheidler
>   * Copyright (c) 1999-2007 BalaBit IT Ltd.
>   * 
>   * This program is free software; you can redistribute it and/or
modify
> @@ -86,6 +86,7 @@ static struct keyword keywords[] = {
>  	{ "flush_timeout", 	KW_FLUSH_TIMEOUT },
>  	{ "sync_freq", 		KW_FLUSH_LINES },  /* obsolete */
>  	{ "sync", 		KW_FLUSH_LINES },  /* obsolete */
> +	{ "suppress", 		KW_SUPPRESS },
>  	{ "fsync",		KW_FSYNC },
>  	{ "long_hostnames",	KW_CHAIN_HOSTNAMES },
>          { "chain_hostnames",    KW_CHAIN_HOSTNAMES },
> diff --git a/src/logmsg.c b/src/logmsg.c
> index 05354b5..4da3af5 100644
> --- a/src/logmsg.c
> +++ b/src/logmsg.c
> @@ -766,3 +766,29 @@ log_msg_drop(LogMessage *m, guint path_flags)
>      log_msg_ack
(m);
>    log_msg_unref(m);
>  }
> +
> +/**
> + * log_msg_dup:
> + * @m: LogMessage instance
> + *
> + * This function duplicates an existing log message. The new message
is
> not parsed.
> + */
> +LogMessage *
> +log_msg_dup(LogMessage *m)
> +{
> +  LogMessage *nm = g_new0(LogMessage, 1);
> +
> +  log_msg_init(nm, m->saddr);
> +  nm->flags
>  = m->flags;
> +  nm->pri = m->pri;
> +  g_string_assign_len(&nm->date, m->date.str, m->date.len);
> +  g_string_assign_len(&nm->host, m->host.str, m->host.len);
> +  g_string_assign_len(&nm->host_from, m->host_from.str,
> m->host_from.len);
> +  g_string_assign_len(&nm->program, m->program.str, m->program.len);
> +  g_string_assign_len(&nm->msg, m->msg.str, m->msg.len);
> +  memcpy(&nm->stamp, &m->stamp, sizeof(LogStamp));
> +  memcpy(&nm->recvd, &m->recvd, sizeof(LogStamp));
> +
> +  return nm;
> +}
> +
> diff --git a/src/logmsg.h b/src/logmsg.h
> index 420a711..9dbb886 100644
> --- a/src/logmsg.h
> +++ b/src/logmsg.h
> @@ -107,6 +107,8 @@ void log_msg_ack_block_end(LogMessage *m);
>  void log_msg_ack(LogMessage *msg);
>  void log_msg_drop(LogMessage *msg, guint path_flags);
>  void log_msg_clear_matches(LogMessage *self);
> +LogMessage *log_msg_dup(LogMessage *m);
> +
>  
>  gchar *log_msg_find_cr_or_lf(gchar *s, gsize n);
>  
> diff --git a/src/logwriter.c b/src/logwriter.c
> index bb82b43..4843100 100644
> --- a/src/logwriter.c
> +++ b/src/logwriter.c
> @@ -178,11 +178,63 @@ log_writer_watch_new(LogWriter *writer, FDWrite
> *fd)
>    return &self->super;
>  }
>  
> +static gboolean
> +log_writer_supress_duplicate(LogWriter *self, LogMessage *lm, gint
> path_flags)
> +{
> +  if (!self->options->suppress)
> +    return FALSE;
> +
> +  if(self->suppress.hash)
> +    {
> +      guint hash = g_string_hash(&lm->msg);
> +      if(self->suppress.hash == hash)
> +        {
> +          msg_debug("Dropping duplicate message",
> +                    NULL);
> +          self->suppress.count ++;
> +          log_msg_drop(lm, path_flags);
> +          return TRUE;
> +        }
> +
> +      if (self->suppress.count)
> +        {
> +          gchar *buf;
> +          LogMessage *m;
> +          GString *line = g_string_sized_new(128);
> +
> +          buf = g_strdup_printf("<%d> syslog-ng[%d]: Last message
> '%.20s' repeated %d times\n",
> +                                self->suppress.msg->pri,
> +                                getpid(),
> +                                self->suppress.msg->msg.str,
> +                                self->suppress.count);
> +          m = log_msg_new(buf, strlen(buf),
self->suppress.msg->saddr,
> 0, NULL);
> +          g_queue_push_tail(self->queue, m);
> +          g_queue_push_tail(self->queue,
GUINT_TO_POINTER(0x80000000));
> +          g_free(buf);
> +        }
> +      log_msg_drop(self->suppress.msg, 0);
> +      self->suppress.msg = log_msg_dup(lm);
> +      self->suppress.hash = g_string_hash(&lm->msg);
> +      self->suppress.count = 0;
> +    }
> +  else
> +    {
> +      self->suppress.msg = log_msg_dup(lm);
> +      self->suppress.hash = g_string_hash(&lm->msg);
> +      self->suppress.count = 0;
> +    }
> +
> +  return FALSE;
> +}
> +
>  static void
>  log_writer_queue(LogPipe *s, LogMessage *lm, gint path_flags)
>  {
>    LogWriter *self = (LogWriter *) s;
> -  
> +
> +  if(log_writer_supress_duplicate(self, lm, path_flags))
> +    return;
> +
>    if ((self->queue->length / 2) == self->options->fifo_size)
>      {
>        /* drop incoming message, we must ack here, otherwise the
sender
> might
> @@ -462,6 +514,9 @@ log_writer_new(guint32 flags, LogPipe *control,
> LogWriterOptions *options)
>    self->queue = g_queue_new();
>    self->flags = flags;
>    self->control = control;
> +  self->suppress.msg = NULL;
> +  self->suppress.hash = 0;
> +  self->suppress.count = 0;
>    return &self->super;
>  >    options->zone_offset = -1;
>    options->frac_digits = -1;
> +  options->suppress = FALSE;
> +
>  }
> 
 
>  void 
> diff --git a/src/logwriter.h b/src/logwriter.h
> index 284899b..d3a136f 100644
> --- a/src/logwriter.h
> +++ b/src/logwriter.h
> @@ -41,6 +41,13 @@
>  /* several writers use the same counter */
>  #define LWOF_SHARE_STATS     0x0002
>  
> +typedef struct _LogSuppress
> +{
> +  LogMessage *msg;
> +  guint      hash;
> +  guint32    count;
> +} LogSuppress;
> +
>     glong zone_offset;
>    gshort frac_digits;
> +  gboolean suppress;
>  } LogWriterOptions;
>  
>  typedef struct _LogWriter
> @@ 
> -79,6 +87,7 @@ typedef struct _LogWriter
>    gint partial_pos;
>    LogPipe *control;
>    LogWriterOptions *options;
> +  LogSuppress suppress;
>  } LogWriter;
>  
>  void log_writer_set_options(LogWriter *self, LogPipe *control,
> LogWriterOptions *options);
> -- 
> 1.5.4.5
> 
>
______________________________________________________________________________
> Member info: https://lists.balabit.hu/mailman/listinfo/syslog-ng
> Documentation:
http://www.balabit.com/support/documentation/?product=syslog-ng
> FAQ: http://www.campin.net/syslog-ng/faq.html
> 



More information about the syslog-ng mailing list