[syslog-ng]Re: Unaligned trap errors on Alpha-Linux

Jon Stearley jrstear@sandia.gov
Fri, 28 Sep 2001 09:39:43 -0600


--k1lZvvs/B4yU6o8G
Content-Type: text/plain; 
 charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: 7bit

On Sun, Sep 23, 2001 at 04:17:38PM -0600, jrstear wrote:
> has a fix been found to the "unaligned trap" messages alpha users 
> have been seeing?  

i found the exact c lines causing the unalignments (see attached), but
was not able to fix the code.  perhaps someone here can.

i also found an option for the compaq compiler which fixes the
unalignment problems on alpha (see attached).  that's my fix, and i'll
leave the code to a better programmer than me.

fyi.

-- 
+--------------------------------------------------------------+
| Jon Stearley			(505) 845-7571  (FAX 844-2067) |
| Compaq Federal LLC		High Performance Solutions     |
| Sandia National Laboratories	Scalable Systems Integration   |
+--------------------------------------------------------------+

--k1lZvvs/B4yU6o8G
Content-Type: text/plain; 
 charset=us-ascii
Content-Disposition: attachment; 
 filename=syslog-notes
Content-Transfer-Encoding: 7bit

# here's a set of syslog-ng messages indicating a problem:
  Sep 25 16:07:31 local@ross-sss2-0 dhcpd: BOOTREQUEST from 00:d0:58:cf:ce:00 via eth1
  Sep 25 16:07:49 local@ross-sss2-0 syslog-ng(32110): unaligned trap at 000000012000664c: 000000012012ddc4 2d 1
  Sep 25 16:07:49 local@ross-sss2-0 syslog-ng(32110): unaligned trap at 00000001200066ac: 000000012012ddc4 29 3
  Sep 25 16:07:49 local@ross-sss2-0 syslog-ng(32110): unaligned trap at 0000000120006728: 000000012012dd9c 2d 1
  Sep 25 16:07:49 local@ross-sss2-0 syslog-ng(32110): unaligned trap at 0000000120006730: 000000012012ddc4 29 2
  Sep 25 16:07:31 local@ross-sss2-0 dhcpd: No applicable record for BOOTP host 00:d0:58:cf:ce:00 via eth1
  Sep 25 16:07:49 local@ross-sss2-0 syslog-ng(32110): unaligned trap at 000000012000664c: 000000012012ddc4 2d 1
  Sep 25 16:07:49 local@ross-sss2-0 syslog-ng(32110): unaligned trap at 00000001200066ac: 000000012012ddc4 29 3
  Sep 25 16:07:49 local@ross-sss2-0 syslog-ng(32110): unaligned trap at 0000000120006728: 000000012012dd9c 2d 1
  Sep 25 16:07:49 local@ross-sss2-0 syslog-ng(32110): unaligned trap at 0000000120006730: 000000012012ddc4 29 2
  Sep 25 16:08:08 local@ross-sss2-0 dhcpd: BOOTREQUEST from 00:d0:58:cf:ce:00 via eth1

# compile with debugging (gcc -g)
# run under gdb (gdb syslog-ng)
# it will appear to barf, (i think) because gdb can't keep track of
# multiple threads (assuming it's multithreaded), anyway, once it is
# loaded you can dissassemble it whether it died or not
(gdb) disassemble 0x01200066ac
# ah, the problem is in function parse_log_msg
(gdb) disassemble parse_log_msg
# i saved this to a file for easy review/examination
# this line in the dissassemble output shows that the offending
# instruction occurs +812 bytes (decimal) into the function
  0x1200066ac <parse_log_msg+812>:        ldq     t2,56(t0)

# find parse_log_msg in source, in this case it's in log.c.
# now examine log.o using objdump
  objdump -S -d log.o |less
  # note that parse_log_msg begins at 0xc0
  00000000000000c0 <parse_log_msg>:

# use bc for some simple hex arithmetic (bc requires hex in CAPS)
  bc
    obase=16
    812
    32C
    ibase=16
    C0 + 32C
    3EC
# so, look at 0x3EC in the objdump output
   ...
   3dc:   00 40 5b 6b     jsr     ra,(t12),3e0 <parse_log_msg+0x320>
   3e0:   00 00 ba 27     ldah    gp,0(ra)
   3e4:   00 00 bd 23     lda     gp,0(gp)
                          strptime(lm->date->data, "%b %e %H:%M:%S", &tm);
   3e8:   10 00 2f a4     ldq     t0,16(fp)
   3ec:   38 00 61 a4     ldq     t2,56(t0)
   3f0:   02 94 61 40     addq    t2,0xc,t1
   3f4:   01 14 ea 41     addq    fp,0x50,t0
   ...

# this trap results from the strptime call!

# do similarly to find the others.  see attached log.c commented where
# all the unalignments happen.  they are all close together in the
# code, three involve lm->date, and one involves lm->stamp/
# further examination of the code shows that this memory is aloocated
# by libol.  

# from examination of kernel source and real-time observation of trap
# occurance, the trap messages are processed/printed after the message
# which causes it.  messages pass through sockets, and are buffered.
# therefore it is not always the case that the message which generates
# the trap appears immediately before the trap messages.  the "bad"
# message can determined by real-time observation however.  the trap
# messages always appear as the 4-trap-message sequence, ie never a
# single trap, always the same sequence of four.

# intimidated by libol's memory management and garbage collection
# (which i'm not even sure is the culprit, maybe it is syslog-ng's use
# of the memory), i decided to further investigate compaq's linux
# compiler.  sure enough, if compiled with compaq's compiler using
# "-assume noaligned_objects", the trap messages no longer occur.
# this results in longword moves taking five instructions rather than
# a single one, but this is far faster than kernel traps, which (as
# kernel source comments say) is extremely expensive and should be
# fixed.  my errors are gone, i'll leave the actual code to someone
# else.

--k1lZvvs/B4yU6o8G
Content-Type: text/x-csrc; 
 charset=unknown-8bit
Content-Disposition: attachment; 
 filename=log.c
Content-Transfer-Encoding: 8bit

/***************************************************************************
 *
 * Copyright (c) 1999 Balázs Scheidler
 * Copyright (c) 1999-2001 BalaBit IT Ltd.
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Inspired by nsyslog, originally written by Darren Reed.
 *
 * $Id: log.c,v 1.19.4.3 2001/06/11 07:44:55 bazsi Exp $
 *
 ***************************************************************************/

#include "log.h"
#include "xalloc.h"
#include "format.h"

#include <sys/time.h>
#include <syslog.h>
#include <ctype.h>
#include <unistd.h>
#include <time.h>
#include <string.h>

#define CLASS_DEFINE
#include "log.h.x"
#undef CLASS_DEFINE

static void parse_log_msg(struct log_info *lm, UINT32 length, UINT8 *data)
{
	char *src;
	int left;
	int pri;
	time_t now = time(NULL);
	char *oldsrc;
	int oldleft;
	
	src = data;
	left = length;
	
	if (left && src[0] == '<') {
		src++;
		left--;
		pri = 0;
		while (left && *src != '>') {
			if (isdigit(*src)) {
				pri = pri * 10 + ((*src) - '0');
			}
			else {
				lm->msg = c_format_cstring("unparseable log message: \"%s\"", length, data);
				lm->pri = LOG_SYSLOG | LOG_ERR;
				return;
			}
			src++;
			left--;
		}
		lm->pri = pri;
		if (left) {
			src++;
			left--;
		}
	}
	else {
		lm->pri = LOG_USER | LOG_NOTICE;
	}
	while (left && *src == ' ') {
		src++;
		left--;
	}
	if (left >= 15) {
		if (src[3] == ' ' && src[6] == ' ' && src[9] == ':' && src[12] == ':') {
			struct tm tm, *nowtm;

			/* ##### unaligned trap from next line */
			lm->date = c_format_cstring("%s", 15, src);
			src += 15;
			left -= 15;

			nowtm = localtime(&now);
			memset(&tm, 0, sizeof(tm));
			/* ##### unaligned trap from next line */
			strptime(lm->date->data, "%b %e %H:%M:%S", &tm);
			tm.tm_isdst = -1;
			tm.tm_year = nowtm->tm_year;
			if (tm.tm_mon > nowtm->tm_mon)
				tm.tm_year--;
			/* ##### unaligned trap from next line */
			lm->stamp = mktime(&tm);
		}
	}

	/* ##### unaligned trap from next line */
	if (lm->date) {
		while (left && *src == ' ') {
			src++; /* skip whitespace */
			left--;
		}
		oldsrc = src;	
		oldleft = left;
		while (left && *src != ' ' && *src != ':' && *src != '[') {
			src++;
			left--;
		}
		if (left && *src == ' ') {
			/* this was a hostname */
			lm->host = c_format_cstring("%s", oldleft - left, oldsrc);
		}
		else {
			src = oldsrc;
			left = oldleft;
		}

		while (left && *src == ' ') {
			src++; /* skip whitespace */
			left--;
		}
		
		oldsrc = src;
		oldleft = left;

		while (left && *src != ':' && *src != '[') {
			src++;
			left--;
		}
		if (left) {
			lm->program = c_format_cstring("%s", oldleft - left, oldsrc);
		}
		src = oldsrc;
		left = oldleft;
	}
	else {
		oldsrc = src;
		oldleft = left;
		if ((lm->pri & LOG_FACMASK) == LOG_KERN) {
			lm->program = c_format_cstring("kernel");
		}
		else {
			while (left && *src != ' ' && *src != '[' && *src != ':' && 
			       *src != '/' && *src != ',' && *src != '<') {
				src++;
				left--;
			}
			if (left) {
				lm->program = c_format_cstring("%s", oldleft - left, oldsrc);
			}
			left = oldleft;
			src = oldsrc;
		}
		lm->stamp = now;
	}

	for (oldsrc = src, oldleft = left; oldleft >= 0; oldleft--, oldsrc++) {
		if (*oldsrc == '\n' || *oldsrc == '\r') *oldsrc = ' ';
	}
	lm->msg = c_format_cstring("%s", left, src);
}

struct log_info *log_info_use(struct log_info *msg)
{
	msg->use_cnt++;
	return msg;
}

void log_info_free(struct log_info *msg)
{
	if (--msg->use_cnt == 0) {
		ol_string_free(msg->host);
		ol_string_free(msg->program);
		ol_string_free(msg->date);
		ol_string_free(msg->msg);
		ol_space_free(msg);
	}
}

struct log_info *make_log_info(UINT32 length, UINT8 *msg)
{
	struct log_info *self;

	NEW_SPACE(self);
  	parse_log_msg(self, length, msg);
	self->use_cnt = 1;
  	self->recvd = time(NULL);
	return self;
}

struct log_info *make_internal_message(UINT32 pri, UINT32 length, UINT8 *data)
{
	struct log_info *self;

	NEW_SPACE(self);
	self->msg = c_format_cstring("syslog-ng[%i]: %s", getpid(), length, data);
	self->program = c_format_cstring("syslog-ng");
	self->stamp = self->recvd = time(NULL);
	self->pri = pri;
	self->flags = LF_INTERNAL;
	self->use_cnt = 1;
	return self;
}

struct log_info *make_mark_message(void)
{
	struct log_info *self = make_internal_message(LOG_SYSLOG | LOG_NOTICE, 12, "--- MARK ---");
	self->flags |= LF_MARK;
	return self;
}

--k1lZvvs/B4yU6o8G--