Sorry, I forgot this while working on something unrelated.
It does seem to iterate endlessly on the valuepairs result, but I don't see why. I'll probably have to reproduce it.
I hope I can work on it later today.
Is there anything else that I can do to help ?On Sun, Oct 20, 2013 at 10:21 PM, Alexandre Biancalana <biancalana@gmail.com> wrote:
Follow the backtrace of thread that is spinning:(gdb) info threads
^C
Program received signal SIGINT, Interrupt.
0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install glib2-2.22.5-7.el6.x86_64 glibc-2.12-1.107.el6.x86_64 json-c-0.10-2.el6.x86_64 keyutils-libs-1.4-4.el6.x86_64 krb5-libs-1.10.3-10.el6.x86_64 libcom_err-1.41.12-14.el6.x86_64 libnet-1.1.5-1.el6.x86_64 libselinux-2.0.94-5.3.el6.x86_64 openssl-1.0.0-27.el6.x86_64 pcre-7.8-6.el6.x86_64 zlib-1.2.3-29.el6.x86_64
2 Thread 0x7ffff7fe3700 (LWP 3480) 0x00007ffff7b9a468 in g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at /usr/include/glib-2.0/glib/gutils.h:412
* 1 Thread 0x7ffff7fed920 (LWP 3477) 0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
(gdb) thread 2
[Switching to thread 2 (Thread 0x7ffff7fe3700 (LWP 3480))]#0 0x00007ffff7b9a468 in g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0)
at /usr/include/glib-2.0/glib/gutils.h:412
412 for (data = *stack_p; data; data = data->next)
(gdb) c
Continuing.
Advancing patterndb current time because of timer tick; utc='1382304120'[Switching to Thread 0x7ffff7fed920 (LWP 3477)]
^C
Program received signal SIGINT, Interrupt.Advancing patterndb current time because of timer tick; utc='1382304127'
0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
(gdb) bt
#0 0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
#1 0x00007ffff7baaecd in iv_fd_epoll_poll (st=0x605df0, active=0x7fffffffe3c0, to=0x7fffffffe400) at iv_fd_epoll.c:131
#2 0x00007ffff7ba9837 in iv_fd_poll_and_run (st=0x605df0, to=0x7fffffffe400) at iv_fd.c:147
#3 0x00007ffff7baa054 in iv_main () at iv_main_posix.c:117
#4 0x00007ffff7b8c517 in main_loop_run () at mainloop.c:736
#5 0x0000000000401641 in main (argc=1, argv=0x7fffffffe548) at main.c:267
(gdb) c
Continuing.
Advancing patterndb current time because of timer tick; utc='1382304128'
Advancing patterndb current time because of timer tick; utc='1382304129'
Advancing patterndb current time because of timer tick; utc='1382304130'
Advancing patterndb current time because of timer tick; utc='1382304131'
Advancing patterndb current time because of timer tick; utc='1382304132'
Advancing patterndb current time because of timer tick; utc='1382304133'
Advancing patterndb current time because of timer tick; utc='1382304134'
Advancing patterndb current time because of timer tick; utc='1382304135'(gdb) info threads
^C
Program received signal SIGINT, Interrupt.
0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
(gdb) bt
#0 0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
#1 0x00007ffff7baaecd in iv_fd_epoll_poll (st=0x605df0, active=0x7fffffffe3c0, to=0x7fffffffe400) at iv_fd_epoll.c:131
#2 0x00007ffff7ba9837 in iv_fd_poll_and_run (st=0x605df0, to=0x7fffffffe400) at iv_fd.c:147
#3 0x00007ffff7baa054 in iv_main () at iv_main_posix.c:117
#4 0x00007ffff7b8c517 in main_loop_run () at mainloop.c:736
#5 0x0000000000401641 in main (argc=1, argv=0x7fffffffe548) at main.c:267
2 Thread 0x7ffff7fe3700 (LWP 3480) g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at /usr/include/glib-2.0/glib/gutils.h:413
* 1 Thread 0x7ffff7fed920 (LWP 3477) 0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
(gdb) thread 2
[Switching to thread 2 (Thread 0x7ffff7fe3700 (LWP 3480))]#0 g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at /usr/include/glib-2.0/glib/gutils.h:413
413 i++;
(gdb) bt
#0 g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at /usr/include/glib-2.0/glib/gutils.h:413
#1 vp_walker_name_split (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at value-pairs.c:455
#2 value_pairs_walker (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at value-pairs.c:494
#3 0x00007ffff70a4876 in g_tree_foreach () from /lib64/libglib-2.0.so.0
#4 0x00007ffff7b9a9d6 in value_pairs_foreach_sorted (vp=0x6d7280, func=0x7ffff7b9a390 <value_pairs_walker>, compare_func=<value optimized out>, msg=0x61f010, seq_num=0, user_data=0x7ffff7fe07e0)
at value-pairs.c:329
#5 0x00007ffff7b9ab0f in value_pairs_walk (vp=0x6d7280, obj_start_func=<value optimized out>, process_value_func=<value optimized out>, obj_end_func=<value optimized out>, msg=0x61f010, seq_num=0, user_data=
0x7ffff7fe0850) at value-pairs.c:534
#6 0x00007ffff3c2ec92 in tf_json_append (self=<value optimized out>, s=0x6d62e0, args=0x7ffff7fe0900, result=0x7fffec0013d8) at format-json.c:193
#7 tf_json_call (self=<value optimized out>, s=0x6d62e0, args=0x7ffff7fe0900, result=0x7fffec0013d8) at format-json.c:206
#8 0x00007ffff7b981a3 in log_template_append_format_with_context (self=0x6d2400, messages=0x7ffff7fe0998, num_messages=1, opts=0x0, tz=0, seq_num=1, context_id=0x0, result=0x7fffec0013d8) at templates.c:1234
#9 0x00007ffff7b9844f in log_template_append_format (self=<value optimized out>, lm=0x61f010, opts=<value optimized out>, tz=<value optimized out>, seq_num=<value optimized out>,
context_id=<value optimized out>, result=0x7fffec0013d8) at templates.c:1261
#10 0x00007ffff3e3cf6b in afamqp_worker_publish (self=0x6d2480, msg=0x61f010) at afamqp.c:432
#11 0x00007ffff3e3da12 in afamqp_worker_insert (arg=0x6d2480) at afamqp.c:474
#12 afamqp_worker_thread (arg=0x6d2480) at afamqp.c:535
#13 0x00007ffff7b8e18b in worker_thread_func (st=0x616600) at misc.c:580
#14 0x00007ffff70a2004 in ?? () from /lib64/libglib-2.0.so.0
#15 0x00007ffff67f6851 in start_thread () from /lib64/libpthread.so.0
#16 0x00007ffff654490d in clone () from /lib64/libc.so.6
(gdb) c
Continuing.
Advancing patterndb current time because of timer tick; utc='1382304156'[Switching to Thread 0x7ffff7fed920 (LWP 3477)]
^C
Program received signal SIGINT, Interrupt.
(gdb) thread 2
0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
[Switching to thread 2 (Thread 0x7ffff7fe3700 (LWP 3480))]#0 g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at /usr/include/glib-2.0/glib/gutils.h:413
413 i++;
(gdb) bt
#0 g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at /usr/include/glib-2.0/glib/gutils.h:413
#1 vp_walker_name_split (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at value-pairs.c:455
#2 value_pairs_walker (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at value-pairs.c:494
#3 0x00007ffff70a4876 in g_tree_foreach () from /lib64/libglib-2.0.so.0
#4 0x00007ffff7b9a9d6 in value_pairs_foreach_sorted (vp=0x6d7280, func=0x7ffff7b9a390 <value_pairs_walker>, compare_func=<value optimized out>, msg=0x61f010, seq_num=0, user_data=0x7ffff7fe07e0)
at value-pairs.c:329
#5 0x00007ffff7b9ab0f in value_pairs_walk (vp=0x6d7280, obj_start_func=<value optimized out>, process_value_func=<value optimized out>, obj_end_func=<value optimized out>, msg=0x61f010, seq_num=0, user_data=
0x7ffff7fe0850) at value-pairs.c:534
#6 0x00007ffff3c2ec92 in tf_json_append (self=<value optimized out>, s=0x6d62e0, args=0x7ffff7fe0900, result=0x7fffec0013d8) at format-json.c:193
#7 tf_json_call (self=<value optimized out>, s=0x6d62e0, args=0x7ffff7fe0900, result=0x7fffec0013d8) at format-json.c:206
#8 0x00007ffff7b981a3 in log_template_append_format_with_context (self=0x6d2400, messages=0x7ffff7fe0998, num_messages=1, opts=0x0, tz=0, seq_num=1, context_id=0x0, result=0x7fffec0013d8) at templates.c:1234
#9 0x00007ffff7b9844f in log_template_append_format (self=<value optimized out>, lm=0x61f010, opts=<value optimized out>, tz=<value optimized out>, seq_num=<value optimized out>,
context_id=<value optimized out>, result=0x7fffec0013d8) at templates.c:1261
#10 0x00007ffff3e3cf6b in afamqp_worker_publish (self=0x6d2480, msg=0x61f010) at afamqp.c:432
#11 0x00007ffff3e3da12 in afamqp_worker_insert (arg=0x6d2480) at afamqp.c:474
#12 afamqp_worker_thread (arg=0x6d2480) at afamqp.c:535
#13 0x00007ffff7b8e18b in worker_thread_func (st=0x616600) at misc.c:580
#14 0x00007ffff70a2004 in ?? () from /lib64/libglib-2.0.so.0
#15 0x00007ffff67f6851 in start_thread () from /lib64/libpthread.so.0
#16 0x00007ffff654490d in clone () from /lib64/libc.so.6
(gdb) c
Continuing.
Advancing patterndb current time because of timer tick; utc='1382304164'
Advancing patterndb current time because of timer tick; utc='1382304165'[Switching to Thread 0x7ffff7fed920 (LWP 3477)]
^C
Program received signal SIGINT, Interrupt.(gdb) thread 2
0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
[Switching to thread 2 (Thread 0x7ffff7fe3700 (LWP 3480))]#0 g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at /usr/include/glib-2.0/glib/gutils.h:413
413 i++;
(gdb) bt
#0 g_trash_stack_height (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at /usr/include/glib-2.0/glib/gutils.h:413
#1 vp_walker_name_split (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at value-pairs.c:455
#2 value_pairs_walker (name=<value optimized out>, value=0x7fffec000960 "10228", user_data=0x7ffff7fe07e0) at value-pairs.c:494
#3 0x00007ffff70a4876 in g_tree_foreach () from /lib64/libglib-2.0.so.0
#4 0x00007ffff7b9a9d6 in value_pairs_foreach_sorted (vp=0x6d7280, func=0x7ffff7b9a390 <value_pairs_walker>, compare_func=<value optimized out>, msg=0x61f010, seq_num=0, user_data=0x7ffff7fe07e0)
at value-pairs.c:329
#5 0x00007ffff7b9ab0f in value_pairs_walk (vp=0x6d7280, obj_start_func=<value optimized out>, process_value_func=<value optimized out>, obj_end_func=<value optimized out>, msg=0x61f010, seq_num=0, user_data=
0x7ffff7fe0850) at value-pairs.c:534
#6 0x00007ffff3c2ec92 in tf_json_append (self=<value optimized out>, s=0x6d62e0, args=0x7ffff7fe0900, result=0x7fffec0013d8) at format-json.c:193
#7 tf_json_call (self=<value optimized out>, s=0x6d62e0, args=0x7ffff7fe0900, result=0x7fffec0013d8) at format-json.c:206
#8 0x00007ffff7b981a3 in log_template_append_format_with_context (self=0x6d2400, messages=0x7ffff7fe0998, num_messages=1, opts=0x0, tz=0, seq_num=1, context_id=0x0, result=0x7fffec0013d8) at templates.c:1234
#9 0x00007ffff7b9844f in log_template_append_format (self=<value optimized out>, lm=0x61f010, opts=<value optimized out>, tz=<value optimized out>, seq_num=<value optimized out>,
context_id=<value optimized out>, result=0x7fffec0013d8) at templates.c:1261
#10 0x00007ffff3e3cf6b in afamqp_worker_publish (self=0x6d2480, msg=0x61f010) at afamqp.c:432
#11 0x00007ffff3e3da12 in afamqp_worker_insert (arg=0x6d2480) at afamqp.c:474
#12 afamqp_worker_thread (arg=0x6d2480) at afamqp.c:535
#13 0x00007ffff7b8e18b in worker_thread_func (st=0x616600) at misc.c:580
#14 0x00007ffff70a2004 in ?? () from /lib64/libglib-2.0.so.0
#15 0x00007ffff67f6851 in start_thread () from /lib64/libpthread.so.0
#16 0x00007ffff654490d in clone () from /lib64/libc.so.6
(gdb)On Sun, Oct 20, 2013 at 10:43 AM, Balazs Scheidler <bazsi77@gmail.com> wrote:
The problem with these backtraces is that it shows the main thread, and syslog-ng is probably spinning somewhere else.
Can you check which thread is spinning (using top -L iirc) and then change to the appropriate thread using the thread command in gdb?
Alternatively "thread apply backtrace" should display backtrace of all threads.
But itd be better to locate the spinning thread first.
Thanks in advance.
On Oct 19, 2013 10:36 PM, "Alexandre Biancalana" <biancalana@gmail.com> wrote:Compilling with CFLAGS="-ggdb3 -O0" does not reproduce the issue...Follow the snapshots Balazs asked for:
Advancing patterndb current time because of timer tick; utc='1382204151'
^C
Program received signal SIGINT, Interrupt.
0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install glib2-2.22.5-7.el6.x86_64 glibc-2.12-1.107.el6.x86_64 json-c-0.10-2.el6.x86_64 keyutils-libs-1.4-4.el6.x86_64 krb5-libs-1.10.3-10.el6.x86_64 libcom_err-1.41.12-14.el6.x86_64 libnet-1.1.5-1.el6.x86_64 libselinux-2.0.94-5.3.el6.x86_64 openssl-1.0.0-27.el6.x86_64 pcre-7.8-6.el6.x86_64 zlib-1.2.3-29.el6.x86_64
(gdb) bt
#0 0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
#1 0x00007ffff7baaecd in iv_fd_epoll_poll (st=0x605df0, active=0x7fffffffe3c0, to=0x7fffffffe400) at iv_fd_epoll.c:131
#2 0x00007ffff7ba9837 in iv_fd_poll_and_run (st=0x605df0, to=0x7fffffffe400) at iv_fd.c:147
#3 0x00007ffff7baa054 in iv_main () at iv_main_posix.c:117
#4 0x00007ffff7b8c517 in main_loop_run () at mainloop.c:736
#5 0x0000000000401641 in main (argc=1, argv=0x7fffffffe548) at main.c:267
(gdb) c
Continuing.
Advancing patterndb current time because of timer tick; utc='1382204154'
Advancing patterndb current time because of timer tick; utc='1382204155'
Advancing patterndb current time because of timer tick; utc='1382204156'
^C
Program received signal SIGINT, Interrupt.
0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
(gdb) bt
#0 0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
#1 0x00007ffff7baaecd in iv_fd_epoll_poll (st=0x605df0, active=0x7fffffffe3c0, to=0x7fffffffe400) at iv_fd_epoll.c:131
#2 0x00007ffff7ba9837 in iv_fd_poll_and_run (st=0x605df0, to=0x7fffffffe400) at iv_fd.c:147
#3 0x00007ffff7baa054 in iv_main () at iv_main_posix.c:117
#4 0x00007ffff7b8c517 in main_loop_run () at mainloop.c:736
#5 0x0000000000401641 in main (argc=1, argv=0x7fffffffe548) at main.c:267
(gdb) c
Continuing.
Advancing patterndb current time because of timer tick; utc='1382204161'
Advancing patterndb current time because of timer tick; utc='1382204162'
Advancing patterndb current time because of timer tick; utc='1382204163'
Advancing patterndb current time because of timer tick; utc='1382204164'
^C
Program received signal SIGINT, Interrupt.
0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
(gdb) bt
#0 0x00007ffff6544f03 in epoll_wait () from /lib64/libc.so.6
#1 0x00007ffff7baaecd in iv_fd_epoll_poll (st=0x605df0, active=0x7fffffffe3c0, to=0x7fffffffe400) at iv_fd_epoll.c:131
#2 0x00007ffff7ba9837 in iv_fd_poll_and_run (st=0x605df0, to=0x7fffffffe400) at iv_fd.c:147
#3 0x00007ffff7baa054 in iv_main () at iv_main_posix.c:117
#4 0x00007ffff7b8c517 in main_loop_run () at mainloop.c:736
#5 0x0000000000401641 in main (argc=1, argv=0x7fffffffe548) at main.c:267
(gdb) c
Continuing.
On Sat, Oct 19, 2013 at 3:41 AM, Balazs Scheidler <bazsi77@gmail.com> wrote:
Can you make several snapshots? I mean let it run, break, backtrace and repeat this a number of times?
This backtrace shows that format-json calls the value pairs subsystem to iterate name value pairs. The question is where the infinite loop sits, that's why I would need to look at the backtrace a number of times.
Thanks in advance
On Oct 18, 2013 4:06 PM, "Alexandre Biancalana" <biancalana@gmail.com> wrote:
On Fri, Oct 18, 2013 at 6:01 AM, Balazs Scheidler <bazsi77@gmail.com> wrote:
I remember a bug in that caused similar symptoms when the output template produced zero length strings.
That has been fixed though. But can you try that regardless?
Can you perhaps use gdb to check where it is spinning?
If not, I would need to reproduce it.
It only happens when I parse log using patterndb mentioned earlier with format_json --scope nv-pairs.Follow the gdb backtrace, let me know if you need anything else.
Program received signal SIGINT, Interrupt.
g_trash_stack_height (name=<value optimized out>, value=0x620d70 "10228", user_data=0x7fffffffe200) at /usr/include/glib-2.0/glib/gutils.h:413
413 i++;
(gdb) bt
#0 g_trash_stack_height (name=<value optimized out>, value=0x620d70 "10228", user_data=0x7fffffffe200) at /usr/include/glib-2.0/glib/gutils.h:413
#1 vp_walker_name_split (name=<value optimized out>, value=0x620d70 "10228", user_data=0x7fffffffe200) at value-pairs.c:455
#2 value_pairs_walker (name=<value optimized out>, value=0x620d70 "10228", user_data=0x7fffffffe200) at value-pairs.c:494
#3 0x00007ffff70a5876 in g_tree_foreach () from /lib64/libglib-2.0.so.0
#4 0x00007ffff7b9ae86 in value_pairs_foreach_sorted (vp=0x6d7590, func=0x7ffff7b9a840 <value_pairs_walker>, compare_func=<value optimized out>, msg=0x61f010, seq_num=0, user_data=0x7fffffffe200)
at value-pairs.c:329
#5 0x00007ffff7b9afbf in value_pairs_walk (vp=0x6d7590, obj_start_func=<value optimized out>, process_value_func=<value optimized out>, obj_end_func=<value optimized out>, msg=0x61f010, seq_num=0,
user_data=0x7fffffffe270) at value-pairs.c:534
#6 0x00007ffff3e5bca2 in tf_json_append (self=<value optimized out>, s=0x6d20c0, args=0x7fffffffe320, result=0x636c80) at format-json.c:193
#7 tf_json_call (self=<value optimized out>, s=0x6d20c0, args=0x7fffffffe320, result=0x636c80) at format-json.c:206
#8 0x00007ffff7b98653 in log_template_append_format_with_context (self=0x6d8130, messages=0x7fffffffe3b8, num_messages=1, opts=0x6d7fa0, tz=1, seq_num=0, context_id=0x0, result=0x636c80) at templates.c:1234
#9 0x00007ffff7b988ff in log_template_append_format (self=<value optimized out>, lm=0x61f010, opts=<value optimized out>, tz=<value optimized out>, seq_num=<value optimized out>,
context_id=<value optimized out>, result=0x636c80) at templates.c:1261
#10 0x00007ffff7b8b87c in log_writer_format_log (self=0x619740, lm=0x61f010, result=0x636c80) at logwriter.c:862
#11 0x00007ffff7b8c310 in log_writer_flush (self=0x619740, flush_mode=LW_FLUSH_NORMAL) at logwriter.c:1005
#12 0x00007ffff7b8c48d in log_writer_work_perform (s=0x619740) at logwriter.c:129
#13 0x00007ffff7b8c4f8 in log_writer_io_flush_output (s=0x619740) at logwriter.c:209
#14 0x00007ffff7ba82ea in iv_run_tasks (st=0x605ba0) at iv_task.c:48
#15 0x00007ffff7baa50c in iv_main () at iv_main_posix.c:106
#16 0x00007ffff7b8c9c7 in main_loop_run () at mainloop.c:736
#17 0x0000000000401641 in main ()
(gdb)
______________________________________________________________________________
Member info: https://lists.balabit.hu/mailman/listinfo/syslog-ng
Documentation: http://www.balabit.com/support/documentation/?product=syslog-ng
FAQ: http://www.balabit.com/wiki/syslog-ng-faq
______________________________________________________________________________
Member info: https://lists.balabit.hu/mailman/listinfo/syslog-ng
Documentation: http://www.balabit.com/support/documentation/?product=syslog-ng
FAQ: http://www.balabit.com/wiki/syslog-ng-faq
______________________________________________________________________________
Member info: https://lists.balabit.hu/mailman/listinfo/syslog-ng
Documentation: http://www.balabit.com/support/documentation/?product=syslog-ng
FAQ: http://www.balabit.com/wiki/syslog-ng-faq
______________________________________________________________________________
Member info: https://lists.balabit.hu/mailman/listinfo/syslog-ng
Documentation: http://www.balabit.com/support/documentation/?product=syslog-ng
FAQ: http://www.balabit.com/wiki/syslog-ng-faq
______________________________________________________________________________
Member info: https://lists.balabit.hu/mailman/listinfo/syslog-ng
Documentation: http://www.balabit.com/support/documentation/?product=syslog-ng
FAQ: http://www.balabit.com/wiki/syslog-ng-faq