Hi Александр,

The problem is that the "answers" element of this entry is a list of dicts. Previously, lists were not even properly supported. Recent versions of syslog-ng (very recent, that is: we merged quite a few PRs in this area, e.g. https://github.com/syslog-ng/syslog-ng/pull/3885) do support lists embedded in JSON objects, and either turn them into a list of strings (if their elements have simple types) or keep them as a literal "JSON" typed value, which is reproduced verbatim.

I have this configuration:

@version: 3.37

log {
        source { tcp(port(2000) flags(no-parse)); };
        parser { json-parser(prefix(".json.")); };
        destination { file("/tmp/json.out" template("$(format-flat-json --subkeys .json.)\n")); };
};

With a recent version of syslog-ng, the $(format-flat-json) output would look like this:

{
  "timestamp-rfc3339": "2022-06-06T08:47:58.797332215Z",
  "response-port": "53",
  "response-ip": "192.168.yy.zz",
  "rcode": "NOERROR",
  "query-port": "51000",
  "query-ip": "192.168.xx.zz",
  "qtype": "TXT",
  "qname": "_dnsaddr.bootstrap.libp2p.io",
  "protocol": "TCP",
  "operation": "CLIENT_RESPONSE",
  "length": "691",
  "latency": "0.000000",
  "identity": "ns-server.example.com",
  "family": "INET",
  "country-isocode": "-",
  "answers": "[{\"name\":\"_dnsaddr.bootstrap.libp2p.io\",\"rdatatype\":\"TXT\",\"ttl\":600,\"rdata\":\"dnsaddr=\\/dnsaddr\\/ams-2.bootstrap.libp2p.io\\/p2p\\/QmbLHAnMoJPWSCR5Zhtx6BHJX9KiKNN6tpvbUcqanj75Nb\"},{\"name\":\"_dnsaddr.bootstrap.libp2p.io\",\"rdatatype\":\"TXT\",\"ttl\":600,\"rdata\":\"dnsaddr=\\/dnsaddr\\/sjc-1.bootstrap.libp2p.io\\/p2p\\/QmNnooDu7bfjPFoTZYxMNLWUQJyrVwtbZg5gBMjTezGAJN\"},{\"name\":\"_dnsaddr.bootstrap.libp2p.io\",\"rdatatype\":\"TXT\",\"ttl\":600,\"rdata\":\"dnsaddr=\\/dnsaddr\\/ewr-1.bootstrap.libp2p.io\\/p2p\\/QmQCU2EcMqAqQPR2i9bChDtGNJchTbq5TbXJJ16u19uLTa\"},{\"name\":\"_dnsaddr.bootstrap.libp2p.io\",\"rdatatype\":\"TXT\",\"ttl\":600,\"rdata\":\"dnsaddr=\\/dnsaddr\\/ams-rust.bootstrap.libp2p.io\\/p2p\\/12D3KooWEZXjE41uU4EL2gpkAQeDXYok6wghN7wwNVPF5bwkaNfS\"},{\"name\":\"_dnsaddr.bootstrap.libp2p.io\",\"rdatatype\":\"TXT\",\"ttl\":600,\"rdata\":\"dnsaddr=\\/dnsaddr\\/sjc-2.bootstrap.libp2p.io\\/p2p\\/QmZa1sAxajnQjVM8WjWXoMbmPd7NsWhfKsPkErzpm9wGkp\"},{\"name\":\"_dnsaddr.bootstrap.libp2p.io\",\"rdatatype\":\"TXT\",\"ttl\":600,\"rdata\":\"dnsaddr=\\/dnsaddr\\/nrt-1.bootstrap.libp2p.io\\/p2p\\/QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt\"}]"
}

"answers" is now a string, containing a JSON expression that contains a list of objects.

If I set the syslog-ng config version to 4.0, I get the new typing behaviour, which means that ${.json.answers} is now a JSON literal, so the output becomes this:

{
  "timestamp-rfc3339": "2022-06-06T08:47:58.797332215Z",
  "response-port": "53",
  "response-ip": "192.168.yy.zz",
  "rcode": "NOERROR",
  "query-port": "51000",
  "query-ip": "192.168.xx.zz",
  "qtype": "TXT",
  "qname": "_dnsaddr.bootstrap.libp2p.io",
  "protocol": "TCP",
  "operation": "CLIENT_RESPONSE",
  "length": 691,
  "latency": "0.000000",
  "identity": "ns-server.example.com",
  "family": "INET",
  "country-isocode": "-",
  "answers": [
    {
      "name": "_dnsaddr.bootstrap.libp2p.io",
      "rdatatype": "TXT",
      "ttl": 600,
      "rdata": "dnsaddr=/dnsaddr/ams-2.bootstrap.libp2p.io/p2p/QmbLHAnMoJPWSCR5Zhtx6BHJX9KiKNN6tpvbUcqanj75Nb"
    },
    {
      "name": "_dnsaddr.bootstrap.libp2p.io",
      "rdatatype": "TXT",
      "ttl": 600,
      "rdata": "dnsaddr=/dnsaddr/sjc-1.bootstrap.libp2p.io/p2p/QmNnooDu7bfjPFoTZYxMNLWUQJyrVwtbZg5gBMjTezGAJN"
    },
    {
      "name": "_dnsaddr.bootstrap.libp2p.io",
      "rdatatype": "TXT",
      "ttl": 600,
      "rdata": "dnsaddr=/dnsaddr/ewr-1.bootstrap.libp2p.io/p2p/QmQCU2EcMqAqQPR2i9bChDtGNJchTbq5TbXJJ16u19uLTa"
    },
    {
      "name": "_dnsaddr.bootstrap.libp2p.io",
      "rdatatype": "TXT",
      "ttl": 600,
      "rdata": "dnsaddr=/dnsaddr/ams-rust.bootstrap.libp2p.io/p2p/12D3KooWEZXjE41uU4EL2gpkAQeDXYok6wghN7wwNVPF5bwkaNfS"
    },
    {
      "name": "_dnsaddr.bootstrap.libp2p.io",
      "rdatatype": "TXT",
      "ttl": 600,
      "rdata": "dnsaddr=/dnsaddr/sjc-2.bootstrap.libp2p.io/p2p/QmZa1sAxajnQjVM8WjWXoMbmPd7NsWhfKsPkErzpm9wGkp"
    },
    {
      "name": "_dnsaddr.bootstrap.libp2p.io",
      "rdatatype": "TXT",
      "ttl": 600,
      "rdata": "dnsaddr=/dnsaddr/nrt-1.bootstrap.libp2p.io/p2p/QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt"
    }
  ]
}

i.e. at least "answers" becomes an array and not a string. That's only slightly better. Let me parse the embedded list a second time. I am adding this parser to the config:

@version: 4.0

log {
        source { tcp(port(2000) flags(no-parse)); };

        parser { json-parser(prefix(".json.")); };

        parser { json-parser(prefix(".json.answers") template("${.json.answers}")); };

        destination { file("/tmp/json.out" template("$(format-flat-json --subkeys .json.)\n")); };
};

The 2nd parser finds that the input to be parsed is a list. The new syslog-ng 4.0 behaviour is to parse elements into $1, $2, etc. This is the trace output of the 2nd JSON parser:

[2022-06-10T18:20:12.927168] Setting value; name='1', value='{"name":"_dnsaddr.bootstrap.libp2p.io","rdatatype":"TXT","ttl":600,"rdata":"dnsaddr=\/dnsaddr\/ams-2.bootstrap.libp2p.io\/p2p\/QmbLHAnMoJPWSCR5Zhtx6BHJX9KiKNN6tpvbUcqanj75Nb"}', type='json', msg='0x7ffff0014190', rcptid='102'
[2022-06-10T18:20:12.927179] Setting value; name='2', value='{"name":"_dnsaddr.bootstrap.libp2p.io","rdatatype":"TXT","ttl":600,"rdata":"dnsaddr=\/dnsaddr\/sjc-1.bootstrap.libp2p.io\/p2p\/QmNnooDu7bfjPFoTZYxMNLWUQJyrVwtbZg5gBMjTezGAJN"}', type='json', msg='0x7ffff0014190', rcptid='102'
[2022-06-10T18:20:12.927190] Setting value; name='3', value='{"name":"_dnsaddr.bootstrap.libp2p.io","rdatatype":"TXT","ttl":600,"rdata":"dnsaddr=\/dnsaddr\/ewr-1.bootstrap.libp2p.io\/p2p\/QmQCU2EcMqAqQPR2i9bChDtGNJchTbq5TbXJJ16u19uLTa"}', type='json', msg='0x7ffff0014190', rcptid='102'
[2022-06-10T18:20:12.927199] Setting value; name='4', value='{"name":"_dnsaddr.bootstrap.libp2p.io","rdatatype":"TXT","ttl":600,"rdata":"dnsaddr=\/dnsaddr\/ams-rust.bootstrap.libp2p.io\/p2p\/12D3KooWEZXjE41uU4EL2gpkAQeDXYok6wghN7wwNVPF5bwkaNfS"}', type='json', msg='0x7ffff0014190', rcptid='102'
[2022-06-10T18:20:12.927207] Setting value; name='5', value='{"name":"_dnsaddr.bootstrap.libp2p.io","rdatatype":"TXT","ttl":600,"rdata":"dnsaddr=\/dnsaddr\/sjc-2.bootstrap.libp2p.io\/p2p\/QmZa1sAxajnQjVM8WjWXoMbmPd7NsWhfKsPkErzpm9wGkp"}', type='json', msg='0x7ffff0014190', rcptid='102'
[2022-06-10T18:20:12.927216] Setting value; name='6', value='{"name":"_dnsaddr.bootstrap.libp2p.io","rdatatype":"TXT","ttl":600,"rdata":"dnsaddr=\/dnsaddr\/nrt-1.bootstrap.libp2p.io\/p2p\/QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt"}', type='json', msg='0x7ffff0014190', rcptid='102'

i.e. $1 would be the first element of the JSON array, $2 the 2nd, and so on. We can turn these matches into a syslog-ng list using the special macro "$*", and manipulate it using the list-related template functions $(list-*).

We can reparse these elements back into the original message using a 3rd invocation of the json-parser.

Config:
@version: 4.0

log {
        source { tcp(port(2000) flags(no-parse)); };

        parser { json-parser(prefix(".json.")); };

parser { json-parser(prefix(".json.answers") template("${.json.answers}")); };

parser { json-parser(prefix(".json.answers_0_") template("$1")); };

        destination { file("/tmp/json.out" template("$(format-flat-json --subkeys .json.)\n")); };
};


Trace:
[2022-06-10T18:30:42.820943] Setting value; name='.json.answers_0_name', value='_dnsaddr.bootstrap.libp2p.io', type='string', msg='0x7ffff0014190', rcptid='104'
[2022-06-10T18:30:42.820970] Setting value; name='.json.answers_0_rdatatype', value='TXT', type='string', msg='0x7ffff0014190', rcptid='104'
[2022-06-10T18:30:42.820994] Setting value; name='.json.answers_0_ttl', value='600', type='int64', msg='0x7ffff0014190', rcptid='104'
[2022-06-10T18:30:42.821021] Setting value; name='.json.answers_0_rdata', value='dnsaddr=/dnsaddr/ams-2.bootstrap.libp2p.io/p2p/QmbLHAnMoJPWSCR5Zhtx6BHJX9KiKNN6tpvbUcqanj75Nb', type='string', msg='0x7ffff0014190', rcptid='104'

Getting somewhere. You can do this for each of your elements. The only issue is that you can't loop over the array. Yet.

So if you know there's a limited number of these elements, you can do this by checking whether $N is set, and doing this parsing if it is.

I am afraid that's what we have at the moment. It is probably faster than doing it all in Python, but hey, syslog-ng is not a programming language, is it? :)

I am giving some thought to how we could do some level of iteration, but I don't have a very good idea at the moment.

Hope this helps,
Balazs

On Fri, Jun 10, 2022 at 5:07 PM Александр Масленников <alexander.a.maslennikov@gmail.com> wrote:
I'm not sure that *format-flat-json* works with lists the same way as with nested dicts.
My example uses format-flat-json, but without an additional filter in Python, I was unable to flatten it.
Here is the original message:

{
	"operation": "CLIENT_RESPONSE",
	"identity": "ns-server.example.com",
	"family": "INET",
	"protocol": "TCP",
	"query-ip": "192.168.xx.zz",
	"query-port": "51000",
	"response-ip": "192.168.yy.zz",
	"response-port": "53",
	"length": 691,
	"rcode": "NOERROR",
	"qname": "_dnsaddr.bootstrap.libp2p.io",
	"qtype": "TXT",
	"latency": "0.000000",
	"timestamp-rfc3339": "2022-06-06T08:47:58.797332215Z",
	"answers": [{
		"name": "_dnsaddr.bootstrap.libp2p.io",
		"rdatatype": "TXT",
		"ttl": 600,
		"rdata": "dnsaddr=/dnsaddr/ams-2.bootstrap.libp2p.io/p2p/QmbLHAnMoJPWSCR5Zhtx6BHJX9KiKNN6tpvbUcqanj75Nb"
	}, {
		"name": "_dnsaddr.bootstrap.libp2p.io",
		"rdatatype": "TXT",
		"ttl": 600,
		"rdata": "dnsaddr=/dnsaddr/sjc-1.bootstrap.libp2p.io/p2p/QmNnooDu7bfjPFoTZYxMNLWUQJyrVwtbZg5gBMjTezGAJN"
	}, {
		"name": "_dnsaddr.bootstrap.libp2p.io",
		"rdatatype": "TXT",
		"ttl": 600,
		"rdata": "dnsaddr=/dnsaddr/ewr-1.bootstrap.libp2p.io/p2p/QmQCU2EcMqAqQPR2i9bChDtGNJchTbq5TbXJJ16u19uLTa"
	}, {
		"name": "_dnsaddr.bootstrap.libp2p.io",
		"rdatatype": "TXT",
		"ttl": 600,
		"rdata": "dnsaddr=/dnsaddr/ams-rust.bootstrap.libp2p.io/p2p/12D3KooWEZXjE41uU4EL2gpkAQeDXYok6wghN7wwNVPF5bwkaNfS"
	}, {
		"name": "_dnsaddr.bootstrap.libp2p.io",
		"rdatatype": "TXT",
		"ttl": 600,
		"rdata": "dnsaddr=/dnsaddr/sjc-2.bootstrap.libp2p.io/p2p/QmZa1sAxajnQjVM8WjWXoMbmPd7NsWhfKsPkErzpm9wGkp"
	}, {
		"name": "_dnsaddr.bootstrap.libp2p.io",
		"rdatatype": "TXT",
		"ttl": 600,
		"rdata": "dnsaddr=/dnsaddr/nrt-1.bootstrap.libp2p.io/p2p/QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt"
	}],
	"country-isocode": "-"
}
I would be very grateful to you if you have a solution using the built-in functions of syslog-ng.

пт, 10 июн. 2022 г. в 15:00, <syslog-ng-request@lists.balabit.hu>:
Send syslog-ng mailing list submissions to
        syslog-ng@lists.balabit.hu

To subscribe or unsubscribe via the World Wide Web, visit
        https://lists.balabit.hu/mailman/listinfo/syslog-ng
or, via email, send a message with subject or body 'help' to
        syslog-ng-request@lists.balabit.hu

You can reach the person managing the list at
        syslog-ng-owner@lists.balabit.hu

When replying, please edit your Subject line so it is more specific
than "Re: Contents of syslog-ng digest..."


Today's Topics:

   1.  need help with parser to make flat nested json list of
      dictionaries (Александр Масленников)
   2. Re:  need help with parser to make flat nested json list of
      dictionaries (Peter Kokai (pkokai))


----------------------------------------------------------------------

Message: 1
Date: Fri, 10 Jun 2022 11:02:55 +0300
From: Александр Масленников  <alexander.a.maslennikov@gmail.com>
To: syslog-ng@lists.balabit.hu
Subject: [syslog-ng] need help with parser to make flat nested json
        list of dictionaries
Message-ID:
        <CA+G0nAjp1b6_50LbCROVPje1_B4R_AzNYiZ-_dT0m=fXcqwmHA@mail.gmail.com>
Content-Type: text/plain; charset="utf-8"

hi all
i have a json message that contains a nested json list of dicts

{"a":1,"b":[{"c":1},{"c":2},{"c":3}]}

i want to flat that message, so expected result looks like {
"a": 1,
"b_0_c": 1,
"b_1_c": 2,
"b_2_c": 3
}

My approach is a python implemented parser.
Is it possible to achieve the same result using the built-in syslog-ng
tools?
My solution below

@define kafka-implementation kafka-c

python {

import collections
import json

class FlattenedJson(object):
    """syslog-ng Python parser that flattens a nested JSON message.

    Nested objects and lists in the JSON document stored in ``MESSAGE`` are
    collapsed into a single-level object whose keys are the path components
    joined with ``_``; list elements contribute their index, e.g.
    ``{"a": 1, "b": [{"c": 1}, {"c": 2}]}`` becomes
    ``{"a": 1, "b_0_c": 1, "b_1_c": 2}``.
    """

    def parse(self, log_message, flat_message=None):
        """Flatten the JSON payload of *log_message* in place.

        Args:
            log_message: Message object supporting item access. ``MESSAGE``
                is read as UTF-8 encoded bytes holding a JSON object and is
                overwritten with the flattened JSON, also as UTF-8 bytes.
            flat_message: Ignored on input; kept only so the signature stays
                backward compatible.

        Returns:
            Always ``True``. On any failure ``MESSAGE`` is left untouched and
            the error text is stored under ``python_error`` instead.
        """
        def flatten(d, parent_key='', sep='_'):
            """Return a flat dict built from the (possibly nested) dict *d*."""
            flat = {}
            for k, v in d.items():
                new_key = parent_key + sep + k if parent_key else k
                if isinstance(v, list):
                    # Treat a list as an object keyed by element index, so
                    # lists of scalars and nested lists flatten the same way
                    # as lists of objects.
                    v = {str(idx): item for idx, item in enumerate(v)}
                # json.loads() only ever produces dict for JSON objects, so a
                # plain dict check suffices (collections.MutableMapping no
                # longer exists on Python 3.10+).
                if isinstance(v, dict):
                    flat.update(flatten(v, new_key, sep))
                else:
                    flat[new_key] = v
            return flat

        try:
            decoded_msg = json.loads(log_message['MESSAGE'].decode('utf-8'))
            flat_message = flatten(decoded_msg)
            final_message = json.dumps(flat_message).encode(encoding='utf-8')
            log_message['MESSAGE'] = final_message
        except Exception as error:
            # Deliberately best-effort: record the failure on the message
            # rather than propagating it out of the parser.
            log_message['python_error'] = 'An exception occurred: {}'.format(error)
        return True
};

destination d_kafka_dnstap {
  kafka(
    topic("mytopic")
    bootstrap-servers("localhost:9092")
    message("$(format-flat-json  --scope all-nv-pairs
application_name=myapp @timestamp=${ISODATE} )")
  );
};

source s_net_dnstap { network( transport(udp) port(514) flags(no-parse) ); };

parser p_dnstap { channel {
    parser { python(class("FlattenedJson")); };
    parser { json-parser(prefix("dnstap.")); };
  };
};

log { source(s_net_dnstap); parser(p_dnstap); destination(d_kafka_dnstap); };
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.balabit.hu/pipermail/syslog-ng/attachments/20220610/0fb59c2a/attachment-0001.htm>

------------------------------

Message: 2
Date: Fri, 10 Jun 2022 08:09:24 +0000
From: "Peter Kokai (pkokai)" <Peter.Kokai@oneidentity.com>
To: "syslog-ng@lists.balabit.hu" <syslog-ng@lists.balabit.hu>
Subject: Re: [syslog-ng] need help with parser to make flat nested
        json list of dictionaries
Message-ID:
        <SA1PR19MB5641CF9A4E2AB1502C5AE348F8A69@SA1PR19MB5641.namprd19.prod.outlook.com>

Content-Type: text/plain; charset="koi8-r"

Hello,

If the underlines are not a must in the key, yes you can use *format-flat-json* (it uses dot instead of underscore). It uses the same syntax as format-json.

--
Kokan

________________________________________
From: syslog-ng <syslog-ng-bounces@lists.balabit.hu> on behalf of Александр Масленников <alexander.a.maslennikov@gmail.com>
Sent: 10 June 2022 10:02
To: syslog-ng@lists.balabit.hu
Subject: [syslog-ng] need help with parser to make flat nested json list of dictionaries

CAUTION: This email originated from outside of the organization. Do not follow guidance, click links, or open attachments unless you recognize the sender and know the content is safe.

hi all
i have a json message that contains a nested json list of dicts

{"a":1,"b":[{"c":1},{"c":2},{"c":3}]}

i want to flat that message, so expected result looks like {
"a": 1,
"b_0_c": 1,
"b_1_c": 2,
"b_2_c": 3
}

My approach is a python implemented parser.
Is it possible to achieve the same result using the built-in syslog-ng tools?
My solution below


@define kafka-implementation kafka-c

python {

import collections
import json

class FlattenedJson(object):
    """syslog-ng Python parser that flattens a nested JSON message.

    Nested objects and lists in the JSON document stored in ``MESSAGE`` are
    collapsed into a single-level object whose keys are the path components
    joined with ``_``; list elements contribute their index, e.g.
    ``{"a": 1, "b": [{"c": 1}, {"c": 2}]}`` becomes
    ``{"a": 1, "b_0_c": 1, "b_1_c": 2}``.
    """

    def parse(self, log_message, flat_message=None):
        """Flatten the JSON payload of *log_message* in place.

        Args:
            log_message: Message object supporting item access. ``MESSAGE``
                is read as UTF-8 encoded bytes holding a JSON object and is
                overwritten with the flattened JSON, also as UTF-8 bytes.
            flat_message: Ignored on input; kept only so the signature stays
                backward compatible.

        Returns:
            Always ``True``. On any failure ``MESSAGE`` is left untouched and
            the error text is stored under ``python_error`` instead.
        """
        def flatten(d, parent_key='', sep='_'):
            """Return a flat dict built from the (possibly nested) dict *d*."""
            flat = {}
            for k, v in d.items():
                new_key = parent_key + sep + k if parent_key else k
                if isinstance(v, list):
                    # Treat a list as an object keyed by element index, so
                    # lists of scalars and nested lists flatten the same way
                    # as lists of objects.
                    v = {str(idx): item for idx, item in enumerate(v)}
                # json.loads() only ever produces dict for JSON objects, so a
                # plain dict check suffices (collections.MutableMapping no
                # longer exists on Python 3.10+).
                if isinstance(v, dict):
                    flat.update(flatten(v, new_key, sep))
                else:
                    flat[new_key] = v
            return flat

        try:
            decoded_msg = json.loads(log_message['MESSAGE'].decode('utf-8'))
            flat_message = flatten(decoded_msg)
            final_message = json.dumps(flat_message).encode(encoding='utf-8')
            log_message['MESSAGE'] = final_message
        except Exception as error:
            # Deliberately best-effort: record the failure on the message
            # rather than propagating it out of the parser.
            log_message['python_error'] = 'An exception occurred: {}'.format(error)
        return True
};

destination d_kafka_dnstap {
  kafka(
    topic("mytopic")
    bootstrap-servers("localhost:9092")
    message("$(format-flat-json  --scope all-nv-pairs application_name=myapp @timestamp=${ISODATE} )")
  );
};

source s_net_dnstap { network( transport(udp) port(514) flags(no-parse) ); };

parser p_dnstap { channel {
    parser { python(class("FlattenedJson")); };
    parser { json-parser(prefix("dnstap.")); };
  };
};

log { source(s_net_dnstap); parser(p_dnstap); destination(d_kafka_dnstap); };


------------------------------

Subject: Digest Footer

_______________________________________________
syslog-ng maillist  -  syslog-ng@lists.balabit.hu
https://lists.balabit.hu/mailman/listinfo/syslog-ng


------------------------------

End of syslog-ng Digest, Vol 206, Issue 2
*****************************************
______________________________________________________________________________
Member info: https://lists.balabit.hu/mailman/listinfo/syslog-ng
Documentation: http://www.balabit.com/support/documentation/?product=syslog-ng
FAQ: http://www.balabit.com/wiki/syslog-ng-faq



--
Bazsi