You are missing the portion of parser code that takes the incoming log and parses the JSON. All logs are initially just text strings; in order to manipulate them, you first need to parse that text into a structured form.
For JSON you can use the syntax noted here: https://cloud.google.com/chronicle/docs/reference/parser-syntax#extract_json_formatted_logs
Please make sure you use the option to split columns as that is needed to have the for loop work.
json {
source => "message"
array_function => "split_columns"
}
I just tested your code with the addition of that filter and a statedump at the end
filter {
json {
source => "message"
array_function => "split_columns"
}
for index, data in records {
if index == 0 {
mutate {
replace => {
"target.hostname" => "%{data.hostname}"
}
}
}
else {
mutate {
replace => {
"intermediary.hostname" => "%{data.hostname}"
}
}
mutate {
merge => {
"event.idm.readonly_udm.intermediary" => "intermediary"
}
}
}
}
statedump {}
}
I received the following as output:
Internal State (label=):
{
"@createTimestamp": {
"nanos": 0,
"seconds": 1708007453
},
"@enableCbnForLoop": true,
"@onErrorCount": 0,
"@output": [],
"@timezone": "",
"data": {
"hostname": "host2"
},
"event": {
"idm": {
"readonly_udm": {
"intermediary": [
{
"hostname": "host2"
},
{
"hostname": "host2"
}
]
}
}
},
"index": 2,
"intermediary": {
"hostname": "host2"
},
"iter": {
"records-6": -1
},
"message": "{\\n \\"records\\": [\\n {\\n \\"hostname\\": \\"host\\"\\n },\\n {\\n \\"hostname\\": \\"host1\\"\\n },\\n {\\n \\"hostname\\": \\"host2\\"\\n }\\n ]\\n}",
"records": {
"0": {
"hostname": "host"
},
"1": {
"hostname": "host1"
},
"2": {
"hostname": "host2"
}
},
"target": {
"hostname": "host"
}
}
@Dimarsky thank you so much for the breakdown. I followed the process you outlined and I was able to view the output in statedump.
However, based on the parser code, I was expecting the following output, which doesn't seem to match the statedump. What am I missing here?
{
"records": [
{
"hostname": "host",
"target": {
"hostname": "host"
}
},
{
"hostname": "host1",
"intermediary": {
"hostname": "host1"
},
"event": {
"idm": {
"readonly_udm": {
"intermediary": "intermediary"
}
}
}
},
{
"hostname": "host2",
"intermediary": {
"hostname": "host2"
},
"event": {
"idm": {
"readonly_udm": {
"intermediary": "intermediary"
}
}
}
}
]
}
i added the statedump{} inside the for loop:
filter {
json {
source => "message"
array_function => "split_columns"
}
for index, data in records {
if index == 0 {
mutate {
replace => { "target.hostname" => "%{data.hostname}" }
}
}
else {
mutate {
replace => { "intermediary.hostname" => "%{data.hostname}" }
}
mutate {
merge => { "event.idm.readonly_udm.intermediary" => "intermediary" }
}
}
statedump {}
}
}
which yielded the following output:
Internal State (label=):
{
"@createTimestamp": {
"nanos": 0,
"seconds": 1708075176
},
"@enableCbnForLoop": true,
"@onErrorCount": 0,
"@output": [],
"@timezone": "",
"data": {
"hostname": "host"
},
"index": 0,
"iter": {
"records-7": 0
},
"message": "{\\n \\"records\\": [\\n {\\n \\"hostname\\": \\"host\\"\\n },\\n {\\n \\"hostname\\": \\"host1\\"\\n },\\n {\\n \\"hostname\\": \\"host2\\"\\n }\\n ]\\n}\\n",
"records": {
"0": {
"hostname": "host"
},
"1": {
"hostname": "host1"
},
"2": {
"hostname": "host2"
}
},
"target": {
"hostname": "host"
}
}
Internal State (label=):
{
"@createTimestamp": {
"nanos": 0,
"seconds": 1708075176
},
"@enableCbnForLoop": true,
"@onErrorCount": 0,
"@output": [],
"@timezone": "",
"data": {
"hostname": "host1"
},
"event": {
"idm": {
"readonly_udm": {
"intermediary": [
{
"hostname": "host1"
}
]
}
}
},
"index": 1,
"intermediary": {
"hostname": "host1"
},
"iter": {
"records-7": 1
},
"message": "{\\n \\"records\\": [\\n {\\n \\"hostname\\": \\"host\\"\\n },\\n {\\n \\"hostname\\": \\"host1\\"\\n },\\n {\\n \\"hostname\\": \\"host2\\"\\n }\\n ]\\n}\\n",
"records": {
"0": {
"hostname": "host"
},
"1": {
"hostname": "host1"
},
"2": {
"hostname": "host2"
}
},
"target": {
"hostname": "host"
}
}
Internal State (label=):
{
"@createTimestamp": {
"nanos": 0,
"seconds": 1708075176
},
"@enableCbnForLoop": true,
"@onErrorCount": 0,
"@output": [],
"@timezone": "",
"data": {
"hostname": "host2"
},
"event": {
"idm": {
"readonly_udm": {
"intermediary": [
{
"hostname": "host2"
},
{
"hostname": "host2"
}
]
}
}
},
"index": 2,
"intermediary": {
"hostname": "host2"
},
"iter": {
"records-7": 2
},
"message": "{\\n \\"records\\": [\\n {\\n \\"hostname\\": \\"host\\"\\n },\\n {\\n \\"hostname\\": \\"host1\\"\\n },\\n {\\n \\"hostname\\": \\"host2\\"\\n }\\n ]\\n}\\n",
"records": {
"0": {
"hostname": "host"
},
"1": {
"hostname": "host1"
},
"2": {
"hostname": "host2"
}
},
"target": {
"hostname": "host"
}
}
But why is target.hostname showing in both index 1 and index 2, when per the code it should show only in the element at index 0?
Also, the following is not showing in index 2:
"event": {
"idm": {
"readonly_udm": {
"intermediary": [
{
"hostname": "host2"
},
{
"hostname": "host2"
}
]
}
}
},
i added the statedump{} inside the for loop:
filter {
json {
source => "message"
array_function => "split_columns"
}
for index, data in records {
if index == 0 {
mutate {
replace => { "target.hostname" => "%{data.hostname}" }
}
}
else {
mutate {
replace => { "intermediary.hostname" => "%{data.hostname}" }
}
mutate {
merge => { "event.idm.readonly_udm.intermediary" => "intermediary" }
}
}
statedump {}
}
}
which yielded the following output:
Internal State (label=):
{
"@createTimestamp": {
"nanos": 0,
"seconds": 1708075176
},
"@enableCbnForLoop": true,
"@onErrorCount": 0,
"@output": [],
"@timezone": "",
"data": {
"hostname": "host"
},
"index": 0,
"iter": {
"records-7": 0
},
"message": "{\\n \\"records\\": [\\n {\\n \\"hostname\\": \\"host\\"\\n },\\n {\\n \\"hostname\\": \\"host1\\"\\n },\\n {\\n \\"hostname\\": \\"host2\\"\\n }\\n ]\\n}\\n",
"records": {
"0": {
"hostname": "host"
},
"1": {
"hostname": "host1"
},
"2": {
"hostname": "host2"
}
},
"target": {
"hostname": "host"
}
}
Internal State (label=):
{
"@createTimestamp": {
"nanos": 0,
"seconds": 1708075176
},
"@enableCbnForLoop": true,
"@onErrorCount": 0,
"@output": [],
"@timezone": "",
"data": {
"hostname": "host1"
},
"event": {
"idm": {
"readonly_udm": {
"intermediary": [
{
"hostname": "host1"
}
]
}
}
},
"index": 1,
"intermediary": {
"hostname": "host1"
},
"iter": {
"records-7": 1
},
"message": "{\\n \\"records\\": [\\n {\\n \\"hostname\\": \\"host\\"\\n },\\n {\\n \\"hostname\\": \\"host1\\"\\n },\\n {\\n \\"hostname\\": \\"host2\\"\\n }\\n ]\\n}\\n",
"records": {
"0": {
"hostname": "host"
},
"1": {
"hostname": "host1"
},
"2": {
"hostname": "host2"
}
},
"target": {
"hostname": "host"
}
}
Internal State (label=):
{
"@createTimestamp": {
"nanos": 0,
"seconds": 1708075176
},
"@enableCbnForLoop": true,
"@onErrorCount": 0,
"@output": [],
"@timezone": "",
"data": {
"hostname": "host2"
},
"event": {
"idm": {
"readonly_udm": {
"intermediary": [
{
"hostname": "host2"
},
{
"hostname": "host2"
}
]
}
}
},
"index": 2,
"intermediary": {
"hostname": "host2"
},
"iter": {
"records-7": 2
},
"message": "{\\n \\"records\\": [\\n {\\n \\"hostname\\": \\"host\\"\\n },\\n {\\n \\"hostname\\": \\"host1\\"\\n },\\n {\\n \\"hostname\\": \\"host2\\"\\n }\\n ]\\n}\\n",
"records": {
"0": {
"hostname": "host"
},
"1": {
"hostname": "host1"
},
"2": {
"hostname": "host2"
}
},
"target": {
"hostname": "host"
}
}
But why is target.hostname showing in both index 1 and index 2, when per the code it should show only in the element at index 0?
Also, the following is not showing in index 2:
"event": {
"idm": {
"readonly_udm": {
"intermediary": [
{
"hostname": "host2"
},
{
"hostname": "host2"
}
]
}
}
},
I'm not sure I understand the question, but I will walk through what I understand your code to be doing and why the statedumps you shared are exactly what I would expect.
- Parse the JSON string into a JSON like object structure
- start a loop to iterate over the records key/placeholder
- if this is the first iteration of the loop (index == 0) take the value currently contained in data.hostname (meaning the first array member of records) and place it in the newly created placeholder target.hostname.
- On the second iteration of the loop we go into the else statement. We take the value contained in data.hostname this time and place it into the intermediary.hostname placeholder
- We then merge the intermediary placeholder into event.idm.readonly_udm.intermediary
- Keep in mind, the original target.hostname value is still there because after placing it there in the first loop iteration nothing further was done to it
- On the third iteration of the loop we go into the else statement again. We take the value contained in data.hostname (now the third array member of records) and place it into the intermediary.hostname placeholder
- Just like before we merge the current value in intermediary into event.idm.readonly_udm.intermediary
This leaves us with an internal state of:
- The original JSON structure contained in the message that was parsed out via the JSON command
- A target placeholder with a subkey of hostname that contains the hostname value in the first array member of the input data
- An event placeholder with a subkey of idm, followed by readonly_udm, followed by intermediary, which is an array of two hostname keys with a value attached to each of them (the second and third array members of the original data)
- Some stray placeholders like index, intermediary, message, and others that were left over from processing the data.
There are a few things that I believe you're missing.
You should probably merge the target.hostname into the event placeholder the same way you're merging the intermediary.hostname
readonly_udm should actually be read_only_udm
You will need to add some other mandatory fields that are required before the event output will actually work.
Once you're done with everything else you want to do, you need to add a merge command to merge the event placeholder into @output, as seen here: https://cloud.google.com/chronicle/docs/reference/parser-syntax#output_data_to_a_udm_record
I noticed one more thing as I reviewed my response. It appears that the merge command is doing a merge by reference. That is why when you look at the final output you'll see both of the intermediary hostnames show as host2 instead of one showing host1 and the other showing host2.
I'm including the code with a remove_field command to fix this issue (as well as a merge of the target.hostname field into the event).
filter {
json {
source => "message"
array_function => "split_columns"
}
for index, data in records {
if index == 0 {
mutate {
replace => {
"target.hostname" => "%{data.hostname}"
}
}
mutate {
merge => {
"event.idm.read_only_udm.target" => "target"
}
}
mutate {
remove_field => ["target"]
}
}
else {
mutate {
replace => {
"intermediary.hostname" => "%{data.hostname}"
}
}
mutate {
merge => {
"event.idm.read_only_udm.intermediary" => "intermediary"
}
}
mutate {
remove_field => ["intermediary"]
}
}
}
statedump {}
}
And here is the output of that statedump
Internal State (label=):
{
"@createTimestamp": {
"nanos": 0,
"seconds": 1708093490
},
"@enableCbnForLoop": true,
"@onErrorCount": 0,
"@output": [],
"@timezone": "",
"data": {
"hostname": "host2"
},
"event": {
"idm": {
"read_only_udm": {
"intermediary": [
{
"hostname": "host1"
},
{
"hostname": "host2"
}
],
"target": [
{
"hostname": "host"
}
]
}
}
},
"index": 2,
"iter": {
"records-7": -1
},
"message": "{\\n \\"records\\": [\\n {\\n \\"hostname\\": \\"host\\"\\n },\\n {\\n \\"hostname\\": \\"host1\\"\\n },\\n {\\n \\"hostname\\": \\"host2\\"\\n }\\n ]\\n}",
"records": {
"0": {
"hostname": "host"
},
"1": {
"hostname": "host1"
},
"2": {
"hostname": "host2"
}
}
}
Hopefully this makes sense.
@Dimarsky thank you very much for your time and attention to this. I think I have understood quite a few things based on your response.
I have a quick follow-up though. I didn't quite get the part about merge by reference. How does the `remove_field` command help to fix this issue?
I won't pretend to have the coding skills to explain this properly. The quick version is that when you pass a reference you're passing the address of the data, whereas when you pass a value you're passing a copy of the actual data.
If the original data later changes then whatever you passed a reference to will also change, while the things you passed a value to will remain unchanged.
This is something that happens in many languages. Here is a link to one explanation, but you can find many more (including videos) by searching for something along the lines of "pass by reference vs pass by value"
https://medium.com/front-end-weekly/understanding-pass-by-value-and-pass-by-reference-in-javascript-8e2a0806b175
Thanks @Dimarsky .
But I guess what I meant to ask is: how does the `remove_field` command help in resolving the merge by reference problem?
Thanks @Dimarsky .
But I guess what i meant to ask is how does the `remove_field` command help in resolving the merge by reference problem.
When you use remove_field to remove the intermediary placeholder at the end of a loop iteration (after you've merged the value into your event placeholder), the only placeholder that still references the value that had been stored there (host1, for the iteration at index 1) is the event placeholder.
On the next loop iteration you create a new intermediary placeholder. This has the same name as the old one, but it no longer points to the same memory address as the old one (which is now only referenced by event.idm.read_only_udm.intermediary[0].hostname). That means the new value stored at the new address only impacts data stored in the new intermediary placeholder and leaves event.idm.read_only_udm.intermediary[0].hostname untouched.
Thank you very much for breaking it all down. 🙂