
Hi guys,
I have a problem with a custom parser in Chronicle.

This is the log:

<158>Jul 10 10:23:02 test-lb[1362591]: 12.12.12.12:47934 [10/Jul/2024:10:23:02.545] test~ back/test 0/0/0/3/3 200 51745 - - ---- 59/17/1/1/0 0/0 "GET https://www.example.c HTTP/2.0"

And this is the custom parser:

 

# Product: HAPROXY
# Category:  Load balancing
# Supported Format: SYSLOG
# Reference: N/A
# Last Updated: 2024-07-22
 
 
filter {
  mutate {
    replace => {
      "process" => ""
      "pid" => ""
      "client_ip" => ""
      "client_port" => ""
      "timestamp" => ""
      "frontend_name" => ""
      "backend_name" => ""
      "server_name" => ""
      "time_request" => ""
      "time_queue" => ""
      "time_backend_connect" => ""
      "time_backend_response" => ""
      "time_duration" => ""
      "http_status_code" => ""
      "bytes_read" => ""
      "captured_request_cookie" => ""
      "captured_response_cookie" => ""
      "termination_state" => ""
      "actconn" => ""
      "feconn" => ""
      "beconn" => ""
      "srvconn" => ""
      "retries" => ""
      "srv_queue" => ""
      "backend_queue" => ""
      "http_verb" => ""
      "http_proto" => ""
      "http_host" => ""
      "http_request" => ""
      "http_version" => ""
    }
  }

  grok {
    match => {
      "message" => [
        "<%{INT}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{GREEDYDATA:process}\\[%{INT:pid}\\]: %{IP:client_ip}:%{INT:client_port} \\[%{GREEDYDATA:timestamp}\\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}\\/%{NOTSPACE:server_name} %{INT:time_request}\\/%{INT:time_queue}\\/%{INT:time_backend_connect}\\/%{INT:time_backend_response}\\/%{NOTSPACE:time_duration} %{INT:http_status_code}%{GREEDYDATA}\\"%{WORD:http_verb} %{GREEDYDATA:http_request} HTTP/%{NUMBER:http_version}\\""
      ]
    }
    overwrite => ["process","pid","client_ip","client_port","timestamp","frontend_name","backend_name","server_name","time_request","time_queue","time_backend_connect","time_backend_response","time_duration","http_status_code","bytes_read","captured_request_cookie","captured_response_cookie","termination_state","actconn","feconn","beconn","srvconn","retries","srv_queue","backend_queue","http_verb","http_proto","http_host","http_request","http_version"]
    # on_error => "match_error"
  }

  if [match_error] {
    drop {
      tag => "TAG_NO_SECURITY_VALUE"
    }
  }

  if [timestamp] != "" {
    date {
      match => ["timestamp", "yyyy MMM dd HH:mm:ss.SSS"]
      on_error => "time_stamp_wrong_format"
    }
  }

  mutate {
    replace => {
      "event.idm.read_only_udm.metadata.event_type" => "NETWORK_HTTP"
      "event.idm.read_only_udm.principal.ip" => "%{client_ip}"
      "event.idm.read_only_udm.principal.port" => "%{client_port}"
      "event.idm.read_only_udm.target.application" => "%{process}"
      "event.idm.read_only_udm.network.http.response_code" => "%{http_status_code}"
      "event.idm.read_only_udm.network.http.method" => "%{http_verb}"
      "event.idm.read_only_udm.target.url" => "%{http_request}"
      "event.idm.read_only_udm.target.hostname" => "%{frontend_name}"
      "event.idm.read_only_udm.target.process.pid" => "%{pid}"
      "event.idm.read_only_udm.network.application_protocol" => "HTTP"
      "event.idm.read_only_udm.metadata.product_version" => "HTTP/%{http_version}"
      "event_type" => "STATUS_UPDATE"
      "event.idm.read_only_udm.metadata.product_name" => "HAProxy"
      "event.idm.read_only_udm.metadata.vendor_name" => "HAProxy Enterprise"
    }
  }

  mutate {
    merge => {
      "@output" => "event"
    }
  }
}

-----------------------------------------

I tried the grok pattern in a grok debugger and it works fine.

I receive this validation error, but I don't understand where the error is:

generic::unknown: pipeline.ParseLogEntry failed: LOG_PARSING_CBN_ERROR: "generic::internal: pipeline failed: filter grok (1) failed: failed to parse data: error parsing regexp: unexpected ): `(<((?:[+-]?(?:[0-9]+)))>(?P<syslog_timestamp>(\\\\b(?:Jan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|ä)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?)\\\\b) +((?:(?:001-9])|(?::12]]0-9])|(?:3301])||1-9])) (((^0-9]?)((?:220123]||01]??0-9])):((?::0-5]]0-9]))(?::((?:(?::0-5]??0-9]|60)(?:::.,]]0-9]+)?)))((^0-9]?))) (?P<process>.*))(?P<pid>(?::+-]?(?::0-9]+)))]: (?P<client_ip>(?:(((((0-9A-Fa-f]{1,4}:){7}((0-9A-Fa-f]{1,4}|:))|(((0-9A-Fa-f]{1,4}:){6}(::0-9A-Fa-f]{1,4}|((2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)(\\\\.(2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)){3})|:))|(((0-9A-Fa-f]{1,4}:){5}(((::0-9A-Fa-f]{1,4}){1,2})|:((2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)(\\\\.(2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)){3})|:))|(((0-9A-Fa-f]{1,4}:){4}(((::0-9A-Fa-f]{1,4}){1,3})|((::0-9A-Fa-f]{1,4})?:((2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)(\\\\.(2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)){3}))|:))|(((0-9A-Fa-f]{1,4}:){3}(((::0-9A-Fa-f]{1,4}){1,4})|((::0-9A-Fa-f]{1,4}){0,2}:((2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)(\\\\.(2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)){3}))|:))|(((0-9A-Fa-f]{1,4}:){2}(((::0-9A-Fa-f]{1,4}){1,5})|((::0-9A-Fa-f]{1,4}){0,3}:((2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)(\\\\.(2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)){3}))|:))|(((0-9A-Fa-f]{1,4}:){1}(((::0-9A-Fa-f]{1,4}){1,6})|((::0-9A-Fa-f]{1,4}){0,4}:((2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)(\\\\.(2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)){3}))|:))|(:(((::0-9A-Fa-f]{1,4}){1,7})|((::0-9A-Fa-f]{1,4}){0,5}:((2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)(\\\\.(2550-5]|220-4]\\\\d|1\\\\d\\\\d||1-9]?\\\\d)){3}))|:)))(%.+)?)|((?:(?:2550-5]|220-4]]0-9]||01]??0-9]]0-9]?)\\\\.){3}(?:2550-5]|220-4]]0-9]||01]??0-9]]0-9]?)))):(?P<client_port>(?::+-]?(?::0-9]+))) (?P<timestamp>.*)] (?P<frontend_name>\\\\S+) (?P<backend_name>\\\\S+)/(?P<server_name>\\\\S+) (?P<time_request>(?::+-]?(?::0-9]+)))/(?P<time_queue>(?::+-]?(?::0-9]+)))/(?P<time_backend_connect>(?::+-]?(?::0-9]+)))/(?P<time_backend_response>(?::+-]?(?::0-9]+)))/(?P<time_duration>\\\\S+) (?P<http_status_code>(?::+-]?(?::0-9]+)))(.*)\\"(?P<http_verb>\\\\b\\\\w+\\\\b) (?P<http_request>.*) HTTP/(?P<http_version>(?:(((+-]?(?::0-9]+(?:\\\\..0-9]+)?)|\\\\..0-9]+))))\\")`"

 

Can anyone help me solve this problem?

Thanks in advance.

Bye

Mario

 

Hi @ziobill ,
The first problems I noticed are:
1. #on_error => "match_error" : you use this tag in the next statement, so you need to un-comment this line.
2. Many of the "replace" statements should not be used with repeated UDM fields like event.idm.read_only_udm.principal.ip; you need to use "merge" for repeated fields (see the sketch after the snippet below).
3. There is a match error in your grok pattern; if you remove all the statements after the grok match, you will see in the statedump output that "on_error" = true:



# Product: HAPROXY
# Category:  Load balancing
# Supported Format: SYSLOG
# Reference: N/A
# Last Updated: 2024-07-22

filter {
  mutate {
    replace => {
      "process" => ""
      "pid" => ""
      "client_ip" => ""
      "client_port" => ""
      "timestamp" => ""
      "frontend_name" => ""
      "backend_name" => ""
      "server_name" => ""
      "time_request" => ""
      "time_queue" => ""
      "time_backend_connect" => ""
      "time_backend_response" => ""
      "time_duration" => ""
      "http_status_code" => ""
      "bytes_read" => ""
      "captured_request_cookie" => ""
      "captured_response_cookie" => ""
      "termination_state" => ""
      "actconn" => ""
      "feconn" => ""
      "beconn" => ""
      "srvconn" => ""
      "retries" => ""
      "srv_queue" => ""
      "backend_queue" => ""
      "http_verb" => ""
      "http_proto" => ""
      "http_host" => ""
      "http_request" => ""
      "http_version" => ""
    }
  }

  grok {
    match => {
      "message" => [
        "<%{INT}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{GREEDYDATA:process}\\[%{INT:pid}\\]: %{IP:client_ip}:%{INT:client_port} \\[%{GREEDYDATA:timestamp}\\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}\\/%{NOTSPACE:server_name} %{INT:time_request}\\/%{INT:time_queue}\\/%{INT:time_backend_connect}\\/%{INT:time_backend_response}\\/%{NOTSPACE:time_duration} %{INT:http_status_code}%{GREEDYDATA}\\"%{WORD:http_verb} %{GREEDYDATA:http_request} HTTP/%{NUMBER:http_version}\\""
      ]
    }
    overwrite => ["process","pid","client_ip","client_port","timestamp","frontend_name","backend_name","server_name","time_request","time_queue","time_backend_connect","time_backend_response","time_duration","http_status_code","bytes_read","captured_request_cookie","captured_response_cookie","termination_state","actconn","feconn","beconn","srvconn","retries","srv_queue","backend_queue","http_verb","http_proto","http_host","http_request","http_version"]
    on_error => "match_error"
  }

  statedump {label => "3"}
}
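As a minimal sketch of point 2, reusing the client_ip field extracted by the grok above: a repeated UDM field such as principal.ip is filled with "merge" instead of "replace":

  mutate {
    merge => {
      # client_ip is a single string; merge appends it to the repeated principal.ip field
      "event.idm.read_only_udm.principal.ip" => "client_ip"
    }
  }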


 


An additional tip for GROKs within parsers: you always have to double every backslash inside a parser, so where a grok debugger needs \\, the parser needs \\\\.
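For example, take the escaped bracket around the pid in the pattern above. The single-escaped form is accepted by an online grok debugger, but inside the Chronicle parser the backslashes have to be doubled:

  # Accepted by an online grok debugger:
  %{GREEDYDATA:process}\\[%{INT:pid}\\]:
  # Required inside the Chronicle parser:
  %{GREEDYDATA:process}\\\\[%{INT:pid}\\\\]: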



This is my fix for the parser, please try this one.
In general:
1. You did not escape the characters "[" and "]" properly; you need to test the parser in the Chronicle SIEM parser editor after testing in a grok debugger, as there are some differences.
2. Some event types require mandatory fields, so you could test first with GENERIC_EVENT and then change to the desired type.
3. There were some duplicate mappings.
4. Principal IP is a repeated field.
5. Some UDM fields, like port and status code, must be converted to integers to be valid UDM values.



filter {
  mutate {
    replace => {
      "syslog_timestamp" => ""
      "process" => ""
      "pid" => ""
      "client_ip" => ""
      "client_port" => ""
      "timestamp" => ""
      "frontend_name" => ""
      "backend_name" => ""
      "server_name" => ""
      "time_request" => ""
      "time_queue" => ""
      "time_backend_connect" => ""
      "time_backend_response" => ""
      "time_duration" => ""
      "http_status_code" => ""
      #"bytes_read" => ""
      #"captured_request_cookie" => ""
      #"captured_response_cookie" => ""
      #"termination_state" => ""
      #"actconn" => ""
      #"feconn" => ""
      #"beconn" => ""
      #"srvconn" => ""
      #"retries" => ""
      #"srv_queue" => ""
      #"backend_queue" => ""
      "http_verb" => ""
      #"http_proto" => ""
      #"http_host" => ""
      "http_request" => ""
      "http_version" => ""
    }
  }

  grok {
    match => {
      "message" => ["<%{INT}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{GREEDYDATA:process}\\\\[%{INT:pid}\\\\]: %{IP:client_ip}:%{INT:client_port} \\\\[%{GREEDYDATA:timestamp}\\\\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}\\/%{NOTSPACE:server_name} %{INT:time_request}\\/%{INT:time_queue}\\/%{INT:time_backend_connect}\\/%{INT:time_backend_response}\\/%{NOTSPACE:time_duration} %{INT:http_status_code}%{GREEDYDATA} \\"%{WORD:http_verb} %{GREEDYDATA:http_request} HTTP/%{NUMBER:http_version}\\""]
    }
    overwrite => ["syslog_timestamp","process","pid","client_ip","client_port","timestamp","frontend_name","backend_name","server_name","time_request","time_queue","time_backend_connect","time_backend_response","time_duration","http_status_code","http_verb","http_request","http_version"]
    on_error => "match_error"
  }

  if [match_error] {
    drop {
      tag => "TAG_NO_SECURITY_VALUE"
    }
  }

  if [timestamp] != "" {
    date {
      match => ["timestamp", "yyyy MMM dd HH:mm:ss.SSS"]
      on_error => "time_stamp_wrong_format"
    }
  }

  mutate {
    replace => {
      "event.idm.read_only_udm.principal.port" => "%{client_port}"
      "event.idm.read_only_udm.network.http.response_code" => "%{http_status_code}"
    }
  }

  mutate {
    convert => {
      "event.idm.read_only_udm.principal.port" => "integer"
      "event.idm.read_only_udm.network.http.response_code" => "integer"
    }
  }

  mutate {
    merge => {
      "event.idm.read_only_udm.principal.ip" => "client_ip"
    }
  }

  mutate {
    replace => {
      #"event.idm.read_only_udm.metadata.event_type" => "GENERIC_EVENT"
      "event.idm.read_only_udm.metadata.event_type" => "NETWORK_HTTP"
      #"event.idm.read_only_udm.principal.port" => "%{client_port}"
      "event.idm.read_only_udm.target.application" => "%{process}"
      #"event.idm.read_only_udm.network.http.response_code" => "%{http_status_code}"
      "event.idm.read_only_udm.network.http.method" => "%{http_verb}"
      "event.idm.read_only_udm.target.url" => "%{http_request}"
      "event.idm.read_only_udm.target.hostname" => "%{frontend_name}"
      "event.idm.read_only_udm.target.process.pid" => "%{pid}"
      "event.idm.read_only_udm.network.application_protocol" => "HTTP"
      "event.idm.read_only_udm.metadata.product_version" => "HTTP/%{http_version}"
      "event.idm.read_only_udm.metadata.product_name" => "HAProxy"
      "event.idm.read_only_udm.metadata.vendor_name" => "HAProxy Enterprise"
    }
  }

  mutate {
    merge => {
      "@output" => "event"
    }
  }

  statedump {label => "3"}
}



References:


From <https://cloud.google.com/chronicle/docs/unified-data-model/udm-usage#network-metadata>






Here is another example of the same approach, filling a repeated UDM field with "merge", this time iterating over a JSON array:

filter {

  # Parse the JSON into composite JSON fields
  json {
    source => "message"
    array_function => "split_columns"
    on_error => "not_json"
  }

  # For debugging
  #statedump {label => "2"}

  # Iterate through the JSON objects
  for id, behaviorProcessed in behaviors {

    # Convert the id to a string so it can be used in the token keys
    mutate {
      convert => {
        "id" => "string"
      }
      on_error => "conversion_id_error"
    }

    # Initialize the rule_labels token variable on every loop iteration
    mutate {
      replace => {
        "rule_labels" => ""
      }
      on_error => "init_ruleLabel_error"
    }

    # If the initialization succeeded, assign values to the composite token variables from the JSON object
    if ![init_ruleLabel_error] {
      mutate {
        replace => {
          "rule_labels.value" => "%{behaviorProcessed.scenario}"
        }
        on_error => "eventData_no_scenarioValue"
      }

      if ![eventData_no_scenarioValue] and [behaviorProcessed] != "" {
        mutate {
          replace => {
            "rule_labels.key" => "behaviorScenario_%{id}"
          }
          on_error => "eventData_no_scenarioLabel"
        }
      }
    }

    # Fill in the repeated rule_labels field
    if ![eventData_no_scenarioLabel] {
      mutate {
        merge => {
          "security_result.rule_labels" => "rule_labels"
        }
        on_error => "merge_error"
      }
    }
  }

  # Fill in the repeated security_result field
  mutate {
    merge => {
      "event.idm.read_only_udm.security_result" => "security_result"
    }
  }

  # Write to the events
  mutate {
    merge => {
      "@output" => "event"
    }
  }

  # For debugging at the end of the parser
  statedump {label => "3"}
}
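For context, a minimal sketch of the kind of raw log this loop assumes (the field values are hypothetical, only the "behaviors" array and "scenario" key come from the parser above):

{"behaviors": [{"scenario": "malware_detected"}, {"scenario": "credential_theft"}]}

Each element of the behaviors array then yields one rule_labels entry (key "behaviorScenario_0", "behaviorScenario_1", and so on, with the value taken from scenario) that gets merged into the repeated security_result.rule_labels field.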

