Skip to main content

 

# Product:  Web Access
# Category: Custom - HTTP Access Parser
# Last Updated: 08-Sep-2025
# Author: Ayush Gupta
# Version: 0.1
#
# Fields Mapped to UDM (ACCESS LOGS ONLY)
# metadata.event_timestamp ← timestamp (from [01/Sep/2025:07:20:09 +0000])
# metadata.event_type ← GENERIC_EVENT
# network.application_protocol← HTTP
# principal.ip ← client_ip
# principal.platform ← user_agent (client device info)
# intermediary.hostname ← observer_id (web server instance)
# target.hostname ← vhost
# target.url ← http://{vhost}{request_path}
# network.http.method ← http_method
# network.http.response_code ← status
# network.http.user_agent ← user_agent
# network.http.referral_url ← referrer (when present)

filter {

# Initialize working variables
mutate {
replace => {
"observer_id" => ""
"client_ip" => ""
"vhost" => ""
"ident" => ""
"ts" => ""
"http_method" => ""
"request_path" => ""
"http_version" => ""
"status" => ""
"bytes" => ""
"referrer" => ""
"user_agent" => ""
"req_url" => ""
}
}

grok {
match => {
"message" => [
'(%{DATA:observer_id} %{IP:client_ip} %{HOSTNAME:vhost} %{DATA:ident} \[%{HTTPDATE:ts}\] "%{WORD:http_method} %{DATA:request_path} HTTP/%{NUMBER:http_version}" %{INT:status} %{INT:bytes} "%{DATA:referrer}" "%{DATA:user_agent}")'
]
}
overwrite => ["observer_id","client_ip","vhost","ident","ts","http_method","request_path","http_version","status","bytes","referrer","user_agent"]
on_error => "no_match"
}

# Event type & protocol
mutate {
replace => {
"event.idm.read_only_udm.metadata.event_type" => "GENERIC_EVENT"
"event.idm.read_only_udm.metadata.vendor_name" => "WP Engine"
"event.idm.read_only_udm.metadata.product_name" => "NGINX SERVER"
}
}

if [observer_id] != "" {
mutate {
replace => {
"event.idm.read_only_udm.principal.asset_id" => "%{observer_id}"
}
}
}

# Hand off to UDM
mutate { merge => { "@output" => "event" } }
}

---
Raw Log 
 

web-97071-i-0928c2ewewwf08f2092 98.23.66.172 gptssd.co.uk - [05/Sep/2025:02:45:35 +0000] "GET /blog/changing-face-of-luxury-retirement-living/ HTTP/1.0" 200 161665 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36"---In UDM I can only see the static fields 
  • metadata.event_type: "GENERIC_EVENT"
  • metadata.vendor_name: "WP Engine"
  • metadata.product_name: "NGINX SERVER"
  • metadata.log_type: "NGINX"
I tried to get the observer_id first but no success I think the issue is with the grok pattern I am using. Can any one help !!

But this pattern is not working in the Chronicle Parser 

 


 

 


Looks like your grok syntax doesn’t match what the parser is expecting. Did a quick update and now I’m seeing values match so that should get you on your way :)

 

 

-mike


Hi Mike,
Last night I also got stuck at parsing Date

# Product: OakNorth Web Access

# Category: Custom - HTTP Access Parser

# Last Updated: 08-Sep-2025

# Author: Ayush Gupta

# Version: 0.1

#

# Fields Mapped to UDM (ACCESS LOGS ONLY)

# metadata.event_timestamp ← timestamp (from [01/Sep/2025:07:20:09 +0000])

# metadata.event_type ← GENERIC_EVENT

# network.application_protocol← HTTP

# principal.ip ← client_ip

# principal.platform ← user_agent (client device info)

# intermediary.hostname ← observer_id (web server instance)

# target.hostname ← vhost

# target.url ← http://{vhost}{request_path}

# network.http.method ← http_method

# network.http.response_code ← status

# network.http.user_agent ← user_agent

# network.http.referral_url ← referrer (when present)

filter {

# # Initialize working variables

mutate {

replace => {

"observer_id" => ""

"client_ip" => ""

"vhost" => ""

"ident" => ""

"ts" => ""

"http_method" => ""

"request_path" => ""

"http_version" => ""

"status" => ""

"bytes" => ""

"referrer" => ""

"user_agent" => ""

"req_url" => ""

}

}

# Parse ACCESS LOG:

# <instance> <client_ip> <vhost> <ident> [<HTTPDATE>] "<METHOD> <PATH> HTTP/<ver>" <status> <bytes> "<referrer>" "<UA>"

# First grok (which we know works)

grok {

match => {

"message" => '%{DATA:observer_id} %{IP:client_ip} %{HOSTNAME:vhost} %{DATA:ident} %{GREEDYDATA:rest_of_line}'

}

on_error => "no_match"

overwrite => ["observer_id","client_ip","vhost","ident","rest_of_line"]

}


# Second GROK: parse timestamp, method, path, version, status, bytes, referrer, UA

# grok {

# match => {

# "rest_of_line" => '\[%{DATA:ts}\] \\"%{DATA:request_line}\\" %{INT:status} %{INT:bytes} \\"%{DATA:referrer}\\" \\"%{DATA:user_agent}\\"'

# }

# tag_on_failure => ["_rest_parse_failure"]

# overwrite => ["ts", "request_line", "status", "bytes", "referrer", "user_agent"]

# }

# # Then parse request_line separately

# grok {

# match => {

# "request_line" => '%{WORD:http_method} %{NOTSPACE:request_path} HTTP/%{NUMBER:http_version}'

# }

# tag_on_failure => ["_request_parse_failure"]

# overwrite => ["http_method", "request_path", "http_version"]

# }


# Set event timestamp (Chronicle copies 'timestamp' → metadata.event_timestamp)

mutate { replace => { "timestamp" => "%{ts}" } }

# Normalize referrer dash to empty

if [referrer] == "-" { mutate { replace => { "referrer" => "" } } }

# Build a simple absolute URL (assume HTTP in the log line's request line)

mutate { replace => { "req_url" => "http://%{vhost}%{request_path}" } }

# Convert numeric fields (only if not empty)

if [status] != "" {

mutate { convert => { "status" => "integer" } }

}

if [bytes] != "" {

mutate { convert => { "bytes" => "integer" } }

}

# --- Map ACCESS LOG to UDM ---

# Event type & protocol

mutate {

replace => {

"event.idm.read_only_udm.metadata.event_type" => "GENERIC_EVENT"

"event.idm.read_only_udm.network.application_protocol" => "HTTP"

}

}

# Actor and target

if [client_ip] != "" {

mutate { merge => { "event.idm.read_only_udm.principal.ip" => "client_ip" } }

}

if [user_agent] != "" {

mutate { replace => { "event.idm.read_only_udm.principal.platform" => "%{user_agent}" } }

}

if [observer_id] != "" {

mutate { replace => { "event.idm.read_only_udm.intermediary.hostname" => "%{observer_id}" } }

}

if [vhost] != "" {

mutate { replace => { "event.idm.read_only_udm.target.hostname" => "%{vhost}" } }

}

if [req_url] != "" {

mutate { replace => { "event.idm.read_only_udm.target.url" => "%{req_url}" } }

}

# HTTP details

if [http_method] != "" {

mutate { replace => { "event.idm.read_only_udm.network.http.method" => "%{http_method}" } }

}

if [user_agent] != "" {

mutate { replace => { "event.idm.read_only_udm.network.http.user_agent" => "%{user_agent}" } }

}

if [status] != "" {

mutate { replace => { "event.idm.read_only_udm.network.http.response_code" => "%{status}" } }

}

if [referrer] != "" {

mutate { replace => { "event.idm.read_only_udm.network.http.referral_url" => "%{referrer}" } }

}

statedump {}

# Hand off to UDM

mutate { merge => { "@output" => "event" } }

}

filter {

# Initialize working variables
mutate {
replace => {
"observer_id" => ""
"client_ip" => ""
"vhost" => ""
"ident" => ""
"ts" => ""
"http_method" => ""
"request_path" => ""
"http_version" => ""
"status" => ""
"bytes" => ""
"referrer" => ""
"user_agent" => ""
"req_url" => ""
}
}

# Parse ACCESS LOG:
# <instance> <client_ip> <vhost> <ident> [<HTTPDATE>] "<METHOD> <PATH> HTTP/<ver>" <status> <bytes> "<referrer>" "<UA>"
# First grok (which we know works)
grok {
match => {
"message" => '%{DATA:observer_id} %{IP:client_ip} %{HOSTNAME:vhost} %{DATA:ident} %{GREEDYDATA:rest_of_line}'
}
on_error => "no_match"
overwrite => ["observer_id","client_ip","vhost","ident","rest_of_line"]
}


# Second GROK: parse timestamp, method, path, version, status, bytes, referrer, UA
# grok {
# match => {
# "rest_of_line" => '\[%{DATA:ts}\] \\"%{DATA:request_line}\\" %{INT:status} %{INT:bytes} \\"%{DATA:referrer}\\" \\"%{DATA:user_agent}\\"'
# }
# tag_on_failure => ["_rest_parse_failure"]
# overwrite => ["ts", "request_line", "status", "bytes", "referrer", "user_agent"]
# }

# # Then parse request_line separately
# grok {
# match => {
# "request_line" => '%{WORD:http_method} %{NOTSPACE:request_path} HTTP/%{NUMBER:http_version}'
# }
# tag_on_failure => ["_request_parse_failure"]
# overwrite => ["http_method", "request_path", "http_version"]
# }


# Set event timestamp (Chronicle copies 'timestamp' → metadata.event_timestamp)
mutate { replace => { "timestamp" => "%{ts}" } }

# Normalize referrer dash to empty
if [referrer] == "-" { mutate { replace => { "referrer" => "" } } }

# Build a simple absolute URL (assume HTTP in the log line's request line)
mutate { replace => { "req_url" => "http://%{vhost}%{request_path}" } }

# Convert numeric fields (only if not empty)
if [status] != "" {
mutate { convert => { "status" => "integer" } }
}
if [bytes] != "" {
mutate { convert => { "bytes" => "integer" } }
}

# --- Map ACCESS LOG to UDM ---

# Event type & protocol
mutate {
replace => {
"event.idm.read_only_udm.metadata.event_type" => "GENERIC_EVENT"
"event.idm.read_only_udm.network.application_protocol" => "HTTP"
}
}

# Actor and target
if [client_ip] != "" {
mutate { merge => { "event.idm.read_only_udm.principal.ip" => "client_ip" } }
}
if [user_agent] != "" {
mutate { replace => { "event.idm.read_only_udm.principal.platform" => "%{user_agent}" } }
}
if [observer_id] != "" {
mutate { replace => { "event.idm.read_only_udm.intermediary.hostname" => "%{observer_id}" } }
}
if [vhost] != "" {
mutate { replace => { "event.idm.read_only_udm.target.hostname" => "%{vhost}" } }
}
if [req_url] != "" {
mutate { replace => { "event.idm.read_only_udm.target.url" => "%{req_url}" } }
}

# HTTP details
if [http_method] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.method" => "%{http_method}" } }
}
if [user_agent] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.user_agent" => "%{user_agent}" } }
}
if [status] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.response_code" => "%{status}" } }
}
if [referrer] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.referral_url" => "%{referrer}" } }
}

statedump {}

# Hand off to UDM
mutate { merge => { "@output" => "event" } }
}

Output:

Internal State (label=):{ "@createTimestamp": { "nanos": 0, "seconds": [removed by moderator] }, "@enableCbnForLoop": true, "@onErrorCount": 0, "@output": [], "@timezone": "", "bytes": "", "client_ip": "34.39.252.22", "event": { "idm": { "read_only_udm": { "intermediary": { "hostname": "web-97071-i-0928c2f8bf23f2092" }, "metadata": { "event_type": "GENERIC_EVENT" }, "network": { "application_protocol": "HTTP" }, "principal": { "ip": [ "34.39.252.22" ] }, "target": { "hostname": "osdsrth.co.uk", "url": "http://osdsrth.co.uk" } } } }, "http_method": "", "http_version": "", "ident": "-", "message": "web-97071-i-0928c2f8bf23f2092 34.39.252.22 osdsrth.co.uk - [05/Sep/2025:02:45:26 +0000] \"GET /deals/palamon-capital-partners/ HTTP/1.0\" 200 149717 \"-\" \"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36\"", "no_match": false, "observer_id": "web-97071-i-0928c2f8bf23f2092", "referrer": "", "req_url": "http://osdsrth.co.uk", "request_path": "", "rest_of_line": "[05/Sep/2025:02:45:26 +0000] \"GET /deals/palamon-capital-partners/ HTTP/1.0\" 200 149717 \"-\" \"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36\"", "status": "", "timestamp": "", "ts": "", "user_agent": "", "vhost": "osdsrth.co.uk"}


"raw_timestamp": "[05/Sep/2025:02:45:26 +0000] ",Just stuck with one 

Here’s how I’d grok out the date and then use the date function to parsed it to UDM.

 

 grok {
match => {
"message" => "%{DATA:observer_id} %{IP:client_ip} %{HOSTNAME:vhost} %{DATA:ident} \\[%{DATA:datetime} %{DATA:timezone}\\] %{GREEDYDATA:rest_of_line}"
}
on_error => "no_match"
overwrite => ["observer_id","client_ip","vhost","ident","rest_of_line"]
}

date {
match => ["datetime", "dd/MMM/yyyy:HH:mm:ss"]
on_error => "date_no_match"
}

 

-mike


Reply