Hi Mike,
Last night I also got stuck at parsing Date
# Product: OakNorth Web Access
# Category: Custom - HTTP Access Parser
# Last Updated: 08-Sep-2025
# Author: Ayush Gupta
# Version: 0.1
#
# Fields Mapped to UDM (ACCESS LOGS ONLY)
# metadata.event_timestamp ← timestamp (from [01/Sep/2025:07:20:09 +0000])
# metadata.event_type ← GENERIC_EVENT
# network.application_protocol← HTTP
# principal.ip ← client_ip
# principal.platform ← user_agent (client device info)
# intermediary.hostname ← observer_id (web server instance)
# target.hostname ← vhost
# target.url ← http://{vhost}{request_path}
# network.http.method ← http_method
# network.http.response_code ← status
# network.http.user_agent ← user_agent
# network.http.referral_url ← referrer (when present)
filter {
# # Initialize working variables
mutate {
replace => {
"observer_id" => ""
"client_ip" => ""
"vhost" => ""
"ident" => ""
"ts" => ""
"http_method" => ""
"request_path" => ""
"http_version" => ""
"status" => ""
"bytes" => ""
"referrer" => ""
"user_agent" => ""
"req_url" => ""
}
}
# Parse ACCESS LOG:
# <instance> <client_ip> <vhost> <ident> [<HTTPDATE>] "<METHOD> <PATH> HTTP/<ver>" <status> <bytes> "<referrer>" "<UA>"
# First grok (which we know works)
grok {
match => {
"message" => '%{DATA:observer_id} %{IP:client_ip} %{HOSTNAME:vhost} %{DATA:ident} %{GREEDYDATA:rest_of_line}'
}
on_error => "no_match"
overwrite => ["observer_id","client_ip","vhost","ident","rest_of_line"]
}
# Second GROK: parse timestamp, method, path, version, status, bytes, referrer, UA
# grok {
# match => {
# "rest_of_line" => '\[%{DATA:ts}\] \\"%{DATA:request_line}\\" %{INT:status} %{INT:bytes} \\"%{DATA:referrer}\\" \\"%{DATA:user_agent}\\"'
# }
# tag_on_failure => ["_rest_parse_failure"]
# overwrite => ["ts", "request_line", "status", "bytes", "referrer", "user_agent"]
# }
# # Then parse request_line separately
# grok {
# match => {
# "request_line" => '%{WORD:http_method} %{NOTSPACE:request_path} HTTP/%{NUMBER:http_version}'
# }
# tag_on_failure => ["_request_parse_failure"]
# overwrite => ["http_method", "request_path", "http_version"]
# }
# Set event timestamp (Chronicle copies 'timestamp' → metadata.event_timestamp)
mutate { replace => { "timestamp" => "%{ts}" } }
# Normalize referrer dash to empty
if [referrer] == "-" { mutate { replace => { "referrer" => "" } } }
# Build a simple absolute URL (assume HTTP in the log line's request line)
mutate { replace => { "req_url" => "http://%{vhost}%{request_path}" } }
# Convert numeric fields (only if not empty)
if [status] != "" {
mutate { convert => { "status" => "integer" } }
}
if [bytes] != "" {
mutate { convert => { "bytes" => "integer" } }
}
# --- Map ACCESS LOG to UDM ---
# Event type & protocol
mutate {
replace => {
"event.idm.read_only_udm.metadata.event_type" => "GENERIC_EVENT"
"event.idm.read_only_udm.network.application_protocol" => "HTTP"
}
}
# Actor and target
if [client_ip] != "" {
mutate { merge => { "event.idm.read_only_udm.principal.ip" => "client_ip" } }
}
if [user_agent] != "" {
mutate { replace => { "event.idm.read_only_udm.principal.platform" => "%{user_agent}" } }
}
if [observer_id] != "" {
mutate { replace => { "event.idm.read_only_udm.intermediary.hostname" => "%{observer_id}" } }
}
if [vhost] != "" {
mutate { replace => { "event.idm.read_only_udm.target.hostname" => "%{vhost}" } }
}
if [req_url] != "" {
mutate { replace => { "event.idm.read_only_udm.target.url" => "%{req_url}" } }
}
# HTTP details
if [http_method] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.method" => "%{http_method}" } }
}
if [user_agent] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.user_agent" => "%{user_agent}" } }
}
if [status] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.response_code" => "%{status}" } }
}
if [referrer] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.referral_url" => "%{referrer}" } }
}
statedump {}
# Hand off to UDM
mutate { merge => { "@output" => "event" } }
}
filter {
# Initialize working variables
mutate {
replace => {
"observer_id" => ""
"client_ip" => ""
"vhost" => ""
"ident" => ""
"ts" => ""
"http_method" => ""
"request_path" => ""
"http_version" => ""
"status" => ""
"bytes" => ""
"referrer" => ""
"user_agent" => ""
"req_url" => ""
}
}
# Parse ACCESS LOG:
# <instance> <client_ip> <vhost> <ident> [<HTTPDATE>] "<METHOD> <PATH> HTTP/<ver>" <status> <bytes> "<referrer>" "<UA>"
# First grok (which we know works)
grok {
match => {
"message" => '%{DATA:observer_id} %{IP:client_ip} %{HOSTNAME:vhost} %{DATA:ident} %{GREEDYDATA:rest_of_line}'
}
on_error => "no_match"
overwrite => ["observer_id","client_ip","vhost","ident","rest_of_line"]
}
# Second GROK: parse timestamp, method, path, version, status, bytes, referrer, UA
# grok {
# match => {
# "rest_of_line" => '\[%{DATA:ts}\] \\"%{DATA:request_line}\\" %{INT:status} %{INT:bytes} \\"%{DATA:referrer}\\" \\"%{DATA:user_agent}\\"'
# }
# tag_on_failure => ["_rest_parse_failure"]
# overwrite => ["ts", "request_line", "status", "bytes", "referrer", "user_agent"]
# }
# # Then parse request_line separately
# grok {
# match => {
# "request_line" => '%{WORD:http_method} %{NOTSPACE:request_path} HTTP/%{NUMBER:http_version}'
# }
# tag_on_failure => ["_request_parse_failure"]
# overwrite => ["http_method", "request_path", "http_version"]
# }
# Set event timestamp (Chronicle copies 'timestamp' → metadata.event_timestamp)
mutate { replace => { "timestamp" => "%{ts}" } }
# Normalize referrer dash to empty
if [referrer] == "-" { mutate { replace => { "referrer" => "" } } }
# Build a simple absolute URL (assume HTTP in the log line's request line)
mutate { replace => { "req_url" => "http://%{vhost}%{request_path}" } }
# Convert numeric fields (only if not empty)
if [status] != "" {
mutate { convert => { "status" => "integer" } }
}
if [bytes] != "" {
mutate { convert => { "bytes" => "integer" } }
}
# --- Map ACCESS LOG to UDM ---
# Event type & protocol
mutate {
replace => {
"event.idm.read_only_udm.metadata.event_type" => "GENERIC_EVENT"
"event.idm.read_only_udm.network.application_protocol" => "HTTP"
}
}
# Actor and target
if [client_ip] != "" {
mutate { merge => { "event.idm.read_only_udm.principal.ip" => "client_ip" } }
}
if [user_agent] != "" {
mutate { replace => { "event.idm.read_only_udm.principal.platform" => "%{user_agent}" } }
}
if [observer_id] != "" {
mutate { replace => { "event.idm.read_only_udm.intermediary.hostname" => "%{observer_id}" } }
}
if [vhost] != "" {
mutate { replace => { "event.idm.read_only_udm.target.hostname" => "%{vhost}" } }
}
if [req_url] != "" {
mutate { replace => { "event.idm.read_only_udm.target.url" => "%{req_url}" } }
}
# HTTP details
if [http_method] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.method" => "%{http_method}" } }
}
if [user_agent] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.user_agent" => "%{user_agent}" } }
}
if [status] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.response_code" => "%{status}" } }
}
if [referrer] != "" {
mutate { replace => { "event.idm.read_only_udm.network.http.referral_url" => "%{referrer}" } }
}
statedump {}
# Hand off to UDM
mutate { merge => { "@output" => "event" } }
}
Output:
Internal State (label=):{ "@createTimestamp": { "nanos": 0, "seconds": [removed by moderator] }, "@enableCbnForLoop": true, "@onErrorCount": 0, "@output": [], "@timezone": "", "bytes": "", "client_ip": "34.39.252.22", "event": { "idm": { "read_only_udm": { "intermediary": { "hostname": "web-97071-i-0928c2f8bf23f2092" }, "metadata": { "event_type": "GENERIC_EVENT" }, "network": { "application_protocol": "HTTP" }, "principal": { "ip": [ "34.39.252.22" ] }, "target": { "hostname": "osdsrth.co.uk", "url": "http://osdsrth.co.uk" } } } }, "http_method": "", "http_version": "", "ident": "-", "message": "web-97071-i-0928c2f8bf23f2092 34.39.252.22 osdsrth.co.uk - [05/Sep/2025:02:45:26 +0000] \"GET /deals/palamon-capital-partners/ HTTP/1.0\" 200 149717 \"-\" \"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36\"", "no_match": false, "observer_id": "web-97071-i-0928c2f8bf23f2092", "referrer": "", "req_url": "http://osdsrth.co.uk", "request_path": "", "rest_of_line": "[05/Sep/2025:02:45:26 +0000] \"GET /deals/palamon-capital-partners/ HTTP/1.0\" 200 149717 \"-\" \"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36\"", "status": "", "timestamp": "", "ts": "", "user_agent": "", "vhost": "osdsrth.co.uk"}