Skip to content

normalize HTTPD_COMBINEDLOG matching #280

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
4 changes: 2 additions & 2 deletions patterns/ecs-v1/httpd
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ HTTPDUSER %{EMAILADDRESS}|%{USER}
HTTPDERROR_DATE %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}

# Log formats
HTTPD_COMMONLOG %{IPORHOST:clientip} %{HTTPDUSER:ident} %{HTTPDUSER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-)
HTTPD_COMBINEDLOG %{HTTPD_COMMONLOG} %{QS:referrer} %{QS:agent}
HTTPD_COMMONLOG %{IPORHOST:clientip} (?:-|%{HTTPDUSER:ident}) (?:-|%{HTTPDUSER:auth}) \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" (?:-|%{NUMBER:response}) (?:-|%{NUMBER:bytes})
HTTPD_COMBINEDLOG %{HTTPD_COMMONLOG} "(?:-|%{DATA:referrer})" "(?:-|%{DATA:agent})"

# Error logs
HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:message}
Expand Down
4 changes: 2 additions & 2 deletions patterns/legacy/httpd
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ HTTPDUSER %{EMAILADDRESS}|%{USER}
HTTPDERROR_DATE %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}

# Log formats
HTTPD_COMMONLOG %{IPORHOST:clientip} %{HTTPDUSER:ident} %{HTTPDUSER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-)
HTTPD_COMBINEDLOG %{HTTPD_COMMONLOG} %{QS:referrer} %{QS:agent}
HTTPD_COMMONLOG %{IPORHOST:clientip} (?:-|%{HTTPDUSER:ident}) (?:-|%{HTTPDUSER:auth}) \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" (?:-|%{NUMBER:response}) (?:-|%{NUMBER:bytes})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure making the legacy patterns capture IFF value is not - is an appropriate change, and feels "breaking" to me (since users may define pipelines that depend on the value being - to represent empty).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay, thought it would ease transition to have a two step migration process (esp. since there are other legacy patterns that are pretty much useless and a breaking change could make them more "useful").
but if we rather not break anything in the legacy parts (even if we do a major release) I am fine with that.

Copy link
Contributor Author

@kares kares Aug 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reverted, have only kept missing (-) http response matching in legacy mode: 0887051
considering this a fix (previously failed to parse) + it's best to keep the legacy vs ecs patterns matching ~ the same. a spec is included in the changeset already.

HTTPD_COMBINEDLOG %{HTTPD_COMMONLOG} "(?:-|%{DATA:referrer})" "(?:-|%{DATA:agent})"

# Error logs
HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:message}
Expand Down
83 changes: 62 additions & 21 deletions spec/patterns/httpd_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,52 @@

describe "HTTPD_COMBINEDLOG" do

context "HTTPD_COMBINEDLOG", "Typical test case" do
let(:pattern) { 'HTTPD_COMBINEDLOG' }
let(:grok) { grok_match(pattern, message) }

let(:value) { '83.149.9.216 - - [24/Feb/2015:23:13:42 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1" 200 203023 "http://semicomplete.com/presentations/logstash-monitorama-2013/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"'}
context "typical test case" do

it "generates the clientip field" do
expect(grok_match(subject, value)).to include(
let(:message) { '83.149.9.216 - - [24/Feb/2015:23:13:42 +0000] "GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1" 200 203023 "http://semicomplete.com/presentations/logstash-monitorama-2013/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"'}

it "matches" do
expect(grok).to include(
'clientip' => '83.149.9.216',
'verb' => 'GET',
'request' => '/presentations/logstash-monitorama-2013/images/kibana-search.png',
'httpversion' => '1.1',
'response' => '200',
'bytes' => '203023',
'referrer' => '"http://semicomplete.com/presentations/logstash-monitorama-2013/"',
'agent' => '"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36"'
'referrer' => 'http://semicomplete.com/presentations/logstash-monitorama-2013/',
'agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36'
)
end

it "does not capture 'null' fields" do
expect(grok.keys).to_not include('ident', 'auth')
end

end

context "email address in auth field" do

let(:message) { '10.0.0.1 - username@example.com [07/Apr/2016:18:42:24 +0000] "GET /bar/foo/users/1/username%40example.com/authenticate?token=blargh&client_id=15 HTTP/1.1" 400 75 "" "Mozilla/5.0 (iPad; CPU OS 9_3_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13E238 Safari/601.1"'}

it "gets captured" do
expect(grok).to include("auth" => "username@example.com")
end

end

context "HTTPD_COMBINEDLOG", "Email address in auth field" do
context 'sample OPTIONS line' do

let(:message) { '83.149.9.216 - a.user [11/Jan/2020:23:05:27 +0100] "OPTIONS /remote.php/ HTTP/1.1" - 7908 "-" "monitoring-client (v2.2)"' }

let(:value) { '10.0.0.1 - username@example.com [07/Apr/2016:18:42:24 +0000] "GET /bar/foo/users/1/username%40example.com/authenticate?token=blargh&client_id=15 HTTP/1.1" 400 75 "" "Mozilla/5.0 (iPad; CPU OS 9_3_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13E238 Safari/601.1"'}
it 'matches' do
expect(grok).to include("verb" => "OPTIONS", 'request' => '/remote.php/', 'httpversion' => '1.1', "bytes" => '7908')
end

it "generates the clientip field" do
expect(grok_match(subject, value)).to include("auth" => "username@example.com")
it 'does not capture optional response code' do
expect(grok.keys).to_not include("response")
end

end
Expand All @@ -37,13 +58,16 @@

describe "HTTPD_ERRORLOG" do

context "HTTPD_ERRORLOG", "matches a full httpd 2.4 message" do
let(:value) {
let(:pattern) { 'HTTPD_ERRORLOG' }
let(:grok) { grok_match(pattern, message) }

context "matches a full httpd 2.4 message" do
let(:message) do
"[Mon Aug 31 09:30:48.958285 2015] [proxy_fcgi:error] [pid 28787:tid 140169587934976] (70008)Partial results are valid but processing is incomplete: [client 58.13.45.166:59307] AH01075: Error dispatching request to : (reading input brigade), referer: http://example.com/index.php?id_product=11&controller=product"
}
end
it "generates the fields" do

expect(grok_match(subject, value)).to include(
expect(grok).to include(
'timestamp' => 'Mon Aug 31 09:30:48.958285 2015',
'module' => 'proxy_fcgi',
'loglevel' => 'error',
Expand All @@ -54,26 +78,26 @@
'clientip' => '58.13.45.166',
'clientport' => '59307',
'errorcode' => 'AH01075',
'message' => [ value, 'Error dispatching request to : (reading input brigade), referer: http://example.com/index.php?id_product=11&controller=product' ],
'message' => [ message, 'Error dispatching request to : (reading input brigade), referer: http://example.com/index.php?id_product=11&controller=product' ],
)
end
end

context "HTTPD_ERRORLOG", "matches a httpd 2.2 log message" do
let(:value) {
let(:message) do
"[Mon Aug 31 16:27:04 2015] [error] [client 10.17.42.3] Premature end of script headers: example.com"
}
end
it "generates the fields" do
expect(grok_match(subject, value)).to include(
expect(grok).to include(
'timestamp' => 'Mon Aug 31 16:27:04 2015',
'loglevel' => 'error',
'clientip' => '10.17.42.3',
'message' => [ value, 'Premature end of script headers: example.com' ]
'message' => [ message, 'Premature end of script headers: example.com' ]
)
end
end

context "HTTPD_ERRORLOG", "matches a short httpd 2.4 message" do
context "HTTPD_ERRORLOG", "a short httpd 2.4 message" do
let(:value1) {
"[Mon Aug 31 07:15:38.664897 2015] [proxy_fcgi:error] [pid 28786:tid 140169629898496] [client 81.139.1.34:52042] AH01071: Got error 'Primary script unknown\n'"
}
Expand Down Expand Up @@ -107,7 +131,7 @@
end
end

context "HTTPD_ERRORLOG", "matches an httpd 2.4 restart" do
context "HTTPD_ERRORLOG", "a httpd 2.4 restart message" do
let(:value1) {
"[Mon Aug 31 06:29:47.406518 2015] [mpm_event:notice] [pid 24968:tid 140169861986176] AH00489: Apache/2.4.16 (Ubuntu) configured -- resuming normal operations"
}
Expand Down Expand Up @@ -139,5 +163,22 @@
end
end

context 'a debug message' do
let(:message) do
'[Fri Feb 01 22:03:08.319124 2019] [authz_core:debug] [pid 9:tid 140597881775872] mod_authz_core.c(820): [client 172.17.0.1:50752] AH01626: authorization result of <RequireAny>: granted'
end

it 'matches imperfectly (legacy)' do
expect(grok).to include({
"timestamp"=>"Fri Feb 01 22:03:08.319124 2019",
"module"=>"authz_core",
"loglevel"=>"debug",
"pid"=>"9",
"tid"=>"140597881775872",
"errorcode"=>"mod_authz_core.c(820)",
"message"=>[message, "[client 172.17.0.1:50752] AH01626: authorization result of <RequireAny>: granted"]
})
end
end

end