144 lines
6.0 KiB
Markdown
144 lines
6.0 KiB
Markdown
<h1><center>LogStash 数据过滤</center></h1>
|
||
|
||
作者:行癫(盗版必究)
|
||
|
||
------
|
||
|
||
## 一:grok插件
|
||
|
||
#### 1.简介
|
||
|
||
grok插件有非常强大的功能,它几乎能匹配一切数据,但是它的性能和对资源的损耗同样让人诟病
|
||
|
||
filter的grok是目前logstash中解析非结构化日志数据最好的方式
|
||
|
||
grok位于正则表达式之上,所以任何正则表达式在grok中都是有效的
|
||
|
||
#### 2.语法格式
|
||
|
||
```shell
|
||
%{语法:语义}
|
||
```
|
||
|
||
注意:
|
||
|
||
语法指的是匹配的模式(即预定义的模式名称);语义指的是为匹配到的内容指定的字段名
|
||
|
||
例如使用NUMBER模式可以匹配出数字,IP模式则会匹配出127.0.0.1这样的IP地址
|
||
|
||
#### 3.案例
|
||
|
||
实验数据:Nginx的访问日志
|
||
|
||
Logstash配置文件(包含输入、过滤、输出三部分):
|
||
|
||
```shell
|
||
input {
|
||
stdin {
|
||
}
|
||
}
|
||
filter{
|
||
grok{
|
||
match => {"message" => "%{IP:client}"}
|
||
}
|
||
}
|
||
output {
|
||
stdout {
|
||
}
|
||
}
|
||
```
|
||
|
||
注意:上例中使用的IP是grok内置的匹配模式之一,常用的内置匹配模式定义如下:
|
||
|
||
```shell
|
||
USERNAME [a-zA-Z0-9._-]+
|
||
USER %{USERNAME}
|
||
EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
|
||
EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
|
||
INT (?:[+-]?(?:[0-9]+))
|
||
BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
|
||
NUMBER (?:%{BASE10NUM})
|
||
BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
|
||
BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
|
||
|
||
POSINT \b(?:[1-9][0-9]*)\b
|
||
NONNEGINT \b(?:[0-9]+)\b
|
||
WORD \b\w+\b
|
||
NOTSPACE \S+
|
||
SPACE \s*
|
||
DATA .*?
|
||
GREEDYDATA .*
|
||
QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
|
||
UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
|
||
# URN, allowing use of RFC 2141 section 2.3 reserved characters
|
||
URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
|
||
# Networking
|
||
MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
|
||
CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
|
||
WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
|
||
COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
|
||
IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
|
||
IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
|
||
IP (?:%{IPV6}|%{IPV4})
|
||
HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
|
||
IPORHOST (?:%{IP}|%{HOSTNAME})
|
||
HOSTPORT %{IPORHOST}:%{POSINT}
|
||
# paths
|
||
PATH (?:%{UNIXPATH}|%{WINPATH})
|
||
UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
|
||
TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
|
||
WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
|
||
URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
|
||
URIHOST %{IPORHOST}(?::%{POSINT:port})?
|
||
# uripath comes loosely from RFC1738, but mostly from what Firefox
|
||
# doesn't turn into %XX
|
||
URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
|
||
#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
|
||
URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
|
||
URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
|
||
URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
|
||
# Months: January, Feb, 3, 03, 12, December
|
||
MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
|
||
MONTHNUM (?:0?[1-9]|1[0-2])
|
||
MONTHNUM2 (?:0[1-9]|1[0-2])
|
||
MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
|
||
# Days: Monday, Tue, Thu, etc...
|
||
DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
|
||
# Years?
|
||
YEAR (?>\d\d){1,2}
|
||
HOUR (?:2[0123]|[01]?[0-9])
|
||
MINUTE (?:[0-5][0-9])
|
||
# '60' is a leap second in most time standards and thus is valid.
|
||
SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
|
||
TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
|
||
# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
|
||
DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
|
||
DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
|
||
ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
|
||
ISO8601_SECOND (?:%{SECOND}|60)
|
||
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
|
||
DATE %{DATE_US}|%{DATE_EU}
|
||
DATESTAMP %{DATE}[- ]%{TIME}
|
||
TZ (?:[APMCE][SD]T|UTC)
|
||
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
|
||
DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
|
||
DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
|
||
DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
|
||
# Syslog Dates: Month Day HH:MM:SS
|
||
SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
|
||
PROG [\x21-\x5a\x5c\x5e-\x7e]+
|
||
SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
|
||
SYSLOGHOST %{IPORHOST}
|
||
SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
|
||
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
|
||
# Shortcuts
|
||
QS %{QUOTEDSTRING}
|
||
# Log formats
|
||
SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
|
||
```
|
||
|
||
|
||
|
||
|
||
|