分享
用go实现web日志分析及网页挂马关键字检测
学思 · · 4972 次点击 · · 开始浏览这是一个创建于 的文章,其中的信息可能已经有所发展或是发生改变。
本程序主要实现网页挂马关键字检测。原理是这样的:定时分析用户访问产生的 IIS web 日志,对日志里的 URL 逐个进行访问,然后检索页面内容是否包含相关的关键字,有就入库。这里只是用 go 实现了自动检索及入库,可以再加个前端,实现添加关键字及报警功能。
packagemain
import(
"bufio"
"code.google.com/p/mahonia"
"fmt"
"io"
"io/ioutil"
"labix.org/v2/mgo"
"labix.org/v2/mgo/bson"
"log"
"net"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"time"
//qqwry为IP库
//ftp"github.com/jlaffaye/ftp"
//"github.com/yinheli/qqwry"
)
// Version is the version string of this program.
const Version = "CDN1.0"
//varServerList[]ServerMember
// PageHasKeyWord records a suspicious URL, the keywords found on that page,
// and the server and log file the URL was taken from. It is the document
// that is eventually inserted into the database.
//
// The field order is significant: the struct is also built with positional
// (unkeyed) composite literals.
type PageHasKeyWord struct {
	Url       string   // full URL of the suspicious page
	KeyWords  []string // keywords that were found on the page
	LogFile   string   // log file the URL was extracted from
	ServerIp  string   // IP address of the server that analysed the log
	Audit     bool     // whether the record has been audited
	Auditor   string   // who audited the record
	AuditTime string   // when the record was audited
	Level     int      // suspicion level
}
typeDoubtfulKeyWordstruct{
Idbson.ObjectId"_id"
KeyWordstring
//Levelstring
}
// UrlsToAnalysit is the result of analysing one log file: the URL paths to
// fetch plus the log file, server IP and domain they belong to.
type UrlsToAnalysit struct {
	Urls     []string // URL paths (without scheme or host) to check
	LogFile  string   // path of the analysed log file
	ServerIp string   // IP address of the local server
	Domain   string   // site domain, prepended to each URL path by the caller
}
// MonthTOstr is intended to map an English month name (as Go prints months)
// to its two-digit number string. It is created empty here; entries have to
// be assigned inside a function body.
// NOTE(review): nothing in the visible code ever populates this
// package-level map — confirm whether it is still needed.
var MonthTOstr = make(map[string]string)
funcmain(){
//fmt.Printf("%s",getLogFileName())
iflen(os.Args)!=4{
log.Fatal("Usage:"+filepath.Base(os.Args[0])+"log_dir"+"db_server_ip_addresstest")
os.Exit(1)
}
logFileName,_,_:=getLogFileName()
fmt.Println(time.Now())
logDbServer:=os.Args[2]
dir:=os.Args[1]
dbname:="webmonitordb"
//doubtfulKW为string数组,从数据库中获取可疑检索的URL,注意的是go是区分大小写的,因此要注意mongodb里的库名及collection名大小写
//doubtfulKW:=[]string{}
keyWords:=getdoubtfulkeyword(logDbServer,dbname,"DangeroursWords")
//wordlist:=[]string{"百家乐","博彩网","网上赌场","娱乐城","赌博网站","皇冠投注","皇冠开户","真龙娱乐城","三亚娱乐城"}
/*forn,item:=rangekeyWords{
fmt.Println(n,item.KeyWord)
}*/
ifos.Args[3]=="test"{
fmt.Println("wait!")
time.Sleep(time.Duration(90)*time.Second)
}
fmt.Println("start!")
filepath.Walk(dir,func(pathstring,fos.FileInfo,errerror)error{
iff==nil{
returnerr
}
iff.IsDir(){
returnnil
}
iff.Name()==logFileName{
//fmt.Println(path)
//fmt.Println(time.Now())
urls:=logFileAnalysit(path)
//fmt.Println(urls)
for_,v:=rangeurls.Urls{
//fmt.Println(n,":",v)
url:="http://"+urls.Domain+v
//fmt.Println(n,url)
pagestr:=getPage(url)
findWord(pagestr,url,urls.LogFile,urls.ServerIp,keyWords)
//fmt.Println(n)
}
}
returnnil
})
}
funclogFileAnalysit(logFilestring)UrlsToAnalysit{
readLogFile,err:=os.Open(logFile)
iferr!=nil{
log.Fatal(err)
}
deferreadLogFile.Close()
//pathFields的作用是将日志path解析成一个数据,从而可以得到日志的域名
pathFields:=strings.Split(logFile,"\\")
vardomainNamestring
iflen(pathFields)>3{
domainName=pathFields[len(pathFields)-3]
}
varUrlsUrlsToAnalysit
Urls.Domain=domainName
Urls.LogFile=logFile
Urls.ServerIp=getLocalIpAddr()
Urls.Urls=append(Urls.Urls,"/")//监控站点首页
//analysitTime:=time.Now()
bfLogFile:=bufio.NewReader(readLogFile)
//定义一个gbk转utf8解码器
//enc:=mahonia.NewDecoder("gbk")
for{
logRecordStr,err:=bfLogFile.ReadString('\n')
iferr==io.EOF{
//注意,这里要使用break,而不是return,return会使整个个程序退出了,break只是中断当前的for循环
break
}
//以"#"开头的要跳过,iiS日志前几行是做了注释的
ifstrings.HasPrefix(logRecordStr,"#"){
continue
}
//recordItems是个string数组,用来临时保存每行记录里的字段
//#Fields:datetimes-sitenames-ipcs-methodcs-uri-stemcs-uri-querys-portcs-usernamec-ipcs(User-Agent)sc-statussc-substatussc-win32-statustime-taken
fields:=strings.Split(logRecordStr,"")
//scstatus为服务器状态码的index
iflen(fields)<13{//不满16个字段的跳过
continue
}
url:=strings.ToLower(fields[5])
//fields[9]!=Urls.ServerIp过滤掉自己访问的IP记录
if(strings.HasSuffix(url,"htm")||strings.HasSuffix(url,"html"))&&fields[12]=="200"&&fields[9]!=Urls.ServerIp{
flag:=true
for_,v:=rangeUrls.Urls{
ifv==url{
flag=false
break
}
}
ifflag{
Urls.Urls=append(Urls.Urls,url)
}
}
}
returnUrls
}
// getLogFileName builds, from the current local time, the name of the log
// file to analyse. IIS sites write one log file per hour, named
// exYYMMDDHH.log.
//
// It returns the file name ("exYYMMDDHH.log"), the day stamp ("YYMMDD") and
// the month stamp ("YYMM"), each part zero-padded to two digits.
func getLogFileName() (string, string, string) {
	now := time.Now()
	year, month, day := now.Date()

	// twoDigits renders n (0-99) as exactly two decimal digits.
	twoDigits := func(n int) string {
		s := strconv.Itoa(n)
		if n < 10 {
			return "0" + s
		}
		return s
	}

	// Take the last two digits of the year arithmetically. Trimming the
	// characters '2' and '0' from the left mangles years such as 2020 or 2023.
	logMonth := twoDigits(year%100) + twoDigits(int(month))
	logDay := logMonth + twoDigits(day)
	fileName := "ex" + logDay + twoDigits(now.Hour()) + ".log"
	return fileName, logDay, logMonth
}
funcgetPage(pagestring)string{
resp,err:=http.Get(page)
iferr!=nil{
log.Fatal(err)
}
deferresp.Body.Close()
enc:=mahonia.NewDecoder("gb2312")
pageData,_:=ioutil.ReadAll(resp.Body)
//returnstring(pageData)
PageStr:=enc.ConvertString(string(pageData))
returnPageStr
}
funcfindWord(str,url,logfile,serveripstring,wordlist[]DoubtfulKeyWord){
varphkwPageHasKeyWord
flag:=false
for_,item:=rangewordlist{
ifstrings.Contains(str,item.KeyWord){
//fmt.Println("thepagecontainstheword:",item.KeyWord,url)
phkw.KeyWords=append(phkw.KeyWords,item.KeyWord)
flag=true
}
}
ifflag{
phkw.Url=url
phkw.LogFile=logfile
phkw.ServerIp=serverip
phkw.Audit=false
phkw.Level=len(phkw.KeyWords)
interdatadb("192.168.111.28","webmonitordb","dangerPage",phkw)
}
}
funcinterdatadb(dbserver,dbname,celectionstring,itemsPageHasKeyWord){
session,err:=mgo.Dial(dbserver)
iferr!=nil{
panic(err)
}
defersession.Close()
//Optional.Switchthesessiontoamonotonicbehavior.
session.SetMode(mgo.Monotonic,true)
c:=session.DB(dbname).C(celection)
//fmt.Println(items)
//time.Sleep(time.Duration(90)*time.Second)
err=c.Insert(&PageHasKeyWord{items.Url,items.KeyWords,items.LogFile,items.ServerIp,items.Audit,items.Auditor,items.AuditTime,items.Level})
//err=c.Insert(&LogItems(logItem))
iferr!=nil{
panic(err)
}
}
funcgetdoubtfulkeyword(dbserver,dbname,collectionstring)[]DoubtfulKeyWord{
//连接数据库
session,err:=mgo.Dial(dbserver)
iferr!=nil{
panic(err)
}
defersession.Close()
//获取数据库,获取集合
c:=session.DB(dbname).C(collection)
kws:=[]DoubtfulKeyWord{}
//kw:=[]string{}
//vartatask
err=c.Find(bson.M{}).All(&kws)
iferr!=nil{
panic(err)
}
/*forn,item:=rangekws{
fmt.Println(n,item.Id,item.KeyWord)
}*/
returnkws
}
// getLocalIpAddr returns the local IP address that would be used to reach
// the probe address 192.168.18.51, or "127.0.0.1" if it cannot be
// determined.
func getLocalIpAddr() string {
	// Any valid IP works here and the port is arbitrary. The original author
	// notes the peer does not even need to be listening, presumably because
	// UDP is used; with TCP a closed peer would be a problem.
	conn, err := net.Dial("udp", "192.168.18.51:80")
	if err != nil {
		return "127.0.0.1"
	}
	defer conn.Close()

	// Read the IP from the typed address instead of cutting the string at
	// the first ":", which would truncate an IPv6 address.
	if udpAddr, ok := conn.LocalAddr().(*net.UDPAddr); ok {
		return udpAddr.IP.String()
	}
	host, _, err := net.SplitHostPort(conn.LocalAddr().String())
	if err != nil {
		return "127.0.0.1"
	}
	return host
}
有疑问加站长微信联系(非本文作者)
入群交流(和以上内容无关):加入Go大咖交流群,或添加微信:liuxiaoyan-s 备注:入群;或加QQ群:692541889
关注微信4972 次点击
添加一条新回复
(您需要 后才能回复 没有账号 ?)
- 请尽量让自己的回复能够对别人有帮助
- 支持 Markdown 格式, **粗体**、~~删除线~~、
`单行代码` - 支持 @ 本站用户;支持表情(输入 : 提示),见 Emoji cheat sheet
- 图片支持拖拽、截图粘贴等方式上传
收入到我管理的专栏 新建专栏
本程序主要实现网页挂马关键字检测。原理是这样的:定时分析用户访问产生的 IIS web 日志,对日志里的 URL 逐个进行访问,然后检索页面内容是否包含相关的关键字,有就入库。这里只是用 go 实现了自动检索及入库,可以再加个前端,实现添加关键字及报警功能。
packagemain
import(
"bufio"
"code.google.com/p/mahonia"
"fmt"
"io"
"io/ioutil"
"labix.org/v2/mgo"
"labix.org/v2/mgo/bson"
"log"
"net"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"time"
//qqwry为IP库
//ftp"github.com/jlaffaye/ftp"
//"github.com/yinheli/qqwry"
)
// Version is the version string of this program.
const Version = "CDN1.0"
//varServerList[]ServerMember
// PageHasKeyWord records a suspicious URL, the keywords found on that page,
// and the server and log file the URL was taken from. It is the document
// that is eventually inserted into the database.
//
// The field order is significant: the struct is also built with positional
// (unkeyed) composite literals.
type PageHasKeyWord struct {
	Url       string   // full URL of the suspicious page
	KeyWords  []string // keywords that were found on the page
	LogFile   string   // log file the URL was extracted from
	ServerIp  string   // IP address of the server that analysed the log
	Audit     bool     // whether the record has been audited
	Auditor   string   // who audited the record
	AuditTime string   // when the record was audited
	Level     int      // suspicion level
}
typeDoubtfulKeyWordstruct{
Idbson.ObjectId"_id"
KeyWordstring
//Levelstring
}
// UrlsToAnalysit is the result of analysing one log file: the URL paths to
// fetch plus the log file, server IP and domain they belong to.
type UrlsToAnalysit struct {
	Urls     []string // URL paths (without scheme or host) to check
	LogFile  string   // path of the analysed log file
	ServerIp string   // IP address of the local server
	Domain   string   // site domain, prepended to each URL path by the caller
}
// MonthTOstr is intended to map an English month name (as Go prints months)
// to its two-digit number string. It is created empty here; entries have to
// be assigned inside a function body.
// NOTE(review): nothing in the visible code ever populates this
// package-level map — confirm whether it is still needed.
var MonthTOstr = make(map[string]string)
funcmain(){
//fmt.Printf("%s",getLogFileName())
iflen(os.Args)!=4{
log.Fatal("Usage:"+filepath.Base(os.Args[0])+"log_dir"+"db_server_ip_addresstest")
os.Exit(1)
}
logFileName,_,_:=getLogFileName()
fmt.Println(time.Now())
logDbServer:=os.Args[2]
dir:=os.Args[1]
dbname:="webmonitordb"
//doubtfulKW为string数组,从数据库中获取可疑检索的URL,注意的是go是区分大小写的,因此要注意mongodb里的库名及collection名大小写
//doubtfulKW:=[]string{}
keyWords:=getdoubtfulkeyword(logDbServer,dbname,"DangeroursWords")
//wordlist:=[]string{"百家乐","博彩网","网上赌场","娱乐城","赌博网站","皇冠投注","皇冠开户","真龙娱乐城","三亚娱乐城"}
/*forn,item:=rangekeyWords{
fmt.Println(n,item.KeyWord)
}*/
ifos.Args[3]=="test"{
fmt.Println("wait!")
time.Sleep(time.Duration(90)*time.Second)
}
fmt.Println("start!")
filepath.Walk(dir,func(pathstring,fos.FileInfo,errerror)error{
iff==nil{
returnerr
}
iff.IsDir(){
returnnil
}
iff.Name()==logFileName{
//fmt.Println(path)
//fmt.Println(time.Now())
urls:=logFileAnalysit(path)
//fmt.Println(urls)
for_,v:=rangeurls.Urls{
//fmt.Println(n,":",v)
url:="http://"+urls.Domain+v
//fmt.Println(n,url)
pagestr:=getPage(url)
findWord(pagestr,url,urls.LogFile,urls.ServerIp,keyWords)
//fmt.Println(n)
}
}
returnnil
})
}
funclogFileAnalysit(logFilestring)UrlsToAnalysit{
readLogFile,err:=os.Open(logFile)
iferr!=nil{
log.Fatal(err)
}
deferreadLogFile.Close()
//pathFields的作用是将日志path解析成一个数据,从而可以得到日志的域名
pathFields:=strings.Split(logFile,"\\")
vardomainNamestring
iflen(pathFields)>3{
domainName=pathFields[len(pathFields)-3]
}
varUrlsUrlsToAnalysit
Urls.Domain=domainName
Urls.LogFile=logFile
Urls.ServerIp=getLocalIpAddr()
Urls.Urls=append(Urls.Urls,"/")//监控站点首页
//analysitTime:=time.Now()
bfLogFile:=bufio.NewReader(readLogFile)
//定义一个gbk转utf8解码器
//enc:=mahonia.NewDecoder("gbk")
for{
logRecordStr,err:=bfLogFile.ReadString('\n')
iferr==io.EOF{
//注意,这里要使用break,而不是return,return会使整个个程序退出了,break只是中断当前的for循环
break
}
//以"#"开头的要跳过,iiS日志前几行是做了注释的
ifstrings.HasPrefix(logRecordStr,"#"){
continue
}
//recordItems是个string数组,用来临时保存每行记录里的字段
//#Fields:datetimes-sitenames-ipcs-methodcs-uri-stemcs-uri-querys-portcs-usernamec-ipcs(User-Agent)sc-statussc-substatussc-win32-statustime-taken
fields:=strings.Split(logRecordStr,"")
//scstatus为服务器状态码的index
iflen(fields)<13{//不满16个字段的跳过
continue
}
url:=strings.ToLower(fields[5])
//fields[9]!=Urls.ServerIp过滤掉自己访问的IP记录
if(strings.HasSuffix(url,"htm")||strings.HasSuffix(url,"html"))&&fields[12]=="200"&&fields[9]!=Urls.ServerIp{
flag:=true
for_,v:=rangeUrls.Urls{
ifv==url{
flag=false
break
}
}
ifflag{
Urls.Urls=append(Urls.Urls,url)
}
}
}
returnUrls
}
// getLogFileName builds, from the current local time, the name of the log
// file to analyse. IIS sites write one log file per hour, named
// exYYMMDDHH.log.
//
// It returns the file name ("exYYMMDDHH.log"), the day stamp ("YYMMDD") and
// the month stamp ("YYMM"), each part zero-padded to two digits.
func getLogFileName() (string, string, string) {
	now := time.Now()
	year, month, day := now.Date()

	// twoDigits renders n (0-99) as exactly two decimal digits.
	twoDigits := func(n int) string {
		s := strconv.Itoa(n)
		if n < 10 {
			return "0" + s
		}
		return s
	}

	// Take the last two digits of the year arithmetically. Trimming the
	// characters '2' and '0' from the left mangles years such as 2020 or 2023.
	logMonth := twoDigits(year%100) + twoDigits(int(month))
	logDay := logMonth + twoDigits(day)
	fileName := "ex" + logDay + twoDigits(now.Hour()) + ".log"
	return fileName, logDay, logMonth
}
funcgetPage(pagestring)string{
resp,err:=http.Get(page)
iferr!=nil{
log.Fatal(err)
}
deferresp.Body.Close()
enc:=mahonia.NewDecoder("gb2312")
pageData,_:=ioutil.ReadAll(resp.Body)
//returnstring(pageData)
PageStr:=enc.ConvertString(string(pageData))
returnPageStr
}
funcfindWord(str,url,logfile,serveripstring,wordlist[]DoubtfulKeyWord){
varphkwPageHasKeyWord
flag:=false
for_,item:=rangewordlist{
ifstrings.Contains(str,item.KeyWord){
//fmt.Println("thepagecontainstheword:",item.KeyWord,url)
phkw.KeyWords=append(phkw.KeyWords,item.KeyWord)
flag=true
}
}
ifflag{
phkw.Url=url
phkw.LogFile=logfile
phkw.ServerIp=serverip
phkw.Audit=false
phkw.Level=len(phkw.KeyWords)
interdatadb("192.168.111.28","webmonitordb","dangerPage",phkw)
}
}
funcinterdatadb(dbserver,dbname,celectionstring,itemsPageHasKeyWord){
session,err:=mgo.Dial(dbserver)
iferr!=nil{
panic(err)
}
defersession.Close()
//Optional.Switchthesessiontoamonotonicbehavior.
session.SetMode(mgo.Monotonic,true)
c:=session.DB(dbname).C(celection)
//fmt.Println(items)
//time.Sleep(time.Duration(90)*time.Second)
err=c.Insert(&PageHasKeyWord{items.Url,items.KeyWords,items.LogFile,items.ServerIp,items.Audit,items.Auditor,items.AuditTime,items.Level})
//err=c.Insert(&LogItems(logItem))
iferr!=nil{
panic(err)
}
}
funcgetdoubtfulkeyword(dbserver,dbname,collectionstring)[]DoubtfulKeyWord{
//连接数据库
session,err:=mgo.Dial(dbserver)
iferr!=nil{
panic(err)
}
defersession.Close()
//获取数据库,获取集合
c:=session.DB(dbname).C(collection)
kws:=[]DoubtfulKeyWord{}
//kw:=[]string{}
//vartatask
err=c.Find(bson.M{}).All(&kws)
iferr!=nil{
panic(err)
}
/*forn,item:=rangekws{
fmt.Println(n,item.Id,item.KeyWord)
}*/
returnkws
}
// getLocalIpAddr returns the local IP address that would be used to reach
// the probe address 192.168.18.51, or "127.0.0.1" if it cannot be
// determined.
func getLocalIpAddr() string {
	// Any valid IP works here and the port is arbitrary. The original author
	// notes the peer does not even need to be listening, presumably because
	// UDP is used; with TCP a closed peer would be a problem.
	conn, err := net.Dial("udp", "192.168.18.51:80")
	if err != nil {
		return "127.0.0.1"
	}
	defer conn.Close()

	// Read the IP from the typed address instead of cutting the string at
	// the first ":", which would truncate an IPv6 address.
	if udpAddr, ok := conn.LocalAddr().(*net.UDPAddr); ok {
		return udpAddr.IP.String()
	}
	host, _, err := net.SplitHostPort(conn.LocalAddr().String())
	if err != nil {
		return "127.0.0.1"
	}
	return host
}