中文网页正文内容提取 基于《基于行块分布函数的通用网页正文抽取算法》实现
go get github.com/yqingp/extractor
import ( "github.com/yqingp/extractor" ) .... extract_worker := extractor.NewExtractor(url) content, err := extract_worker.Extract() if err != nil { fmt.Println(err) } else { fmt.Println(content) }
go run example/server.go
require 'rest_client' RestClient.post("http://localhost:8000/work", {:url => "http://www.baidu.com"})