分享
  1. 首页
  2. 文章

go版下载妹子图

qii · · 2073 次点击 · · 开始浏览
这是一篇创建时间较早的文章,其中的信息可能已经有所发展或是发生改变。

package main
import(
 "fmt"
 "io/ioutil"
 "net/http"
 "regexp"
 "strings"
 "os"
 "image/png"
 "image/jpeg"
 "image"
)
// filePath is the prefix prepended to every saved image name. downloadImage
// builds names by plain string concatenation, so the value has to end with a
// path separator.
var filePath = "F:/girls/"
// fileExist reports whether os.Stat succeeds for fileName. Any Stat failure,
// not only "does not exist", is reported as false.
func fileExist(fileName string) bool {
	_, statErr := os.Stat(fileName)
	return statErr == nil
}
// Substr returns the runes of str in the window [start, start+length).
//
// A negative start is first rebased to len(runes)-1+start. If the window is
// reversed (negative length) its two ends are swapped, and finally both ends
// are clamped into [0, len(runes)], so the call never panics.
func Substr(str string, start, length int) string {
	runes := []rune(str)
	n := len(runes)

	if start < 0 {
		start += n - 1
	}
	lo, hi := start, start+length
	if lo > hi {
		lo, hi = hi, lo
	}

	// clamp pins an index into the valid slicing range of runes.
	clamp := func(i int) int {
		switch {
		case i < 0:
			return 0
		case i > n:
			return n
		}
		return i
	}
	return string(runes[clamp(lo):clamp(hi)])
}
// getImageList fetches the listing page at url, collects every album link
// found in it and passes those links, together with c, to getImageLink.
//
// c is the completion channel main is counting on. When the listing page
// cannot be fetched (getUrl returns ""), a value is sent on c here so that the
// waiting side is not left blocked on a signal that would never arrive. In the
// normal case signalling is left to getImageLink.
func getImageList(url string, c chan int) {
	fmt.Println("get page link url==>", url)
	body := getUrl(url)
	if body == "" {
		c <- 1
		return
	}
	reg := regexp.MustCompile("http://www.meizitu.com/a/[0-9]+.html")
	getImageLink(reg.FindAllString(body, -1), c)
}
// getImageLink visits every album page in links, extracts the image URLs from
// each page and passes them to downloadImage. When all links have been
// processed it sends one value on c to signal completion.
//
// An album page that cannot be fetched is skipped rather than aborting the
// whole batch: returning early would both drop the remaining albums and skip
// the send on c, leaving the receiver waiting forever.
func getImageLink(links []string, c chan int) {
	// Compiled once; the pattern does not depend on the loop variable.
	reg := regexp.MustCompile("http://pic.meizitu.com/wp-content/uploads/[^\\.]+\\.(jpg|png|gif)")
	for _, uri := range links {
		fmt.Println("Get images url, page link==>", uri)
		body := getUrl(uri)
		if body == "" {
			continue
		}
		downloadImage(reg.FindAllString(body, -1))
	}
	c <- 1
}
func downloadImage(images []string){
 for _,v:=range images{
 fmt.Println("Download image, url==>", v)
 imageType:=Substr(v, -2, 3)
 resp,ok:=http.Get(v)
 if nil!=ok{
 continue
 }
 defer resp.Body.Close()
 flag:=false
 var iImage image.Image
 content,ok:=ioutil.ReadAll(resp.Body)
 body:=string(content)
 if imageType=="jpg"{
 iImage,ok=jpeg.Decode(strings.NewReader(body))
 flag=true
 if nil!=ok{
 continue
 }
 } else if imageType == "png"{
 iImage,ok=png.Decode(strings.NewReader(body))
 flag=true
 if nil!=ok{
 continue
 }
 }
 if flag{
 rect:=iImage.Bounds()
 if rect.Max.X < 200 || rect.Max.Y < 200{
 //只下载大图,小图跳过
 fmt.Println("Skip download image, url ==>", v)
 continue
 }
 }
 // body:=getUrl(v)
 if nil!=ok || "" == body{
 fmt.Println("content is null")
 continue
 }
 paths:=strings.Split(v,"/")
 len:=len(paths)
 fileName:=filePath + paths[len-4]+ paths[len-3]+ paths[len-2] + paths[len-1]
 if fileExist(fileName){
 continue
 }
 f,ok:=os.Create(fileName)
 if ok!=nil{
 fmt.Println("open file error")
 return
 }
 defer f.Close()
 f.WriteString(body)
 }
}
// getUrl performs an HTTP GET on url and returns the response body as a
// string. A request error or a body read error yields the empty string.
func getUrl(url string) string {
	response, reqErr := http.Get(url)
	if reqErr != nil {
		return ""
	}
	defer response.Body.Close()

	if data, readErr := ioutil.ReadAll(response.Body); readErr == nil {
		return string(data)
	}
	return ""
}
// main crawls ten consecutive listing pages, one goroutine per page, and
// waits until every goroutine has reported completion on the shared channel.
func main() {
	const (
		pageTemplate = "http://www.meizitu.com/a/sifang_5_%d.html"
		firstPage    = 1
		pageCount    = 10
	)

	done := make(chan int, pageCount)
	for page := firstPage; page < firstPage+pageCount; page++ {
		go func(page int) {
			target := fmt.Sprintf(pageTemplate, page)
			fmt.Println("Parse url:", target)
			getImageList(target, done)
		}(page)
	}

	// One receive per started page.
	for finished := 0; finished < pageCount; finished++ {
		<-done
	}
	fmt.Println("done!")
}

重构的代码:

package main
import(
 "fmt"
 "io/ioutil"
 "net/http"
 "regexp"
 "strings"
 "os"
 "image/png"
 "image/jpeg"
 "image/gif"
 "image"
 "errors"
)
// Config holds the crawler settings. The field order is kept stable because
// unkeyed composite literals depend on it.
type Config struct {
	SavePath            string // prefix prepended to every saved file name
	MinWidth, MinHeight int    // minimum image dimensions accepted by CheckSize
	Overwrite           bool   // download again even when the target file exists
	MaxPage, StartPage  int    // number of listing pages to visit, and the first page number
}

// NewConfig returns a Config populated from the given arguments. Note that
// the argument order differs from the field order: overwrite comes last.
func NewConfig(savePath string, minWidth, minHeight, maxPage, startPage int, overwrite bool) *Config {
	cfg := new(Config)
	cfg.SavePath = savePath
	cfg.MinWidth = minWidth
	cfg.MinHeight = minHeight
	cfg.Overwrite = overwrite
	cfg.MaxPage = maxPage
	cfg.StartPage = startPage
	return cfg
}
// URL template and link patterns for the crawled site.
const (
	// PAGE_URL is the listing page template; %d is the page number.
	PAGE_URL string = `http://www.meizitu.com/a/sifang_5_%d.html`
	// IMAGE_LIST_LINKS is the regular expression matching album page links.
	IMAGE_LIST_LINKS string = `http://www.meizitu.com/a/[0-9]+.html`
	// IMAGE_IMAGE_LINKS is the regular expression matching jpg, png and gif image links.
	IMAGE_IMAGE_LINKS string = `http://pic.meizitu.com/wp-content/uploads/[^\.]+\.(jpg|png|gif)`
)
// Webpage is the crawler; its methods read their settings from Config.
type Webpage struct {
	Config *Config
}

// NewWebpage returns a Webpage that uses config.
func NewWebpage(config *Config) *Webpage {
	page := new(Webpage)
	page.Config = config
	return page
}
// ParsePage expands the format string url once for each of Config.MaxPage
// consecutive page numbers starting at Config.StartPage and returns the
// resulting URLs in order. It returns nil when MaxPage is not positive.
func (w *Webpage) ParsePage(url string) []string {
	var pages []string
	for i := 0; i < w.Config.MaxPage; i++ {
		pages = append(pages, fmt.Sprintf(url, w.Config.StartPage+i))
	}
	return pages
}
// Get performs an HTTP GET on url and returns the response body as a string.
// A request error or a body read error yields the empty string.
func (w *Webpage) Get(url string) (body string) {
	response, reqErr := http.Get(url)
	if reqErr != nil {
		return ""
	}
	defer response.Body.Close()

	if data, readErr := ioutil.ReadAll(response.Body); readErr == nil {
		return string(data)
	}
	return ""
}
// ParseUrl downloads url and returns every substring of the page that matches
// the regular expression pattern. When the page body is empty it returns an
// empty, non-nil slice without compiling pattern; otherwise an invalid
// pattern panics via regexp.MustCompile.
func (w *Webpage) ParseUrl(url, pattern string) (links []string) {
	fmt.Println("Parse url ==>", url)
	if page := w.Get(url); page != "" {
		return regexp.MustCompile(pattern).FindAllString(page, -1)
	}
	return []string{}
}
// GetSaveName derives the local file name for an image URL: Config.SavePath
// followed by the last four "/"-separated segments of url, concatenated
// without separators.
//
// A URL with fewer than four segments contributes all of its segments rather
// than causing an index-out-of-range panic.
func (w *Webpage) GetSaveName(url string) string {
	segments := strings.Split(url, "/")
	if len(segments) > 4 {
		segments = segments[len(segments)-4:]
	}
	return w.Config.SavePath + strings.Join(segments, "")
}
// Download fetches each image URL in urls and stores it under the name given
// by GetSaveName. An image is skipped when its target file already exists and
// Config.Overwrite is false, when the download yields an empty body, or when
// CheckSize rejects it. A failed save is reported and the loop continues.
func (w *Webpage) Download(urls []string) {
	for i := range urls {
		src := urls[i]
		fmt.Println("Start download image from url ==>", src)

		target := w.GetSaveName(src)
		if w.FileExist(target) && !w.Config.Overwrite {
			fmt.Println("Image already exists, skip download ==>", src)
			continue
		}

		data := w.Get(src)
		if data == "" {
			continue
		}

		if bigEnough := w.CheckSize(data, w.GetExt(src)); !bigEnough {
			fmt.Println("Image size too small, skip download ==>", src)
			continue
		}

		if saved := w.SaveImage(data, target); !saved {
			fmt.Println("Save image failed ==>", src)
		}
	}
}
// SaveImage creates (or truncates) the file name and writes body into it. It
// reports whether both the create and the write succeeded; a create failure
// is additionally printed.
func (w *Webpage) SaveImage(body, name string) bool {
	out, createErr := os.Create(name)
	if createErr != nil {
		fmt.Println("open file error")
		return false
	}
	defer out.Close()

	_, writeErr := out.WriteString(body)
	return writeErr == nil
}
// GetExt returns the part of url after its last ".". A url without any "."
// is returned unchanged, so the empty string maps to the empty string.
func (w *Webpage) GetExt(url string) string {
	if dot := strings.LastIndex(url, "."); dot >= 0 {
		return url[dot+1:]
	}
	return url
}
// CheckSize reports whether the image held in body meets the configured
// minimum dimensions.
//
// When neither Config.MinWidth nor Config.MinHeight is positive every input
// is accepted without decoding. Otherwise body is decoded according to ext
// ("jpg", "png" or "gif"); an unknown ext or a decode failure is rejected.
// The comparison uses the Max corner of the decoded image bounds.
func (w *Webpage) CheckSize(body, ext string) bool {
	minW, minH := w.Config.MinWidth, w.Config.MinHeight
	if minW <= 0 && minH <= 0 {
		return true
	}

	var (
		img image.Image
		err = errors.New("Unknow image type")
	)
	src := strings.NewReader(body)
	switch ext {
	case "jpg":
		img, err = jpeg.Decode(src)
	case "png":
		img, err = png.Decode(src)
	case "gif":
		img, err = gif.Decode(src)
	default:
		fmt.Println("Unknow image format")
		return false
	}
	if err != nil {
		return false
	}

	bounds := img.Bounds()
	return bounds.Max.X >= minW && bounds.Max.Y >= minH
}
// FileExist reports whether os.Stat succeeds for name. Any Stat failure, not
// only "does not exist", is reported as false.
func (w *Webpage) FileExist(name string) bool {
	_, statErr := os.Stat(name)
	return statErr == nil
}
// RunTask crawls every listing page produced by ParsePage(PAGE_URL). Each
// listing page is handled in its own goroutine, which resolves the album links
// on that page, downloads the images of each album, and then signals
// completion. RunTask returns once every goroutine has signalled.
//
// When there are no listing pages (Config.MaxPage is not positive) it returns
// immediately instead of waiting for a signal that nobody will send.
func (w *Webpage) RunTask() {
	pages := w.ParsePage(PAGE_URL)
	total := len(pages)

	// Buffered to the number of workers so that no worker blocks on its send.
	done := make(chan int, total)
	for _, page := range pages {
		go func(page string) {
			for _, album := range w.ParseUrl(page, IMAGE_LIST_LINKS) {
				w.Download(w.ParseUrl(album, IMAGE_IMAGE_LINKS))
			}
			done <- 1
		}(page)
	}

	// One receive per started worker.
	for finished := 0; finished < total; finished++ {
		<-done
	}
}
func main() {
 config := NewConfig(
 "F:/girls/",
 400,
 400,
 1,
 11,
 false,
 )
 webpage := NewWebpage(config)
 webpage.RunTask()
 fmt.Println("done!")
}

有疑问加站长微信联系(非本文作者)

本文来自:开源中国博客

感谢作者:qii

查看原文:go版下载妹子图

入群交流(和以上内容无关):加入Go大咖交流群,或添加微信:liuxiaoyan-s 备注:入群;或加QQ群:692541889

关注微信
2073 次点击
2 回复 | 直到 2018年05月23日 16:51:12
暂无回复
添加一条新回复 (您需要登录后才能回复,没有账号?)
  • 请尽量让自己的回复能够对别人有帮助
  • 支持 Markdown 格式, **粗体**、~~删除线~~、`单行代码`
  • 支持 @ 本站用户;支持表情(输入 : 提示),见 Emoji cheat sheet
  • 图片支持拖拽、截图粘贴等方式上传

用户登录

没有账号?注册
(追記) (追記ここまで)

今日阅读排行

    加载中
(追記) (追記ここまで)

一周阅读排行

    加载中

关注我

  • 扫码关注领全套学习资料 关注微信公众号
  • 加入 QQ 群:
    • 192706294(已满)
    • 731990104(已满)
    • 798786647(已满)
    • 729884609(已满)
    • 977810755(已满)
    • 815126783(已满)
    • 812540095(已满)
    • 1006366459(已满)
    • 692541889

  • 关注微信公众号
  • 加入微信群:liuxiaoyan-s,备注入群
  • 也欢迎加入知识星球 Go粉丝们(免费)

给该专栏投稿 写篇新文章

每篇文章总共有 5 次投稿机会

收入到我管理的专栏 新建专栏