Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 14e362d

Browse files
author
kw.lei
committed
First commit
0 parents commit 14e362d

File tree

8 files changed

+353
-0
lines changed

8 files changed

+353
-0
lines changed

‎.idea/csdn-hexo.iml

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/misc.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/modules.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/workspace.xml

Lines changed: 203 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
module csdn-hexo
2+
3+
go 1.12

‎hexo.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
package main

‎main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
package main

‎spider.go

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"io/ioutil"
7+
"net/http"
8+
"regexp"
9+
"strings"
10+
)
11+
12+
// Crawl posts from CSDN
13+
14+
const (
	// ListPostURL is the format string for a user's article-list page. It
	// takes the username (%s) followed by the page number (%d).
	ListPostURL = "https://blog.csdn.net/%s/article/list/%d?"

	// PostDetailURL is the format string for the endpoint queried for a
	// single article. It takes the article id (%s).
	PostDetailURL = "https://mp.csdn.net/mdeditor/getArticle?id=%s"
)
18+
19+
type DetailData struct {
20+
Data PostDetail `json:"data"`
21+
}
22+
23+
// PostDetail holds the fields of a single article as decoded from JSON.
// All fields are plain strings and are mapped by their lowercase JSON keys.
type PostDetail struct {
	Title           string `json:"title"`
	Description     string `json:"description"`
	Markdowncontent string `json:"markdowncontent"` // presumably the article body in Markdown; confirm against the API
	Tags            string `json:"tags"`
}
29+
30+
func GetPageSize(username string) (int, error) {
31+
client := http.Client{}
32+
33+
resp, err := client.Get(fmt.Sprintf(ListPostURL, username, 1))
34+
if err != nil {
35+
return 0,err
36+
}
37+
38+
data, err := ioutil.ReadAll(resp.Body)
39+
40+
r := regexp.MustCompile(`class="ui-pager">.*?</li>`)
41+
finds := r.FindAll(data, -1)
42+
43+
for _,f := range finds {
44+
ss := strings.Split(string(f), `<`)
45+
fmt.Println(ss)
46+
}
47+
48+
return 0, nil
49+
}
50+
51+
// Crawl posts by username
52+
func CrawlPosts(username string, page int) ([]string, error) {
53+
client := http.Client{}
54+
55+
resp, err := client.Get(fmt.Sprintf(ListPostURL, username, page))
56+
if err != nil {
57+
return nil,err
58+
}
59+
60+
data, err := ioutil.ReadAll(resp.Body)
61+
62+
r := regexp.MustCompile(`<h4 class="">\s*<a href=".*?"`)
63+
finds := r.FindAll(data, -1)
64+
65+
var urls []string
66+
67+
for _,f := range finds {
68+
ss := strings.Split(string(f), `"`)
69+
if len(ss) >= 4 {
70+
urls = append(urls, ss[3])
71+
}
72+
}
73+
74+
return urls,err
75+
}
76+
77+
func CrawlPostMarkdown(url string) (*PostDetail, error){
78+
79+
index := strings.LastIndex(url, "/")
80+
id := url[index+1:]
81+
82+
client := http.Client{}
83+
84+
req, _ := http.NewRequest("GET", fmt.Sprintf(PostDetailURL, id), nil)
85+
req.Header.Set("cookie","uuid_tt_dd=10_33227520360-1562155374449-785950; UN=junmoxi; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_33227520360-1562155374449-785950!5744*1*junmoxi!1788*1*PC_VC; smidV2=20190705154448794d4aea42482882ccb01b435d4655850093278d5d0bb12e0; OUTFOX_SEARCH_USER_ID_NCOO=1275289703.8182168; dc_session_id=10_1565764323161.169173; UserName=junmoxi; UserInfo=de709e85392f4b8a8d19d69eb2273c56; UserToken=de709e85392f4b8a8d19d69eb2273c56; UserNick=java%E6%B4%BE%E5%A4%A7%E6%98%9F; AU=B09; BT=1567597499382; p_uid=U000000; notice=1; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1569480050,1569545487,1569720826,1569734799; Hm_lpvt_6bcd52f51e9b3dce32bec4a3")
86+
87+
resp, err := client.Do(req)
88+
if err != nil {
89+
return nil, err
90+
}
91+
92+
data, err := ioutil.ReadAll(resp.Body)
93+
if err != nil {
94+
return nil, err
95+
}
96+
97+
detail := new(DetailData)
98+
err = json.Unmarshal(data, detail)
99+
if err != nil {
100+
return nil, err
101+
}
102+
fmt.Println(string(data))
103+
104+
fmt.Printf("%+v \n", detail)
105+
106+
return nil, nil
107+
}
108+
109+
func main() {
110+
//urls, err := CrawlPosts("junmoxi", 1)
111+
//if err != nil {
112+
// panic(err)
113+
//}
114+
//
115+
//for _,url := range urls{
116+
// fmt.Print(url)
117+
//}
118+
119+
CrawlPostMarkdown("https://blog.csdn.net/junmoxi/article/details/101631412")
120+
121+
// GetPageSize("junmoxi")
122+
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /