Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 06af507

Browse files
add build hexo post
1 parent 14e362d commit 06af507

File tree

8 files changed

+413
-171
lines changed

8 files changed

+413
-171
lines changed

‎.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.idea/

‎.idea/workspace.xml

Lines changed: 232 additions & 48 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# 批量导出CSDN博客
2+
> 批量导出CSDN博客,并转化为Hexo博客样式
3+
4+
## 使用
5+
6+
```bash
7+
go run main.go -username 你的csdn用户名 -page 1
8+
```
9+
> page不写,默认为下载全部页

‎go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
module csdn-hexo
22

33
go 1.12
4+
5+
require (
6+
github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed
7+
github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751 // indirect
8+
)

‎go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed h1:/nQRgal0OAvl64fVVo0IrwlMt8vXypxc/a+N0Is80VY=
2+
github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed/go.mod h1:4YWkn3EVkh8c1BDlVmw+Zh2QLhs+MbAg4xy4RqcKMsA=
3+
github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751 h1:3EYaPrwMGOaFxBbiLlsfRGFNlSLJ3ETjkPbTfkG5IGQ=
4+
github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751/go.mod h1:HYAQIJIdgW9cGr75BDsucQMgKREt00mECJHOskH5n5k=

‎hexo.go

Lines changed: 0 additions & 1 deletion
This file was deleted.

‎main.go

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,163 @@
11
package main
2+
3+
import (
4+
"encoding/json"
5+
"flag"
6+
"fmt"
7+
"io/ioutil"
8+
"math/rand"
9+
"net/http"
10+
"os"
11+
"regexp"
12+
"strings"
13+
"sync"
14+
"time"
15+
16+
"github.com/qianlnk/pgbar"
17+
)
18+
19+
// Crawl posts from CSDN
20+
const (
	// ListPostURL is the format string for one page of a user's article
	// list; it takes the username (%s) and the page number (%d).
	ListPostURL = "https://blog.csdn.net/%s/article/list/%d?"
	// PostDetailURL is the format string for the article detail endpoint;
	// it takes the article id (%s).
	PostDetailURL = "https://mp.csdn.net/mdeditor/getArticle?id=%s"
	// HexoHeader is the front-matter template written at the top of every
	// exported post. Its verbs are, in order: title, date, tags, categories.
	// NOTE(review): the template begins with a blank line before the first
	// "---" — confirm that Hexo accepts front matter that does not start on
	// the first line of the file.
	HexoHeader = `
---
title: %s
date: %s
tags: [%s]
categories: %s
---
`
)
32+
33+
// postTime is the timestamp written into the next exported post's header.
// It starts at program start-up time and buildPost moves it back by one to
// three days after each post it writes.
var postTime = time.Now()
34+
35+
// DetailData is the envelope decoded from the article detail response; the
// article itself is carried under the "data" key.
type DetailData struct {
	Data PostDetail `json:"data"`
}
38+
39+
// PostDetail holds the fields of a single article that are decoded from the
// detail response.
type PostDetail struct {
	// Title is used both in the front matter and as the output file name.
	Title string `json:"title"`
	// Description is decoded but not used by the code in this file.
	Description string `json:"description"`
	// Markdowncontent is the article body written after the front matter.
	Markdowncontent string `json:"markdowncontent"`
	// Tags is inserted verbatim between the brackets of the "tags" entry.
	Tags string `json:"tags"`
	// Categories is inserted verbatim into the "categories" entry.
	Categories string `json:"categories"`
}
46+
47+
// Package-level state shared by the crawler functions.
var (
	// username is the CSDN account to export, set by the -username flag.
	username string
	// page is the last list page to crawl, set by the -page flag; -1 means
	// no limit.
	page int
	// currentPage is the list page crawlPosts requests next; it starts at 1.
	currentPage = 1
	// count is incremented by buildPost once per post; it is not read
	// anywhere in this file.
	count int
	// wg is incremented once per article URL in main, which then waits on it.
	wg sync.WaitGroup
	// bar is the progress bar created in main and advanced by buildPost.
	bar *pgbar.Bar
)
55+
56+
func init() {
57+
flag.StringVar(&username, "username", "junmoxi", "your csdn username")
58+
flag.IntVar(&page, "page", -1, "download pages")
59+
flag.Parse()
60+
}
61+
62+
func main() {
63+
urls, err := crawlPosts(username)
64+
if err != nil {
65+
panic(err)
66+
}
67+
bar = pgbar.NewBar(0, "下载进度", len(urls))
68+
for _, url := range urls {
69+
wg.Add(1)
70+
go crawlPostMarkdown(url)
71+
}
72+
73+
wg.Wait()
74+
}
75+
76+
// Crawl posts by username
77+
func crawlPosts(username string) ([]string, error) {
78+
client := http.Client{}
79+
var (
80+
urls []string
81+
err error
82+
)
83+
84+
for {
85+
resp, err := client.Get(fmt.Sprintf(ListPostURL, username, currentPage))
86+
if err != nil {
87+
return nil, err
88+
}
89+
90+
data, err := ioutil.ReadAll(resp.Body)
91+
92+
r := regexp.MustCompile(`<h4 class="">\s*<a href=".*?"`)
93+
finds := r.FindAll(data, -1)
94+
95+
for _, f := range finds {
96+
ss := strings.Split(string(f), `"`)
97+
if len(ss) >= 4 {
98+
urls = append(urls, ss[3])
99+
}
100+
}
101+
102+
if len(finds) == 0 {
103+
return urls, nil
104+
}
105+
106+
if page != -1 && currentPage >= page {
107+
return urls, nil
108+
}
109+
currentPage++
110+
}
111+
112+
return urls, err
113+
}
114+
115+
func crawlPostMarkdown(url string) (*PostDetail, error) {
116+
index := strings.LastIndex(url, "/")
117+
id := url[index+1:]
118+
119+
client := http.Client{}
120+
121+
req, _ := http.NewRequest("GET", fmt.Sprintf(PostDetailURL, id), nil)
122+
req.Header.Set("cookie", "UserName=junmoxi; UserToken=de709e85392f4b8a8d19d69eb2273c56;")
123+
124+
resp, err := client.Do(req)
125+
if err != nil {
126+
return nil, err
127+
}
128+
129+
data, err := ioutil.ReadAll(resp.Body)
130+
if err != nil {
131+
return nil, err
132+
}
133+
134+
post := new(DetailData)
135+
err = json.Unmarshal(data, post)
136+
if err != nil {
137+
return nil, err
138+
}
139+
140+
go buildPost(post.Data)
141+
142+
return nil, nil
143+
}
144+
145+
func buildPost(post PostDetail) {
146+
147+
date := postTime.Format("2006年01月02日 15:03:04")
148+
header := fmt.Sprintf(HexoHeader, post.Title, date, post.Tags, post.Categories)
149+
150+
ioutil.WriteFile(
151+
fmt.Sprintf("%s.md", post.Title),
152+
[]byte(fmt.Sprintf("%s\n%s", header, post.Markdowncontent)),
153+
os.ModePerm)
154+
155+
rand.Seed(time.Now().UnixNano())
156+
d := rand.Intn(3) + 1
157+
postTime = postTime.AddDate(0, 0, -d)
158+
159+
count++
160+
161+
defer bar.Add()
162+
defer wg.Done()
163+
}

‎spider.go

Lines changed: 0 additions & 122 deletions
This file was deleted.

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /