Skip to content

Commit 6a8bc17

Browse files
merge
1 parent 37aeabc commit 6a8bc17

File tree

5 files changed

+6518
-129
lines changed

5 files changed

+6518
-129
lines changed

‎scripts/LeetCodeProvider.js

+30-38
Original file line numberDiff line numberDiff line change
@@ -9,41 +9,33 @@ const Utils = require('./utils')
99
const { PROBLEMS_URL, QUESTION_DOM_SELECTOR, BASE_MARKDWON_DOWNLOAD_URL, ENGLISH_MARKDOWN_SIGN } = require('./constants')
1010

1111
module.exports = LeetCodeProvider = {
12-
13-
14-
15-
getProblemsTitle() {
16-
17-
return Utils.httpGet(PROBLEMS_URL)
18-
.then((body)=> {
19-
let titles = []
20-
let sHtml = Iconv.decode(body, 'utf-8').toString()
21-
cheerio.load(sHtml)(QUESTION_DOM_SELECTOR).each((idx, ele) => titles.push(ele.attribs['title']))
22-
Logger.success('获取问题列表成功')
23-
24-
return titles.filter(name => !name.endsWith(ENGLISH_MARKDOWN_SIGN))
25-
})
26-
.catch(error => {
27-
Logger.error('获取问题列表失败', error)
28-
})
29-
},
30-
31-
32-
33-
34-
getProblemDetail(problemNameWithExt) {
35-
36-
return Utils.httpGet(`${BASE_MARKDWON_DOWNLOAD_URL}${problemNameWithExt}`)
37-
.then(body => {
38-
39-
let markdown = Iconv.decode(body, 'utf-8').toString()
40-
Logger.success(`抓取问题 "${problemNameWithExt}" 成功!`)
41-
return markdown
42-
})
43-
.catch(error => {
44-
Logger.error(`抓取问题 "${problemNameWithExt}" 失败`, error)
45-
})
46-
}
47-
48-
}
49-
12+
getProblemsTitle() {
13+
return Utils.httpGet(PROBLEMS_URL)
14+
.then((body) => {
15+
let titles = [];
16+
let sHtml = Iconv.decode(body, "utf-8").toString();
17+
cheerio
18+
.load(sHtml)(QUESTION_DOM_SELECTOR)
19+
.each((idx, ele) => titles.push(ele.attribs["title"]));
20+
Logger.success("获取问题列表成功");
21+
return titles.filter(Boolean).filter((name) => !name.endsWith(ENGLISH_MARKDOWN_SIGN));
22+
})
23+
.catch((error) => {
24+
Logger.error("获取问题列表失败", error);
25+
});
26+
},
27+
28+
getProblemDetail(problemNameWithExt) {
29+
return Utils.httpGet(`${BASE_MARKDWON_DOWNLOAD_URL}${problemNameWithExt}`)
30+
.then((body) => {
31+
let markdown = Iconv.decode(body, "utf-8").toString();
32+
Logger.success(`抓取问题 "${problemNameWithExt}" 成功!`);
33+
return markdown;
34+
})
35+
.catch((error) => {
36+
Logger.error(`抓取问题 "${problemNameWithExt}" 失败`, error);
37+
});
38+
},
39+
};
40+
41+
LeetCodeProvider.getProblemsTitle()

‎scripts/constants.js

+47-53
Original file line numberDiff line numberDiff line change
@@ -2,56 +2,50 @@
22

33

44
module.exports = {
5-
6-
/**
7-
* 需解析的语言类型
8-
*/
9-
SUPPORT_LANGUAGE: [
10-
'java',
11-
'js',
12-
'cpp',
13-
'py'
14-
],
15-
16-
/**
17-
* 是否启用强制更新
18-
* 如开启,会跳过读取本地缓存,拉取最新文件
19-
*/
20-
IS_FORCE_UPDATE_MODE: true,
21-
22-
/**
23-
* 请求处理频率 ms
24-
*/
25-
REQUEST_RATE: 300,
26-
27-
/**
28-
* markdown输出目录
29-
*/
30-
RAW_MARKDOWN_OUTPUT_DIR: 'spider/raw-markdown',
31-
32-
/**
33-
* 转化后的json输出目录
34-
*/
35-
DB_JSON_OUTPUT_DIR: 'spider/yield-db-json',
36-
37-
/**
38-
* 获取问题列表地址
39-
*/
40-
PROBLEMS_URL: 'https://github.com/azl397985856/leetcode/tree/master/problems',
41-
42-
/**
43-
* 抓取页面问题内容的dom元素选择器
44-
*/
45-
QUESTION_DOM_SELECTOR: '.js-navigation-item .content .js-navigation-open',
46-
47-
/**
48-
* markdwon下载地址
49-
*/
50-
BASE_MARKDWON_DOWNLOAD_URL: 'https://raw.githubusercontent.com/azl397985856/leetcode/master/problems/',
51-
52-
/**
53-
* 过滤英文文档末尾标识
54-
*/
55-
ENGLISH_MARKDOWN_SIGN: '.en.md'
56-
57-
}
5+
/**
6+
* 需解析的语言类型
7+
*/
8+
SUPPORT_LANGUAGE: ["java", "js", "cpp", "py"],
9+
10+
/**
11+
* 是否启用强制更新
12+
* 如开启,会跳过读取本地缓存,拉取最新文件
13+
*/
14+
IS_FORCE_UPDATE_MODE: true,
15+
16+
/**
17+
* 请求处理频率 ms
18+
*/
19+
REQUEST_RATE: 300,
20+
21+
/**
22+
* markdown输出目录
23+
*/
24+
RAW_MARKDOWN_OUTPUT_DIR: "spider/raw-markdown",
25+
26+
/**
27+
* 转化后的json输出目录
28+
*/
29+
DB_JSON_OUTPUT_DIR: "spider/yield-db-json",
30+
31+
/**
32+
* 获取问题列表地址
33+
*/
34+
PROBLEMS_URL: "https://github.com/azl397985856/leetcode/tree/master/problems",
35+
36+
/**
37+
* 抓取页面问题内容的dom元素选择器
38+
*/
39+
QUESTION_DOM_SELECTOR: ".position-relative.js-navigation-item .js-navigation-open",
40+
41+
/**
42+
* markdwon下载地址
43+
*/
44+
BASE_MARKDWON_DOWNLOAD_URL:
45+
"https://raw.githubusercontent.com/azl397985856/leetcode/master/problems/",
46+
47+
/**
48+
* 过滤英文文档末尾标识
49+
*/
50+
ENGLISH_MARKDOWN_SIGN: ".en.md",
51+
};

‎scripts/curlleetcode.js

+33-34
Original file line numberDiff line numberDiff line change
@@ -17,40 +17,39 @@ let requsetNumber = 0
1717
Utils.mkdirSync(RAW_MARKDOWN_OUTPUT_DIR)
1818

1919
const getProblemDetail = (questionsName, requsetNumber) => {
20-
21-
const cachedFilesName = Utils.getDirsFileNameSync(RAW_MARKDOWN_OUTPUT_DIR)
22-
23-
if (!IS_FORCE_UPDATE_MODE && cachedFilesName.includes(questionsName[requsetNumber])) {
24-
25-
Logger.success(`${questionsName[requsetNumber]}命中缓存, 跳过。。。`)
26-
27-
requsetNumber++
28-
29-
30-
getProblemDetail(questionsName, requsetNumber)
31-
32-
}
33-
else {
34-
35-
questionsName[requsetNumber] && LeetCodeProvider.getProblemDetail(questionsName[requsetNumber]).then(markDown => {
36-
if (markDown) {
37-
38-
Logger.success(`问题: "${questionsName[requsetNumber]}" | 结果: ${JSON.stringify(markDown)}`)
39-
40-
Utils.writeFileSync(RAW_MARKDOWN_OUTPUT_DIR, questionsName[requsetNumber], markDown)
41-
42-
requsetNumber++
43-
} else {
44-
Logger.error(`获取${questionsName[requsetNumber]} markdown 失败!`)
45-
}
46-
47-
}).catch(Logger.error).then(() => {
48-
49-
setTimeout(() => {
50-
51-
questionsName[requsetNumber] && getProblemDetail(questionsName, requsetNumber)
52-
53-
}, REQUEST_RATE)
20+
const cachedFilesName = Utils.getDirsFileNameSync(RAW_MARKDOWN_OUTPUT_DIR);
21+
22+
if (
23+
!IS_FORCE_UPDATE_MODE &&
24+
cachedFilesName.includes(questionsName[requsetNumber])
25+
) {
26+
Logger.success(`${questionsName[requsetNumber]}命中缓存, 跳过。。。`);
27+
28+
requsetNumber++;
29+
30+
getProblemDetail(questionsName, requsetNumber);
31+
} else {
32+
questionsName[requsetNumber] &&
33+
LeetCodeProvider.getProblemDetail(questionsName[requsetNumber])
34+
35+
.then((markDown) => {
36+
if (markDown) {
37+
Logger.success(
38+
`问题: "${
39+
questionsName[requsetNumber]
40+
}" | 结果: ${JSON.stringify(markDown)}`
41+
);
42+
43+
Utils.writeFileSync(
44+
RAW_MARKDOWN_OUTPUT_DIR,
45+
questionsName[requsetNumber],
46+
markDown
47+
);
48+
49+
requsetNumber++;
50+
} else {
51+
Logger.error(`获取${questionsName[requsetNumber]} markdown 失败!`);
52+
}
5453
})
5554
}
5655

‎scripts/generateleetcode.js

+2-4
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,13 @@ const {
44
SUPPORT_LANGUAGE,
55
DB_JSON_OUTPUT_DIR,
66
RAW_MARKDOWN_OUTPUT_DIR,
7-
ENGLISH_MARKDOWN_SIGN
87
} = require('./constants')
98

109

1110

1211

1312
const genertateLeetcodeToJson = () => {
14-
console.time('genertateLeetcodeToJson');
15-
16-
const rawMarkdowns = Utils.getDirsFileNameSync(RAW_MARKDOWN_OUTPUT_DIR).filter(name => !name.endsWith(ENGLISH_MARKDOWN_SIGN))
13+
// console.time("genertateLeetcodeToJson");
1714

1815
rawMarkdowns.forEach(filename => {
1916

@@ -58,6 +55,7 @@ const genertateLeetcodeToJson = () => {
5855
})
5956
markdown.replace(Utils.getSatelliteDataReg().pre, (noUseMatch, $1) => {
6057

58+
console.log($1.replace(/[\s\r\n]/g, '').replace(/-/g,'').split('-'))
6159
preKnowledge.push({
6260
text: $1,
6361
link: null,

0 commit comments

Comments
 (0)