Skip to content

Commit 0fa9a77

Browse files
fix:前置知识移除dash符号
1 parent a6df67c commit 0fa9a77

File tree

5 files changed

+977
-6772
lines changed

5 files changed

+977
-6772
lines changed

‎scripts/LeetCodeProvider.js

+2 -1
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,15 @@ const {
1212

1313
module.exports = LeetCodeProvider = {
1414
getProblemsTitle() {
15+
Logger.success('开始抓取问题列表。。。。')
1516
return Utils.httpGet(PROBLEMS_URL)
1617
.then((body) => {
1718
let titles = [];
1819
let sHtml = Iconv.decode(body, "utf-8").toString();
1920
cheerio
2021
.load(sHtml)(QUESTION_DOM_SELECTOR)
2122
.each((idx, ele) => titles.push(ele.attribs["title"]));
22-
Logger.success("获取问题列表成功");
23+
Logger.success("获取问题列表成功");
2324
/**
2425
* 由于QUESTION_DOM_SELECTOR 所选择的结构包含非问题标签,获取title会是undefined,在此需将其过滤掉
2526
*/

‎scripts/constants.js

+12
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,11 @@
11
module.exports = {
2+
3+
/**
4+
* 爬取的数据源站点 github | gitee
5+
*/
6+
7+
CRAWL_SITE: "github",
8+
29
/**
310
* 需解析的语言类型
411
*/
@@ -45,4 +52,9 @@ module.exports = {
4552
* 过滤英文文档末尾标识
4653
*/
4754
ENGLISH_MARKDOWN_SIGN: ".en.md",
55+
56+
/**
57+
* 爬虫抓取同一文件时的最大失败次数(多为网络原因导致)
58+
*/
59+
MAX_CRAWL_RETRY_NUMBER : 100
4860
};

‎scripts/curlleetcode.js

+21 -8
Original file line number | Diff line number | Diff line change
@@ -8,16 +8,19 @@ const {
88
RAW_MARKDOWN_OUTPUT_DIR,
99
REQUEST_RATE,
1010
IS_FORCE_UPDATE_MODE,
11+
MAX_CRAWL_RETRY_NUMBER
1112
} = require("./constants");
1213

1314
/**
1415
* 当前请求问题索引
1516
*/
1617
let requsetNumber = 0;
18+
let retryCounter = 0;
1719

1820
Utils.mkdirSync(RAW_MARKDOWN_OUTPUT_DIR);
1921

2022
const getProblemDetail = (questionsName, requsetNumber) => {
23+
2124
const cachedFilesName = Utils.getDirsFileNameSync(RAW_MARKDOWN_OUTPUT_DIR);
2225

2326
if (
@@ -30,14 +33,16 @@ const getProblemDetail = (questionsName, requsetNumber) => {
3033

3134
getProblemDetail(questionsName, requsetNumber);
3235
} else {
36+
Logger.success(`开始抓取${questionsName[requsetNumber]}`)
3337
questionsName[requsetNumber] &&
3438
LeetCodeProvider.getProblemDetail(questionsName[requsetNumber])
35-
.then((markDown) => {
39+
.then(markDown => {
3640
if (markDown) {
41+
retryCounter = 0;
3742
Logger.success(
3843
`问题: "${
3944
questionsName[requsetNumber]
40-
}" | 结果: ${JSON.stringify(markDown)}`
45+
}" | 结果: ${JSON.stringify(markDown).slice(100)}...`
4146
);
4247

4348
Utils.writeFileSync(
@@ -48,19 +53,27 @@ const getProblemDetail = (questionsName, requsetNumber) => {
4853

4954
requsetNumber++;
5055
} else {
51-
Logger.error(`获取${questionsName[requsetNumber]} markdown 失败!`);
56+
retryCounter++;
57+
Logger.error(`获取${questionsName[requsetNumber]} markdown 第${retryCounter}次 失败!`);
5258
}
5359
})
5460
.catch(Logger.error)
5561
.then(() => {
56-
setTimeout(() => {
57-
questionsName[requsetNumber] &&
58-
getProblemDetail(questionsName, requsetNumber);
59-
}, REQUEST_RATE);
62+
if (retryCounter >= MAX_CRAWL_RETRY_NUMBER) {
63+
Logger.error(
64+
`抓去问题 "${questionsName[requsetNumber]}" 失败次数已达上限, 请调整抓取速率 [REQUEST_RATE] 或稍后再试`
65+
);
66+
process.exit(0);
67+
} else {
68+
setTimeout(() => {
69+
questionsName[requsetNumber] &&
70+
getProblemDetail(questionsName, requsetNumber);
71+
}, REQUEST_RATE);
72+
}
6073
});
6174
}
6275
};
6376

64-
LeetCodeProvider.getProblemsTitle().then((questionsName) => {
77+
LeetCodeProvider.getProblemsTitle().then(questionsName => {
6578
getProblemDetail(questionsName, requsetNumber);
6679
});

‎scripts/generateleetcode.js

+23 -19
Original file line number | Diff line number | Diff line change
@@ -4,19 +4,17 @@ const {
44
SUPPORT_LANGUAGE,
55
DB_JSON_OUTPUT_DIR,
66
RAW_MARKDOWN_OUTPUT_DIR,
7-
} = require('./constants')
8-
9-
10-
7+
ENGLISH_MARKDOWN_SIGN
8+
} = require("./constants");
119

1210
const genertateLeetcodeToJson = () => {
1311
console.time("genertateLeetcodeToJson");
1412

1513
const rawMarkdowns = Utils.getDirsFileNameSync(
1614
RAW_MARKDOWN_OUTPUT_DIR
17-
).filter((name) => !name.endsWith(ENGLISH_MARKDOWN_SIGN));
15+
).filter(name => !name.endsWith(ENGLISH_MARKDOWN_SIGN));
1816

19-
rawMarkdowns.forEach((filename) => {
17+
rawMarkdowns.forEach(filename => {
2018
let languageResloved = [];
2119
let preKnowledge = [];
2220
let keyPoints = [];
@@ -39,21 +37,25 @@ const genertateLeetcodeToJson = () => {
3937
markdown = markdown.replace(/```python/g, "```py");
4038
markdown = markdown.replace(/```c\+\+/g, "```cpp");
4139

42-
SUPPORT_LANGUAGE.forEach((lang) => {
40+
SUPPORT_LANGUAGE.forEach(lang => {
4341
markdown.replace(Utils.genCodeRegByLang(lang), (noUseMatch, $1) => {
4442
languageResloved.push({
4543
language: lang,
46-
text: $1,
44+
text: $1
4745
});
4846
});
4947
});
5048
markdown.replace(Utils.getSatelliteDataReg().pre, (noUseMatch, $1) => {
51-
52-
preKnowledge.push({
53-
text: $1.replace('-',''),
54-
link: null,
55-
color: "red",
56-
});
49+
$1.replace(/-/g, "")
50+
.split("\n")
51+
.filter(Boolean)
52+
.forEach(preTagName => {
53+
preKnowledge.push({
54+
text: preTagName,
55+
link: null,
56+
color: "red"
57+
});
58+
});
5759
});
5860

5961
markdown.replace(
@@ -62,8 +64,8 @@ const genertateLeetcodeToJson = () => {
6264
keyPoints = $1
6365
.replace(/\s/g, "")
6466
.split("-")
65-
.filter((s) => s && s !== "解析")
66-
.map((s) => ({ text: s, link: null, color: "blue" }));
67+
.filter(s => s && s !== "解析")
68+
.map(s => ({ text: s, link: null, color: "blue" }));
6769
}
6870
);
6971

@@ -80,20 +82,22 @@ const genertateLeetcodeToJson = () => {
8082
pre: preKnowledge,
8183
keyPoints,
8284
solution: `https://github.com/azl397985856/leetcode/blob/master/problems/${filename}`,
83-
code: languageResloved,
85+
code: languageResloved
8486
};
8587

88+
console.log(oCustomStruct);
89+
8690
Logger.success(`开始生成 "${filename}"`);
8791

8892
Utils.writeFileSync(
8993
"spider/yield-db-json",
90-
`${name}.json`,
94+
`${filename}.json`,
9195
JSON.stringify(oCustomStruct, null, 2)
9296
);
9397

9498
Logger.success(`生成 "${filename}" 完毕`);
95-
console.timeEnd("genertateLeetcodeToJson");
9699
});
100+
console.timeEnd("genertateLeetcodeToJson");
97101
};
98102

99103
const generateCollectionIndexFile = () => {

0 commit comments

Comments
 (0)