Skip to content

Commit 48e68fb

Browse files
mod:修改抓取文件策略
1 parent 29e04bd commit 48e68fb

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

‎scripts/LeetCodeProvider.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ const {
77
PROBLEMS_URL,
88
QUESTION_DOM_SELECTOR,
99
BASE_MARKDWON_DOWNLOAD_URL,
10-
ENGLISH_MARKDOWN_SIGN,
10+
CRAWL_IGNORE_SUFFIX,
1111
} = require("./constants");
1212

1313
module.exports = LeetCodeProvider = {
@@ -24,7 +24,7 @@ module.exports = LeetCodeProvider = {
2424
/**
2525
* 由于QUESTION_DOM_SELECTOR 所选择的结构包含非问题标签,获取title会是undefined,在此需将其过滤掉
2626
*/
27-
return titles.filter(Boolean).filter((name) => !name.endsWith(ENGLISH_MARKDOWN_SIGN));
27+
return titles.filter(Boolean).filter((name) => !CRAWL_IGNORE_SUFFIX.some(suffix => name.endsWith(suffix)));
2828
})
2929
.catch((error) => {
3030
Logger.error("获取问题列表失败", error);

‎scripts/constants.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ module.exports = {
5555
/**
5656
* 过滤英文文档末尾标识
5757
*/
58-
ENGLISH_MARKDOWN_SIGN: ".en.md",
58+
CRAWL_IGNORE_SUFFIX: [".en.md", "-en.md"],
5959

6060
/**
6161
* 爬虫抓取同一文件时的最大失败次数(多为网络原因导致, 切换到gitee源)

‎scripts/generateleetcode.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ const {
44
SUPPORT_LANGUAGE,
55
DB_JSON_OUTPUT_DIR,
66
RAW_MARKDOWN_OUTPUT_DIR,
7-
ENGLISH_MARKDOWN_SIGN
7+
CRAWL_IGNORE_SUFFIX
88
} = require("./constants");
99

1010
const genertateLeetcodeToJson = () => {
1111
console.time("genertateLeetcodeToJson");
1212

1313
const rawMarkdowns = Utils.getDirsFileNameSync(
1414
RAW_MARKDOWN_OUTPUT_DIR
15-
).filter(name => !name.endsWith(ENGLISH_MARKDOWN_SIGN));
15+
).filter(name => !CRAWL_IGNORE_SUFFIX.some(suffix => name.endsWith(suffix)));
1616

1717
rawMarkdowns.forEach(filename => {
1818
let languageResloved = [];

0 commit comments

Comments
 (0)