-
Notifications
You must be signed in to change notification settings - Fork 28.5k
/
Copy pathstructured_logging_style.py
executable file
·94 lines (78 loc) · 3.83 KB
/
structured_logging_style.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import sys
import re
import glob
def _find_violations(content, compiled_inner_log_pattern):
    """Return (line_number, start_char) for each non-structured log call in *content*.

    A violation is a logInfo/logWarning/logError(...) call whose message
    argument is built with ``.format(...)``, ``s"..."`` interpolation, or
    ``+`` concatenation instead of the structured ``log"..."`` interpolator.
    Positions are 1-indexed lines and 0-indexed columns.
    """
    violations = []
    # DOTALL so a call spanning several source lines is captured whole
    # (the pattern ends at the first newline after the closing paren).
    for match in re.finditer(r"log(?:Info|Warning|Error)\(.*?\)\n", content, re.DOTALL):
        statement = match.group(0).strip()
        # Trim the leading "logXxx(" and trailing ")", then collapse all
        # whitespace so the inner-expression regex can match across line breaks.
        first_paren_index = statement.find("(")
        inner = re.sub(r"\s+", "", statement[first_paren_index + 1 : -1])
        if compiled_inner_log_pattern.fullmatch(inner):
            start_pos = match.start()
            preceding = content[:start_pos]
            line_number = preceding.count("\n") + 1
            # Column of the call on its line (rfind returns -1 for line 1,
            # which the trailing "- 1" compensates for).
            start_char = start_pos - preceding.rfind("\n") - 1
            violations.append((line_number, start_char))
    return violations


def main():
    """Check Scala sources for log calls that bypass the Structured Logging Framework.

    Scans every ``*.scala`` file under the parent directory (minus tests and
    an explicit allowlist), reports each offending ``logInfo``/``logWarning``/
    ``logError`` call location to stderr, and exits 0 on success or -1 when
    any violation is found.
    """
    # Matches message expressions that are NOT structured-logging style:
    # "...".format(...), s"...$..." interpolation, or "+"-based concatenation.
    inner_log_pattern = r'".*?"\.format\(.*\)|s?".*?(?:\$|\"\+(?!s?")).*|[^"]+\+\s*".*?"'
    compiled_inner_log_pattern = re.compile(inner_log_pattern)

    # Regex patterns for file paths to exclude from the Structured Logging style check
    excluded_file_patterns = [
        "[Tt]est",
        "sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen"
        "/CodeGenerator.scala",
        "streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala",
        "sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver"
        "/SparkSQLCLIService.scala",
        "core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala",
    ]

    # file path -> non-empty list of (line_number, start_char) violations.
    nonmigrated_files = {}
    scala_files = glob.glob(os.path.join("../", "**", "*.scala"), recursive=True)
    for path in scala_files:
        if os.path.isdir(path):
            continue
        if any(re.search(pattern, path) for pattern in excluded_file_patterns):
            continue
        with open(path, "r") as f:
            content = f.read()
        violations = _find_violations(content, compiled_inner_log_pattern)
        # BUG FIX: the original guarded on `if log_statements:` where
        # log_statements was the re.finditer iterator — always truthy — so an
        # empty entry was recorded for every scanned file. Record only files
        # that actually contain violations; the exit-code logic is unchanged.
        if violations:
            nonmigrated_files[path] = violations

    if not nonmigrated_files:
        print("Structured logging style check passed.", file=sys.stderr)
        sys.exit(0)

    for file_path, issues in nonmigrated_files.items():
        for line_number, start_char in issues:
            print(f"[error] {file_path}:{line_number}:{start_char}", file=sys.stderr)
    print(
        "[error]\tPlease use the Structured Logging Framework for logging messages "
        'with variables. For example: log"...${{MDC(TASK_ID, taskId)}}..."'
        "\n\tRefer to the guidelines in the file `internal/Logging.scala`.",
        file=sys.stderr,
    )
    sys.exit(-1)
# Script entry point: run the style check only when executed directly.
if __name__ == "__main__":
    main()