from argparse import ArgumentParser
from jinja2 import Environment, FileSystemLoader, Template
import json
from pathlib import Path
import sys
import subprocess
import os
from coding_standards_utils import *
#
# PS Oneliner for regenerating a language:
# Get-ChildItem .\rule_packages\cpp\*.json | ForEach-Object { python .\scripts\generate_rules\generate_package_files.py cpp $_.BaseName }
#
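#
# A rough bash equivalent of the above (untested sketch, assuming the same repo
# layout and invocation from the repository root):
# for f in rule_packages/cpp/*.json; do
#     python scripts/generate_rules/generate_package_files.py cpp "$(basename "$f" .json)"
# done
#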
help_statement = """
A tool for generating query-related files, given a package description in JSON format.

The JSON file at <repo_root>/rule_packages/<language_name>/<package_name>.json is loaded, and for each
entry in the `queries` array for each rule, we generate the following:
 - Query file in <language>/<standard>/src/rules/<rule_id>/<short_name>.ql
 - Query help file in c/<standard>/src/rules/<rule_id>/<short_name>.qhelp or cpp/<standard>/src/rules/<rule_id>/<short_name>.md
 - Test reference in <language>/<standard>/test/<rule_id>/<short_name>.qlref
 - Test file in <language>/<standard>/test/<rule_id>/test.<language_extension>

If the files already exist:
 - The metadata of the query file will be overwritten with the new metadata.
 - The QHelp file will be overwritten entirely.
 - The autogenerated sections of the Markdown query help file will be updated;
   the implementation section will not be overwritten or modified.
 - The test reference will be overwritten.
 - The test file will not be overwritten or modified.

This generator does not directly support the modification of the query short_name. To
modify a query short name, first rename the relevant files manually, then re-run this
script, ensuring that the package description has also been updated.
"""
########################################################
# Configuration Data
########################################################
ql_language_mappings = {
    "cpp": "cpp",
    "c": "cpp"
}
standard_metadata = {
    "CERT-C++": {
        "standard_title": "CERT-C++",
        "standard_url": "https://wiki.sei.cmu.edu/confluence/pages/viewpage.action?pageId=88046682"
    },
    "AUTOSAR": {
        "standard_title": "AUTOSAR: Guidelines for the use of the C++14 language in critical and safety-related systems",
        "standard_url": "https://www.autosar.org/fileadmin/standards/R22-11/AP/AUTOSAR_RS_CPP14Guidelines.pdf"
    },
    "CERT-C": {
        "standard_title": "CERT-C",
        "standard_url": "https://wiki.sei.cmu.edu/confluence/display/c"
    },
    "MISRA-C-2012": {
        "standard_title": "MISRA-C:2012 Guidelines for the use of the C language in critical systems",
        "standard_url": "https://www.misra.org.uk/"
    },
    "MISRA-C++-2023": {
        "standard_title": "MISRA C++:2023 Guidelines for the use of C++17 in critical systems",
        "standard_url": "https://misra.org.uk/product/misra-cpp2023/"
    }
}
# The help files of these standards cannot be distributed in our repository.
external_help_file_standards = ["AUTOSAR", "MISRA-C-2012", "MISRA-C++-2023"]
# Mapping from the QL language to source file extension used to generate a help example file.
ql_language_ext_mappings = {
    "cpp": "cpp",
    "c": "c"
}
parser = ArgumentParser(description=help_statement)
parser.add_argument(
    "-a",
    "--anonymise",
    action="store_true",
    dest="anonymise",
    default=False,
    help="create anonymized versions of the queries, without identifying rule information",
)
# Skip the generation of shared tests. This is useful when creating releases,
# where we want to preserve the author's intention not to provide C-specific
# test cases.
parser.add_argument(
    "--skip-shared-test-generation",
    action="store_true",
    dest="skip_shared_test_generation",
    default=False,
    help="Do not generate tests for shared implementations.",
)
parser.add_argument("language_name", help="the language of the package")
parser.add_argument(
    "-e",
    "--external-help-dir",
    dest="external_help_dir",
    default=Path("../codeql-coding-standards-help"),
    type=Path,
    help="directory containing external help files"
)
parser.add_argument(
    "package_names", help="the name(s) of the package(s) to generate query files for", metavar='FILE', nargs='+')
########################################################
args = parser.parse_args()
language_name = args.language_name.lower()
# validate language
if language_name not in ql_language_mappings:
    exit(f"Unsupported language '{language_name}'")
else:
    ql_language_name = ql_language_mappings[language_name]

# set up some basic paths
repo_root = Path(__file__).parent.parent.parent
rule_packages_file_path = repo_root.joinpath("rule_packages")

env = Environment(loader=FileSystemLoader(Path(__file__).parent.joinpath(
    "templates")), trim_blocks=True, lstrip_blocks=True)
def write_shared_implementation(package_name, rule_id, query, language_name, ql_language_name, common_src_pack_dir, common_test_pack_dir, test_src_dir, skip_tests=False):
    shared_impl_dir_name = query["shared_implementation_short_name"].lower()
    shared_impl_dir = common_src_pack_dir.joinpath(
        "codingstandards",
        ql_language_name,
        "rules",
        shared_impl_dir_name
    )
    shared_impl_dir.mkdir(exist_ok=True, parents=True)
    shared_impl_query_library_path = shared_impl_dir.joinpath(
        query["shared_implementation_short_name"] + ".qll")
    #
    # Write out the implementation. Implementations are
    # always stored in the `ql_language_name` directory.
    #
    if not shared_impl_query_library_path.exists():
        if len(query["short_name"]) > 50:
            exit(f"Error: {query['short_name']} has more than 50 characters.")
        shared_library_template = env.get_template(
            "shared_library.ql.template"
        )
        print(f"{rule_id}: Writing out shared implementation file to {str(shared_impl_query_library_path)}")
        write_template(
            shared_library_template,
            query,
            package_name,
            shared_impl_query_library_path
        )
    else:
        print(f"{rule_id}: Skipping writing shared implementation file to {str(shared_impl_query_library_path)}")

    # Write out the test. Tests are always stored under the `language_name`
    # directory.
    if not skip_tests:
        shared_impl_test_dir = common_test_pack_dir.joinpath(
            "rules",
            shared_impl_dir_name
        )
        shared_impl_test_dir.mkdir(exist_ok=True, parents=True)

        # Generate test query file
        shared_impl_test_query_path = shared_impl_test_dir.joinpath(
            f"{query['shared_implementation_short_name']}.ql"
        )
        with open(shared_impl_test_query_path, "w", newline="\n") as f:
            f.write("// GENERATED FILE - DO NOT MODIFY\n")
            f.write(
                "import "
                + str(shared_impl_query_library_path.relative_to(common_src_pack_dir).with_suffix(''))
                .replace("\\", "/")
                .replace("/", ".")
                + "\n"
            )
            f.write("\n")
            class_name = str(query["shared_implementation_short_name"]) + "SharedQuery"
            f.write("class TestFileQuery extends " + class_name + ",")
            # ql formatting of this line depends on the line length
            if len(class_name) > 61:
                # Line break required after comma
                f.write("\n TestQuery\n{ }\n")
            elif len(class_name) == 61:
                # Line break required before `{`
                f.write(" TestQuery\n{ }\n")
            elif len(class_name) > 57:
                # Line break required after `{`
                f.write(" TestQuery {\n}\n")
            else:
                # Under 100 characters, can be formatted on the same line
                f.write(" TestQuery { }\n")

        # Create an empty test file, if one doesn't already exist
        shared_impl_test_dir.joinpath(
            "test." + language_name).touch()

        # Add an empty expected results file - this makes it possible to see the results the
        # first time you run the test in VS Code
        expected_results_file = shared_impl_test_dir.joinpath(
            query["shared_implementation_short_name"] + ".expected")
        if not expected_results_file.exists():
            with open(expected_results_file, "w", newline="\n") as f:
                f.write(
                    "No expected results have yet been specified")

        # Add a testref file for this query, that refers to the shared library
        test_ref_file = test_src_dir.joinpath(
            query["short_name"] + ".testref")
        # don't write it if it already exists
        if not test_ref_file.exists():
            with open(test_ref_file, "w", newline="\n") as f:
                f.write(str(shared_impl_test_query_path.relative_to(
                    repo_root)).replace("\\", "/"))
def write_non_shared_testfiles(rule_id, query, language_name, query_path, test_src_dir, src_pack_dir):
    # Add qlref test file
    print(
        rule_id + ": Writing out query test files to " + str(test_src_dir))
    with open(test_src_dir.joinpath(query["short_name"] + ".qlref"), "w", newline="\n") as f:
        f.write(str(query_path.relative_to(
            src_pack_dir)).replace("\\", "/"))

    # Add an empty expected results file - this makes it possible to see the results the
    # first time you run the test in VS Code
    expected_results_file = test_src_dir.joinpath(
        f"{query['short_name']}.expected"
    )
    if not expected_results_file.exists():
        with open(expected_results_file, "w", newline="\n") as f:
            f.write(
                "No expected results have yet been specified")
def resolve_package(package_name: str) -> Path:
    global rule_packages_file_path, language_name
    return rule_packages_file_path.joinpath(
        language_name, package_name + ".json")
def generate_package_files(package_name: str) -> None:
    global language_name, env
    rule_package_file_path = resolve_package(package_name)
    print(str(rule_package_file_path))
    try:
        rule_package_file = rule_package_file_path.open("r")
    except PermissionError:
        print("Error: No permission to read the rule package file located at '" +
              str(rule_package_file_path) + "'")
        sys.exit(1)
    else:
        with rule_package_file:
            package_definition = json.load(rule_package_file)

    # Initialize exclusion
    exclusion_query = []

    # Check query short names are unique before proceeding
    query_names = []
    for standard_name, rules in package_definition.items():
        for rule_id, rule_details in rules.items():
            for query in rule_details["queries"]:
                query_names.append(query["short_name"])
    if len(query_names) > len(set(query_names)):
        print(
            "Error: Duplicate query name detected, each query must have a unique query name.")
        sys.exit(1)

    for standard_name, rules in package_definition.items():
        # Identify the short name for the standard, used for directory and tag names
        standard_short_name = standard_name.split("-")[0].lower()
        # Currently assumes that language_name is also the subdirectory name
        standard_dir = repo_root.joinpath(
            language_name).joinpath(standard_short_name)
        # Identify common src and test packs
        common_dir = repo_root.joinpath(
            ql_language_name).joinpath("common")
        common_src_pack_dir = common_dir.joinpath("src")
        # The language specific files always live under the commons for that
        # language
        common_test_pack_dir = repo_root.joinpath(language_name, "common", "test")
        # Identify the source pack for this standard
        src_pack_dir = standard_dir.joinpath("src")
        for rule_id, rule_details in rules.items():
            # Identify and create the directories required for this rule
            rule_src_dir = src_pack_dir.joinpath("rules").joinpath(rule_id)
            rule_src_dir.mkdir(exist_ok=True, parents=True)
            test_src_dir = standard_dir.joinpath(
                "test/rules").joinpath(rule_id)
            test_src_dir.mkdir(exist_ok=True, parents=True)
            # Extract the rule category from the obligation property.
            assert("properties" in rule_details and "obligation" in rule_details["properties"])
            rule_category = rule_details["properties"]["obligation"]
            # Build list of tags for this rule to apply to each query
            rule_query_tags = []
            for key, value in rule_details["properties"].items():
                if isinstance(value, list):
                    for v in value:
                        rule_query_tags.append(
                            standard_tag(standard_short_name, key, v))
                else:
                    rule_query_tags.append(standard_tag(
                        standard_short_name, key, value))
            for q in rule_details["queries"]:
                # extract metadata and model
                query, exclusion_model = extract_metadata_from_query(
                    rule_id,
                    rule_details["title"],
                    rule_category,
                    q,
                    rule_query_tags,
                    language_name,
                    ql_language_name,
                    standard_name,
                    standard_short_name,
                    standard_metadata,
                    args.anonymise
                )
                # add query to each dict
                exclusion_query.append(exclusion_model)

                # Path to query file we want to generate or modify
                query_path = rule_src_dir.joinpath(
                    query["short_name"] + ".ql")
                if not query_path.exists():
                    # Doesn't already exist, generate full template, including imports and select
                    if len(query["short_name"]) > 50:
                        print(
                            "Error: " + query["short_name"] + " has more than 50 characters. Query names must be at most 50 characters.")
                        sys.exit(1)
                    print(rule_id + ": Writing out query file to " +
                          str(query_path))
                    query_template = env.get_template("query.ql.template")
                    write_template(query_template, query,
                                   package_name, query_path)
                else:
                    # Query file does already exist, so we only re-write the metadata
                    print(
                        rule_id + ": Re-writing metadata for query file at " + str(query_path))
                    query_metadata_template = env.get_template(
                        "query.metadata.template")
                    # Generate the new metadata
                    new_metadata = query_metadata_template.render(**query)
                    with open(query_path, "r+", newline="\n") as query_file:
                        # Read the existing query file contents
                        existing_contents = query_file.read()
                        # Move cursor back to the start of the file, so we can write later
                        query_file.seek(0)
                        # Confirm that the query file is valid
                        if not existing_contents.startswith("/**"):
                            print("Error: cannot modify the metadata for query file at " + str(
                                query_path) + " - does not start with /**.")
                            sys.exit(1)
                        pos_of_comment_end = existing_contents.find("*/")
                        if pos_of_comment_end == -1:
                            print("Error: cannot modify the metadata for query file at " + str(
                                query_path) + " - does not include a */.")
                            sys.exit(1)
                        # Write the new contents to the query file
                        new_contents = new_metadata + \
                            existing_contents[pos_of_comment_end + 2:]
                        # Write the new contents to the file
                        query_file.writelines(new_contents)
                        # Ensure any trailing old data is deleted
                        query_file.truncate()

                # Add some metadata for each supported standard
                if standard_name == "CERT-C++":
                    query["standard_title"] = "CERT-C++"
                    query["standard_url"] = "https://wiki.sei.cmu.edu/confluence/pages/viewpage.action?pageId=88046682"
                elif standard_name == "AUTOSAR":
                    query["standard_title"] = "AUTOSAR: Guidelines for the use of the C++14 language in critical and safety-related systems"
                    query[
                        "standard_url"
                    ] = "https://www.autosar.org/fileadmin/standards/R22-11/AP/AUTOSAR_RS_CPP14Guidelines.pdf"

                help_dir = None
                if standard_name in external_help_file_standards:
                    if args.external_help_dir.is_dir() and args.external_help_dir.exists():
                        help_dir = Path(args.external_help_dir).resolve() / (rule_src_dir.relative_to(repo_root))
                        help_dir.mkdir(parents=True, exist_ok=True)
                    else:
                        print(f"{rule_id}: Skipping writing of help file for {query_path} because no existing external help directory is provided!")
                else:
                    help_dir = rule_src_dir

                if help_dir:
                    write_query_help_file(help_dir, env, query, package_name, rule_id, standard_name)

                if "shared_implementation_short_name" in query:
                    write_shared_implementation(package_name, rule_id, query, language_name, ql_language_name, common_src_pack_dir, common_test_pack_dir, test_src_dir, args.skip_shared_test_generation)
                else:
                    write_non_shared_testfiles(rule_id, query, language_name, query_path, test_src_dir, src_pack_dir)

    # Exclusions
    exclusions_template = env.get_template("exclusions.qll.template")
    common_exclusions_dir = common_src_pack_dir.joinpath(
        "codingstandards",
        ql_language_name,
        "exclusions")
    # assign package and sanitize
    package_name = package_name.replace("-", "")
    package_name = package_name[:1].upper() + package_name[1:]
    exclusion_library_file = common_exclusions_dir.joinpath(language_name,
                                                            package_name + ".qll")
    # write exclusions file
    print(package_name + ": Writing out exclusions file to " +
          str(exclusion_library_file))
    os.makedirs(common_exclusions_dir.joinpath(
        language_name), exist_ok=True)
    write_exclusion_template(exclusions_template, exclusion_query,
                             package_name, language_name, exclusion_library_file)
for package_name in args.package_names:
    generate_package_files(package_name)

# After updating these files, the metadata should be regenerated
print("==========================================================")
print(f"Regenerating RuleMetadata.qll for {language_name.upper()}")
print("==========================================================")
repo_root = Path(__file__).parent.parent.parent
update_metadata_path = repo_root.joinpath(
    "scripts", "generate_metadata", "generate_metadata_for_language.py")
subprocess.run([sys.executable, update_metadata_path, language_name])