diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f7e125cf1..840814f7f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -71,6 +71,8 @@ jobs: run: ci/editorconfig.sh && git diff --exit-code - name: Remove useless declarations run: ci/remove_useless_declarations.sh && git diff --cached --exit-code + - name: Sort and group includes + run: ci/sort_and_group_includes.sh && git diff --exit-code - name: Normalize test output run: ci/normalize_expected.sh && git diff --exit-code - name: Check for C-style comments in migration files diff --git a/ci/README.md b/ci/README.md index 37ef94f4f..b8dad35ac 100644 --- a/ci/README.md +++ b/ci/README.md @@ -385,3 +385,18 @@ definitions are in alphabetical order. ## `print_stack_trace.sh` This script prints stack traces for failed tests, if they left core files. + +## `sort_and_group_includes.sh` + +This script checks and fixes issues with include grouping and sorting in C files. + +Includes are grouped in the following groups: + - System includes (eg. `#include `) + - Postgres.h include (eg. `#include "postgres.h"`) + - Toplevel postgres includes (includes not in a directory eg. `#include "miscadmin.h`) + - Postgres includes in a directory (eg. `#include "catalog/pg_type.h"`) + - Toplevel citus includes (includes not in a directory eg. `#include "pg_version_constants.h"`) + - Columnar includes (eg. `#include "columnar/columnar.h"`) + - Distributed includes (eg. `#include "distributed/maintenanced.h"`) + +Within every group the include lines are sorted alphabetically. diff --git a/ci/fix_style.sh b/ci/fix_style.sh index 3d6e7ae83..bb78d5f50 100755 --- a/ci/fix_style.sh +++ b/ci/fix_style.sh @@ -19,3 +19,4 @@ ci/disallow_long_changelog_entries.sh ci/normalize_expected.sh ci/fix_gitignore.sh ci/print_stack_trace.sh +ci/sort_and_group_includes.sh diff --git a/ci/include_grouping.py b/ci/include_grouping.py new file mode 100755 index 000000000..4b1370d61 --- /dev/null +++ b/ci/include_grouping.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +""" +easy command line to run against all citus-style checked files: + +$ git ls-files \ + | git check-attr --stdin citus-style \ + | grep 'citus-style: set' \ + | awk '{print $1}' \ + | cut -d':' -f1 \ + | xargs -n1 ./ci/include_grouping.py +""" + +import collections +import os +import sys + + +def main(args): + if len(args) < 2: + print("Usage: include_grouping.py ") + return + + file = args[1] + if not os.path.isfile(file): + sys.exit(f"File '{file}' does not exist") + + with open(file, "r") as in_file: + with open(file + ".tmp", "w") as out_file: + includes = [] + skipped_lines = [] + + # This calls print_sorted_includes on a set of consecutive #include lines. + # This implicitly keeps separation of any #include lines that are contained in + # an #ifdef, because it will order the #include lines inside and after the + # #ifdef completely separately. + for line in in_file: + # if a line starts with #include we don't want to print it yet, instead we + # want to collect all consecutive #include lines + if line.startswith("#include"): + includes.append(line) + skipped_lines = [] + continue + + # if we have collected any #include lines, we want to print them sorted + # before printing the current line. However, if the current line is empty + # we want to perform a lookahead to see if the next line is an #include. + # To maintain any separation between #include lines and their subsequent + # lines we keep track of all lines we have skipped inbetween. + if len(includes) > 0: + if len(line.strip()) == 0: + skipped_lines.append(line) + continue + + # we have includes that need to be grouped before printing the current + # line. + print_sorted_includes(includes, file=out_file) + includes = [] + + # print any skipped lines + print("".join(skipped_lines), end="", file=out_file) + skipped_lines = [] + + print(line, end="", file=out_file) + + # move out_file to file + os.rename(file + ".tmp", file) + + +def print_sorted_includes(includes, file=sys.stdout): + default_group_key = 1 + groups = collections.defaultdict(set) + + # define the groups that we separate correctly. The matchers are tested in the order + # of their priority field. The first matcher that matches the include is used to + # assign the include to a group. + # The groups are printed in the order of their group_key. + matchers = [ + { + "name": "system includes", + "matcher": lambda x: x.startswith("<"), + "group_key": -2, + "priority": 0, + }, + { + "name": "toplevel postgres includes", + "matcher": lambda x: "/" not in x, + "group_key": 0, + "priority": 9, + }, + { + "name": "postgres.h", + "matcher": lambda x: x.strip() in ['"postgres.h"'], + "group_key": -1, + "priority": -1, + }, + { + "name": "toplevel citus inlcudes", + "matcher": lambda x: x.strip() + in [ + '"citus_version.h"', + '"pg_version_compat.h"', + '"pg_version_constants.h"', + ], + "group_key": 3, + "priority": 0, + }, + { + "name": "columnar includes", + "matcher": lambda x: x.startswith('"columnar/'), + "group_key": 4, + "priority": 1, + }, + { + "name": "distributed includes", + "matcher": lambda x: x.startswith('"distributed/'), + "group_key": 5, + "priority": 1, + }, + ] + matchers.sort(key=lambda x: x["priority"]) + + # throughout our codebase we have some includes where either postgres or citus + # includes are wrongfully included with the syntax for system includes. Before we + # try to match those we will change the <> to "" to make them match our system. This + # will also rewrite the include to the correct syntax. + common_system_include_error_prefixes = [" 0: + print(file=file) + includes = group[1] + print("".join(sorted(includes)), end="", file=file) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/ci/sort_and_group_includes.sh b/ci/sort_and_group_includes.sh new file mode 100755 index 000000000..1c3a91458 --- /dev/null +++ b/ci/sort_and_group_includes.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -euo pipefail +# shellcheck disable=SC1091 +source ci/ci_helpers.sh + +git ls-files \ + | git check-attr --stdin citus-style \ + | grep 'citus-style: set' \ + | awk '{print $1}' \ + | cut -d':' -f1 \ + | xargs -n1 ./ci/include_grouping.py