#!/usr/bin/env python3
"""
Group and sort the #include lines of a single source file, in place.

easy command line to run against all citus-style checked files:

$ git ls-files \
  | git check-attr --stdin citus-style \
  | grep 'citus-style: set' \
  | awk '{print $1}' \
  | cut -d':' -f1 \
  | xargs -n1 ./ci/include_grouping.py
"""

import collections
import os
import sys


def main(args):
    """Rewrite the file named by ``args[1]`` with its #include lines grouped.

    ``args`` is an argv-style list (program name first). With no file
    argument a usage line is printed and the function returns. If the path is
    not an existing file the process exits with an error message. Otherwise
    the regrouped content is written to ``<file>.tmp``, which then replaces
    the original file.
    """
    if len(args) < 2:
        print("Usage: include_grouping.py <file>")
        return

    file = args[1]
    if not os.path.isfile(file):
        sys.exit(f"File '{file}' does not exist")

    tmp_file = file + ".tmp"
    try:
        with open(file, "r") as in_file, open(tmp_file, "w") as out_file:
            _regroup_includes(in_file, out_file)

        # move out_file to file; os.replace overwrites the destination on
        # every platform, os.rename does not on Windows.
        os.replace(tmp_file, file)
    finally:
        # after a successful replace the temp file is gone; this only removes
        # a leftover from a failed run.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)


def _regroup_includes(in_file, out_file):
    """Copy in_file to out_file, sorting every run of consecutive #include lines."""
    includes = []
    skipped_lines = []

    # This calls print_sorted_includes on a set of consecutive #include lines.
    # This implicitly keeps separation of any #include lines that are contained in
    # an #ifdef, because it will order the #include lines inside and after the
    # #ifdef completely separately.
    for line in in_file:
        # if a line starts with #include we don't want to print it yet, instead we
        # want to collect all consecutive #include lines. Blank lines seen between
        # two #include lines are dropped; the grouping re-creates the separation.
        if line.startswith("#include"):
            includes.append(line)
            skipped_lines = []
            continue

        # if we have collected any #include lines, we want to print them sorted
        # before printing the current line. However, if the current line is empty
        # we want to perform a lookahead to see if the next line is an #include.
        # To maintain any separation between #include lines and their subsequent
        # lines we keep track of all lines we have skipped in between.
        if includes:
            if not line.strip():
                skipped_lines.append(line)
                continue

            # we have includes that need to be grouped before printing the current
            # line.
            print_sorted_includes(includes, file=out_file)
            includes = []

            # print any skipped lines
            out_file.write("".join(skipped_lines))
            skipped_lines = []

        out_file.write(line)

    # the input may end while we are still collecting #include lines (or the blank
    # lines after them); without this flush they would be lost from the output.
    if includes:
        print_sorted_includes(includes, file=out_file)
        out_file.write("".join(skipped_lines))


def _split_include(include):
    """Return (directive, target) for one #include line.

    The target is the first whitespace-delimited token after the directive,
    e.g. ``"postgres.h"`` or ``<stdio.h>``. Any amount of whitespace, or none
    at all (``#include<stdio.h>``), is accepted between the two.
    """
    tokens = include.split()
    if len(tokens) >= 2:
        return tokens[0], tokens[1]

    word = tokens[0] if tokens else ""
    if word.startswith("#include"):
        return "#include", word[len("#include"):]
    return word, ""


def print_sorted_includes(includes, file=sys.stdout):
    """Print the given #include lines grouped and sorted.

    Each line is assigned to a group by the matchers below. Groups are printed
    in ascending order of their group key, separated by one empty line, and the
    lines inside a group are sorted alphabetically. Duplicate lines within a
    group are printed once.

    includes: iterable of complete ``#include ...`` lines.
    file: writable text stream that receives the output.
    """
    default_group_key = 1
    groups = collections.defaultdict(set)

    # define the groups that we separate correctly. The matchers are tested in the order
    # of their priority field. The first matcher that matches the include is used to
    # assign the include to a group.
    # The groups are printed in the order of their group_key.
    matchers = [
        {
            "name": "system includes",
            "matcher": lambda x: x.startswith("<"),
            "group_key": -2,
            "priority": 0,
        },
        {
            "name": "toplevel postgres includes",
            "matcher": lambda x: "/" not in x,
            "group_key": 0,
            "priority": 9,
        },
        {
            "name": "postgres.h",
            "matcher": lambda x: x.strip() in ['"postgres.h"'],
            "group_key": -1,
            "priority": -1,
        },
        {
            "name": "toplevel citus inlcudes",
            "matcher": lambda x: x.strip()
            in [
                '"citus_version.h"',
                '"pg_version_compat.h"',
                '"pg_version_constants.h"',
            ],
            "group_key": 3,
            "priority": 0,
        },
        {
            "name": "columnar includes",
            "matcher": lambda x: x.startswith('"columnar/'),
            "group_key": 4,
            "priority": 1,
        },
        {
            "name": "distributed includes",
            "matcher": lambda x: x.startswith('"distributed/'),
            "group_key": 5,
            "priority": 1,
        },
    ]
    matchers.sort(key=lambda x: x["priority"])

    # throughout our codebase we have some includes where either postgres or citus
    # includes are wrongfully included with the syntax for system includes. Before we
    # try to match those we will change the <> to "" to make them match our system. This
    # will also rewrite the include to the correct syntax.
    # NOTE(review): the entries of this list were restored from the description above;
    # confirm them against version control.
    common_system_include_error_prefixes = ("<nodes/", "<distributed/")

    # assign include to group
    for include in includes:
        # a last line without a trailing newline would otherwise be glued to its
        # neighbour once the group is joined.
        if not include.endswith("\n"):
            include += "\n"

        # extract the include target, which decides the group
        directive, include_content = _split_include(include)

        # fix common system includes which are secretly postgres or citus includes
        if include_content.startswith(common_system_include_error_prefixes):
            include_content = '"' + include_content[1:-1] + '"'
            include = directive + " " + include_content + "\n"

        group_key = default_group_key
        for matcher in matchers:
            if matcher["matcher"](include_content):
                group_key = matcher["group_key"]
                break

        groups[group_key].add(include)

    # iterate over all groups in the natural order of their keys
    for i, (_, members) in enumerate(sorted(groups.items())):
        if i > 0:
            print(file=file)
        print("".join(sorted(members)), end="", file=file)


if __name__ == "__main__":
    main(sys.argv)