Avoid obscuring regression test diffs with normalization

First, diff is updated to not update the files in-place. For some reason diff is being called multiple times, so $file1.unmodified becomes normalized on the second invocation. Secondly, diff-filter updates the output to come from the unmodified version. Normalization serves two purposes: (1) avoid diff noise in regressions; (2) avoid diff noise in commits when the expected result is updated. The first purpose only wants to reduce the lines which diff registers, whereas the second wants those changes to be committed.
2020-01-23 01:52:31 +00:00 · 2020-01-23 01:52:31 +00:00 · 5e55a36172
parent 65dfcaa54b
commit 5e55a36172
2 changed files with 44 additions and 10 deletions
--- a/src/test/regress/bin/diff
+++ b/src/test/regress/bin/diff
@ -18,7 +18,7 @@ file2="${@:(-1):1}"
 # filename of the expected file as the test name. We can have multiple
 # expected files for a single test with _0, _1, ... suffixes.
 # So for the test name, we also strip the additional suffix.
-test=$(basename $file1 .out | sed -E "s/_[0-9]+$//")
+test=$(basename "$file1" .out | sed -E "s/_[0-9]+$//")
 args=${@:1:$#-2}
 BASEDIR=$(dirname "$0")
@ -33,16 +33,11 @@ fi
 if test -z "${VANILLATEST:-}"
 then
 	touch "$file1"  # when adding a new test the expected file does not exist
-	sed -Ef $BASEDIR/normalize.sed < $file1 > "$file1.modified"
+	sed -Ef $BASEDIR/normalize.sed < "$file1" > "$file1.modified"
 	mv "$file1" "$file1.unmodified"
 	mv "$file1.modified" "$file1"
 	sed -Ef $BASEDIR/normalize.sed < "$file2" > "$file2.modified"
-	mv "$file2" "$file2.unmodified"
+	"$DIFF" -w $args "$file1.modified" "$file2.modified" | diff-filter
-	mv "$file2.modified" "$file2"
+	exit ${PIPESTATUS[0]}
 	$DIFF -w $args $file1 $file2
 	exitcode=$?
 	exit $exitcode
 else
-	exec $DIFF -w $args $file1 $file2
+	exec "$DIFF" -w $args "$file1" "$file2"
 fi
--- a/src/test/regress/bin/diff-filter
+++ b/src/test/regress/bin/diff-filter
@ -0,0 +1,39 @@
 #!/usr/bin/env python3
 """
 diff-filter denormalizes diff output by having lines beginning with ' ' or '+'
 come from file2's unmodified version.
 """
 def main():
 	"""Denormalize a unified diff read from stdin and write it to stdout.
 	Context (' ') and added ('+') lines inside a hunk are replaced by the
 	matching line of the file named in the '+++ ' header once its trailing
 	'.modified' suffix is removed. Every other line is copied verbatim, so
 	input that is not a unified diff passes through unchanged. When no
 	source line is available (no header seen yet, or the file is shorter
 	than the diff claims) the diff's own line is emitted instead.
 	Raises OSError if the file named by a '+++ ' header cannot be opened.
 	"""
 	import re
 	from sys import stdin, stdout
 	suffix = '.modified'
 	# Captures: file1 line count, file2 start line, file2 line count.
 	# Counts are optional in the unified format and default to 1.
 	hunk_header = re.compile(r'@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
 	file2 = None  # handle on the unmodified file2, once a header was seen
 	file2line = 1  # 1-based number of the line the next read of file2 yields
 	old_left = 0  # file1-side lines still owed by the current hunk
 	new_left = 0  # file2-side lines still owed by the current hunk
 	try:
 		for line in stdin:
 			in_hunk = old_left > 0 or new_left > 0
 			# Inside a hunk the first character alone decides the line kind,
 			# so added content such as '++ x' (rendered '+++ x') is not
 			# mistaken for a file header.
 			if in_hunk and line.startswith((' ', '+')):
 				marker = line[0]
 				if marker == ' ':
 					old_left -= 1
 				new_left -= 1
 				raw = next(file2, None) if file2 is not None else None
 				if raw is None:
 					stdout.write(line)
 					continue
 				file2line += 1
 				if not raw.endswith('\n'):
 					# Keep the output line-structured; diff reports the
 					# missing newline itself on the following marker line.
 					raw += '\n'
 				stdout.write(marker + raw)
 				continue
 			match = hunk_header.match(line)
 			if match:
 				# Hunk content never starts with '@', so this is a header
 				# even if the previous hunk's counts were off.
 				old_count, start, new_count = match.groups()
 				old_left = 1 if old_count is None else int(old_count)
 				new_left = 1 if new_count is None else int(new_count)
 				target = int(start)
 				# Skip file2 lines that lie between hunks.
 				while file2 is not None and file2line < target:
 					if next(file2, None) is None:
 						break
 					file2line += 1
 			elif in_hunk:
 				if line.startswith('-'):
 					old_left -= 1
 			elif line.startswith('+++ '):
 				label = line[4:].rstrip('\n')
 				# The timestamp, when present, follows the last tab.
 				tab = label.rfind('\t')
 				fname = label if tab < 0 else label[:tab]
 				# Drop only the trailing suffix; a directory that happens
 				# to contain '.modified' must stay intact.
 				if fname.endswith(suffix):
 					fname = fname[:-len(suffix)]
 				if file2 is not None:
 					file2.close()
 					file2 = None
 				file2 = open(fname)
 				file2line = 1
 			stdout.write(line)
 	finally:
 		if file2 is not None:
 			file2.close()
 if __name__ == '__main__':
 	main()