Merge pull request #4818 from citusdata/col/show-projected-cols

2021-09-02 19:14:19 +03:00 · 2021-09-02 19:14:19 +03:00 · e41854f590
parent 4fb05efabb 37d0ecfbb7
commit e41854f590
17 changed files with 469 additions and 51 deletions
--- a/src/backend/columnar/columnar_customscan.c
+++ b/src/backend/columnar/columnar_customscan.c
@ -17,6 +17,7 @@
 #include "access/amapi.h"
 #include "access/skey.h"
 #include "nodes/extensible.h"
+#include "nodes/makefuncs.h"
 #include "nodes/pg_list.h"
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
@ -25,6 +26,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/restrictinfo.h"
 #include "utils/relcache.h"
+#include "utils/ruleutils.h"
 #include "utils/spccache.h"

 #include "columnar/columnar.h"
@ -85,6 +87,10 @@ static void ColumnarScan_EndCustomScan(CustomScanState *node);
 static void ColumnarScan_ReScanCustomScan(CustomScanState *node);
 static void ColumnarScan_ExplainCustomScan(CustomScanState *node, List *ancestors,
 										   ExplainState *es);
+static const char * ColumnarScanProjectedColumnsStr(ColumnarScanState *columnarScanState,
+													List *ancestors, ExplainState *es);
+static List * ColumnarVarNeeded(ColumnarScanState *columnarScanState);
+static Bitmapset * ColumnarAttrNeeded(ScanState *ss);

 /* saved hook value in case of unload */
 static set_rel_pathlist_hook_type PreviousSetRelPathlistHook = NULL;
@ -671,6 +677,12 @@ ColumnarScan_BeginCustomScan(CustomScanState *cscanstate, EState *estate, int ef
 }


+/*
+ * ColumnarAttrNeeded returns a bitmapset of the 0-indexed attribute numbers
+ * that are needed during columnar custom scan.
+ * Throws an error if it finds a Var referencing an attribute that is not
+ * supported by ColumnarScan.
+ */
 static Bitmapset *
 ColumnarAttrNeeded(ScanState *ss)
 {
@ -821,12 +833,115 @@ static void
 ColumnarScan_ExplainCustomScan(CustomScanState *node, List *ancestors,
 							   ExplainState *es)
 {
-	TableScanDesc scanDesc = node->ss.ss_currentScanDesc;
+	ColumnarScanDesc columnarScanDesc = (ColumnarScanDesc) node->ss.ss_currentScanDesc;

-	if (scanDesc != NULL)
+	if (columnarScanDesc != NULL)
 	{
-		int64 chunkGroupsFiltered = ColumnarScanChunkGroupsFiltered(scanDesc);
+		int64 chunkGroupsFiltered = ColumnarScanChunkGroupsFiltered(columnarScanDesc);
 		ExplainPropertyInteger("Columnar Chunk Groups Removed by Filter", NULL,
 							   chunkGroupsFiltered, es);
 	}
+	/* projected columns are reported even when there is no scan descriptor */
+	ColumnarScanState *columnarScanState = (ColumnarScanState *) node;
+	const char *projectedColumnsStr =
+		ColumnarScanProjectedColumnsStr(columnarScanState, ancestors, es);
+	ExplainPropertyText("Columnar Projected Columns", projectedColumnsStr, es);
+}
+
+
+/*
+ * ColumnarScanProjectedColumnsStr builds the text shown for the "Columnar
+ * Projected Columns" explain property, or a placeholder if no Var is needed.
+ */
+static const char *
+ColumnarScanProjectedColumnsStr(ColumnarScanState *columnarScanState, List *ancestors,
+								ExplainState *es)
+{
+	List *projectedVars = ColumnarVarNeeded(columnarScanState);
+	if (list_length(projectedVars) < 1)
+	{
+		/* nothing to deparse, say so instead of printing an empty list */
+		return "<columnar optimized out all columns>";
+	}
+
+	ScanState *scanState = &columnarScanState->custom_scanstate.ss;
+
+	/* the deparse context is set up differently before and after PG13 */
+#if PG_VERSION_NUM >= 130000
+	List *deparseContext =
+		set_deparse_context_plan(es->deparse_cxt, scanState->ps.plan, ancestors);
+#else
+	List *deparseContext =
+		set_deparse_context_planstate(es->deparse_cxt, (Node *) &scanState->ps,
+									  ancestors);
+#endif
+
+	/* deparse without table name prefixes and without implicit casts */
+	return deparse_expression((Node *) projectedVars, deparseContext, false, false);
+}
+
+
+/*
+ * ColumnarVarNeeded returns a list of Var nodes, one for each attribute
+ * that is needed during the columnar custom scan.
+ *
+ * Throws an error if a needed attribute is dropped or is not a regular
+ * (user) column, since ColumnarScan cannot project those.
+ */
+static List *
+ColumnarVarNeeded(ColumnarScanState *columnarScanState)
+{
+	ScanState *scanState = &columnarScanState->custom_scanstate.ss;
+
+	/* these do not change per attribute, so look them up only once */
+	Relation columnarRelation = scanState->ss_currentRelation;
+	CustomScan *customScan = (CustomScan *) scanState->ps.plan;
+	Index scanrelid = customScan->scan.scanrelid;
+
+	/*
+	 * varlevelsup is used to figure out the (query) level of the Var
+	 * that we are investigating. Since we are dealing with a particular
+	 * relation, it is useless here.
+	 */
+	Index varlevelsup = 0;
+
+	List *varList = NIL;
+
+	Bitmapset *neededAttrSet = ColumnarAttrNeeded(scanState);
+	int bmsMember = -1;
+	while ((bmsMember = bms_next_member(neededAttrSet, bmsMember)) >= 0)
+	{
+		/* neededAttrSet already represents 0-indexed attribute numbers */
+		Form_pg_attribute columnForm =
+			TupleDescAttr(RelationGetDescr(columnarRelation), bmsMember);
+		if (columnForm->attisdropped)
+		{
+			ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN),
+							errmsg("cannot explain column with attrNum=%d "
+								   "of columnar table %s since it is dropped",
+								   bmsMember + 1,
+								   RelationGetRelationName(columnarRelation))));
+		}
+		else if (columnForm->attnum <= 0)
+		{
+			/*
+			 * ColumnarAttrNeeded should have already thrown an error for
+			 * system columns. Similarly, it should have already expanded
+			 * whole-row references to individual attributes.
+			 */
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("cannot explain column with attrNum=%d "
+								   "of columnar table %s since it is either "
+								   "a system column or a whole-row "
+								   "reference", columnForm->attnum,
+								   RelationGetRelationName(columnarRelation))));
+		}
+
+		Var *var = makeVar(scanrelid, columnForm->attnum, columnForm->atttypid,
+						   columnForm->atttypmod, columnForm->attcollation,
+						   varlevelsup);
+		varList = lappend(varList, var);
+	}
+
+	return varList;
 }
--- a/src/backend/columnar/columnar_tableam.c
+++ b/src/backend/columnar/columnar_tableam.c
@ -1843,9 +1843,8 @@ columnar_tableam_finish()
 * Get the number of chunks filtered out during the given scan.
 */
 int64
-ColumnarScanChunkGroupsFiltered(TableScanDesc scanDesc)
+ColumnarScanChunkGroupsFiltered(ColumnarScanDesc columnarScanDesc)
 {
-	ColumnarScanDesc columnarScanDesc = (ColumnarScanDesc) scanDesc;
 	ColumnarReadState *readState = columnarScanDesc->cs_readState;

 	/* readState is initialized lazily */
--- a/src/include/columnar/columnar_tableam.h
+++ b/src/include/columnar/columnar_tableam.h
@ -42,6 +42,10 @@
 #define VALID_BLOCKNUMBERS ((uint64) (MaxBlockNumber + 1))


+struct ColumnarScanDescData;
+typedef struct ColumnarScanDescData *ColumnarScanDesc;
+
+
 const TableAmRoutine * GetColumnarTableAmRoutine(void);
 extern void columnar_tableam_init(void);
 extern void columnar_tableam_finish(void);
@ -51,7 +55,7 @@ extern TableScanDesc columnar_beginscan_extended(Relation relation, Snapshot sna
 												 ParallelTableScanDesc parallel_scan,
 												 uint32 flags, Bitmapset *attr_needed,
 												 List *scanQual);
-extern int64 ColumnarScanChunkGroupsFiltered(TableScanDesc scanDesc);
+extern int64 ColumnarScanChunkGroupsFiltered(ColumnarScanDesc columnarScanDesc);
 extern bool ColumnarSupportsIndexAM(char *indexAMName);
 extern bool IsColumnarTableAmTable(Oid relationId);
 extern TableDDLCommand * ColumnarGetTableOptionsDDL(Oid relationId);
--- a/src/test/regress/expected/columnar_alter.out
+++ b/src/test/regress/expected/columnar_alter.out
@ -304,10 +304,11 @@ SELECT count(*) FROM zero_col_columnar;
 (1 row)

 EXPLAIN (costs off, summary off) SELECT * FROM zero_col_columnar;
-                   QUERY PLAN
+                             QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on zero_col_columnar
-(1 row)
+   Columnar Projected Columns: <columnar optimized out all columns>
+(2 rows)

 INSERT INTO zero_col_columnar DEFAULT VALUES;
 INSERT INTO zero_col_columnar DEFAULT VALUES;
@ -323,10 +324,11 @@ SELECT count(*) FROM zero_col_columnar;
 (1 row)

 EXPLAIN (costs off, summary off) SELECT * FROM zero_col_columnar;
-                   QUERY PLAN
+                             QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on zero_col_columnar
-(1 row)
+   Columnar Projected Columns: <columnar optimized out all columns>
+(2 rows)

 VACUUM VERBOSE zero_col_columnar;
 INFO:  statistics for "zero_col_columnar":
--- a/src/test/regress/expected/columnar_chunk_filtering.out
+++ b/src/test/regress/expected/columnar_chunk_filtering.out
@ -128,7 +128,8 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
   Filter: (i > 123456)
   Rows Removed by Filter: 3457
   Columnar Chunk Groups Removed by Filter: 12
-(4 rows)
+   Columnar Projected Columns: i
+(5 rows)

 SET columnar.enable_qual_pushdown = false;
 EXPLAIN (analyze on, costs off, timing off, summary off)
@ -139,7 +140,8 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
   Filter: (i > 123456)
   Rows Removed by Filter: 123457
   Columnar Chunk Groups Removed by Filter: 0
-(4 rows)
+   Columnar Projected Columns: i
+(5 rows)

 SET columnar.enable_qual_pushdown TO DEFAULT;
 -- https://github.com/citusdata/citus/issues/4555
@ -154,7 +156,8 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
   Filter: (i > 180000)
   Rows Removed by Filter: 1
   Columnar Chunk Groups Removed by Filter: 18
-(4 rows)
+   Columnar Projected Columns: i
+(5 rows)

 DROP TABLE simple_chunk_filtering;
 CREATE TABLE multi_column_chunk_filtering(a int, b int) USING columnar;
@ -168,7 +171,8 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
         Filter: (a > 50000)
         Rows Removed by Filter: 1
         Columnar Chunk Groups Removed by Filter: 5
-(5 rows)
+         Columnar Projected Columns: a
+(6 rows)

 EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;
@ -179,9 +183,115 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
         Filter: ((a > 50000) AND (b > 50000))
         Rows Removed by Filter: 1
         Columnar Chunk Groups Removed by Filter: 5
+         Columnar Projected Columns: a, b
+(6 rows)
+
+-- make next tests faster
+TRUNCATE multi_column_chunk_filtering;
+INSERT INTO multi_column_chunk_filtering SELECT generate_series(0,5);
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT b FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
+   Filter: ((a > 50000) AND (b > 50000))
+   Columnar Chunk Groups Removed by Filter: 1
+   Columnar Projected Columns: a, b
+(4 rows)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT b, a FROM multi_column_chunk_filtering WHERE b > 50000;
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
+   Filter: (b > 50000)
+   Rows Removed by Filter: 6
+   Columnar Chunk Groups Removed by Filter: 0
+   Columnar Projected Columns: a, b
 (5 rows)

-DROP TABLE multi_column_chunk_filtering;
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT FROM multi_column_chunk_filtering WHERE a > 50000;
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
+   Filter: (a > 50000)
+   Columnar Chunk Groups Removed by Filter: 1
+   Columnar Projected Columns: a
+(4 rows)
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT FROM multi_column_chunk_filtering;
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1)
+   Columnar Chunk Groups Removed by Filter: 0
+   Columnar Projected Columns: <columnar optimized out all columns>
+(3 rows)
+
+BEGIN;
+  ALTER TABLE multi_column_chunk_filtering DROP COLUMN a;
+  ALTER TABLE multi_column_chunk_filtering DROP COLUMN b;
+  EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT * FROM multi_column_chunk_filtering;
+                                     QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1)
+   Columnar Chunk Groups Removed by Filter: 0
+   Columnar Projected Columns: <columnar optimized out all columns>
+(3 rows)
+
+ROLLBACK;
+CREATE TABLE another_columnar_table(x int, y int) USING columnar;
+INSERT INTO another_columnar_table SELECT generate_series(0,5);
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT a, y FROM multi_column_chunk_filtering, another_columnar_table WHERE x > 1;
+                                        QUERY PLAN
+---------------------------------------------------------------------
+ Nested Loop (actual rows=24 loops=1)
+   ->  Custom Scan (ColumnarScan) on another_columnar_table (actual rows=4 loops=1)
+         Filter: (x > 1)
+         Rows Removed by Filter: 2
+         Columnar Chunk Groups Removed by Filter: 0
+         Columnar Projected Columns: x, y
+   ->  Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=4)
+         Columnar Chunk Groups Removed by Filter: 0
+         Columnar Projected Columns: a
+(9 rows)
+
+EXPLAIN (costs off, timing off, summary off)
+  SELECT y, * FROM another_columnar_table;
+                      QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on another_columnar_table
+   Columnar Projected Columns: x, y
+(2 rows)
+
+EXPLAIN (costs off, timing off, summary off)
+  SELECT *, x FROM another_columnar_table;
+                      QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on another_columnar_table
+   Columnar Projected Columns: x, y
+(2 rows)
+
+EXPLAIN (costs off, timing off, summary off)
+  SELECT y, another_columnar_table FROM another_columnar_table;
+                      QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on another_columnar_table
+   Columnar Projected Columns: x, y
+(2 rows)
+
+EXPLAIN (costs off, timing off, summary off)
+  SELECT another_columnar_table, x FROM another_columnar_table;
+                      QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on another_columnar_table
+   Columnar Projected Columns: x, y
+(2 rows)
+
+DROP TABLE multi_column_chunk_filtering, another_columnar_table;
 --
 -- https://github.com/citusdata/citus/issues/4780
 --
--- a/src/test/regress/expected/columnar_citus_integration.out
+++ b/src/test/regress/expected/columnar_citus_integration.out
@ -1093,5 +1093,63 @@ select result from run_command_on_placements('zero_col', 'select count(*) from %
 10
 (2 rows)

+CREATE TABLE weird_col_explain (
+  "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" INT,
+  "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!" INT)
+USING columnar;
+NOTICE:  identifier "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!" will be truncated to "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'"
+SELECT create_distributed_table('weird_col_explain', 'bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb''bbbbbbbbbbbbbbbbbbbbb''''bbbbbbbb');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+EXPLAIN (COSTS OFF, SUMMARY OFF)
+SELECT * FROM weird_col_explain;
+                                                                                   QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive)
+   Task Count: 4
+   Tasks Shown: One of 4
+   ->  Task
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Custom Scan (ColumnarScan) on weird_col_explain_20090021 weird_col_explain
+               Columnar Projected Columns: "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb", "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'"
+(7 rows)
+
+EXPLAIN (COSTS OFF, SUMMARY OFF)
+SELECT *, "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb"
+FROM weird_col_explain
+WHERE "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" * 2 >
+      "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!";
+NOTICE:  identifier "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!" will be truncated to "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'"
+                                                                                   QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive)
+   Task Count: 4
+   Tasks Shown: One of 4
+   ->  Task
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Custom Scan (ColumnarScan) on weird_col_explain_20090021 weird_col_explain
+               Filter: (("bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" * 2) > "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'")
+               Columnar Projected Columns: "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb", "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'"
+(8 rows)
+
+-- should not project any columns
+EXPLAIN (COSTS OFF, SUMMARY OFF)
+SELECT COUNT(*) FROM weird_col_explain;
+                                             QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate
+   ->  Custom Scan (Citus Adaptive)
+         Task Count: 4
+         Tasks Shown: One of 4
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Aggregate
+                     ->  Custom Scan (ColumnarScan) on weird_col_explain_20090021 weird_col_explain
+                           Columnar Projected Columns: <columnar optimized out all columns>
+(9 rows)
+
 SET client_min_messages TO WARNING;
 DROP SCHEMA columnar_citus_integration CASCADE;
--- a/src/test/regress/expected/columnar_cursor.out
+++ b/src/test/regress/expected/columnar_cursor.out
@ -11,7 +11,8 @@ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT * FROM test_curs
   Filter: (a = 25)
   Rows Removed by Filter: 9899
   Columnar Chunk Groups Removed by Filter: 1
-(4 rows)
+   Columnar Projected Columns: a, b
+(5 rows)

 BEGIN;
 DECLARE a_25 SCROLL CURSOR
@ -112,7 +113,8 @@ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT * FROM test_curs
   Filter: (a > 25)
   Rows Removed by Filter: 2626
   Columnar Chunk Groups Removed by Filter: 0
-(4 rows)
+   Columnar Projected Columns: a, b
+(5 rows)

 BEGIN;
 DECLARE a_25 SCROLL CURSOR
--- a/src/test/regress/expected/columnar_empty.out
+++ b/src/test/regress/expected/columnar_empty.out
@ -74,13 +74,15 @@ explain (costs off, summary off, timing off) select * from t_uncompressed;
                  QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on t_uncompressed
-(1 row)
+   Columnar Projected Columns: a
+(2 rows)

 explain (costs off, summary off, timing off) select * from t_compressed;
                 QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on t_compressed
-(1 row)
+   Columnar Projected Columns: a
+(2 rows)

 -- vacuum
 vacuum verbose t_compressed;
@ -150,13 +152,15 @@ explain table t_uncompressed;
                                   QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on t_uncompressed  (cost=0.00..0.00 rows=1 width=32)
-(1 row)
+   Columnar Projected Columns: a
+(2 rows)

 explain table t_compressed;
                                  QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on t_compressed  (cost=0.00..0.00 rows=1 width=32)
-(1 row)
+   Columnar Projected Columns: a
+(2 rows)

 -- drop
 drop table t_compressed;
--- a/src/test/regress/expected/columnar_join.out
+++ b/src/test/regress/expected/columnar_join.out
@ -23,15 +23,36 @@ SELECT count(*)
 FROM users
 JOIN things ON (users.id = things.user_id)
 WHERE things.id > 299990;
-                    QUERY PLAN
+                      QUERY PLAN
 ---------------------------------------------------------------------
 Aggregate
   ->  Nested Loop
         Join Filter: (users.id = things.user_id)
         ->  Custom Scan (ColumnarScan) on things
               Filter: (id > 299990)
+               Columnar Projected Columns: id, user_id
         ->  Custom Scan (ColumnarScan) on users
-(6 rows)
+               Columnar Projected Columns: id
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT u1.id, u2.id, COUNT(u2.*)
+FROM users u1
+JOIN users u2 ON (u1.id::text = u2.name)
+WHERE u2.id > 299990
+GROUP BY u1.id, u2.id;
+                     QUERY PLAN
+---------------------------------------------------------------------
+ HashAggregate
+   Group Key: u1.id, u2.id
+   ->  Nested Loop
+         Join Filter: ((u1.id)::text = u2.name)
+         ->  Custom Scan (ColumnarScan) on users u2
+               Filter: (id > 299990)
+               Columnar Projected Columns: id, name
+         ->  Custom Scan (ColumnarScan) on users u1
+               Columnar Projected Columns: id
+(9 rows)

 SET client_min_messages TO warning;
 DROP SCHEMA am_columnar_join CASCADE;
--- a/src/test/regress/expected/columnar_partitioning.out
+++ b/src/test/regress/expected/columnar_partitioning.out
@ -84,10 +84,13 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent;
         ->  Partial Aggregate
               ->  Parallel Append
                     ->  Custom Scan (ColumnarScan) on p3 parent_4
+                           Columnar Projected Columns: i
                     ->  Custom Scan (ColumnarScan) on p0 parent_1
+                           Columnar Projected Columns: i
                     ->  Custom Scan (ColumnarScan) on p1 parent_2
+                           Columnar Projected Columns: i
                     ->  Parallel Seq Scan on p2 parent_3
-(9 rows)
+(12 rows)

 SELECT count(*), sum(i), min(i), max(i) FROM parent;
 count  |   sum   | min | max
@ -135,12 +138,14 @@ WHERE ts > '2020-02-20' AND n < 5;
   ->  Append
         ->  Custom Scan (ColumnarScan) on p1 parent_1
               Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
+               Columnar Projected Columns: ts, i, n
         ->  Index Scan using p2_n_idx on p2 parent_2
               Index Cond: (n < '5'::numeric)
               Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
         ->  Custom Scan (ColumnarScan) on p3 parent_3
               Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-(9 rows)
+               Columnar Projected Columns: ts, i, n
+(11 rows)

 BEGIN;
  SET LOCAL columnar.enable_custom_scan TO 'OFF';
@ -185,7 +190,8 @@ EXPLAIN (costs off) SELECT * FROM i_row;
   ->  Seq Scan on i_row i_row_1
   ->  Seq Scan on ij_row_row i_row_2
   ->  Custom Scan (ColumnarScan) on ij_row_col i_row_3
-(4 rows)
+         Columnar Projected Columns: i
+(5 rows)

 SELECT * FROM i_row;
  i
@ -212,9 +218,11 @@ EXPLAIN (costs off) SELECT * FROM i_col;
 ---------------------------------------------------------------------
 Append
   ->  Custom Scan (ColumnarScan) on i_col i_col_1
+         Columnar Projected Columns: i
   ->  Seq Scan on ij_col_row i_col_2
   ->  Custom Scan (ColumnarScan) on ij_col_col i_col_3
-(4 rows)
+         Columnar Projected Columns: i
+(6 rows)

 SELECT * FROM i_col;
  i
@ -228,7 +236,8 @@ EXPLAIN (costs off) SELECT * FROM ONLY i_col;
             QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on i_col
-(1 row)
+   Columnar Projected Columns: i
+(2 rows)

 SELECT * FROM ONLY i_col;
  i
@ -252,7 +261,8 @@ EXPLAIN (costs off) SELECT * FROM ij_row_col;
                QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on ij_row_col
-(1 row)
+   Columnar Projected Columns: i, j
+(2 rows)

 SELECT * FROM ij_row_col;
  i  |  j
@ -276,7 +286,8 @@ EXPLAIN (costs off) SELECT * FROM ij_col_col;
                QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on ij_col_col
-(1 row)
+   Columnar Projected Columns: i, j
+(2 rows)

 SELECT * FROM ij_col_col;
  i  |  j
--- a/src/test/regress/expected/columnar_partitioning_1.out
+++ b/src/test/regress/expected/columnar_partitioning_1.out
@ -84,10 +84,13 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent;
         ->  Partial Aggregate
               ->  Parallel Append
                     ->  Custom Scan (ColumnarScan) on p3
+                           Columnar Projected Columns: i
                     ->  Custom Scan (ColumnarScan) on p0
+                           Columnar Projected Columns: i
                     ->  Custom Scan (ColumnarScan) on p1
+                           Columnar Projected Columns: i
                     ->  Parallel Seq Scan on p2
-(9 rows)
+(12 rows)

 SELECT count(*), sum(i), min(i), max(i) FROM parent;
 count  |   sum   | min | max
@ -135,12 +138,14 @@ WHERE ts > '2020-02-20' AND n < 5;
   ->  Append
         ->  Custom Scan (ColumnarScan) on p1
               Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
+               Columnar Projected Columns: ts, i, n
         ->  Index Scan using p2_n_idx on p2
               Index Cond: (n < '5'::numeric)
               Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
         ->  Custom Scan (ColumnarScan) on p3
               Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-(9 rows)
+               Columnar Projected Columns: ts, i, n
+(11 rows)

 BEGIN;
  SET LOCAL columnar.enable_custom_scan TO 'OFF';
@ -185,7 +190,8 @@ EXPLAIN (costs off) SELECT * FROM i_row;
   ->  Seq Scan on i_row
   ->  Seq Scan on ij_row_row
   ->  Custom Scan (ColumnarScan) on ij_row_col
-(4 rows)
+         Columnar Projected Columns: i
+(5 rows)

 SELECT * FROM i_row;
  i
@ -212,9 +218,11 @@ EXPLAIN (costs off) SELECT * FROM i_col;
 ---------------------------------------------------------------------
 Append
   ->  Custom Scan (ColumnarScan) on i_col
+         Columnar Projected Columns: i
   ->  Seq Scan on ij_col_row
   ->  Custom Scan (ColumnarScan) on ij_col_col
-(4 rows)
+         Columnar Projected Columns: i
+(6 rows)

 SELECT * FROM i_col;
  i
@ -228,7 +236,8 @@ EXPLAIN (costs off) SELECT * FROM ONLY i_col;
             QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on i_col
-(1 row)
+   Columnar Projected Columns: i
+(2 rows)

 SELECT * FROM ONLY i_col;
  i
@ -252,7 +261,8 @@ EXPLAIN (costs off) SELECT * FROM ij_row_col;
                QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on ij_row_col
-(1 row)
+   Columnar Projected Columns: i, j
+(2 rows)

 SELECT * FROM ij_row_col;
  i  |  j
@ -276,7 +286,8 @@ EXPLAIN (costs off) SELECT * FROM ij_col_col;
                QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on ij_col_col
-(1 row)
+   Columnar Projected Columns: i, j
+(2 rows)

 SELECT * FROM ij_col_col;
  i  |  j
--- a/src/test/regress/expected/columnar_paths.out
+++ b/src/test/regress/expected/columnar_paths.out
@ -241,6 +241,7 @@ WHERE ct_1.a < 3000;
   CTE cte
     ->  Custom Scan (ColumnarScan) on full_correlated
           Filter: (a > 1)
+           Columnar Projected Columns: a, d
   ->  Nested Loop Left Join
         ->  Hash Join
               Hash Cond: (cte.d = ht_1.a)
@ -253,7 +254,7 @@ WHERE ct_1.a < 3000;
                                 Index Cond: (a = ct_1.d)
         ->  Index Only Scan using heap_table_btree on heap_table ht_2
               Index Cond: (a = ct_1.c)
-(16 rows)
+(17 rows)

 -- same query but columnar custom scan is disabled
 BEGIN;
@ -297,6 +298,7 @@ WHERE w2.a = 123;
   Merge Cond: (w2.d = w1.a)
   CTE w
     ->  Custom Scan (ColumnarScan) on full_correlated
+           Columnar Projected Columns: a, b, c, d
   ->  Sort
         Sort Key: w2.d
         ->  CTE Scan on w w2
@ -305,7 +307,7 @@ WHERE w2.a = 123;
         ->  Sort
               Sort Key: w1.a
               ->  CTE Scan on w w1
-(12 rows)
+(13 rows)

 -- use index
 EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
@ -344,6 +346,7 @@ LIMIT 100;
                                       Sort Key: full_correlated.b DESC
                                       ->  Custom Scan (ColumnarScan) on full_correlated
                                             Filter: (a > 2)
+                                             Columnar Projected Columns: a, b
                     ->  Materialize
                           ->  Limit
                                 ->  GroupAggregate
@ -361,7 +364,7 @@ LIMIT 100;
                                             Filter: (sum(full_correlated_2.a) > 10)
                                             ->  Index Scan using full_correlated_btree on full_correlated full_correlated_2
                                                   Index Cond: (a > 2)
-(31 rows)
+(32 rows)

 DROP INDEX full_correlated_btree;
 CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
--- a/src/test/regress/expected/columnar_query.out
+++ b/src/test/regress/expected/columnar_query.out
@ -137,11 +137,14 @@ explain (costs off, summary off) select * from
 ---------------------------------------------------------------------
 Nested Loop Left Join
   ->  Custom Scan (ColumnarScan) on int8_tbl_columnar a
+         Columnar Projected Columns: q1, q2
   ->  Nested Loop
         ->  Custom Scan (ColumnarScan) on int8_tbl_columnar b
               Filter: (a.q2 = q1)
+               Columnar Projected Columns: q1
         ->  Custom Scan (ColumnarScan) on int8_tbl_columnar c
-(6 rows)
+               Columnar Projected Columns: q1
+(9 rows)

 explain (costs off, summary off)
  SELECT COUNT(*) FROM INT8_TBL_columnar t1 JOIN
@ -153,9 +156,11 @@ explain (costs off, summary off)
   ->  Hash Join
         Hash Cond: (t2.q1 = t1.q1)
         ->  Custom Scan (ColumnarScan) on int8_tbl_columnar t2
+               Columnar Projected Columns: q1
         ->  Hash
               ->  Custom Scan (ColumnarScan) on int8_tbl_columnar t1
-(6 rows)
+                     Columnar Projected Columns: q1
+(8 rows)

 CREATE TABLE INT8_TBL_heap (LIKE INT8_TBL_columnar) ;
 INSERT INTO INT8_TBL_heap SELECT * FROM INT8_TBL_columnar;
--- a/src/test/regress/expected/columnar_transactions.out
+++ b/src/test/regress/expected/columnar_transactions.out
@ -327,11 +327,12 @@ SELECT * FROM t ORDER BY a;
 -- SELECT with 0 params
 PREPARE p3 AS SELECT * FROM t WHERE a = 8;
 EXPLAIN (COSTS OFF) EXECUTE p3;
-           QUERY PLAN
+            QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on t
   Filter: (a = 8)
-(2 rows)
+   Columnar Projected Columns: a, b
+(3 rows)

 EXECUTE p3;
 a | b
@ -347,7 +348,8 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p3;
   Filter: (a = 8)
   Rows Removed by Filter: 2
   Columnar Chunk Groups Removed by Filter: 8
-(4 rows)
+   Columnar Projected Columns: a, b
+(5 rows)

 SELECT * FROM t ORDER BY a;
 a  | b
@ -380,11 +382,12 @@ SELECT * FROM t ORDER BY a;
 -- SELECT with 1 param
 PREPARE p5(int) AS SELECT * FROM t WHERE a = $1;
 EXPLAIN (COSTS OFF) EXECUTE p5(16);
-           QUERY PLAN
+            QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on t
   Filter: (a = 16)
-(2 rows)
+   Columnar Projected Columns: a, b
+(3 rows)

 EXECUTE p5(16);
 a | b
@ -398,7 +401,8 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p5(9);
   Filter: (a = 9)
   Rows Removed by Filter: 2
   Columnar Chunk Groups Removed by Filter: 8
-(4 rows)
+   Columnar Projected Columns: a, b
+(5 rows)

 SELECT * FROM t ORDER BY a;
 a  | b
@ -435,7 +439,8 @@ EXPLAIN (COSTS OFF) EXECUTE p6(30, 40);
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on t
   Filter: ((a = 31) AND (b = 41))
-(2 rows)
+   Columnar Projected Columns: a, b
+(3 rows)

 EXECUTE p6(30, 40);
 a  | b
@ -450,7 +455,8 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p6(50, 60);
   Filter: ((a = 51) AND (b = 61))
   Rows Removed by Filter: 3
   Columnar Chunk Groups Removed by Filter: 9
-(4 rows)
+   Columnar Projected Columns: a, b
+(5 rows)

 SELECT * FROM t ORDER BY a;
 a  | b
--- a/src/test/regress/sql/columnar_chunk_filtering.sql
+++ b/src/test/regress/sql/columnar_chunk_filtering.sql
@ -100,7 +100,48 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
 EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;

-DROP TABLE multi_column_chunk_filtering;
+-- make next tests faster
+TRUNCATE multi_column_chunk_filtering;
+INSERT INTO multi_column_chunk_filtering SELECT generate_series(0,5);
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT b FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT b, a FROM multi_column_chunk_filtering WHERE b > 50000;
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT FROM multi_column_chunk_filtering WHERE a > 50000;
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT FROM multi_column_chunk_filtering;
+
+BEGIN;
+  ALTER TABLE multi_column_chunk_filtering DROP COLUMN a;
+  ALTER TABLE multi_column_chunk_filtering DROP COLUMN b;
+  EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT * FROM multi_column_chunk_filtering;
+ROLLBACK;
+
+CREATE TABLE another_columnar_table(x int, y int) USING columnar;
+INSERT INTO another_columnar_table SELECT generate_series(0,5);
+
+EXPLAIN (analyze on, costs off, timing off, summary off)
+  SELECT a, y FROM multi_column_chunk_filtering, another_columnar_table WHERE x > 1;
+
+EXPLAIN (costs off, timing off, summary off)
+  SELECT y, * FROM another_columnar_table;
+
+EXPLAIN (costs off, timing off, summary off)
+  SELECT *, x FROM another_columnar_table;
+
+EXPLAIN (costs off, timing off, summary off)
+  SELECT y, another_columnar_table FROM another_columnar_table;
+
+EXPLAIN (costs off, timing off, summary off)
+  SELECT another_columnar_table, x FROM another_columnar_table;
+
+DROP TABLE multi_column_chunk_filtering, another_columnar_table;

 --
 -- https://github.com/citusdata/citus/issues/4780
--- a/src/test/regress/sql/columnar_citus_integration.sql
+++ b/src/test/regress/sql/columnar_citus_integration.sql
@ -413,5 +413,24 @@ ALTER TABLE zero_col ADD COLUMN a int;
 INSERT INTO zero_col SELECT i FROM generate_series(1, 10) i;
 select result from run_command_on_placements('zero_col', 'select count(*) from %s');

+CREATE TABLE weird_col_explain (
+  "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" INT,
+  "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!" INT)
+USING columnar;
+SELECT create_distributed_table('weird_col_explain', 'bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb''bbbbbbbbbbbbbbbbbbbbb''''bbbbbbbb');
+
+EXPLAIN (COSTS OFF, SUMMARY OFF)
+SELECT * FROM weird_col_explain;
+
+EXPLAIN (COSTS OFF, SUMMARY OFF)
+SELECT *, "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb"
+FROM weird_col_explain
+WHERE "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" * 2 >
+      "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!";
+
+-- should not project any columns
+EXPLAIN (COSTS OFF, SUMMARY OFF)
+SELECT COUNT(*) FROM weird_col_explain;
+
 SET client_min_messages TO WARNING;
 DROP SCHEMA columnar_citus_integration CASCADE;
--- a/src/test/regress/sql/columnar_join.sql
+++ b/src/test/regress/sql/columnar_join.sql
@ -24,5 +24,12 @@ FROM users
 JOIN things ON (users.id = things.user_id)
 WHERE things.id > 299990;

+EXPLAIN (COSTS OFF)
+SELECT u1.id, u2.id, COUNT(u2.*)
+FROM users u1
+JOIN users u2 ON (u1.id::text = u2.name)
+WHERE u2.id > 299990
+GROUP BY u1.id, u2.id;
+
 SET client_min_messages TO warning;
 DROP SCHEMA am_columnar_join CASCADE;