pages

2012-11-29

String aggregation & inline views

In the SCOTT schema, we have this:
SQL> select dept.dname
  2        ,emp.ename
  3  from dept
  4  left join emp
  5  on dept.deptno=emp.deptno
  6  order by 1 asc
  7          ,2 asc
  8  ;

DNAME          ENAME
-------------- ----------
ACCOUNTING     CLARK
ACCOUNTING     KING
ACCOUNTING     MILLER
OPERATIONS
RESEARCH       ADAMS
RESEARCH       FORD
RESEARCH       JONES
RESEARCH       SCOTT
RESEARCH       SMITH
SALES          ALLEN
SALES          BLAKE
SALES          JAMES
SALES          MARTIN
SALES          TURNER
SALES          WARD

15 ligne(s) sÚlectionnÚe(s).

SQL>

And we would very much prefer to have something like this:
DNAME         ENAMES
---------------------------------------------------
ACCOUNTING    CLARK,KING,MILLER
OPERATIONS
RESEARCH      ADAMS,FORD,JONES,SCOTT,SMITH
SALES         ALLEN,BLAKE,JAMES,MARTIN,TURNER,WARD

Let's have a look at some common string aggregation techniques, how they compare to each other and how a left join affect them.

Queries:
-- 1
select dept.dname
      ,emp.ename
from dept
left join emp
on dept.deptno=emp.deptno
order by 1 asc
        ,2 asc
;

-- 2
select dpte.dname
      ,listagg(dpte.ename,',') within group (order by dpte.ename) enames
from (select dept.dname
            ,emp.ename
      from dept
      left join emp
      on dept.deptno=emp.deptno) dpte
group by dpte.dname
order by 1 asc
;        

-- 3
select dept.dname
      ,emps.enames
from dept
left join (select deptno
                 ,listagg(ename,',') within group (order by ename) enames
           from emp
           group by deptno) emps
on dept.deptno = emps.deptno
order by 1 asc
;

-- 4
select dlst.dname
      ,ltrim(max(sys_connect_by_path(dlst.ename,',')) keep (dense_rank last order by dlst.curr),',') enames
from (select dpte.dname
            ,dpte.ename
            ,row_number() over (partition by dpte.deptno order by dpte.ename) curr
            ,row_number() over (partition by dpte.deptno order by dpte.ename) -1 min1
      from (select dept.deptno
                  ,dept.dname
                  ,emp.ename
            from dept
            left join emp
            on dept.deptno=emp.deptno) dpte
      ) dlst
group by dlst.dname
connect by dlst.min1 = prior dlst.curr and dlst.dname = prior dlst.dname
start with dlst.curr = 1
order by 1 asc
;

-- 5
select dept.dname
      ,emps.enames
from dept
left join (select dpte.deptno
                 ,ltrim(max(sys_connect_by_path(dpte.ename,',')) keep (dense_rank last order by dpte.curr),',') enames
           from (select deptno
                       ,ename
                       ,row_number() over (partition by deptno order by ename) curr
                       ,row_number() over (partition by deptno order by ename) -1 min1
                 from emp) dpte
           group by dpte.deptno
           connect by dpte.min1 = prior dpte.curr and dpte.deptno = prior dpte.deptno
           start with dpte.curr = 1) emps
on dept.deptno = emps.deptno
order by 1 asc
;
  


Plans:
SQL> set linesize 130
SQL> set pagesize 1000
SQL> explain plan for select dept.dname
  2        ,emp.ename
  3  from dept
  4  left join emp
  5  on dept.deptno=emp.deptno
  6  order by 1 asc
  7          ,2 asc
  8  ;

ExplicitÚ.

SQL>
SQL> select * from table(dbms_xplan.display);

PLAN_TABLE_OUTPUT
----------------------------------------------------------------------------------------------------------------------------------

Plan hash value: 780249577

-----------------------------------------------------------------------------------------
| Id  | Operation                     | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT              |         |    14 |   308 |     7  (29)| 00:00:01 |
|   1 |  SORT ORDER BY                |         |    14 |   308 |     7  (29)| 00:00:01 |
|   2 |   MERGE JOIN OUTER            |         |    14 |   308 |     6  (17)| 00:00:01 |
|   3 |    TABLE ACCESS BY INDEX ROWID| DEPT    |     4 |    52 |     2   (0)| 00:00:01 |
|   4 |     INDEX FULL SCAN           | PK_DEPT |     4 |       |     1   (0)| 00:00:01 |
|*  5 |    SORT JOIN                  |         |    14 |   126 |     4  (25)| 00:00:01 |
|   6 |     TABLE ACCESS FULL         | EMP     |    14 |   126 |     3   (0)| 00:00:01 |
-----------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   5 - access("DEPT"."DEPTNO"="EMP"."DEPTNO"(+))
       filter("DEPT"."DEPTNO"="EMP"."DEPTNO"(+))

19 ligne(s) sÚlectionnÚe(s).

SQL>
SQL> explain plan for select dpte.dname
  2        ,listagg(dpte.ename,',') within group (order by dpte.ename) enames
  3  from (select dept.dname
  4              ,emp.ename
  5        from dept
  6        left join emp
  7        on dept.deptno=emp.deptno) dpte
  8  group by dpte.dname
  9  order by 1 asc
 10  ;

ExplicitÚ.

SQL>
SQL> select * from table(dbms_xplan.display);

PLAN_TABLE_OUTPUT
----------------------------------------------------------------------------------------------------------------------------------

Plan hash value: 4196575329

------------------------------------------------------------------------------------------
| Id  | Operation                      | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT               |         |     4 |    88 |     8  (38)| 00:00:01 |
|   1 |  SORT ORDER BY                 |         |     4 |    88 |     8  (38)| 00:00:01 |
|   2 |   SORT GROUP BY                |         |     4 |    88 |     8  (38)| 00:00:01 |
|   3 |    MERGE JOIN OUTER            |         |    14 |   308 |     6  (17)| 00:00:01 |
|   4 |     TABLE ACCESS BY INDEX ROWID| DEPT    |     4 |    52 |     2   (0)| 00:00:01 |
|   5 |      INDEX FULL SCAN           | PK_DEPT |     4 |       |     1   (0)| 00:00:01 |
|*  6 |     SORT JOIN                  |         |    14 |   126 |     4  (25)| 00:00:01 |
|   7 |      TABLE ACCESS FULL         | EMP     |    14 |   126 |     3   (0)| 00:00:01 |
------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   6 - access("DEPT"."DEPTNO"="EMP"."DEPTNO"(+))
       filter("DEPT"."DEPTNO"="EMP"."DEPTNO"(+))

20 ligne(s) sÚlectionnÚe(s).

SQL>
SQL> explain plan for select dept.dname
  2        ,emps.enames
  3  from dept
  4  left join (select deptno
  5                   ,listagg(ename,',') within group (order by ename) enames
  6             from emp
  7             group by deptno) emps
  8  on dept.deptno = emps.deptno
  9  order by 1 asc
 10  ;

ExplicitÚ.

SQL>
SQL> select * from table(dbms_xplan.display);

PLAN_TABLE_OUTPUT
----------------------------------------------------------------------------------------------------------------------------------

Plan hash value: 740866522

-----------------------------------------------------------------------------------------
| Id  | Operation                     | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT              |         |     4 |  8072 |     8  (38)| 00:00:01 |
|   1 |  SORT ORDER BY                |         |     4 |  8072 |     8  (38)| 00:00:01 |
|   2 |   MERGE JOIN OUTER            |         |     4 |  8072 |     7  (29)| 00:00:01 |
|   3 |    TABLE ACCESS BY INDEX ROWID| DEPT    |     4 |    52 |     2   (0)| 00:00:01 |
|   4 |     INDEX FULL SCAN           | PK_DEPT |     4 |       |     1   (0)| 00:00:01 |
|*  5 |    SORT JOIN                  |         |     3 |  6015 |     5  (40)| 00:00:01 |
|   6 |     VIEW                      |         |     3 |  6015 |     4  (25)| 00:00:01 |
|   7 |      SORT GROUP BY            |         |     3 |    27 |     4  (25)| 00:00:01 |
|   8 |       TABLE ACCESS FULL       | EMP     |    14 |   126 |     3   (0)| 00:00:01 |
-----------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   5 - access("DEPT"."DEPTNO"="EMPS"."DEPTNO"(+))
       filter("DEPT"."DEPTNO"="EMPS"."DEPTNO"(+))

21 ligne(s) sÚlectionnÚe(s).

SQL>
SQL> explain plan for select dlst.dname
  2        ,ltrim(max(sys_connect_by_path(dlst.ename,',')) keep (dense_rank last order by dlst.curr),',') enames
  3  from (select dpte.dname
  4              ,dpte.ename
  5              ,row_number() over (partition by dpte.deptno order by dpte.ename) curr
  6              ,row_number() over (partition by dpte.deptno order by dpte.ename) -1 min1
  7        from (select dept.deptno
  8                    ,dept.dname
  9                    ,emp.ename
 10              from dept
 11              left join emp
 12              on dept.deptno=emp.deptno) dpte
 13        ) dlst
 14  group by dlst.dname
 15  connect by dlst.min1 = prior dlst.curr and dlst.dname = prior dlst.dname
 16  start with dlst.curr = 1
 17  order by 1 asc
 18  ;

ExplicitÚ.

SQL>
SQL> select * from table(dbms_xplan.display);

PLAN_TABLE_OUTPUT
----------------------------------------------------------------------------------------------------------------------------------

Plan hash value: 2397983189

-----------------------------------------------------------------------------------------------------
| Id  | Operation                                 | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                          |         |     4 |   168 |     9  (45)| 00:00:01 |
|   1 |  SORT ORDER BY                            |         |     4 |   168 |     9  (45)| 00:00:01 |
|   2 |   SORT GROUP BY                           |         |     4 |   168 |     9  (45)| 00:00:01 |
|*  3 |    CONNECT BY NO FILTERING WITH START-WITH|         |       |       |            |          |
|   4 |     VIEW                                  |         |    14 |   588 |     7  (29)| 00:00:01 |
|   5 |      WINDOW SORT                          |         |    14 |   308 |     7  (29)| 00:00:01 |
|   6 |       MERGE JOIN OUTER                    |         |    14 |   308 |     6  (17)| 00:00:01 |
|   7 |        TABLE ACCESS BY INDEX ROWID        | DEPT    |     4 |    52 |     2   (0)| 00:00:01 |
|   8 |         INDEX FULL SCAN                   | PK_DEPT |     4 |       |     1   (0)| 00:00:01 |
|*  9 |        SORT JOIN                          |         |    14 |   126 |     4  (25)| 00:00:01 |
|  10 |         TABLE ACCESS FULL                 | EMP     |    14 |   126 |     3   (0)| 00:00:01 |
-----------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   3 - access("DLST"."MIN1"=PRIOR "DLST"."CURR" AND "DLST"."DNAME"=PRIOR "DLST"."DNAME")
       filter("DLST"."CURR"=1)
   9 - access("DEPT"."DEPTNO"="EMP"."DEPTNO"(+))
       filter("DEPT"."DEPTNO"="EMP"."DEPTNO"(+))

25 ligne(s) sÚlectionnÚe(s).

SQL>
SQL> explain plan for select dept.dname
  2        ,emps.enames
  3  from dept
  4  left join (select dpte.deptno
  5                   ,ltrim(max(sys_connect_by_path(dpte.ename,',')) keep (dense_rank last order by dpte.curr),',') enames
  6             from (select deptno
  7                         ,ename
  8                         ,row_number() over (partition by deptno order by ename) curr
  9                         ,row_number() over (partition by deptno order by ename) -1 min1
 10                   from emp) dpte
 11             group by dpte.deptno
 12             connect by dpte.min1 = prior dpte.curr and dpte.deptno = prior dpte.deptno
 13             start with dpte.curr = 1) emps
 14  on dept.deptno = emps.deptno
 15  order by 1 asc
 16  ;

ExplicitÚ.

SQL>
SQL> select * from table(dbms_xplan.display);

PLAN_TABLE_OUTPUT
----------------------------------------------------------------------------------------------------------------------------------

Plan hash value: 4173785831

--------------------------------------------------------------------------------------------------------
| Id  | Operation                                    | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                             |         |     4 |  8072 |     9  (45)| 00:00:01 |
|   1 |  SORT ORDER BY                               |         |     4 |  8072 |     9  (45)| 00:00:01 |
|   2 |   MERGE JOIN OUTER                           |         |     4 |  8072 |     8  (38)| 00:00:01 |
|   3 |    TABLE ACCESS BY INDEX ROWID               | DEPT    |     4 |    52 |     2   (0)| 00:00:01 |
|   4 |     INDEX FULL SCAN                          | PK_DEPT |     4 |       |     1   (0)| 00:00:01 |
|*  5 |    SORT JOIN                                 |         |     3 |  6015 |     6  (50)| 00:00:01 |
|   6 |     VIEW                                     |         |     3 |  6015 |     5  (40)| 00:00:01 |
|   7 |      SORT GROUP BY                           |         |     3 |   138 |     5  (40)| 00:00:01 |
|*  8 |       CONNECT BY NO FILTERING WITH START-WITH|         |       |       |            |          |
|   9 |        VIEW                                  |         |    14 |   644 |     4  (25)| 00:00:01 |
|  10 |         WINDOW SORT                          |         |    14 |   126 |     4  (25)| 00:00:01 |
|  11 |          TABLE ACCESS FULL                   | EMP     |    14 |   126 |     3   (0)| 00:00:01 |
--------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   5 - access("DEPT"."DEPTNO"="EMPS"."DEPTNO"(+))
       filter("DEPT"."DEPTNO"="EMPS"."DEPTNO"(+))
   8 - access("DPTE"."MIN1"=PRIOR "DPTE"."CURR" AND "DPTE"."DEPTNO"=PRIOR "DPTE"."DEPTNO")
       filter("DPTE"."CURR"=1)

26 ligne(s) sÚlectionnÚe(s).

SQL>

Links: http://www.oracle-base.com/articles/misc/string-aggregation-techniques.php

No comments:

Post a Comment