WITH races AS (
SELECT "800M" AS race,
[STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits),
STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits),
STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits),
STRUCT("Bosse" as name, [23.6, 26.2, 26.5, 27.1] as splits),
STRUCT("Rotich" as name, [24.7, 25.6, 26.9, 26.4] as splits),
STRUCT("Lewandowski" as name, [25.0, 25.7, 26.3, 27.2] as splits),
STRUCT("Kipketer" as name, [23.2, 26.1, 27.3, 29.4] as splits),
STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)]
AS participants)
SELECT
race,
participant
FROM races r
CROSS JOIN UNNEST(r.participants) as participant;
上述 SQL 结果为。
+------+---------------------------------------+
| race | participant |
+------+---------------------------------------+
| 800M | {Rudisha, [23.4, 26.3, 26.4, 26.1]} |
| 800M | {Makhloufi, [24.5, 25.4, 26.6, 26.1]} |
| 800M | {Murphy, [23.9, 26, 27, 26]} |
| 800M | {Bosse, [23.6, 26.2, 26.5, 27.1]} |
| 800M | {Rotich, [24.7, 25.6, 26.9, 26.4]} |
| 800M | {Lewandowski, [25, 25.7, 26.3, 27.2]} |
| 800M | {Kipketer, [23.2, 26.1, 27.3, 29.4]} |
| 800M | {Berian, [23.7, 26.1, 27, 29.3]} |
+------+---------------------------------------+
相当于1行,展开为了8行。
假如1行中,有两个同级的嵌套数组,例如:
WITH races AS (
SELECT "800M" AS race,
[STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits),
STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits),
STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits),
STRUCT("Bosse" as name, [23.6, 26.2, 26.5, 27.1] as splits),
STRUCT("Rotich" as name, [24.7, 25.6, 26.9, 26.4] as splits),
STRUCT("Lewandowski" as name, [25.0, 25.7, 26.3, 27.2] as splits),
STRUCT("Kipketer" as name, [23.2, 26.1, 27.3, 29.4] as splits),
STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)]
AS participants,
[STRUCT("Rudisha" as name, [1,1] as splits),
STRUCT("Makhloufi" as name, [2,2] as splits),
STRUCT("Murphy" as name, [3,3] as splits)]
AS participants2
)
SELECT
race, participant, participant2
FROM races r, UNNEST(r.participants) as participant, UNNEST(r.participants2) as participant2;
其结果为 24(8 x 3)。
如果原来为多行,例如:
WITH races AS (
(SELECT "800M" AS race,
[STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits),
STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits),
STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits),
STRUCT("Bosse" as name, [23.6, 26.2, 26.5, 27.1] as splits),
STRUCT("Rotich" as name, [24.7, 25.6, 26.9, 26.4] as splits),
STRUCT("Lewandowski" as name, [25.0, 25.7, 26.3, 27.2] as splits),
STRUCT("Kipketer" as name, [23.2, 26.1, 27.3, 29.4] as splits),
STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)]
AS participants,
[STRUCT("Rudisha" as name, [1,1] as splits),
STRUCT("Makhloufi" as name, [2,2] as splits),
STRUCT("Murphy" as name, [3,3] as splits)]
AS participants2 )
UNION ALL
(SELECT "600M" AS race,
[STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits),
STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits),
STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits),
STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)]
AS participants,
[STRUCT("Rudisha" as name, [1,1] as splits),
STRUCT("Murphy" as name, [3,3] as splits)]
AS participants2 )
)
SELECT
race, participant, participant2
FROM races r, UNNEST(r.participants) as participant, UNNEST(r.participants2) as participant2;
其结果为32行(24+8)(3X8 + 4X2)
注意,在 BigQuery 中:
请注意,前面的查询使用英文逗号运算符 (,) 来执行隐式 CROSS JOIN。
而 CROSS JOIN 返回两个 from_item 的笛卡尔积。
使用 CROSS JOIN 展平数组会排除具有空数组或 NULL 数组的行。
注意,CROSS JOIN 只有当连接的两个 from 子句独立时,才是 m*n
由于这里初始的 race 是一致的,所以这里的结果是 24+8, 而不是 24*8
但是,如上面显示,participants 和 participants2 是同一层级的,假如800m,可以看到的只有 8+3 行,而不是24行,则可能更容易被我们接受。
例如下述SQL:
WITH races AS (
(SELECT "800M" AS race,
[STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits),
STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits),
STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits),
STRUCT("Bosse" as name, [23.6, 26.2, 26.5, 27.1] as splits),
STRUCT("Rotich" as name, [24.7, 25.6, 26.9, 26.4] as splits),
STRUCT("Lewandowski" as name, [25.0, 25.7, 26.3, 27.2] as splits),
STRUCT("Kipketer" as name, [23.2, 26.1, 27.3, 29.4] as splits),
STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)]
AS participants,
[STRUCT("Rudisha" as name, [1.1,1.1] as splits),
STRUCT("Makhloufi" as name, [2.2,2.2] as splits),
STRUCT("Murphy" as name, [3.3,3.3] as splits)]
AS participants2 )
UNION ALL
(SELECT "600M" AS race,
[STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits),
STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits),
STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits),
STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)]
AS participants,
[STRUCT("Rudisha" as name, [1.1,1.1] as splits),
STRUCT("Murphy" as name, [3.3,3.3] as splits)]
AS participants2 )
)
SELECT * FROM(SELECT
r.race, participant
FROM races as r cross join UNNEST(r.participants) as participant)
UNION ALL
(SELECT
r.race, participant
FROM races as r cross join UNNEST(r.participants2) as participant)
现在结果数为 17 行,(8+3+4+2)符合我们的预期了。
但是这块还存在一个问题,可以看到上面的 STRUCT 我们为了合并,强行设置其格式相同,但是实际操作中,我们几乎不可能这么做。
如果能够让 participant1 和 participant2 分两列显示,但是又不 CROSS 就好了。
WITH races AS (
(SELECT "800M" AS race,
[STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits),
STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits),
STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits),
STRUCT("Bosse" as name, [23.6, 26.2, 26.5, 27.1] as splits),
STRUCT("Rotich" as name, [24.7, 25.6, 26.9, 26.4] as splits),
STRUCT("Lewandowski" as name, [25.0, 25.7, 26.3, 27.2] as splits),
STRUCT("Kipketer" as name, [23.2, 26.1, 27.3, 29.4] as splits),
STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)]
AS participants,
[STRUCT("Rudisha" as name, [1,1] as splits),
STRUCT("Makhloufi" as name, [2,2] as splits),
STRUCT("Murphy" as name, [3,3] as splits)]
AS participants2 )
UNION ALL
(SELECT "600M" AS race,
[STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits),
STRUCT("Makhloufi" as name, [24.5, 25.4, 26.6, 26.1] as splits),
STRUCT("Murphy" as name, [23.9, 26.0, 27.0, 26.0] as splits),
STRUCT("Berian" as name, [23.7, 26.1, 27.0, 29.3] as splits)]
AS participants,
[STRUCT("Rudisha" as name, [1,1] as splits),
STRUCT("Murphy" as name, [3,3] as splits)]
AS participants2 )
)
SELECT * FROM(SELECT
r.race, participant, struct('',[]) as participant2
FROM races as r cross join UNNEST(r.participants) as participant)
UNION ALL
(SELECT
r.race, struct('',[.0]) as participant, participant2
FROM races as r cross join UNNEST(r.participants2) as participant2)
使用上述方式,注意,区别在于,合并时,预留了位置,注意,这里的 UNION 格式必须相同,这里使用了 struct('',[]) 的构造方式。