BQ 标准 SQL 用户定义函数
参考链接
https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions
UDF 语法
UDF - user defined function 通过 UDF 可以在 SQL 中创建永久或临时的函数,接受列作为输入,并对其执行操作,然后以 值的形式返回这些操作的结果。
UDF 结构
CREATE [OR REPLACE] [TEMPORARY | TEMP] FUNCTION [IF NOT EXISTS]
[[`project_name`.]dataset_name.]function_name
([named_parameter[, ...]])
[RETURNS data_type]
AS (sql_expression)
named_parameter:
param_name param_type
示例:
CREATE TEMP FUNCTION
addFourAndDivide -- 函数名称
(x INT64, y INT64) -- 函数参数(x,y) & 函数类型(INT64)
AS ((x + 4) / y); -- 定义函数的 SQL 表达式
-- 构成表
WITH numbers AS
(SELECT 1 as val
UNION ALL
SELECT 3 as val
UNION ALL
SELECT 4 as val
UNION ALL
SELECT 5 as val)
-- 执行 UDF 函数
SELECT val, addFourAndDivide(val, 2) AS result
FROM numbers;
示例 2
原语句
with page_fact as (
select
fullVisitorId,
visitStartTime,
(SELECT value FROM UNNEST(sessions.customDimensions) WHERE index = 43 GROUP BY 1) as AID,
(CASE WHEN totals.newVisits = 1 THEN 'New Visitor' Else 'Returning Visitor' END ) as User_Type,
date,
hits.page.hostname as hostname,
hits.page.pagePath as page,
hits.contentGroup.contentGroup1 as page_group_1,
(SELECT value FROM UNNEST(hits.customDimensions) WHERE index = 15 GROUP BY 1) as template,
(SELECT value FROM UNNEST(hits.customDimensions) WHERE index = 14 GROUP BY 1) as area,
hits.isEntrance,
totals.bounces,
hits.isExit
FROM
`***.ga_sessions_20200518` AS sessions,
unnest(hits) as hits
LIMIT
10
)
select
date, User_Type,
(CASE
WHEN hostname = "www.truemetrics.cn" THEN "官网"
WHEN hostname = "peixun.truemetrics.cn" THEN '培训'
ELSE "OT_Others"
END
) as UFS_PAGE_GROUP3,
COUNT(DISTINCT CONCAT(fullVisitorId, CAST(visitStartTime AS STRING))) AS Unique_PageViews,
COUNT(DISTINCT fullVisitorId ) as NUM_USERS,
COUNT(DISTINCT AID) as NUM_AIDS,
sum(CASE WHEN isEntrance = TRUE THEN 1 ELSE 0 END) as NUM_ENTRANCES,
sum(bounces) as NUM_BOUNCES,
sum(CASE WHEN isExit = TRUE THEN 1 ELSE 0 END) as NUM_EXITS
from page_fact
group by 1, 2, 3
使用 UDF 后的语句
-- 创建函数
CREATE TEMP FUNCTION getPageGroup (x STRING) -- 这块创建的是临时函数,可以替换为永久表
RETURNS STRING -- 注意此处选用了 JS UDF 下面 AS 部分为常规的 JS 语法
LANGUAGE js AS """
if (x == 'www.truemetrics.cn'){
return '官网'
}
else if (x == 'peixun.truemetrics.cn'){
return '培训'
}
""";
with page_fact as (
select
fullVisitorId,
visitStartTime,
(SELECT value FROM UNNEST(sessions.customDimensions) WHERE index = 43 GROUP BY 1) as AID,
(CASE WHEN totals.newVisits = 1 THEN 'New Visitor' Else 'Returning Visitor' END ) as User_Type,
date,
hits.page.hostname as hostname,
hits.page.pagePath as page,
hits.contentGroup.contentGroup1 as page_group_1,
(SELECT value FROM UNNEST(hits.customDimensions) WHERE index = 15 GROUP BY 1) as template,
(SELECT value FROM UNNEST(hits.customDimensions) WHERE index = 14 GROUP BY 1) as area,
hits.isEntrance,
totals.bounces,
hits.isExit
FROM
`***.ga_sessions_20200518` AS sessions,
unnest(hits) as hits
LIMIT
10
)
select
date, User_Type,
getPageGroup(hostname) as UFS_PAGE_GROUP3_by_UDF, -- 这块为 UDF 函数的引用
(CASE
WHEN hostname = "www.truemetrics.cn" THEN "官网"
WHEN hostname = "peixun.truemetrics.cn" THEN '培训'
ELSE "OT_Others"
END
) as UFS_PAGE_GROUP3,
COUNT(DISTINCT CONCAT(fullVisitorId, CAST(visitStartTime AS STRING))) AS Unique_PageViews,
COUNT(DISTINCT fullVisitorId ) as NUM_USERS,
COUNT(DISTINCT AID) as NUM_AIDS,
sum(CASE WHEN isEntrance = TRUE THEN 1 ELSE 0 END) as NUM_ENTRANCES,
sum(bounces) as NUM_BOUNCES,
sum(CASE WHEN isExit = TRUE THEN 1 ELSE 0 END) as NUM_EXITS
from page_fact
group by 1, 2, 3, 4