BQ 标准 SQL 用户定义函数


BQ 标准 SQL 用户定义函数

参考链接

https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions

UDF 语法

UDF - user defined function 通过 UDF 可以在 SQL 中创建永久或临时的函数,接受列作为输入,并对其执行操作,然后以 值的形式返回这些操作的结果。

UDF 结构

CREATE [OR REPLACE] [TEMPORARY | TEMP] FUNCTION [IF NOT EXISTS]
    [[`project_name`.]dataset_name.]function_name
    ([named_parameter[, ...]])
  [RETURNS data_type]
  AS (sql_expression)

named_parameter:
  param_name param_type

示例:

CREATE TEMP FUNCTION 
    addFourAndDivide -- 函数名称
    (x INT64, y INT64) -- 函数参数(x,y) & 函数类型(INT64)
    AS ((x + 4) / y); -- 定义函数的 SQL 表达式

-- 构成表
WITH numbers AS
  (SELECT 1 as val
  UNION ALL
  SELECT 3 as val
  UNION ALL
  SELECT 4 as val
  UNION ALL
  SELECT 5 as val)

-- 执行 UDF 函数
SELECT val, addFourAndDivide(val, 2) AS result
FROM numbers;

示例 2

原语句

with page_fact as (
select 
    fullVisitorId, 
    visitStartTime,
    (SELECT value FROM UNNEST(sessions.customDimensions) WHERE index = 43 GROUP BY 1) as AID, 
    (CASE WHEN totals.newVisits = 1 THEN 'New Visitor' Else 'Returning Visitor' END ) as User_Type,
    date,
    hits.page.hostname as hostname, 
    hits.page.pagePath as page, 
    hits.contentGroup.contentGroup1 as page_group_1,
    (SELECT value FROM UNNEST(hits.customDimensions) WHERE index = 15 GROUP BY 1) as template, 
    (SELECT value FROM UNNEST(hits.customDimensions) WHERE index = 14 GROUP BY 1) as area, 
    hits.isEntrance,
    totals.bounces, 
    hits.isExit
FROM
  `***.ga_sessions_20200518` AS sessions,
  unnest(hits) as hits
LIMIT
  10
)
select 
    date, User_Type, 
  (CASE
    WHEN hostname = "www.truemetrics.cn" THEN "官网"
    WHEN hostname = "peixun.truemetrics.cn" THEN '培训'
    ELSE "OT_Others"
      END
) as UFS_PAGE_GROUP3,
      COUNT(DISTINCT CONCAT(fullVisitorId, CAST(visitStartTime AS STRING))) AS Unique_PageViews,
      COUNT(DISTINCT fullVisitorId ) as NUM_USERS, 
      COUNT(DISTINCT AID) as NUM_AIDS, 
      sum(CASE WHEN isEntrance = TRUE THEN 1 ELSE 0 END) as NUM_ENTRANCES, 
      sum(bounces) as NUM_BOUNCES,
      sum(CASE WHEN isExit = TRUE THEN 1 ELSE 0 END) as NUM_EXITS 
from page_fact
group by 1, 2, 3

使用 UDF 后的语句

-- 创建函数
CREATE TEMP FUNCTION getPageGroup (x STRING) -- 这块创建的是临时函数,可以替换为永久表
    RETURNS STRING -- 注意此处选用了 JS UDF 下面 AS 部分为常规的 JS 语法
    LANGUAGE js AS """ 
    if (x == 'www.truemetrics.cn'){
        return '官网'
    }
    else if (x == 'peixun.truemetrics.cn'){
        return '培训'
    }
""";

with page_fact as (
select 
    fullVisitorId, 
    visitStartTime,
    (SELECT value FROM UNNEST(sessions.customDimensions) WHERE index = 43 GROUP BY 1) as AID, 
    (CASE WHEN totals.newVisits = 1 THEN 'New Visitor' Else 'Returning Visitor' END ) as User_Type,
    date,
    hits.page.hostname as hostname, 
    hits.page.pagePath as page, 
    hits.contentGroup.contentGroup1 as page_group_1,
    (SELECT value FROM UNNEST(hits.customDimensions) WHERE index = 15 GROUP BY 1) as template, 
    (SELECT value FROM UNNEST(hits.customDimensions) WHERE index = 14 GROUP BY 1) as area, 
    hits.isEntrance,
    totals.bounces, 
    hits.isExit
FROM
  `***.ga_sessions_20200518` AS sessions,
  unnest(hits) as hits
LIMIT
  10
)
select 
    date, User_Type, 
    getPageGroup(hostname) as UFS_PAGE_GROUP3_by_UDF, -- 这块为 UDF 函数的引用
  (CASE
    WHEN hostname = "www.truemetrics.cn" THEN "官网"
    WHEN hostname = "peixun.truemetrics.cn" THEN '培训'
    ELSE "OT_Others"
      END
) as UFS_PAGE_GROUP3,
      COUNT(DISTINCT CONCAT(fullVisitorId, CAST(visitStartTime AS STRING))) AS Unique_PageViews,
      COUNT(DISTINCT fullVisitorId ) as NUM_USERS, 
      COUNT(DISTINCT AID) as NUM_AIDS, 
      sum(CASE WHEN isEntrance = TRUE THEN 1 ELSE 0 END) as NUM_ENTRANCES, 
      sum(bounces) as NUM_BOUNCES,
      sum(CASE WHEN isExit = TRUE THEN 1 ELSE 0 END) as NUM_EXITS 
from page_fact
group by 1, 2, 3, 4