本文介绍AUC和GAUC
参考链接
编程实现
1 | def calculate_auc(ground_truth, predictions): |
SQL实现
详情见:深入理解AUC
推导思路:
- 对每个正样本,统计其得分大于负样本得分的概率(按得分从高到低排序时,排在该正样本之后的负样本数 / 负样本总数)
- 对所有正样本的上述概率求均值,即为AUC
SQL实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
-- Global AUC over all rows of some.table, using the rank-sum identity
--   auc = (ry - n1 * (n1 + 1) / 2) / (n0 * n1)
-- where ranks are taken in ascending score order, n1 is the number of rows
-- with y = 1, n0 the number of rows with y = 0, and ry the sum of the ranks
-- of the y = 1 rows.
with ranked as (
    -- 1-based position of every row in ascending score order
    select
        y,
        score,
        row_number() over (order by score asc) as r
    from some.table
),
tie_adjusted as (
    -- rows sharing a score all receive the mean of their positions, so a tied
    -- (y = 1, y = 0) pair contributes 0.5 and the result no longer depends on
    -- the arbitrary order row_number() gives to equal scores
    select
        y,
        avg(r) over (partition by score) as r
    from ranked
),
totals as (
    select
        sum(if(y = 0, 1, 0)) as n0,
        sum(if(y = 1, 1, 0)) as n1,
        sum(if(y = 1, r, 0)) as ry
    from tie_adjusted
)
select
    -- auc is undefined when either class is absent; return null instead of
    -- dividing by zero
    if(n0 > 0 and n1 > 0, (ry - 0.5 * n1 * (n1 + 1)) / n0 / n1, null) as auc
from totals;
SQL实现(分场景+pcoc实现)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
-- Per-scene metrics computed from some.table:
--   auc  : rank-sum AUC within the scene,
--          (ry - n1 * (n1 + 1) / 2) / (n0 * n1)
--   ctr  : share of rows with y = 1, n1 / (n1 + n0)
--   pctr : mean of score
--   pcoc : pctr divided by ctr
-- n1 / n0 are the per-scene counts of y = 1 / y = 0 rows and ry is the sum of
-- the ascending-score ranks of the y = 1 rows.
with ranked as (
    -- 1-based position of every row inside its scene, ascending by score
    select
        scene,
        y,
        score,
        row_number() over (partition by scene order by score asc) as r
    from some.table
),
tie_adjusted as (
    -- rows of one scene sharing a score all receive the mean of their
    -- positions, so a tied (y = 1, y = 0) pair contributes 0.5 and the result
    -- does not depend on the arbitrary order row_number() gives to ties
    select
        scene,
        y,
        score,
        avg(r) over (partition by scene, score) as r
    from ranked
),
scene_totals as (
    select
        scene,
        sum(if(y = 0, 1, 0)) as n0,
        sum(if(y = 1, 1, 0)) as n1,
        sum(if(y = 1, r, 0)) as ry,
        avg(score) as pctr
    from tie_adjusted
    -- one output row per scene; without this the aggregates cannot be
    -- combined with the plain scene column
    group by scene
)
select
    scene,
    -- undefined when the scene has only one class; null instead of a
    -- division by zero
    if(n0 > 0 and n1 > 0, (ry - 0.5 * n1 * (n1 + 1)) / n0 / n1, null) as auc,
    -- 1.0 * forces fractional division in engines that truncate integers
    1.0 * n1 / (n1 + n0) as ctr,
    pctr,
    -- ctr is 0 when the scene has no y = 1 rows, so pcoc is null there
    if(n1 > 0, pctr / (1.0 * n1 / (n1 + n0)), null) as pcoc
from scene_totals;