前言
最近在公司接触到的项目需要用到HWM算法,网上看了许多资料,简单说HWM是一种基于概率的贪婪式分配算法。算法介绍
算法背景
- 广告投放
- 在线分配
- 最大量分配
Problem example
某广告主有三份广告订单,定向分别为
- {北京, 上海, 上海}; 400cpm
- {北京, 广州, 深圳}, {女} 300cpm
- {女} 500cpm
用户流量各种各样,覆盖不同的维度。
思路
- 合并定向条件,生成最小互斥散列
- 构造二分图
- 分配算法
这里我们详细介绍一下第1步,第2,3步可以在前言的链接里理解。
最小互斥散列
简单来说就是有多个集合,求最少的集合个数,使得每个集合互不相交。
上述问题每个维度的最小互斥散列如图:
supply
supply为上图的生成候选节点,计算方式是将不同的维度做笛卡尔积。
匹配原则
显然只要某条supply满足广告的定向要求(demand),就可以在二分图上构造邻接边。
那么条件就是,对于任意维度i,如果某条supply_x满足
- 维度i在supply中但是不在demand中
- 维度i同时在supply和demand并且supply每个维度下的value存在demand中
下面使用go简单实现的代码
package querylocktool
import (
"strconv"
"strings"
"github.com/golang/glog"
)
func (dailyAds *DailyAds) getTagSet() map[string][][]string {
tagSet := make(map[string][][]string)
for _, ad := range *dailyAds {
for _, target := range ad.Targets {
targetName := target.TargetName
tagSet[targetName] = append(tagSet[targetName], target.TargetValues)
}
}
return tagSet
}
func createEachSmallest(values [][]string) ([][]string, []string) {
var simpleSet []string
var eachSmallest [][]string
usedValue := make(map[string]bool)
fatherValue := make(map[string]string)
sonValue := make(map[string][]string)
for id, valueSet := range values {
for _, value := range valueSet {
fatherValue[value] = fatherValue[value] + strconv.Itoa(id)
if !usedValue[value] {
simpleSet = append(simpleSet, value)
usedValue[value] = true
}
}
}
for key, values := range fatherValue {
sonValue[values] = append(sonValue[values], key)
}
for _, values := range sonValue {
eachSmallest = append(eachSmallest, values)
}
eachSmallest = append(eachSmallest, []string{"not"})
return eachSmallest, simpleSet
}
// CreateSmallestSet for DailyAds
func (dailyAds *DailyAds) CreateSmallestSet() ([]SmallestSet, map[string][]string) {
tagSet := dailyAds.getTagSet()
var smallestSet []SmallestSet
simpleTagSet := make(map[string][]string)
for key, values := range tagSet {
eachSmallest, simpleSet := createEachSmallest(values)
smallestSet = append(smallestSet, SmallestSet{key, eachSmallest})
simpleTagSet[key] = simpleSet
}
return smallestSet, simpleTagSet
}
// MarkIDForSmallestSet smallest -> id
func MarkIDForSmallestSet(smallestSet *[]SmallestSet) (map[[2]string]int,
map[int]string, map[int][]string) {
maxMapID := 0
markToInt := make(map[[2]string]int)
markToKey := make(map[int]string)
markToValues := make(map[int][]string)
for _, sets := range *smallestSet {
key := sets.DimName
for _, set := range sets.DimValue {
maxMapID++
for _, value := range set {
markToInt[[2]string{key, value}] = maxMapID
markToKey[maxMapID] = key
markToValues[maxMapID] = append(markToValues[maxMapID], value)
}
}
}
return markToInt, markToKey, markToValues
}
// CreateDistinctOrderSearch distinct ordersearch
func CreateDistinctOrderSearch(orderSearch *[]string) []string {
var result []string
for _, str := range *orderSearch {
flag := false
for _, order := range result {
if str == order {
flag = true
break
}
}
if !flag {
result = append(result, str)
}
}
return result
}
func resursion(cpStrInfo *[]string, dep int, top int,
smallestSet *[]SmallestSet, markToInt *map[[2]string]int, cpStr string) {
if dep >= top {
*cpStrInfo = append(*cpStrInfo, cpStr)
return
}
key := (*smallestSet)[dep].DimName
for _, value := range (*(smallestSet))[dep].DimValue {
nowCpStr := cpStr + strconv.Itoa((*markToInt)[[2]string{key, value[0]}])
if dep != top-1 {
nowCpStr = nowCpStr + ";"
}
resursion(cpStrInfo, dep+1, top, smallestSet, markToInt, nowCpStr)
}
}
// CreateCartesianProduct return string CartesianProduct or empty []string
func CreateCartesianProduct(markToInt *map[[2]string]int,
smallestSet *[]SmallestSet) []string {
var cpStrInfo []string
count := len(*smallestSet)
resursion(&cpStrInfo, 0, count, smallestSet, markToInt, "")
return cpStrInfo
}
func match(supply, demand []string) bool {
inDemand := make(map[string]bool)
for _, value := range demand {
inDemand[value] = true
}
for _, value := range supply {
if !inDemand[value] {
return false
}
}
return true
}
func createEachEdge(demand *[]Target, supply *[]string,
markToKey *map[int]string, markToValues *map[int][]string) ([]int, error) {
var result []int
targetMap := make(map[string][]string)
for _, target := range *demand {
targetMap[target.TargetName] = target.TargetValues
}
for i, eachSupply := range *supply {
aryEachSupply := strings.Split(eachSupply, ";")
flag := true
for _, subEachSupply := range aryEachSupply {
intNum, err := strconv.Atoi(subEachSupply)
if err != nil {
glog.Errorln("string to int falied")
return nil, err
}
key := (*markToKey)[intNum]
if _, ok := targetMap[key]; ok {
if !match((*markToValues)[intNum], targetMap[key]) {
flag = false
break
}
}
}
if flag {
result = append(result, i)
}
}
return result, nil
}
// CreateEdgeFromSupplyToDemand create edge from supply -> demand
func (dailyAds *DailyAds) CreateEdgeFromSupplyToDemand(cpStrInfo *[]string,
markToKey *map[int]string, markToValues *map[int][]string) (map[uint64][]int, error) {
edgeMap := make(map[uint64][]int)
for _, ad := range *dailyAds {
matchAry, err := createEachEdge(&ad.Targets, cpStrInfo, markToKey, markToValues)
if err != nil {
glog.Errorln("create each edge error")
return nil, err
}
edgeMap[ad.AdID] = matchAry
}
return edgeMap, nil
}
package querylocktool
import (
"fmt"
"strconv"
"strings"
"testing"
)
func TestHWM(t *testing.T) {
// test input
dailyAds := DailyAds{
&Ad{
20180725,
100000,
10,
[]Target{
Target{
"nation", []string{"HE", "RU", "SA"},
},
},
},
&Ad{
20180725,
100001,
20,
[]Target{
Target{
"nation", []string{"HE", "RU", "SD"},
},
Target{
"adpid", []string{"mpp1_v3"},
},
},
},
&Ad{
20180725,
100002,
30,
[]Target{
Target{
"adpid",
[]string{"mpp1_v3"},
},
},
},
}
// algorithm
smallestSet, simpleTagSet := dailyAds.CreateSmallestSet()
markToInt, markToKey, markToValues := MarkIDForSmallestSet(&smallestSet)
cpStrInfo := CreateCartesianProduct(&markToInt, &smallestSet)
edges, _ := dailyAds.CreateEdgeFromSupplyToDemand(&cpStrInfo, &markToKey, &markToValues)
// output log
fmt.Println(simpleTagSet)
for key, values := range edges {
fmt.Println(key, ": ", values)
}
for id, supply := range cpStrInfo {
targetInfo := strings.Split(supply, ";")
fmt.Print(id, ": ")
for _, info := range targetInfo {
intNum, err := strconv.Atoi(info)
if err != nil {
break
}
fmt.Print(markToKey[intNum], ": ", markToValues[intNum], " ")
}
fmt.Println()
}
}