HWM-在线分配算法

前言

最近在公司接触到的项目需要用到HWM算法,网上看了许多资料,简单说HWM是一种基于概率的贪婪式分配算法。算法介绍

算法背景

  • 广告投放
  • 在线分配
  • 最大量分配

Problem example

某广告主有三份广告订单,定向分别为

  1. {北京, 上海, 上海}; 400cpm
  2. {北京, 广州, 深圳}, {女} 300cpm
  3. {女} 500cpm

用户流量各种各样,覆盖不同的维度。

思路

  1. 合并定向条件,生成最小互斥散列
  2. 构造二分图
  3. 分配算法

这里我们详细介绍一下第1步,第2,3步可以在前言的链接里理解。

最小互斥散列

简单来说就是有多个集合,求最少的集合个数,使得每个集合互不相交。

上述问题每个维度的最小互斥散列如图:

最小互斥散列

supply

supply为上图的生成候选节点,计算方式是将不同的维度做笛卡尔积。

匹配原则

显然只要某条supply满足广告的定向要求(demand),就可以在二分图上构造邻接边。

那么条件就是,对于任意维度i,如果某条supply_x满足

  • 维度i在supply中但是不在demand中
  • 维度i同时在supply和demand并且supply每个维度下的value存在demand中

下面使用go简单实现的代码

package querylocktool

import (
    "strconv"
    "strings"

    "github.com/golang/glog"
)

func (dailyAds *DailyAds) getTagSet() map[string][][]string {
    tagSet := make(map[string][][]string)
    for _, ad := range *dailyAds {
        for _, target := range ad.Targets {
            targetName := target.TargetName
            tagSet[targetName] = append(tagSet[targetName], target.TargetValues)
        }
    }
    return tagSet
}

func createEachSmallest(values [][]string) ([][]string, []string) {
    var simpleSet []string
    var eachSmallest [][]string
    usedValue := make(map[string]bool)
    fatherValue := make(map[string]string)
    sonValue := make(map[string][]string)

    for id, valueSet := range values {
        for _, value := range valueSet {
            fatherValue[value] = fatherValue[value] + strconv.Itoa(id)
            if !usedValue[value] {
                simpleSet = append(simpleSet, value)
                usedValue[value] = true
            }
        }
    }
    for key, values := range fatherValue {
        sonValue[values] = append(sonValue[values], key)
    }
    for _, values := range sonValue {
        eachSmallest = append(eachSmallest, values)
    }
    eachSmallest = append(eachSmallest, []string{"not"})
    return eachSmallest, simpleSet
}

// CreateSmallestSet for DailyAds
func (dailyAds *DailyAds) CreateSmallestSet() ([]SmallestSet, map[string][]string) {
    tagSet := dailyAds.getTagSet()
    var smallestSet []SmallestSet
    simpleTagSet := make(map[string][]string)
    for key, values := range tagSet {
        eachSmallest, simpleSet := createEachSmallest(values)
        smallestSet = append(smallestSet, SmallestSet{key, eachSmallest})
        simpleTagSet[key] = simpleSet
    }
    return smallestSet, simpleTagSet
}

// MarkIDForSmallestSet  smallest -> id
func MarkIDForSmallestSet(smallestSet *[]SmallestSet) (map[[2]string]int,
    map[int]string, map[int][]string) {
    maxMapID := 0
    markToInt := make(map[[2]string]int)
    markToKey := make(map[int]string)
    markToValues := make(map[int][]string)

    for _, sets := range *smallestSet {
        key := sets.DimName
        for _, set := range sets.DimValue {
            maxMapID++
            for _, value := range set {
                markToInt[[2]string{key, value}] = maxMapID
                markToKey[maxMapID] = key
                markToValues[maxMapID] = append(markToValues[maxMapID], value)
            }
        }
    }
    return markToInt, markToKey, markToValues
}

// CreateDistinctOrderSearch distinct ordersearch
func CreateDistinctOrderSearch(orderSearch *[]string) []string {
    var result []string
    for _, str := range *orderSearch {
        flag := false
        for _, order := range result {
            if str == order {
                flag = true
                break
            }
        }
        if !flag {
            result = append(result, str)
        }
    }
    return result
}

func resursion(cpStrInfo *[]string, dep int, top int,
    smallestSet *[]SmallestSet, markToInt *map[[2]string]int, cpStr string) {
    if dep >= top {
        *cpStrInfo = append(*cpStrInfo, cpStr)
        return
    }
    key := (*smallestSet)[dep].DimName
    for _, value := range (*(smallestSet))[dep].DimValue {
        nowCpStr := cpStr + strconv.Itoa((*markToInt)[[2]string{key, value[0]}])
        if dep != top-1 {
            nowCpStr = nowCpStr + ";"
        }
        resursion(cpStrInfo, dep+1, top, smallestSet, markToInt, nowCpStr)
    }
}

// CreateCartesianProduct return string CartesianProduct or empty []string
func CreateCartesianProduct(markToInt *map[[2]string]int,
    smallestSet *[]SmallestSet) []string {
    var cpStrInfo []string
    count := len(*smallestSet)
    resursion(&cpStrInfo, 0, count, smallestSet, markToInt, "")
    return cpStrInfo
}

func match(supply, demand []string) bool {
    inDemand := make(map[string]bool)
    for _, value := range demand {
        inDemand[value] = true
    }
    for _, value := range supply {
        if !inDemand[value] {
            return false
        }
    }
    return true
}

func createEachEdge(demand *[]Target, supply *[]string,
    markToKey *map[int]string, markToValues *map[int][]string) ([]int, error) {
    var result []int

    targetMap := make(map[string][]string)
    for _, target := range *demand {
        targetMap[target.TargetName] = target.TargetValues
    }

    for i, eachSupply := range *supply {
        aryEachSupply := strings.Split(eachSupply, ";")
        flag := true
        for _, subEachSupply := range aryEachSupply {
            intNum, err := strconv.Atoi(subEachSupply)
            if err != nil {
                glog.Errorln("string to int falied")
                return nil, err
            }
            key := (*markToKey)[intNum]
            if _, ok := targetMap[key]; ok {
                if !match((*markToValues)[intNum], targetMap[key]) {
                    flag = false
                    break
                }
            }
        }
        if flag {
            result = append(result, i)
        }
    }
    return result, nil
}

// CreateEdgeFromSupplyToDemand create edge from supply -> demand
func (dailyAds *DailyAds) CreateEdgeFromSupplyToDemand(cpStrInfo *[]string,
    markToKey *map[int]string, markToValues *map[int][]string) (map[uint64][]int, error) {
    edgeMap := make(map[uint64][]int)
    for _, ad := range *dailyAds {
        matchAry, err := createEachEdge(&ad.Targets, cpStrInfo, markToKey, markToValues)
        if err != nil {
            glog.Errorln("create each edge error")
            return nil, err
        }
        edgeMap[ad.AdID] = matchAry
    }
    return edgeMap, nil
}
package querylocktool

import (
    "fmt"
    "strconv"
    "strings"
    "testing"
)

func TestHWM(t *testing.T) {
    // test input
    dailyAds := DailyAds{
        &Ad{
            20180725,
            100000,
            10,
            []Target{
                Target{
                    "nation", []string{"HE", "RU", "SA"},
                },
            },
        },
        &Ad{
            20180725,
            100001,
            20,
            []Target{
                Target{
                    "nation", []string{"HE", "RU", "SD"},
                },
                Target{
                    "adpid", []string{"mpp1_v3"},
                },
            },
        },
        &Ad{
            20180725,
            100002,
            30,
            []Target{
                Target{
                    "adpid",
                    []string{"mpp1_v3"},
                },
            },
        },
    }

    // algorithm
    smallestSet, simpleTagSet := dailyAds.CreateSmallestSet()
    markToInt, markToKey, markToValues := MarkIDForSmallestSet(&smallestSet)
    cpStrInfo := CreateCartesianProduct(&markToInt, &smallestSet)
    edges, _ := dailyAds.CreateEdgeFromSupplyToDemand(&cpStrInfo, &markToKey, &markToValues)

    // output log
    fmt.Println(simpleTagSet)
    for key, values := range edges {
        fmt.Println(key, ": ", values)
    }
    for id, supply := range cpStrInfo {
        targetInfo := strings.Split(supply, ";")
        fmt.Print(id, ":  ")
        for _, info := range targetInfo {
            intNum, err := strconv.Atoi(info)
            if err != nil {
                break
            }
            fmt.Print(markToKey[intNum], ": ", markToValues[intNum], " ")
        }
        fmt.Println()
    }
}

 上一篇
golang多进程并发 golang多进程并发
前言以前听说什么golang一把梭什么的很厉害,到现在位置也接触golang半个多月时间了,最主要的时间都是在看document,学习语法。golang什么的变量名大小写区分公有私有神马的太坑爹了。。。最近接触到了golang最为称赞的第一
2019-01-21
下一篇 
广告系统 广告系统
广告系统往往比其它互联网系统更具商业价值,广告技术赋能商业让投放更智能。 广告系统 - 寻量锁量控制寻量控制广告系统往往会抓取用户的行为信息来预估流量,这一部分叫做日志系统。 日志系统按天进行更新,定时生成报表,这一部分是寻量的主要来源
2018-07-19
  目录