Laravel 示例代码

<?php
/*
 * Description: Redis HyperLogLog:统计基数算法
 * Author: Shuxiaoyuan
 * Email: sxy@shuxiaoyuan.com
 * DateTime: 2021/12/29 17:48
 *
 * 参考链接:
 * 1. https://zhuanlan.zhihu.com/p/58519480
 * 2. http://content.research.neustar.biz/blog/hll.html
 */

namespace App\Http\Controllers\Redis;

use App\Common\Tools;
use App\Http\Controllers\Controller;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\Redis;

/**
 * Description: Demo controller for the Redis HyperLogLog commands (PFADD, PFCOUNT, PFMERGE).
 *
 * HyperLogLog is a cardinality-estimation algorithm. It does not store the value of every
 * element; it relies on a probabilistic algorithm instead.
 *
 * For an input string a 64-bit hash is computed first. The first 14 bits select the bucket
 * (2^14, i.e. 16384 buckets in total); the remaining 50 bits drive the Bernoulli process.
 * Each bucket is 6 bits wide and records the position "count" of the first 1 bit; when
 * count > oldcount, count replaces oldcount.
 */
class RedisHyperLogLogController extends Controller
{
    /**
     * Maximum number of elements sent to Redis in a single PFADD call.
     */
    private const PFADD_BATCH_SIZE = 1000000;

    /**
     * Prefix prepended to every Redis key this controller reads or writes.
     *
     * @var string
     */
    public $key = 'RedisHyperLogLogController:';

    /**
     * Description: Adds generated elements ("user0", "user1", ...) to a HyperLogLog.
     * Author: Shuxiaoyuan
     * Email: sxy@shuxiaoyuan.com
     * DateTime: 2021/12/29 17:57
     *
     * Request input:
     *  - redis_key: optional key suffix; falls back to this method's name when empty.
     *  - number:    how many elements to generate (default 100); values <= 0 add nothing.
     *
     * @param Request $request
     *
     * @return array result of Tools::outSuccessInfo() wrapping the key, the cardinality
     *               reported by PFCOUNT and the number of elements that were sent
     */
    public function pfadd(Request $request): array
    {
        set_time_limit(0);
        // '-1' is the documented value for "no memory limit".
        ini_set('memory_limit', '-1');

        $suffix = $request->input('redis_key');
        $number = (int)$request->input('number', 100);

        $key = $this->key . ($suffix ?: __FUNCTION__);

        $add_count = 0;
        // Note: each HyperLogLog key can count close to 2^64 distinct elements.
        // Locally only about one million elements seemed to fit into a single call, so the
        // elements are always written in batches of at most PFADD_BATCH_SIZE.
        for ($start = 0; $start < $number; $start += self::PFADD_BATCH_SIZE) {
            $end = min($start + self::PFADD_BATCH_SIZE, $number);

            // Build a fresh batch on every pass so only one batch is held in memory.
            $users = [];
            for ($i = $start; $i < $end; $i++) {
                $users[] = 'user' . $i;
            }

            // The second argument must be an array.
            Redis::pfadd($key, $users);
            $add_count += $end - $start;
        }

        $count = Redis::pfcount($key);

        $data = [
            'key'         => $key,
            'pfadd_count' => $count,
            'add_count'   => $add_count,
        ];

        return Tools::outSuccessInfo($data);
    }

    /**
     * Description: Returns the estimated cardinality of the given HyperLogLog.
     * Author: Shuxiaoyuan
     * Email: sxy@shuxiaoyuan.com
     * DateTime: 2021/12/29 17:57
     *
     * Request input:
     *  - redis_key: key suffix appended to the controller prefix.
     *
     * @param Request $request
     *
     * @return array result of Tools::outErrorInfo() when the key does not exist,
     *               otherwise Tools::outSuccessInfo() wrapping the key and its PFCOUNT value
     */
    public function pfcount(Request $request): array
    {
        $key = $this->key . $request->input('redis_key');

        if (!Redis::exists($key)) {
            // The current source line number is passed as the first argument
            // (presumably the error code -- confirm against Tools::outErrorInfo).
            return Tools::outErrorInfo(__LINE__, 'redis 键' . '<' . $key . '>' . '不存在');
        }

        $data = [
            'key'     => $key,
            'pfcount' => Redis::pfcount($key),
        ];

        return Tools::outSuccessInfo($data);
    }

    /**
     * Description: Merges several HyperLogLogs into a single HyperLogLog.
     * Author: Shuxiaoyuan
     * Email: sxy@shuxiaoyuan.com
     * DateTime: 2021/12/29 17:58
     *
     * Request input:
     *  - redis_key: array of key suffixes; each is prefixed with the controller prefix.
     *
     * The destination key (prefix + this method's name) is deleted before merging, so the
     * result only reflects the keys named in this request.
     *
     * @param Request $request
     *
     * @return array result of Tools::outErrorInfo() when redis_key is not an array,
     *               otherwise Tools::outSuccessInfo() wrapping the PFMERGE reply, the merged
     *               cardinality, the destination key and the prefixed source keys
     */
    public function pfmerge(Request $request): array
    {
        $suffixes = $request->input('redis_key');

        // Validate before touching Redis so a bad request does not wipe the previous result.
        if (!is_array($suffixes)) {
            return Tools::outErrorInfo(__LINE__, '参数 redis_key 必须是数组');
        }

        $new_key = $this->key . __FUNCTION__;

        // Build the prefixed list by value (array keys preserved) instead of iterating by
        // reference, which would leave a dangling reference behind.
        $keys = [];
        foreach ($suffixes as $index => $suffix) {
            $keys[$index] = $this->key . $suffix;
        }

        Redis::del($new_key);

        // The second argument must be an array.
        $pfmerge = Redis::pfmerge($new_key, $keys);

        $count = Redis::pfcount($new_key);

        $data = [
            'pfmerge' => $pfmerge,
            'count'   => $count,
            'new_key' => $new_key,
            'keys'    => $keys,
        ];

        return Tools::outSuccessInfo($data);
    }
}