Compare commits

...

2 Commits

| SHA1 | Message | Date |
|---|---|---|
| d8a084c69b | /php/php-2darray-filter | 2024-11-11 10:45:11 +08:00 |
| 916f3a4405 | php/hash-algo | 2024-11-08 16:13:34 +08:00 |
3 changed files with 904 additions and 1 deletion


```diff
@@ -50,7 +50,9 @@ export default defineConfig({
           { text: '制作7.4的FPM环境镜像', link: '/php/docker-phpfpm74' },
           { text: '基于PHP8.x制作workman环境镜像', link: '/php/docker-php8' },
           { text: '源码编译安装PHP8.2环境', link: '/php/php82' },
-          { text: '为PHP7.4安装pgsql和pdo_pgsql扩展', link: '/php/php74-pgsql' }
+          { text: '为PHP7.4安装pgsql和pdo_pgsql扩展', link: '/php/php74-pgsql' },
+          { text: 'PHP的hash()函数中不同algo算法的性能对比', link: '/php/hash-algo' },
+          { text: 'PHP对二维数组做去重并取得重复项(10万+记录)', link: '/php/php-2darray-filter' }
         ]
       },
       {
```

docs/src/php/hash-algo.md (new file, 534 lines added)

@@ -0,0 +1,534 @@
# Performance comparison of the different algo algorithms in PHP's hash() function
> wandoubaba / 2022-08-02
## Usage of the hash() function
The description on `php.net`:
```txt
(PHP 5 >= 5.1.2, PHP 7, PHP 8, PECL hash >= 1.1)
hash — Generate a hash value (message digest)
```
### Description
```php
hash(
    string $algo,
    string $data,
    bool $binary = false,
    array $options = []
): string
```
### Parameters
|Parameter|Description|
|---|---|
|algo|Name of the hashing algorithm to use, e.g. "sha256". See hash_algos() for the algorithms supported in the current environment.|
|data|Message to be hashed.|
|binary|When set to true, outputs raw binary data; when set to false, outputs a lowercase hex string.|
|options|An array of options for the various hashing algorithms. Currently only the "seed" parameter is supported by the MurmurHash algorithms.|
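Since only MurmurHash currently honors `options`, here is a minimal sketch of the `seed` option (an assumption-laden example: it requires PHP 8.1+, where both the murmur3 family and the `options` argument were added):
```php
<?php
// Same input, different seed => a different MurmurHash digest (PHP 8.1+).
echo hash('murmur3a', 'hello'), "\n";                        // seed defaults to 0
echo hash('murmur3a', 'hello', false, ['seed' => 42]), "\n"; // explicit seed
?>
```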
### Return value
Returns the message digest as raw binary data if `binary` is set to `true`, otherwise as a lowercase hex string.
### Example
```php
<?php
echo hash('sha256', 'The quick brown fox jumped over the lazy dog.');
?>
```
Result:
```txt
68b1282b91de2c054c36629cb8dd447f12f096d3e3c587978dc2248444633483
```
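The algorithm names accepted by the first parameter vary by PHP version and build; a quick sketch to list what the current environment supports:
```php
<?php
// Print every hashing algorithm registered in this PHP build.
// The exact list differs between versions and builds.
print_r(hash_algos());
?>
```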
## Benchmark code
```php
function hashTest()
{
    $algos = hash_algos();
    $plaintext = (string)time();
    $result = [];
    foreach ($algos as $algo) {
        // microtimeFloat() returns the current Unix timestamp in microseconds as a float
        $start = microtimeFloat();
        for ($i = 0; $i < 500000; $i++) {
            $hash = hash($algo, $plaintext);
        }
        $end = microtimeFloat();
        $duration = $end - $start;
        $result[] = [
            'algo' => $algo,
            'plaintext' => $plaintext,
            'hash' => $hash,
            'hash_length' => mb_strlen($hash),
            'duration' => $duration,
        ];
    }
    // Sort ascending by duration, fastest algorithm first
    usort($result, function ($a, $b) {
        if ($a['duration'] == $b['duration']) return 0;
        return ($a['duration'] < $b['duration']) ? -1 : 1;
    });
    return $result;
}

/**
 * Return the current system time as a float with microsecond precision
 *
 * @author Aaron <chenqiang@h024.cn>
 */
function microtimeFloat()
{
    list($usec, $sec) = explode(" ", microtime());
    return ((float)$usec + (float)$sec);
}

$result = hashTest();
// Save the results to a file
file_put_contents('hashtest.log', json_encode($result, JSON_PRETTY_PRINT));
// Print the results to the console
echo json_encode($result, JSON_PRETTY_PRINT);
```
Line 3: the hash_algos() function returns an array of all hashing algorithms registered on the current system; its return value can differ from one environment to another.
Line 9: the loop count can be adjusted to control how long the program runs.
Line 10: this is the actual hash() call being measured.
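As an aside, the same float timestamp can be obtained without the string juggling in microtimeFloat(); a sketch of two built-in alternatives:
```php
<?php
$start = microtime(true); // float seconds since the Unix epoch
$startNs = hrtime(true);  // int nanoseconds, monotonic clock (PHP 7.3+)

usleep(1000); // stand-in for the code under test

$elapsed = microtime(true) - $start;          // wall-clock seconds
$elapsedNs = (hrtime(true) - $startNs) / 1e9; // monotonic seconds
printf("%.6fs / %.6fs\n", $elapsed, $elapsedNs);
```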
## Results
Below is the output of the program above, run under PHP 8.2 on an Alibaba Cloud host with 2 cores and 2 GB of RAM:
```json
[
{
"algo": "adler32",
"plaintext": "1731052880",
"hash": "0b0b0204",
"hash_length": 8,
"duration": 0.0356450080871582
},
{
"algo": "fnv132",
"plaintext": "1731052880",
"hash": "3c5342f6",
"hash_length": 8,
"duration": 0.0357661247253418
},
{
"algo": "murmur3a",
"plaintext": "1731052880",
"hash": "318792bf",
"hash_length": 8,
"duration": 0.03708505630493164
},
{
"algo": "fnv1a32",
"plaintext": "1731052880",
"hash": "1c2b0c7c",
"hash_length": 8,
"duration": 0.03734302520751953
},
{
"algo": "joaat",
"plaintext": "1731052880",
"hash": "339d43f2",
"hash_length": 8,
"duration": 0.03812694549560547
},
{
"algo": "fnv1a64",
"plaintext": "1731052880",
"hash": "48cc0e5b3465f9bc",
"hash_length": 16,
"duration": 0.03898215293884277
},
{
"algo": "fnv164",
"plaintext": "1731052880",
"hash": "c0e414dd041b2b96",
"hash_length": 16,
"duration": 0.03917884826660156
},
{
"algo": "crc32b",
"plaintext": "1731052880",
"hash": "071aee1d",
"hash_length": 8,
"duration": 0.042388916015625
},
{
"algo": "crc32c",
"plaintext": "1731052880",
"hash": "b4fac63a",
"hash_length": 8,
"duration": 0.043972015380859375
},
{
"algo": "crc32",
"plaintext": "1731052880",
"hash": "1224fccf",
"hash_length": 8,
"duration": 0.0441899299621582
},
{
"algo": "xxh32",
"plaintext": "1731052880",
"hash": "1688db87",
"hash_length": 8,
"duration": 0.04661202430725098
},
{
"algo": "xxh64",
"plaintext": "1731052880",
"hash": "ca8bfe429f26faed",
"hash_length": 16,
"duration": 0.050657033920288086
},
{
"algo": "murmur3f",
"plaintext": "1731052880",
"hash": "adc032c75840b96e35f1ac67d5922176",
"hash_length": 32,
"duration": 0.0528111457824707
},
{
"algo": "xxh3",
"plaintext": "1731052880",
"hash": "7c27b6c5b10dd4b1",
"hash_length": 16,
"duration": 0.053314924240112305
},
{
"algo": "murmur3c",
"plaintext": "1731052880",
"hash": "c241c877191bf45f581534857e465d07",
"hash_length": 32,
"duration": 0.05788111686706543
},
{
"algo": "xxh128",
"plaintext": "1731052880",
"hash": "143f2715d4964c3b760d42ca57a6232d",
"hash_length": 32,
"duration": 0.0603790283203125
},
{
"algo": "md4",
"plaintext": "1731052880",
"hash": "679f2543a532cedfa7a6c80297f8c6b1",
"hash_length": 32,
"duration": 0.09148597717285156
},
{
"algo": "tiger128,3",
"plaintext": "1731052880",
"hash": "2995e18c5ebf2117d7022643dd533ded",
"hash_length": 32,
"duration": 0.11197614669799805
},
{
"algo": "tiger160,3",
"plaintext": "1731052880",
"hash": "2995e18c5ebf2117d7022643dd533dedbb6b3979",
"hash_length": 40,
"duration": 0.11655998229980469
},
{
"algo": "md5",
"plaintext": "1731052880",
"hash": "3d0fbe9a3f54ba9652b589de72f2c127",
"hash_length": 32,
"duration": 0.12003684043884277
},
{
"algo": "tiger192,3",
"plaintext": "1731052880",
"hash": "2995e18c5ebf2117d7022643dd533dedbb6b39795e7df52b",
"hash_length": 48,
"duration": 0.12221193313598633
},
{
"algo": "tiger128,4",
"plaintext": "1731052880",
"hash": "df6c1ff00efa220ded74c0f50c9115c3",
"hash_length": 32,
"duration": 0.12950611114501953
},
{
"algo": "sha1",
"plaintext": "1731052880",
"hash": "2eae88f9041654c26e246993ad9e42eb480bdc39",
"hash_length": 40,
"duration": 0.13037681579589844
},
{
"algo": "tiger160,4",
"plaintext": "1731052880",
"hash": "df6c1ff00efa220ded74c0f50c9115c36242e1c3",
"hash_length": 40,
"duration": 0.13109397888183594
},
{
"algo": "tiger192,4",
"plaintext": "1731052880",
"hash": "df6c1ff00efa220ded74c0f50c9115c36242e1c3632d5ae0",
"hash_length": 48,
"duration": 0.13560700416564941
},
{
"algo": "ripemd128",
"plaintext": "1731052880",
"hash": "ab449649d9bd2ae6e6ee528e06463e51",
"hash_length": 32,
"duration": 0.1749570369720459
},
{
"algo": "ripemd256",
"plaintext": "1731052880",
"hash": "21ff1562f38318ef507b5f62e6564916d5ab53c7c96f9f8518b3b5629f8d854c",
"hash_length": 64,
"duration": 0.18474388122558594
},
{
"algo": "sha224",
"plaintext": "1731052880",
"hash": "57b48e6adaeba2d83d834ea9887d850f3114beb29c901f848c442af5",
"hash_length": 56,
"duration": 0.23443293571472168
},
{
"algo": "sha256",
"plaintext": "1731052880",
"hash": "9e2f2214266e92c1576d577107795879535954efee4bc6666bd5967ba0a77214",
"hash_length": 64,
"duration": 0.24018502235412598
},
{
"algo": "ripemd160",
"plaintext": "1731052880",
"hash": "1d525e1ed3fe5ba16cae04e371b718962275d58d",
"hash_length": 40,
"duration": 0.25841307640075684
},
{
"algo": "ripemd320",
"plaintext": "1731052880",
"hash": "874becf53daf0e229f0b205cf867d713f371f59b755a0a47707e01d80a4a54ce2776d52310acb299",
"hash_length": 80,
"duration": 0.27494096755981445
},
{
"algo": "haval128,3",
"plaintext": "1731052880",
"hash": "30feb99f6d6aa0c7969bd3a25388ced1",
"hash_length": 32,
"duration": 0.2791287899017334
},
{
"algo": "haval224,3",
"plaintext": "1731052880",
"hash": "234f1dc727a58e475716464a330048f7181c92937676f81fb2919d20",
"hash_length": 56,
"duration": 0.28873682022094727
},
{
"algo": "haval256,3",
"plaintext": "1731052880",
"hash": "567602816b7d3dcf3fb19b1886ccef81ed793acc42ed6fde88d83785fba78567",
"hash_length": 64,
"duration": 0.29075002670288086
},
{
"algo": "haval160,3",
"plaintext": "1731052880",
"hash": "44b0fd3eecc2c81f5928a5c36217a6c229ec0086",
"hash_length": 40,
"duration": 0.29707789421081543
},
{
"algo": "sha512\/224",
"plaintext": "1731052880",
"hash": "8f560689714be2fff4fb97564455703beca83cd80a06260359c89fda",
"hash_length": 56,
"duration": 0.301954984664917
},
{
"algo": "sha512\/256",
"plaintext": "1731052880",
"hash": "d052458b49b5f376ae7fde86e701d0f785415e14e2cfa9293fc070628976f389",
"hash_length": 64,
"duration": 0.31970787048339844
},
{
"algo": "sha3-256",
"plaintext": "1731052880",
"hash": "5693220da172f29683f864620d09488e046ffbc8ab32c294aa876c3c9e952d3b",
"hash_length": 64,
"duration": 0.3242812156677246
},
{
"algo": "sha384",
"plaintext": "1731052880",
"hash": "3fcbfdee00c0743f029ef9fe41ca71201fc17bd4176fb55ad7de5e2bddbdc7c37b82abbf9c861b2e92b49a31f3747761",
"hash_length": 96,
"duration": 0.3272209167480469
},
{
"algo": "sha512",
"plaintext": "1731052880",
"hash": "3de3691d7f46b191f83de5c61ca2bf117304836e2043c8c6ccdc13ffabbfd730c09ccacdbd89ea775e7fcff307e8556b0db5013de9f03f8422a3e65ec239850a",
"hash_length": 128,
"duration": 0.3310129642486572
},
{
"algo": "sha3-224",
"plaintext": "1731052880",
"hash": "ff8db898790d35274858765174723373d3b9d5c84a9c444b168e7131",
"hash_length": 56,
"duration": 0.3320798873901367
},
{
"algo": "sha3-384",
"plaintext": "1731052880",
"hash": "2c4de9a4d171f9a6d828b43378020636f3f93c7ffe25af363962bb348e98b985821017837b1df535fc08e19c46e981b2",
"hash_length": 96,
"duration": 0.33756589889526367
},
{
"algo": "sha3-512",
"plaintext": "1731052880",
"hash": "92e0ce73c57e478a0db8d356a510605208976a7e1fd7fc6d121961594046b06c0269aa79febffb05cae20eb1b0417e49ee9459dd20edfa088d4083969ba9bd98",
"hash_length": 128,
"duration": 0.34814000129699707
},
{
"algo": "haval192,4",
"plaintext": "1731052880",
"hash": "aa197dfe3d13c6a24e13d8cf9c2bd2ecca7ecf67e2c54f10",
"hash_length": 48,
"duration": 0.37153005599975586
},
{
"algo": "whirlpool",
"plaintext": "1731052880",
"hash": "5d2b1196eed6ece358d26fcff177815e76dd86423ae14fcf392983a3d3a0ffaa535554c406a7c9a9368faa1432d4b64b1ed5d88b49e7b9268df8ecd5a747a790",
"hash_length": 128,
"duration": 0.37224292755126953
},
{
"algo": "haval128,4",
"plaintext": "1731052880",
"hash": "8ab2c21e36394629599a18ba1852df50",
"hash_length": 32,
"duration": 0.37239789962768555
},
{
"algo": "haval224,4",
"plaintext": "1731052880",
"hash": "19c27883c12eec6e54be9ee51fffa6f4140f5a05dc84f08687546e70",
"hash_length": 56,
"duration": 0.3732109069824219
},
{
"algo": "haval256,4",
"plaintext": "1731052880",
"hash": "8e2977b2732673850d01c775ab67443f63ea79b78b178a6b14b0bb494ec54e3f",
"hash_length": 64,
"duration": 0.3793179988861084
},
{
"algo": "haval192,3",
"plaintext": "1731052880",
"hash": "18f0a48cfa231836eed2aca2f8f98f048fcc7219eabee1bc",
"hash_length": 48,
"duration": 0.40006399154663086
},
{
"algo": "haval160,4",
"plaintext": "1731052880",
"hash": "429c20881cd6306fdcea546b96e953c3a2749e71",
"hash_length": 40,
"duration": 0.4010169506072998
},
{
"algo": "haval128,5",
"plaintext": "1731052880",
"hash": "dc042e7f4e96372c4563b71b2aacdf65",
"hash_length": 32,
"duration": 0.43589305877685547
},
{
"algo": "haval160,5",
"plaintext": "1731052880",
"hash": "2056ab3d4d1df6272abdc4b3628b8a951c5f0b85",
"hash_length": 40,
"duration": 0.4384310245513916
},
{
"algo": "haval224,5",
"plaintext": "1731052880",
"hash": "7ec23d7be7690f83842f6fd9ccaf44e1f40ec098c6510f1f97177463",
"hash_length": 56,
"duration": 0.4448680877685547
},
{
"algo": "haval192,5",
"plaintext": "1731052880",
"hash": "a35a42df32511f4060b5a0e41acab061b49fc41e0422dee9",
"hash_length": 48,
"duration": 0.44518280029296875
},
{
"algo": "haval256,5",
"plaintext": "1731052880",
"hash": "2944d853e1595824c0aeae5dd7350657b61670261649d7abcb6d642b72232148",
"hash_length": 64,
"duration": 0.44597697257995605
},
{
"algo": "gost",
"plaintext": "1731052880",
"hash": "7901ff33178cadc7fa90463bbb514eae0539b9f0b17e36090c388d112f0ebe0c",
"hash_length": 64,
"duration": 0.8522090911865234
},
{
"algo": "gost-crypto",
"plaintext": "1731052880",
"hash": "291febf288452d3d1aef8540f8681b0f04c690f8ea3c56deabd8a2f2d5eeef1b",
"hash_length": 64,
"duration": 0.8546960353851318
},
{
"algo": "snefru",
"plaintext": "1731052880",
"hash": "f109fe0633cb7cbf3b6d143982fd37e0693ea7bbc9106e3cc95210e7ffceb438",
"hash_length": 64,
"duration": 1.1700870990753174
},
{
"algo": "snefru256",
"plaintext": "1731052880",
"hash": "f109fe0633cb7cbf3b6d143982fd37e0693ea7bbc9106e3cc95210e7ffceb438",
"hash_length": 64,
"duration": 1.1712009906768799
},
{
"algo": "md2",
"plaintext": "1731052880",
"hash": "e182e2ab097103ae22afee2185cc2e1d",
"hash_length": 32,
"duration": 1.7355611324310303
}
]
```
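The log written by the script is already sorted by duration, so reading back the fastest entries is trivial; a small sketch (assuming the hashtest.log produced above):
```php
<?php
// Show the five fastest algorithms from the saved benchmark log.
$result = json_decode(file_get_contents('hashtest.log'), true);
foreach (array_slice($result, 0, 5) as $row) {
    printf("%-10s %.4fs\n", $row['algo'], $row['duration']);
}
```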

docs/src/php/php-2darray-filter.md (new file, 367 lines added)

@@ -0,0 +1,367 @@
# Deduplicating a 2D array in PHP and collecting the duplicates: with 100k rows array_filter is too slow, while array_map plus array_diff is fastest
## Background
We have a two-dimensional array with roughly 100,000 elements, each inner array holding about 10 fields (essentially a database result set). The data must be deduplicated by a specified field, and in the end we need the detail rows that were **discarded** by the deduplication.
> A typical scenario is importing an Excel sheet into a database: before the import, the sheet data is deduplicated once as a preprocessing step, and the original data, the processed data, and the discarded data are all shown to the user; only after the user confirms is the data imported into the database.
## Implementation with array_map and array_diff
### Step 1 - Deduplicate the array elements by the specified field
```php
function arrayUniqueByKey(array $arr, string $key): array
{
    $arr = array_reverse($arr); // Reverse so the first of any duplicates is kept; without this, the last duplicate would win
    if (isset($arr[0][$key])) {
        foreach ($arr as $v) {
            $newArr[$v[$key]] = $v;
        }
    }
    $newArr = isset($newArr) ? array_values($newArr) : [];
    $newArr = array_reverse($newArr); // Reverse back to restore the original order
    return $newArr;
}
```
> One pass over the array is unavoidable. During that pass we simply use the value of the key field as the key of a new array; when the key field repeats, the assignment overwrites the earlier entry, which is exactly what performs the deduplication. A tiny sketch of that overwrite-by-key effect follows.
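A minimal demonstration, using three hypothetical rows:
```php
<?php
$rows = [
    ['phone' => '13300000001', 'name' => 'a'],
    ['phone' => '13300000002', 'name' => 'b'],
    ['phone' => '13300000001', 'name' => 'c'], // duplicate phone
];
$unique = [];
foreach ($rows as $row) {
    $unique[$row['phone']] = $row; // a repeated phone simply overwrites the earlier row
}
// 'c' survives here; with the array_reverse() trick above, 'a' would be kept instead.
print_r(array_values($unique));
```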
### Step 2 - Compute the difference between the original array and the deduplicated array
```php
function arrayDiff(array $arr1, array $arr2): array
{
    // Serialize each row to a JSON string so rows can be compared as plain strings
    $arr1 = array_map(function ($v) {
        return json_encode($v);
    }, $arr1);
    $arr2 = array_map(function ($v) {
        return json_encode($v);
    }, $arr2);
    $diff = array_diff($arr1, $arr2);
    // Decode the remaining JSON strings back into rows
    $diff = array_map(function ($v) {
        return json_decode($v, true);
    }, $diff);
    return array_values($diff);
}
```
> First convert both multidimensional arrays into one-dimensional arrays of JSON strings, then use array_diff() to compute the difference of the one-dimensional arrays, and finally convert the one-dimensional difference back into a multidimensional array.
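One caveat worth noting: json_encode() makes the rows comparable as strings, so two rows with equal values but different key order produce different JSON and will not match in array_diff(). That is harmless here because every row is built with the same key order; a minimal illustration:
```php
<?php
// Equal contents, different key order => different JSON strings.
var_dump(json_encode(['a' => 1, 'b' => 2]) === json_encode(['b' => 2, 'a' => 1])); // bool(false)
```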
### Full test script
```php
<?php
ini_set('memory_limit', '1G');

/**
 * Return various representations of the current system time with millisecond precision
 *
 * @author Aaron <chenqiang@h024.cn>
 */
function getSystemTimes()
{
    list($usec, $sec) = explode(" ", microtime());
    $microtime = ((float)$usec + (float)$sec);
    $time = (int)floor($microtime);
    $millsecond = round($microtime - $time, 3) * 1000;
    $millsecond = str_pad($millsecond, 3, '0', STR_PAD_LEFT);
    $weekarray = array("日", "一", "二", "三", "四", "五", "六");
    $data['year'] = date('Y', $time);
    $data['month'] = date('m', $time);
    $data['day'] = date('d', $time);
    $data['hour'] = date('H', $time);
    $data['hour_pre'] = date('A', $time);
    $data['hour_12'] = date('h', $time);
    $data['minute'] = date('i', $time);
    $data['second'] = date('s', $time);
    $data['millisecond'] = $millsecond;
    $data['date'] = date('Y-m-d', $time);
    $data['time'] = date('H:i:s', $time);
    $data['weekday'] = "星期{$weekarray[date('w', $time)]}";
    $data['weekday_index'] = date('w', $time);
    $data['time_milli'] = "{$data['time']}.{$millsecond}";
    $data['datetime'] = date('Y-m-d H:i:s', $time);
    $data['datetime_milli'] = "{$data['datetime']}.{$millsecond}";
    $data['timestamp'] = $time;
    $data['timestamp_micro'] = $microtime;
    return $data;
}

/**
 * Deduplicate a 2D array by the given key
 *
 * @author Aaron <chenqiang@h024.cn>
 *
 * @param array $arr
 * @param string $key
 *
 * @return array
 */
function arrayUniqueByKey(array $arr, string $key): array
{
    $arr = array_reverse($arr); // Reverse so the first of any duplicates is kept; without this, the last duplicate would win
    if (isset($arr[0][$key])) {
        foreach ($arr as $v) {
            $newArr[$v[$key]] = $v;
        }
    }
    $newArr = isset($newArr) ? array_values($newArr) : [];
    $newArr = array_reverse($newArr); // Reverse back to restore the original order
    return $newArr;
}

/**
 * Compute the difference of two multidimensional arrays (mind the argument order)
 *
 * @author Aaron <chenqiang@h024.cn>
 *
 * @param array $arr1
 * @param array $arr2
 *
 * @return array The result of $arr1 - $arr2
 */
function arrayDiff(array $arr1, array $arr2): array
{
    $arr1 = array_map(function ($v) {
        return json_encode($v);
    }, $arr1);
    $arr2 = array_map(function ($v) {
        return json_encode($v);
    }, $arr2);
    $diff = array_diff($arr1, $arr2);
    $diff = array_map(function ($v) {
        return json_decode($v, true);
    }, $diff);
    return array_values($diff);
}

$arr = [];
$total = 999999 + 1;
$stamp0 = getSystemTimes();
echo "\nData size: {$total}";
echo "\nArray generation started: " . $stamp0['datetime_milli'];
for ($i = 0; $i < $total; $i++) {
    $index = $i;
    if ($i % 101 == 0) {
        $index = $i - 1; // create some duplicates
    }
    $suffix = str_pad($index, 8, '0', STR_PAD_LEFT);
    if ($index % 113 == 0) {
        $suffix = str_pad($index - 1, 8, '0', STR_PAD_LEFT);
    }
    $arr[] = [
        'name' => '优优' . $index,
        'phone' => "133{$suffix}",
        'var1' => $index,
        'var2' => '',
        'var3' => '',
        'var4' => '',
        'var5' => $index,
    ];
}
$stamp1 = getSystemTimes();
echo "\nArray generation finished: " . $stamp1['datetime_milli'] . '; elapsed: ' . ($stamp1['timestamp_micro'] - $stamp0['timestamp_micro']) . 's';
echo "\nDeduplication started: " . $stamp1['datetime_milli'];
$arr2 = arrayUniqueByKey($arr, 'phone');
$stamp2 = getSystemTimes();
echo "\nDeduplication finished: " . $stamp2['datetime_milli'] . '; elapsed: ' . ($stamp2['timestamp_micro'] - $stamp1['timestamp_micro']) . 's';
echo "\nDiff computation started: " . $stamp2['datetime_milli'];
$repeated = arrayDiff($arr, $arr2);
$stamp3 = getSystemTimes();
echo "\nDiff computation finished: " . $stamp3['datetime_milli'] . '; elapsed: ' . ($stamp3['timestamp_micro'] - $stamp2['timestamp_micro']) . 's';
echo "\nDuplicate records: " . count($repeated);
echo "\nTotal processing time: " . ($stamp3['timestamp_micro'] - $stamp1['timestamp_micro']) . 's';
echo "\n";
```
Result:
```txt
Data size: 1000000
Array generation started: 2024-11-08 09:06:35.930
Array generation finished: 2024-11-08 09:06:36.467; elapsed: 0.53709101676941s
Deduplication started: 2024-11-08 09:06:36.467
Deduplication finished: 2024-11-08 09:06:38.566; elapsed: 2.099249124527s
Diff computation started: 2024-11-08 09:06:38.566
Diff computation finished: 2024-11-08 09:06:40.383; elapsed: 1.8165800571442s
Duplicate records: 8762
Total processing time: 3.9158291816711s
```
As the result shows, for 1,000,000 rows, generating the mock data took about 0.5 s, deduplication took about 2.1 s, and computing the difference took about 1.8 s; deduplication plus difference finished in roughly 4 seconds, which is quite acceptable for this data size.
### Conclusions
The native array functions generally perform very well; prefer them whenever possible.
- array_map transforms array elements very quickly.
- array_diff computes array differences very quickly.
- array_reverse reverses an array very quickly.
## Implementation with array_filter
### Core code
```php
$r = array_filter($arr1, function ($v) use ($arr2) {
    return !in_array($v, $arr2);
});
```
Yes, that's all there is to it; very concise. Let's try it out below.
### Full test script
```php
<?php
ini_set('memory_limit', '1G');

/**
 * Return various representations of the current system time with millisecond precision
 *
 * @author Aaron <chenqiang@h024.cn>
 */
function getSystemTimes()
{
    list($usec, $sec) = explode(" ", microtime());
    $microtime = ((float)$usec + (float)$sec);
    $time = (int)floor($microtime);
    $millsecond = round($microtime - $time, 3) * 1000;
    $millsecond = str_pad($millsecond, 3, '0', STR_PAD_LEFT);
    $weekarray = array("日", "一", "二", "三", "四", "五", "六");
    $data['year'] = date('Y', $time);
    $data['month'] = date('m', $time);
    $data['day'] = date('d', $time);
    $data['hour'] = date('H', $time);
    $data['hour_pre'] = date('A', $time);
    $data['hour_12'] = date('h', $time);
    $data['minute'] = date('i', $time);
    $data['second'] = date('s', $time);
    $data['millisecond'] = $millsecond;
    $data['date'] = date('Y-m-d', $time);
    $data['time'] = date('H:i:s', $time);
    $data['weekday'] = "星期{$weekarray[date('w', $time)]}";
    $data['weekday_index'] = date('w', $time);
    $data['time_milli'] = "{$data['time']}.{$millsecond}";
    $data['datetime'] = date('Y-m-d H:i:s', $time);
    $data['datetime_milli'] = "{$data['datetime']}.{$millsecond}";
    $data['timestamp'] = $time;
    $data['timestamp_micro'] = $microtime;
    return $data;
}

/**
 * Use array_filter() to deduplicate a 2D array by the given key, returning the deduplicated array
 *
 * @author Aaron Chen <qiang.c@wukezhenzhu.com>
 *
 * @param array $arr
 * @param string $uniqueKey
 *
 * @return array
 */
function arrayUniqueFilter(array $arr, string $uniqueKey)
{
    $uniqueIds = [];
    // Collect the deduplicated records
    $arr2 = array_filter($arr, function ($item) use (&$uniqueIds, $uniqueKey) {
        if (in_array($item[$uniqueKey], $uniqueIds)) {
            return false; // Already in $uniqueIds: return false to drop this element
        } else {
            $uniqueIds[] = $item[$uniqueKey]; // Otherwise record the key in $uniqueIds
            return true; // and return true to keep this element
        }
    });
    return $arr2;
}

/**
 * Compute the difference of two multidimensional arrays (mind the argument order)
 *
 * @author Aaron <chenqiang@h024.cn>
 *
 * @param array $arr1
 * @param array $arr2
 *
 * @return array The result of $arr1 - $arr2
 */
function arrayDiff(array $arr1, array $arr2): array
{
    $arr1 = array_map(function ($v) {
        return json_encode($v);
    }, $arr1);
    $arr2 = array_map(function ($v) {
        return json_encode($v);
    }, $arr2);
    $diff = array_diff($arr1, $arr2);
    $diff = array_map(function ($v) {
        return json_decode($v, true);
    }, $diff);
    return array_values($diff);
}

$arr = [];
$total = 99999 + 1;
$stamp0 = getSystemTimes();
echo "\nData size: {$total}";
echo "\nArray generation started: " . $stamp0['datetime_milli'];
for ($i = 0; $i < $total; $i++) {
    $index = $i;
    if ($i % 101 == 0) {
        $index = $i - 1; // create some duplicates
    }
    $suffix = str_pad($index, 8, '0', STR_PAD_LEFT);
    if ($index % 113 == 0) {
        $suffix = str_pad($index - 1, 8, '0', STR_PAD_LEFT);
    }
    $arr[] = [
        'name' => '优优' . $index,
        'phone' => "133{$suffix}",
        'var1' => $index,
        'var2' => '',
        'var3' => '',
        'var4' => '',
        'var5' => $index,
    ];
}
$stamp1 = getSystemTimes();
echo "\nArray generation finished: " . $stamp1['datetime_milli'] . '; elapsed: ' . ($stamp1['timestamp_micro'] - $stamp0['timestamp_micro']) . 's';
echo "\nDeduplication started: " . $stamp1['datetime_milli'];
$arr2 = arrayUniqueFilter($arr, 'phone');
$stamp2 = getSystemTimes();
echo "\nDeduplication finished: " . $stamp2['datetime_milli'] . '; elapsed: ' . ($stamp2['timestamp_micro'] - $stamp1['timestamp_micro']) . 's';
echo "\nDeduplicated array length: " . count($arr2);
echo "\nDiff computation started: " . $stamp2['datetime_milli'];
$repeated = arrayDiff($arr, $arr2);
$stamp3 = getSystemTimes();
echo "\nDiff computation finished: " . $stamp3['datetime_milli'] . '; elapsed: ' . ($stamp3['timestamp_micro'] - $stamp2['timestamp_micro']) . 's';
echo "\nDuplicate records: " . count($repeated);
echo "\nTotal processing time: " . ($stamp3['timestamp_micro'] - $stamp1['timestamp_micro']) . 's';
echo "\n";
```
Running the script above on the same Alibaba Cloud 2-core/2 GB host produces:
```sh
Data size: 100000
Array generation started: 2024-11-09 02:14:22.305
Array generation finished: 2024-11-09 02:14:22.345; elapsed: 0.039474964141846s
Deduplication started: 2024-11-09 02:14:22.345
Deduplication finished: 2024-11-09 02:18:30.495; elapsed: 248.1507999897s
Deduplicated array length: 98143
Diff computation started: 2024-11-09 02:18:30.495
Diff computation finished: 2024-11-09 02:18:30.634; elapsed: 0.13901495933533s
Duplicate records: 876
Total processing time: 248.28981494904s
```
As the result above shows, processing the 2D data with the array_filter() approach took a full 4 minutes for just 100,000 rows. That settles it; there is no point pursuing this approach any further.
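To be fair, the bottleneck is not array_filter() itself but the in_array() call, which scans the ever-growing $uniqueIds list once per row, making the whole pass O(n²). As an aside beyond the original comparison, swapping the list scan for an array-key lookup with isset() brings it back to roughly O(n); a hypothetical sketch:
```php
<?php
// Hypothetical variant: the same array_filter() shape, but with O(1)
// key lookups instead of in_array()'s linear scan.
function arrayUniqueFilterFast(array $arr, string $uniqueKey): array
{
    $seen = [];
    return array_filter($arr, function ($item) use (&$seen, $uniqueKey) {
        if (isset($seen[$item[$uniqueKey]])) {
            return false;                // duplicate: drop it
        }
        $seen[$item[$uniqueKey]] = true; // remember this key
        return true;                     // first occurrence: keep it
    });
}
```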
## Final conclusion
When processing very large arrays, prefer `array_map` to speed up execution, provided memory is sufficient.