Skip to content

Commit 5375ac5

Browse files
committed
Add SORT_STRICT option to array_unique()
Provide a way to request strict comparison (===) when using `array_unique()`. Two values are treated as duplicates only if they are identical (===). Values that are not identical are ordered with the normal (==) comparison whenever that comparison reports them as unequal. Values which are == but not === are ordered by a tie-breaker: values of different types are ordered by type; arrays are compared recursively, element by element; objects are compared first by class name and, if that doesn't break the tie, by the `spl_object_id()` of the objects.
1 parent 78ef25b commit 5375ac5

File tree

3 files changed

+141
-0
lines changed

3 files changed

+141
-0
lines changed

ext/standard/array.c

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ PHP_MINIT_FUNCTION(array) /* {{{ */
103103

104104
REGISTER_LONG_CONSTANT("SORT_REGULAR", PHP_SORT_REGULAR, CONST_CS | CONST_PERSISTENT);
105105
REGISTER_LONG_CONSTANT("SORT_NUMERIC", PHP_SORT_NUMERIC, CONST_CS | CONST_PERSISTENT);
106+
REGISTER_LONG_CONSTANT("SORT_STRICT", PHP_SORT_STRICT, CONST_CS | CONST_PERSISTENT);
106107
REGISTER_LONG_CONSTANT("SORT_STRING", PHP_SORT_STRING, CONST_CS | CONST_PERSISTENT);
107108
REGISTER_LONG_CONSTANT("SORT_LOCALE_STRING", PHP_SORT_LOCALE_STRING, CONST_CS | CONST_PERSISTENT);
108109
REGISTER_LONG_CONSTANT("SORT_NATURAL", PHP_SORT_NATURAL, CONST_CS | CONST_PERSISTENT);
@@ -349,6 +350,65 @@ static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucke
349350
}
350351
/* }}} */
351352

353+
/* return int to be compatible with compare_func_t */
354+
static int hash_zval_strict_function(zval *z1, zval *z2) /* {{{ */
355+
{
356+
// The most important thing about this comparison mode is that the result
357+
// is 0 when zend_is_identical, and non-zero otherwise.
358+
if (zend_is_identical(z1, z2)) {
359+
return 0;
360+
}
361+
// Below this point, the return value should always be non-zero.
362+
// First try to use a "normal" comparison.
363+
int c = zend_compare(z1, z2);
364+
if (c != 0) {
365+
return c;
366+
}
367+
// If the types are different, compare based on type.
368+
int t1 = Z_TYPE_P(z1);
369+
int t2 = Z_TYPE_P(z2);
370+
if ( t1 != t2 ) {
371+
return (t1 < t2 ) ? -1 : 1;
372+
}
373+
switch (t1) {
374+
case IS_ARRAY:
375+
// Do a recursive comparison
376+
return zend_hash_compare(
377+
Z_ARRVAL_P(z1), Z_ARRVAL_P(z2),
378+
(compare_func_t) hash_zval_strict_function, 1 /* ordered */
379+
);
380+
381+
case IS_OBJECT:
382+
{
383+
// First compare on class name, as that's consistent across runs
384+
zend_string *name1 = Z_OBJCE_P(z1)->name;
385+
zend_string *name2 = Z_OBJCE_P(z2)->name;
386+
c = zend_binary_strcmp(name1->val, name1->len, name2->val, name2->len);
387+
if (c != 0) {
388+
return (c < 0) ? -1 : 1;
389+
}
390+
// Fall back on spl_object_id() value, which will probably vary
391+
// non-deterministically between runs (alas).
392+
zend_object *obj1 = Z_OBJ_P(z1);
393+
zend_object *obj2 = Z_OBJ_P(z2);
394+
ZEND_ASSERT(obj1->handle != obj2->handle);
395+
return (obj1->handle < obj2->handle) ? -1 : 1;
396+
}
397+
398+
default:
399+
ZEND_ASSERT(0 && "Value types should have consistent == and ===");
400+
return 0;
401+
}
402+
}
403+
/* }}} */
404+
405+
406+
/* Bucket adapter for SORT_STRICT: hands the values stored in the two buckets
 * to hash_zval_strict_function() and returns its result unchanged. */
static zend_always_inline int php_array_data_compare_strict_unstable_i(Bucket *f, Bucket *s) /* {{{ */
{
	zval *lhs = &f->val;
	zval *rhs = &s->val;

	return hash_zval_strict_function(lhs, rhs);
}
410+
/* }}} */
411+
352412
static zend_always_inline int php_array_data_compare_numeric_unstable_i(Bucket *f, Bucket *s) /* {{{ */
353413
{
354414
return numeric_compare_function(&f->val, &s->val);
@@ -405,6 +465,7 @@ DEFINE_SORT_VARIANTS(key_compare_string_case);
405465
DEFINE_SORT_VARIANTS(key_compare_string);
406466
DEFINE_SORT_VARIANTS(key_compare_string_locale);
407467
DEFINE_SORT_VARIANTS(data_compare);
468+
DEFINE_SORT_VARIANTS(data_compare_strict);
408469
DEFINE_SORT_VARIANTS(data_compare_numeric);
409470
DEFINE_SORT_VARIANTS(data_compare_string_case);
410471
DEFINE_SORT_VARIANTS(data_compare_string);
@@ -527,6 +588,14 @@ static bucket_compare_func_t php_get_data_compare_func(zend_long sort_type, int
527588
}
528589
break;
529590

591+
case PHP_SORT_STRICT:
592+
if (reverse) {
593+
return php_array_reverse_data_compare_strict;
594+
} else {
595+
return php_array_data_compare_strict;
596+
}
597+
break;
598+
530599
case PHP_SORT_REGULAR:
531600
default:
532601
if (reverse) {
@@ -591,6 +660,14 @@ static bucket_compare_func_t php_get_data_compare_func_unstable(zend_long sort_t
591660
}
592661
break;
593662

663+
case PHP_SORT_STRICT:
664+
if (reverse) {
665+
return php_array_reverse_data_compare_strict_unstable;
666+
} else {
667+
return php_array_data_compare_strict_unstable;
668+
}
669+
break;
670+
594671
case PHP_SORT_REGULAR:
595672
default:
596673
if (reverse) {

ext/standard/php_array.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ PHPAPI zend_long php_count_recursive(HashTable *ht);
3636
#define PHP_SORT_ASC 4
3737
#define PHP_SORT_LOCALE_STRING 5
3838
#define PHP_SORT_NATURAL 6
39+
#define PHP_SORT_STRICT 7
3940
#define PHP_SORT_FLAG_CASE 8
4041

4142
#define COUNT_NORMAL 0
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
--TEST--
2+
Test array_unique() function : SORT_STRICT functionality
3+
--FILE--
4+
<?php
5+
echo "*** Testing array_unique() : SORT_STRICT functionality ***\n";
6+
7+
var_dump( array_unique( [ "1234", 1234 ] ) );
8+
var_dump( array_unique( [ "1234", "1234" ], SORT_STRICT ) );
9+
var_dump( array_unique( [ "1234", 1234 ], SORT_STRICT ) );
10+
11+
var_dump( array_unique( [ 0, "0", 0.0, '', null, null ] ) );
12+
var_dump( array_unique( [ 0, "0", 0.0, '', null, null ], SORT_STRICT ) );
13+
14+
// These are more values which are == but not ===
15+
$a = (object)[];
16+
$b = (object)[];
17+
$a2 = [ $a ];
18+
$b2 = [ $b ];
19+
$a3 = (object)[ 'foo' => $a ];
20+
$b3 = (object)[ 'foo' => $b ];
21+
var_dump( $a == $b && $a2 == $b2 && $a3 == $b3 );
22+
var_dump( $a === $b || $a2 === $b2 || $a3 === $b3 );
23+
24+
var_dump( count( array_unique( [ $a, $b, $a2, $b2, $a3, $b3 ], SORT_STRICT ) ) );
25+
26+
?>
27+
--EXPECT--
28+
*** Testing array_unique() : SORT_STRICT functionality ***
29+
array(1) {
30+
[0]=>
31+
string(4) "1234"
32+
}
33+
array(1) {
34+
[0]=>
35+
string(4) "1234"
36+
}
37+
array(2) {
38+
[0]=>
39+
string(4) "1234"
40+
[1]=>
41+
int(1234)
42+
}
43+
array(2) {
44+
[0]=>
45+
int(0)
46+
[3]=>
47+
string(0) ""
48+
}
49+
array(5) {
50+
[0]=>
51+
int(0)
52+
[1]=>
53+
string(1) "0"
54+
[2]=>
55+
float(0)
56+
[3]=>
57+
string(0) ""
58+
[4]=>
59+
NULL
60+
}
61+
bool(true)
62+
bool(false)
63+
int(6)

0 commit comments

Comments
 (0)