Skip to content

Commit 3b66d29

Browse files
committed
Add SORT_STRICT option to array_unique()
Provide a way to request the use of strict comparison (===) when using `array_unique`. SORT_STRICT does no type conversion: values of different types are ordered by their type. For array/object values which are == but not === we recursively compare the elements/properties. Arrays with identical elements will be identical themselves; but for objects with identical properties which are not identical we next compare class names and if that doesn't break the tie we compare the `spl_object_id()` of the objects.
1 parent 78ef25b commit 3b66d29

File tree

3 files changed

+182
-0
lines changed

3 files changed

+182
-0
lines changed

ext/standard/array.c

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ PHP_MINIT_FUNCTION(array) /* {{{ */
103103

104104
REGISTER_LONG_CONSTANT("SORT_REGULAR", PHP_SORT_REGULAR, CONST_CS | CONST_PERSISTENT);
105105
REGISTER_LONG_CONSTANT("SORT_NUMERIC", PHP_SORT_NUMERIC, CONST_CS | CONST_PERSISTENT);
106+
REGISTER_LONG_CONSTANT("SORT_STRICT", PHP_SORT_STRICT, CONST_CS | CONST_PERSISTENT);
106107
REGISTER_LONG_CONSTANT("SORT_STRING", PHP_SORT_STRING, CONST_CS | CONST_PERSISTENT);
107108
REGISTER_LONG_CONSTANT("SORT_LOCALE_STRING", PHP_SORT_LOCALE_STRING, CONST_CS | CONST_PERSISTENT);
108109
REGISTER_LONG_CONSTANT("SORT_NATURAL", PHP_SORT_NATURAL, CONST_CS | CONST_PERSISTENT);
@@ -349,6 +350,102 @@ static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucke
349350
}
350351
/* }}} */
351352

353+
/* return int to be compatible with compare_func_t */
354+
static int hash_zval_strict_function(zval *z1, zval *z2) /* {{{ */
355+
{
356+
ZVAL_DEREF(z1);
357+
ZVAL_DEREF(z2);
358+
// The most important thing about this comparison mode is that the result
359+
// is 0 when zend_is_identical, and non-zero otherwise.
360+
if (zend_is_identical(z1, z2)) {
361+
return 0;
362+
}
363+
// Below this point, the return value should always be non-zero.
364+
// If the types are different, compare based on type.
365+
int t1 = Z_TYPE_P(z1);
366+
int t2 = Z_TYPE_P(z2);
367+
if ( t1 != t2 ) {
368+
return (t1 > t2 ) ? 1 : -1;
369+
}
370+
// Both types are the same *but the values are not identical*
371+
switch (t1) {
372+
case IS_LONG:
373+
return Z_LVAL_P(z1) > Z_LVAL_P(z2) ? 1 : -1;
374+
375+
case IS_DOUBLE:
376+
return Z_DVAL_P(z1) > Z_DVAL_P(z2) ? 1 : -1;
377+
378+
case IS_STRING:
379+
return zend_binary_strcmp(
380+
Z_STRVAL_P(z1), Z_STRLEN_P(z1),
381+
Z_STRVAL_P(z2), Z_STRLEN_P(z2)
382+
);
383+
384+
case IS_ARRAY:
385+
// Do a recursive comparison. Unlike for objects, if this recursive
386+
// comparison returns 0, then the objects should have tested as
387+
// identical above.
388+
return zend_hash_compare(
389+
Z_ARRVAL_P(z1), Z_ARRVAL_P(z2),
390+
(compare_func_t) hash_zval_strict_function, 1 /* ordered */
391+
);
392+
393+
case IS_OBJECT:
394+
{
395+
// Start with a recursive comparison like for arrays, for consistency.
396+
// (This is deliberately not using the user-defined `compare` handler,
397+
// nor is it using zend_std_compare_objects() because that uses
398+
// zend_compare when examining properties, not a strict comparison.)
399+
zend_object *zobj1 = Z_OBJ_P(z1);
400+
zend_object *zobj2 = Z_OBJ_P(z2);
401+
rebuild_object_properties(zobj1);
402+
rebuild_object_properties(zobj2);
403+
int c = zend_hash_compare(
404+
zobj1->properties, zobj2->properties,
405+
(compare_func_t) hash_zval_strict_function, 0 /* unordered */
406+
);
407+
if (c != 0) {
408+
return (c > 0) ? 1 : -1;
409+
}
410+
// Properties are all identical. Check the class name, since this
411+
// is consistent across runs.
412+
zend_string *name1 = Z_OBJCE_P(z1)->name;
413+
zend_string *name2 = Z_OBJCE_P(z2)->name;
414+
c = zend_binary_strcmp(name1->val, name1->len, name2->val, name2->len);
415+
if (c != 0) {
416+
return (c > 0) ? 1 : -1;
417+
}
418+
// Fall back on spl_object_id() value, which will probably vary
419+
// non-deterministically between runs (alas).
420+
ZEND_ASSERT(zobj1->handle != zobj2->handle);
421+
return (zobj1->handle > zobj2->handle) ? 1 : -1;
422+
}
423+
424+
case IS_RESOURCE:
425+
// This will also likely vary non-deterministically between runs.
426+
return Z_RES_HANDLE_P(z1) > Z_RES_HANDLE_P(z2) ? 1 : -1;
427+
428+
case IS_REFERENCE:
429+
ZEND_ASSERT(0 && "Should have been dereferenced above");
430+
431+
case IS_UNDEF:
432+
case IS_NULL:
433+
case IS_FALSE:
434+
case IS_TRUE:
435+
default:
436+
ZEND_ASSERT(0 && "Values w/ same type should be identical");
437+
return 0;
438+
}
439+
}
440+
/* }}} */
441+
442+
443+
/* Bucket adapter for SORT_STRICT: compares the values held by two buckets
 * with hash_zval_strict_function() and passes its result through unchanged. */
static zend_always_inline int php_array_data_compare_strict_unstable_i(Bucket *f, Bucket *s) /* {{{ */
{
	zval *first = &f->val;
	zval *second = &s->val;

	return hash_zval_strict_function(first, second);
}
447+
/* }}} */
448+
352449
static zend_always_inline int php_array_data_compare_numeric_unstable_i(Bucket *f, Bucket *s) /* {{{ */
353450
{
354451
return numeric_compare_function(&f->val, &s->val);
@@ -405,6 +502,7 @@ DEFINE_SORT_VARIANTS(key_compare_string_case);
405502
DEFINE_SORT_VARIANTS(key_compare_string);
406503
DEFINE_SORT_VARIANTS(key_compare_string_locale);
407504
DEFINE_SORT_VARIANTS(data_compare);
505+
DEFINE_SORT_VARIANTS(data_compare_strict);
408506
DEFINE_SORT_VARIANTS(data_compare_numeric);
409507
DEFINE_SORT_VARIANTS(data_compare_string_case);
410508
DEFINE_SORT_VARIANTS(data_compare_string);
@@ -527,6 +625,14 @@ static bucket_compare_func_t php_get_data_compare_func(zend_long sort_type, int
527625
}
528626
break;
529627

628+
case PHP_SORT_STRICT:
629+
if (reverse) {
630+
return php_array_reverse_data_compare_strict;
631+
} else {
632+
return php_array_data_compare_strict;
633+
}
634+
break;
635+
530636
case PHP_SORT_REGULAR:
531637
default:
532638
if (reverse) {
@@ -591,6 +697,14 @@ static bucket_compare_func_t php_get_data_compare_func_unstable(zend_long sort_t
591697
}
592698
break;
593699

700+
case PHP_SORT_STRICT:
701+
if (reverse) {
702+
return php_array_reverse_data_compare_strict_unstable;
703+
} else {
704+
return php_array_data_compare_strict_unstable;
705+
}
706+
break;
707+
594708
case PHP_SORT_REGULAR:
595709
default:
596710
if (reverse) {

ext/standard/php_array.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ PHPAPI zend_long php_count_recursive(HashTable *ht);
3636
#define PHP_SORT_ASC 4
3737
#define PHP_SORT_LOCALE_STRING 5
3838
#define PHP_SORT_NATURAL 6
39+
#define PHP_SORT_STRICT 7
3940
#define PHP_SORT_FLAG_CASE 8
4041

4142
#define COUNT_NORMAL 0
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
--TEST--
2+
Test array_unique() function : SORT_STRICT functionality
3+
--FILE--
4+
<?php
5+
echo "*** Testing array_unique() : SORT_STRICT functionality ***\n";
6+
7+
var_dump( array_unique( [ "1234", 1234 ] ) );
8+
var_dump( array_unique( [ "1234", "1234" ], SORT_STRICT ) );
9+
var_dump( array_unique( [ "1234", 1234 ], SORT_STRICT ) );
10+
11+
var_dump( array_unique( [ 0, "0", 0.0, "0.0", '', null, null ] ) );
12+
var_dump( array_unique( [ 0, "0", 0.0, "0.0", '', null, null ], SORT_STRICT ) );
13+
14+
// These are more values which are == but not ===
15+
$a = (object)[];
16+
$b = (object)[];
17+
$a2 = [ $a ];
18+
$b2 = [ $b ];
19+
$a3 = (object)[ 'foo' => $a ];
20+
$b3 = (object)[ 'foo' => $b ];
21+
var_dump( $a == $b && $a2 == $b2 && $a3 == $b3 );
22+
var_dump( $a === $b || $a2 === $b2 || $a3 === $b3 );
23+
24+
var_dump( count( array_unique( [ $a, $b, $a2, $b2, $a3, $b3 ], SORT_STRICT ) ) );
25+
26+
?>
27+
--EXPECT--
28+
*** Testing array_unique() : SORT_STRICT functionality ***
29+
array(1) {
30+
[0]=>
31+
string(4) "1234"
32+
}
33+
array(1) {
34+
[0]=>
35+
string(4) "1234"
36+
}
37+
array(2) {
38+
[0]=>
39+
string(4) "1234"
40+
[1]=>
41+
int(1234)
42+
}
43+
array(3) {
44+
[0]=>
45+
int(0)
46+
[3]=>
47+
string(3) "0.0"
48+
[4]=>
49+
string(0) ""
50+
}
51+
array(6) {
52+
[0]=>
53+
int(0)
54+
[1]=>
55+
string(1) "0"
56+
[2]=>
57+
float(0)
58+
[3]=>
59+
string(3) "0.0"
60+
[4]=>
61+
string(0) ""
62+
[5]=>
63+
NULL
64+
}
65+
bool(true)
66+
bool(false)
67+
int(6)

0 commit comments

Comments
 (0)