Skip to content

Commit 51ae411

Browse files
committed
Add SORT_STRICT option to array_unique()
Provide a way to request strict comparison (===) when using `array_unique()`. SORT_STRICT performs no type conversion: values of different types are ordered by their type. Array/object values which are == but not === are compared recursively by their elements/properties. Arrays with identical elements are themselves identical, but two distinct objects with identical properties are not, so for those we break the tie using the `spl_object_id()` of the objects.
1 parent 97cdf62 commit 51ae411

File tree

7 files changed

+756
-0
lines changed

7 files changed

+756
-0
lines changed

ext/standard/array.c

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ PHP_MINIT_FUNCTION(array) /* {{{ */
103103

104104
REGISTER_LONG_CONSTANT("SORT_REGULAR", PHP_SORT_REGULAR, CONST_CS | CONST_PERSISTENT);
105105
REGISTER_LONG_CONSTANT("SORT_NUMERIC", PHP_SORT_NUMERIC, CONST_CS | CONST_PERSISTENT);
106+
REGISTER_LONG_CONSTANT("SORT_STRICT", PHP_SORT_STRICT, CONST_CS | CONST_PERSISTENT);
106107
REGISTER_LONG_CONSTANT("SORT_STRING", PHP_SORT_STRING, CONST_CS | CONST_PERSISTENT);
107108
REGISTER_LONG_CONSTANT("SORT_LOCALE_STRING", PHP_SORT_LOCALE_STRING, CONST_CS | CONST_PERSISTENT);
108109
REGISTER_LONG_CONSTANT("SORT_NATURAL", PHP_SORT_NATURAL, CONST_CS | CONST_PERSISTENT);
@@ -349,6 +350,104 @@ static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucke
349350
}
350351
/* }}} */
351352

353+
/* return int to be compatible with compare_func_t */
354+
static int hash_zval_strict_function(zval *z1, zval *z2) /* {{{ */
355+
{
356+
ZVAL_DEREF(z1);
357+
ZVAL_DEREF(z2);
358+
// If the types are different, compare based on type.
359+
// (Values of different types can't be identical.)
360+
int t1 = Z_TYPE_P(z1);
361+
int t2 = Z_TYPE_P(z2);
362+
if ( t1 != t2 ) {
363+
return (t1 > t2 ) ? 1 : -1;
364+
}
365+
// The most important thing about this comparison mode is that the result
366+
// is 0 when zend_is_identical, and non-zero otherwise. This test is
367+
// done first to make it easier to verify this property by inspection.
368+
// (Arrays are excluded as an optimization, to avoid a redudant
369+
// deep inspection.)
370+
if (t1 != IS_ARRAY && zend_is_identical(z1, z2)) {
371+
return 0;
372+
}
373+
// Both types are the same *but the values are not identical*
374+
// Below this point, the return value for non-array values
375+
// should always be non-zero.
376+
switch (t1) {
377+
case IS_LONG:
378+
return Z_LVAL_P(z1) > Z_LVAL_P(z2) ? 1 : -1;
379+
380+
case IS_DOUBLE:
381+
return Z_DVAL_P(z1) > Z_DVAL_P(z2) ? 1 : -1;
382+
383+
case IS_STRING:
384+
return zend_binary_strcmp(
385+
Z_STRVAL_P(z1), Z_STRLEN_P(z1),
386+
Z_STRVAL_P(z2), Z_STRLEN_P(z2)
387+
);
388+
389+
case IS_ARRAY:
390+
// Do a recursive comparison. This is consistent with the test
391+
// for arrays in zend_is_identical, but unlike that method it
392+
// provides a meaningful ordering in the case of non-identity
393+
// as well.
394+
return zend_hash_compare(
395+
Z_ARRVAL_P(z1), Z_ARRVAL_P(z2),
396+
(compare_func_t) hash_zval_strict_function, 1 /* ordered */
397+
);
398+
399+
case IS_OBJECT:
400+
{
401+
// Start with a recursive comparison like for arrays, for consistency.
402+
// (This is deliberately not using the user-defined `compare` handler,
403+
// nor is it using zend_std_compare_objects() because that uses
404+
// zend_compare when examining properties, not a strict comparison.)
405+
zend_object *zobj1 = Z_OBJ_P(z1);
406+
zend_object *zobj2 = Z_OBJ_P(z2);
407+
rebuild_object_properties(zobj1);
408+
rebuild_object_properties(zobj2);
409+
// zend_std_compare_objects() uses unordered comparison, but that
410+
// leads to a unpredictable sort: with unordered the properties are
411+
// compared in the order they appear in the *first* object so
412+
// `compare(a,b)` is not guaranteed to be the same as `-compare(b,a)`.
413+
int c = zend_hash_compare(
414+
zobj1->properties, zobj2->properties,
415+
(compare_func_t) hash_zval_strict_function, 1 /* ordered */
416+
);
417+
if (c != 0) {
418+
return (c > 0) ? 1 : -1;
419+
}
420+
// Fall back on spl_object_id() value, which will probably vary
421+
// non-deterministically between runs (alas).
422+
ZEND_ASSERT(zobj1->handle != zobj2->handle);
423+
return (zobj1->handle > zobj2->handle) ? 1 : -1;
424+
}
425+
426+
case IS_RESOURCE:
427+
// This will also likely vary non-deterministically between runs.
428+
return Z_RES_HANDLE_P(z1) > Z_RES_HANDLE_P(z2) ? 1 : -1;
429+
430+
case IS_REFERENCE:
431+
ZEND_ASSERT(0 && "Should have been dereferenced above");
432+
433+
case IS_UNDEF:
434+
case IS_NULL:
435+
case IS_FALSE:
436+
case IS_TRUE:
437+
default:
438+
ZEND_ASSERT(0 && "Values w/ same type should be identical");
439+
return 0;
440+
}
441+
}
442+
/* }}} */
443+
444+
445+
/* Bucket-level adapter: compares the values stored in two buckets with
 * hash_zval_strict_function() and returns its result unchanged. */
static zend_always_inline int php_array_data_compare_strict_unstable_i(Bucket *f, Bucket *s) /* {{{ */
{
	zval *lhs = &f->val;
	zval *rhs = &s->val;

	return hash_zval_strict_function(lhs, rhs);
}
449+
/* }}} */
450+
352451
static zend_always_inline int php_array_data_compare_numeric_unstable_i(Bucket *f, Bucket *s) /* {{{ */
353452
{
354453
return numeric_compare_function(&f->val, &s->val);
@@ -405,6 +504,7 @@ DEFINE_SORT_VARIANTS(key_compare_string_case);
405504
DEFINE_SORT_VARIANTS(key_compare_string);
406505
DEFINE_SORT_VARIANTS(key_compare_string_locale);
407506
DEFINE_SORT_VARIANTS(data_compare);
507+
DEFINE_SORT_VARIANTS(data_compare_strict);
408508
DEFINE_SORT_VARIANTS(data_compare_numeric);
409509
DEFINE_SORT_VARIANTS(data_compare_string_case);
410510
DEFINE_SORT_VARIANTS(data_compare_string);
@@ -527,6 +627,14 @@ static bucket_compare_func_t php_get_data_compare_func(zend_long sort_type, int
527627
}
528628
break;
529629

630+
case PHP_SORT_STRICT:
631+
if (reverse) {
632+
return php_array_reverse_data_compare_strict;
633+
} else {
634+
return php_array_data_compare_strict;
635+
}
636+
break;
637+
530638
case PHP_SORT_REGULAR:
531639
default:
532640
if (reverse) {
@@ -591,6 +699,14 @@ static bucket_compare_func_t php_get_data_compare_func_unstable(zend_long sort_t
591699
}
592700
break;
593701

702+
case PHP_SORT_STRICT:
703+
if (reverse) {
704+
return php_array_reverse_data_compare_strict_unstable;
705+
} else {
706+
return php_array_data_compare_strict_unstable;
707+
}
708+
break;
709+
594710
case PHP_SORT_REGULAR:
595711
default:
596712
if (reverse) {

ext/standard/php_array.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ PHPAPI zend_long php_count_recursive(HashTable *ht);
3636
#define PHP_SORT_ASC 4
3737
#define PHP_SORT_LOCALE_STRING 5
3838
#define PHP_SORT_NATURAL 6
39+
#define PHP_SORT_STRICT 7
3940
#define PHP_SORT_FLAG_CASE 8
4041

4142
#define COUNT_NORMAL 0
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
--TEST--
2+
Test array_unique() function : SORT_STRICT functionality
3+
--FILE--
4+
<?php
// Exercises array_unique() with the SORT_STRICT flag and contrasts it with
// calls that pass no flag. The expected output lives in the EXPECT section.
5+
echo "*** Testing array_unique() : SORT_STRICT functionality ***\n";
6+
7+
// With no flag the string "1234" and the int 1234 are expected to collapse
// into a single entry.
var_dump( array_unique( [ "1234", 1234 ] ) );
8+
// Two identical strings are duplicates under SORT_STRICT as well.
var_dump( array_unique( [ "1234", "1234" ], SORT_STRICT ) );
9+
// Same text but different types: SORT_STRICT is expected to keep both.
var_dump( array_unique( [ "1234", 1234 ], SORT_STRICT ) );
10+
11+
// A set of loosely-similar values, first with no flag ...
var_dump( array_unique( [ 0, "0", 0.0, "0.0", '', null, null ] ) );
12+
// ... then with SORT_STRICT, where only the repeated null is expected to be
// dropped.
var_dump( array_unique( [ 0, "0", 0.0, "0.0", '', null, null ], SORT_STRICT ) );
13+
14+
// These are more values which are == but not ===: pairs of distinct objects,
// arrays holding them, and objects wrapping them.
15+
$a = (object)[];
16+
$b = (object)[];
17+
$a2 = [ $a ];
18+
$b2 = [ $b ];
19+
$a3 = (object)[ 'foo' => $a ];
20+
$b3 = (object)[ 'foo' => $b ];
21+
// Sanity check: every pair is loosely equal ...
var_dump( $a == $b && $a2 == $b2 && $a3 == $b3 );
22+
// ... and no pair is strictly identical.
var_dump( $a === $b || $a2 === $b2 || $a3 === $b3 );
23+
24+
// None of the six values is identical to another, so all six should survive.
var_dump( count( array_unique( [ $a, $b, $a2, $b2, $a3, $b3 ], SORT_STRICT ) ) );
25+
26+
?>
27+
--EXPECT--
28+
*** Testing array_unique() : SORT_STRICT functionality ***
29+
array(1) {
30+
[0]=>
31+
string(4) "1234"
32+
}
33+
array(1) {
34+
[0]=>
35+
string(4) "1234"
36+
}
37+
array(2) {
38+
[0]=>
39+
string(4) "1234"
40+
[1]=>
41+
int(1234)
42+
}
43+
array(3) {
44+
[0]=>
45+
int(0)
46+
[3]=>
47+
string(3) "0.0"
48+
[4]=>
49+
string(0) ""
50+
}
51+
array(6) {
52+
[0]=>
53+
int(0)
54+
[1]=>
55+
string(1) "0"
56+
[2]=>
57+
float(0)
58+
[3]=>
59+
string(3) "0.0"
60+
[4]=>
61+
string(0) ""
62+
[5]=>
63+
NULL
64+
}
65+
bool(true)
66+
bool(false)
67+
int(6)

ext/standard/tests/array/arsort_basic.phpt

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Test arsort() function : basic functionality
99
* SORT_REGULAR - compare items normally
1010
* SORT_NUMERIC - compare items numerically
1111
* SORT_STRING - compare items as strings
12+
* SORT_STRICT - compare items using strict equality (===)
1213
*/
1314

1415
echo "*** Testing arsort() : basic functionality ***\n";
@@ -67,6 +68,16 @@ $temp_array = $unsorted_numerics;
6768
var_dump( arsort($temp_array, SORT_NUMERIC) ); // expecting : bool(true)
6869
var_dump( $temp_array);
6970

71+
echo "\n-- Testing arsort() by supplying string array, 'flag' = SORT_STRICT --\n";
72+
$temp_array = $unsorted_strings;
73+
var_dump( arsort($temp_array, SORT_STRICT) ); // expecting : bool(true)
74+
var_dump( $temp_array);
75+
76+
echo "\n-- Testing arsort() by supplying numeric array, 'flag' = SORT_STRICT --\n";
77+
$temp_array = $unsorted_numerics;
78+
var_dump( arsort($temp_array, SORT_STRICT) ); // expecting : bool(true)
79+
var_dump( $temp_array);
80+
7081
echo "Done\n";
7182
?>
7283
--EXPECT--
@@ -236,4 +247,38 @@ array(4) {
236247
[4]=>
237248
int(22)
238249
}
250+
251+
-- Testing arsort() by supplying string array, 'flag' = SORT_STRICT --
252+
bool(true)
253+
array(8) {
254+
["o20"]=>
255+
string(8) "orange20"
256+
["o2"]=>
257+
string(7) "orange2"
258+
["o"]=>
259+
string(6) "orange"
260+
["l"]=>
261+
string(5) "lemon"
262+
["b"]=>
263+
string(6) "banana"
264+
["O3"]=>
265+
string(7) "Orange3"
266+
["O1"]=>
267+
string(7) "Orange1"
268+
["O"]=>
269+
string(6) "Orange"
270+
}
271+
272+
-- Testing arsort() by supplying numeric array, 'flag' = SORT_STRICT --
273+
bool(true)
274+
array(4) {
275+
[3]=>
276+
int(555)
277+
[1]=>
278+
int(100)
279+
[2]=>
280+
int(33)
281+
[4]=>
282+
int(22)
283+
}
239284
Done

0 commit comments

Comments
 (0)