19
19
#include <ctype.h>
20
20
#include <sys/types.h>
21
21
22
+ #ifdef __SSE2__
23
+ #include <emmintrin.h>
24
+ #endif
25
+
22
26
#include "php.h"
23
27
24
28
#include "url.h"
@@ -444,10 +448,34 @@ static int php_htoi(char *s)
444
448
445
449
static unsigned char hexchars [] = "0123456789ABCDEF" ;
446
450
447
- /* {{{ php_url_encode
448
- */
449
- PHPAPI zend_string * php_url_encode (char const * s , size_t len )
450
- {
451
/*
 * Helper macros for the SSE2 path; only defined when __SSE2__ is set.
 *
 * URL_ENCODE_EXTRACT_BYTE(val, lo, hi)
 *   Splits the low 16 bits of val into two bytes. Without WORDS_BIGENDIAN,
 *   lo receives bits 0-7 and hi receives bits 8-15; with WORDS_BIGENDIAN the
 *   two assignments are swapped.
 *
 * URL_ENCODE_PHASE(count, bits, to, in)
 *   Reads 16-bit lane number `count` of `in` via _mm_extract_epi16() and
 *   emits its two bytes (lo first, then hi) through `to`. Bit (count * 2) of
 *   `bits` decides the lo byte and bit (count * 2 + 1) decides the hi byte:
 *   a set bit copies the byte unchanged (1 byte written), a clear bit writes
 *   '%' followed by two digits looked up in hexchars[] (3 bytes written).
 *   `to` is advanced past everything written.
 *
 * NOTE(review): both macros use their arguments unparenthesized, and
 * URL_ENCODE_EXTRACT_BYTE expands to two bare statements, so they are only
 * safe in statement position with simple arguments.
 * NOTE(review): as rendered here there is a space between each macro name
 * and its '(' - in C that would define an object-like macro. Presumably an
 * artifact of how this diff was extracted; confirm against the real file.
 */
+ #ifdef __SSE2__
452
+ #ifdef WORDS_BIGENDIAN
453
+ #define URL_ENCODE_EXTRACT_BYTE (val , lo , hi ) hi = val & 0xff; lo = (val >> 8) & 0xff;
454
+ #else
455
+ #define URL_ENCODE_EXTRACT_BYTE (val , lo , hi ) lo = val & 0xff; hi = (val >> 8) & 0xff;
456
+ #endif
457
+ #define URL_ENCODE_PHASE (count , bits , to , in ) do { \
458
+ unsigned char hi, lo; \
459
+ uint32_t pair = _mm_extract_epi16(in, count); \
460
+ URL_ENCODE_EXTRACT_BYTE(pair, lo, hi); \
461
+ if (bits & (0x1 << (count * 2))) { \
462
+ *to++ = lo; \
463
+ } else { \
464
+ *to++ = '%'; \
465
+ *to++ = hexchars[lo >> 4]; \
466
+ *to++ = hexchars[lo & 0xf]; \
467
+ } \
468
+ if (bits & (0x1 << (count * 2 + 1))) { \
469
+ *to++ = hi; \
470
+ } else { \
471
+ *to++ = '%'; \
472
+ *to++ = hexchars[hi >> 4]; \
473
+ *to++ = hexchars[hi & 0xf]; \
474
+ } \
475
+ } while (0)
476
+ #endif
477
+
478
+ static zend_always_inline zend_string * php_url_encode_impl (const char * s , size_t len , zend_bool raw ) /* {{{ */ {
451
479
register unsigned char c ;
452
480
unsigned char * to ;
453
481
unsigned char const * from , * end ;
@@ -458,15 +486,72 @@ PHPAPI zend_string *php_url_encode(char const *s, size_t len)
458
486
start = zend_string_safe_alloc (3 , len , 0 , 0 );
459
487
to = (unsigned char * )ZSTR_VAL (start );
460
488
489
+ #ifdef __SSE2__
490
+ while (from + 16 < end ) {
491
+ __m128i mask ;
492
+ uint32_t bits ;
493
+ const __m128i _A = _mm_set1_epi8 ('A' - 1 );
494
+ const __m128i Z_ = _mm_set1_epi8 ('Z' + 1 );
495
+ const __m128i _a = _mm_set1_epi8 ('a' - 1 );
496
+ const __m128i z_ = _mm_set1_epi8 ('z' + 1 );
497
+ const __m128i _zero = _mm_set1_epi8 ('0' - 1 );
498
+ const __m128i nine_ = _mm_set1_epi8 ('9' + 1 );
499
+ const __m128i dot = _mm_set1_epi8 ('.' );
500
+ const __m128i minus = _mm_set1_epi8 ('-' );
501
+ const __m128i under = _mm_set1_epi8 ('_' );
502
+
503
+ __m128i in = _mm_loadu_si128 ((__m128i * )from );
504
+
505
+ __m128i gt = _mm_cmpgt_epi8 (in , _A );
506
+ __m128i lt = _mm_cmplt_epi8 (in , Z_ );
507
+ mask = _mm_and_si128 (lt , gt ); /* upper */
508
+ gt = _mm_cmpgt_epi8 (in , _a );
509
+ lt = _mm_cmplt_epi8 (in , z_ );
510
+ mask = _mm_or_si128 (mask , _mm_and_si128 (lt , gt )); /* lower */
511
+ gt = _mm_cmpgt_epi8 (in , _zero );
512
+ lt = _mm_cmplt_epi8 (in , nine_ );
513
+ mask = _mm_or_si128 (mask , _mm_and_si128 (lt , gt )); /* number */
514
+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , dot ));
515
+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , minus ));
516
+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , under ));
517
+
518
+ if (!raw ) {
519
+ const __m128i blank = _mm_set1_epi8 (' ' );
520
+ __m128i eq = _mm_cmpeq_epi8 (in , blank );
521
+ if (_mm_movemask_epi8 (eq )) {
522
+ in = _mm_add_epi8 (in , _mm_and_si128 (eq , _mm_set1_epi8 ('+' - ' ' )));
523
+ mask = _mm_or_si128 (mask , eq );
524
+ }
525
+ }
526
+ if (raw ) {
527
+ const __m128i wavy = _mm_set1_epi8 ('~' );
528
+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , wavy ));
529
+ }
530
+ if (((bits = _mm_movemask_epi8 (mask )) & 0xffff ) == 0xffff ) {
531
+ _mm_storeu_si128 ((__m128i * )to , in );
532
+ to += 16 ;
533
+ } else {
534
+ URL_ENCODE_PHASE (0 , bits , to , in );
535
+ URL_ENCODE_PHASE (1 , bits , to , in );
536
+ URL_ENCODE_PHASE (2 , bits , to , in );
537
+ URL_ENCODE_PHASE (3 , bits , to , in );
538
+ URL_ENCODE_PHASE (4 , bits , to , in );
539
+ URL_ENCODE_PHASE (5 , bits , to , in );
540
+ URL_ENCODE_PHASE (6 , bits , to , in );
541
+ URL_ENCODE_PHASE (7 , bits , to , in );
542
+ }
543
+ from += 16 ;
544
+ }
545
+ #endif
461
546
while (from < end ) {
462
547
c = * from ++ ;
463
548
464
- if (c == ' ' ) {
549
+ if (! raw && c == ' ' ) {
465
550
* to ++ = '+' ;
466
551
} else if ((c < '0' && c != '-' && c != '.' ) ||
467
- (c < 'A' && c > '9' ) ||
468
- (c > 'Z' && c < 'a' && c != '_' ) ||
469
- ( c > 'z' )) {
552
+ (c < 'A' && c > '9' ) ||
553
+ (c > 'Z' && c < 'a' && c != '_' ) ||
554
+ ( c > 'z' && (! raw || c != '~' ) )) {
470
555
to [0 ] = '%' ;
471
556
to [1 ] = hexchars [c >> 4 ];
472
557
to [2 ] = hexchars [c & 15 ];
@@ -483,6 +568,14 @@ PHPAPI zend_string *php_url_encode(char const *s, size_t len)
483
568
}
484
569
/* }}} */
485
570
571
+ /* {{{ php_url_encode
572
+ */
/*
 * Thin wrapper: forwards s and len to php_url_encode_impl() with raw = 0 and
 * returns its result unchanged.
 *
 * In the visible part of php_url_encode_impl(), raw == 0 means a ' ' byte is
 * emitted as '+' and '~' falls into the percent-encoded set (the "c > 'z'"
 * test only exempts '~' when raw is non-zero).
 *
 * NOTE(review): allocation and ownership of the returned zend_string are
 * decided inside php_url_encode_impl(), whose tail is not visible here.
 */
573
+ PHPAPI zend_string * php_url_encode (char const * s , size_t len )
574
+ {
575
+ return php_url_encode_impl (s , len , 0 );
576
+ }
577
+ /* }}} */
578
+
486
579
/* {{{ proto string urlencode(string str)
487
580
URL-encodes string */
488
581
PHP_FUNCTION (urlencode )
@@ -545,29 +638,7 @@ PHPAPI size_t php_url_decode(char *str, size_t len)
545
638
*/
546
639
/*
 * Raw URL encoding entry point. After this change the body is a single call:
 * php_url_encode_impl(s, len, 1), whose result is returned unchanged.
 *
 * In the visible part of php_url_encode_impl(), raw != 0 means ' ' is NOT
 * rewritten to '+' and '~' is exempt from percent-encoding.
 *
 * NOTE(review): the '-'-prefixed lines below are the previous hand-written
 * loop that this diff removes. It allocated 3 * len bytes via
 * zend_string_safe_alloc(), copied each byte or replaced it with '%' plus two
 * hexchars[] digits, NUL-terminated the buffer and truncated the string to
 * the bytes actually written. Its character classes ('-', '.', '_', '~',
 * digits and ASCII letters pass through) match the raw branch of the shared
 * implementation as far as that code is visible here.
 */
PHPAPI zend_string * php_raw_url_encode (char const * s , size_t len )
547
640
{
548
- register size_t x , y ;
549
- zend_string * str ;
550
- char * ret ;
551
-
552
- str = zend_string_safe_alloc (3 , len , 0 , 0 );
553
- ret = ZSTR_VAL (str );
554
- for (x = 0 , y = 0 ; len -- ; x ++ , y ++ ) {
555
- char c = s [x ];
556
-
557
- ret [y ] = c ;
558
- if ((c < '0' && c != '-' && c != '.' ) ||
559
- (c < 'A' && c > '9' ) ||
560
- (c > 'Z' && c < 'a' && c != '_' ) ||
561
- (c > 'z' && c != '~' )) {
562
- ret [y ++ ] = '%' ;
563
- ret [y ++ ] = hexchars [(unsigned char ) c >> 4 ];
564
- ret [y ] = hexchars [(unsigned char ) c & 15 ];
565
- }
566
- }
567
- ret [y ] = '\0' ;
568
- str = zend_string_truncate (str , y , 0 );
569
-
570
- return str ;
641
+ return php_url_encode_impl (s , len , 1 );
571
642
}
572
643
/* }}} */
573
644
0 commit comments