Skip to content

Commit 0efbbbd

Browse files
committed
Optimize pack()
Instead of using lookup tables, we can use a combination of shifts and byte swapping to achieve the same thing in fewer cycles and with less code.

Benchmark files
---------------

pack1.php:
```php
for ($i = 0; $i < 10_000_000; ++$i) {
    pack("J", 0x7FFFFFFFFFFFFFFF);
}
```

pack2.php:
```php
for ($i = 0; $i < 4000000; ++$i) {
    pack("nvc*", 0x1234, 0x5678, 65, 66);
}
```

On an i7-4790:
```
Benchmark 1: ./sapi/cli/php pack1.php
  Time (mean ± σ): 408.8 ms ± 3.4 ms [User: 406.1 ms, System: 1.6 ms]
  Range (min … max): 403.6 ms … 413.6 ms 10 runs
Benchmark 2: ./sapi/cli/php_old pack1.php
  Time (mean ± σ): 451.7 ms ± 7.7 ms [User: 448.5 ms, System: 2.0 ms]
  Range (min … max): 442.8 ms … 461.2 ms 10 runs
Summary
  ./sapi/cli/php pack1.php ran 1.11 ± 0.02 times faster than ./sapi/cli/php_old pack1.php

Benchmark 1: ./sapi/cli/php pack2.php
  Time (mean ± σ): 239.3 ms ± 6.0 ms [User: 236.2 ms, System: 2.3 ms]
  Range (min … max): 233.2 ms … 256.8 ms 12 runs
Benchmark 2: ./sapi/cli/php_old pack2.php
  Time (mean ± σ): 271.9 ms ± 3.3 ms [User: 269.7 ms, System: 1.3 ms]
  Range (min … max): 267.4 ms … 279.0 ms 11 runs
Summary
  ./sapi/cli/php pack2.php ran 1.14 ± 0.03 times faster than ./sapi/cli/php_old pack2.php
```

On an i7-1185G7:
```
Benchmark 1: ./sapi/cli/php pack1.php
  Time (mean ± σ): 263.7 ms ± 1.8 ms [User: 262.6 ms, System: 0.9 ms]
  Range (min … max): 261.5 ms … 268.2 ms 11 runs
Benchmark 2: ./sapi/cli/php_old pack1.php
  Time (mean ± σ): 303.3 ms ± 6.5 ms [User: 300.7 ms, System: 2.3 ms]
  Range (min … max): 297.4 ms … 318.1 ms 10 runs
Summary
  ./sapi/cli/php pack1.php ran 1.15 ± 0.03 times faster than ./sapi/cli/php_old pack1.php

Benchmark 1: ./sapi/cli/php pack2.php
  Time (mean ± σ): 156.7 ms ± 2.9 ms [User: 154.7 ms, System: 1.7 ms]
  Range (min … max): 151.6 ms … 164.7 ms 19 runs
Benchmark 2: ./sapi/cli/php_old pack2.php
  Time (mean ± σ): 174.6 ms ± 3.3 ms [User: 171.9 ms, System: 2.3 ms]
  Range (min … max): 170.7 ms … 180.4 ms 17 runs
Summary
  ./sapi/cli/php pack2.php ran 1.11 ± 0.03 times faster than ./sapi/cli/php_old pack2.php
```

Co-authored-by: [email protected]
1 parent 7f3a2bc commit 0efbbbd

File tree

4 files changed

+36
-196
lines changed

4 files changed

+36
-196
lines changed

ext/standard/basic_functions.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,6 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */
299299

300300
BASIC_MINIT_SUBMODULE(var)
301301
BASIC_MINIT_SUBMODULE(file)
302-
BASIC_MINIT_SUBMODULE(pack)
303302
BASIC_MINIT_SUBMODULE(browscap)
304303
BASIC_MINIT_SUBMODULE(standard_filters)
305304
BASIC_MINIT_SUBMODULE(user_filters)

ext/standard/pack.c

Lines changed: 36 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
#else
2929
#include <sys/param.h>
3030
#endif
31-
#include "pack.h"
3231
#ifdef HAVE_PWD_H
3332
#ifdef PHP_WIN32
3433
#include "win32/pwd.h"
@@ -50,10 +49,23 @@
5049
} \
5150
outputpos += (a)*(b);
5251

52+
typedef enum {
53+
PHP_LITTLE_ENDIAN,
54+
PHP_BIG_ENDIAN,
55+
} php_pack_endianness;
56+
5357
#ifdef WORDS_BIGENDIAN
54-
#define MACHINE_LITTLE_ENDIAN 0
58+
# define MACHINE_LITTLE_ENDIAN 0
59+
# define PHP_MACHINE_ENDIAN PHP_BIG_ENDIAN
5560
#else
56-
#define MACHINE_LITTLE_ENDIAN 1
61+
# define MACHINE_LITTLE_ENDIAN 1
62+
# define PHP_MACHINE_ENDIAN PHP_LITTLE_ENDIAN
63+
#endif
64+
65+
#ifdef ZEND_ENABLE_ZVAL_LONG64
66+
# define PHP_LONG_BSWAP(u) ZEND_BYTES_SWAP64(u)
67+
#else
68+
# define PHP_LONG_BSWAP(u) ZEND_BYTES_SWAP32(u)
5769
#endif
5870

5971
typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
@@ -62,41 +74,17 @@ typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
6274
typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
6375
typedef ZEND_SET_ALIGNED(1, int unaligned_int);
6476

65-
/* Mapping of byte from char (8bit) to long for machine endian */
66-
static int byte_map[1];
67-
68-
/* Mappings of bytes from int (machine dependent) to int for machine endian */
69-
static int int_map[sizeof(int)];
70-
71-
/* Mappings of bytes from shorts (16bit) for all endian environments */
72-
static int machine_endian_short_map[2];
73-
static int big_endian_short_map[2];
74-
static int little_endian_short_map[2];
75-
76-
/* Mappings of bytes from longs (32bit) for all endian environments */
77-
static int machine_endian_long_map[4];
78-
static int big_endian_long_map[4];
79-
static int little_endian_long_map[4];
80-
81-
#if SIZEOF_ZEND_LONG > 4
82-
/* Mappings of bytes from quads (64bit) for all endian environments */
83-
static int machine_endian_longlong_map[8];
84-
static int big_endian_longlong_map[8];
85-
static int little_endian_longlong_map[8];
86-
#endif
87-
8877
/* {{{ php_pack */
89-
static void php_pack(zval *val, size_t size, int *map, char *output)
78+
static void php_pack(const zval *val, size_t size, php_pack_endianness endianness, char *output)
9079
{
91-
size_t i;
92-
char *v;
80+
zend_ulong zl = zval_get_long(val);
9381

94-
convert_to_long(val);
95-
v = (char *) &Z_LVAL_P(val);
96-
97-
for (i = 0; i < size; i++) {
98-
*output++ = v[map[i]];
82+
if ((endianness == PHP_LITTLE_ENDIAN) != MACHINE_LITTLE_ENDIAN) {
83+
zl = PHP_LONG_BSWAP(zl);
84+
zl >>= (sizeof(zl) - size) * 8;
9985
}
86+
87+
memcpy(output, (const char *) &zl, size);
10088
}
10189
/* }}} */
10290

@@ -529,7 +517,7 @@ PHP_FUNCTION(pack)
529517
case 'c':
530518
case 'C':
531519
while (arg-- > 0) {
532-
php_pack(&argv[currentarg++], 1, byte_map, &ZSTR_VAL(output)[outputpos]);
520+
php_pack(&argv[currentarg++], 1, PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
533521
outputpos++;
534522
}
535523
break;
@@ -538,16 +526,16 @@ PHP_FUNCTION(pack)
538526
case 'S':
539527
case 'n':
540528
case 'v': {
541-
int *map = machine_endian_short_map;
529+
php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
542530

543531
if (code == 'n') {
544-
map = big_endian_short_map;
532+
endianness = PHP_BIG_ENDIAN;
545533
} else if (code == 'v') {
546-
map = little_endian_short_map;
534+
endianness = PHP_LITTLE_ENDIAN;
547535
}
548536

549537
while (arg-- > 0) {
550-
php_pack(&argv[currentarg++], 2, map, &ZSTR_VAL(output)[outputpos]);
538+
php_pack(&argv[currentarg++], 2, endianness, &ZSTR_VAL(output)[outputpos]);
551539
outputpos += 2;
552540
}
553541
break;
@@ -556,7 +544,7 @@ PHP_FUNCTION(pack)
556544
case 'i':
557545
case 'I':
558546
while (arg-- > 0) {
559-
php_pack(&argv[currentarg++], sizeof(int), int_map, &ZSTR_VAL(output)[outputpos]);
547+
php_pack(&argv[currentarg++], sizeof(int), PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
560548
outputpos += sizeof(int);
561549
}
562550
break;
@@ -565,16 +553,16 @@ PHP_FUNCTION(pack)
565553
case 'L':
566554
case 'N':
567555
case 'V': {
568-
int *map = machine_endian_long_map;
556+
php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
569557

570558
if (code == 'N') {
571-
map = big_endian_long_map;
559+
endianness = PHP_BIG_ENDIAN;
572560
} else if (code == 'V') {
573-
map = little_endian_long_map;
561+
endianness = PHP_LITTLE_ENDIAN;
574562
}
575563

576564
while (arg-- > 0) {
577-
php_pack(&argv[currentarg++], 4, map, &ZSTR_VAL(output)[outputpos]);
565+
php_pack(&argv[currentarg++], 4, endianness, &ZSTR_VAL(output)[outputpos]);
578566
outputpos += 4;
579567
}
580568
break;
@@ -585,16 +573,16 @@ PHP_FUNCTION(pack)
585573
case 'Q':
586574
case 'J':
587575
case 'P': {
588-
int *map = machine_endian_longlong_map;
576+
php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
589577

590578
if (code == 'J') {
591-
map = big_endian_longlong_map;
579+
endianness = PHP_BIG_ENDIAN;
592580
} else if (code == 'P') {
593-
map = little_endian_longlong_map;
581+
endianness = PHP_LITTLE_ENDIAN;
594582
}
595583

596584
while (arg-- > 0) {
597-
php_pack(&argv[currentarg++], 8, map, &ZSTR_VAL(output)[outputpos]);
585+
php_pack(&argv[currentarg++], 8, endianness, &ZSTR_VAL(output)[outputpos]);
598586
outputpos += 8;
599587
}
600588
break;
@@ -1199,127 +1187,3 @@ PHP_FUNCTION(unpack)
11991187
}
12001188
}
12011189
/* }}} */
1202-
1203-
/* {{{ PHP_MINIT_FUNCTION */
1204-
PHP_MINIT_FUNCTION(pack)
1205-
{
1206-
int i;
1207-
1208-
if (MACHINE_LITTLE_ENDIAN) {
1209-
/* Where to get lo to hi bytes from */
1210-
byte_map[0] = 0;
1211-
1212-
for (i = 0; i < (int)sizeof(int); i++) {
1213-
int_map[i] = i;
1214-
}
1215-
1216-
machine_endian_short_map[0] = 0;
1217-
machine_endian_short_map[1] = 1;
1218-
big_endian_short_map[0] = 1;
1219-
big_endian_short_map[1] = 0;
1220-
little_endian_short_map[0] = 0;
1221-
little_endian_short_map[1] = 1;
1222-
1223-
machine_endian_long_map[0] = 0;
1224-
machine_endian_long_map[1] = 1;
1225-
machine_endian_long_map[2] = 2;
1226-
machine_endian_long_map[3] = 3;
1227-
big_endian_long_map[0] = 3;
1228-
big_endian_long_map[1] = 2;
1229-
big_endian_long_map[2] = 1;
1230-
big_endian_long_map[3] = 0;
1231-
little_endian_long_map[0] = 0;
1232-
little_endian_long_map[1] = 1;
1233-
little_endian_long_map[2] = 2;
1234-
little_endian_long_map[3] = 3;
1235-
1236-
#if SIZEOF_ZEND_LONG > 4
1237-
machine_endian_longlong_map[0] = 0;
1238-
machine_endian_longlong_map[1] = 1;
1239-
machine_endian_longlong_map[2] = 2;
1240-
machine_endian_longlong_map[3] = 3;
1241-
machine_endian_longlong_map[4] = 4;
1242-
machine_endian_longlong_map[5] = 5;
1243-
machine_endian_longlong_map[6] = 6;
1244-
machine_endian_longlong_map[7] = 7;
1245-
big_endian_longlong_map[0] = 7;
1246-
big_endian_longlong_map[1] = 6;
1247-
big_endian_longlong_map[2] = 5;
1248-
big_endian_longlong_map[3] = 4;
1249-
big_endian_longlong_map[4] = 3;
1250-
big_endian_longlong_map[5] = 2;
1251-
big_endian_longlong_map[6] = 1;
1252-
big_endian_longlong_map[7] = 0;
1253-
little_endian_longlong_map[0] = 0;
1254-
little_endian_longlong_map[1] = 1;
1255-
little_endian_longlong_map[2] = 2;
1256-
little_endian_longlong_map[3] = 3;
1257-
little_endian_longlong_map[4] = 4;
1258-
little_endian_longlong_map[5] = 5;
1259-
little_endian_longlong_map[6] = 6;
1260-
little_endian_longlong_map[7] = 7;
1261-
#endif
1262-
}
1263-
else {
1264-
zval val;
1265-
int size = sizeof(Z_LVAL(val));
1266-
Z_LVAL(val)=0; /*silence a warning*/
1267-
1268-
/* Where to get hi to lo bytes from */
1269-
byte_map[0] = size - 1;
1270-
1271-
for (i = 0; i < (int)sizeof(int); i++) {
1272-
int_map[i] = size - (sizeof(int) - i);
1273-
}
1274-
1275-
machine_endian_short_map[0] = size - 2;
1276-
machine_endian_short_map[1] = size - 1;
1277-
big_endian_short_map[0] = size - 2;
1278-
big_endian_short_map[1] = size - 1;
1279-
little_endian_short_map[0] = size - 1;
1280-
little_endian_short_map[1] = size - 2;
1281-
1282-
machine_endian_long_map[0] = size - 4;
1283-
machine_endian_long_map[1] = size - 3;
1284-
machine_endian_long_map[2] = size - 2;
1285-
machine_endian_long_map[3] = size - 1;
1286-
big_endian_long_map[0] = size - 4;
1287-
big_endian_long_map[1] = size - 3;
1288-
big_endian_long_map[2] = size - 2;
1289-
big_endian_long_map[3] = size - 1;
1290-
little_endian_long_map[0] = size - 1;
1291-
little_endian_long_map[1] = size - 2;
1292-
little_endian_long_map[2] = size - 3;
1293-
little_endian_long_map[3] = size - 4;
1294-
1295-
#if SIZEOF_ZEND_LONG > 4
1296-
machine_endian_longlong_map[0] = size - 8;
1297-
machine_endian_longlong_map[1] = size - 7;
1298-
machine_endian_longlong_map[2] = size - 6;
1299-
machine_endian_longlong_map[3] = size - 5;
1300-
machine_endian_longlong_map[4] = size - 4;
1301-
machine_endian_longlong_map[5] = size - 3;
1302-
machine_endian_longlong_map[6] = size - 2;
1303-
machine_endian_longlong_map[7] = size - 1;
1304-
big_endian_longlong_map[0] = size - 8;
1305-
big_endian_longlong_map[1] = size - 7;
1306-
big_endian_longlong_map[2] = size - 6;
1307-
big_endian_longlong_map[3] = size - 5;
1308-
big_endian_longlong_map[4] = size - 4;
1309-
big_endian_longlong_map[5] = size - 3;
1310-
big_endian_longlong_map[6] = size - 2;
1311-
big_endian_longlong_map[7] = size - 1;
1312-
little_endian_longlong_map[0] = size - 1;
1313-
little_endian_longlong_map[1] = size - 2;
1314-
little_endian_longlong_map[2] = size - 3;
1315-
little_endian_longlong_map[3] = size - 4;
1316-
little_endian_longlong_map[4] = size - 5;
1317-
little_endian_longlong_map[5] = size - 6;
1318-
little_endian_longlong_map[6] = size - 7;
1319-
little_endian_longlong_map[7] = size - 8;
1320-
#endif
1321-
}
1322-
1323-
return SUCCESS;
1324-
}
1325-
/* }}} */

ext/standard/pack.h

Lines changed: 0 additions & 22 deletions
This file was deleted.

ext/standard/php_standard.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
#include "php_ext_syslog.h"
3030
#include "php_filestat.h"
3131
#include "php_browscap.h"
32-
#include "pack.h"
3332
#include "url.h"
3433
#include "pageinfo.h"
3534
#include "fsock.h"

0 commit comments

Comments
 (0)