atom feed5 messages in net.php.lists.internals[PHP-DEV] [PATCH] string_offset acces...
FromSent OnAttachments
Dmitry StogovJul 15, 2010 2:09 am 
Felipe PenaJul 15, 2010 3:18 pm 
Stas MalyshevJul 16, 2010 12:47 am 
Ferenc KovacsJul 16, 2010 12:58 am 
Dmitry StogovJul 16, 2010 1:06 am 
Subject:[PHP-DEV] [PATCH] string_offset access optimization
From:Dmitry Stogov (dmi@zend.com)
Date:Jul 15, 2010 2:09:51 am
List:net.php.lists.internals

Hi,

Recently I noticed that reading of string offset is performed in two steps. At first special string_offset variant of temporary_variable is created in zend_fetch_dimension_address_read() and then the real string value is created in _get_zval_ptr_var_string_offset().

I think we can create the real string in the first place. This makes 50% speed-up on string offset reading operation and allows to eliminate some checks and conditional brunches in VM.

The patch is attached (don't forget to regenerate zend_vm_execute.h to test it). However it changes behavior in one bogus case. The following code now will emit "b" (currently it generates a fatal error - cannot use string offset as an array).

$str = "abs"; var_dumo($str[1][0]);

I think it's not a problem at all. "b" makes sense because "abs"[1] -> "b" and "b"[0] -> "b".

I'm going to commit the patch in case of no objections.

Thanks. Dmitry.

Index: Zend/zend_execute.c =================================================================== --- Zend/zend_execute.c (revision 301262) +++ Zend/zend_execute.c (working copy) @@ -177,44 +177,14 @@ return should_free->var = &T(var).tmp_var; }

-static zval *_get_zval_ptr_var_string_offset(zend_uint var, const temp_variable
*Ts, zend_free_op *should_free TSRMLS_DC) +static zend_always_inline zval *_get_zval_ptr_var(zend_uint var, const
temp_variable *Ts, zend_free_op *should_free TSRMLS_DC) { - temp_variable *T = &T(var); - zval *str = T->str_offset.str; - zval *ptr; + zval *ptr = T(var).var.ptr;

- /* string offset */ - ALLOC_ZVAL(ptr); - T->str_offset.ptr = ptr; - should_free->var = ptr; - - if (T->str_offset.str->type != IS_STRING - || ((int)T->str_offset.offset < 0) - || (T->str_offset.str->value.str.len <= (int)T->str_offset.offset)) { - ptr->value.str.val = STR_EMPTY_ALLOC(); - ptr->value.str.len = 0; - } else { - ptr->value.str.val = estrndup(str->value.str.val + T->str_offset.offset, 1); - ptr->value.str.len = 1; - } - PZVAL_UNLOCK_FREE(str); - Z_SET_REFCOUNT_P(ptr, 1); - Z_SET_ISREF_P(ptr); - ptr->type = IS_STRING; + PZVAL_UNLOCK(ptr, should_free); return ptr; }

-static zend_always_inline zval *_get_zval_ptr_var(zend_uint var, const
temp_variable *Ts, zend_free_op *should_free TSRMLS_DC) -{ - zval *ptr = T(var).var.ptr; - if (EXPECTED(ptr != NULL)) { - PZVAL_UNLOCK(ptr, should_free); - return ptr; - } else { - return _get_zval_ptr_var_string_offset(var, Ts, should_free TSRMLS_CC); - } -} - static zend_never_inline zval **_get_zval_cv_lookup(zval ***ptr, zend_uint var,
int type TSRMLS_DC) { zend_compiled_variable *cv = &CV_DEF_OF(var); @@ -1286,14 +1256,23 @@ dim = &tmp; } if (result) { + zval *ptr; + + ALLOC_ZVAL(ptr); + INIT_PZVAL(ptr); + Z_TYPE_P(ptr) = IS_STRING; + if (Z_LVAL_P(dim) < 0 || Z_STRLEN_P(container) <= Z_LVAL_P(dim)) { zend_error(E_NOTICE, "Uninitialized string offset: %ld", Z_LVAL_P(dim)); + Z_STRVAL_P(ptr) = STR_EMPTY_ALLOC(); + Z_STRLEN_P(ptr) = 0; + } else { + Z_STRVAL_P(ptr) = (char*)emalloc(2); + Z_STRVAL_P(ptr)[0] = Z_STRVAL_P(container)[Z_LVAL_P(dim)]; + Z_STRVAL_P(ptr)[1] = 0; + Z_STRLEN_P(ptr) = 1; } - result->str_offset.str = container; - PZVAL_LOCK(container); - result->str_offset.offset = Z_LVAL_P(dim); - result->var.ptr_ptr = NULL; - result->var.ptr = NULL; + AI_SET_PTR(result, ptr); } return; } Index: Zend/micro_bench.php =================================================================== --- Zend/micro_bench.php (revision 301262) +++ Zend/micro_bench.php (working copy) @@ -188,6 +188,20 @@ } }

+function read_hash($n) { + $hash = array('test' => 0); + for ($i = 0; $i < $n; ++$i) { + $x = $hash['test']; + } +} + +function read_str_offset($n) { + $str = "test"; + for ($i = 0; $i < $n; ++$i) { + $x = $str[1]; + } +} + /*****/

function empty_loop($n) { @@ -300,4 +314,8 @@ $t = end_test($t, '$x = $_GET', $overhead); read_global_var(N); $t = end_test($t, '$x = $GLOBALS[\'v\']', $overhead); +read_hash(N); +$t = end_test($t, '$x = $hash[\'v\']', $overhead); +read_str_offset(N); +$t = end_test($t, '$x = $str[0]', $overhead); total($t0, "Total"); Index: Zend/zend_vm_def.h =================================================================== --- Zend/zend_vm_def.h (revision 301262) +++ Zend/zend_vm_def.h (working copy) @@ -1182,9 +1182,6 @@ PZVAL_LOCK(*EX_T(opline->op1.var).var.ptr_ptr); } container = GET_OP1_ZVAL_PTR_PTR(BP_VAR_R); - if (OP1_TYPE == IS_VAR && UNEXPECTED(container == NULL)) { - zend_error_noreturn(E_ERROR, "Cannot use string offset as an array"); - } zend_fetch_dimension_address_read(!RETURN_VALUE_USED(opline)?NULL:&EX_T(opline->result.var),
container, GET_OP2_ZVAL_PTR(BP_VAR_R), OP2_TYPE, BP_VAR_R TSRMLS_CC); FREE_OP2(); FREE_OP1_VAR_PTR(); @@ -1256,10 +1253,6 @@

SAVE_OPLINE(); container = GET_OP1_ZVAL_PTR_PTR(BP_VAR_IS); - - if (OP1_TYPE == IS_VAR && UNEXPECTED(container == NULL)) { - zend_error_noreturn(E_ERROR, "Cannot use string offset as an array"); - } zend_fetch_dimension_address_read(&EX_T(opline->result.var), container,
GET_OP2_ZVAL_PTR(BP_VAR_R), OP2_TYPE, BP_VAR_IS TSRMLS_CC); FREE_OP2(); FREE_OP1_VAR_PTR(); @@ -1289,9 +1282,6 @@ zend_error_noreturn(E_ERROR, "Cannot use [] for reading"); } container = GET_OP1_ZVAL_PTR_PTR(BP_VAR_R); - if (OP1_TYPE == IS_VAR && UNEXPECTED(container == NULL)) { - zend_error_noreturn(E_ERROR, "Cannot use string offset as an array"); - } zend_fetch_dimension_address_read(&EX_T(opline->result.var), container,
GET_OP2_ZVAL_PTR(BP_VAR_R), OP2_TYPE, BP_VAR_R TSRMLS_CC); } FREE_OP2(); @@ -3257,33 +3247,17 @@ ZEND_VM_HANDLER(48, ZEND_CASE, CONST|TMP|VAR|CV, CONST|TMP|VAR|CV) { USE_OPLINE - int switch_expr_is_overloaded=0; zend_free_op free_op1, free_op2;

SAVE_OPLINE(); if (OP1_TYPE==IS_VAR) { - if (EX_T(opline->op1.var).var.ptr_ptr) { - PZVAL_LOCK(EX_T(opline->op1.var).var.ptr); - } else { - switch_expr_is_overloaded = 1; - Z_ADDREF_P(EX_T(opline->op1.var).str_offset.str); - } + PZVAL_LOCK(EX_T(opline->op1.var).var.ptr); } is_equal_function(&EX_T(opline->result.var).tmp_var, GET_OP1_ZVAL_PTR(BP_VAR_R), GET_OP2_ZVAL_PTR(BP_VAR_R) TSRMLS_CC);

FREE_OP2(); - if (switch_expr_is_overloaded) { - /* We only free op1 if this is a string offset, - * Since if it is a TMP_VAR, it'll be reused by - * other CASE opcodes (whereas string offsets - * are allocated at each get_zval_ptr()) - */ - FREE_OP1(); - EX_T(opline->op1.var).var.ptr_ptr = NULL; - EX_T(opline->op1.var).var.ptr = NULL; - } CHECK_EXCEPTION(); ZEND_VM_NEXT_OPCODE(); } @@ -3358,7 +3332,6 @@ obj = GET_OP1_OBJ_ZVAL_PTR(BP_VAR_R);

if (OP1_TYPE == IS_CONST || - (OP1_TYPE == IS_VAR && UNEXPECTED(obj == NULL)) || UNEXPECTED(Z_TYPE_P(obj) != IS_OBJECT)) { zend_error_noreturn(E_ERROR, "__clone method called on non-object"); } @@ -4383,129 +4356,126 @@ ZEND_VM_HELPER_EX(zend_isset_isempty_dim_prop_obj_handler, VAR|UNUSED|CV,
CONST|TMP|VAR|CV, int prop_dim) { USE_OPLINE - zend_free_op free_op1; + zend_free_op free_op1, free_op2; zval **container; zval **value = NULL; int result = 0; ulong hval; long index; + zval *offset;

SAVE_OPLINE(); container = GET_OP1_OBJ_ZVAL_PTR_PTR(BP_VAR_IS);

- if (OP1_TYPE != IS_VAR || container) { - zend_free_op free_op2; - zval *offset = GET_OP2_ZVAL_PTR(BP_VAR_R); + offset = GET_OP2_ZVAL_PTR(BP_VAR_R);

- if (Z_TYPE_PP(container) == IS_ARRAY && !prop_dim) { - HashTable *ht; - int isset = 0; + if (Z_TYPE_PP(container) == IS_ARRAY && !prop_dim) { + HashTable *ht; + int isset = 0;

- ht = Z_ARRVAL_PP(container); + ht = Z_ARRVAL_PP(container);

- switch (Z_TYPE_P(offset)) { - case IS_DOUBLE: - index = zend_dval_to_lval(Z_DVAL_P(offset)); - ZEND_VM_C_GOTO(num_index_prop); - case IS_RESOURCE: - case IS_BOOL: - case IS_LONG: - index = Z_LVAL_P(offset); + switch (Z_TYPE_P(offset)) { + case IS_DOUBLE: + index = zend_dval_to_lval(Z_DVAL_P(offset)); + ZEND_VM_C_GOTO(num_index_prop); + case IS_RESOURCE: + case IS_BOOL: + case IS_LONG: + index = Z_LVAL_P(offset); ZEND_VM_C_LABEL(num_index_prop): - if (zend_hash_index_find(ht, index, (void **) &value) == SUCCESS) { - isset = 1; + if (zend_hash_index_find(ht, index, (void **) &value) == SUCCESS) { + isset = 1; + } + break; + case IS_STRING: + if (OP2_TYPE == IS_CONST) { + hval = Z_HASH_P(offset); + } else { + if (!prop_dim) { + ZEND_HANDLE_NUMERIC_EX(Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1, index,
ZEND_VM_C_GOTO(num_index_prop)); } - break; - case IS_STRING: - if (OP2_TYPE == IS_CONST) { - hval = Z_HASH_P(offset); + if (IS_INTERNED(Z_STRVAL_P(offset))) { + hval = INTERNED_HASH(Z_STRVAL_P(offset)); } else { - if (!prop_dim) { - ZEND_HANDLE_NUMERIC_EX(Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1, index,
ZEND_VM_C_GOTO(num_index_prop)); - } - if (IS_INTERNED(Z_STRVAL_P(offset))) { - hval = INTERNED_HASH(Z_STRVAL_P(offset)); - } else { - hval = zend_hash_func(Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1); - } + hval = zend_hash_func(Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1); } - if (zend_hash_quick_find(ht, Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1,
hval, (void **) &value) == SUCCESS) { - isset = 1; - } - break; - case IS_NULL: - if (zend_hash_find(ht, "", sizeof(""), (void **) &value) == SUCCESS) { - isset = 1; - } - break; - default: - zend_error(E_WARNING, "Illegal offset type in isset or empty"); - - break; - } - - if (opline->extended_value & ZEND_ISSET) { - if (isset && Z_TYPE_PP(value) == IS_NULL) { - result = 0; - } else { - result = isset; } - } else /* if (opline->extended_value & ZEND_ISEMPTY) */ { - if (!isset || !i_zend_is_true(*value)) { - result = 0; - } else { - result = 1; + if (zend_hash_quick_find(ht, Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1,
hval, (void **) &value) == SUCCESS) { + isset = 1; } - } - FREE_OP2(); - } else if (Z_TYPE_PP(container) == IS_OBJECT) { - if (IS_OP2_TMP_FREE()) { - MAKE_REAL_ZVAL_PTR(offset); - } - if (prop_dim) { - if (Z_OBJ_HT_P(*container)->has_property) { - result = Z_OBJ_HT_P(*container)->has_property(*container, offset,
(opline->extended_value & ZEND_ISEMPTY) != 0, ((OP2_TYPE == IS_CONST) ?
opline->op2.literal : NULL) TSRMLS_CC); - } else { - zend_error(E_NOTICE, "Trying to check property of non-object"); - result = 0; + break; + case IS_NULL: + if (zend_hash_find(ht, "", sizeof(""), (void **) &value) == SUCCESS) { + isset = 1; } + break; + default: + zend_error(E_WARNING, "Illegal offset type in isset or empty"); + break; + } + + if (opline->extended_value & ZEND_ISSET) { + if (isset && Z_TYPE_PP(value) == IS_NULL) { + result = 0; } else { - if (Z_OBJ_HT_P(*container)->has_dimension) { - result = Z_OBJ_HT_P(*container)->has_dimension(*container, offset,
(opline->extended_value & ZEND_ISEMPTY) != 0 TSRMLS_CC); - } else { - zend_error(E_NOTICE, "Trying to check element of non-array"); - result = 0; - } + result = isset; } - if (IS_OP2_TMP_FREE()) { - zval_ptr_dtor(&offset); + } else /* if (opline->extended_value & ZEND_ISEMPTY) */ { + if (!isset || !i_zend_is_true(*value)) { + result = 0; } else { - FREE_OP2(); + result = 1; } - } else if ((*container)->type == IS_STRING && !prop_dim) { /* string offsets
*/ - zval tmp; - - if (Z_TYPE_P(offset) != IS_LONG) { - ZVAL_COPY_VALUE(&tmp, offset); - zval_copy_ctor(&tmp); - convert_to_long(&tmp); - offset = &tmp; + } + FREE_OP2(); + } else if (Z_TYPE_PP(container) == IS_OBJECT) { + if (IS_OP2_TMP_FREE()) { + MAKE_REAL_ZVAL_PTR(offset); + } + if (prop_dim) { + if (Z_OBJ_HT_P(*container)->has_property) { + result = Z_OBJ_HT_P(*container)->has_property(*container, offset,
(opline->extended_value & ZEND_ISEMPTY) != 0, ((OP2_TYPE == IS_CONST) ?
opline->op2.literal : NULL) TSRMLS_CC); + } else { + zend_error(E_NOTICE, "Trying to check property of non-object"); + result = 0; } - if (Z_TYPE_P(offset) == IS_LONG) { - if (opline->extended_value & ZEND_ISSET) { - if (offset->value.lval >= 0 && offset->value.lval <
Z_STRLEN_PP(container)) { - result = 1; - } - } else /* if (opline->extended_value & ZEND_ISEMPTY) */ { - if (offset->value.lval >= 0 && offset->value.lval < Z_STRLEN_PP(container)
&& Z_STRVAL_PP(container)[offset->value.lval] != '0') { - result = 1; - } - } + } else { + if (Z_OBJ_HT_P(*container)->has_dimension) { + result = Z_OBJ_HT_P(*container)->has_dimension(*container, offset,
(opline->extended_value & ZEND_ISEMPTY) != 0 TSRMLS_CC); + } else { + zend_error(E_NOTICE, "Trying to check element of non-array"); + result = 0; } - FREE_OP2(); + } + if (IS_OP2_TMP_FREE()) { + zval_ptr_dtor(&offset); } else { FREE_OP2(); } + } else if ((*container)->type == IS_STRING && !prop_dim) { /* string offsets
*/ + zval tmp; + + if (Z_TYPE_P(offset) != IS_LONG) { + ZVAL_COPY_VALUE(&tmp, offset); + zval_copy_ctor(&tmp); + convert_to_long(&tmp); + offset = &tmp; + } + if (Z_TYPE_P(offset) == IS_LONG) { + if (opline->extended_value & ZEND_ISSET) { + if (offset->value.lval >= 0 && offset->value.lval < Z_STRLEN_PP(container))
{ + result = 1; + } + } else /* if (opline->extended_value & ZEND_ISEMPTY) */ { + if (offset->value.lval >= 0 && offset->value.lval < Z_STRLEN_PP(container)
&& Z_STRVAL_PP(container)[offset->value.lval] != '0') { + result = 1; + } + } + } + FREE_OP2(); + } else { + FREE_OP2(); }

Z_TYPE(EX_T(opline->result.var).tmp_var) = IS_BOOL;