How are PHP's built-in functions implemented internally? - php

are these functions written the same way as user functions? I mean with PHP code and with regular expressions and stuff like that?
For example:
filter_var($email, FILTER_VALIDATE_EMAIL);
vs.
http://www.totallyphp.co.uk/code/validate_an_email_address_using_regular_expressions.htm

PHP is written in C. The built-in PHP functions are written in C and then compiled to form the PHP language library.
If you want to extend PHP by editing or writing your own functions, check this out: http://www.php.net/~wez/extending-php.pdf
EDIT:
here you go :
This is the original C code for the function:
/* {{{ proto mixed filter_var(mixed variable [, long filter [, mixed options]])
 * Returns the filtered version of the variable.
 *
 * Parses one mandatory value plus an optional filter id (defaulting to
 * FILTER_DEFAULT) and an optional options argument. When the filter id is not
 * recognised by PHP_FILTER_ID_EXISTS the function returns FALSE; otherwise the
 * value is copied into return_value and filtered in place by php_filter_call()
 * with FILTER_REQUIRE_SCALAR as the starting flags.
 */
PHP_FUNCTION(filter_var)
{
    zval *data;
    zval **filter_args = NULL;
    long filter = FILTER_DEFAULT;

    if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z/|lZ", &data, &filter, &filter_args)) {
        return;
    }

    if (PHP_FILTER_ID_EXISTS(filter)) {
        /* Work on a copy so the caller's variable is left untouched. */
        MAKE_COPY_ZVAL(&data, return_value);
        php_filter_call(&return_value, filter, filter_args, 1, FILTER_REQUIRE_SCALAR TSRMLS_CC);
    } else {
        RETURN_FALSE;
    }
}
/* }}} */
/*
 * Resolves the effective filter id, flags and options from filter_args and
 * then applies the filter to *filtered in place.
 *
 * filtered     - value to filter; overwritten with the result.
 * filter       - filter id; may be replaced by a value read from filter_args.
 * filter_args  - optional; either a non-array value that is read as a long,
 *                or an array searched for the keys "filter", "flags" and
 *                "options".
 * copy         - when non-zero, *filtered is separated (SEPARATE_ZVAL) before
 *                it is overwritten on the failure paths; the value is also
 *                forwarded to the filter helpers.
 * filter_flags - starting flags; may be replaced by a value from filter_args.
 */
static void php_filter_call(zval **filtered, long filter, zval **filter_args, const int copy, long filter_flags TSRMLS_DC) /* {{{ */
{
zval *options = NULL;
zval **option;
/* Never reassigned in this function, so the helpers below receive NULL. */
char *charset = NULL;
/* Case 1: filter_args is present but not an array - read it as one long. */
if (filter_args && Z_TYPE_PP(filter_args) != IS_ARRAY) {
long lval;
PHP_FILTER_GET_LONG_OPT(filter_args, lval);
if (filter != -1) { /* handler for array apply */
/* filter_args is the filter_flags */
filter_flags = lval;
/* Unless one of the array modes was requested, insist on a scalar. */
if (!(filter_flags & FILTER_REQUIRE_ARRAY || filter_flags & FILTER_FORCE_ARRAY)) {
filter_flags |= FILTER_REQUIRE_SCALAR;
}
} else {
/* filter == -1: the long is the filter id itself. */
filter = lval;
}
/* Case 2: filter_args is an array - pick up "filter", "flags", "options". */
} else if (filter_args) {
if (zend_hash_find(HASH_OF(*filter_args), "filter", sizeof("filter"), (void **)&option) == SUCCESS) {
PHP_FILTER_GET_LONG_OPT(option, filter);
}
if (zend_hash_find(HASH_OF(*filter_args), "flags", sizeof("flags"), (void **)&option) == SUCCESS) {
PHP_FILTER_GET_LONG_OPT(option, filter_flags);
/* Same scalar default as in case 1. */
if (!(filter_flags & FILTER_REQUIRE_ARRAY || filter_flags & FILTER_FORCE_ARRAY)) {
filter_flags |= FILTER_REQUIRE_SCALAR;
}
}
if (zend_hash_find(HASH_OF(*filter_args), "options", sizeof("options"), (void **)&option) == SUCCESS) {
/* For every filter except FILTER_CALLBACK, non-array options are ignored. */
if (filter != FILTER_CALLBACK) {
if (Z_TYPE_PP(option) == IS_ARRAY) {
options = *option;
}
} else {
/* FILTER_CALLBACK: take the options value as-is and clear all flags. */
options = *option;
filter_flags = 0;
}
}
}
/* The value to filter is an array. */
if (Z_TYPE_PP(filtered) == IS_ARRAY) {
/* A scalar was required: replace the array with NULL or FALSE and stop. */
if (filter_flags & FILTER_REQUIRE_SCALAR) {
if (copy) {
SEPARATE_ZVAL(filtered);
}
zval_dtor(*filtered);
if (filter_flags & FILTER_NULL_ON_FAILURE) {
ZVAL_NULL(*filtered);
} else {
ZVAL_FALSE(*filtered);
}
return;
}
/* Arrays are allowed: hand the array to the recursive helper. */
php_zval_filter_recursive(filtered, filter, filter_flags, options, charset, copy TSRMLS_CC);
return;
}
/* The value is not an array but an array was required: fail the same way. */
if (filter_flags & FILTER_REQUIRE_ARRAY) {
if (copy) {
SEPARATE_ZVAL(filtered);
}
zval_dtor(*filtered);
if (filter_flags & FILTER_NULL_ON_FAILURE) {
ZVAL_NULL(*filtered);
} else {
ZVAL_FALSE(*filtered);
}
return;
}
/* Plain scalar: run the filter on it. */
php_zval_filter(filtered, filter, filter_flags, options, charset, copy TSRMLS_CC);
/* FILTER_FORCE_ARRAY: wrap the filtered value in a one-element array. */
if (filter_flags & FILTER_FORCE_ARRAY) {
zval *tmp;
ALLOC_ZVAL(tmp);
MAKE_COPY_ZVAL(filtered, tmp);
zval_dtor(*filtered);
array_init(*filtered);
add_next_index_zval(*filtered, tmp);
}
}
AND HERE IS YOUR VALIDATE EMAIL ROUTINE:
-- this answers your question. Yes, it is done by regex internally.
/*
 * Validates `value` as an e-mail address.
 *
 * Values longer than 320 bytes are rejected up front. Otherwise the value is
 * matched against the compiled regular expression below; a negative result
 * from pcre_exec() (no match or error) fails validation via
 * RETURN_VALIDATION_FAILED. On a match the value is left as it is.
 *
 * The local-part/domain separator in the pattern is '@'. (This listing had
 * been mangled so that '@' appeared as '#', which would have made the pattern
 * demand a literal '#' between local part and domain.)
 */
void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
{
    /*
     * The regex below is based on a regex by Michael Rushton.
     * However, it is not identical. I changed it to only consider routeable
     * addresses as valid. Michael's regex considers a@b a valid address
     * which conflicts with section 2.3.5 of RFC 5321 which states that:
     *
     * Only resolvable, fully-qualified domain names (FQDNs) are permitted
     * when domain names are used in SMTP. In other words, names that can
     * be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
     * in Section 5) are permitted, as are CNAME RRs whose targets can be
     * resolved, in turn, to MX or address RRs. Local nicknames or
     * unqualified names MUST NOT be used.
     *
     * This regex does not handle comments and folding whitespace. While
     * this is technically valid in an email address, these parts aren't
     * actually part of the address itself.
     *
     * Michael's regex carries this copyright:
     *
     * Copyright © Michael Rushton 2009-10
     * http://squiloople.com/
     * Feel free to use and redistribute this code. But please keep this copyright notice.
     *
     */
    const char regexp[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
    pcre *re = NULL;
    pcre_extra *pcre_extra = NULL;
    int preg_options = 0;
    int ovector[150]; /* Needs to be a multiple of 3 */
    int matches;

    /* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
    if (Z_STRLEN_P(value) > 320) {
        RETURN_VALIDATION_FAILED
    }

    /* Fetch the compiled form of the pattern; fail if it cannot be obtained. */
    re = pcre_get_compiled_regex((char *)regexp, &pcre_extra, &preg_options TSRMLS_CC);
    if (!re) {
        RETURN_VALIDATION_FAILED
    }

    matches = pcre_exec(re, NULL, Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, ovector, 3);

    /* 0 means that the vector is too small to hold all the captured substring offsets */
    if (matches < 0) {
        RETURN_VALIDATION_FAILED
    }
}
/* }}} */

PHP functions are either :
Written in C -- and not in PHP
Or just wrappers to functions provided by other libraries (for instance, PHP's curl extension is just a wrapper around the curl library).
If you are curious, you can take a look at the sources of PHP -- here's its SVN : http://svn.php.net/viewvc/
For instance, the filter_var() function should be defined somewhere in the sources of the filter extension.

Nope. PHP-internal functions are written in C, not with PHP code. Which looks quite unwieldy due to the many Zend-runtime macros and how parameters are transferred from PHP into C structures.
That particular function does use a regular expression. It also makes a nice example:
http://svn.php.net/repository/php/php-src/branches/PHP_5_3/ext/filter/logical_filters.c
Look for regexp[] somewhere in the middle.

Related

Setting O_NONBLOCK on a Unix socket used to communicate with php-fpm causes a segmentation fault

I'm writing a Web Server to support FastCGI. Using Unix socket to communicate with php-fpm, the non-block option cannot be set, which will cause the php-fpm response parser to access illegal memory.
I've used socket() to set non-blocking options, and using fcntl() to set non-blocking options can lead to illegal memory access. Once the non-blocking options are cancelled, everything works. But my Web Server is a non-blocking event-driven model, so I have to use Unix sockets for non-blocking communications.
test.cc
/**
* Created by Crow on 12/27/18.
* Copyright (c) 2018 Crow All rights reserved.
* #author Crow
* #brief This file is test the ResponseParser
* #details construct the request, send/write it to the peer endpoint.
* use tcpdump can get the result [if php-fpm listened on TCP socket]
* $ sudo tcpdump port xxxx -i lo -vv -w a.cap
* $ wireshark a.cap
*/
#include <fcntl.h>
#include <unistd.h>
#include <sys/un.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <cstdio>
#include <iostream>
#include <fstream>
#include "protocol/fastCGI/request_builder.h"
#include "protocol/fastCGI/response_parser.h"
// Test driver: builds one FastCGI request with RequestBuilder, writes it to a
// Unix-domain socket, then reads the reply in 1024-byte chunks and feeds each
// chunk to ResponseParser until the parser reports Complete().
int main()
{
// CGI-style parameters that go into the request.
std::map<std::string, std::string> param_map;
param_map.insert({"REMOTE_PORT", "80"});
param_map.insert({"REMOTE_ADDR", "127.0.0.1"});
param_map.insert({"REQUEST_METHOD", "POST"});
param_map.insert({"SERVER_PROTOCOL", "HTTP/1.1"});
param_map.insert({"SCRIPT_FILENAME", "/home/Crow/1.php"});
param_map.insert({"CONTENT_LENGTH", "11"});
// Request body; its 11 characters match CONTENT_LENGTH above.
std::string in_str("a=b&c=d&e=f");
platinum::fcgi::RequestBuilder builder(3, 11, in_str, param_map);
builder.Build();
// The three parts of the built request, written to the socket in this order below.
auto b = builder.begin_requset();
auto p = builder.fcgi_params();
auto i = builder.fcgi_in();
errno = 0;
// ssize_t ret{};
// int fd = ::socket(AF_INET, SOCK_STREAM, 0);
// struct sockaddr_in addr{};
// addr.sin_family = AF_INET;
// addr.sin_port = ::htons(9000);
// addr.sin_addr.s_addr = ::inet_addr("127.0.0.1");
int fd = ::socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC , 0); <= Here! ! !
// Switch the socket to non-blocking mode; abort if fcntl reports an error.
auto flag = ::fcntl(fd, F_GETFL);
flag |= O_NONBLOCK;
if (::fcntl(fd, F_SETFL, flag)) {
perror("fcntl");
std::abort();
}
struct sockaddr_un addr{};
addr.sun_family = AF_UNIX;
::strcpy(addr.sun_path, "/home/Crow/xfc.sock");
// A failed connect is only reported; execution continues regardless.
auto ret = ::connect(fd, (const struct sockaddr *) &addr, sizeof(addr));
if (ret < 0)
perror("connect");
// The return values of the write() calls below are not inspected, and
// perror("write") runs after every write in the loops whether or not it failed.
::write(fd, reinterpret_cast<void *>(&b), sizeof(b));
for (const auto &var : p) {
::write(fd, reinterpret_cast<void *>(var.first.get()), static_cast<size_t>(var.second));
perror("write");
}
for (const auto &var : i) {
::write(fd, reinterpret_cast<void *>(var.first.get()), static_cast<size_t>(var.second));
perror("write");
}
// buf is not used anywhere below.
char buf[10000];
std::vector<unsigned char> data(1024);
platinum::fcgi::ResponseParser parser;
while (!parser.Complete()) {
// NOTE(review): ret is handed to feed() without checking it. read() returns -1
// on failure (for example EAGAIN on a non-blocking socket), so feed() can be
// called with a negative length.
ret = ::read(fd, data.data(), 1024);
parser.feed(data.cbegin(), static_cast<int>(ret));
// Print whatever content this feed() call collected.
auto stdout_ = parser.transform_data();
std::string str(stdout_.cbegin(), stdout_.cend());
std::cout << str << std::endl;
}
close(fd);
return 0;
}
fastCGI/response_parser.h
/**
* Created on 12/26/18.
* Copyright (c) 2018 Crow All rights reserved.
* #author Crow
* #brief
*/
#ifndef PLATINUM_RESPONSE_PARSER_H
#define PLATINUM_RESPONSE_PARSER_H
#include "base.h"
#include "protocol/fastCGI/component.h"
#include "protocol/parser.hpp"
namespace platinum {
namespace fcgi {
// Parser progress: finished normally, still expecting data, or failed.
enum State : int {
COMPLETED,
UNCOMPLETED,
FAULT,
};
// Incremental parser for a FastCGI response. Data is pushed in with feed();
// the content collected by the latest feed() call is read back through
// transform_data().
class ResponseParser : public platinum::Parser {
public:
using const_iter = std::vector<FCGIData>::const_iterator;
ResponseParser();
~ResponseParser() override = default;
// Parses `length` elements starting at `iter`; returns how many were consumed.
long feed(const_iter iter, long length);
// Content gathered by the most recent feed() call (feed() clears it first).
auto transform_data() -> const std::vector<FCGIData> & {
return transform_data_;
}
// Plain accessors for the values recorded while parsing.
int request_id() { return request_id_; }
long long app_status() { return app_status_; }
State state() { return static_cast<State>(state_); }
Status status() { return static_cast<Status>(status_); }
// True once an end-request record was parsed or a fault was detected.
bool Complete() { return complete_; }
// Restores the field values assigned by the constructor and empties both buffers.
void Reset();
private:
// Per-record handlers called from feed(); iter and length are advanced in place.
void ParseStdout(const_iter &iter, long &length, long ct_len, long pd_len);
void ParseStderr(const_iter &iter, long &length, long ct_len, long pd_len);
void ParseEndRequest(const_iter &iter);
std::vector<FCGIData> transform_data_;   // content collected by the current feed()
std::vector<FCGIData> name_value_data_;  // bytes found before the first "\r\n\r\n" in STDOUT
int request_id_;                         // request id of the last header parsed
long transform_len_;                     // content still expected from a later feed()
long padding_len_;                       // padding still to skip in a later feed()
long long app_status_;                   // app status taken from the end-request record
bool complete_;
bool in_content_;                        // set once the "\r\n\r\n" separator has been seen
State state_;
Status status_;                          // protocol status taken from the end-request record
};
}
}
#endif //PLATINUM_RESPONSE_PARSER_H
fastCGI/response_parser.cc
/**
* Created by Crow on 12/26/18.
* Copyright (c) 2018 Crow All rights reserved.
* #author Crow
* #brief This file is Class ResponseParser. It can be reentrant
*/
#include "response_parser.h"
#include <cstring>
#include <string>
using namespace platinum::fcgi;
// Starts with no request id (-1), no pending content or padding, app status -1,
// not complete, the header separator not yet seen, state UNCOMPLETED and
// status FCGI_UNKNOWN_ROLE.
ResponseParser::ResponseParser()
: request_id_(-1),
transform_len_(0),
padding_len_(0),
app_status_(-1),
complete_(false),
in_content_(false),
state_(State::UNCOMPLETED),
status_(Status::FCGI_UNKNOWN_ROLE)
{
transform_data_.reserve(1024); // pre-allocate capacity for 1024 elements in transform_data_
}
/**
* @brief feed() is the core routine that parses the FCGI response
*
* First finishes any content and padding left over from the previous call
* (transform_len_ / padding_len_), then parses records header by header and
* dispatches on the record type until the buffer is used up, fewer elements
* than one Header remain, or the parser state is COMPLETED or FAULT.
*
* @param iter const iterator to the start of the buffer
* @param length number of elements available in the buffer for this call
* @return number of elements consumed (initial length minus what is left)
*/
long ResponseParser::feed(ResponseParser::const_iter iter, long length)
{
auto len_temp(length);
// Each call starts with an empty output buffer.
transform_data_.clear();
// Finish the content that the previous call could not complete
if (transform_len_) {
auto len = transform_len_ > length ? length : transform_len_;
transform_data_.insert(transform_data_.cend(), iter, iter + len);
length -= len; // reduce the length
iter += len; // move the iter
transform_len_ -= len;
}
if (length == 0) {
return (len_temp - length);
} else if (padding_len_) {
// Skip padding carried over from the previous call.
auto len = padding_len_ > length ? length : padding_len_;
length -= len;
iter += len;
padding_len_ -= len;
}
while (length) { // the parsing loop runs until length reaches 0
// NOTE(review): length is a signed long compared with sizeof(Header), which
// is unsigned; a negative length would be converted to a large unsigned
// value and pass this check - confirm callers never pass one.
if (state_ == State::COMPLETED
|| state_ == State::FAULT
|| length < sizeof(Header))
{
return (len_temp - length);
}
Header header(iter); // Construct a header
iter += sizeof(Header);
length -= sizeof(Header);
request_id_ = header.request_id();
auto ct_len = header.content_length();
auto pd_len = header.padding_length();
// Other record types: nothing is done beyond having stepped past the header.
switch (header.type()) {
case Type::FCGI_STDOUT: ParseStdout(iter, length, ct_len, pd_len); break;
case Type::FCGI_STDERR: ParseStderr(iter, length, ct_len, pd_len); break;
case Type::FCGI_END_REQUEST: ParseEndRequest(iter); break;
default: break;
}
}
return len_temp - length;
}
/**
* @brief Parses the content of one FCGI_STDOUT record
*
* Until the "\r\n\r\n" separator has been seen once, the bytes before it are
* stored in name_value_data_ and only what follows is treated as content. If
* the separator is missing from the available content the parser is put into
* the FAULT state and marked complete.
*
* @param iter buffer's iterator (ref), advanced past what was consumed
* @param length remaining buffer length (ref), reduced by what was consumed
* @param ct_len the content length of FCGI_STDOUT
* @param pd_len the padding length of FCGI_STDOUT
*/
void ResponseParser::ParseStdout(const_iter &iter, long &length, long ct_len, long pd_len)
{
if (ct_len == 0 && pd_len == 0)
return ;
auto len1 = ct_len > length ? length : ct_len; // content available now: min(ct_len, length)
std::string str(iter, iter + len1);
std::string::size_type pos;
if (!in_content_) {
if ((pos = str.find("\r\n\r\n")) != std::string::npos) {
// Keep what precedes the separator, then step over it and the 4 separator bytes.
name_value_data_.insert(name_value_data_.cend(), iter, iter + pos);
iter += pos + 4;
length -= pos + 4;
ct_len -= pos + 4;
len1 -= pos + 4;
} else {
state_ = State::FAULT;
complete_ = true;
return ;
}
in_content_ = true;
}
transform_data_.insert(transform_data_.cend(), iter, iter + len1);
iter += len1;
length -= len1;
ct_len -= len1;
// Buffer used up: remember the outstanding content and padding for the next feed().
if (length == 0) {
transform_len_ += ct_len;
padding_len_ = pd_len;
return ;
}
// Skip as much of the padding as the buffer still holds.
auto len2 = pd_len > length ? length : pd_len;
iter += len2;
length -= len2;
pd_len -= len2;
// Buffer ended at or inside the padding: remember what is left to skip.
if (length == 0) {
padding_len_ = pd_len;
return ;
}
}
/**
* @brief Parses the content of one FCGI_STDERR record
*
* The available content is appended to transform_data_ and the padding is
* skipped; whatever does not fit in the current buffer is recorded in
* transform_len_ / padding_len_ for the next feed().
*
* @param iter buffer's iterator (ref), advanced past what was consumed
* @param length remaining buffer length (ref), reduced by what was consumed
* @param ct_len the content length of FCGI_STDERR
* @param pd_len the padding length of FCGI_STDERR
*/
void ResponseParser::ParseStderr(const_iter &iter, long &length, long ct_len, long pd_len)
{
if (ct_len == 0 && pd_len == 0)
return ;
auto len1 = ct_len > length ? length : ct_len; // content available now: min(ct_len, length)
transform_data_.insert(transform_data_.cend(), iter, iter + len1);
iter += len1;
length -= len1;
ct_len -= len1;
// Buffer used up: remember the outstanding content and padding.
if (length == 0) {
transform_len_ += ct_len;
padding_len_ = pd_len;
return ;
}
// Skip as much of the padding as the buffer still holds.
auto len2 = pd_len > length ? length : pd_len;
iter += len2;
length -= len2;
pd_len -= len2;
// Buffer ended at or inside the padding: remember what is left to skip.
if (length == 0) {
padding_len_ = pd_len;
return ;
}
}
/**
* @brief Parses the EndRequestRecord part
*
* Records the application status and protocol status carried by the record
* and marks the parser as complete (state COMPLETED).
*
* @param iter buffer's iterator (ref); on entry it points just past the record header
*/
void ResponseParser::ParseEndRequest(const_iter &iter)
{
iter -= sizeof(Header); // back to the Header's start to construct the EndRequestRecord
EndRequestRocord end_request_record(iter);
app_status_ = end_request_record.app_status();
status_ = end_request_record.protocol_status();
complete_ = true;
state_ = State::COMPLETED;
}
/**
 * @brief Makes the parser reusable for another response
 *
 * Every scalar member gets the value the constructor assigns to it, and both
 * data buffers are emptied.
 */
void ResponseParser::Reset()
{
    // Buffers
    name_value_data_.clear();
    transform_data_.clear();

    // Carry-over bookkeeping between feed() calls
    padding_len_ = 0;
    transform_len_ = 0;
    in_content_ = false;

    // Parse outcome
    complete_ = false;
    state_ = State::UNCOMPLETED;
    status_ = Status::FCGI_UNKNOWN_ROLE;
    app_status_ = -1;
    request_id_ = -1;
}
I think the key is ResponseParser, so RequestBuilder is not posted, if necessary, I can fill it up.
gdb backtrace
(gdb) r
Starting program: /home/Crow/CLionProjects/platinum/test/bin/response_parser
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
Program received signal SIGSEGV, Segmentation fault.
0x0000000000405f40 in platinum::fcgi::Header::Header (this=0x7fffffffb788, iter=<error reading variable: Cannot access memory at address 0x638000>)
at /home/Crow/CLionProjects/platinum/protocol/fastCGI/component.cc:30
30 : version_(*iter),
(gdb) bt
#0 0x0000000000405f40 in platinum::fcgi::Header::Header (this=0x7fffffffb788, iter=<error reading variable: Cannot access memory at address 0x638000>)
at /home/Crow/CLionProjects/platinum/protocol/fastCGI/component.cc:30
#1 0x0000000000408c70 in platinum::fcgi::ResponseParser::feed (this=0x7fffffffb830,
iter=<error reading variable: Cannot access memory at address 0x638000>, length=-58305)
at /home/Crow/CLionProjects/platinum/protocol/fastCGI/response_parser.cc:65
#2 0x0000000000402ac7 in main () at /home/Crow/CLionProjects/platinum/test/response_parser_test.cc:81
(gdb)
Without the O_NONBLOCK
[Crow#EvilCrow bin]$ sudo ./response_parser
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
hello
the request PHP file 1.php
<?php
echo hello;
?>
I expect the Unix socket to work properly after setting up non-blocking I/O. As I understand it, Unix sockets and INET sockets should behave identically with respect to blocking. Why does this happen? Thank you very much.
while (!parser.Complete()) {
ret = ::read(fd, data.data(), 1024);
parser.feed(data.cbegin(), static_cast<int>(ret));
If you are using non-blocking sockets the ::read can fail with EAGAIN. In this case ret will be -1. Your code does not properly handle this case, i.e. instead of retrying to read you essentially just call parser.feed(data.cbegin(),-1).
The -1 will then not be specifically handled inside parser.feed either but it just assumes that the length will be positive. This ultimately results in accessing some memory which is not there which causes the segmentation fault.
Note that you also don't properly deal with writing: your code simply assumes that all writes will succeed and write the full buffer. That this is the case in your tests is only pure luck because you are not writing much data - if you would write more it might happen that a write fails completely (only with non-blocking sockets) or that only partial data are written (also with blocking sockets).

var_dump(object) results "*RECURSION*"

I'm writing a simple extension with a class definition
extension.h
zend_class_entry * ExampleClass_class;
zend_class_entry * get_ExampleClass_class();
extension.c
#include "php.h"
#include "extension.h"
...
/* Accessor that returns the ExampleClass_class class-entry pointer. */
zend_class_entry * get_ExampleClass_class(){
return ExampleClass_class;
}
....
/*
 * ExampleClass::getInstance() - takes no arguments. Calls the Kotlin
 * getInstance() entry point and returns its result through RETURN_OBJ.
 *
 * NOTE(review): per the inline comment below, objectToZval yields a zval
 * pointer, which is then handed to RETURN_OBJ - confirm that RETURN_OBJ
 * accepts that type.
 */
PHP_METHOD(ExampleClass, getInstance){
ZEND_PARSE_PARAMETERS_START(0, 0)
Z_PARAM_OPTIONAL
ZEND_PARSE_PARAMETERS_END();
RETURN_OBJ(
// ----------- fun objectToZval(obj: PhpObject) = obj.zval //CPointer<zval>
example_symbols()->kotlin.root.php.extension.proxy.objectToZval(
example_symbols()->kotlin.root.exampleclass.getInstance(
// ------- Unused parameter
example_symbols()
->kotlin.root.php.extension.proxy.phpObj(
ExampleClass_class, getThis()
)
// ------- Unused parameter end
)
)
)
}
Also I write and compile static library with logic realization (Kotlin Native)
.def
static inline zval* zend_helper_new_ExampleClass() {
zval *obj = malloc(sizeof(zval));
object_init_ex(obj, get_ExampleClass_class());
return obj;
}
.kt
fun newExampleClass() = zend_helper_new_ExampleClass()!!
//PhpObject is wrapper for two fields CPointer<zend_class_entry> and CPointer<zval>
class PhpObject(val context: CPointer<zend_class_entry>, val zval: PhpMixed) {
companion object {
fun fromMixed(zval: PhpMixed) = PhpObject(zval.pointed!!.value.obj!!.pointed!!.ce!!, zval)
}
....
}
val PhpMixed.phpObject get() = PhpObject.fromMixed(this)
fun getInstance(obj: PhpObject) = newExampleClass().phpObject
Finally I run PHP code
var_dump(ExampleClass::getInstance());
And receive this
# /opt/rh/rh-php71/root/usr/bin/php -dextension=`ls ./phpmodule/modules/*.so` -r "var_dump(ExampleClass::getInstance());"
*RECURSION*
#
Where am I mistaken?
UPD
static inline zval* zend_helper_new_{className}() {
zval *obj = malloc(sizeof(zval));
object_init_ex(obj, get_{className}_class());
php_printf("Just created FLAGS %u\n", GC_FLAGS(obj->value.obj));
return obj;
}
Just created object have GC_FLAGS equals 0
*RECURSION* appears in function php_var_dump because of this code:
case IS_OBJECT:
if (Z_IS_RECURSIVE_P(struc)) {
PUTS("*RECURSION*\n");
return;
}
Macro->macro->macro->Oh god!->macro->macro...
Z_IS_RECURSIVE_P(struc) = (GC_FLAGS((*(zval)).value.counted) & GC_PROTECTED)
Okay...
php_printf("%d\n", GC_FLAGS((*(obj)).value.counted));
Returns 0
Must not trigger *RECURSIVE*, but... Why!?
First
For compilation I used PHP 7.1.8, but coding based on latest sources.
Recursion protection has been changed 06.10.2017
Actual var_dump code for 7.1.8
case IS_OBJECT:
if (Z_OBJ_APPLY_COUNT_P(struc) > 0) {
PUTS("*RECURSION*\n");
return;
}
But it doesn't matter
Second
RETURN_OBJ(
example_symbols()->kotlin.root.php.extension.proxy.objectToZval(
example_symbols()->kotlin.root.exampleclass.getInstance(/*unused*/)
)
)
Let's expand the macro RETURN_OBJ (r)
RETURN_OBJ(r)
{ RETVAL_OBJ(r); return; }
{ ZVAL_OBJ(return_value, r); return; }
.
{ do {
zval *__z = (return_value);
Z_OBJ_P(__z) = (r);
Z_TYPE_INFO_P(__z) = IS_OBJECT_EX;
} while (0); return; }
.
{ do {
zval *__z = (return_value);
Z_OBJ(*(__z)) = (r);
Z_TYPE_INFO(*(__z)) = (IS_OBJECT | (IS_TYPE_REFCOUNTED << Z_TYPE_FLAGS_SHIFT));
} while (0); return; }
.
{ do {
zval *__z = (return_value);
(*(__z)).value.obj = (r);
(*(__z)).u1.type_info = (8 | ((1<<0) << 8));
} while (0); return; }
You see? :)
Yea, this macro must receive zend_object but not zval
Just change return expression to
example_symbols()->kotlin.root.php.extension.proxy.zendObject(
example_symbols()->kotlin.root.exampleclass.getInstance(/*unused*/)
)
where
fun zendObject(obj: PhpObject) = obj.zval.pointed!!.value.obj!!
Bingo!
PS Special thanks for php developers community for incredible documented macro hell

How to validate Ethereum addresses in PHP

I'm using PHP and curl with json to interact with my geth server.
I'm able to do all I want except one thing: checking if user's inputted address is valid according to ethereum wallet format.
I saw a javascript function here, but I'm mostly using PHP, I'm not into JS at all.
Any ideas how to validate ethereum addresses in PHP?
Here's a PHP implementation for Ethereum address validation against the EIP 55 specification. For details of how it works, please go through the comments.
<?php
use kornrunner\Keccak; // composer require greensea/keccak
/**
 * Validates Ethereum addresses, including the EIP-55 mixed-case checksum.
 *
 * An address is accepted when it is 40 hex digits with an optional "0x"
 * prefix and is either written in a single letter case or carries a valid
 * mixed-case checksum.
 */
class EthereumValidator
{
    /**
     * @param string $address candidate address, with or without "0x" prefix
     * @return bool true when the address is well-formed and, if mixed-case,
     *              its checksum is valid
     */
    public function isAddress(string $address): bool
    {
        // See: https://github.com/ethereum/web3.js/blob/7935e5f/lib/utils/utils.js#L415
        if ($this->matchesPattern($address)) {
            return $this->isAllSameCaps($address) ?: $this->isValidChecksum($address);
        }
        return false;
    }

    /**
     * Basic shape check: optional "0x" followed by exactly 40 hex digits,
     * matched case-insensitively. Returns preg_match()'s result.
     */
    protected function matchesPattern(string $address): int
    {
        return preg_match('/^(0x)?[0-9a-f]{40}$/i', $address);
    }

    /**
     * True when every hex letter is lowercase or every hex letter is
     * uppercase; such addresses carry no checksum information.
     */
    protected function isAllSameCaps(string $address): bool
    {
        return preg_match('/^(0x)?[0-9a-f]{40}$/', $address) || preg_match('/^(0x)?[0-9A-F]{40}$/', $address);
    }

    /**
     * Verifies the mixed-case checksum of an address that already passed
     * matchesPattern().
     *
     * @param string $address
     * @return bool
     */
    protected function isValidChecksum($address)
    {
        // matchesPattern() is case-insensitive, so both "0x" and "0X" can
        // arrive here; strip either one before hashing.
        if (strncasecmp($address, '0x', 2) === 0) {
            $address = substr($address, 2);
        }
        $hash = Keccak::hash(strtolower($address), 256);

        // See: https://github.com/web3j/web3j/pull/134/files#diff-db8702981afff54d3de6a913f13b7be4R42
        for ($i = 0; $i < 40; $i++) {
            // Square-bracket offsets: the curly-brace form was removed in PHP 8.
            if (ctype_alpha($address[$i])) {
                // Each uppercase letter should correlate with a first bit of 1 in the hash char with the same index,
                // and each lowercase letter with a 0 bit.
                $charInt = intval($hash[$i], 16);
                if ((ctype_upper($address[$i]) && $charInt <= 7) || (ctype_lower($address[$i]) && $charInt > 7)) {
                    return false;
                }
            }
        }
        return true;
    }
}
Dependencies
To validate checksum addresses, we need a keccak-256 implementation in place which is not supported by the built-in hash() function. You need to require the greensea/keccak composer package as a dependency.
Kudos to @WebSpanner for pointing out the issue with SHA3 hashing.
Basically, you can convert the javascript entirely to PHP.
Here i have been able to convert and test the code for validating an ethereum address in PHP.
/**
 * Checks if the given string is an Ethereum address.
 *
 * Addresses written entirely in lowercase or entirely in uppercase are
 * accepted as they are; mixed-case addresses must also pass the checksum test.
 *
 * @param string $address the given HEX address
 * @return bool
 */
function isAddress($address) {
    if (!preg_match('/^(0x)?[0-9a-f]{40}$/i',$address)) {
        // It does not have the basic shape of an address
        return false;
    } elseif (preg_match('/^(0x)?[0-9a-f]{40}$/',$address) || preg_match('/^(0x)?[0-9A-F]{40}$/',$address)) {
        // All lowercase or all uppercase: no checksum to verify
        return true;
    } else {
        // Mixed case: verify the checksum
        return isChecksumAddress($address);
    }
}
/**
 * Checks if the given string is a checksummed (EIP-55 mixed-case) address.
 *
 * The checksum is derived from the Keccak-256 hash of the lowercase address.
 * PHP's built-in hash() does not provide that algorithm, so this relies on
 * the kornrunner\Keccak class from the keccak composer package.
 *
 * @param string $address the given HEX address
 * @return bool
 */
function isChecksumAddress($address) {
    // Reject anything that is not 40 hex digits with an optional prefix, so
    // the offsets read below always exist.
    if (!preg_match('/^(0x)?[0-9a-f]{40}$/i', $address)) {
        return false;
    }
    // Strip the prefix regardless of its case.
    if (strncasecmp($address, '0x', 2) === 0) {
        $address = substr($address, 2);
    }
    $addressHash = \kornrunner\Keccak::hash(strtolower($address), 256);
    for ($i = 0; $i < 40; $i++) {
        $char = $address[$i];
        $nibble = intval($addressHash[$i], 16);
        // The nth letter must be uppercase when the nth hash digit is 8 or
        // more and lowercase otherwise; digits satisfy both comparisons.
        if (($nibble > 7 && strtoupper($char) !== $char) || ($nibble <= 7 && strtolower($char) !== $char)) {
            return false;
        }
    }
    return true;
}
Meanwhile, for someone looking for a very simple regular expression for checking ethereum address validity (e.g to use is as a pattern attribute of an HTML field), this regular expression may suffice.
^(0x)?[0-9a-fA-F]{40}$

Does PDO care if query with place holders uses an non-associated array?

I had a query in my application that looked like this:
$stmt = db::db()->prepare('INSERT INTO t(a,b) VALUES :a,:b)');
$stmt->execute(array(1,2));
Later, I had an unrelated error, and when reviewing this script, I asked myself why it ever worked previously. I would have expected I should have used
$stmt->execute(array('a'=>1,'b'=>2));
But, it seems to work?
Is it acceptable to use an non-associated array with a prepared statement where the placeholders are array keys and not question marks?
Not that I will get into this practice, but this had me stumped and I just need to know.
Thank you
Yes, PDO does care. In this scenario prepare() would not fail, because the client only sends the query template to the database server;
the template must contain no data at all so that it can be pre-processed.
The client then sends the data, either once or multiple times, and the server executes the pre-processed query with that data.
However, the following exception should have been raised when executing:
{ "HY093", "Invalid parameter number" }
If you read the source code of PDO for the execute function you will see the following piece of code that throws this error:
if (HASH_KEY_IS_STRING == zend_hash_get_current_key_ex(Z_ARRVAL_P(input_params),
&param.name, &str_length, &num_index, 0, NULL)) {
/* yes this is correct. we don't want to count the null byte. ask wez */
param.namelen = str_length - 1;
param.paramno = -1;
} else {
/* we're okay to be zero based here */
if (num_index < 0) {
pdo_raise_impl_error(stmt->dbh, stmt, "HY093", NULL TSRMLS_CC);
RETURN_FALSE;
}
param.paramno = num_index;
}
I am not sure how it worked for you so I would make sure PDO errormode attribute is set.
You still have to be careful because you must take care of proper order of the elements in the array that we are passing to the PDOStatement::execute() method.
execute() full function from source code
/* {{{ proto bool PDOStatement::execute([array $bound_input_params])
Execute a prepared statement, optionally binding parameters.

When an array is passed, every element is registered as a string-typed bound
parameter: string keys become named parameters, all other keys are used as
the parameter number. The statement is then run through the driver's
executer callback. Returns TRUE/FALSE through the RETURN_* macros. */
static PHP_METHOD(PDOStatement, execute)
{
zval *input_params = NULL;
int ret = 1;
/* presumably provides the `stmt` variable used below - TODO confirm */
PHP_STMT_GET_OBJ;
if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|a!", &input_params)) {
RETURN_FALSE;
}
PDO_STMT_CLEAR_ERR();
if (input_params) {
struct pdo_bound_param_data param;
zval **tmp;
uint str_length;
ulong num_index;
/* Discard the parameter table left over from an earlier call. */
if (stmt->bound_params) {
zend_hash_destroy(stmt->bound_params);
FREE_HASHTABLE(stmt->bound_params);
stmt->bound_params = NULL;
}
/* Walk the input array from its first element. */
zend_hash_internal_pointer_reset(Z_ARRVAL_P(input_params));
while (SUCCESS == zend_hash_get_current_data(Z_ARRVAL_P(input_params), (void*)&tmp)) {
memset(&param, 0, sizeof(param));
if (HASH_KEY_IS_STRING == zend_hash_get_current_key_ex(Z_ARRVAL_P(input_params),
&param.name, &str_length, &num_index, 0, NULL)) {
/* yes this is correct. we don't want to count the null byte. ask wez */
param.namelen = str_length - 1;
param.paramno = -1;
} else {
/* we're okay to be zero based here */
/* NOTE(review): num_index is declared ulong above, so this comparison
 * looks like it can never be true - confirm. */
if (num_index < 0) {
pdo_raise_impl_error(stmt->dbh, stmt, "HY093", NULL TSRMLS_CC);
RETURN_FALSE;
}
param.paramno = num_index;
}
/* Every value passed this way is bound as a string, using a copy of the element. */
param.param_type = PDO_PARAM_STR;
MAKE_STD_ZVAL(param.parameter);
MAKE_COPY_ZVAL(tmp, param.parameter);
if (!really_register_bound_param(&param, stmt, 1 TSRMLS_CC)) {
if (param.parameter) {
zval_ptr_dtor(&param.parameter);
}
RETURN_FALSE;
}
zend_hash_move_forward(Z_ARRVAL_P(input_params));
}
}
if (PDO_PLACEHOLDER_NONE == stmt->supports_placeholders) {
/* handle the emulated parameter binding,
* stmt->active_query_string holds the query with binds expanded and
* quoted.
*/
ret = pdo_parse_params(stmt, stmt->query_string, stmt->query_stringlen,
&stmt->active_query_string, &stmt->active_query_stringlen TSRMLS_CC);
if (ret == 0) {
/* no changes were made */
stmt->active_query_string = stmt->query_string;
stmt->active_query_stringlen = stmt->query_stringlen;
ret = 1;
} else if (ret == -1) {
/* something broke */
PDO_HANDLE_STMT_ERR();
RETURN_FALSE;
}
} else if (!dispatch_param_event(stmt, PDO_PARAM_EVT_EXEC_PRE TSRMLS_CC)) {
PDO_HANDLE_STMT_ERR();
RETURN_FALSE;
}
/* Success path: the driver's executer callback returned non-zero. */
if (stmt->methods->executer(stmt TSRMLS_CC)) {
/* Free the expanded query only when it is a separate buffer. */
if (stmt->active_query_string && stmt->active_query_string != stmt->query_string) {
efree(stmt->active_query_string);
}
stmt->active_query_string = NULL;
if (!stmt->executed) {
/* this is the first execute */
if (stmt->dbh->alloc_own_columns && !stmt->columns) {
/* for "big boy" drivers, we need to allocate memory to fetch
* the results into, so lets do that now */
ret = pdo_stmt_describe_columns(stmt TSRMLS_CC);
}
stmt->executed = 1;
}
if (ret && !dispatch_param_event(stmt, PDO_PARAM_EVT_EXEC_POST TSRMLS_CC)) {
RETURN_FALSE;
}
RETURN_BOOL(ret);
}
/* Failure path: same buffer clean-up, then report the statement error. */
if (stmt->active_query_string && stmt->active_query_string != stmt->query_string) {
efree(stmt->active_query_string);
}
stmt->active_query_string = NULL;
PDO_HANDLE_STMT_ERR();
RETURN_FALSE;
}
/* }}} */

list of registered shutdown functions

Is there any method to access to list of registered shutdown functions?
You can write an extension and look at BG(user_shutdown_function_names). Probably easier is to make a wrapper for register_shutdown_function that saves the shutdown functions to some array and call it instead.
(Untested)
#include "ext/standard/basic_functions.h"
//usual include suspects here
/*
 * One registered shutdown function. The code below treats arguments[0] as
 * the callback and arguments[1..arg_count-1] as the values passed to it.
 */
typedef struct _php_shutdown_function_entry {
zval **arguments;
int arg_count;
} php_shutdown_function_entry;
/*
 * Destructor for one shutdown-function entry: releases every zval held in
 * the entry's argument vector and then frees the vector itself.
 */
static void _shutdown_function_dtor(php_shutdown_function_entry *shutdown_function_entry) /* {{{ */
{
    int idx = 0;

    while (idx < shutdown_function_entry->arg_count) {
        zval_ptr_dtor(&shutdown_function_entry->arguments[idx]);
        idx++;
    }

    efree(shutdown_function_entry->arguments);
}
/*
 * Hash-apply callback: appends a description of one shutdown-function entry
 * to `arr`, in the form
 *   array("callback" => <arguments[0]>, "arguments" => array(<arguments[1..]>)).
 *
 * The callback and argument zvals are shared with the entry (their refcount
 * is raised), not copied. Always returns ZEND_HASH_APPLY_KEEP so that the
 * traversal leaves the entry in the table.
 */
static int _build_shutdown_array(php_shutdown_function_entry *entry, zval *arr TSRMLS_DC)
{
    zval *inner;
    zval *args;
    int i;

    /* The zvals must be allocated before array_init() can fill them in. */
    MAKE_STD_ZVAL(inner);
    array_init(inner);
    MAKE_STD_ZVAL(args);
    array_init(args);

    Z_ADDREF_P(entry->arguments[0]);
    add_assoc_zval(inner, "callback", entry->arguments[0]);

    for (i = 1; i < entry->arg_count; i++) {
        Z_ADDREF_P(entry->arguments[i]);
        add_next_index_zval(args, entry->arguments[i]);
    }
    add_assoc_zval(inner, "arguments", args);

    add_next_index_zval(arr, inner);

    return ZEND_HASH_APPLY_KEEP;
}
/*
 * list_shutdown_functions(): takes no arguments and returns an array with
 * one element per entry of BG(user_shutdown_function_names), each built by
 * _build_shutdown_array(). If that table does not exist yet it is created
 * (empty) first.
 */
PHP_FUNCTION(list_shutdown_functions)
{
    if (FAILURE == zend_parse_parameters_none()) {
        return;
    }

    if (NULL == BG(user_shutdown_function_names)) {
        ALLOC_HASHTABLE(BG(user_shutdown_function_names));
        zend_hash_init(BG(user_shutdown_function_names), 0, NULL,
            (void (*)(void *)) _shutdown_function_dtor, 0);
    }

    array_init(return_value);
    zend_hash_apply_with_argument(BG(user_shutdown_function_names),
        (apply_func_arg_t) _build_shutdown_array, return_value TSRMLS_CC);
}
Other than keeping track yourself, no. The list of registered function names is not exposed to your PHP scripts. If you're open to extending PHP itself (this would be a simple task) then see Artefacto's answer.

Categories