I'm writing a Web Server to support FastCGI. Using Unix socket to communicate with php-fpm, the non-block option cannot be set, which will cause the php-fpm response parser to access illegal memory.
I've used socket() to set non-blocking options, and using fcntl() to set non-blocking options can lead to illegal memory access. Once the non-blocking options are cancelled, everything works. But my Web Server is a non-blocking event-driven model, so I have to use Unix sockets for non-blocking communications.
test.cc
/**
* Created by Crow on 12/27/18.
* Copyright (c) 2018 Crow All rights reserved.
* #author Crow
* #brief This file is test the ResponseParser
* #details construct the request, send/write it to the peer endpoint.
* use tcpdump can get the result [if php-fpm listened on TCP socket]
* $ sudo tcpdump port xxxx -i lo -vv -w a.cap
* $ wireshark a.cap
*/
#include <fcntl.h>
#include <unistd.h>
#include <sys/un.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <cstdio>
#include <iostream>
#include <fstream>
#include "protocol/fastCGI/request_builder.h"
#include "protocol/fastCGI/response_parser.h"
int main()
{
std::map<std::string, std::string> param_map;
param_map.insert({"REMOTE_PORT", "80"});
param_map.insert({"REMOTE_ADDR", "127.0.0.1"});
param_map.insert({"REQUEST_METHOD", "POST"});
param_map.insert({"SERVER_PROTOCOL", "HTTP/1.1"});
param_map.insert({"SCRIPT_FILENAME", "/home/Crow/1.php"});
param_map.insert({"CONTENT_LENGTH", "11"});
std::string in_str("a=b&c=d&e=f");
platinum::fcgi::RequestBuilder builder(3, 11, in_str, param_map);
builder.Build();
auto b = builder.begin_requset();
auto p = builder.fcgi_params();
auto i = builder.fcgi_in();
errno = 0;
// ssize_t ret{};
// int fd = ::socket(AF_INET, SOCK_STREAM, 0);
// struct sockaddr_in addr{};
// addr.sin_family = AF_INET;
// addr.sin_port = ::htons(9000);
// addr.sin_addr.s_addr = ::inet_addr("127.0.0.1");
int fd = ::socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC , 0); <= Here! ! !
auto flag = ::fcntl(fd, F_GETFL);
flag |= O_NONBLOCK;
if (::fcntl(fd, F_SETFL, flag)) {
perror("fcntl");
std::abort();
}
struct sockaddr_un addr{};
addr.sun_family = AF_UNIX;
::strcpy(addr.sun_path, "/home/Crow/xfc.sock");
auto ret = ::connect(fd, (const struct sockaddr *) &addr, sizeof(addr));
if (ret < 0)
perror("connect");
::write(fd, reinterpret_cast<void *>(&b), sizeof(b));
for (const auto &var : p) {
::write(fd, reinterpret_cast<void *>(var.first.get()), static_cast<size_t>(var.second));
perror("write");
}
for (const auto &var : i) {
::write(fd, reinterpret_cast<void *>(var.first.get()), static_cast<size_t>(var.second));
perror("write");
}
char buf[10000];
std::vector<unsigned char> data(1024);
platinum::fcgi::ResponseParser parser;
while (!parser.Complete()) {
ret = ::read(fd, data.data(), 1024);
parser.feed(data.cbegin(), static_cast<int>(ret));
auto stdout_ = parser.transform_data();
std::string str(stdout_.cbegin(), stdout_.cend());
std::cout << str << std::endl;
}
close(fd);
return 0;
}
fastCGI/response_parser.h
/**
* Created on 12/26/18.
* Copyright (c) 2018 Crow All rights reserved.
* #author Crow
* #brief
*/
#ifndef PLATINUM_RESPONSE_PARSER_H
#define PLATINUM_RESPONSE_PARSER_H
#include "base.h"
#include "protocol/fastCGI/component.h"
#include "protocol/parser.hpp"
namespace platinum {
namespace fcgi {
enum State : int {
COMPLETED,
UNCOMPLETED,
FAULT,
};
class ResponseParser : public platinum::Parser {
public:
using const_iter = std::vector<FCGIData>::const_iterator;
ResponseParser();
~ResponseParser() override = default;
long feed(const_iter iter, long length);
auto transform_data() -> const std::vector<FCGIData> & {
return transform_data_;
}
int request_id() { return request_id_; }
long long app_status() { return app_status_; }
State state() { return static_cast<State>(state_); }
Status status() { return static_cast<Status>(status_); }
bool Complete() { return complete_; }
void Reset();
private:
void ParseStdout(const_iter &iter, long &length, long ct_len, long pd_len);
void ParseStderr(const_iter &iter, long &length, long ct_len, long pd_len);
void ParseEndRequest(const_iter &iter);
std::vector<FCGIData> transform_data_;
std::vector<FCGIData> name_value_data_;
int request_id_;
long transform_len_;
long padding_len_;
long long app_status_;
bool complete_;
bool in_content_;
State state_;
Status status_;
};
}
}
#endif //PLATINUM_RESPONSE_PARSER_H
fastCGI/response_parser.cc
/**
* Created by Crow on 12/26/18.
* Copyright (c) 2018 Crow All rights reserved.
* #author Crow
* #brief This file is Class ResponseParser. It can be reentrant
*/
#include "response_parser.h"
#include <cstring>
#include <string>
using namespace platinum::fcgi;
ResponseParser::ResponseParser()
: request_id_(-1),
transform_len_(0),
padding_len_(0),
app_status_(-1),
complete_(false),
in_content_(false),
state_(State::UNCOMPLETED),
status_(Status::FCGI_UNKNOWN_ROLE)
{
transform_data_.reserve(1024); // make sure reserve space for transform_data_
}
/**
* #brief feed() the core to parse the FCGI response
* #param iter Buffer's cosnt iterator
* #param length Buffer's length this time
* #return parse result
*/
long ResponseParser::feed(ResponseParser::const_iter iter, long length)
{
auto len_temp(length);
transform_data_.clear();
// To ensure the last parsing result is complete
if (transform_len_) {
auto len = transform_len_ > length ? length : transform_len_;
transform_data_.insert(transform_data_.cend(), iter, iter + len);
length -= len; // reduce the length
iter += len; // move the iter
transform_len_ -= len;
}
if (length == 0) {
return (len_temp - length);
} else if (padding_len_) {
auto len = padding_len_ > length ? length : padding_len_;
length -= len;
iter += len;
padding_len_ -= len;
}
while (length) { // the whole parsing process continus utils length < 0
if (state_ == State::COMPLETED
|| state_ == State::FAULT
|| length < sizeof(Header))
{
return (len_temp - length);
}
Header header(iter); // Construct a header
iter += sizeof(Header);
length -= sizeof(Header);
request_id_ = header.request_id();
auto ct_len = header.content_length();
auto pd_len = header.padding_length();
switch (header.type()) {
case Type::FCGI_STDOUT: ParseStdout(iter, length, ct_len, pd_len); break;
case Type::FCGI_STDERR: ParseStderr(iter, length, ct_len, pd_len); break;
case Type::FCGI_END_REQUEST: ParseEndRequest(iter); break;
default: break;
}
}
return len_temp - length;
}
/**
* #brief To parse the STDOUT part
* #param iter buffer's iterator (ref)
* #param length buffer's length (ref)
* #param ct_len the content length of FCGI_STDOUT
* #param pd_len the padding length of FCGI_STDOUT
*/
void ResponseParser::ParseStdout(const_iter &iter, long &length, long ct_len, long pd_len)
{
if (ct_len == 0 && pd_len == 0)
return ;
auto len1 = ct_len > length ? length : ct_len; // judge if we have enough space to deal with
std::string str(iter, iter + len1);
std::string::size_type pos;
if (!in_content_) {
if ((pos = str.find("\r\n\r\n")) != std::string::npos) {
name_value_data_.insert(name_value_data_.cend(), iter, iter + pos);
iter += pos + 4;
length -= pos + 4;
ct_len -= pos + 4;
len1 -= pos + 4;
} else {
state_ = State::FAULT;
complete_ = true;
return ;
}
in_content_ = true;
}
transform_data_.insert(transform_data_.cend(), iter, iter + len1);
iter += len1;
length -= len1;
ct_len -= len1;
if (length == 0) {
transform_len_ += ct_len;
padding_len_ = pd_len;
return ;
}
auto len2 = pd_len > length ? length : pd_len;
iter += len2;
length -= len2;
pd_len -= len2;
if (length == 0) {
padding_len_ = pd_len;
return ;
}
}
/**
* #beief To parse the STDERR part
* #param iter buffer's iterator (ref)
* #param length buffer's length (ref)
* #param ct_len the conten length of FCGI_STDERR
* #param pd_len the padding length of FCGI_STDERR
*/
void ResponseParser::ParseStderr(const_iter &iter, long &length, long ct_len, long pd_len)
{
if (ct_len == 0 && pd_len == 0)
return ;
auto len1 = ct_len > length ? length : ct_len; // judge if we have enough space to deal with
transform_data_.insert(transform_data_.cend(), iter, iter + len1);
iter += len1;
length -= len1;
ct_len -= len1;
if (length == 0) {
transform_len_ += ct_len;
padding_len_ = pd_len;
return ;
}
auto len2 = pd_len > length ? length : pd_len;
iter += len2;
length -= len2;
pd_len -= len2;
if (length == 0) {
padding_len_ = pd_len;
return ;
}
}
/**
* #brief To parse the EndRequestRecord part
* #param iter Buffer's iterator
*/
void ResponseParser::ParseEndRequest(const_iter &iter)
{
iter -= sizeof(Header); // back to the Header's start to constrcut the EndRequestRecord
EndRequestRocord end_request_record(iter);
app_status_ = end_request_record.app_status();
status_ = end_request_record.protocol_status();
complete_ = true;
state_ = State::COMPLETED;
}
void ResponseParser::Reset()
{
request_id_ = -1;
transform_len_ = 0;
padding_len_ = 0;
app_status_ = -1;
complete_ = false;
in_content_ = false;
state_ = State::UNCOMPLETED;
status_ = Status::FCGI_UNKNOWN_ROLE;
transform_data_.clear();
name_value_data_.clear();
}
I think the key is ResponseParser, so RequestBuilder is not posted, if necessary, I can fill it up.
gdb backstrace
(gdb) r
Starting program: /home/Crow/CLionProjects/platinum/test/bin/response_parser
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
Program received signal SIGSEGV, Segmentation fault.
0x0000000000405f40 in platinum::fcgi::Header::Header (this=0x7fffffffb788, iter=<error reading variable: Cannot access memory at address 0x638000>)
at /home/Crow/CLionProjects/platinum/protocol/fastCGI/component.cc:30
30 : version_(*iter),
(gdb) bt
#0 0x0000000000405f40 in platinum::fcgi::Header::Header (this=0x7fffffffb788, iter=<error reading variable: Cannot access memory at address 0x638000>)
at /home/Crow/CLionProjects/platinum/protocol/fastCGI/component.cc:30
#1 0x0000000000408c70 in platinum::fcgi::ResponseParser::feed (this=0x7fffffffb830,
iter=<error reading variable: Cannot access memory at address 0x638000>, length=-58305)
at /home/Crow/CLionProjects/platinum/protocol/fastCGI/response_parser.cc:65
#2 0x0000000000402ac7 in main () at /home/Crow/CLionProjects/platinum/test/response_parser_test.cc:81
(gdb)
Without the O_NONBLOCK
[Crow#EvilCrow bin]$ sudo ./response_parser
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
hello
the request PHP file 1.php
<?php
echo hello;
?>
I expect to make Unix Socket work properly after setting up non-blocking IO. As I understand it, Unix sockets and INET sockets should behave identically on blocking. Why does this happen? Thank for you very much.
while (!parser.Complete()) {
ret = ::read(fd, data.data(), 1024);
parser.feed(data.cbegin(), static_cast<int>(ret));
If you are using non-blocking sockets the ::read can fail with EAGAIN. In this case ret will be -1. Your code does not properly handle this case, i.e. instead of retrying to read you essentially just call parser.feed(data.cbegin(),-1).
The -1 will then not be specifically handled inside parser.feed either but it just assumes that the length will be positive. This ultimately results in accessing some memory which is not there which causes the segmentation fault.
Note that you also don't properly deal with writing: your code simply assumes that all writes will succeed and write the full buffer. That this is the case in your tests is only pure luck because you are not writing much data - if you would write more it might happen that a write fails completely (only with non-blocking sockets) or that only partial data are written (also with blocking sockets).
Related
I'm write simple extension with class definition
extension.h
zend_class_entry * ExampleClass_class;
zend_class_entry * get_ExampleClass_class();
extension.c
#include "php.h"
#include "extension.h"
...
zend_class_entry * get_ExampleClass_class(){
return ExampleClass_class;
}
....
PHP_METHOD(ExampleClass, getInstance){
ZEND_PARSE_PARAMETERS_START(0, 0)
Z_PARAM_OPTIONAL
ZEND_PARSE_PARAMETERS_END();
RETURN_OBJ(
// ----------- fun objectToZval(obj: PhpObject) = obj.zval //CPointer<zval>
example_symbols()->kotlin.root.php.extension.proxy.objectToZval(
example_symbols()->kotlin.root.exampleclass.getInstance(
// ------- Unused parameter
example_symbols()
->kotlin.root.php.extension.proxy.phpObj(
ExampleClass_class, getThis()
)
// ------- Unused parameter end
)
)
)
}
Also I write and compile static library with logic realization (Kotlin Native)
.def
static inline zval* zend_helper_new_ExampleClass() {
zval *obj = malloc(sizeof(zval));
object_init_ex(obj, get_ExampleClass_class());
return obj;
}
.kt
fun newExampleClass() = zend_helper_new_ExampleClass()!!
//PhpObject is wrapper for two fields CPointer<zend_class_entry> and CPointer<zval>
class PhpObject(val context: CPointer<zend_class_entry>, val zval: PhpMixed) {
companion object {
fun fromMixed(zval: PhpMixed) = PhpObject(zval.pointed!!.value.obj!!.pointed!!.ce!!, zval)
}
....
}
val PhpMixed.phpObject get() = PhpObject.fromMixed(this)
fun getInstance(obj: PhpObject) = newExampleClass().phpObject
Finally I run PHP code
var_dump(ExampleClass::getInstance());
And receive this
# /opt/rh/rh-php71/root/usr/bin/php -dextension=`ls ./phpmodule/modules/*.so` -r "var_dump(ExampleClass::getInstance());"
*RECURSION*
#
Where I mistaken?
UPD
static inline zval* zend_helper_new_{className}() {
zval *obj = malloc(sizeof(zval));
object_init_ex(obj, get_{className}_class());
php_printf("Just created FLAGS %u\n", GC_FLAGS(obj->value.obj));
return obj;
}
Just created object have GC_FLAGS equals 0
*RECURSIVE* apears in function php_var_dump by code
case IS_OBJECT:
if (Z_IS_RECURSIVE_P(struc)) {
PUTS("*RECURSION*\n");
return;
}
Macro->macro->macro->Oh god!->macro->macro...
Z_IS_RECURSIVE_P(struc) = (GC_FLAGS((*(zval)).value.counted) & GC_PROTECTED)
Okay...
php_printf("%d\n", GC_FLAGS((*(obj)).value.counted));
Returns 0
Must not trigger *RECURSIVE*, but... Why!?
First
For compilation I used PHP 7.1.8, but coding based on latest sources.
Recursion protection has been changed 06.10.2017
Actual var_dump code for 7.1.8
case IS_OBJECT:
if (Z_OBJ_APPLY_COUNT_P(struc) > 0) {
PUTS("*RECURSION*\n");
return;
}
But it doesn't matter
Second
RETURN_OBJ(
example_symbols()->kotlin.root.php.extension.proxy.objectToZval(
example_symbols()->kotlin.root.exampleclass.getInstance(/*unused*/)
)
)
Let's expand the macro RETURN_OBJ (r)
RETURN_OBJ(r)
{ RETVAL_OBJ(r); return; }
{ ZVAL_OBJ(return_value, r); return; }
.
{ do {
zval *__z = (return_value);
Z_OBJ_P(__z) = (r);
Z_TYPE_INFO_P(__z) = IS_OBJECT_EX;
} while (0); return; }
.
{ do {
zval *__z = (return_value);
Z_OBJ(*(__z)) = (r);
Z_TYPE_INFO(*(__z)) = (IS_OBJECT | (IS_TYPE_REFCOUNTED << Z_TYPE_FLAGS_SHIFT));
} while (0); return; }
.
{ do {
zval *__z = (return_value);
(*(__z)).value.obj = (r);
(*(__z)).u1.type_info = (8 | ((1<<0) << 8));
} while (0); return; }
You see? :)
Yea, this macro must receive zend_object but not zval
Just change return expression to
example_symbols()->kotlin.root.php.extension.proxy.zendObject(
example_symbols()->kotlin.root.exampleclass.getInstance(/*unused*/)
)
where
fun zendObject(obj: PhpObject) = obj.zval.pointed!!.value.obj!!
Bingo!
PS Special thanks for php developers community for incredible documented macro hell
I have trouble with a custom extension in php.
I am extening Php::ArrayAccess for a self made object and I am able to use my object in PHP juste like a native array. BUT I can not chain the operators [] even though I am returning a reference to my object in the implementation of offsetGet. I get this error :
PHP Fatal error: Cannot use object of type Jq as array in ... (myfile.php on line 0)
#include <phpcpp.h>
#include <iostream>
#include <sstream>
#include <algorithm>
class Jq : public Php::Base, public Php::ArrayAccess
{
public:
Jq()
{
}
virtual ~Jq()
{
}
void __construct(Php::Parameters& params)
{
std::string localParam1 = params[0];
std::string localParam2 = params[1];
_pathToJq = localParam1;
_pathToCacheFile = localParam2;
}
Php::Value __toString()
{
return asString();
}
Php::Value asString()
{
std::ostringstream os;
os << _pathToJq << ' ' << _pathToCacheFile << " : " << _filters.str();
return os.str();
}
virtual bool offsetExists(const Php::Value &key) override
{
return true;
}
virtual Php::Value offsetGet(const Php::Value& key) override
{
return &((*this)[key]);
}
virtual void offsetSet(const Php::Value &key, const Php::Value &value) override
{
}
virtual void offsetUnset(const Php::Value &key) override
{
}
Jq& operator[] (const std::string& key)
{
const std::string offset = key;
if (is_number(offset)) {
if (_filters.tellp() > 0) {
_filters << '[' << offset << ']';
} else {
_filters << ".[" << offset << ']';
}
} else {
_filters << '.' << offset;
}
return *this;
}
private:
std::string _pathToJq;
std::string _pathToCacheFile;
std::ostringstream _filters;
std::ostringstream _chainedOutput;
bool is_number(const std::string& s)
{
return !s.empty() && std::find_if(s.begin(), s.end(), [](char c) { return !std::isdigit(c); }) == s.end();
}
};
/**
* tell the compiler that the get_module is a pure C function
*/
extern "C" {
/**
* Function that is called by PHP right after the PHP process
* has started, and that returns an address of an internal PHP
* strucure with all the details and features of your extension
*
* #return void* a pointer to an address that is understood by PHP
*/
PHPCPP_EXPORT void *get_module()
{
// static(!) Php::Extension object that should stay in memory
// for the entire duration of the process (that's why it's static)
static Php::Extension extension("jq", "0.0.1");
// #todo add your own functions, classes, namespaces to the extension
Php::Class<Jq> jq("Jq");
jq.method("__construct", &Jq::__construct);
jq.method("__toString", &Jq::__toString);
jq.method("asString", &Jq::asString);
//jq.method("offsetGet", &Jq::offsetGet);
// add the class to the extension
extension.add(std::move(jq));
// return the extension
return extension;
}
}
and the php code to be executed:
<?php
$jqa = new Jq("pathJQ", "pathCache");
// This is fine !
echo $jqa['test'] . PHP_EOL;
// This is fine too !
echo $jqa . PHP_EOL;
// But This is not !
echo $jqa['coc']['players'][0]['name'] . PHP_EOL;
Thank's for your help !
are these functions written the same way as user functions? I mean with PHP code and with regular expressions and stuff like that?
For example:
filter_var($email, FILTER_VALIDATE_EMAIL);
vs.
http://www.totallyphp.co.uk/code/validate_an_email_address_using_regular_expressions.htm
PHP is written in C. The PHP functions are written in high quality C code then compiled to form the PHP langugae library
if you want to extend PHP (edit / write) own functions check this out: http://www.php.net/~wez/extending-php.pdf
EDIT:
here you go :
This is the original C code for the function:
/* {{{ proto mixed filter_var(mixed variable [, long filter [, mixed options]])
* Returns the filtered version of the vriable.
*/
PHP_FUNCTION(filter_var)
{
long filter = FILTER_DEFAULT;
zval **filter_args = NULL, *data;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z/|lZ", &data, &filter, &filter_args) == FAILURE) {
return;
}
if (!PHP_FILTER_ID_EXISTS(filter)) {
RETURN_FALSE;
}
MAKE_COPY_ZVAL(&data, return_value);
php_filter_call(&return_value, filter, filter_args, 1, FILTER_REQUIRE_SCALAR TSRMLS_CC);
}
/* }}} */
static void php_filter_call(zval **filtered, long filter, zval **filter_args, const int copy, long filter_flags TSRMLS_DC) /* {{{ */
{
zval *options = NULL;
zval **option;
char *charset = NULL;
if (filter_args && Z_TYPE_PP(filter_args) != IS_ARRAY) {
long lval;
PHP_FILTER_GET_LONG_OPT(filter_args, lval);
if (filter != -1) { /* handler for array apply */
/* filter_args is the filter_flags */
filter_flags = lval;
if (!(filter_flags & FILTER_REQUIRE_ARRAY || filter_flags & FILTER_FORCE_ARRAY)) {
filter_flags |= FILTER_REQUIRE_SCALAR;
}
} else {
filter = lval;
}
} else if (filter_args) {
if (zend_hash_find(HASH_OF(*filter_args), "filter", sizeof("filter"), (void **)&option) == SUCCESS) {
PHP_FILTER_GET_LONG_OPT(option, filter);
}
if (zend_hash_find(HASH_OF(*filter_args), "flags", sizeof("flags"), (void **)&option) == SUCCESS) {
PHP_FILTER_GET_LONG_OPT(option, filter_flags);
if (!(filter_flags & FILTER_REQUIRE_ARRAY || filter_flags & FILTER_FORCE_ARRAY)) {
filter_flags |= FILTER_REQUIRE_SCALAR;
}
}
if (zend_hash_find(HASH_OF(*filter_args), "options", sizeof("options"), (void **)&option) == SUCCESS) {
if (filter != FILTER_CALLBACK) {
if (Z_TYPE_PP(option) == IS_ARRAY) {
options = *option;
}
} else {
options = *option;
filter_flags = 0;
}
}
}
if (Z_TYPE_PP(filtered) == IS_ARRAY) {
if (filter_flags & FILTER_REQUIRE_SCALAR) {
if (copy) {
SEPARATE_ZVAL(filtered);
}
zval_dtor(*filtered);
if (filter_flags & FILTER_NULL_ON_FAILURE) {
ZVAL_NULL(*filtered);
} else {
ZVAL_FALSE(*filtered);
}
return;
}
php_zval_filter_recursive(filtered, filter, filter_flags, options, charset, copy TSRMLS_CC);
return;
}
if (filter_flags & FILTER_REQUIRE_ARRAY) {
if (copy) {
SEPARATE_ZVAL(filtered);
}
zval_dtor(*filtered);
if (filter_flags & FILTER_NULL_ON_FAILURE) {
ZVAL_NULL(*filtered);
} else {
ZVAL_FALSE(*filtered);
}
return;
}
php_zval_filter(filtered, filter, filter_flags, options, charset, copy TSRMLS_CC);
if (filter_flags & FILTER_FORCE_ARRAY) {
zval *tmp;
ALLOC_ZVAL(tmp);
MAKE_COPY_ZVAL(filtered, tmp);
zval_dtor(*filtered);
array_init(*filtered);
add_next_index_zval(*filtered, tmp);
}
}
AND HERE IS YOUR VALIDATE EMAIL ROUTINE:
-- this answers your question. Yes, it is done by regex internally.
void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
{
/*
* The regex below is based on a regex by Michael Rushton.
* However, it is not identical. I changed it to only consider routeable
* addresses as valid. Michael's regex considers a#b a valid address
* which conflicts with section 2.3.5 of RFC 5321 which states that:
*
* Only resolvable, fully-qualified domain names (FQDNs) are permitted
* when domain names are used in SMTP. In other words, names that can
* be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
* in Section 5) are permitted, as are CNAME RRs whose targets can be
* resolved, in turn, to MX or address RRs. Local nicknames or
* unqualified names MUST NOT be used.
*
* This regex does not handle comments and folding whitespace. While
* this is technically valid in an email address, these parts aren't
* actually part of the address itself.
*
* Michael's regex carries this copyright:
*
* Copyright © Michael Rushton 2009-10
* http://squiloople.com/
* Feel free to use and redistribute this code. But please keep this copyright notice.
*
*/
const char regexp[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}#)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*#(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
pcre *re = NULL;
pcre_extra *pcre_extra = NULL;
int preg_options = 0;
int ovector[150]; /* Needs to be a multiple of 3 */
int matches;
/* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
if (Z_STRLEN_P(value) > 320) {
RETURN_VALIDATION_FAILED
}
re = pcre_get_compiled_regex((char *)regexp, &pcre_extra, &preg_options TSRMLS_CC);
if (!re) {
RETURN_VALIDATION_FAILED
}
matches = pcre_exec(re, NULL, Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, ovector, 3);
/* 0 means that the vector is too small to hold all the captured substring offsets */
if (matches < 0) {
RETURN_VALIDATION_FAILED
}
}
/* }}} */
PHP functions are either :
Written in C -- and not in PHP
Or just wrappers to functions provided by other libraries (For instance, PHP's curl extension is just a wrapper arround the curl library).
If you are curious, you can take a look at the sources of PHP -- here's its SVN : http://svn.php.net/viewvc/
For instance, the filter_var() function should be defined somewhere in the sources of the filter extension.
Nope. PHP-internal functions are written in C, not with PHP code. Which looks quite unwieldy due to the many Zend-runtime macros and how parameters are transferred from PHP into C structures.
That particular function does use a regular expression. It also makes a nice example:
http://svn.php.net/repository/php/php-src/branches/PHP_5_3/ext/filter/logical_filters.c
Look for regexp[] somewhere in the middle.
I'm using SWIG to generate a PHP extension over GLib which uses callbacks. To allow using PHP user-space functions as callbacks, i'm using something like:
The Wrapper (registers a unique callback dispatcher to handle all signal emissions):
/* {{{ proto void my_signal_connect(resource $instance, string $signal, mixed $callback, mixed $additional_args) }}}*/
ZEND_NAMED_FUNCTION(_wrap_my_signal_connect) {
GstObject *instance = (GstObject *) 0 ;
gchar *signal = (gchar *) 0 ;
zval *zcallback = (zval *) 0 ;
zval *zargs = (zval *) 0 ;
zval **args[4];
gulong result;
struct the_callback_struct *cb;
GType itype;
guint signal_id;
GSignalQuery *signal_info;
char *callback_name;
/* parse arguments */
SWIG_ResetError();
if(ZEND_NUM_ARGS() != 4 || zend_get_parameters_array_ex(4, args) != SUCCESS) {
WRONG_PARAM_COUNT;
}
{
if(SWIG_ConvertPtr(*args[0], (void **) &instance, 0, 0) < 0) {
if((*args[0])->type==IS_NULL) instance = 0;
else SWIG_PHP_Error(E_ERROR, "Wrapper: Type error in argument 1. Expected SWIGTYPE_p_p_void");
}
}
if((*args[1])->type == IS_NULL) {
signal = (gchar *) 0;
} else {
convert_to_string_ex(args[1]);
signal = (gchar *) Z_STRVAL_PP(args[1]);
}
MAKE_STD_ZVAL(zcallback);
*zcallback = **args[2];
zval_copy_ctor(zcallback);
MAKE_STD_ZVAL(zargs);
*zargs = **args[3];
zval_copy_ctor(zargs);
/* query the signal system for in-depth info about the signal */
{
itype = G_TYPE_FROM_INSTANCE((GObject *) instance);
signal_id = g_signal_lookup((const gchar *) signal, itype);
if(signal_id == 0) {
SWIG_PHP_Error(E_ERROR, "The object does not emit the given signal");
}
signal_info = (GSignalQuery *) emalloc(sizeof(*signal_info));
g_signal_query(signal_id, signal_info);
}
/* get the function name or object + method name */
cb = (struct callback_struct *)emalloc(sizeof(*cb));
if(zcallback->type == IS_NULL) {
SWIG_PHP_Error(E_ERROR, "Wrapper: Type error in callback argument.");
}
if(zcallback->type == IS_ARRAY) {
HashTable *ht = Z_ARRVAL_P(zcallback);
int n = zend_hash_num_elements(ht);
if(n == 2) {
if(zend_hash_index_find(ht, 0, (void **)&cb->target) == SUCCESS && Z_TYPE_PP(cb->target) == IS_OBJECT) {
if(zend_hash_index_find(ht, 1, (void **)&tmp2) == SUCCESS && Z_TYPE_PP(tmp2) == IS_STRING) {
MAKE_STD_ZVAL(cb->fx);
*cb->fx = **tmp2;
zval_copy_ctor(cb->fx);
}
}
}
} else if(zcallback->type == IS_STRING) {
cb->target = NULL;
MAKE_STD_ZVAL(cb->fx);
*cb->fx = *zcallback;
zval_copy_ctor(cb->fx);
} else {
SWIG_PHP_Error(E_ERROR, "Wrapper: Type error in callback argument.");
}
/* Validate callback */
if(zend_is_callable(cb->fx, 0, &callback_name) == FAILURE) {
efree(callback_name);
SWIG_PHP_Error(E_ERROR, "Invalid callback");
}
/* copy the args into the structure */
MAKE_STD_ZVAL(cb->args);
*cb->args = *zargs;
zval_copy_ctor(cb->args);
cb->signal_id = signal_info->signal_id;
cb->signal_name = signal_info->signal_name;
cb->signal_flags = signal_info->signal_flags;
cb->itype = signal_info->itype;
cb->return_type = signal_info->return_type;
cb->n_params = signal_info->n_params;
cb->param_types = signal_info->param_types;
/* connect the signal handler */
result = (gulong)g_signal_connect(instance, signal, G_CALLBACK(my_signal_dispatcher), (gpointer) cb);
{
ZVAL_LONG(return_value,result);
}
return;
fail:
zend_error(SWIG_ErrorCode(),"%s",SWIG_ErrorMsg());
}
The callback struct:
struct callback_struct {
zval **target;
zval *fx;
zval *args;
GType itype; /* The type of object/instance which emitted the signal */
guint signal_id; /* The signal id (or 0 if the signal is unknown) */
const gchar *signal_name; /* The signal name */
GSignalFlags signal_flags; /* The signal flags (as declared when creating the signal) */
GType return_type; /* The return type for the callback */
guint n_params; /* The number of parameters of the callback */
const GType *param_types; /* The parameter types for callback arguments */
};
The signal dispatcher maps the signal handling to a PHP user space function:
static void my_signal_dispatcher(gpointer instance, ...) {
int i = 0, addr;
gpointer arg, ref;
zval retval;
zval *arglist[3];
struct callback_struct *cb;
/* add emitter instance to arg list */
SWIG_SetPointerZval(arglist[i++], (void *) instance, SWIGTYPE_p__GObject, 1);
va_list ap;
va_start(ap, instance);
/* fetch the variable list of arguments */
while((addr = va_arg(ap, int)) > 2) {
arg = (gpointer) addr;
if(G_IS_OBJECT(arg)) {
SWIG_SetPointerZval(arglist[i++], (void *) arg, SWIGTYPE_p__GObject, 1);
} else {
cb = (struct callback_struct *) arg;
MAKE_STD_ZVAL(arglist[i]);
*arglist[i] = *cb->args;
zval_copy_ctor(arglist[i]);
i++;
break;
}
}
va_end(ap);
if(cb->target == NULL) {
if(call_user_function(EG(function_table), NULL, cb->fx, &retval, i, arglist TSRMLS_CC) == SUCCESS) {
zval_dtor(&retval);
}
} else {
if(call_user_function(NULL, cb->target, cb->fx, &retval, i, arglist TSRMLS_CC) == SUCCESS) {
zval_dtor(&retval);
}
}
zval_ptr_dtor(cb->target);
zval_dtor(cb->fx);
zval_dtor(cb->args);
efree(cb);
}
I'am able to build the extension, and to connect a PHP signal handler (callback) to a given signal, for example:
<?php
//...
function cb() {
$s = array();
foreach(func_get_args() as $arg) {
$s[] = gettype($arg) == 'resource' ? 'Resource '.get_resource_type($arg) : (gettype($arg) == 'object' ? 'Object '.get_class($arg) : gettype($arg));
}
$s = implode(', ', $s);
echo " { PHP user-space: cb($s) } ";
return 1;
}
//...
myextension::my_signal_connect($instance, "child-added", array('one' => 1));
?>
so, when $instance emits the "child-added" signal i got the output from the cb() PHP function, and the following error:
{ PHP user-space: cb(Resource _p__GObject, Resource _p__GObject, array) }
*** glibc detected *** php: free(): invalid pointer: 0x095080c8 ***
======= Backtrace: =========
/lib/tls/i686/cmov/libc.so.6(+0x6b591)[0xb95591]
/lib/tls/i686/cmov/libc.so.6(+0x6cde8)[0xb96de8]
/lib/tls/i686/cmov/libc.so.6(cfree+0x6d)[0xb99ecd]
/usr/lib/php5/20090626+lfs/myextension.so(+0x2a477)[0x7510477]
php[0x831c024]
php(zend_hash_del_key_or_index+0x112)[0x831af82]
php(_zend_list_delete+0x8c)[0x831c2ec]
php(_zval_dtor_func+0xb2)[0x830b872]
php(_zval_ptr_dtor+0x4d)[0x82ff00d]
php[0x82ff0c9]
php(zend_call_function+0x764)[0x8301694]
php(call_user_function_ex+0x64)[0x83023b4]
php(call_user_function+0x6b)[0x830242b]
/usr/lib/php5/20090626+lfs/gstreamer.so(+0x93c2d)[0x7579c2d]
/usr/lib/libgobject-2.0.so.0(g_cclosure_marshal_VOID__OBJECT+0x88)[0xd262d8]
======= Memory map: ========
00110000-0026e000 r-xp 00000000 08:04 440863 /usr/lib/libdb-4.8.so
0026e000-00270000 r--p 0015d000 08:04 440863 /usr/lib/libdb-4.8.so
00270000-00271000 rw-p 0015f000 08:04 440863 /usr/lib/libdb-4.8.so
...
I've tried to ref the GObject instances using g_object_ref() when connecting the signal before adding to the arguments list, without success
Any help?
Is there any method to access to list of registered shutdown functions?
You can write an extension and look at BG(user_shutdown_function_names). Probably easier is to make a wrapper for register_shutdown_function that saves the shutdown functions to some array and call it instead.
(Untested)
#include "ext/standard/basic_functions.h"
//usual include suspects here
typedef struct _php_shutdown_function_entry {
zval **arguments;
int arg_count;
} php_shutdown_function_entry;
static void _shutdown_function_dtor(php_shutdown_function_entry *shutdown_function_entry) /* {{{ */
{
int i;
for (i = 0; i < shutdown_function_entry->arg_count; i++) {
zval_ptr_dtor(&shutdown_function_entry->arguments[i]);
}
efree(shutdown_function_entry->arguments);
}
static int _build_shutdown_array(php_shutdown_function_entry *entry, zval *arr TSRMLS_DC)
{
zval *inner;
zval *args;
int i;
array_init(inner);
array_init(args);
Z_ADDREF_P(entry->arguments[0]);
add_assoc_zval(inner, "callback", entry->arguments[0]);
for (i = 1; i < entry->arg_count; i++) {
Z_ADDREF_P(entry->arguments[i]);
add_next_index_zval(args, entry->arguments[i]);
}
add_assoc_zval(inner, "arguments", args);
add_next_index_zval(arr, inner);
}
PHP_FUNCTION(list_shutdown_functions)
{
if (zend_parse_parameters_none() == FAILURE)
return;
if (!BG(user_shutdown_function_names)) {
ALLOC_HASHTABLE(BG(user_shutdown_function_names));
zend_hash_init(BG(user_shutdown_function_names), 0, NULL,
(void (*)(void *)) _shutdown_function_dtor, 0);
}
array_init(return_value);
zend_hash_apply_with_argument(BG(user_shutdown_function_names),
(apply_func_arg_t) _build_shutdown_array, return_value TSRMLS_CC);
}
Other than keeping track yourself, no. The list of registered function names is not exposed to your PHP scripts. If you're open to extending PHP itself (this would be a simple task) then see Artefacto's answer.