Hi, I'm having a problem getting threads to behave reliably. My PHP script sometimes terminates unexpectedly at one of two specific points during execution, and works the rest of the time.
I've created a test scenario that produces the same result:
This script creates 5 threads. Each thread loops a random number of times (between 10 and 20) and adds up the iteration indexes.
Secondly, Summary_Thread checks whether all threads have completed before printing the summary... and therein lies the issue.
The script terminates during Summary_Thread::run and Stack_Item_Container_Stack::Compile_Summary, which is indirectly called at the end of Summary_Thread::run.
#!/usr/bin/php
<?php
// Run without a time limit and display every error except notices.
ini_set('max_execution_time', 0);
ini_set('display_errors', 1);
error_reporting(E_ALL ^ E_NOTICE);
// ignore_user_abort() without an argument only *reads* the current setting;
// pass true so the script really keeps running after the client disconnects.
ignore_user_abort(true);
/**
 * Owns the summary thread and gives it a route back to the container stack.
 */
class Summary_Thread_Container
{
    /** Stack whose linked list of containers the summary thread walks. */
    public $stack_item_container_stack;

    /** Thread object created and started by the constructor. */
    private $summary_thread;

    /**
     * Stores the stack, then creates and immediately starts the summary thread.
     */
    public function __construct($thread_count, Stack_Item_Container_Stack $stack_item_container_stack)
    {
        $this->stack_item_container_stack = $stack_item_container_stack;

        $thread = new Summary_Thread($thread_count, $this);
        $this->summary_thread = $thread;
        $thread->start();
    }

    /** Forwards the summary request to the stack. */
    public function Compile_Summary()
    {
        $this->stack_item_container_stack->Compile_Summary();
    }
}
/**
 * Thread that repeatedly counts the containers linked from the stack and asks
 * for the summary once that count equals the expected thread count.
 */
class Summary_Thread extends Worker
{
    private $summary_thread_container;
    private $thread_count;

    public function __construct($thread_count, Summary_Thread_Container $summary_thread_container)
    {
        $this->summary_thread_container = $summary_thread_container;
        $this->thread_count = $thread_count;
    }

    /**
     * Walks the linked list until the number of containers found matches
     * $this->thread_count, then triggers the summary.
     */
    public function run()
    {
        $seen = 0;
        echo "\n************************************** Stack Thread Count: {$this->thread_count} \n";
        echo "*** START.\n";
        echo ($this->thread_count == $seen)
            ? "*** THREAD COUNTS MATCH.\n"
            : "*** THREAD COUNTS DO NOT MATCH.\n";

        while ($this->thread_count != $seen) {
            // Recount from the head of the list on every pass.
            $node = $this->summary_thread_container->stack_item_container_stack->first_stack_item_container;
            for ($seen = 0; $node; $node = $node->Get_Next_Stack_Item_Container()) {
                $seen++;
                echo "************************************** Thread Count: {$seen} \n";
            }
            echo "*** END.\n";
            echo ($this->thread_count == $seen)
                ? "*** THREAD COUNTS MATCH.\n"
                : "*** THREAD COUNTS DO NOT MATCH.\n";
        }

        $this->Compile_Summary();
    }

    /** Forwards the summary request to the owning container. */
    public function Compile_Summary()
    {
        $this->summary_thread_container->Compile_Summary();
    }
}
/**
 * Singly linked list of Stack_Item_Container objects. The constructor builds
 * five containers and then creates the Summary_Thread_Container that reports
 * on them.
 */
class Stack_Item_Container_Stack
{
    /** Head of the linked list, or null while the list is empty. */
    public $first_stack_item_container;
    /** Number of containers created by the constructor. */
    private $thread_count;
    private $summary_thread_container;

    function __construct()
    {
        $this->first_stack_item_container = null;
        $this->thread_count = 0;
        for ($i = 0; $i < 5; $i++) {
            echo " * Creating Stack Item Container: {$i}\n";
            $this->thread_count++;
            $this->Add_Stack_Item_Container(new Stack_Item_Container(rand(10, 20), $i, $this));
        }
        $this->summary_thread_container = new Summary_Thread_Container($this->thread_count, $this);
    }

    /**
     * Appends a container to the end of the linked list.
     */
    public function Add_Stack_Item_Container(Stack_Item_Container $stack_item_container)
    {
        echo " * Adding Stack Item Container *\n";
        if (!$this->first_stack_item_container) {
            $this->first_stack_item_container = $stack_item_container;
            return;
        }
        // Walk to the tail, fetching each successor only once per hop.
        $tail = $this->first_stack_item_container;
        while ($next = $tail->Get_Next_Stack_Item_Container()) {
            $tail = $next;
        }
        $tail->Set_Next_Stack_Item_Container($stack_item_container);
    }

    /**
     * Prints one line per container (thread id, index, total) and then tears
     * the list down via Kill().
     */
    public function Compile_Summary()
    {
        echo "\n";
        echo "===============\n";
        echo "=== Summary ===\n";
        echo "===============\n";
        echo "\n";
        $temp_SIC = $this->first_stack_item_container;
        while ($temp_SIC) {
            echo " Thread ID {$temp_SIC->member_variables[0]} ({$temp_SIC->member_variables[4]}) has a Total of {$temp_SIC->member_variables[2]}";
            echo "\n";
            $temp_SIC = $temp_SIC->Get_Next_Stack_Item_Container();
        }
        echo "\n";
        $this->Kill();
    }

    /**
     * Detaches and kills every container, then drops the summary container.
     */
    private function Kill()
    {
        while ($this->first_stack_item_container) {
            $temp_SIC = $this->first_stack_item_container;
            $this->first_stack_item_container = $temp_SIC->Get_Next_Stack_Item_Container();
            $temp_SIC->Kill();
        }
        unset($this->summary_thread_container);
        // No unset($this): it never destroyed the object, and since PHP 7.1
        // it is a compile-time fatal error ("Cannot unset $this").
    }
}
/**
 * Linked-list node that owns one Stack_Item_Thread and the Stackable the
 * thread reports through.
 *
 * member_variables layout: [0] thread id (-1 until the thread sets it),
 * [1] iteration count, [2] total, [3] finished flag, [4] container index.
 */
class Stack_Item_Container
{
    private $stack_item_container_stack;
    private $next_stack_item_container;
    public $member_variables;
    public $stack_item_thread;

    /**
     * Fills the shared Stackable and starts the thread straight away.
     */
    function __construct($time, $index, Stack_Item_Container_Stack $stack_item_container_stack)
    {
        $this->stack_item_container_stack = $stack_item_container_stack;
        $this->next_stack_item_container = null;
        $this->member_variables = new Stackable();
        $this->member_variables[] = -1;
        $this->member_variables[] = $time;
        $this->member_variables[] = 0;
        $this->member_variables[] = false;
        $this->member_variables[] = $index;
        $this->stack_item_thread = new Stack_Item_Thread($this->member_variables, $this);
        $this->stack_item_thread->start();
    }

    public function Get_Stack_Item_Container_Stack()
    {
        return $this->stack_item_container_stack;
    }

    public function Get_Next_Stack_Item_Container()
    {
        return $this->next_stack_item_container;
    }

    public function Set_Next_Stack_Item_Container(Stack_Item_Container $next_SIC)
    {
        $this->next_stack_item_container = $next_SIC;
    }

    /**
     * Kills the thread and releases the shared Stackable.
     */
    public function Kill()
    {
        $this->stack_item_thread->kill();
        unset($this->member_variables);
        // No unset($this): it never destroyed the object, and since PHP 7.1
        // it is a compile-time fatal error ("Cannot unset $this").
    }
}
/**
 * Thread that sums the loop indexes 0..(limit - 1) and publishes the result
 * through the member_variables object it was given.
 */
class Stack_Item_Thread extends Worker
{
    private $stack_item_container;
    private $member_variables;

    public function __construct($member_variables, Stack_Item_Container $stack_item_container)
    {
        $this->member_variables = $member_variables;
        $this->stack_item_container = $stack_item_container;
    }

    /**
     * Records the thread id in slot 0, loops slot-1 times printing progress,
     * then stores the total in slot 2 and sets slot 3 to true.
     */
    public function run()
    {
        $this->member_variables[0] = $this->getThreadId();
        $total = 0;
        echo "\n";

        $i = 0;
        while ($i < $this->member_variables[1]) {
            $total += $i;
            $i++; // $i is now the 1-based iteration number shown below
            echo "Thread ID ({$this->member_variables[4]}): {$this->member_variables[0]}:";
            echo " Count: {$i} of {$this->member_variables[1]}";
            echo "\n";
        }

        echo "\n";
        $this->member_variables[2] = $total;
        $this->member_variables[3] = true;
    }
}
// Constructing the stack creates and starts the five item threads and the
// summary thread; nothing in this script joins those threads afterwards.
$stack_item_container_stack = new Stack_Item_Container_Stack();
OUTPUT 1 (When it does work):
************************************** Stack Thread Count: 5
*** START.
*** THREAD COUNTS DO NOT MATCH.
************************************** Thread Count: 1
************************************** Thread Count: 2
************************************** Thread Count: 3
************************************** Thread Count: 4
************************************** Thread Count: 5
*** END.
*** THREAD COUNTS MATCH.
===============
=== Summary ===
===============
Thread ID 139975400195840 (0) has a Total of 105
Thread ID 139975389705984 (1) has a Total of 153
Thread ID 139975378360064 (2) has a Total of 153
Thread ID 139975367014144 (3) has a Total of 55
Thread ID 139975130801920 (4) has a Total of 153
OUTPUT 2: (the first point that it will terminate):
************************************** Stack Thread Count: 5
*** START.
*** THREAD COUNTS DO NOT MATCH.
OUTPUT 3: (the second point that it will terminate)
************************************** Stack Thread Count: 5
*** START.
*** THREAD COUNTS DO NOT MATCH.
************************************** Thread Count: 1
************************************** Thread Count: 2
************************************** Thread Count: 3
************************************** Thread Count: 4
************************************** Thread Count: 5
*** END.
*** THREAD COUNTS MATCH.
Just to give you as much information as i can:
(might not be relevant but just in case)
Changes made to config:
File: /etc/sysctl.conf, Changes Made: net.ipv4.tcp_fin_timeout=10
File: php.ini, Changes Made: extension=php_pthreads.dll
Server:
Linux 2.6.32-504.8.1.el6.x86_64
PHP 5.5.13
Apache/2.2.15 (CentOS)
Max Requests Per Child: 4000 - Keep Alive: off - Max Per Connection: 100
Timeouts Connection: 60 - Keep-Alive: 15
Virtual Server No
Please help :) and questions would help ... anything i'm not seeing?
Thanks in advance
I can't test it myself, but if PHP exits normally without errors, it is probably caused by the main thread exiting before the other threads have finished. Try calling Thread::join on them (http://php.net/manual/en/thread.join.php) so the parent thread waits until they are finished.
Related
I am developing an application using Laravel, with Eloquent as the ORM and PHPUnit for unit testing. I want to save more than one entry in the database, so I tried a for loop. But the loop saves only one entry; in fact it runs only once. The loop works fine with other code, just not with the save chunk. Below is my code:
Model Class:
/**
 * Eloquent model backed by the "posts" table.
 */
class Post extends Model
{
    /** Table this model reads from and writes to. */
    protected $table = 'posts';

    /** Attributes that may be set through mass assignment. */
    protected $fillable = array(
        'id',
        'user_id',
        'title',
        'description',
        'total_needed',
        'total_collected',
        'total_expanse',
        'start_date',
        'end_date',
        'active',
        'updated_at',
        'created_at',
    );
}
Unit testing code
/**
 * Exercises Post_Repo_Impl::save2() by saving ten posts in a loop.
 */
class RepoPost extends TestCase
{
    public function testMain()
    {
        echo "\n >----------- Test Name : " . get_class($this);
        echo "\n >----------- Test Main : ---------> \n";
        $this->postSave();
    } //test:main

    /**
     * Saves ten fresh Post models through the repository and asserts that
     * every save reports success.
     */
    public function postSave()
    {
        //loop 1
        for ($x = 0; $x < 10; $x++) {
            // Double quotes so \n is a newline rather than a literal backslash-n.
            echo "\n----PostSave----\n" . $x;
            $postRepoSave = $this->getRepoPostImpl();
            // dd() dumps and then terminates the script, which is why this loop
            // used to run exactly once; assert on the result instead. A new
            // model per iteration keeps every save an INSERT rather than an
            // update of the row created on the first pass.
            $this->assertTrue($postRepoSave->save2($this->makePostDummy()));
        }
        //loop 2
        for ($x = 0; $x <= 10; $x++) {
            echo "\n The number is:" . $x;
        }
    }

    public function getRepoPostImpl()
    {
        return new Post_Repo_Impl;
    }

    /**
     * Builds an unsaved Post filled with the fixed test data.
     */
    private function makePostDummy()
    {
        $postDummy = new Post();
        // $postDummy->id ='';
        $postDummy->user_id = 'Tst';
        $postDummy->title = 'Post Save Repo Test.';
        $postDummy->description = 'UnitTesting of URLs';
        $postDummy->total_needed = '2000';
        $postDummy->total_collected = '1000';
        $postDummy->total_expanse = '500';
        $postDummy->start_date = '22-09-2019';
        $postDummy->end_date = '22-10-2019';
        $postDummy->active = '1';
        $postDummy->updated_at = '2019-09-22';
        $postDummy->created_at = '2019-09-23';
        return $postDummy;
    }
}
Loop 1 saves only one record and runs only once. Loop 2 works as expected: it runs through all of its iterations and prints one line for each.
If i copy the same method multiple times in "testMain()", it is saving more than one entry, as many as times i have copied the method. Below code will save 3 entries in database.
/**
 * Calls postSave() three times in a row.
 */
public function testMain()
{
    // The statement was cut off after "get_class($"; completed so it parses.
    echo "\n >----------- Test Name : " . get_class($this);
    $this->postSave();
    $this->postSave();
    $this->postSave();
} //test:main
Using a loop in testMain(), also provide same result, saving one entry.
/**
 * Prints the test class name, then calls postSave() ten times.
 */
public function testMain()
{
    echo "\n >----------- Test Name : " . get_class($this);
    $remaining = 10;
    while ($remaining-- > 0) {
        $this->postSave();
    }
} //test:main
Repository code:
/**
 * Repository that persists Post models.
 */
class Post_Repo_Impl implements Post_Repo_I
{
    /**
     * Saves the given post.
     *
     * @param Post $post model to persist
     * @return bool true when save() reports success; false when it reports
     *              failure or throws (the exception is written to the error log)
     */
    public function save2(Post $post)
    {
        try {
            // save() returns a boolean of its own; pass it on instead of
            // always reporting success when no exception was thrown.
            return (bool) $post->save();
        } catch (Exception $e) {
            error_log("Saveing Post Failed. : " . $e);
            return false;
        }
    }
}
Why am I not able to save multiple entries with a for loop? Is there any way to do so?
Update :
instantiated the "post object" inside the loop, doesn't solve the issue.
// Build and save a fresh Post on each of the ten iterations.
for ($x = 0; $x < 10; $x++) {
    $postDummy = new Post();
    // $postDummy->id ='';
    $postDummy->user_id = 'Tst';
    $postDummy->title = 'Post Save Repo Test.';
    $postDummy->description = 'UnitTesting of URLs';
    $postDummy->total_needed = '2000';
    $postDummy->total_collected = '1000';
    $postDummy->total_expanse = '500';
    $postDummy->start_date = '22-09-2019';
    $postDummy->end_date = '22-10-2019';
    $postDummy->active = '1';
    $postDummy->updated_at = '2019-09-22';
    $postDummy->created_at = '2019-09-22';
    // Double quotes so \n is a newline rather than a literal backslash-n.
    echo "\n----PostSave----\n" . $x;
    $postRepoSave = $this->getRepoPostImpl();
    // dd() dumps and then terminates the script, so the loop could never
    // reach a second iteration; dump() prints the value without exiting.
    dump($postRepoSave->save2($postDummy));
}
Move the model instantiation inside the for loop, and clean up your test: it calls an external class implementing an interface for no purpose.
<?php
namespace Tests\Feature;
use App\Post;
use Tests\TestCase;
/**
 * Feature test that saves ten posts and checks that ten rows were added.
 */
class RepoPostTest extends TestCase
{
    /** @test */
    public function testMain()
    {
        echo "\n >----------- Test Name : " . get_class($this);
        echo "\n >----------- Test Main : ---------> \n";

        // Compare row counts before and after. Matching created_at against
        // the current time-of-day breaks as soon as the ten inserts and the
        // assertion do not all land in the same second.
        $countBefore = Post::count();

        for ($x = 0; $x < 10; $x++) {
            // Single-quoted on purpose left as-is: \n is printed literally.
            echo '\n----PostSave----\n' . $x;
            $this->postSave();
        }

        $this->assertSame($countBefore + 10, Post::count());
    }

    /**
     * Creates and saves one Post filled with fixed test data.
     */
    public function postSave()
    {
        $postDummy = new Post();
        $postDummy->user_id = 'Tst';
        $postDummy->title = 'Post Save Repo Test.';
        $postDummy->description = 'UnitTesting of URLs';
        $postDummy->total_needed = '2000';
        $postDummy->total_collected = '1000';
        $postDummy->total_expanse = '500';
        $postDummy->start_date = '22-09-2019';
        $postDummy->end_date = '22-10-2019';
        $postDummy->active = '1';
        $postDummy->save();
    }
}
Assuming a posts migration like this
// Creates the "posts" table: an auto-incrementing big-integer id, nine string
// columns for the post data, and the created_at / updated_at timestamps.
Schema::create('posts', function (Blueprint $table) {
$table->bigIncrements('id');
$table->string('user_id');
$table->string('title');
$table->string('description');
$table->string('total_needed');
$table->string('total_collected');
$table->string('total_expanse');
$table->string('start_date');
$table->string('end_date');
$table->string('active');
$table->timestamps();
});
And you don't need the $table property on your model because it has the plural form so it's automatically resolved, and you don't need the $fillable property because you're not doing mass assignment
Results
PHPUnit 8.3.4 by Sebastian Bergmann and contributors.
... 3 / 3 (100%)
>----------- Test Name : Tests\Feature\RepoPostTest
>----------- Test Main : --------->
\n----PostSave----\n0\n----PostSave----\n1\n----PostSave----\n2\n----PostSave----\n3\n----PostSave----\n4\n----PostSave----\n5\n----PostSave----\n6\n----PostSave----\n7\n----PostSave----\n8\n----PostSave----\n9
Time: 263 ms, Memory: 20.00 MB
OK (1 tests, 1 assertions)
(First problem is below)
Finally made it like i wanted ! What a headache ^^
Not using pool was the answer ;)
So, here is my code, output below !
As you can see in the output, my jobs are distributed evenly between workers 1 and 2 while worker 0 is running a long job!
<?php
/**
 * Unit of work for a Worker: sleeps 10 seconds when its id is 0 and
 * 1 second otherwise, then marks itself complete.
 */
class ChildThread extends Threaded
{
    public $workerId;
    protected $complete;

    public function __construct($workerId)
    {
        // init some properties
        $this->workerId = $workerId;
        $this->complete = false;
    }

    public function run()
    {
        // do some work
        echo " STARTING " . $this->workerId . "\r\n";
        // id 0 plays the long-running job
        sleep($this->workerId == 0 ? 10 : 1);
        echo " ENDING " . $this->workerId . "\r\n";
        $this->complete = true;
    }

    /** Returns the flag that run() sets on completion. */
    public function isComplete()
    {
        return $this->complete;
    }
}
// Number of Worker threads the jobs are spread over.
$WORKER_COUNT = 3;
// Start every worker up front.
$workers = array();
for ($i=0; $i<$WORKER_COUNT; $i++) {
$worker = new Worker();
$worker->start();
$workers[] = $worker;
}
$tasks = array();
//Create 9 jobs
for ($i=0; $i < 9; $i++) {
$tasks[] = new ChildThread($i);
}
// Index of the next job to hand out.
$cptTasks = 0;
//References for running jobs
$taskss = array();
// Poll until every job has been handed to a worker.
while(count($tasks) > 0){
foreach ($workers as $key => $worker) {
if($worker->isShutdown()){
echo "Shutdowned worker ".$key.", restarting it !"."\r\n";
$worker->start();
}
//If worker is free of charge
if($worker->isWorking() === false ){
//Check if task not null
if(isset($tasks[$cptTasks])){
echo ("Stacking job ".$cptTasks." on worker ".$key."\r\n");
$worker->stack($tasks[$cptTasks]);
// Keep a reference to the stacked job in a second array before removing it from $tasks.
$taskss[] = $tasks[$cptTasks];
//Clear job from jobs list
unset($tasks[$cptTasks]);
$cptTasks++;
}
}
}
// Pause 2 ms between polling passes.
usleep(2000);
}
//Clear workers
foreach ($workers as $key => $worker) {
echo "Shutdown worker : " .$key."\r\n";
$worker->shutdown();
}
?>
Output :
Stacking job 0 on worker 0
Stacking job 1 on worker 1
Stacking job 2 on worker 2
STARTING 1
STARTING 0
STARTING 2
ENDING 1
ENDING 2
Stacking job 3 on worker 1
Stacking job 4 on worker 2
STARTING 3
STARTING 4
ENDING 3
ENDING 4
Stacking job 5 on worker 1
Stacking job 6 on worker 2
STARTING 5
STARTING 6
ENDING 5
ENDING 6
Stacking job 7 on worker 1
Stacking job 8 on worker 2
STARTING 7
STARTING 8
Shutdown worker : 0
ENDING 8
ENDING 7
ENDING 0
Shutdown worker : 1
Shutdown worker : 2
I'm working on a pthread system, and i'm trying to implement kind of a pool system.
Currently, i'm creating a pool of 3 workers, then send my jobs with pool->submit().
Then, pool->collect()
And pool->shutdown()
But, i'm disapointed with the result.
Jobs seems to be distributed to all workers evenly
Worker 1 : jobs 1, 4, 7
Worker 2 : jobs 2, 5, 8
Worker 3 : jobs 3, 6, 9
Now, let's say job 1 takes quite long and all the others do not; my jobs will then finish in this order:
2, 5, 3, 6, 8, 9, 1, 4, 7
But, what i'd like to achieve should be more like : 2, 3, 5, 4, 6, 8, 7, 9, 1
That is, each job should go to the next idle worker, so that my heavy job runs on worker 1 while all the other jobs go to workers 2 and 3.
Is that doable?
Am I missing something here?
Code exemple :
<?php
/**
 * Job submitted to the pool: sleeps 10 seconds when its id is 1 and
 * 1 second otherwise, then marks itself complete.
 */
class ChildThread extends Threaded
{
    public $jobId;
    protected $complete;

    public function __construct($jobId)
    {
        // init some properties
        $this->jobId = $jobId;
        $this->complete = false;
    }

    public function run()
    {
        echo "STARTING " . $this->jobId . "\r\n";
        //Job ID 1 == heavy waiting
        sleep($this->jobId == 1 ? 10 : 1);
        echo "ENDING " . $this->jobId . "\r\n";
        $this->complete = true;
    }

    /** Returns the flag that run() sets on completion. */
    public function isComplete()
    {
        return $this->complete;
    }
}
// Pool of three workers.
$pool = new Pool(3);
$tasks = array();
// Add tasks to pool queue
for ($i=1; $i<=9; $i++) {
$task = new ChildThread($i);
$pool->submit($task);
// Keep a reference to every submitted job.
$tasks[] = $task;
}
// The callback tells the pool which jobs report themselves as complete.
$pool->collect(function($work){
return $work->isComplete();
});
$pool->shutdown();
?>
Result :
STARTING 1
STARTING 2
STARTING 3
ENDING 2
STARTING 5
ENDING 3
STARTING 6
ENDING 5
STARTING 8
ENDING 6
STARTING 9
ENDING 8
ENDING 9
ENDING 1
STARTING 4
ENDING 4
STARTING 7
ENDING 7
Update, 2013-09-12:
I've dug a bit deeper into systemd and its journal, and I stumbled upon this, which states:
systemd-journald will forward all received log messages to the AF_UNIX SOCK_DGRAM socket /run/systemd/journal/syslog, if it exists, which may be used by Unix syslog daemons to process the data further.
As per manpage, I did set up my environment to also have syslog underneath, I've tweaked my code accordingly:
define('NL', "\n\r");
/**
 * Prints a timestamped line.
 *
 * Called with one argument, the message is printed verbatim. Called with
 * several, the first is a sprintf() template and the rest are its values.
 * Called with none, nothing is printed.
 */
$log = function ()
{
    $args = func_get_args();
    if (count($args) === 0)
    {
        return;
    }
    // Only run sprintf() when values were supplied for the template; a lone
    // message (for example raw socket data) may contain '%' and must not be
    // interpreted as a format string.
    $message = count($args) > 1 ? call_user_func_array('sprintf', $args) : $args[0];
    echo '[' . date('r') . '] ' . $message . NL;
};
// Connect a datagram socket to the syslog socket path, then loop forever:
// select on it, log the select counts, and attempt one read on every socket
// that select reported as readable or writable.
$syslog = '/var/run/systemd/journal/syslog';
$sock = socket_create(AF_UNIX, SOCK_DGRAM, 0);
$connection = socket_connect($sock, $syslog);
if ($connection)
{
    $log('Connected to ' . $syslog);
    $readables = array($sock);
    socket_set_nonblock($sock);

    // Reads once from each socket and logs the data, the socket error, or
    // $emptyMessage when the read returned nothing.
    $drain = function (array $sockets, $emptyMessage) use ($log)
    {
        foreach ($sockets as $socket)
        {
            $data = socket_read($socket, 4096, PHP_BINARY_READ);
            if ($data === false)
            {
                $log(socket_last_error() . ': ' . socket_strerror(socket_last_error()));
                continue;
            }
            if (empty($data))
            {
                $log($emptyMessage);
                continue;
            }
            $log($data);
        }
    };

    while (true)
    {
        // socket_select() rewrites its arguments, so start from fresh copies.
        $read = $readables;
        $write = $readables;
        $except = $readables;
        $select = socket_select($read, $write, $except, 0);
        $log('Changes: %d.', $select);
        $log('-------');
        $log('Read: %d.', count($read));
        $log('Write: %d.', count($write));
        $log('Except: %d.', count($except));
        if ($select <= 0)
        {
            continue;
        }
        if ($read)
        {
            $drain($read, 'Read empty.');
        }
        if ($write)
        {
            $drain($write, 'Write empty.');
        }
    }
}
else
{
    $log('Couldn\'t connect to ' . $syslog);
}
Apparently this only sees (selects) changes on the write sockets. Thinking something might be wrong there, I attempted to read from them as well, with no luck (nor should there be):
[Thu, 12 Sep 2013 14:45:15 +0300] Changes: 1.
[Thu, 12 Sep 2013 14:45:15 +0300] -------
[Thu, 12 Sep 2013 14:45:15 +0300] Read: 0.
[Thu, 12 Sep 2013 14:45:15 +0300] Write: 1.
[Thu, 12 Sep 2013 14:45:15 +0300] Except: 0.
[Thu, 12 Sep 2013 14:45:15 +0300] 11: Resource temporarily unavailable
Now, this drives me nuts a little. syslog documentation says this should be possible. What is wrong with the code?
Original:
I had a working prototype, by simply:
// Endless polling loop: every pass runs journalctl in reverse order limited to
// one entry. exec() appends the command's output lines to $result and stores
// the exit status in $exit.
while(true)
{
exec('journalctl -r -n 1 | more', $result, $exit);
// do stuff
}
But this feels wrong, and consumes too much system resources, then I found out about journald having sockets.
I have attempted to connect and read from:
AF_UNIX, SOCK_DGRAM : /var/run/systemd/journal/socket
AF_UNIX, SOCK_STREAM : /var/run/systemd/journal/stdout
the given sockets.
With /var/run/systemd/journal/socket, socket_select sees 0 changes. With /var/run/systemd/journal/stdout I always (every loop) get 1 change, with 0 byte data.
This is my "reader":
<?php
// Connects to a journald socket and logs whatever can be read from it.
define('NL', "\n\r");
$journal = '/var/run/systemd/journal/socket';
$jSTDOUT = '/var/run/systemd/journal/stdout';
$journal = $jSTDOUT;
$sock = socket_create(AF_UNIX, SOCK_STREAM, 0);
// The call was hidden behind a '#', which PHP treats as a comment marker, so
// $connection ended up holding the $log closure and was always truthy.
$connection = socket_connect($sock, $journal);
// Prints a timestamped line.
$log = function ($message)
{
    echo '[' . date('r') . '] ' . $message . NL;
};
if (!$connection)
{
    $log('Couldn\'t connect to ' . $journal);
}
else
{
    $log('Connected to ' . $journal);
    $readables = array($sock);
    while (true)
    {
        // socket_select() takes its arrays by reference and rewrites them, so
        // pass real variables rather than assignment expressions.
        $read = $readables;
        $write = null;
        $except = null;
        if (socket_select($read, $write, $except, 0) < 1)
        {
            continue;
        }
        foreach ($read as $read_socket)
        {
            // Same '#' problem as above: the read was commented out, leaving
            // "$data = if (...)", which is a parse error.
            $data = socket_read($read_socket, 1024, PHP_BINARY_READ);
            if ($data === false)
            {
                $log('Couldn\'t read.');
                socket_shutdown($read_socket, 2);
                socket_close($read_socket);
                $log('Server terminated.');
                break 2;
            }
            $data = trim($data);
            if (!empty($data))
            {
                $log($data);
            }
        }
    }
    $log('Exiting.');
}
Having no data in read socket(s), I assume I'm doing something wrong.
Question, idea:
My goal is to read the messages and upon some of them, execute a callback.
Could anyone point me into the right direction of how to programmatically read journal messages?
The sockets under /run/systemd/journal/ won't work for this – …/socket and …/stdout are actually write-only (i.e. used for feeding data into the journal) while the …/syslog socket is not supposed to be used by anything else than a real syslogd, not to mention journald does not send any metadata over it. (In fact, the …/syslog socket doesn't even exist by default – syslogd must actually listen on it, and journald connects to it.)
The official method is to read directly from the journal files, and use inotify to watch for changes (which is the same thing journalctl --follow and even tail -f /var/log/syslog use in place of polling). In a C program, you can use the functions from libsystemd-journal, which will do the necessary parsing and even filtering for you.
In other languages, you have three choices: call the C library; parse the journal files yourself (the format is documented); or fork journalctl --follow which can be told to output JSON-formatted entries (or the more verbose journal export format). The third option actually works very well, since it only forks a single process for the entire stream; I have written a PHP wrapper for it (see below).
Recent systemd versions (v193) also come with systemd-journal-gatewayd, which is essentially a HTTP-based version of journalctl; that is, you can get a JSON or journal-export stream at http://localhost:19531/entries. (Both gatewayd and journalctl even support server-sent events for accessing the stream from HTML 5 webpages.) However, due to obvious security issues, gatewayd is disabled by default.
Attachment: PHP wrapper for journalctl --follow
<?php
/* © 2013 Mantas Mikulėnas <grawity#gmail.com>
* Released under the MIT Expat License <https://opensource.org/licenses/MIT>
*/
/* Iterator extends Traversable {
void rewind()
boolean valid()
void next()
mixed current()
scalar key()
}
calls: rewind, valid==true, current, key
next, valid==true, current, key
next, valid==false
*/
/**
 * Iterator over journal entries, backed by a "journalctl -f -o json" child
 * process. Keys are entry cursors; values are the decoded JSON objects.
 *
 * The child is started by rewind()/seek() and stopped when the journal is
 * reopened or the object is destroyed.
 */
class Journal implements Iterator {
    private $filter;
    private $startpos;
    private $proc;
    private $stdout;
    private $entry;

    /**
     * Joins an argument vector into one shell-escaped command line.
     */
    static function _join_argv($argv) {
        return implode(" ",
            array_map(function($a) {
                return strlen($a) ? escapeshellarg($a) : "''";
            }, $argv));
    }

    /**
     * @param array       $filter extra journalctl arguments (match expressions)
     * @param string|null $cursor cursor to start from, or null for the default
     */
    function __construct($filter=[], $cursor=null) {
        $this->filter = $filter;
        $this->startpos = $cursor;
    }

    /** Stops the child process so it does not outlive this object. */
    function __destruct() {
        $this->_close_journal();
    }

    /**
     * Closes the pipe and stops the journalctl child, if any.
     */
    function _close_journal() {
        if ($this->stdout) {
            fclose($this->stdout);
            $this->stdout = null;
        }
        if ($this->proc) {
            // "journalctl -f" never exits on its own and proc_close() waits
            // for the child to exit, so ask it to terminate first.
            proc_terminate($this->proc);
            proc_close($this->proc);
            $this->proc = null;
        }
        $this->entry = null;
    }

    /**
     * (Re)starts journalctl with the given filter and cursor.
     *
     * @return false|null false when the process could not be started
     */
    function _open_journal($filter=[], $cursor=null) {
        if ($this->proc)
            $this->_close_journal();

        $this->filter = $filter;
        $this->startpos = $cursor;

        $cmd = ["journalctl", "-f", "-o", "json"];
        if ($cursor) {
            $cmd[] = "-c";
            $cmd[] = $cursor;
        }
        $cmd = array_merge($cmd, $filter);
        // "exec" makes the shell replace itself with journalctl, so the
        // process handle (and proc_terminate) refers to journalctl itself.
        $cmd = "exec " . self::_join_argv($cmd);

        $fdspec = [
            0 => ["file", "/dev/null", "r"],
            1 => ["pipe", "w"],
            2 => ["file", "/dev/null", "w"],
        ];

        $this->proc = proc_open($cmd, $fdspec, $fds);
        if (!$this->proc)
            return false;
        $this->stdout = $fds[1];
    }

    /** Restarts the stream at the given cursor. */
    function seek($cursor) {
        $this->_open_journal($this->filter, $cursor);
    }

    #[\ReturnTypeWillChange]
    function rewind() {
        $this->seek($this->startpos);
    }

    /**
     * Reads the next line and decodes it. The entry becomes false at end of
     * stream or when no journal process is open.
     */
    #[\ReturnTypeWillChange]
    function next() {
        if (!is_resource($this->stdout)) {
            // proc_open() failed or the journal was never opened.
            $this->entry = false;
            return;
        }
        $line = fgets($this->stdout);
        $this->entry = ($line === false) ? false : json_decode($line);
    }

    #[\ReturnTypeWillChange]
    function valid() {
        return ($this->entry !== false);
        /* null is valid, it just means next() hasn't been called yet */
    }

    #[\ReturnTypeWillChange]
    function current() {
        if (!$this->entry)
            $this->next();
        return $this->entry;
    }

    /**
     * @return string|null the entry's cursor, or null when there is no
     *                     decoded entry to take it from
     */
    #[\ReturnTypeWillChange]
    function key() {
        if (!$this->entry)
            $this->next();
        if (is_object($this->entry) && isset($this->entry->__CURSOR))
            return $this->entry->__CURSOR;
        return null;
    }
}
// Example: follow the journal and dump each entry's cursor and MESSAGE field.
$a = new Journal();
foreach ($a as $cursor => $item) {
echo "================\n";
var_dump($cursor);
//print_r($item);
// $item can be falsy (e.g. a line that did not decode), so check before use.
if ($item)
var_dump($item->MESSAGE);
}
I'm trying to run a long-running script that downloads and parses news items for a page. There are about 260+ html pages my script downloads, and parses news items from each page.
It's a long-running script.
If I set the amount of pages it has to parse to something low (meaning it'll take less to execute), the script runs fine:
// Since the University blog page has 262 pages, we'll iterate through that.
// Short run: only pages 2 through 21 are fetched here.
for ($i = 2; $i <= 21; $i++) {
$url = "http://www.uvm.cl/noticias_mas.shtml?AA_SL_Session=34499aef1fc7a296fb666dcc7b9d8d05&scrl=1&scr_scr_Go=" . $i;
$page = file_get_html($url);
parse_page_for_news($page, $parsedNews);
}
If I increase that page count to 40 or more, the script no longer returns anything at all. On Google Chrome this error message appears:
// Since the University blog page has 262 pages, we'll iterate through that.
// Longer run: pages 2 through 41 this time.
for ($i = 2; $i <= 41; $i++) {
$url = "http://www.uvm.cl/noticias_mas.shtml?AA_SL_Session=34499aef1fc7a296fb666dcc7b9d8d05&scrl=1&scr_scr_Go=" . $i;
$page = file_get_html($url);
parse_page_for_news($page, $parsedNews);
}
No data received
Unable to load the webpage because the server sent no
data.
Here are some suggestions: Reload this webpage later.
Error 324
(net::ERR_EMPTY_RESPONSE): The server closed the connection without
sending any data.
Here are the modifications I made to php.ini based on some searching, but still no working fix.
;;;;;;;;;;;;;;;;;;;
; Resource Limits ;
;;;;;;;;;;;;;;;;;;;
max_execution_time = 0 ; Maximum execution time of each script, in seconds
max_input_time = 60 ; Maximum amount of time each script may spend parsing request data
memory_limit = -1 ; Maximum amount of memory a script may consume (8MB)
Another oddity, when I set the script to run fast (with few iterations), I can actually see the request in the access_log file.
127.0.0.1 - - [27/Jul/2012:15:50:19 -0400] "GET /scrapernoticias/scraper.php HTTP/1.1" 200 509
127.0.0.1 - - [27/Jul/2012:15:50:23 -0400] "GET /scrapernoticias/scraper.php HTTP/1.1" 200 509
127.0.0.1 - - [27/Jul/2012:15:58:02 -0400] "GET /scrapernoticias/scraper.php HTTP/1.1" 200 500
When I set things to high (long running iterations), I cannot see this new request as if it never even reached the server.
Here is the script in its entirety:
<h1>Scraper Noticias</h1>
<?php
include('simple_html_dom.php');
include('rb.php');
// Setup RedBean to work with a database.
R::setup('mysql:host=localhost;dbname=noticias','root','');
set_time_limit(0);
/**
 * Plain data holder for one scraped news item.
 */
class News
{
    public $image;
    public $fechanoticia;
    public $title;
    public $description;
    public $sourceurl;

    // Thumbnail image URL
    function get_image() { return $this->image; }
    function set_image($new_image) { $this->image = $new_image; }

    // Post date text
    function get_fechanoticia() { return $this->fechanoticia; }
    function set_fechanoticia($new_fechanoticia) { $this->fechanoticia = $new_fechanoticia; }

    // Headline
    function get_title() { return $this->title; }
    function set_title($new_title) { $this->title = $new_title; }

    // Description text
    function get_description() { return $this->description; }
    function set_description($new_description) { $this->description = $new_description; }

    // Link to the full article
    function get_sourceurl() { return $this->sourceurl; }
    function set_sourceurl($new_sourceurl) { $this->sourceurl = $new_sourceurl; }
}
// Declare variable to hold all parsed news items.
// Declare variable to hold all parsed news items.
$parsedNews = array();

// Grab page number 1, and parse that first.
$initialPage = file_get_html('http://www.uvm.cl/noticias_mas.shtml');
// file_get_html() returns false when the page cannot be loaded.
if ($initialPage) {
    parse_page_for_news($initialPage, $parsedNews);
    // Release the DOM tree explicitly; otherwise every parsed page stays in
    // memory for the remainder of the run.
    $initialPage->clear();
}
unset($initialPage);

// Since the University blog page has 262 pages, we'll iterate through that.
for ($i = 2; $i <= 3; $i++) {
    $url = "http://www.uvm.cl/noticias_mas.shtml?AA_SL_Session=34499aef1fc7a296fb666dcc7b9d8d05&scrl=1&scr_scr_Go=" . $i;
    $page = file_get_html($url);
    if (!$page) {
        // Skip pages that failed to download instead of crashing on them.
        continue;
    }
    parse_page_for_news($page, $parsedNews);
    $page->clear();
    unset($page);
}

echo "<h1>Noticias encontradas:" . count($parsedNews) . "</h1>";
//echo print_r($parsedNews[count($parsedNews) - 1]);

// Save each parsed news to the database.
// Iterate by value: nothing is modified, and a by-reference loop would leave
// $tmpNews bound to the last array element afterwards.
foreach ($parsedNews as $tmpNews) {
    $noticia = R::dispense('noticia');
    $noticia->imagen = $tmpNews->get_image();
    $noticia->fecha = $tmpNews->get_fechanoticia();
    $noticia->titulo = $tmpNews->get_title();
    $noticia->url = $tmpNews->get_sourceurl();
    $noticia->descripcion = $tmpNews->get_description();
    $id = R::store($noticia);
}

// Disconnect from the database.
R::close();
// Function receives an HTML Dom object, and the library works against that single HTML object.
/**
 * Extracts every news item found under "#cont2 p" and appends it to
 * $parsedNews.
 *
 * @param object|false $page       HTML DOM object; anything that is not an
 *                                 object (e.g. false from a failed download)
 *                                 is ignored
 * @param array        $parsedNews receives one News object per paragraph
 */
function parse_page_for_news ($page, &$parsedNews) {
    // A failed download hands us false; calling find() on it would be fatal.
    if (!is_object($page)) {
        return;
    }

    foreach($page->find('#cont2 p') as $element) {
        $newItem = new News;

        // Parse the news item's thumbnail image.
        foreach ($element->find('img') as $image) {
            $newItem->set_image($image->src);
            //echo $newItem->get_image() . "<br />";
        }

        // Parse the news item's post date.
        foreach ($element->find('span.fechanoticia') as $fecha) {
            $newItem->set_fechanoticia($fecha->innertext);
            //echo $newItem->get_fechanoticia() . "<br />";
        }

        // Parse the news item's title.
        foreach ($element->find('a') as $title) {
            $newItem->set_title($title->innertext);
            //echo $newItem->get_title() . "<br />";
        }

        // Parse the news item's source URL link.
        foreach ($element->find('a') as $sourceurl) {
            $newItem->set_sourceurl("http://www.uvm.cl/" . $sourceurl->href);
        }

        // Parse the news items' description text: blank out the links, spans
        // and images so only the paragraph's remaining content is left.
        foreach ($element->find('a') as $link) {
            $link->outertext = '';
        }
        foreach ($element->find('span') as $link) {
            $link->outertext = '';
        }
        foreach ($element->find('img') as $link) {
            $link->outertext = '';
        }
        $newItem->set_description($element->innertext);

        // Add the newly formed NewsItem to the $parsedNews object.
        $parsedNews[] = $newItem;

        // For debugging purposes, it'll print each parsed News Item.
        //print_r($newItem);
        //echo "<br /><br /><br />";
    }
}
?>
In a PHP application, I have the problem that Gearman jobs are sometimes passed to more than one worker. I managed to reduce the code that reproduces it to a single file. Now I am not sure whether this is a bug in Gearman, a bug in the PECL library, or maybe a bug in my code.
Here is the code to reproduce the error:
#!/usr/bin/php
<?php
// Reproduction script. Run without arguments it acts as the client: it spawns
// the workers, submits the jobs and reports every job that was logged more
// than once. Run with any argument it acts as a worker of the variant below.
// Try 'standard', 'exception' or 'exception-sleep'.
$sWorker = 'exception';
// Detect run mode "client" or "worker".
if (!isset($argv[1]))
$sMode = 'client';
else
$sMode = 'worker-' . $sWorker;
// Log file shared by the client and all workers.
$sLogFilePath = __DIR__ . '/log.txt';
switch ($sMode) {
case 'client':
// Remove all queued test jobs and quit if there are test workers running.
prepare();
// Init the Gearman client.
$Client= new GearmanClient;
$Client->addServer();
// Empty the log file.
file_put_contents($sLogFilePath, '');
// Start some worker processes.
// Each exec() launches this same file in the background and returns the PID echoed by the shell.
$aPids = array();
for ($i = 0; $i < 100; $i++)
$aPids[] = exec('php ' . __FILE__ . ' worker > /dev/null 2>&1 & echo $!');
// Start some jobs. Also try doHigh(), doBackground() and
// doBackgroundHigh();
for ($i = 0; $i < 50; $i++)
$Client->doNormal('test', $i);
// Wait a second (when running jobs in background).
// sleep(1);
// Prepare the log file entries.
// Each log line is tab-separated: time, worker PID, job handle, workload. Lines are grouped by workload.
$aJobs = array();
$aLines = file($sLogFilePath);
foreach ($aLines as $sLine) {
list($sTime, $sPid, $sHandle, $sWorkload) = $aAttributes = explode("\t", $sLine);
$sWorkload = trim($sWorkload);
if (!isset($aJobs[$sWorkload]))
$aJobs[$sWorkload] = array();
$aJobs[$sWorkload][] = $aAttributes;
}
// Remove all jobs that have been passed to only one worker as expected.
foreach ($aJobs as $sWorkload => $aJob) {
if (count($aJob) === 1)
unset($aJobs[$sWorkload]);
}
echo "\n\n";
if (empty($aJobs))
echo "No job has been passed to more than one worker.";
else {
echo "Those jobs has been passed more than one times to a worker:\n";
foreach ($aJobs as $sWorload => $aJob) {
echo "\nJob #" . $sWorload . ":\n";
foreach ($aJob as $aAttributes)
echo " $aAttributes[2] (Worker PID: $aAttributes[1])\n";
}
}
echo "\n\n";
// Kill all started workers.
foreach ($aPids as $sPid)
exec('kill ' . $sPid . ' > /dev/null 2>&1');
break;
case 'worker-standard':
// Variant 1: plain work loop. logJob() sets $bShutdown after a job, but
// "continue" only re-enters the loop, so this worker keeps working.
$Worker = new GearmanWorker;
$Worker->addServer();
$Worker->addFunction('test', 'logJob');
$bShutdown = false;
while ($Worker->work())
if ($bShutdown)
continue;
break;
case 'worker-exception':
// Variant 2: leaves the work loop by throwing as soon as logJob() has set
// $bShutdown; the exception is caught and discarded.
try {
$Worker = new GearmanWorker;
$Worker->addServer();
$Worker->addFunction('test', 'logJob');
$bShutdown = false;
while ($Worker->work())
if ($bShutdown)
throw new \Exception;
} catch (\Exception $E) {
}
break;
case 'worker-exception-sleep':
// Variant 3: same as variant 2, but sleeps one second before throwing.
try {
$Worker = new GearmanWorker;
$Worker->addServer();
$Worker->addFunction('test', 'logJob');
$bShutdown = false;
while ($Worker->work())
{
if ($bShutdown) {
sleep(1);
throw new \Exception;
}
}
} catch (\Exception $E) {
}
break;
}
/**
 * Worker callback: appends one tab-separated line (time, PID, job handle,
 * workload) to the log file and sets the global shutdown flag.
 */
function logJob(\GearmanJob $Job)
{
    global $bShutdown, $sLogFilePath;

    $aFields = array(microtime(true), getmypid(), $Job->handle(), $Job->workload());
    file_put_contents($sLogFilePath, implode("\t", $aFields) . "\n", FILE_APPEND);

    $bShutdown = true;
}
/**
 * Makes sure the Gearman server has no leftover 'test' jobs or workers.
 *
 * Sends STATUS to the server on 127.0.0.1:4730. The script dies when the
 * server is unreachable or when workers for 'test' are still registered.
 * When jobs are still queued, a throw-away worker eats them for one second
 * and the check is repeated.
 */
function prepare()
{
    $rGearman = fsockopen('127.0.0.1', 4730, $iErrno, $sErrstr, 3);
    // Without this check a failed connection hands false to every stream call below.
    if (false === $rGearman)
        die("Could not connect to the Gearman server on 127.0.0.1:4730: $sErrstr ($iErrno)\n\n");

    $aBuffer = array();
    fputs ($rGearman, 'STATUS' . PHP_EOL);
    stream_set_timeout($rGearman, 1);
    while (!feof($rGearman)) {
        $sLine = fgets($rGearman, 128);
        // false means timeout or read error; a lone "." ends the status listing.
        if (false === $sLine || '.' . PHP_EOL === $sLine)
            break;
        $aBuffer[] = $sLine;
    }
    fclose($rGearman);

    $bJobsInQueue = false;
    $bWorkersRunning = false;
    foreach ($aBuffer as $sFunctionLine) {
        // Expected format: name, queued jobs, running jobs, workers (tab-separated).
        $aFields = explode("\t", trim($sFunctionLine));
        if (count($aFields) < 4)
            continue; // not a status line; skip it rather than unpack garbage
        list($sFunctionName, $iQueuedJobs, $iRunningJobs, $iWorkers) = $aFields;
        if ('test' === $sFunctionName) {
            if (0 != $iQueuedJobs)
                $bJobsInQueue = true;
            if (0 != $iWorkers)
                $bWorkersRunning = true;
        }
    }

    // Exit if there are workers running.
    if ($bWorkersRunning)
        die("There are some Gearman workers running that have registered a 'test' function. Please stop these workers and run again.\n\n");

    // If there are test jobs in the queue start a worker that eat up the jobs.
    if ($bJobsInQueue) {
        $sPid = exec('gearman -n -w -f test > /dev/null 2>&1 & echo $!');
        sleep(1);
        exec ("kill $sPid > /dev/null 2>&1");
        // Repeat this method to make sure all jobs are removed.
        prepare();
    }
}
When you run this code on the command line it should output "No job
has been passed to more than one worker." but instead it always outputs a list of some jobs that have been passed to more than one worker. The error doesn't appear if you set $sWorker = 'standard'; or 'exception-sleep'.
It would help me a lot if you could run the code and tell me whether you are able to reproduce the error or whether I have a bug in my code.
Had exactly the same issue with Gearman 0.24, PECL lib 1.0.2. Was able to reproduce the error with your script every time.
An older version of Gearman (0.14 I think) used to work fine.
Upgrading Gearman to 0.33 fixed the issue.