Skip to content

Commit 89ea07e

Browse files
authored
Merge pull request #24 from Automattic/add/row-iterator
Add a row iterator
2 parents 75986d9 + fceb6b5 commit 89ea07e

File tree

9 files changed

+201
-17
lines changed

9 files changed

+201
-17
lines changed

README.md

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,27 +11,95 @@ Currently the following engines are supported:
1111
Usage Example
1212
-------------
1313

14+
The recommended way to use this library is to get results from Hive/Impala via the memory-efficient iterator, which keeps the connection open and scrolls through the results a small batch of rows at a time. This allows large result sets to be processed one record at a time, minimizing PHP's memory consumption.
15+
1416
```php
1517
// Load this lib
1618
require_once __DIR__ . '/ThriftSQL.phar';
1719

18-
// Try out a Hive query
20+
// Try out a Hive query via iterator object
21+
$hive = new \ThriftSQL\Hive( 'hive.host.local', 10000, 'user', 'pass' );
22+
$hiveTables = $hive
23+
->connect()
24+
->getIterator( 'SHOW TABLES' );
25+
26+
// Try out an Impala query via iterator object
27+
$impala = new \ThriftSQL\Impala( 'impala.host.local' );
28+
$impalaTables = $impala
29+
->connect()
30+
->getIterator( 'SHOW TABLES' );
31+
32+
// Execute the Hive query and iterate over the result set
33+
foreach( $hiveTables as $rowNum => $row ) {
34+
print_r( $row );
35+
}
36+
37+
// Execute the Impala query and iterate over the result set
38+
foreach( $impalaTables as $rowNum => $row ) {
39+
print_r( $row );
40+
}
41+
42+
// Don't forget to close socket connection once you're done with it
43+
$hive->disconnect();
44+
$impala->disconnect();
45+
```
46+
47+
The downside of using the memory-efficient iterator is that the result set can only be iterated over once. If a second `foreach` is run on the same iterator object, an exception is thrown by default to prevent the same query from executing on Hive/Impala again, as results are not cached within the PHP client. This protection can be turned off with `allowRerun( true )`; however, be aware that iterating over the same iterator object again may produce different results because the query is rerun.
48+
49+
Consider the following example:
50+
51+
```php
52+
// Connect to hive and get a rerun-able iterator
53+
$hive = new \ThriftSQL\Hive( 'hive.host.local', 10000, 'user', 'pass' );
54+
$results = $hive
55+
->connect()
56+
->getIterator( 'SELECT UNIX_TIMESTAMP()' )
57+
->allowRerun( true );
58+
59+
// Execute the Hive query and get results
60+
foreach( $results as $rowNum => $row ) {
61+
echo "Hive server time is: {$row[0]}\n";
62+
}
63+
64+
sleep(3);
65+
66+
// Execute the Hive query a second time
67+
foreach( $results as $rowNum => $row ) {
68+
echo "Hive server time is: {$row[0]}\n";
69+
}
70+
```
71+
72+
Which will output something like:
73+
74+
```
75+
Hive server time is: 1517875200
76+
Hive server time is: 1517875203
77+
```
78+
79+
If the result set is small and it would be easier to load all of it into PHP memory, the `queryAndFetchAll()` method can be used; it returns the full result set as a plain, numerically indexed multidimensional array.
80+
81+
```php
82+
// Try out a small Hive query
1983
$hive = new \ThriftSQL\Hive( 'hive.host.local', 10000, 'user', 'pass' );
2084
$hiveTables = $hive
2185
->connect()
2286
->queryAndFetchAll( 'SHOW TABLES' );
87+
$hive->disconnect();
88+
89+
// Print out the cached results
2390
print_r( $hiveTables );
91+
```
2492

25-
// Try out an Impala query
93+
```php
94+
// Try out a small Impala query
2695
$impala = new \ThriftSQL\Impala( 'impala.host.local' );
2796
$impalaTables = $impala
2897
->connect()
2998
->queryAndFetchAll( 'SHOW TABLES' );
30-
print_r( $impalaTables );
31-
32-
// Don't forget to clear the client and close socket.
33-
$hive->disconnect();
3499
$impala->disconnect();
100+
101+
// Print out the cached results
102+
print_r( $impalaTables );
35103
```
36104

37105
Developing & Contributing

ThriftSQL.phar

2.93 KB
Binary file not shown.

src/ThriftSQL.php

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
<?php
22

3-
interface ThriftSQL {
3+
abstract class ThriftSQL {
44
/**
55
* @return self
66
* @throws \ThriftSQL\Exception
77
*/
8-
public function connect();
8+
abstract public function connect();
99

1010
/**
1111
* The simplest use case; takes a query string executes it synchronously and
@@ -15,7 +15,7 @@ public function connect();
1515
* @return array
1616
* @throws \ThriftSQL\Exception
1717
*/
18-
public function queryAndFetchAll( $queryStr );
18+
abstract public function queryAndFetchAll( $queryStr );
1919

2020
/**
2121
* Sends a query string for execution on the server and returns a
@@ -25,10 +25,21 @@ public function queryAndFetchAll( $queryStr );
2525
* @return \ThriftSQLQuery
2626
* @throws \ThriftSQL\Exception
2727
*/
28-
public function query( $queryStr );
28+
abstract public function query( $queryStr );
2929

3030
/**
3131
* @return null
3232
*/
33-
public function disconnect();
33+
abstract public function disconnect();
34+
35+
/**
 * Builds a memory efficient, foreach-able iterator for the given query.
 *
 * Nothing is sent to the server here: the iterator only receives this
 * connection object and the query string, and issues the query itself
 * when iteration starts.
 *
 * @param string $queryStr Query to run once the iterator is traversed.
 * @return \ThriftSQL\Utils\Iterator
 * @throws \ThriftSQL\Exception
 */
public function getIterator( $queryStr ) {
	$iterator = new \ThriftSQL\Utils\Iterator( $this, $queryStr );

	return $iterator;
}
3445
}

src/ThriftSQL/Hive.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
namespace ThriftSQL;
44

5-
class Hive implements \ThriftSQL {
5+
class Hive extends \ThriftSQL {
66
private $_host;
77
private $_port;
88
private $_username;
@@ -12,7 +12,7 @@ class Hive implements \ThriftSQL {
1212
private $_client;
1313
private $_sessionHandle;
1414
protected $_sasl = true;
15-
15+
1616
public function __construct( $host, $port = 10000, $username = null, $password = null, $timeout = null ) {
1717
$this->_host = $host;
1818
$this->_port = $port;

src/ThriftSQL/HiveQuery.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,4 @@ private function _isOperationRunning( $state ) {
121121
)
122122
);
123123
}
124-
}
124+
}

src/ThriftSQL/Impala.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<?php
22
namespace ThriftSQL;
33

4-
class Impala implements \ThriftSQL {
4+
class Impala extends \ThriftSQL {
55

66
private $_host;
77
private $_port;

src/ThriftSQL/ImpalaQuery.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public function __construct( $queryStr, $client ) {
2020

2121
public function wait() {
2222
$sleeper = new \ThriftSQL\Utils\Sleeper();
23-
23+
2424
// Wait for results
2525
$sleeper->reset();
2626
do {
@@ -134,4 +134,4 @@ private function _isOperationRunning( $state ) {
134134
)
135135
);
136136
}
137-
}
137+
}

src/ThriftSQL/Utils/Iterator.php

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
<?php
2+
3+
namespace ThriftSQL\Utils;
4+
5+
/**
 * Lazily walks the result set of a query, holding only a small buffer of
 * rows in memory at any time.
 *
 * The query is (re)issued every time the iterator is rewound, i.e. at the
 * start of each `foreach`. Because results are not cached, a second rewind
 * throws unless re-running has been explicitly enabled via allowRerun().
 */
class Iterator implements \Iterator {

	/**
	 * Number of rows requested from the query object per fetch() call.
	 */
	const BUFFER_ROWS = 64;

	/**
	 * Connection used to issue the query on rewind().
	 *
	 * @var \ThriftSQL
	 */
	private $thriftSQL;

	/**
	 * Query handle of the current run; null until the first rewind().
	 *
	 * @var \ThriftSQLQuery|null
	 */
	private $thriftSQLQuery;

	/**
	 * @var string Query text handed to the connection on every rewind().
	 */
	private $queryStr;

	/**
	 * @var array Rows fetched but not yet consumed; the head is the current row.
	 */
	private $buffer = array();

	/**
	 * @var int Zero-based position of the current row within the current run.
	 */
	private $location = 0;

	/**
	 * @var int Number of times the query has been started by rewind().
	 */
	private $runCount = 0;

	/**
	 * @var bool Whether rewinding more than once is permitted.
	 */
	private $allowRerun = false;

	/**
	 * @param \ThriftSQL $thriftSQL Connection the query will be run on.
	 * @param string     $queryStr  Query to execute when iteration starts.
	 */
	public function __construct( \ThriftSQL $thriftSQL, $queryStr ) {
		$this->thriftSQL = $thriftSQL;
		$this->queryStr  = $queryStr;
	}

	/**
	 * Enables or disables running the query again on a subsequent rewind.
	 *
	 * @param mixed $value Cast to boolean.
	 * @return self
	 */
	public function allowRerun( $value ) {
		$this->allowRerun = (bool) $value;
		return $this;
	}

	/**
	 * Returns the current row.
	 *
	 * @link http://php.net/manual/en/iterator.current.php
	 * @return mixed The row at the head of the buffer, or null when there is
	 *               no buffered row (e.g. called without a preceding
	 *               successful valid()).
	 */
	// The attribute below silences the PHP 8.1+ return-type deprecation for
	// \Iterator implementations; older PHP versions parse it as a comment.
	#[\ReturnTypeWillChange]
	public function current() {
		// Guard the offset so an empty buffer yields null instead of an
		// undefined-offset notice/warning.
		return isset( $this->buffer[0] ) ? $this->buffer[0] : null;
	}

	/**
	 * Advances to the next row by dropping the head of the buffer.
	 *
	 * @link http://php.net/manual/en/iterator.next.php
	 * @return void
	 */
	#[\ReturnTypeWillChange]
	public function next() {
		$this->location++;

		// Nothing to drop when the buffer is already empty.
		if ( ! empty( $this->buffer ) ) {
			array_shift( $this->buffer );
		}
	}

	/**
	 * Returns the zero-based position of the current row.
	 *
	 * @link http://php.net/manual/en/iterator.key.php
	 * @return int
	 */
	#[\ReturnTypeWillChange]
	public function key() {
		return $this->location;
	}

	/**
	 * Reports whether a current row is available, refilling the buffer from
	 * the running query when it has been drained.
	 *
	 * @link http://php.net/manual/en/iterator.valid.php
	 * @return boolean True when at least one row is buffered after the check.
	 */
	#[\ReturnTypeWillChange]
	public function valid() {
		if ( ! empty( $this->buffer ) ) {
			return true;
		}

		// No query has been started yet (valid() called before rewind()):
		// there is nothing to fetch from, so report "no rows" instead of
		// calling a method on null.
		if ( null === $this->thriftSQLQuery ) {
			return false;
		}

		$rows = $this->thriftSQLQuery->fetch( self::BUFFER_ROWS );

		// Keep the buffer an array at all times so next()/current() stay safe
		// even if fetch() hands back a non-array "no more rows" value.
		$this->buffer = is_array( $rows ) ? $rows : array();

		return ( ! empty( $this->buffer ) );
	}

	/**
	 * Starts (or restarts) the query and resets the iteration state.
	 *
	 * @link http://php.net/manual/en/iterator.rewind.php
	 * @return void
	 * @throws \ThriftSQL\Exception When rewound a second time while re-running
	 *                              is not allowed.
	 */
	#[\ReturnTypeWillChange]
	public function rewind() {
		if ( $this->runCount > 0 && ! $this->allowRerun ) {
			throw new \ThriftSQL\Exception(
				'Iterator rewound, this will cause the ThriftSQL to execute again. ' .
				'Set `' . __CLASS__ . '::allowRerun(true)` to allow this behavior.'
			);
		}
		$this->runCount++;

		$this->thriftSQLQuery = $this->thriftSQL->query( $this->queryStr );
		$this->buffer         = array();
		$this->location       = 0;
		$this->thriftSQLQuery->wait();
	}
}

src/autoload.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@
198198
'ThriftSQL\\TTypeQualifiers' => 'Packages/TCLIService/Types.php',
199199
'ThriftSQL\\TUnionTypeEntry' => 'Packages/TCLIService/Types.php',
200200
'ThriftSQL\\TUserDefinedTypeEntry' => 'Packages/TCLIService/Types.php',
201+
'ThriftSQL\\Utils\\Iterator' => 'ThriftSQL/Utils/Iterator.php',
201202
'ThriftSQL\\Utils\\QueryCleaner' => 'ThriftSQL/Utils/QueryCleaner.php',
202203
'ThriftSQL\\Utils\\Sleeper' => 'ThriftSQL/Utils/Sleeper.php',
203204
'Thrift\\Base\\TBase' => 'Thrift/Base/TBase.php',

0 commit comments

Comments
 (0)