Correl Roush
26d9f61672
git-svn-id: file:///srv/svn/scanner/trunk@20 a0501263-5b7a-4423-a8ba-1edf086583e7
529 lines
17 KiB
PHP
529 lines
17 KiB
PHP
<?php
|
|
/*
 * Fetch-mode flags. Each flag is a single bit so several can be OR-ed
 * together; PHPParser::parse() tests them against its fetch mode with `&`.
 */
define( 'PHPPARSER_FETCH_CLASSES',     1 << 0 );
define( 'PHPPARSER_FETCH_FUNCTIONS',   1 << 1 );
define( 'PHPPARSER_FETCH_CALLS',       1 << 2 );
define( 'PHPPARSER_FETCH_INCLUDES',    1 << 3 );
// Only referenced from commented-out code in the parser at present.
define( 'PHPPARSER_FETCH_INTERNAL',    1 << 4 );
define( 'PHPPARSER_FETCH_CONSTRUCTS',  1 << 5 );
define( 'PHPPARSER_FETCH_EXPRESSIONS', 1 << 6 );
define( 'PHPPARSER_FETCH_INLINE_HTML', 1 << 7 );

// All sixteen low bits set: every current flag plus room for new ones.
define( 'PHPPARSER_FETCH_ALL', 0xFFFF );

/*
 * Object type identifiers. These are the values stored under the 'type'
 * key of every object the parser hands to its callbacks.
 */
define( 'PHPPARSER_CLASS_DEF',          1 );
define( 'PHPPARSER_FUNCTION_DEF',       2 );
define( 'PHPPARSER_FUNCTION_CALL',      3 );
define( 'PHPPARSER_INCLUDE',            4 );
define( 'PHPPARSER_EXPRESSION',         5 );
define( 'PHPPARSER_LANGUAGE_CONSTRUCT', 6 );
define( 'PHPPARSER_INLINE_HTML',        7 );
define( 'PHPPARSER_VARIABLE',           8 );
define( 'PHPPARSER_ASSIGNMENT',         9 );
|
|
|
|
/**
 * Token-based scanner for PHP source.
 *
 * parse() walks the output of token_get_all() and reports what it finds
 * (class/function definitions, function calls, includes, language
 * constructs, expressions, variables, assignments, inline HTML) to every
 * callback registered with registerCallback(). Which kinds are reported is
 * controlled by a bitmask of PHPPARSER_FETCH_* flags.
 *
 * Every reported object is an array with the keys 'type' (a PHPPARSER_*
 * type id), 'name', 'file', 'context' (text of the source line), 'line',
 * 'block', 'depth', 'in_class' and 'in_function'. Function calls and
 * expressions additionally carry 'open_blocks'.
 */
class PHPParser {

    /** Bitmask of PHPPARSER_FETCH_* flags consulted by parse(). */
    var $fetch_mode;

    /** File name copied into every reported object; '' after reset(). */
    var $file_name;

    /** The 'internal' list returned by get_defined_functions(). */
    var $internal_functions;

    /** Function names supplied through addFunctionDefinitions(). */
    var $defined_functions;

    /** Registered callbacks: list of array('function' => callable, 'fetch' => mask). */
    var $callbacks;

    /**
     * Constructor.
     *
     * Declared as __construct() because PHP 8 no longer treats a method
     * named after the class as a constructor; without it none of the
     * properties would be initialised by `new PHPParser(...)`.
     *
     * @param int   $fetch_mode   Bitmask of PHPPARSER_FETCH_* flags.
     * @param array $functionlist Initial list of known function names.
     */
    function __construct( $fetch_mode = PHPPARSER_FETCH_ALL, $functionlist = array() ) {
        $this->reset( $fetch_mode );
        $functions = get_defined_functions();
        $this->internal_functions = $functions['internal'];
        $this->addFunctionDefinitions( $functionlist );
        $this->callbacks = array();
    }

    /**
     * Legacy (PHP 4 style) constructor name, kept so explicit calls such as
     * parent::PHPParser(...) keep working. Delegates to __construct().
     */
    function PHPParser( $fetch_mode = PHPPARSER_FETCH_ALL, $functionlist = array() ) {
        $this->__construct( $fetch_mode, $functionlist );
    }

    /**
     * Clears the current file name and the list of defined functions.
     *
     * @param int|null $fetch_mode New fetch mode; only applied when greater
     *                             than zero, otherwise the current mode is kept.
     */
    function reset( $fetch_mode = null ) {
        if( $fetch_mode > 0 ) {
            $this->fetch_mode = $fetch_mode;
        }
        $this->file_name = '';
        $this->defined_functions = array();
    }

    /**
     * Appends function names to $defined_functions.
     *
     * array_merge() returns the merged array instead of modifying its
     * arguments, so the result has to be assigned back (it used to be
     * thrown away, which made this method a no-op).
     *
     * @param array $functionlist Names to add; non-array input is ignored.
     */
    function addFunctionDefinitions( $functionlist ) {
        if( is_array( $functionlist ) ) {
            $this->defined_functions = array_merge( $this->defined_functions, $functionlist );
        }
    }

    /**
     * Registers a callback that receives every object found by parse().
     *
     * @param callable $function_name Anything accepted by is_callable().
     * @param int      $fetch_mode    Stored next to the callback under 'fetch'.
     * @return int|false Number of registered callbacks after the addition,
     *                   or false when $function_name is not callable.
     */
    function registerCallback( $function_name, $fetch_mode = PHPPARSER_FETCH_ALL ) {
        if( !is_callable( $function_name ) ) {
            return false;
        }
        $this->callbacks[] = array(
            'function' => $function_name,
            'fetch' => $fetch_mode
        );
        return count( $this->callbacks );
    }

    /**
     * Hands a found object to every registered callback.
     *
     * NOTE(review): the per-callback 'fetch' mask stored by
     * registerCallback() is not consulted here, so every callback sees
     * every object. Confirm whether filtering was intended.
     *
     * @param array $object Object description built by parse().
     * @return false|null false when $object is not an array, nothing otherwise.
     */
    function foundObject( $object ) {
        if( !is_array( $object ) ) {
            // Now how the hell did that happen?
            return false;
        }
        foreach( $this->callbacks as $callback ) {
            call_user_func( $callback['function'], $object );
        }
    }

    /**
     * Parses a file from disk.
     *
     * @param string $file Path of the file to parse.
     * @return false|null false when the file is missing, unreadable or could
     *                    not be read; nothing after a completed parse.
     */
    function parseFile( $file ) {
        if( !file_exists( $file ) || !is_readable( $file ) ) { return false; }
        $content = file_get_contents( $file );
        // The file can still fail to load (e.g. it is a directory).
        if( $content === false ) { return false; }
        $this->file_name = $file;
        $this->parse( $content );
    }

    /**
     * Builds one object description and passes it to foundObject().
     *
     * Private helper of parse(); it exists only to keep the key layout of
     * reported objects in a single place.
     *
     * @param int        $type         One of the PHPPARSER_* type ids.
     * @param mixed      $name         Value for the 'name' key.
     * @param string     $context      Text of the source line being reported.
     * @param int        $line         1-based line number.
     * @param int        $block        Id of the current block.
     * @param int        $depth        Nesting depth of the current block.
     * @param string     $classname    Enclosing class name, if any.
     * @param string     $functionname Enclosing function name, if any.
     * @param array|null $open_blocks  When an array, added as 'open_blocks'.
     */
    function _report( $type, $name, $context, $line, $block, $depth, $classname, $functionname, $open_blocks = null ) {
        $object = array(
            'type' => $type,
            'name' => $name,
            'file' => $this->file_name,
            'context' => $context,
            'line' => $line,
            'block' => $block,
            'depth' => $depth,
            'in_class' => $classname,
            'in_function' => $functionname
        );
        if( is_array( $open_blocks ) ) {
            $object['open_blocks'] = $open_blocks;
        }
        $this->foundObject( $object );
    }

    /**
     * Tokenizes $content and reports everything selected by $fetch_mode to
     * the registered callbacks.
     *
     * Blocks are numbered in order of appearance; block 0 is the top level.
     * $open_blocks is the stack of currently open block ids and $depth is
     * its height minus one.
     *
     * @param string $content PHP source code.
     */
    function parse( $content ) {
        $tokens = token_get_all( $content );
        $lines = preg_split( '/\r?(\n|\r)/', $content );
        $line = 1;
        $depth = 0;
        $block = 0;
        $block_count = 0;

        $class = $classname = null;
        $function = $functionname = null;
        $string = $last_token = null;
        $switch = array();
        // Stack of expression texts; '[' opens a sub-expression, ']' closes it.
        $expressions = array('');
        $expression = &$expressions[0];
        $assignment = false;

        // Variables seen per (sub-)expression, parallel to $expressions.
        $expression_variables = array(array());
        $variables = &$expression_variables[0];
        $open_blocks = array( 0 );
        $in_string = false;
        $buffer = array(T_INLINE_HTML => '');

        // These flags are read before the first token that sets them, so
        // they need an explicit starting value.
        $in_function_params = false;
        $in_foreach_params = false;
        $in_list = false;
        $variable_declaration = false;

        // Single-character tokens that end an expression.
        $param_delimiters = array( '{', '}', '(', ')', ';', ',', '?', ':' );
        $statement_delimiters = array( '{', '}', ';', '=', '?', ':' );

        // Tokens reported as language constructs.
        $construct_tokens = array(
            T_ARRAY,
            T_ECHO,
            T_EMPTY,
            T_EVAL,
            T_EXIT,
            T_HALT_COMPILER,
            T_INCLUDE,
            T_INCLUDE_ONCE,
            T_ISSET,
            T_LIST,
            T_PRINT,
            T_REQUIRE,
            T_REQUIRE_ONCE,
            T_UNSET
        );

        // Tokens whose text is copied into the expression verbatim; any
        // other token contributes a single space.
        $expression_tokens = array(
            /* Values */
            T_STRING,
            T_CONSTANT_ENCAPSED_STRING,
            T_ENCAPSED_AND_WHITESPACE,
            T_WHITESPACE,
            T_DNUMBER,
            T_LNUMBER,
            T_NUM_STRING,
            T_VARIABLE,
            T_STRING_VARNAME,

            /* Operators */
            T_BOOLEAN_AND,
            T_BOOLEAN_OR,
            T_DEC,
            T_INC,
            T_IS_EQUAL,
            T_IS_GREATER_OR_EQUAL,
            T_IS_IDENTICAL,
            T_IS_NOT_EQUAL,
            T_IS_NOT_IDENTICAL,
            T_IS_SMALLER_OR_EQUAL,
            T_LOGICAL_AND,
            T_LOGICAL_OR,
            T_LOGICAL_XOR,
            T_OBJECT_OPERATOR,
            T_DOUBLE_COLON,
            T_SL,
            T_SR,

            /* Casts */
            T_DOUBLE_CAST,
            T_INT_CAST,
            T_OBJECT_CAST,
            T_STRING_CAST,
            T_UNSET_CAST,

            /* Constructs */
            T_ECHO,
            T_PRINT
        );
        // T_CHARACTER is not defined by every PHP version; referencing it
        // directly would fail where it is missing.
        if( defined( 'T_CHARACTER' ) ) {
            $expression_tokens[] = constant( 'T_CHARACTER' );
        }

        foreach( $tokens as $token ) {
            // Block 0 must always be on the stack; restore it if it got lost.
            if( !in_array( 0, $open_blocks ) ) {
                $open_blocks = array_merge( array(0), $open_blocks );
            }
            $classname = isset( $class['name'] ) ? $class['name'] : '';
            $functionname = isset( $function['name'] ) ? $function['name'] : '';

            if( $token == '"' ) {
                $in_string = !$in_string;
            }
            if( $in_string ) {
                // Inside a double-quoted string: copy the text and keep the
                // line counter in sync, nothing else. The match array must be
                // a real variable because it is taken by reference.
                $chunk = is_string( $token ) ? $token : $token[1];
                $expression .= $chunk;
                $line += preg_match_all( '/\r?(\n|\r)/', $chunk, $m );
                continue;
            }

            if( is_string( $token ) ) {
                // Single character token
                $text = $token;
                switch( $token ) {
                    case '{':
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        break;
                    case '}':
                        array_pop( $open_blocks );
                        $depth = count( $open_blocks ) - 1;
                        $block = $depth == 0 ? 0 : ( isset( $open_blocks[$depth] ) ? $open_blocks[$depth] : 0 );
                        // Back in the block the class/function was declared in:
                        // its body has just been closed. (The class branch used
                        // to clear only $functionname; $classname is what it
                        // has to clear.)
                        if( !empty( $class ) && $class['block'] == $block ) { $class = $classname = $functionname = null; }
                        if( !empty( $function ) && $function['block'] == $block ) { $function = $functionname = null; }
                        if( in_array( $block, $switch ) ) {
                            // A switch owns an extra block opened at T_SWITCH;
                            // drop it too and return to the enclosing block,
                            // which is now the top of the stack.
                            array_pop( $open_blocks );
                            $depth = count( $open_blocks ) - 1;
                            $block = $depth == 0 ? 0 : ( isset( $open_blocks[$depth] ) ? $open_blocks[$depth] : 0 );
                        }
                        break;
                    case '(':
                        // An identifier directly followed by '(' is a function
                        // call unless it is a definition, a method call, a
                        // static call or an instantiation.
                        // PHPPARSER_FETCH_INTERNAL is not applied here.
                        if(
                            (bool)($this->fetch_mode & PHPPARSER_FETCH_CALLS)
                            && !empty( $string )
                            && $last_token != T_FUNCTION
                            && $last_token != T_OBJECT_OPERATOR
                            && $last_token != T_DOUBLE_COLON
                            && $last_token != T_NEW
                        ) {
                            $this->_report( PHPPARSER_FUNCTION_CALL, strtolower( $string ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname, $open_blocks );
                        }
                        break;
                    case ')':
                        $in_function_params = false;
                        $in_foreach_params = false;
                        $in_list = false;
                        break;
                    case '[':
                        $expressions[] = '';
                        $expression_variables[] = array();
                        $variables = &$expression_variables[count($expression_variables) - 1];
                        break;
                    case ']':
                        /*
                            TODO: Subexpression reporting is NOT quite as accurate as it should be...
                            See the unit test failure for an example. The assigned variable is reported fine, which is good,
                            but the value it is assigned will not be. Since this isn't awfully important, and the fact that
                            everything else does is, I'm leaving this as-is for now.
                        */
                        $e = array_pop($expressions);
                        $expression = &$expressions[count($expressions) - 1];
                        $expression .= trim($e);
                        array_pop($expression_variables);
                        $variables = &$expression_variables[count($expression_variables) - 1];
                        $variables[count($variables) - 1] = end($variables) . '[' . trim($e) . ']';
                        break;
                    default:
                        /* Should be able to add a hook here later on to catch the use of defines,
                           which are basically just T_STRINGs that PHP can't find anything else to
                           do with. That could be useful in determining whether a define is defined.
                        */
                        $string = null;
                        break;
                }

                if(
                    (bool)($this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS)
                    && (
                        (($in_function_params || $in_foreach_params) && in_array( $token, $param_delimiters ))
                        || in_array( $token, $statement_delimiters )
                    )
                ) {
                    $expression = trim($expression);
                    if ($assignment) {
                        // If this was a list assignment, we've got an array of variables to mark as assigned at once!
                        $targets = is_array($assignment) ? $assignment : array($assignment);
                        foreach ($targets as $var) {
                            $this->_report( PHPPARSER_ASSIGNMENT, "$var=$expression", $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                        }
                        $assignment = false;
                    }
                    elseif (strlen($expression) > 0) {
                        $this->_report( PHPPARSER_EXPRESSION, $expression, $lines[$line - 1], $line, $block, $depth, $classname, $functionname, $open_blocks );
                    }
                    $expression = '';
                    $variable_declaration = false;
                    // On '=' the most recent variable is the assignment target.
                    $assignment = ('=' == $token ? array_pop($variables) : false);
                    while (count($variables) > 0) {
                        $variable = array_pop($variables);
                        $this->_report( PHPPARSER_VARIABLE, $variable, $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                    }
                } else { $expression .= $token; }

                // Re-bind to the innermost expression after '[' pushed a new one.
                if ($expression !== end($expressions))
                    $expression = &$expressions[count($expressions) - 1];

                if( !empty($buffer[T_INLINE_HTML]) && (bool)($this->fetch_mode & PHPPARSER_FETCH_INLINE_HTML) ) {
                    $this->_report( PHPPARSER_INLINE_HTML, $buffer[T_INLINE_HTML], $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                    $buffer[T_INLINE_HTML] = '';
                    // NOTE(review): this reset only runs when buffered inline
                    // HTML was flushed on this very token. It may have been
                    // meant for every single-character token; left in place
                    // because moving it changes which calls and includes get
                    // reported. Confirm the intent before touching it.
                    if( !in_array( $token, array( '(', ')' ) ) ) { $last_token = null; }
                }
            } else {
                list($id, $text) = $token;
                switch( $id ) {
                    case T_CURLY_OPEN:
                    case T_DOLLAR_OPEN_CURLY_BRACES:
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        break;
                    case T_STRING:
                        $string = $text;
                        switch( $last_token ) {
                            case T_CLASS:
                                if( (bool)($this->fetch_mode & PHPPARSER_FETCH_CLASSES) ) {
                                    $this->_report( PHPPARSER_CLASS_DEF, strtolower( $string ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                                }
                                $class = array( 'name' => $text, 'block' => $block );
                                break;
                            case T_FUNCTION:
                                $variable_declaration = false;
                                if( (bool)($this->fetch_mode & PHPPARSER_FETCH_FUNCTIONS) && empty( $classname ) ) { // Not interested in member function definitions
                                    // $line is 1-based, $lines is 0-based: the
                                    // current line is $lines[$line - 1].
                                    $this->_report( PHPPARSER_FUNCTION_DEF, strtolower( $string ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                                }
                                $function = array( 'name' => $text, 'block' => $block );
                                $in_function_params = true;
                                break;
                        }
                        break;
                    case T_CONSTANT_ENCAPSED_STRING:
                        switch( $last_token ) {
                            case T_INCLUDE:
                            case T_INCLUDE_ONCE:
                            case T_REQUIRE:
                            case T_REQUIRE_ONCE:
                                if( (bool)($this->fetch_mode & PHPPARSER_FETCH_INCLUDES) ) {
                                    $this->_report( PHPPARSER_INCLUDE, trim( str_replace( '\'', '', $text ) ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                                }
                                break;
                        }
                        break;
                    case T_GLOBAL:
                    case T_VAR:
                    case T_PRIVATE:
                    case T_PUBLIC:
                    case T_PROTECTED:
                    case T_STATIC:
                        $variable_declaration = true;
                        break;
                    case T_LIST:
                        $in_list = true;
                        $variables[] = array();
                        break;
                    case T_VARIABLE:
                        if ($in_list) {
                            /*
                                TODO: Proper fix for this.
                                Has to do with the less-than-perfect sub-expressions. If an entry in a list() is a keyed value of an array (i.e. $var[$key]),
                                everything sort of falls apart.
                            */
                            if (!is_array($variables[count($variables) - 1]) )
                                $variables[count($variables) - 1] = array(end($variables));
                            $variables[count($variables) - 1][] = $text;
                        } else {
                            $variables[] = $text;
                        }
                        if( (bool)($this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS) ) {
                            // Declarations, foreach targets and function
                            // parameters are reported as self-assignments.
                            if (
                                $variable_declaration
                                || in_array($last_token, array(T_AS))
                                || ($in_foreach_params && T_DOUBLE_ARROW == $last_token)
                                || $in_function_params
                            ) {
                                $this->_report( PHPPARSER_ASSIGNMENT, "$text=$text", $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                            }
                        }
                        break;
                    case T_SWITCH:
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        $switch[$block] = $block;
                        break;
                    case T_CASE:
                    case T_DEFAULT:
                        // Each conditional of a switch statement is its own block
                        array_pop( $open_blocks );
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        break;
                    case T_AS:
                        $in_foreach_params = true;
                        break;
                    case T_INLINE_HTML:
                        // NOTE: There seems to be a string limit of around 400 characters, which is very easy to reach with this token
                        // We'll get around it by combining adjacent token results
                        $buffer[T_INLINE_HTML] .= $text;
                        break;
                }

                // Whitespace and identifiers do not count as "last token".
                if( !in_array( $id, array( T_WHITESPACE, /*T_COMMENT, T_DOC_COMMENT,*/ T_STRING ) ) ) { $string = null; $last_token = $id; }

                if( (bool)($this->fetch_mode & PHPPARSER_FETCH_CONSTRUCTS) && in_array( $id, $construct_tokens ) ) {
                    $this->_report( PHPPARSER_LANGUAGE_CONSTRUCT, $text, $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                }

                $expression .= in_array( $id, $expression_tokens ) ? $text : ' ';

                // A closing tag ends the pending expression just like ';'.
                if( (bool)($this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS) && in_array( $id, array( T_CLOSE_TAG ) ) && strlen( trim( $expression ) ) > 0 ) {
                    $this->_report( PHPPARSER_EXPRESSION, trim( $expression ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname, $open_blocks );
                    $expression = '';
                }

                // Flush buffered inline HTML once a non-HTML token follows it.
                if( $id != T_INLINE_HTML && !empty($buffer[T_INLINE_HTML]) && (bool)($this->fetch_mode & PHPPARSER_FETCH_INLINE_HTML) ) {
                    $this->_report( PHPPARSER_INLINE_HTML, $buffer[T_INLINE_HTML], $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                    $buffer[T_INLINE_HTML] = '';
                }

                $line += preg_match_all( '/\r?(\n|\r)/', $text, $m );
            }
        }
    }
}
|
|
?>
|