scanner/parser.php

530 lines
17 KiB
PHP
Raw Permalink Normal View History

<?php
/*
 * Fetch-mode bit flags. Combine them with | and hand the result to the
 * PHPParser constructor or to PHPParser::reset() to choose which kinds of
 * objects the parser reports.
 */
define( 'PHPPARSER_FETCH_CLASSES',     1 << 0 );
define( 'PHPPARSER_FETCH_FUNCTIONS',   1 << 1 );
define( 'PHPPARSER_FETCH_CALLS',       1 << 2 );
define( 'PHPPARSER_FETCH_INCLUDES',    1 << 3 );
define( 'PHPPARSER_FETCH_INTERNAL',    1 << 4 );
define( 'PHPPARSER_FETCH_CONSTRUCTS',  1 << 5 );
define( 'PHPPARSER_FETCH_EXPRESSIONS', 1 << 6 );
define( 'PHPPARSER_FETCH_INLINE_HTML', 1 << 7 );
// Every one of the low 16 bits set, i.e. all of the flags above at once.
define( 'PHPPARSER_FETCH_ALL',         0xFFFF );

/*
 * Object type identifiers. These are plain enumeration values (not bit
 * flags) and appear in the 'type' entry of every object the parser reports.
 */
define( 'PHPPARSER_CLASS_DEF',          1 );
define( 'PHPPARSER_FUNCTION_DEF',       2 );
define( 'PHPPARSER_FUNCTION_CALL',      3 );
define( 'PHPPARSER_INCLUDE',            4 );
define( 'PHPPARSER_EXPRESSION',         5 );
define( 'PHPPARSER_LANGUAGE_CONSTRUCT', 6 );
define( 'PHPPARSER_INLINE_HTML',        7 );
define( 'PHPPARSER_VARIABLE',           8 );
define( 'PHPPARSER_ASSIGNMENT',         9 );
/**
 * Lightweight, tokenizer-based scanner for PHP source code.
 *
 * The parser walks the token stream produced by token_get_all() and reports
 * every "object" it recognises (class and function definitions, function
 * calls, includes, language constructs, expressions, assignments, variables
 * and inline HTML) to the callbacks registered through registerCallback().
 *
 * Each reported object is an associative array with the keys 'type' (one of
 * the PHPPARSER_* type constants), 'name', 'file', 'context' (text of the
 * current source line), 'line', 'block', 'depth', 'in_class' and
 * 'in_function'. Function calls and expressions additionally carry
 * 'open_blocks', the stack of block ids that were open at that point.
 */
class PHPParser {
    /** @var int Bitmask of PHPPARSER_FETCH_* flags selecting what gets reported. */
    public $fetch_mode;

    /** @var string Name of the file being parsed; '' when parsing a raw string. */
    public $file_name;

    /** @var array Names of PHP's internal functions, captured at construction time. */
    public $internal_functions;

    /** @var array Function definitions supplied through addFunctionDefinitions(). */
    public $defined_functions;

    /** @var array Registered callbacks, each array( 'function' => callable, 'fetch' => int ). */
    public $callbacks;

    /**
     * Legacy PHP 4-style constructor name.
     *
     * PHP 8 no longer treats a method named after its class as a constructor,
     * so the real initialisation lives in __construct(). This alias is kept
     * for code that still calls PHPParser() explicitly (e.g. subclasses).
     *
     * @param int   $fetch_mode   Bitmask of PHPPARSER_FETCH_* flags.
     * @param array $functionlist Function definitions to pre-register.
     */
    public function PHPParser( $fetch_mode = PHPPARSER_FETCH_ALL, $functionlist = array() ) {
        $this->__construct( $fetch_mode, $functionlist );
    }

    /**
     * Create a parser.
     *
     * @param int   $fetch_mode   Bitmask of PHPPARSER_FETCH_* flags.
     * @param array $functionlist Function definitions to pre-register.
     */
    public function __construct( $fetch_mode = PHPPARSER_FETCH_ALL, $functionlist = array() ) {
        $this->reset( $fetch_mode );
        $functions = get_defined_functions();
        $this->internal_functions = $functions['internal'];
        $this->addFunctionDefinitions( $functionlist );
        $this->callbacks = array();
    }

    /**
     * Clear the per-run state (file name and known function definitions).
     *
     * @param int|null $fetch_mode New fetch mode; the current one is kept
     *                             unless a value greater than zero is given.
     */
    public function reset( $fetch_mode = null ) {
        if ( $fetch_mode > 0 ) {
            $this->fetch_mode = $fetch_mode;
        }
        $this->file_name = '';
        $this->defined_functions = array();
    }

    /**
     * Add function definitions to the list of known ones.
     *
     * Anything that is not an array is ignored.
     *
     * @param array $functionlist Definitions to merge into $defined_functions.
     */
    public function addFunctionDefinitions( $functionlist ) {
        if ( is_array( $functionlist ) ) {
            // array_merge() returns the merged array; it does not modify its
            // arguments, so the result has to be stored explicitly.
            $this->defined_functions = array_merge( $this->defined_functions, $functionlist );
        }
    }

    /**
     * Register a callback that receives every object the parser finds.
     *
     * @param callable $function_name Callback; called with the object array.
     * @param int      $fetch_mode    Stored with the callback under 'fetch'.
     *                                NOTE(review): foundObject() does not
     *                                consult this value when dispatching.
     * @return int|false Number of callbacks registered so far, or false when
     *                   $function_name is not callable.
     */
    public function registerCallback( $function_name, $fetch_mode = PHPPARSER_FETCH_ALL ) {
        if ( !is_callable( $function_name ) ) {
            return false;
        }
        $this->callbacks[] = array(
            'function' => $function_name,
            'fetch'    => $fetch_mode
        );
        return count( $this->callbacks );
    }

    /**
     * Hand a found object to every registered callback.
     *
     * @param array $object Object description as built by parse().
     * @return false|null False when $object is not an array, otherwise nothing.
     */
    public function foundObject( $object ) {
        if ( !is_array( $object ) ) {
            // Should never happen; refuse to forward malformed objects.
            return false;
        }
        foreach ( $this->callbacks as $callback ) {
            call_user_func( $callback['function'], $object );
        }
    }

    /**
     * Parse a file from disk.
     *
     * @param string $file Path of the file to parse.
     * @return false|null False when the file is missing or unreadable,
     *                    otherwise nothing.
     */
    public function parseFile( $file ) {
        if ( !file_exists( $file ) || !is_readable( $file ) ) {
            return false;
        }
        $content = file_get_contents( $file );
        if ( $content === false ) {
            // The read can still fail after the checks above; never feed the
            // boolean failure value to the tokenizer.
            return false;
        }
        $this->file_name = $file;
        $this->parse( $content );
    }

    /**
     * Build an object description and pass it to foundObject().
     *
     * @param int        $type         One of the PHPPARSER_* type constants.
     * @param mixed      $name         Value for the 'name' entry.
     * @param mixed      $context      Value for the 'context' entry.
     * @param int        $line         Current line number.
     * @param int        $block        Id of the current block.
     * @param int        $depth        Current block nesting depth.
     * @param mixed      $classname    Value for the 'in_class' entry.
     * @param mixed      $functionname Value for the 'in_function' entry.
     * @param array|null $open_blocks  Stack of open block ids; the
     *                                 'open_blocks' entry is omitted when null.
     */
    private function report( $type, $name, $context, $line, $block, $depth, $classname, $functionname, $open_blocks = null ) {
        $object = array(
            'type'        => $type,
            'name'        => $name,
            'file'        => $this->file_name,
            'context'     => $context,
            'line'        => $line,
            'block'       => $block,
            'depth'       => $depth,
            'in_class'    => $classname,
            'in_function' => $functionname
        );
        if ( $open_blocks !== null ) {
            $object['open_blocks'] = $open_blocks;
        }
        $this->foundObject( $object );
    }

    /**
     * Tokenize PHP source and report everything selected by $fetch_mode.
     *
     * Results are delivered only through the registered callbacks; the
     * method itself returns nothing.
     *
     * @param string $content PHP source code (including the opening tag).
     */
    public function parse( $content ) {
        $tokens = token_get_all( $content );
        $lines = preg_split( '/\r?(\n|\r)/', $content );
        $line = 1;
        $depth = 0;
        $block = 0;
        $block_count = 0;
        $class = $classname = null;
        $function = $functionname = null;
        $string = $last_token = null;
        $switch = array();
        // Stack of expression texts; '[' opens a sub-expression, ']' closes it.
        $expressions = array( '' );
        $expression = &$expressions[0];
        $assignment = false;
        // Stack of variable lists that runs in parallel to $expressions.
        $expression_variables = array( array() );
        $variables = &$expression_variables[0];
        $open_blocks = array( 0 );
        $in_string = false;
        $buffer = array( T_INLINE_HTML => '' );
        // State flags; they must start out defined because they are read
        // before the first token that would set them.
        $in_function_params = false;
        $in_foreach_params = false;
        $in_list = false;
        $variable_declaration = false;
        $m = array();

        // Tokens reported as language constructs.
        $construct_tokens = array(
            T_ARRAY,
            T_ECHO,
            T_EMPTY,
            T_EVAL,
            T_EXIT,
            T_HALT_COMPILER,
            T_INCLUDE,
            T_INCLUDE_ONCE,
            T_ISSET,
            T_LIST,
            T_PRINT,
            T_REQUIRE,
            T_REQUIRE_ONCE,
            T_UNSET
        );

        // Tokens whose text is copied into the current expression verbatim;
        // every other token contributes a single space instead.
        $expression_tokens = array(
            /* Values */
            T_STRING,
            T_CONSTANT_ENCAPSED_STRING,
            T_ENCAPSED_AND_WHITESPACE,
            T_WHITESPACE,
            T_DNUMBER,
            T_LNUMBER,
            T_NUM_STRING,
            T_VARIABLE,
            T_STRING_VARNAME,
            /* Operators */
            T_BOOLEAN_AND,
            T_BOOLEAN_OR,
            T_DEC,
            T_INC,
            T_IS_EQUAL,
            T_IS_GREATER_OR_EQUAL,
            T_IS_IDENTICAL,
            T_IS_NOT_EQUAL,
            T_IS_NOT_IDENTICAL,
            T_IS_SMALLER_OR_EQUAL,
            T_LOGICAL_AND,
            T_LOGICAL_OR,
            T_LOGICAL_XOR,
            T_OBJECT_OPERATOR,
            T_DOUBLE_COLON,
            T_SL,
            T_SR,
            /* Casts */
            T_DOUBLE_CAST,
            T_INT_CAST,
            T_OBJECT_CAST,
            T_STRING_CAST,
            T_UNSET_CAST,
            /* Constructs */
            T_ECHO,
            T_PRINT
        );
        // T_CHARACTER is not guaranteed to exist on every PHP version, so
        // only reference it when the running tokenizer defines it.
        if ( defined( 'T_CHARACTER' ) ) {
            $expression_tokens[] = constant( 'T_CHARACTER' );
        }

        foreach ( $tokens as $token ) {
            if ( !in_array( 0, $open_blocks, true ) ) {
                // The root block got lost (unbalanced braces); put it back.
                $open_blocks = array_merge( array( 0 ), $open_blocks );
            }
            $classname = isset( $class['name'] ) ? $class['name'] : '';
            $functionname = isset( $function['name'] ) ? $function['name'] : '';

            if ( $token === '"' ) {
                $in_string = !$in_string;
            }
            if ( $in_string ) {
                // Inside a double-quoted string: copy the text, keep the line
                // counter in sync and skip all further analysis.
                $piece = is_string( $token ) ? $token : $token[1];
                $expression .= $piece;
                $line += preg_match_all( '/\r?(\n|\r)/', $piece, $m );
                continue;
            }

            if ( is_string( $token ) ) {
                // Single character token
                $text = $token;
                switch ( $token ) {
                    case '{':
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        break;
                    case '}':
                        array_pop( $open_blocks );
                        $depth = count( $open_blocks ) - 1;
                        $block = $depth === 0 ? 0 : ( isset( $open_blocks[$depth] ) ? $open_blocks[$depth] : 0 );
                        // NOTE(review): the class branch clears $functionname,
                        // not $classname — possibly a typo; behaviour kept.
                        if ( !empty( $class ) && $class['block'] === $block ) { $class = $functionname = null; }
                        if ( !empty( $function ) && $function['block'] === $block ) { $function = $functionname = null; }
                        if ( in_array( $block, $switch, true ) ) {
                            // Leaving a switch: also drop the block that
                            // T_SWITCH pushed.
                            array_pop( $open_blocks );
                            $depth = count( $open_blocks ) - 1;
                            // NOTE(review): uses $depth - 1 where the lookup
                            // above uses $depth; behaviour kept.
                            $block = $depth === 0 ? 0 : $open_blocks[$depth - 1];
                        }
                        break;
                    case '(':
                        // A name directly followed by '(' is a function call,
                        // unless it is a definition, a method call or a "new".
                        // PHPPARSER_FETCH_INTERNAL is not consulted here, so
                        // internal functions are reported like any other.
                        if (
                            (bool)( $this->fetch_mode & PHPPARSER_FETCH_CALLS )
                            && !empty( $string )
                            && $last_token !== T_FUNCTION
                            && $last_token !== T_OBJECT_OPERATOR
                            && $last_token !== T_DOUBLE_COLON
                            && $last_token !== T_NEW
                        ) {
                            $this->report( PHPPARSER_FUNCTION_CALL, strtolower( $string ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname, $open_blocks );
                        }
                        break;
                    case ')':
                        $in_function_params = false;
                        $in_foreach_params = false;
                        $in_list = false;
                        break;
                    case '[':
                        // Start a sub-expression with its own variable list.
                        $expressions[] = '';
                        $expression_variables[] = array();
                        $variables = &$expression_variables[count( $expression_variables ) - 1];
                        break;
                    case ']':
                        /*
                        TODO: Subexpression reporting is NOT quite as accurate as it should be...
                        See the unit test failure for an example. The assigned variable is reported fine, which is good,
                        but the value it is assigned will not be. Since this isn't awfully important, and the fact that
                        everything else does is, I'm leaving this as-is for now.
                        */
                        $e = array_pop( $expressions );
                        $expression = &$expressions[count( $expressions ) - 1];
                        $expression .= trim( $e );
                        array_pop( $expression_variables );
                        $variables = &$expression_variables[count( $expression_variables ) - 1];
                        // Fold the index into the variable it belongs to: $var becomes $var[index].
                        $variables[count( $variables ) - 1] = end( $variables ) . '[' . trim( $e ) . ']';
                        break;
                    default:
                        /* Should be able to add a hook here later on to catch the use of defines,
                        which are basically just T_STRINGs that PHP can't find anything else to
                        do with. That could be useful in determining whether a define is defined.
                        */
                        $string = null;
                        break;
                }

                if (
                    (bool)( $this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS )
                    && (
                        ( ( $in_function_params || $in_foreach_params ) && in_array( $token, array( '{', '}', '(', ')', ';', ',', '?', ':' ), true ) )
                        || in_array( $token, array( '{', '}', ';', '=', '?', ':' ), true )
                    )
                ) {
                    // This character terminates the expression collected so far.
                    $expression = trim( $expression );
                    if ( $assignment ) {
                        // If this was a list assignment, we've got an array of variables to mark as assigned at once!
                        $targets = is_array( $assignment ) ? $assignment : array( $assignment );
                        foreach ( $targets as $var ) {
                            $this->report( PHPPARSER_ASSIGNMENT, "$var=$expression", $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                        }
                        $assignment = false;
                    } elseif ( strlen( $expression ) > 0 ) {
                        $this->report( PHPPARSER_EXPRESSION, $expression, $lines[$line - 1], $line, $block, $depth, $classname, $functionname, $open_blocks );
                    }
                    $expression = '';
                    $variable_declaration = false;
                    // On '=', the most recently seen variable is the assignment target.
                    $assignment = ( '=' === $token ? array_pop( $variables ) : false );
                    while ( count( $variables ) > 0 ) {
                        $variable = array_pop( $variables );
                        $this->report( PHPPARSER_VARIABLE, $variable, $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                    }
                } else {
                    $expression .= $token;
                }

                // Re-bind $expression to the top of the stack if it drifted.
                if ( $expression !== end( $expressions ) ) {
                    $expression = &$expressions[count( $expressions ) - 1];
                }

                if ( !empty( $buffer[T_INLINE_HTML] ) && (bool)( $this->fetch_mode & PHPPARSER_FETCH_INLINE_HTML ) ) {
                    $this->report( PHPPARSER_INLINE_HTML, $buffer[T_INLINE_HTML], $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                    $buffer[T_INLINE_HTML] = '';
                    if ( !in_array( $token, array( '(', ')' ), true ) ) { $last_token = null; }
                }
            } else {
                list( $id, $text ) = $token;
                switch ( $id ) {
                    case T_CURLY_OPEN:
                    case T_DOLLAR_OPEN_CURLY_BRACES:
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        break;
                    case T_STRING:
                        $string = $text;
                        switch ( $last_token ) {
                            case T_CLASS:
                                if ( (bool)( $this->fetch_mode & PHPPARSER_FETCH_CLASSES ) ) {
                                    $this->report( PHPPARSER_CLASS_DEF, strtolower( $string ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                                }
                                $class = array( 'name' => $text, 'block' => $block );
                                break;
                            case T_FUNCTION:
                                $variable_declaration = false;
                                // Not interested in member function definitions
                                if ( (bool)( $this->fetch_mode & PHPPARSER_FETCH_FUNCTIONS ) && empty( $classname ) ) {
                                    // $line is 1-based while $lines is 0-based.
                                    $this->report( PHPPARSER_FUNCTION_DEF, strtolower( $string ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                                }
                                $function = array( 'name' => $text, 'block' => $block );
                                $in_function_params = true;
                                break;
                        }
                        break;
                    case T_CONSTANT_ENCAPSED_STRING:
                        switch ( $last_token ) {
                            case T_INCLUDE:
                            case T_INCLUDE_ONCE:
                            case T_REQUIRE:
                            case T_REQUIRE_ONCE:
                                if ( (bool)( $this->fetch_mode & PHPPARSER_FETCH_INCLUDES ) ) {
                                    // Only single quotes are stripped from the included path.
                                    $this->report( PHPPARSER_INCLUDE, trim( str_replace( '\'', '', $text ) ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                                }
                                break;
                        }
                        break;
                    case T_GLOBAL:
                    case T_VAR:
                    case T_PRIVATE:
                    case T_PUBLIC:
                    case T_PROTECTED:
                    case T_STATIC:
                        $variable_declaration = true;
                        break;
                    case T_LIST:
                        $in_list = true;
                        // The variables of a list() are collected in one nested array.
                        $variables[] = array();
                        break;
                    case T_VARIABLE:
                        if ( $in_list ) {
                            /*
                            TODO: Proper fix for this.
                            Has to do with the less-than-perfect sub-expressions. If an entry in a list() is a keyed value of an array (i.e. $var[$key]),
                            everything sort of falls apart.
                            */
                            if ( !is_array( $variables[count( $variables ) - 1] ) ) {
                                $variables[count( $variables ) - 1] = array( end( $variables ) );
                            }
                            $variables[count( $variables ) - 1][] = $text;
                        } else {
                            $variables[] = $text;
                        }
                        if ( (bool)( $this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS ) ) {
                            // Declarations, foreach targets and function
                            // parameters are reported as assignments.
                            if (
                                $variable_declaration
                                || $last_token === T_AS
                                || ( $in_foreach_params && T_DOUBLE_ARROW === $last_token )
                                || $in_function_params
                            ) {
                                $this->report( PHPPARSER_ASSIGNMENT, "$text=$text", $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                            }
                        }
                        break;
                    case T_SWITCH:
                        // A switch opens a block of its own before its '{'.
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        $switch[$block] = $block;
                        break;
                    case T_CASE:
                    case T_DEFAULT:
                        // Each conditional of a switch statement is its own block
                        array_pop( $open_blocks );
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        break;
                    case T_AS:
                        $in_foreach_params = true;
                        break;
                    case T_INLINE_HTML:
                        // NOTE: There seems to be a string limit of around 400 characters, which is very easy to reach with this token
                        // We'll get around it by combining adjacent token results
                        $buffer[T_INLINE_HTML] .= $text;
                        break;
                }

                // Whitespace and names do not count as the "previous token".
                if ( !in_array( $id, array( T_WHITESPACE, T_STRING ), true ) ) {
                    $string = null;
                    $last_token = $id;
                }

                if ( (bool)( $this->fetch_mode & PHPPARSER_FETCH_CONSTRUCTS ) && in_array( $id, $construct_tokens, true ) ) {
                    $this->report( PHPPARSER_LANGUAGE_CONSTRUCT, $text, $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                }

                $expression .= in_array( $id, $expression_tokens, true ) ? $text : ' ';

                // A closing tag ends the statement even without a semicolon.
                if ( (bool)( $this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS ) && $id === T_CLOSE_TAG && strlen( trim( $expression ) ) > 0 ) {
                    $this->report( PHPPARSER_EXPRESSION, trim( $expression ), $lines[$line - 1], $line, $block, $depth, $classname, $functionname, $open_blocks );
                    $expression = '';
                }

                // Flush buffered inline HTML once a different token shows up.
                if ( $id !== T_INLINE_HTML && !empty( $buffer[T_INLINE_HTML] ) && (bool)( $this->fetch_mode & PHPPARSER_FETCH_INLINE_HTML ) ) {
                    $this->report( PHPPARSER_INLINE_HTML, $buffer[T_INLINE_HTML], $lines[$line - 1], $line, $block, $depth, $classname, $functionname );
                    $buffer[T_INLINE_HTML] = '';
                }

                // Advance the line counter by the line breaks in this token.
                $line += preg_match_all( '/\r?(\n|\r)/', $text, $m );
            }
        }
    }
}
?>