scanner/parser.php
Correl Roush 2386113895 Text output is now tab delimited, one fault per line. Added an html output module. Mantis: 2691
git-svn-id: file:///srv/svn/scanner/trunk@5 a0501263-5b7a-4423-a8ba-1edf086583e7
2008-02-15 19:05:09 +00:00

341 lines
10 KiB
PHP

<?php
// Bit flags combined into PHPParser's $fetch_mode mask; each bit enables one
// category of reported object.
define( 'PHPPARSER_FETCH_CLASSES',     1 << 0 );
define( 'PHPPARSER_FETCH_FUNCTIONS',   1 << 1 );
define( 'PHPPARSER_FETCH_CALLS',       1 << 2 );
define( 'PHPPARSER_FETCH_INCLUDES',    1 << 3 );
define( 'PHPPARSER_FETCH_INTERNAL',    1 << 4 );
define( 'PHPPARSER_FETCH_CONSTRUCTS',  1 << 5 );
define( 'PHPPARSER_FETCH_EXPRESSIONS', 1 << 6 );
// All sixteen low bits set, so every current flag is included.
define( 'PHPPARSER_FETCH_ALL',         0xFFFF );

// Values of the 'type' key in the object arrays passed to callbacks.
define( 'PHPPARSER_CLASS_DEF',          1 );
define( 'PHPPARSER_FUNCTION_DEF',       2 );
define( 'PHPPARSER_FUNCTION_CALL',      3 );
define( 'PHPPARSER_INCLUDE',            4 );
define( 'PHPPARSER_EXPRESSION',         5 );
define( 'PHPPARSER_LANGUAGE_CONSTRUCT', 6 );
/**
 * Token-based scanner for PHP source.
 *
 * parse() walks the output of token_get_all() and hands every finding that is
 * enabled in $fetch_mode (class and function definitions, function calls,
 * includes, language constructs, expressions) to all registered callbacks.
 *
 * Every finding is an associative array with the keys 'type' (one of the
 * PHPPARSER_* type constants), 'name', 'file', 'context' (text of the source
 * line), 'line' (1-based), 'block' (id of the innermost open block, 0 at file
 * level), 'depth', 'in_class' and 'in_function'. Function calls and
 * expressions additionally carry 'open_blocks' (the stack of open block ids).
 */
class PHPParser {
    /** Bitmask of PHPPARSER_FETCH_* flags selecting what parse() reports. */
    public $fetch_mode;
    /** File name stamped on findings; set by parseFile(), '' after reset(). */
    public $file_name;
    /** Names of PHP's built-in functions, captured at construction time. */
    public $internal_functions;
    /** Function definitions supplied through addFunctionDefinitions(). */
    public $defined_functions;
    /** Registered callbacks, each array( 'function' => ..., 'fetch' => ... ). */
    public $callbacks;

    /**
     * @param int   $fetch_mode   Bitmask of PHPPARSER_FETCH_* flags.
     * @param array $functionlist Initial function definitions.
     */
    function __construct( $fetch_mode = PHPPARSER_FETCH_ALL, $functionlist = array() ) {
        $this->reset( $fetch_mode );
        $functions = get_defined_functions();
        $this->internal_functions = $functions['internal'];
        $this->addFunctionDefinitions( $functionlist );
        $this->callbacks = array();
    }

    /**
     * PHP 4 style constructor, kept so existing explicit calls such as
     * parent::PHPParser() keep working. PHP 8 no longer treats a method named
     * after the class as the constructor, hence __construct() above.
     */
    function PHPParser( $fetch_mode = PHPPARSER_FETCH_ALL, $functionlist = array() ) {
        $this->__construct( $fetch_mode, $functionlist );
    }

    /**
     * Clears the file name and the function definitions.
     *
     * @param int|null $fetch_mode New fetch mode; the current one is kept
     *                             unless a value greater than zero is given.
     */
    function reset( $fetch_mode = null ) {
        if ( $fetch_mode > 0 ) {
            $this->fetch_mode = $fetch_mode;
        }
        $this->file_name = '';
        $this->defined_functions = array();
    }

    /**
     * Appends $functionlist to the known function definitions.
     * Non-array input is ignored.
     */
    function addFunctionDefinitions( $functionlist ) {
        if ( is_array( $functionlist ) ) {
            // array_merge() returns the merged array; it does not modify its
            // arguments, so the result has to be stored.
            $this->defined_functions = array_merge( $this->defined_functions, $functionlist );
        }
    }

    /**
     * Registers a callback that receives every finding as its only argument.
     *
     * NOTE: $fetch_mode is stored with the callback but foundObject() does
     * not consult it; every callback receives every finding.
     *
     * @param callable $function_name Function name or any other PHP callable.
     * @param int      $fetch_mode    Stored alongside the callback.
     * @return int|false Number of registered callbacks, or false when
     *                   $function_name is not callable.
     */
    function registerCallback( $function_name, $fetch_mode = PHPPARSER_FETCH_ALL ) {
        if ( !is_callable( $function_name ) ) {
            return false;
        }
        $this->callbacks[] = array(
            'function' => $function_name,
            'fetch' => $fetch_mode
        );
        return count( $this->callbacks );
    }

    /**
     * Passes a finding to every registered callback.
     *
     * @return false|null false when $object is not an array, otherwise nothing.
     */
    function foundObject( $object ) {
        if ( !is_array( $object ) ) {
            // Findings are always arrays; anything else is a caller error.
            return false;
        }
        foreach ( $this->callbacks as $callback ) {
            call_user_func( $callback['function'], $object );
        }
    }

    /**
     * Reads $file and parses its contents.
     *
     * @return false|null false when the file is missing or cannot be read,
     *                    otherwise nothing.
     */
    function parseFile( $file ) {
        if ( !file_exists( $file ) || !is_readable( $file ) ) {
            return false;
        }
        $this->file_name = $file;
        $content = file_get_contents( $file );
        if ( $content === false ) {
            // The read can still fail after the checks above.
            return false;
        }
        $this->parse( $content );
    }

    /**
     * Tokenizes $content and reports findings to the registered callbacks.
     *
     * Blocks are numbered in the order they open. A block is opened by '{',
     * by a '{$' / '${' inside an interpolated string token, by `switch`, and
     * by each `case` / `default` (which replaces the innermost open block).
     * PHPPARSER_FETCH_INTERNAL is not consulted here.
     *
     * @param string $content PHP source code.
     */
    function parse( $content ) {
        $newline = '/\r?(\n|\r)/';
        $tokens = token_get_all( $content );
        $lines = preg_split( $newline, $content );

        // Loop-invariant lookup tables.
        $expression_enders = array( '{', '}', ';', '=', '?', ':' );
        $constructs = $this->tokenIds( array(
            'T_ARRAY', 'T_ECHO', 'T_EMPTY', 'T_EVAL', 'T_EXIT', 'T_HALT_COMPILER',
            'T_INCLUDE', 'T_INCLUDE_ONCE', 'T_ISSET', 'T_LIST', 'T_PRINT',
            'T_REQUIRE', 'T_REQUIRE_ONCE', 'T_UNSET'
        ) );
        // Tokens whose text is copied into the running expression; any other
        // token contributes a single space.
        $expression_tokens = $this->tokenIds( array(
            /* Values */
            'T_STRING', 'T_CHARACTER', 'T_CONSTANT_ENCAPSED_STRING',
            'T_ENCAPSED_AND_WHITESPACE', 'T_WHITESPACE', 'T_DNUMBER', 'T_LNUMBER',
            'T_NUM_STRING', 'T_VARIABLE', 'T_STRING_VARNAME',
            /* Operators */
            'T_BOOLEAN_AND', 'T_BOOLEAN_OR', 'T_DEC', 'T_INC', 'T_IS_EQUAL',
            'T_IS_GREATER_OR_EQUAL', 'T_IS_IDENTICAL', 'T_IS_NOT_EQUAL',
            'T_IS_NOT_IDENTICAL', 'T_IS_SMALLER_OR_EQUAL', 'T_LOGICAL_AND',
            'T_LOGICAL_OR', 'T_LOGICAL_XOR', 'T_OBJECT_OPERATOR', 'T_DOUBLE_COLON',
            'T_SL', 'T_SR',
            /* Casts */
            'T_DOUBLE_CAST', 'T_INT_CAST', 'T_OBJECT_CAST', 'T_STRING_CAST',
            'T_UNSET_CAST',
            /* Constructs */
            'T_ECHO', 'T_PRINT'
        ) );

        $line = 1;                 // 1-based line of the current token
        $depth = 0;                // index of the innermost entry in $open_blocks
        $block = 0;                // id of the innermost open block
        $block_count = 0;          // last block id handed out
        $class = null;             // array( 'name', 'block' ) of the enclosing class
        $function = null;          // array( 'name', 'block' ) of the enclosing function
        $switch = array();         // block ids opened by `switch`, keyed by id
        $expression = '';
        $open_blocks = array( 0 );
        $in_string = false;        // inside an interpolated "..." string
        $string = null;            // text of the most recent T_STRING
        $last_token = null;        // id of the last significant token

        foreach ( $tokens as $token ) {
            if ( !in_array( 0, $open_blocks, true ) ) {
                // Unbalanced braces popped the file-level block; restore it.
                $open_blocks = array_merge( array( 0 ), $open_blocks );
            }
            $classname = isset( $class['name'] ) ? $class['name'] : '';
            $functionname = isset( $function['name'] ) ? $function['name'] : '';

            if ( $token === '"' ) {
                $in_string = !$in_string;
            }
            if ( $in_string ) {
                // Everything inside an interpolated string only feeds the
                // expression text and the line counter.
                $piece = is_string( $token ) ? $token : $token[1];
                $expression .= $piece;
                $line += preg_match_all( $newline, $piece, $m );
                continue;
            }

            // $lines is 0-based while $line is 1-based.
            $context = isset( $lines[$line - 1] ) ? $lines[$line - 1] : '';

            if ( is_string( $token ) ) {
                // Single character token
                switch ( $token ) {
                    case '{':
                        $block_count++;
                        $block = $block_count;
                        array_push( $open_blocks, $block );
                        $depth = count( $open_blocks ) - 1;
                        break;
                    case '}':
                        array_pop( $open_blocks );
                        $depth = count( $open_blocks ) - 1;
                        $block = $depth > 0 ? $open_blocks[$depth] : 0;
                        // Back in the block where the class/function was
                        // declared: its body has ended.
                        if ( !empty( $class ) && $class['block'] == $block ) {
                            $class = $classname = null;
                        }
                        if ( !empty( $function ) && $function['block'] == $block ) {
                            $function = $functionname = null;
                        }
                        if ( isset( $switch[$block] ) ) {
                            // This brace also ends the block opened by the
                            // `switch` keyword itself.
                            array_pop( $open_blocks );
                            $depth = count( $open_blocks ) - 1;
                            $block = $depth > 0 ? $open_blocks[$depth] : 0;
                        }
                        break;
                    case '(':
                        if (
                            ( $this->fetch_mode & PHPPARSER_FETCH_CALLS )
                            && !empty( $string )
                            && $last_token != T_FUNCTION
                            && $last_token != T_OBJECT_OPERATOR
                            && $last_token != T_NEW
                        ) {
                            $this->foundObject( $this->buildObject(
                                PHPPARSER_FUNCTION_CALL, strtolower( $string ), $context, $line,
                                $block, $depth, $classname, $functionname, $open_blocks
                            ) );
                        }
                        break;
                }
                // Parentheses keep $last_token so that e.g. include('x')
                // is still recognised; everything else clears it.
                if ( $token !== '(' && $token !== ')' ) {
                    $last_token = null;
                }
                // An identifier only names a call when '(' follows it
                // directly; forget it so foo(($a)) or FOO * (1) are not
                // reported as (additional) calls.
                $string = null;

                if (
                    ( $this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS )
                    && in_array( $token, $expression_enders, true )
                    && strlen( trim( $expression ) ) > 0
                ) {
                    $this->foundObject( $this->buildObject(
                        PHPPARSER_EXPRESSION, trim( $expression ), $context, $line,
                        $block, $depth, $classname, $functionname, $open_blocks
                    ) );
                    $expression = '';
                } else {
                    $expression .= $token;
                }
                continue;
            }

            list( $id, $text ) = $token;
            switch ( $id ) {
                case T_CURLY_OPEN:
                case T_DOLLAR_OPEN_CURLY_BRACES:
                    $block_count++;
                    $block = $block_count;
                    array_push( $open_blocks, $block );
                    $depth = count( $open_blocks ) - 1;
                    break;
                case T_STRING:
                    $string = $text;
                    if ( $last_token === T_CLASS ) {
                        if ( $this->fetch_mode & PHPPARSER_FETCH_CLASSES ) {
                            $this->foundObject( $this->buildObject(
                                PHPPARSER_CLASS_DEF, strtolower( $string ), $context, $line,
                                $block, $depth, $classname, $functionname
                            ) );
                        }
                        $class = array( 'name' => $text, 'block' => $block );
                    } elseif ( $last_token === T_FUNCTION ) {
                        // Not interested in member function definitions
                        if ( ( $this->fetch_mode & PHPPARSER_FETCH_FUNCTIONS ) && empty( $classname ) ) {
                            $this->foundObject( $this->buildObject(
                                PHPPARSER_FUNCTION_DEF, strtolower( $string ), $context, $line,
                                $block, $depth, $classname, $functionname
                            ) );
                        }
                        $function = array( 'name' => $text, 'block' => $block );
                    }
                    break;
                case T_CONSTANT_ENCAPSED_STRING:
                    if (
                        ( $this->fetch_mode & PHPPARSER_FETCH_INCLUDES )
                        && in_array( $last_token, array( T_INCLUDE, T_INCLUDE_ONCE, T_REQUIRE, T_REQUIRE_ONCE ), true )
                    ) {
                        // The token text still carries its quotes; a constant
                        // string may use either quote style.
                        $this->foundObject( $this->buildObject(
                            PHPPARSER_INCLUDE, trim( str_replace( array( '\'', '"' ), '', $text ) ),
                            $context, $line, $block, $depth, $classname, $functionname
                        ) );
                    }
                    break;
                case T_SWITCH:
                    $block_count++;
                    $block = $block_count;
                    array_push( $open_blocks, $block );
                    $depth = count( $open_blocks ) - 1;
                    $switch[$block] = $block;
                    break;
                case T_CASE:
                case T_DEFAULT:
                    // Each conditional of a switch statement is its own block
                    array_pop( $open_blocks );
                    $block_count++;
                    $block = $block_count;
                    array_push( $open_blocks, $block );
                    $depth = count( $open_blocks ) - 1;
                    break;
            }

            // Whitespace and identifiers are transparent; every other token
            // becomes the new "last significant token".
            if ( $id !== T_WHITESPACE && $id !== T_STRING ) {
                $string = null;
                $last_token = $id;
            }

            if ( ( $this->fetch_mode & PHPPARSER_FETCH_CONSTRUCTS ) && in_array( $id, $constructs, true ) ) {
                $this->foundObject( $this->buildObject(
                    PHPPARSER_LANGUAGE_CONSTRUCT, $text, $context, $line,
                    $block, $depth, $classname, $functionname
                ) );
            }

            $expression .= in_array( $id, $expression_tokens, true ) ? $text : ' ';
            $line += preg_match_all( $newline, $text, $m );
        }
    }

    /**
     * Assembles the associative array describing one finding.
     * 'open_blocks' is only included when $open_blocks is given.
     */
    private function buildObject( $type, $name, $context, $line, $block, $depth, $classname, $functionname, $open_blocks = null ) {
        $object = array(
            'type' => $type,
            'name' => $name,
            'file' => $this->file_name,
            'context' => $context,
            'line' => $line,
            'block' => $block,
            'depth' => $depth,
            'in_class' => $classname,
            'in_function' => $functionname
        );
        if ( $open_blocks !== null ) {
            $object['open_blocks'] = $open_blocks;
        }
        return $object;
    }

    /**
     * Resolves T_* constant names to token ids, skipping names the running
     * PHP version does not define, so that a token constant missing from the
     * running version cannot abort parsing.
     */
    private function tokenIds( $names ) {
        $ids = array();
        foreach ( $names as $name ) {
            if ( defined( $name ) ) {
                $ids[] = constant( $name );
            }
        }
        return $ids;
    }
}
?>