From 08bc427e4a4f309dea7b46b9272df19cd937fe9b Mon Sep 17 00:00:00 2001 From: Correl Roush Date: Thu, 22 May 2008 20:05:53 +0000 Subject: [PATCH] Parser modified to return inline html, variables and variable assignment. Included some phpunit tests. Assignment fetching is good, but not perfect (doesn't handle array index assignment or nested expressions yet, see tests for more information). Therefore, the variable scan is not enabled by default yet. Mantis: 2691 git-svn-id: file:///srv/svn/scanner/trunk@19 a0501263-5b7a-4423-a8ba-1edf086583e7 --- modules/scanner_functions.php | 15 ++-- modules/scanner_variables.php | 36 ++++++++ parser.php | 155 ++++++++++++++++++++++++++++++--- scanner.php | 12 ++- test.php | 19 +++- tests/AllTests.php | 22 +++++ tests/ParserTests.php | 138 +++++++++++++++++++++++++++++ tests/ScannerTests.php | 34 ++++++++ tests/samples/lint_failure.php | 9 ++ tests/samples/lint_failure.txt | 1 + tests/samples/patterns.php | 9 ++ tests/samples/patterns.txt | 5 ++ 12 files changed, 432 insertions(+), 23 deletions(-) create mode 100644 modules/scanner_variables.php create mode 100644 tests/AllTests.php create mode 100644 tests/ParserTests.php create mode 100644 tests/ScannerTests.php create mode 100644 tests/samples/lint_failure.php create mode 100644 tests/samples/lint_failure.txt create mode 100644 tests/samples/patterns.php create mode 100644 tests/samples/patterns.txt diff --git a/modules/scanner_functions.php b/modules/scanner_functions.php index 85b61e6..cac1111 100644 --- a/modules/scanner_functions.php +++ b/modules/scanner_functions.php @@ -83,12 +83,15 @@ class FunctionsModule extends ScannerModule { case PHPPARSER_INCLUDE: $this->included_files[] = $object; if( $object['name'] == 'global.php' ) { - $object['name'] = 'libs/security/lib_security_input.php'; - $this->included_files[] = $object; - $object['name'] = 'libs/get/lib_get_portal.php'; - $this->included_files[] = $object; - $object['name'] = 'libs/logging/lib_logging_errors.php'; - $this->included_files[] = $object; + $global_includes = array( + 'libs/security/lib_security_input.php', + 'libs/get/lib_get_portal.php', + 'libs/logging/lib_logging_errors.php', + ); + foreach ($global_includes as $global_include) { + $object['name'] = $global_include; + $this->included_files[] = $object; + } } break; case PHPPARSER_FUNCTION_CALL: diff --git a/modules/scanner_variables.php b/modules/scanner_variables.php new file mode 100644 index 0000000..7bc0f49 --- /dev/null +++ b/modules/scanner_variables.php @@ -0,0 +1,36 @@ +ScannerModule(); + } + function parserCallback( $object ) { + $pattern = '/\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/'; + $matches = array(); + $scope = "{$object['in_class']}::{$object['in_function']}"; + if (!isset($this->assigned_variables[$scope] ) ) + $this->assigned_variables[$scope] = array(); + if ($object['type'] == PHPPARSER_ASSIGNMENT) { + //$this->fault($object, 0, "Assignment: {$object['name']}"); + list($variable, $value) = explode('=', $object['name']); + $this->assigned_variables[$scope][] = $variable; + } + if ( + $object['type'] == PHPPARSER_VARIABLE + // Cannot yet accurately scan the global scope, so functions only + && !empty($object['in_function']) + && !in_array($object['name'], $this->assigned_variables[$scope]) + && !in_array($object['name'], array( + // Superglobals are exempt, obviously + '$GLOBALS', '$_SERVER', '$_GET', '$_POST', '$_FILES', '$_COOKIE', '$_SESSION', '$_REQUEST', '$_ENV' + )) + ) { + $this->fault($object, FAULT_MEDIUM, "Undefined Variable: {$object['name']}"); + } + } +} + +addModule( new VariableModule() ); +?> diff --git a/parser.php b/parser.php index 486ae53..4ec0e8d 100644 --- a/parser.php +++ b/parser.php @@ -6,6 +6,7 @@ define( 'PHPPARSER_FETCH_INCLUDES', 8 ); define( 'PHPPARSER_FETCH_INTERNAL', 16 ); define( 'PHPPARSER_FETCH_CONSTRUCTS', 32 ); define( 'PHPPARSER_FETCH_EXPRESSIONS', 64 ); +define( 'PHPPARSER_FETCH_INLINE_HTML', 128 ); define( 'PHPPARSER_FETCH_ALL', 65535 ); @@ -15,6 +16,9 @@ define( 'PHPPARSER_FUNCTION_CALL', 3 ); define( 'PHPPARSER_INCLUDE', 4 ); define( 'PHPPARSER_EXPRESSION', 5 ); define( 'PHPPARSER_LANGUAGE_CONSTRUCT', 6 ); +define( 'PHPPARSER_INLINE_HTML', 7 ); +define( 'PHPPARSER_VARIABLE', 8 ); +define( 'PHPPARSER_ASSIGNMENT', 9 ); class PHPParser { @@ -46,7 +50,7 @@ class PHPParser { } function registerCallback( $function_name, $fetch_mode = PHPPARSER_FETCH_ALL ) { - if( function_exists( $function_name ) ) { + if( is_callable( $function_name ) ) { $this->callbacks[] = array( 'function' => $function_name, 'fetch' => $fetch_mode @@ -83,15 +87,19 @@ class PHPParser { $class = $classname = null; $function = $functionname = null; + $string = $last_token = null; $switch = array(); $expression = ''; + $assignment = false; $line_text = ''; $internal_functions = get_defined_functions(); $internal_functions = $internal_functions['internal']; $local_functions = array(); $local_classes = array(); + $variables = array(); $open_blocks = array( 0 ); $in_string = false; + $buffer = array(T_INLINE_HTML => ''); foreach( $tokens as $token ) { //echo ( is_string( $token ) ? 'CHAR: ' . $token : token_name( $token[0] ) . ': ' . $token[1] ) . "\n"; if( !in_array( 0, $open_blocks ) ) { @@ -105,7 +113,7 @@ class PHPParser { } if( $in_string ) { $expression .= is_string( $token ) ? $token : $token[1]; - $count = preg_match_all( '/\r?(\n|\r)/', is_string( $token ) ? $token : $token[1], $m ); + $count = preg_match_all( '/\r?(\n|\r)/', is_string( $token ) ? $token : $token[1], $m = array() ); $line += $count; continue; } @@ -155,6 +163,11 @@ class PHPParser { ) ); } break; + case ')': + $in_function_params = false; + $in_foreach_params = false; + $in_list = false; + break; default: /* Should be able to add a hook here later on to catch the use of defines, which are basically just T_STRINGs that PHP can't find anything else to @@ -163,22 +176,81 @@ class PHPParser { $string = null; break; } - if( !in_array( $token, array( '(', ')' ) ) ) { $last_token = null; } - if( (bool)($this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS) && in_array( $token, array( '{', '}', ';', '=', '?', ':' ) ) && strlen( trim( $expression ) ) > 0 ) { + if ( + (bool)($this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS) + && ( + (($in_function_params || $in_foreach_params) && in_array( $token, array( '{', '}', '(', ')', ';', ',', '?', ':' ) )) + || in_array( $token, array( '{', '}', ';', '=', '?', ':' ) ) + ) + ) { + $expression = trim($expression); + if ($assignment) { + // If this was a list assignment, we've got an array of variables to mark as assigned at once! + $variable = is_array($assignment) ? $assignment : array($assignment); + foreach ($variable as $var) { + $this->foundObject( array( + 'type' => PHPPARSER_ASSIGNMENT, + 'name' => "$var=$expression", + 'file' => $this->file_name, + 'context' => $lines[$line - 1], + 'line' => $line, + 'block' => $block, + 'depth' => $depth, + 'in_class' => $classname, + 'in_function' => $functionname + ) ); + } + $assignment = false; + } + elseif (strlen($expression) > 0) { + foreach($variables as $var) { + } + $this->foundObject( array( + 'type' => PHPPARSER_EXPRESSION, + 'name' => $expression, + 'file' => $this->file_name, + 'context' => $lines[$line - 1], + 'line' => $line, + 'block' => $block, + 'depth' => $depth, + 'in_class' => $classname, + 'in_function' => $functionname, + 'open_blocks' => $open_blocks + ) ); + } + $expression = ''; + $variable_declaration = false; + $assignment = ('=' == $token ? array_pop($variables) : false); + while (count($variables) > 0) { + $variable = array_pop($variables); + $this->foundObject( array( + 'type' => PHPPARSER_VARIABLE, + 'name' => $variable, + 'file' => $this->file_name, + 'context' => $lines[$line - 1], + 'line' => $line, + 'block' => $block, + 'depth' => $depth, + 'in_class' => $classname, + 'in_function' => $functionname + ) ); + } + } else { $expression .= $token; } + if( !empty($buffer[T_INLINE_HTML]) && (bool)($this->fetch_mode & PHPPARSER_FETCH_INLINE_HTML) ) { $this->foundObject( array( - 'type' => PHPPARSER_EXPRESSION, - 'name' => trim( $expression ), + 'type' => PHPPARSER_INLINE_HTML, + 'name' => $buffer[T_INLINE_HTML], 'file' => $this->file_name, 'context' => $lines[$line - 1], 'line' => $line, 'block' => $block, 'depth' => $depth, 'in_class' => $classname, - 'in_function' => $functionname, - 'open_blocks' => $open_blocks + 'in_function' => $functionname ) ); - $expression = ''; - } else { $expression .= $token; } + $buffer[T_INLINE_HTML] = ''; + if( !in_array( $token, array( '(', ')' ) ) ) { $last_token = null; } + } } else { list($id, $text) = $token; switch( $id ) { @@ -209,6 +281,7 @@ class PHPParser { $class = array( 'name' => $text, 'block' => $block ); break; case T_FUNCTION: + $variable_declaration = false; if( (bool)($this->fetch_mode & PHPPARSER_FETCH_FUNCTIONS) && empty( $classname ) ) { // Not interested in member function definitions $this->foundObject( array( 'type' => PHPPARSER_FUNCTION_DEF, @@ -223,6 +296,7 @@ class PHPParser { ) ); } $function = array( 'name' => $text, 'block' => $block ); + $in_function_params = true; break; } break; @@ -248,6 +322,45 @@ class PHPParser { break; } break; + case T_GLOBAL: + case T_VAR: + case T_PRIVATE: + case T_PUBLIC: + case T_PROTECTED: + case T_STATIC: + $variable_declaration = true; + break; + case T_LIST: + $in_list = true; + $variables[] = array(); + break; + case T_VARIABLE: + if ($in_list) { + $variables[count($variables) - 1][] = $text; + } else { + $variables[] = $text; + } + if( (bool)($this->fetch_mode & PHPPARSER_FETCH_EXPRESSIONS) ) { + if ( + $variable_declaration + || in_array($last_token, array(T_AS)) + || ($in_foreach_params && T_DOUBLE_ARROW == $last_token) + || $in_function_params + ) { + $this->foundObject( array( + 'type' => PHPPARSER_ASSIGNMENT, + 'name' => "$text=$text", + 'file' => $this->file_name, + 'context' => $lines[$line - 1], + 'line' => $line, + 'block' => $block, + 'depth' => $depth, + 'in_class' => $classname, + 'in_function' => $functionname + ) ); + } + } + break; ///* case T_SWITCH: $block_count++; @@ -265,6 +378,14 @@ class PHPParser { array_push( $open_blocks, $block ); $depth = count( $open_blocks ) - 1; break; + case T_AS: + $in_foreach_params = true; + break; + case T_INLINE_HTML: + // NOTE: There seems to be a string limit of around 400 characters, which is very easy to reach with this token + // We'll get around it by combining adjacent token results + $buffer[T_INLINE_HTML] .= $text; + break; //*/ } if( !in_array( $id, array( T_WHITESPACE, /*T_COMMENT, T_DOC_COMMENT,*/ T_STRING ) ) ) { $string = null; $last_token = $id; } @@ -355,6 +476,20 @@ class PHPParser { ) ); $expression = ''; } + if( $id != T_INLINE_HTML && !empty($buffer[T_INLINE_HTML]) && (bool)($this->fetch_mode & PHPPARSER_FETCH_INLINE_HTML) ) { + $this->foundObject( array( + 'type' => PHPPARSER_INLINE_HTML, + 'name' => $buffer[T_INLINE_HTML], + 'file' => $this->file_name, + 'context' => $lines[$line - 1], + 'line' => $line, + 'block' => $block, + 'depth' => $depth, + 'in_class' => $classname, + 'in_function' => $functionname + ) ); + $buffer[T_INLINE_HTML] = ''; + } $count = preg_match_all( '/\r?(\n|\r)/', $text, $m ); $line += $count; } diff --git a/scanner.php b/scanner.php index b723c89..4f3bace 100644 --- a/scanner.php +++ b/scanner.php @@ -190,7 +190,7 @@ for( $i = 1; $i < $argc; $i++ ) { break; case '-m': case '--module': - $config['modules'][] = $argv[++$i]; + $config['modules'] = array_merge($config['modules'], explode(',', $argv[++$i])); break; case '-o': case '--output': @@ -265,8 +265,9 @@ for( $i = 1; $i < $argc; $i++ ) { } } } - } +if (empty($config['modules'])) $config['modules'] = array('lint', 'functions', 'pattern'); + if( count( $files ) == 0 ) { if( count( $revisions ) > 0 ) { die( "Revisions invalid or contained no files from the supplied path\n" ); @@ -341,6 +342,7 @@ foreach( $files as $file ) { foreach( $modules['scanner'] as $module ) { $module->preScan( $file ); } $file_contents = ( $file['revision'] > 0 ? shell_exec( "svn cat -r {$rev} {$svn_root}{$svn_base}/{$file['filename']} 2>/dev/null" ) : file_get_contents( $file['filename'] ) ); $parser->parse( $file_contents ); + foreach( $modules['scanner'] as $module ) { $module->postScan( $file ); } if( $curses ) { ncurses_reset_prog_mode(); $nc_faults->title( sprintf( 'Faults [%d found]', count( $faults ) ) ); @@ -436,7 +438,9 @@ if( $curses ) { ncurses_end(); } else { $modules['output']->write( $config['output_file'] ); - sleep( 1 ); - err( sprintf( "Found %d faults in %d files.\n", count( $faults ), count( $files ) ) ); + if (false === $config['quiet']) { + sleep( 1 ); + err( sprintf( "Found %d faults in %d files.\n", count( $faults ), count( $files ) ) ); + } } ?> diff --git a/test.php b/test.php index 2e87c85..b96182a 100644 --- a/test.php +++ b/test.php @@ -23,11 +23,24 @@ $filters = array( 'pattern' => '/(? $filter ) { if( $object['type'] == $filter['type'] ) { if( preg_match( $filter['pattern'], $object['name'] ) > 0 ) { echo "fn: Triggered Filter '{$filter['desc']}' at line {$object['line']}\n"; diff --git a/tests/AllTests.php b/tests/AllTests.php new file mode 100644 index 0000000..9024ba5 --- /dev/null +++ b/tests/AllTests.php @@ -0,0 +1,22 @@ +addTestSuite('ParserTests'); + $suite->addTestSuite('ScannerTests'); + + return $suite; + } +} +?> diff --git a/tests/ParserTests.php b/tests/ParserTests.php new file mode 100644 index 0000000..cdaa7d4 --- /dev/null +++ b/tests/ParserTests.php @@ -0,0 +1,138 @@ +objects = array(); + } + public function callback($object) { + $this->objects[] = $object; + } + private function parse_and_count_type($code, $type, $fetch_mode = PHPPARSER_FETCH_ALL) { + $this->objects = array(); + $this->parser = new PHPParser($fetch_mode); + $this->parser->registerCallback(array($this, 'callback')); + $this->parser->parse($code); + $count = 0; + foreach ($this->objects as $o) { + if ($type == $o['type']) $count++; + } + return $count; + } + public function testFetchClassDefinitions() { + $code = ''; + $this->assertEquals(3, $this->parse_and_count_type($code, PHPPARSER_CLASS_DEF, PHPPARSER_FETCH_CLASSES)); + } + public function testFetchFunctionDefinitions() { + $code = ''; + $this->assertEquals(2, $this->parse_and_count_type($code, PHPPARSER_FUNCTION_DEF, PHPPARSER_FETCH_FUNCTIONS)); + } + + public function testFetchMethodDefinitions() { + $code = ''; + $this->assertEquals(3, $this->parse_and_count_type($code, PHPPARSER_FUNCTION_DEF, PHPPARSER_FETCH_FUNCTIONS)); + } + + public function testFetchFunctionCalls() { + $code = ''; + $this->assertEquals(3, $this->parse_and_count_type($code, PHPPARSER_FUNCTION_CALL, PHPPARSER_FETCH_CALLS)); + } + + public function testFetchIncludes() { + $code = ''; + $this->assertEquals(4, $this->parse_and_count_type($code, PHPPARSER_INCLUDE, PHPPARSER_FETCH_INCLUDES)); + } + + public function testFetchConstructs() { + $code = ''; + $this->assertEquals(3, $this->parse_and_count_type($code, PHPPARSER_LANGUAGE_CONSTRUCT, PHPPARSER_FETCH_CONSTRUCTS)); + } + + public function testFetchAssignments() { + /* + Function arguments count as an assignment (as they are assigned when the function is called) + Variables that are declared are considered assigned, even if no value is actually set. + List assignments are counted once for each item in the list + The final line here tests nested expressions, which should be solved from the inside out. + */ + $code = ''; + $this->parse_and_count_type($code, PHPPARSER_ASSIGNMENT, PHPPARSER_FETCH_EXPRESSIONS); + $assignments = array(); + foreach ($this->objects as $object) { + if (PHPPARSER_ASSIGNMENT == $object['type']) + $assignments[] = $object['name']; + } + $expected = array( + '$array=$array', + '$dbconn=$dbconn', + '$options=$options', + '$foo="bar"', + '$a=$array', + '$b=$array', + '$c=$array', + '$hash["answer"]=42', + '$data[$index[0]]="complex"', + '$index=0', + '$data[$index = 0]="nested"', + ); + $this->assertEquals($expected, $assignments); + } + + public function testCommentedCode() { + $code = ''; + $this->assertEquals(0, $this->parse_and_count_type($code, PHPPARSER_VARIABLE, PHPPARSER_FETCH_EXPRESSIONS)); + } +} +?> diff --git a/tests/ScannerTests.php b/tests/ScannerTests.php new file mode 100644 index 0000000..9d0b34f --- /dev/null +++ b/tests/ScannerTests.php @@ -0,0 +1,34 @@ + 0) + $command .= ' -m ' . implode(',', $modules); + if ($base) + $command .= ' -b ' . $base; + $command .= ' ' . implode(' ', $files); + return shell_exec($command); + } + + public function testModuleLint() { + $result = $this->run_scanner( + array('modules' => 'lint'), + 'samples/lint_failure.php' + ); + $this->assertEquals(file_get_contents('samples/lint_failure.txt'), $result); + } + public function testModulePatterns() { + $result = $this->run_scanner( + array('modules' => 'pattern'), + 'samples/patterns.php' + ); + $this->assertEquals(file_get_contents('samples/patterns.txt'), $result); + } +} +?> diff --git a/tests/samples/lint_failure.php b/tests/samples/lint_failure.php new file mode 100644 index 0000000..be8efcb --- /dev/null +++ b/tests/samples/lint_failure.php @@ -0,0 +1,9 @@ + diff --git a/tests/samples/lint_failure.txt b/tests/samples/lint_failure.txt new file mode 100644 index 0000000..a413412 --- /dev/null +++ b/tests/samples/lint_failure.txt @@ -0,0 +1 @@ +LintModule 2 lint_failure.php 8 ? 0 Parse error: syntax error, unexpected '}' in samples/lint_failure.php on line 8 diff --git a/tests/samples/patterns.php b/tests/samples/patterns.php new file mode 100644 index 0000000..8099e17 --- /dev/null +++ b/tests/samples/patterns.php @@ -0,0 +1,9 @@ + diff --git a/tests/samples/patterns.txt b/tests/samples/patterns.txt new file mode 100644 index 0000000..3636a6f --- /dev/null +++ b/tests/samples/patterns.txt @@ -0,0 +1,5 @@ +PatternModule 1 patterns.php 3 ? 0 Triggered Filter 'Echoing Sql' +PatternModule 0 patterns.php 4 ? 0 Triggered Filter 'Developer Email' +PatternModule 1 patterns.php 5 ? 0 Triggered Filter 'Evil Eval' +PatternModule 1 patterns.php 6 ? 0 Triggered Filter 'PRINT_R or VAR_DUMP' +PatternModule 1 patterns.php 7 ? 0 Triggered Filter 'PRINT_R or VAR_DUMP'