Added a simple checker for array sizes; uses clang to parse C/C++.
author Campbell Barton <ideasman42@gmail.com>
Sat, 6 Oct 2012 12:36:21 +0000 (12:36 +0000)
committer Campbell Barton <ideasman42@gmail.com>
Sat, 6 Oct 2012 12:36:21 +0000 (12:36 +0000)
Warns if an array is passed to a function whose parameter is declared with a larger size, e.g. a float[2] argument passed to a function defined as taking float[3] (or any greater size).

Existing free static checkers don't do this, from what I can tell.

GNUmakefile
build_files/cmake/clang_array_check.py [new file with mode: 0644]
build_files/cmake/cmake_static_check_clang_array.py [new file with mode: 0644]

index 7b333a5..90d76df 100644 (file)
@@ -171,6 +171,7 @@ help:
        @echo ""
        @echo "Static Source Code Checking (not associated with building blender)"
        @echo "  * check_cppcheck    - run blender source through cppcheck (C & C++)"
+       @echo "  * check_clang_array - run blender source through clang array checking script (C & C++)"
        @echo "  * check_splint      - run blenders source through splint (C only)"
        @echo "  * check_sparse      - run blenders source through sparse (C only)"
        @echo "  * check_smatch      - run blenders source through smatch (C only)"
@@ -244,6 +245,10 @@ check_cppcheck:
        $(CMAKE_CONFIG)
        cd $(BUILD_DIR) ; python3.2 $(BLENDER_DIR)/build_files/cmake/cmake_static_check_cppcheck.py
 
+check_clang_array:
+       $(CMAKE_CONFIG)
+       cd $(BUILD_DIR) ; python3.2 $(BLENDER_DIR)/build_files/cmake/cmake_static_check_clang_array.py
+
 check_splint:
        $(CMAKE_CONFIG)
        cd $(BUILD_DIR) ; python3.2 $(BLENDER_DIR)/build_files/cmake/cmake_static_check_splint.py
diff --git a/build_files/cmake/clang_array_check.py b/build_files/cmake/clang_array_check.py
new file mode 100644 (file)
index 0000000..df45648
--- /dev/null
@@ -0,0 +1,337 @@
+# ---
+# * Licensed under the Apache License, Version 2.0 (the "License");
+# * you may not use this file except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# ---
+# by Campbell Barton
+
+"""
+Invocation:
+
+   export CLANG_BIND_DIR="/dsk/src/llvm/tools/clang/bindings/python"
+   export CLANG_LIB_DIR="/opt/llvm/lib"
+
+   python2 clang_array_check.py somefile.c -DSOME_DEFINE -I/some/include
+
+... defines and includes are optional
+
+"""
+
+# -----------------------------------------------------------------------------
+# predefined function/arg sizes, handy sometimes, but not complete...
+
+# Maps a function name to {argument_index: minimum_array_length}.
+# These entries are merged into the sizes parsed from the source, for
+# functions whose prototypes only declare a plain pointer.
+defs_precalc = {
+    "glColor3bv":  {0: 3},
+    "glColor4bv":  {0: 4},
+
+    "glColor3ubv": {0: 3},
+    "glColor4ubv": {0: 4},
+
+    # was a duplicated "glColor4usv" key, which discarded the 3 component entry
+    "glColor3usv": {0: 3},
+    "glColor4usv": {0: 4},
+
+    "glColor3fv":  {0: 3},
+    "glColor4fv":  {0: 4},
+
+    "glColor3dv":  {0: 3},
+    "glColor4dv":  {0: 4},
+
+    "glVertex2fv": {0: 2},
+    "glVertex3fv": {0: 3},
+    "glVertex4fv": {0: 4},
+
+    "glEvalCoord1fv": {0: 1},
+    "glEvalCoord1dv": {0: 1},
+    "glEvalCoord2fv": {0: 2},
+    "glEvalCoord2dv": {0: 2},
+
+    "glRasterPos2dv": {0: 2},
+    "glRasterPos3dv": {0: 3},
+    "glRasterPos4dv": {0: 4},
+
+    "glRasterPos2fv": {0: 2},
+    "glRasterPos3fv": {0: 3},
+    "glRasterPos4fv": {0: 4},
+
+    "glRasterPos2sv": {0: 2},
+    "glRasterPos3sv": {0: 3},
+    "glRasterPos4sv": {0: 4},
+
+    "glTexCoord2fv": {0: 2},
+    "glTexCoord3fv": {0: 3},
+    "glTexCoord4fv": {0: 4},
+
+    "glTexCoord2dv": {0: 2},
+    "glTexCoord3dv": {0: 3},
+    "glTexCoord4dv": {0: 4},
+
+    "glNormal3fv": {0: 3},
+    "glNormal3dv": {0: 3},
+    "glNormal3bv": {0: 3},
+    "glNormal3iv": {0: 3},
+    "glNormal3sv": {0: 3},
+}
+
+# -----------------------------------------------------------------------------
+
+import sys
+
+import os
+
+# Both locations are read from the environment.
+# Examples with LLVM as the root dir: '/dsk/src/llvm'
+#
+#   CLANG_BIND_DIR="/dsk/src/llvm/tools/clang/bindings/python"
+#       path containing 'clang/__init__.py'
+#
+#   CLANG_LIB_DIR="/opt/llvm/lib"
+#       path containing libclang.so
+CLANG_BIND_DIR = os.environ.get("CLANG_BIND_DIR")
+CLANG_LIB_DIR = os.environ.get("CLANG_LIB_DIR")
+
+# a missing variable is reported, the script carries on regardless
+if CLANG_BIND_DIR is None:
+    print("$CLANG_BIND_DIR python binding dir not set")
+if CLANG_LIB_DIR is None:
+    print("$CLANG_LIB_DIR clang lib dir not set")
+
+# the bindings can only be imported once their directory is on the path
+sys.path.append(CLANG_BIND_DIR)
+
+import clang
+import clang.cindex
+from clang.cindex import CursorKind, TypeKind, TokenKind
+
+clang.cindex.Config.set_library_path(CLANG_LIB_DIR)
+
+index = clang.cindex.Index.create()
+
+# argv[1] is the source file, everything after it is passed on to clang
+# (defines, includes, ...)
+args = sys.argv[2:]
+
+tu = index.parse(sys.argv[1], args)
+print 'Translation unit:', tu.spelling
+
+# -----------------------------------------------------------------------------
+
+def function_parm_wash_tokens(parm):
+    """
+    Return the tokens of a declaration without the trailing separator,
+    qualifiers and comments.
+
+    'parm' must be a PARM_DECL, VAR_DECL or FIELD_DECL cursor.
+    One trailing ',', ')' or ';' is dropped, then 'const', 'restrict',
+    'volatile' and any keyword starting with '__' are filtered out
+    together with comments.  Tokens of a kind this function does not
+    know about are reported on stdout and kept.
+    """
+    # print(parm.kind)
+    assert parm.kind in (CursorKind.PARM_DECL,
+                         CursorKind.VAR_DECL,  # XXX, double check this
+                         CursorKind.FIELD_DECL,
+                         )
+
+    tokens = list(parm.get_tokens())
+    if not tokens:
+        return tokens
+
+    # remove trailing char
+    last = tokens[-1]
+    if last.kind == TokenKind.PUNCTUATION and last.spelling in (",", ")", ";"):
+        tokens.pop()
+
+    def keep(tok):
+        kind = tok.kind
+        text = tok.spelling
+
+        if kind == TokenKind.KEYWORD:
+            # drop qualifiers and compiler keywords, eg: __restrict
+            return not (text in ("const", "restrict", "volatile") or
+                        text.startswith("__"))
+
+        if kind == TokenKind.COMMENT:
+            return False
+
+        if kind not in (TokenKind.LITERAL,
+                        TokenKind.PUNCTUATION,
+                        TokenKind.IDENTIFIER):
+            print("Unknown!", kind, text)
+
+        return True
+
+    return [tok for tok in tokens if keep(tok)]
+
+
+def parm_size(node_child):
+    """
+    Return the array size a declaration ends with, eg 3 for 'float foo[3]',
+    or -1 when the washed tokens do not end in '[', a digit-only literal, ']'.
+    """
+    tokens = function_parm_wash_tokens(node_child)
+
+    # print(" ".join([t.spelling for t in tokens]))
+
+    # NOT PERFECT CODE, EXTRACT SIZE FROM TOKENS
+    if len(tokens) < 3:  # foo [ 1 ]
+        return -1
+
+    t_open, t_size, t_close = tokens[-3:]
+
+    if t_open.kind != TokenKind.PUNCTUATION or t_open.spelling != "[":
+        return -1
+    if t_size.kind != TokenKind.LITERAL or not t_size.spelling.isdigit():
+        return -1
+    if t_close.kind != TokenKind.PUNCTUATION or t_close.spelling != "]":
+        return -1
+
+    return int(t_size.spelling)
+
+
+
+def function_get_arg_sizes(node):
+    """
+    Return a dict of {arg_index: arg_array_size, ...} for the function
+    declaration 'node'.
+
+    Only pointer arguments whose pointee type is POD and which
+    parm_size() finds a size for are included.
+    """
+    arg_sizes = {}
+
+    parms = [c for c in node.get_children()
+             if c.kind == CursorKind.PARM_DECL]
+
+    for arg_index, parm in enumerate(parms):
+        # array arguments show up as TypeKind.POINTER
+        if parm.type.kind != TypeKind.POINTER:
+            continue
+        if not parm.type.get_pointee().is_pod():
+            continue
+
+        size = parm_size(parm)
+        if size != -1:
+            arg_sizes[arg_index] = size
+
+    return arg_sizes
+
+
+# -----------------------------------------------------------------------------
+# function name -> {arg_index: arg_array_size, ...}
+_defs = {}
+
+
+def lookup_function_size_def(func_id):
+    """
+    Return the size dict stored for 'func_id', or an empty tuple when the
+    function is unknown.
+    """
+    try:
+        return _defs[func_id]
+    except KeyError:
+        return ()
+
+# -----------------------------------------------------------------------------
+
+def file_check_arg_sizes(tu):
+    """
+    Scan every cursor of the translation unit 'tu' and print a message for
+    each function call that passes an array smaller than the size stored
+    for that argument.
+    """
+
+    # main checking function
+    def validate_arg_size(node):
+        """
+        Loop over args and validate sizes for args we KNOW the size of.
+        """
+        assert node.kind == CursorKind.CALL_EXPR
+
+        call_children = list(node.get_children())
+        if not call_children:
+            return  # XXX, look into this, happens on C++
+
+        # first child is the function call, the remainder are the arguments
+        func = call_children[0]
+        call_args = call_children[1:]
+
+        # the first token of the callee is the key for the size lookup
+        func_tokens = list(func.get_tokens())
+        if not func_tokens:
+            return
+
+        args_size_definition = lookup_function_size_def(func_tokens[0].spelling)
+        if not args_size_definition:
+            return
+
+        for i, node_child in enumerate(call_args):
+            arg_children = list(node_child.get_children())
+
+            # skip if we dont have an index...
+            size_def = args_size_definition.get(i, -1)
+            if size_def == -1:
+                continue
+
+            # only an argument wrapping a single reference is handled
+            if len(arg_children) != 1:
+                continue
+
+            arg = arg_children[0]
+            if arg.kind not in (CursorKind.DECL_REF_EXPR,
+                                CursorKind.UNEXPOSED_EXPR):
+                continue
+            if arg.type.kind != TypeKind.POINTER:
+                continue
+
+            dec = arg.get_definition()
+            if not dec:
+                continue
+
+            size = parm_size(dec)
+
+            # a size of -1 (nothing found) or 0 is never reported
+            if size == -1 or size == 0:
+                continue
+
+            if size < size_def:
+                location = node.location
+                print("%s:%d:%d: argument %d is size %d, should be %d" %
+                      (location.file,
+                       location.line,
+                       location.column,
+                       i + 1, size, size_def,
+                       ))
+
+    # we dont really care what we are looking at, just scan entire file for
+    # function calls.
+    def recursive_func_call_check(node):
+        if node.kind == CursorKind.CALL_EXPR:
+            validate_arg_size(node)
+
+        for child in node.get_children():
+            recursive_func_call_check(child)
+
+    recursive_func_call_check(tu.cursor)
+
+
+# -- first pass, cache function definitions sizes
+
+# PRINT FUNC DEFINES
+def recursive_arg_sizes(node, ):
+    """
+    Store the sized array arguments of every function declaration found at
+    or below 'node' in the module level '_defs' dict, keyed by function
+    name.  A later declaration with the same name replaces the earlier one.
+    """
+    # print(node.kind, node.spelling)
+    if node.kind == CursorKind.FUNCTION_DECL:
+        _defs[node.spelling] = function_get_arg_sizes(node)
+
+    for child in node.get_children():
+        recursive_arg_sizes(child)
+
+
+# cache function sizes, the predefined table is merged afterwards so its
+# entries replace parsed ones of the same name
+recursive_arg_sizes(tu.cursor)
+_defs.update(defs_precalc)
+
+# --- second pass, check against def's
+file_check_arg_sizes(tu)
diff --git a/build_files/cmake/cmake_static_check_clang_array.py b/build_files/cmake/cmake_static_check_clang_array.py
new file mode 100644 (file)
index 0000000..ff15a13
--- /dev/null
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3.2
+
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# Contributor(s): Campbell Barton
+#
+# ***** END GPL LICENSE BLOCK *****
+
+# <pep8 compliant>
+
+import project_source_info
+import subprocess
+import sys
+import os
+
+# Passed to project_source_info.build_info() as 'ignore_prefix_list'.
+CHECKER_IGNORE_PREFIX = [
+    "extern",
+    "intern/moto",
+    ]
+
+# First element of every checker command (clang_array_check.py documents
+# its invocation with python2).
+CHECKER_BIN = "python2"
+
+# Arguments placed directly after CHECKER_BIN, before the source file:
+# the checker script itself, which sits next to this file, and one extra include.
+CHECKER_ARGS = [
+    os.path.join(os.path.dirname(__file__), "clang_array_check.py"),
+    # not sure why this is needed, but it is.
+    "-I" + os.path.join(project_source_info.SOURCE_DIR, "extern", "glew", "include"),
+    ]
+
+
+def main():
+    """
+    Build one clang_array_check.py command per source file reported by
+    project_source_info.build_info() and hand them all to
+    project_source_info.queue_processes().
+
+    Each command is: CHECKER_BIN, CHECKER_ARGS, the source file, then the
+    file's include dirs as '-I...' and its defines as '-D...'.
+    """
+    source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
+
+    check_commands = []
+    for c, inc_dirs, defs in source_info:
+        cmd = ([CHECKER_BIN] +
+               CHECKER_ARGS +
+               [c] +
+               [("-I%s" % i) for i in inc_dirs] +
+               [("-D%s" % d) for d in defs]
+               )
+
+        check_commands.append((c, cmd))
+
+    # index of the last command, used to scale progress from 0 to 100
+    index_last = len(check_commands) - 1
+
+    def my_process(i, c, cmd):
+        # a single source file has index_last == 0, report it as complete
+        # instead of dividing by zero
+        if index_last > 0:
+            percent = 100.0 * (i / index_last)
+        else:
+            percent = 100.0
+        percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
+
+        # write first and flush after, so the progress prefix is shown
+        # before any output of the process started below
+        sys.stdout.write("%s " % percent_str)
+        sys.stdout.flush()
+
+        return subprocess.Popen(cmd)
+
+    process_functions = [(my_process, (i, c, cmd))
+                         for i, (c, cmd) in enumerate(check_commands)]
+
+    project_source_info.queue_processes(process_functions)
+
+
+if __name__ == "__main__":
+    main()