Merged changes in the trunk up to revision 53729.
[blender.git] / build_files / cmake / clang_array_check.py
1 # ---
2 # * Licensed under the Apache License, Version 2.0 (the "License");
3 # * you may not use this file except in compliance with the License.
4 # * You may obtain a copy of the License at
5 # *
6 # * http://www.apache.org/licenses/LICENSE-2.0
7 # ---
8 # by Campbell Barton
9
10 """
11 Invocation:
12
13    export CLANG_BIND_DIR="/dsk/src/llvm/tools/clang/bindings/python"
14    export CLANG_LIB_DIR="/opt/llvm/lib"
15
16    python2 clang_array_check.py somefile.c -DSOME_DEFINE -I/some/include
17
18 ... defines and includes are optional
19
20 """
21
# Delay evaluating function declarations until a call to them is checked.
USE_LAZY_INIT = True

# -----------------------------------------------------------------------------
# Predefined function/argument sizes, handy sometimes, but not complete...
#
# Maps a function name to {argument index: expected array length}.

defs_precalc = {
    "glColor3bv": {0: 3},
    "glColor4bv": {0: 4},

    "glColor3ubv": {0: 3},
    "glColor4ubv": {0: 4},

    # NOTE: the 3 component entry used to be spelled "glColor4usv", which made
    # it a duplicate key that was silently overwritten by the entry below.
    "glColor3usv": {0: 3},
    "glColor4usv": {0: 4},

    "glColor3fv": {0: 3},
    "glColor4fv": {0: 4},

    "glColor3dv": {0: 3},
    "glColor4dv": {0: 4},

    "glVertex2fv": {0: 2},
    "glVertex3fv": {0: 3},
    "glVertex4fv": {0: 4},

    "glEvalCoord1fv": {0: 1},
    "glEvalCoord1dv": {0: 1},
    "glEvalCoord2fv": {0: 2},
    "glEvalCoord2dv": {0: 2},

    "glRasterPos2dv": {0: 2},
    "glRasterPos3dv": {0: 3},
    "glRasterPos4dv": {0: 4},

    "glRasterPos2fv": {0: 2},
    "glRasterPos3fv": {0: 3},
    "glRasterPos4fv": {0: 4},

    "glRasterPos2sv": {0: 2},
    "glRasterPos3sv": {0: 3},
    "glRasterPos4sv": {0: 4},

    "glTexCoord2fv": {0: 2},
    "glTexCoord3fv": {0: 3},
    "glTexCoord4fv": {0: 4},

    "glTexCoord2dv": {0: 2},
    "glTexCoord3dv": {0: 3},
    "glTexCoord4dv": {0: 4},

    "glNormal3fv": {0: 3},
    "glNormal3dv": {0: 3},
    "glNormal3bv": {0: 3},
    "glNormal3iv": {0: 3},
    "glNormal3sv": {0: 3},
}
79
80 # -----------------------------------------------------------------------------
81
import os
import sys

# The libclang Python bindings and shared library are located through the
# environment. Examples with LLVM as the root dir: '/dsk/src/llvm'
#
#    CLANG_BIND_DIR: path containing 'clang/__init__.py', e.g.
#                    "/dsk/src/llvm/tools/clang/bindings/python"
#    CLANG_LIB_DIR:  path containing libclang.so, e.g.
#                    "/opt/llvm/lib"
CLANG_BIND_DIR = os.environ.get("CLANG_BIND_DIR")
CLANG_LIB_DIR = os.environ.get("CLANG_LIB_DIR")

if CLANG_BIND_DIR is None:
    print("$CLANG_BIND_DIR python binding dir not set")
else:
    # only extend the search path with a real directory, never with None.
    sys.path.append(CLANG_BIND_DIR)

if CLANG_LIB_DIR is None:
    print("$CLANG_LIB_DIR clang lib dir not set")

# the source file is the first argument, fail early with a readable message
# instead of an IndexError further down.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s somefile.c [-DSOME_DEFINE ...] [-I/some/include ...]\n" %
                     sys.argv[0])
    sys.exit(1)

# must come after the 'sys.path' setup above.
import clang
import clang.cindex
from clang.cindex import (CursorKind,
                          TypeKind,
                          TokenKind)

# without an explicit directory, leave the bindings to their default lookup.
if CLANG_LIB_DIR is not None:
    clang.cindex.Config.set_library_path(CLANG_LIB_DIR)

index = clang.cindex.Index.create()

# everything after the source file is passed on to clang (defines, includes).
args = sys.argv[2:]
# print(args)

tu = index.parse(sys.argv[1], args)
# NOTE(review): older bindings appear to return None on a failed parse where
# newer ones raise, handle the None case explicitly - confirm.
if tu is None:
    sys.stderr.write("unable to parse: %s\n" % sys.argv[1])
    sys.exit(1)
print('Translation unit: %s' % tu.spelling)
118 print('Translation unit: %s' % tu.spelling)
119
120 # -----------------------------------------------------------------------------
121
122
def function_parm_wash_tokens(parm):
    """
    Return the tokens of a declaration without the trailing separator
    and without qualifiers.

    A single trailing ',', ')' or ';' is dropped, as are the keywords
    'const', 'restrict', 'volatile', any keyword starting with '__'
    (eg: '__restrict') and comments.

    :arg parm: a parameter, variable or field declaration cursor.
    :return: list of the remaining tokens, in their original order.
    """
    # print(parm.kind)
    assert parm.kind in (CursorKind.PARM_DECL,
                         CursorKind.VAR_DECL,  # XXX, double check this
                         CursorKind.FIELD_DECL,
                         )

    tokens = list(parm.get_tokens())
    if not tokens:
        return tokens

    # remove trailing char
    if tokens[-1].kind == TokenKind.PUNCTUATION:
        if tokens[-1].spelling in (",", ")", ";"):
            tokens.pop()

    t_new = []
    for t in tokens:
        t_kind = t.kind
        t_spelling = t.spelling
        ok = True
        if t_kind == TokenKind.KEYWORD:
            if t_spelling in ("const", "restrict", "volatile"):
                ok = False
            elif t_spelling.startswith("__"):
                ok = False  # __restrict
        elif t_kind in (TokenKind.COMMENT, ):
            ok = False

        elif t_kind in (TokenKind.LITERAL,
                        TokenKind.PUNCTUATION,
                        TokenKind.IDENTIFIER):
            # these are kept as-is
            pass

        else:
            # unexpected token kind: report it, but keep the token.
            print("Unknown!", t_kind, t_spelling)

        # if its OK we will add
        if ok:
            t_new.append(t)
    return t_new
173
174
def parm_size(node_child):
    """
    Return the array length of a declaration, or -1 when none is found.

    NOT PERFECT CODE, the size is extracted from the washed tokens:
    it is only found when they end with '[', an all-digit literal, ']'.
    """
    washed = function_parm_wash_tokens(node_child)

    # print(" ".join([t.spelling for t in washed]))

    # need at least: foo [ 1 ]
    if len(washed) < 3:
        return -1

    t_open, t_size, t_close = washed[-3:]

    if t_open.kind != TokenKind.PUNCTUATION or t_open.spelling != "[":
        return -1
    if t_size.kind != TokenKind.LITERAL or not t_size.spelling.isdigit():
        return -1
    if t_close.kind != TokenKind.PUNCTUATION or t_close.spelling != "]":
        return -1

    return int(t_size.spelling)
188
189
def function_get_arg_sizes(node):
    """
    Return a dict of (index: size) items for the parameters of a function,
    {arg_index: arg_array_size, ...}

    The index counts PARM_DECL children only. A parameter is included when
    its type is a pointer to a POD type and 'parm_size' finds a size for it.
    """
    sizes = {}

    parms = [c for c in node.get_children()
             if c.kind == CursorKind.PARM_DECL]

    for index, parm in enumerate(parms):
        # print(parm.type.kind, parm.spelling)  # TypeKind.POINTER
        if parm.type.kind != TypeKind.POINTER:
            continue
        if not parm.type.get_pointee().is_pod():
            continue

        size = parm_size(parm)
        if size != -1:
            sizes[index] = size

    return sizes
212
213
214 # -----------------------------------------------------------------------------
# Maps a function name to its {arg_index: size} dict, with lazy
# initialization the value may instead be the function declaration
# which has not been evaluated yet.
_defs = {}


def lookup_function_size_def(func_id):
    """
    Return the {arg_index: size} dict for the function named 'func_id',
    an empty dict when the function is unknown.

    With USE_LAZY_INIT, an entry which is not a dict yet is evaluated
    with 'function_get_arg_sizes' and the result is stored for next time.
    """
    found = _defs.get(func_id, {})
    if USE_LAZY_INIT and type(found) is not dict:
        found = function_get_arg_sizes(found)
        _defs[func_id] = found
    return found
226
227 # -----------------------------------------------------------------------------
228
229
def file_check_arg_sizes(tu):
    """
    Scan the whole translation unit for function calls and print a message
    for every argument whose array size is smaller than the size
    known for that argument of the called function.
    """

    # main checking function
    def validate_arg_size(node):
        """
        Loop over args and validate sizes for args we KNOW the size of.
        """
        assert node.kind == CursorKind.CALL_EXPR

        call_children = list(node.get_children())
        if not call_children:
            return  # XXX, look into this, happens on C++

        # first child is the function call, the rest are its arguments.
        func = call_children[0]
        call_args = call_children[1:]

        # the first token of the call is the key for the size lookup.
        func_tokens = list(func.get_tokens())
        if not func_tokens:
            return

        func_id = func_tokens[0].spelling
        args_size_definition = lookup_function_size_def(func_id)
        if not args_size_definition:
            return

        for i, call_arg in enumerate(call_args):
            arg_children = list(call_arg.get_children())

            # skip if we dont have a size for this index...
            size_def = args_size_definition.get(i, -1)
            if size_def == -1:
                continue

            # print([c.kind for c in arg_children])
            # print(" ".join([t.spelling for t in call_arg.get_tokens()]))

            if len(arg_children) != 1:
                continue

            arg = arg_children[0]
            if arg.kind not in (CursorKind.DECL_REF_EXPR,
                                CursorKind.UNEXPOSED_EXPR):
                continue
            if arg.type.kind != TypeKind.POINTER:
                continue

            dec = arg.get_definition()
            if not dec:
                continue

            size = parm_size(dec)

            # -1 means no size was found, sizes of 0 and 1 are never reported.
            if size in (-1, 0, 1) or size >= size_def:
                continue

            location = node.location
            print("%s:%d:%d: argument %d is size %d, should be %d" %
                  (location.file,
                   location.line,
                   location.column,
                   i + 1, size, size_def
                   ))

    # we dont really care what we are looking at, just scan entire file for
    # function calls (depth first, a node is handled before its children).
    pending = [tu.cursor]
    while pending:
        node = pending.pop()
        if node.kind == CursorKind.CALL_EXPR:
            validate_arg_size(node)

        # reversed so the first child is the next one popped.
        pending.extend(reversed(list(node.get_children())))
330
331
# -- first pass, cache function definitions sizes

def recursive_arg_sizes(node):
    """
    Walk 'node' and all of its children, adding an entry to '_defs'
    for every function declaration, keyed by the function name.

    With USE_LAZY_INIT the declaration itself is stored and only evaluated
    on first lookup, otherwise its argument sizes are calculated right away.
    """
    # print(node.kind, node.spelling)
    if node.kind == CursorKind.FUNCTION_DECL:
        _defs[node.spelling] = (node if USE_LAZY_INIT else
                                function_get_arg_sizes(node))
        # print("adding", node.spelling)

    for child in node.get_children():
        recursive_arg_sizes(child)


# cache function sizes, predefined sizes replace any parsed entry
# with the same name.
recursive_arg_sizes(tu.cursor)
_defs.update(defs_precalc)

# --- second pass, check against def's
file_check_arg_sizes(tu)