aa10b71783c29488b91d017261f7f05cd0db8bf4
[blender.git] / release / scripts / modules / bl_i18n_utils / utils_spell_check.py
1 # ##### BEGIN GPL LICENSE BLOCK #####
2 #
3 #  This program is free software; you can redistribute it and/or
4 #  modify it under the terms of the GNU General Public License
5 #  as published by the Free Software Foundation; either version 2
6 #  of the License, or (at your option) any later version.
7 #
8 #  This program is distributed in the hope that it will be useful,
9 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
10 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 #  GNU General Public License for more details.
12 #
13 #  You should have received a copy of the GNU General Public License
14 #  along with this program; if not, write to the Free Software Foundation,
15 #  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 #
17 # ##### END GPL LICENSE BLOCK #####
18
19 # <pep8 compliant>
20
21 import enchant
22 import os
23 import pickle
24 import re
25
26
class SpellChecker:
    """
    A basic spell checker.

    Words are extracted from a message with a regular expression, then
    checked against a built-in white-list (``uimsgs``), a set of already
    accepted words/messages (``self.cache``) and finally the ``enchant``
    dictionary for the requested language.

    The set of accepted words can be persisted between runs in the pickle
    file named by ``settings.SPELL_CACHE``. That file is only ever rewritten
    when it already exists; an empty file is accepted as an empty cache, so
    creating an empty file is enough to switch persistence on.
    """

    # These must be all lower case for comparisons
    uimsgs = {
        # OK words
        "aren",  # aren't
        "betweens",  # yuck! in-betweens!
        "boolean", "booleans",
        "couldn",  # couldn't
        "decrement",
        "derivate",
        "doesn",  # doesn't
        "equi",  # equi-angular, etc.
        "fader",
        "globbing",
        "hasn",  # hasn't
        "hoc",  # ad-hoc
        "indices",
        "iridas",
        "isn",  # isn't
        "iterable",
        "kyrgyz",
        "latin",
        "merchantability",
        "mplayer",
        "teleport", "teleporting",
        "vertices",

        # Merged words
        "addon", "addons",
        "antialiasing",
        "arcsine", "arccosine", "arctangent",
        "autoclip",
        "autocomplete",
        "autoexec",
        "autoexecution",
        "autoname",
        "autopack",
        "autosave",
        "autoscale",
        "autosmooth",
        "autosplit",
        "backface", "backfacing",
        "backimage",
        "backscattered",
        "bandnoise",
        "bindcode",
        "bitflag", "bitflags",
        "bitrate",
        "blackbody",
        "blendfile",
        "blendin",
        "bonesize",
        "boundbox",
        "boxpack",
        "buffersize",
        "builtin", "builtins",
        "bytecode",
        "chunksize",
        "customdata",
        "dataset", "datasets",
        "de",
        "defocus",
        "denoise",
        "despill", "despilling",
        "editcurve",
        "editmesh",
        "filebrowser",
        "filelist",
        "filename", "filenames",
        "filepath", "filepaths",
        "forcefield", "forcefields",
        "fulldome", "fulldomes",
        "fullscreen",
        "gridline",
        "hemi",
        "inbetween",
        "inscatter", "inscattering",
        "libdata",
        "lightless",
        "lineset",
        "linestyle",
        "localview",
        "lookup", "lookups",
        "mathutils",
        "midlevel",
        "midground",
        "mixdown",
        "multi",
        "multifractal",
        "multires", "multiresolution",
        "multisampling",
        "multitexture",
        "multiuser",
        "namespace",
        "keyconfig",
        "playhead",
        "polyline",
        "popup", "popups",
        "pre",
        "precache", "precaching",
        "precalculate",
        "prefetch",
        "premultiply", "premultiplied",
        "prepass",
        "prepend",
        "preprocess", "preprocessing",
        "preseek",
        "promillage",
        "pushdown",
        "raytree",
        "readonly",
        "realtime",
        "rekey",
        "remesh",
        "reprojection",
        "resize",
        "restpose",
        "retarget", "retargets", "retargeting", "retargeted",
        "rigidbody",
        "ringnoise",
        "rolloff",
        "runtime",
        "screencast", "screenshot", "screenshots",
        "selfcollision",
        "shadowbuffer", "shadowbuffers",
        "singletexture",
        "spellcheck", "spellchecking",
        "startup",
        "stateful",
        "starfield",
        "subflare", "subflares",
        "subframe", "subframes",
        "subclass", "subclasses", "subclassing",
        "subdirectory", "subdirectories", "subdir", "subdirs",
        "submodule", "submodules",
        "subpath",
        "subsize",
        "substep", "substeps",
        "targetless",
        "textbox", "textboxes",
        "tilemode",
        "timestamp", "timestamps",
        "timestep", "timesteps",
        "todo",
        "un",
        "unbake",
        "uncomment",
        "unculled",
        "undeformed",
        "undistort", "undistortion",
        "ungroup", "ungrouped",
        "unhide",
        "unindent",
        "unkeyed",
        "unpremultiply",
        "unprojected",
        "unreacted",
        "unregister",
        "unselected", "unselectable",
        "unsubdivided", "unsubdivide",
        "unshadowed",
        "unspill",
        "unstitchable",
        "vectorscope",
        "whitespace", "whitespaces",
        "worldspace",
        "workflow",

        # Neologisms, slangs
        "affectable",
        "animatable",
        "automagic", "automagically",
        "blobby",
        "blockiness", "blocky",
        "collider", "colliders",
        "deformer", "deformers",
        "determinator",
        "editability",
        "keyer",
        "lacunarity",
        "numerics",
        "occluder", "occluders",
        "passepartout",
        "perspectively",
        "pixelate",
        "polygonization", "polygonalization",  # yuck!
        "selectability",
        "slurph",
        "stitchable",
        "symmetrize",
        "trackability",
        "transmissivity",
        "rasterized", "rasterization", "rasterizer",
        "renderer", "renderable", "renderability",

        # Abbreviations
        "aero",
        "amb",
        "anim",
        "bool",
        "calc",
        "config", "configs",
        "const",
        "coord", "coords",
        "degr",
        "dof",
        "dupli", "duplis",
        "eg",
        "esc",
        "expr",
        "fac",
        "fra",
        "frs",
        "grless",
        "http",
        "init",
        "kbit", "kb",
        "lang", "langs",
        "lclick", "rclick",
        "lensdist",
        "loc", "rot", "pos",
        "lorem",
        "luma",
        "mem",
        "multicam",
        "num",
        "ok",
        "orco",
        "ortho",
        "persp",
        "pref", "prefs",
        "prev",
        "param",
        "premul",
        "quad", "quads",
        "quat", "quats",
        "recalc", "recalcs",
        "refl",
        "sce",
        "sel",
        "spec",
        "struct", "structs",
        "sys",
        "tex",
        "tri", "tris",
        "uv", "uvs", "uvw", "uw", "uvmap",
        "ve",
        "vec",
        "vel",  # velocity!
        "vert", "verts",
        "vis",
        "xyz", "xzy", "yxz", "yzx", "zxy", "zyx",
        "xy", "xz", "yx", "yz", "zx", "zy",

        # General computer/science terms
        "bitangent",
        "boid", "boids",
        "equisolid",
        "euler", "eulers",
        "fribidi",
        "gettext",
        "hashable",
        "intrinsics",
        "isosurface",
        "jitter", "jittering", "jittered",
        "keymap", "keymaps",
        "lambertian",
        "laplacian",
        "metadata",
        "msgfmt",
        "nand", "xnor",
        "normals",
        "numpad",
        "octree",
        "opengl",
        "openmp",
        "polyline", "polylines",
        "pulldown", "pulldowns",
        "quantized",
        "samplerate",
        "scrollback",
        "scrollbar",
        "scroller",
        "searchable",
        "spacebar",
        "tooltip", "tooltips",
        "trackpad",
        "tuple",
        "unicode",
        "viewport", "viewports",
        "viscoelastic",
        "wildcard", "wildcards",

        # General computer graphics terms
        "anaglyph",
        "bezier", "beziers",
        "bicubic",
        "bilinear",
        "binormal",
        "blackpoint", "whitepoint",
        "blinn",
        "bokeh",
        "catadioptric",
        "centroid",
        "chrominance",
        "codec", "codecs",
        "collada",
        "compositing",
        "crossfade",
        "deinterlace",
        "dropoff",
        "dv",
        "eigenvectors",
        "equirectangular",
        "fisheye",
        "framerate",
        "gimbal",
        "grayscale",
        "icosphere",
        "inpaint",
        "lightmap",
        "lossless", "lossy",
        "matcap",
        "midtones",
        "mipmap", "mipmaps", "mip",
        "ngon", "ngons",
        "ntsc",
        "nurb", "nurbs",
        "perlin",
        "phong",
        "qi",
        "radiosity",
        "raycasting",
        "raytrace", "raytracing", "raytraced",
        "renderfarm",
        "scanfill",
        "shader", "shaders",
        "specular", "specularity",
        "spillmap",
        "sobel",
        "texel",
        "tonemap",
        "toon",
        "timecode",
        "voronoi",
        "voxel", "voxels",
        "vsync",
        "wireframe",
        "zmask",
        "ztransp",

        # Blender terms
        "audaspace",
        "bbone",
        "breakdowner",
        "bspline",
        "bweight",
        "colorband",
        "datablock", "datablocks",
        "despeckle",
        "dopesheet",
        "dupliface", "duplifaces",
        "dupliframe", "dupliframes",
        "dupliobject", "dupliob",
        "dupligroup",
        "duplivert",
        "dyntopo",
        "editbone",
        "editmode",
        "fcurve", "fcurves",
        "fedge", "fedges",
        "fluidsim",
        "frameserver",
        "freestyle",
        "enum", "enums",
        "gpencil",
        "idcol",
        "keyframe", "keyframes", "keyframing", "keyframed",
        "metaball", "metaballs", "mball",
        "metaelement", "metaelements",
        "metastrip", "metastrips",
        "movieclip",
        "mpoly",
        "mtex",
        "nabla",
        "navmesh",
        "outliner",
        "paintmap", "paintmaps",
        "polygroup", "polygroups",
        "poselib",
        "pushpull",
        "pyconstraint", "pyconstraints",
        "qe",  # keys...
        "shapekey", "shapekeys",
        "shrinkfatten",
        "shrinkwrap",
        "softbody",
        "stucci",
        "sunsky",
        "subsurf",
        "tessface", "tessfaces",
        "texface",
        "timeline", "timelines",
        "tosphere",
        "uilist",
        "vcol", "vcols",
        "vgroup", "vgroups",
        "vinterlace",
        "wasd", "wasdqe",  # keys...
        "wetmap", "wetmaps",
        "wpaint",
        "uvwarp",

        # Algorithm names
        "beckmann",
        "catmull",
        "catrom",
        "chebychev",
        "courant",
        "hosek",
        "kutta",
        "lennard",
        "mikktspace",
        "minkowski",
        "minnaert",
        "musgrave",
        "nayar",
        "netravali",
        "oren",
        "preetham",
        "prewitt",
        "runge",
        "sobol",
        "verlet",
        "wilkie",
        "worley",

        # Acronyms
        "aa", "msaa",
        "ao",
        "api",
        "asc", "cdl",
        "ascii",
        "atrac",
        "bsdf",
        "bssrdf",
        "bw",
        "ccd",
        "cmd",
        "cpus",
        "ctrl",
        "cw", "ccw",
        "dev",
        "djv",
        "dpi",
        "dvar",
        "dx",
        "eo",
        "fh",
        "fov",
        "fft",
        "futura",
        "gfx",
        "gl",
        "glsl",
        "gpl",
        "gpu", "gpus",
        "hc",
        "hdc",
        "hdr",
        "hh", "mm", "ss", "ff",  # hh:mm:ss:ff timecode
        "hsv", "hsva", "hsl",
        "id",
        "ior",
        "itu",
        "lhs",
        "lmb", "mmb", "rmb",
        "kb",
        "mocap",
        "msgid", "msgids",
        "mux",
        "ndof",
        "ppc",
        "precisa",
        "px",
        "qmc",
        "rgb", "rgba",
        "rhs",
        "rv",
        "sdl",
        "sl",
        "smpte",
        "svn",
        "ui",
        "unix",
        "vbo", "vbos",
        "ycc", "ycca",
        "yuv", "yuva",

        # Blender acronyms
        "bge",
        "bli",
        "bpy",
        "bvh",
        "dbvt",
        "dop",  # BLI K-Dop BVH
        "ik",
        "nla",
        "py",
        "qbvh",
        "rna",
        "rvo",
        "simd",
        "sph",
        "svbvh",

        # Files types/formats
        "avi",
        "attrac",
        "autocad",
        "autodesk",
        "bmp",
        "btx",
        "cineon",
        "dpx",
        "dxf",
        "eps",
        "exr",
        "fbx",
        "ffmpeg",
        "flac",
        "gzip",
        "ico",
        "jpg", "jpeg",
        "json",
        "matroska",
        "mdd",
        "mkv",
        "mpeg", "mjpeg",
        "mtl",
        "ogg",
        "openjpeg",
        "osl",
        "oso",
        "piz",
        "png",
        "po",
        "quicktime",
        "rle",
        "sgi",
        "stl",
        "svg",
        "targa", "tga",
        "tiff",
        "theora",
        "vorbis",
        "wav",
        "xiph",
        "xml",
        "xna",
        "xvid",
    }

    # Zero-width context a word must start after: whitespace, '*', a quote or
    # back-tick, a letter followed by '/' or '-', or the start of the text.
    _valid_before = "(?<=[\\s*'\"`])|(?<=[a-zA-Z][/-])|(?<=^)"
    # Zero-width context a word must end before: whitespace, a quote or
    # back-tick, common punctuation, a '/' or '-' leading to another letter,
    # or the end of the text.
    _valid_after = "(?=[\\s'\"`.!?,;:])|(?=[/-]\\s*[a-zA-Z])|(?=$)"
    # A word is either upper-case letter(s) followed by lower-case ones, an
    # all-upper-case run, or an all-lower-case run. The last two alternatives
    # may match the empty string; split_words() filters those out.
    _valid_words = "(?:{})(?:(?:[A-Z]+[a-z]*)|[A-Z]*|[a-z]*)(?:{})".format(_valid_before, _valid_after)
    _split_words = re.compile(_valid_words).findall

    @classmethod
    def split_words(cls, text):
        """
        Return the list of non-empty words found in text, in order.
        """
        return [w for w in cls._split_words(text) if w]

    @staticmethod
    def _load_cache(path):
        """
        Return the set of entries pickled in the file at path.

        An empty set is returned when path is empty, when the file does not
        exist, or when the file is empty/truncated or not a valid pickle.
        """
        if not (path and os.path.exists(path)):
            return set()
        try:
            with open(path, 'rb') as f:
                # NOTE: pickle must only be used on trusted files, loading one
                # can execute arbitrary code.
                return set(pickle.load(f))
        except (EOFError, pickle.UnpicklingError):
            # This is only a cache: an unreadable file is treated as empty
            # rather than making the whole checker unusable.
            return set()

    def __init__(self, settings, lang="en_US"):
        """
        settings: object whose SPELL_CACHE attribute is the path of the
                  pickled cache file (a false value disables persistence).
        lang: language tag handed to enchant.Dict().
        """
        self.settings = settings
        self.dict_spelling = enchant.Dict(lang)
        # Known-good entries: the built-in white-list plus whatever was
        # persisted by a previous run.
        self.cache = set(self.uimsgs)
        self.cache |= self._load_cache(self.settings.SPELL_CACHE)

    def __del__(self):
        """
        Write the accepted entries back to the cache file, if it exists.
        """
        # __init__ may have failed before these attributes were set, a
        # finalizer must cope with a partially initialized instance.
        settings = getattr(self, "settings", None)
        words = getattr(self, "cache", None)
        path = getattr(settings, "SPELL_CACHE", None)
        if words is None or not path or not os.path.exists(path):
            return
        with open(path, 'wb') as f:
            pickle.dump(words, f)

    def check(self, txt):
        """
        Spell-check txt.

        Returns a list of (word, suggestions) tuples, one per word rejected
        by the dictionary; the list is empty when nothing is wrong.

        Words accepted by the dictionary are added (lower-cased) to the
        cache, and so is the whole text when it contains no error.
        """
        ret = []

        # Whole message already validated earlier.
        if txt in self.cache:
            return ret

        for w in self.split_words(txt):
            # The cache only holds lower-case words, while the dictionary is
            # queried with the original casing.
            w_lower = w.lower()
            if w_lower in self.cache:
                continue
            if not self.dict_spelling.check(w):
                ret.append((w, self.dict_spelling.suggest(w)))
            else:
                self.cache.add(w_lower)

        if not ret:
            self.cache.add(txt)

        return ret