First set of UI/i18n messages fixes (mostly new GP code).
[blender.git] / release / scripts / modules / bl_i18n_utils / utils_spell_check.py
1 # ##### BEGIN GPL LICENSE BLOCK #####
2 #
3 #  This program is free software; you can redistribute it and/or
4 #  modify it under the terms of the GNU General Public License
5 #  as published by the Free Software Foundation; either version 2
6 #  of the License, or (at your option) any later version.
7 #
8 #  This program is distributed in the hope that it will be useful,
9 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
10 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 #  GNU General Public License for more details.
12 #
13 #  You should have received a copy of the GNU General Public License
14 #  along with this program; if not, write to the Free Software Foundation,
15 #  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 #
17 # ##### END GPL LICENSE BLOCK #####
18
19 # <pep8 compliant>
20
21 import enchant
22 import os
23 import pickle
24 import re
25
26
class SpellChecker:
    """
    A basic spell checker.

    Words are validated against an enchant dictionary. A set of known-valid
    entries (``self.cache``) is consulted first; it is seeded with ``uimsgs``
    and, when ``settings.SPELL_CACHE`` points to an existing file, with the
    entries pickled in that file. The cache is written back to that same file
    when the checker is finalized.
    """

    # These must be all lower case for comparisons
    uimsgs = {
        # OK words
        "aren",  # aren't
        "betweens",  # yuck! in-betweens!
        "boolean", "booleans",
        "couldn",  # couldn't
        "decrement",
        "derivate",
        "doesn",  # doesn't
        "equi",  # equi-angular, etc.
        "fader",
        "globbing",
        "hasn",  # hasn't
        "hetero",
        "hoc",  # ad-hoc
        "indices",
        "iridas",
        "isn",  # isn't
        "iterable",
        "kyrgyz",
        "latin",
        "merchantability",
        "mplayer",
        "pong",  # ping pong
        "teleport", "teleporting",
        "vertices",

        # Merged words
        "antialiasing",
        "arcsine", "arccosine", "arctangent",
        "autoclip",
        "autocomplete",
        "autoexec",
        "autoexecution",
        "autoname",
        "autopack",
        "autosave",
        "autoscale",
        "autosmooth",
        "autosplit",
        "backface", "backfacing",
        "backimage",
        "backscattered",
        "bandnoise",
        "bindcode",
        "bitflag", "bitflags",
        "bitrate",
        "blackbody",
        "blendfile",
        "blendin",
        "bonesize",
        "boundbox",
        "boxpack",
        "buffersize",
        "builtin", "builtins",
        "bytecode",
        "chunksize",
        "customdata",
        "dataset", "datasets",
        "de",
        "deadzone",
        "deconstruct",
        "defocus",
        "denoise",
        "deselect", "deselecting", "deselection",
        "despill", "despilling",
        "dirtree",
        "editcurve",
        "editmesh",
        "filebrowser",
        "filelist",
        "filename", "filenames",
        "filepath", "filepaths",
        "forcefield", "forcefields",
        "fulldome", "fulldomes",
        "fullscreen",
        "gridline",
        "hemi",
        "inbetween",
        "inscatter", "inscattering",
        "libdata",
        "lightless",
        "lineset",
        "linestyle", "linestyles",
        "localview",
        "lookup", "lookups",
        "mathutils",
        "micropolygon",
        "midlevel",
        "midground",
        "mixdown",
        "multi",
        "multifractal",
        "multipaint",
        "multires", "multiresolution",
        "multisampling",
        "multiscatter",
        "multitexture",
        "multithreaded",
        "multiuser",
        "multiview",
        "namespace",
        "nodetree", "nodetrees",
        "keyconfig",
        "online",
        "playhead",
        "popup", "popups",
        "pre",
        "precache", "precaching",
        "precalculate",
        "prefetch",
        "premultiply", "premultiplied",
        "prepass",
        "prepend",
        "preprocess", "preprocessing",
        "preseek",
        "promillage",
        "pushdown",
        "raytree",
        "readonly",
        "realtime",
        "rekey",
        "remesh",
        "reprojection",
        "resize",
        "restpose",
        "retarget", "retargets", "retargeting", "retargeted",
        "rigidbody",
        "ringnoise",
        "rolloff",
        "runtime",
        "scanline",
        "screencast", "screenshot", "screenshots",
        "selfcollision",
        "shadowbuffer", "shadowbuffers",
        "singletexture",
        "spellcheck", "spellchecking",
        "startup",
        "stateful",
        "starfield",
        "subflare", "subflares",
        "subframe", "subframes",
        "subclass", "subclasses", "subclassing",
        "subdirectory", "subdirectories", "subdir", "subdirs",
        "submodule", "submodules",
        "subpath",
        "subsize",
        "substep", "substeps",
        "targetless",
        "textbox", "textboxes",
        "tilemode",
        "timestamp", "timestamps",
        "timestep", "timesteps",
        "todo",
        "un",
        "unbake",
        "uncomment",
        "unculled",
        "undeformed",
        "undistort", "undistortion",
        "ungroup", "ungrouped",
        "unhide",
        "unindent",
        "unkeyed",
        "unmute",
        "unpremultiply",
        "unprojected",
        "unprotect",
        "unreacted",
        "unregister",
        "unselected", "unselectable",
        "unsubdivided", "unsubdivide",
        "unshadowed",
        "unspill",
        "unstitchable",
        "vectorscope",
        "whitespace", "whitespaces",
        "worldspace",
        "workflow",

        # Neologisms, slangs
        "affectable",
        "animatable",
        "automagic", "automagically",
        "blobby",
        "blockiness", "blocky",
        "collider", "colliders",
        "deformer", "deformers",
        "determinator",
        "editability",
        "keyer",
        "lacunarity",
        "numerics",
        "occluder", "occluders",
        "passepartout",
        "perspectively",
        "pixelate",
        "pointiness",
        "polycount",
        "polygonization", "polygonalization",  # yuck!
        "selectability",
        "stitchable",
        "symmetrize",
        "trackability",
        "transmissivity",
        "rasterized", "rasterization", "rasterizer",
        "renderer", "renderable", "renderability",

        # Really bad!!!
        "convertor",

        # Abbreviations
        "aero",
        "amb",
        "anim",
        "app",
        "bool",
        "calc",
        "config", "configs",
        "const",
        "coord", "coords",
        "degr",
        "diff",
        "dof",
        "dupli", "duplis",
        "eg",
        "esc",
        "expr",
        "fac",
        "fra",
        "frs",
        "grless",
        "http",
        "init",
        "kbit", "kb",
        "lang", "langs",
        "lclick", "rclick",
        "lensdist",
        "loc", "rot", "pos",
        "lorem",
        "luma",
        "mbs",  # mouse button 'select'.
        "mem",
        "multicam",
        "num",
        "ok",
        "orco",
        "ortho",
        "pano",
        "persp",
        "pref", "prefs",
        "prev",
        "param",
        "premul",
        "quad", "quads",
        "quat", "quats",
        "recalc", "recalcs",
        "refl",
        "sce",
        "sel",
        "spec",
        "struct", "structs",
        "sys",
        "tex",
        "tmr",  # timer
        "tri", "tris",
        "uv", "uvs", "uvw", "uw", "uvmap",
        "ve",
        "vec",
        "vel",  # velocity!
        "vert", "verts",
        "vis",
        "xor",
        "xyz", "xzy", "yxz", "yzx", "zxy", "zyx",
        "xy", "xz", "yx", "yz", "zx", "zy",

        # General computer/science terms
        "bitangent",
        "boid", "boids",
        "equisolid",
        "euler", "eulers",
        "fribidi",
        "gettext",
        "hashable",
        "hotspot",
        "intrinsics",
        "isosurface",
        "jitter", "jittering", "jittered",
        "keymap", "keymaps",
        "lambertian",
        "laplacian",
        "metadata",
        "msgfmt",
        "nand", "xnor",
        "normals",
        "numpad",
        "octree",
        "omnidirectional",
        "opengl",
        "openmp",
        "photoreceptor",
        "poly",
        "polyline", "polylines",
        "pulldown", "pulldowns",
        "quantized",
        "samplerate",
        "scrollback",
        "scrollbar",
        "scroller",
        "searchable",
        "spacebar",
        "tooltip", "tooltips",
        "trackpad",
        "tuple",
        "unicode",
        "viewport", "viewports",
        "viscoelastic",
        "wildcard", "wildcards",

        # General computer graphics terms
        "anaglyph",
        "bezier", "beziers",
        "bicubic",
        "bilinear",
        "binormal",
        "blackpoint", "whitepoint",
        "blinn",
        "bokeh",
        "catadioptric",
        "centroid",
        "chrominance",
        "codec", "codecs",
        "collada",
        "compositing",
        "crossfade",
        "cuda",
        "deinterlace",
        "dropoff",
        "dv",
        "eigenvectors",
        "equirectangular",
        "fisheye",
        "framerate",
        "gimbal",
        "grayscale",
        "icosphere",
        "inpaint",
        "lightmap",
        "linearlight",
        "lossless", "lossy",
        "matcap",
        "midtones",
        "mipmap", "mipmaps", "mip",
        "ngon", "ngons",
        "ntsc",
        "nurb", "nurbs",
        "perlin",
        "phong",
        "pinlight",
        "qi",
        "radiosity",
        "raycasting",
        "raytrace", "raytracing", "raytraced",
        "renderfarm",
        "scanfill",
        "shader", "shaders",
        "softlight",
        "specular", "specularity",
        "spillmap",
        "sobel",
        "texel",
        "tonemap",
        "toon",
        "timecode",
        "vividlight",
        "voronoi",
        "voxel", "voxels",
        "vsync",
        "wireframe",
        "zmask",
        "ztransp",

        # Blender terms
        "audaspace",
        "bbone",
        "bendy",  # bones
        "bmesh",
        "breakdowner",
        "bspline",
        "bweight",
        "colorband",
        "datablock", "datablocks",
        "despeckle",
        "dopesheet",
        "dupliface", "duplifaces",
        "dupliframe", "dupliframes",
        "dupliobject", "dupliob",
        "dupligroup",
        "duplivert",
        "dyntopo",
        "editbone",
        "editmode",
        "fcurve", "fcurves",
        "fedge", "fedges",
        "fluidsim",
        "frameserver",
        "freestyle",
        "enum", "enums",
        "gpencil",
        "idcol",
        "keyframe", "keyframes", "keyframing", "keyframed",
        "metaball", "metaballs", "mball",
        "metaelement", "metaelements",
        "metastrip", "metastrips",
        "movieclip",
        "mpoly",
        "mtex",
        "nabla",
        "navmesh",
        "outliner",
        "paintmap", "paintmaps",
        "polygroup", "polygroups",
        "poselib",
        "pushpull",
        "pyconstraint", "pyconstraints",
        "qe",  # keys...
        "shapekey", "shapekeys",
        "shrinkfatten",
        "shrinkwrap",
        "softbody",
        "stucci",
        "sunsky",
        "subsurf",
        "tessface", "tessfaces",
        "texface",
        "timeline", "timelines",
        "tosphere",
        "uilist",
        "vcol", "vcols",
        "vgroup", "vgroups",
        "vinterlace",
        "vse",
        "wasd", "wasdqe",  # keys...
        "wetmap", "wetmaps",
        "wpaint",
        "uvwarp",

        # Algorithm/library names
        "ashikhmin",  # Ashikhmin-Shirley
        "beckmann",
        "blosc",
        "catmull",
        "catrom",
        "chebychev",
        "courant",
        "hosek",
        "kutta",
        "lennard",
        "mikktspace",
        "minkowski",
        "minnaert",
        "musgrave",
        "nayar",
        "netravali",
        "ogawa",
        "oren",
        "preetham",
        "prewitt",
        "runge",
        "sobol",
        "verlet",
        "wilkie",
        "worley",

        # Acronyms
        "aa", "msaa",
        "ao",
        "api",
        "asc", "cdl",
        "ascii",
        "atrac",
        "avx",
        "bsdf",
        "bssrdf",
        "bw",
        "ccd",
        "cmd",
        "cpus",
        "ctrl",
        "cw", "ccw",
        "dev",
        "djv",
        "dpi",
        "dvar",
        "dx",
        "eo",
        "fh",
        "fov",
        "fft",
        "futura",
        "fx",
        "gfx",
        "ggx",
        "gl",
        "glsl",
        "gpl",
        "gpu", "gpus",
        "hc",
        "hdc",
        "hdr",
        "hh", "mm", "ss", "ff",  # hh:mm:ss:ff timecode
        "hsv", "hsva", "hsl",
        "id",
        "ior",
        "itu",
        "lhs",
        "lmb", "mmb", "rmb",
        "mocap",
        "msgid", "msgids",
        "mux",
        "ndof",
        "ppc",
        "precisa",
        "px",
        "qmc",
        "rgb", "rgba",
        "rhs",
        "rv",
        "sdl",
        "sl",
        "smpte",
        "ssao",
        "svn",
        "ui",
        "unix",
        "vbo", "vbos",
        "wxyz",
        "ycc", "ycca",
        "yrgb",
        "yuv", "yuva",

        # Blender acronyms
        "bge",
        "bli",
        "bpy",
        "bvh",
        "dbvt",
        "dop",  # BLI K-Dop BVH
        "ik",
        "nla",
        "py",
        "qbvh",
        "rna",
        "rvo",
        "simd",
        "sph",
        "svbvh",

        # Files types/formats
        "avi",
        "attrac",
        "autocad",
        "autodesk",
        "bmp",
        "btx",
        "cineon",
        "dpx",
        "dwaa",
        "dwab",
        "dxf",
        "eps",
        "exr",
        "fbx",
        "fbxnode",
        "ffmpeg",
        "flac",
        "gzip",
        "ico",
        "jpg", "jpeg",
        "json",
        "matroska",
        "mdd",
        "mkv",
        "mpeg", "mjpeg",
        "mtl",
        "ogg",
        "openjpeg",
        "osl",
        "oso",
        "piz",
        "png",
        "po",
        "quicktime",
        "rle",
        "sgi",
        "stl",
        "svg",
        "targa", "tga",
        "tiff",
        "theora",
        "vorbis",
        "wav",
        "xiph",
        "xml",
        "xna",
        "xvid",
    }

    # A word may start after whitespace, '*' or a quote character, after a
    # letter followed by '/' or '-', or at the very beginning of the text.
    _valid_before = "(?<=[\\s*'\"`])|(?<=[a-zA-Z][/-])|(?<=^)"
    # A word may end before whitespace, a quote or punctuation character,
    # before a '/' or '-' that leads (through optional spaces) to a letter,
    # or at the very end of the text.
    _valid_after = "(?=[\\s'\"`.!?,;:])|(?=[/-]\\s*[a-zA-Z])|(?=$)"
    # A word itself is upper-case letters followed by lower-case ones, or
    # upper-case letters only, or lower-case letters only. Each of these may
    # match the empty string, hence the filtering done in split_words().
    _valid_words = "(?:{})(?:(?:[A-Z]+[a-z]*)|[A-Z]*|[a-z]*)(?:{})".format(_valid_before, _valid_after)
    _split_words = re.compile(_valid_words).findall

    @classmethod
    def split_words(cls, text):
        """
        Return the list of non-empty words found in text, in order of
        appearance and with their original case.
        """
        return [w for w in cls._split_words(text) if w]

    @staticmethod
    def _load_cache(path):
        """
        Return the set of entries pickled in the file at path.

        An empty set is returned when path is empty, when the file does not
        exist, or when it cannot be unpickled (e.g. a freshly created, still
        empty cache file).
        """
        if not (path and os.path.exists(path)):
            return set()
        try:
            with open(path, 'rb') as f:
                # NOTE: unpickling can execute arbitrary code, the cache file
                #       must come from a trusted location.
                return set(pickle.load(f))
        except (EOFError, pickle.UnpicklingError):
            # The cache is only an optimization: an empty or corrupted file
            # must not prevent the checker from being created.
            return set()

    def __init__(self, settings, lang="en_US"):
        """
        settings: object exposing a SPELL_CACHE attribute, the path of the
                  pickled cache file (a false value disables the file cache).
        lang: language tag handed over to enchant.Dict().
        """
        self.settings = settings
        self.dict_spelling = enchant.Dict(lang)
        self.cache = set(self.uimsgs)
        self.cache |= self._load_cache(self.settings.SPELL_CACHE)

    def __del__(self):
        """
        Write the cache back to the settings.SPELL_CACHE file, but only when
        that file already exists.
        """
        # This also runs when __init__() failed half-way, in which case some
        # (or all) of the attributes are missing.
        settings = getattr(self, "settings", None)
        cache = getattr(settings, "SPELL_CACHE", None)
        known = getattr(self, "cache", None)
        if known is None or not cache or not os.path.exists(cache):
            return
        with open(cache, 'wb') as f:
            pickle.dump(known, f)

    def check(self, txt):
        """
        Spell-check txt.

        Return a list of (word, suggestions) tuples, one per word rejected by
        the dictionary; the list is empty when nothing wrong was found.

        Words accepted by the dictionary are added to the cache in lower case,
        and a text without any rejected word is added to the cache as a whole,
        so that it is accepted immediately next time.
        """
        ret = []

        # Whole message already known to be valid.
        if txt in self.cache:
            return ret

        for w in self.split_words(txt):
            w_lower = w.lower()
            if w_lower in self.cache:
                continue
            # The dictionary is queried with the original case of the word.
            if not self.dict_spelling.check(w):
                ret.append((w, self.dict_spelling.suggest(w)))
            else:
                self.cache.add(w_lower)

        if not ret:
            self.cache.add(txt)

        return ret