More UI message i18n fixes and improvements...
[blender.git] / release / scripts / modules / bl_i18n_utils / spell_check_utils.py
1 # ##### BEGIN GPL LICENSE BLOCK #####
2 #
3 #  This program is free software; you can redistribute it and/or
4 #  modify it under the terms of the GNU General Public License
5 #  as published by the Free Software Foundation; either version 2
6 #  of the License, or (at your option) any later version.
7 #
8 #  This program is distributed in the hope that it will be useful,
9 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
10 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 #  GNU General Public License for more details.
12 #
13 #  You should have received a copy of the GNU General Public License
14 #  along with this program; if not, write to the Free Software Foundation,
15 #  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 #
17 # ##### END GPL LICENSE BLOCK #####
18
19 # <pep8 compliant>
20
21 import enchant
22 import os
23 import pickle
24 import re
25
26
class SpellChecker:
    """
    A basic spell checker.

    Words are first looked up in an in-memory cache (seeded with the
    ``uimsgs`` white-list below), and only then in the enchant dictionary.
    When ``settings.SPELL_CACHE`` names an existing file, the cache is loaded
    from it on creation and written back to it on destruction.
    """

    # These must be all lower case for comparisons
    uimsgs = {
        # OK words
        "aren",  # aren't
        "betweens",  # yuck! in-betweens!
        "boolean", "booleans",
        "couldn",  # couldn't
        "decrement",
        "derivate",
        "doesn",  # doesn't
        "fader",
        "hasn",  # hasn't
        "hoc",  # ad-hoc
        "indices",
        "iridas",
        "isn",  # isn't
        "iterable",
        "kyrgyz",
        "latin",
        "merchantability",
        "mplayer",
        "vertices",

        # Merged words
        "addon", "addons",
        "antialiasing",
        "arcsine", "arccosine", "arctangent",
        "autoclip",
        "autocomplete",
        "autoname",
        "autosave",
        "autoscale",
        "autosmooth",
        "autosplit",
        "backface", "backfacing",
        "backimage",
        "backscattered",
        "bandnoise",
        "bindcode",
        "bitrate",
        "blendin",
        "bonesize",
        "boundbox",
        "boxpack",
        "buffersize",
        "builtin", "builtins",
        "bytecode",
        "chunksize",
        "dataset", "datasets",
        "de",
        "defocus",
        "denoise",
        "despill", "despilling",
        "editcurve",
        "editmesh",
        "filebrowser",
        "filelist",
        "filename", "filenames",
        "filepath", "filepaths",
        "forcefield", "forcefields",
        "fulldome", "fulldomes",
        "fullscreen",
        "gridline",
        "hemi",
        "inbetween",
        "inscatter", "inscattering",
        "libdata",
        "lightless",
        "localview",
        "lookup", "lookups",
        "mathutils",
        "midlevel",
        "midground",
        "mixdown",
        "multi",
        "multifractal",
        "multires", "multiresolution",
        "multisampling",
        "multitexture",
        "multiuser",
        "namespace",
        "keyconfig",
        "playhead",
        "polyline",
        "popup", "popups",
        "pre",
        "precache", "precaching",
        "precalculate",
        "prefetch",
        "premultiply", "premultiplied",
        "prepass",
        "prepend",
        "preprocess", "preprocessing",
        "preseek",
        "raytree",
        "readonly",
        "realtime",
        "rekey",
        "remesh",
        "reprojection",
        "resize",
        "restpose",
        "retarget", "retargets", "retargeting", "retargeted",
        "ringnoise",
        "rolloff",
        "screencast", "screenshot", "screenshots",
        "selfcollision",
        "shadowbuffer", "shadowbuffers",
        "singletexture",
        "spellcheck", "spellchecking",
        "startup",
        "stateful",
        "starfield",
        "subflare", "subflares",
        "subframe", "subframes",
        "subclass", "subclasses", "subclassing",
        "subdirectory", "subdirectories", "subdir", "subdirs",
        "submodule", "submodules",
        "subpath",
        "subsize",
        "substep", "substeps",
        "targetless",
        "textbox", "textboxes",
        "tilemode",
        "timestamp", "timestamps",
        "timestep", "timesteps",
        "todo",
        "un",
        "unbake",
        "uncomment",
        "undeformed",
        "undistort", "undistortion",
        "ungroup",
        "unhide",
        "unindent",
        "unkeyed",
        "unpremultiply",
        "unprojected",
        "unreacted",
        "unregister",
        "unselected",
        "unsubdivided", "unsubdivide",
        "unshadowed",
        "unspill",
        "unstitchable",
        "vectorscope",
        "whitespace", "whitespaces",
        "worldspace",
        "workflow",

        # Neologisms, slangs
        "affectable",
        "automagic", "automagically",
        "blobby",
        "blockiness", "blocky",
        "collider", "colliders",
        "deformer", "deformers",
        "determinator",
        "editability",
        "keyer",
        "lacunarity",
        "numerics",
        "occluder",
        "passepartout",
        "perspectively",
        "pixelate",
        "polygonization",
        "selectability",
        "slurph",
        "stitchable",
        "symmetrize",
        "trackability",
        "transmissivity",
        "rasterized", "rasterization", "rasterizer",
        "renderer", "renderable", "renderability",

        # Abbreviations
        "aero",
        "amb",
        "anim",
        "bool",
        "calc",
        "config", "configs",
        "const",
        "coord", "coords",
        "degr",
        "dof",
        "dupli", "duplis",
        "eg",
        "esc",
        "expr",
        "fac",
        "fra",
        "frs",
        "grless",
        "http",
        "init",
        "kbit", "kb",
        "lang", "langs",
        "lclick", "rclick",
        "lensdist",
        "loc", "rot", "pos",
        "lorem",
        "luma",
        "mem",
        "multicam",
        "num",
        "ok",
        "orco",
        "ortho",
        "persp",
        "pref", "prefs",
        "prev",
        "param",
        "premul",
        "quad", "quads",
        "quat", "quats",
        "recalc", "recalcs",
        "refl",
        "sce",
        "sel",
        "spec",
        "struct", "structs",
        "sys",
        "tex",
        "tri", "tris",
        "uv", "uvs", "uvw", "uw", "uvmap",
        "ve",
        "vec",
        "vel",  # velocity!
        "vert", "verts",
        "vis",
        "xyz", "xzy", "yxz", "yzx", "zxy", "zyx",
        "xy", "xz", "yx", "yz", "zx", "zy",

        # General computer/science terms
        "boid", "boids",
        "equisolid",
        "euler", "eulers",
        "fribidi",
        "gettext",
        "hashable",
        "intrinsics",
        "isosurface",
        "jitter", "jittering", "jittered",
        "keymap", "keymaps",
        "lambertian",
        "laplacian",
        "metadata",
        "msgfmt",
        "nand", "xnor",
        "normals",
        "numpad",
        "octree",
        "opengl",
        "pulldown", "pulldowns",
        "quantized",
        "samplerate",
        "scrollback",
        "scrollbar",
        "scroller",
        "searchable",
        "spacebar",
        "tooltip", "tooltips",
        "trackpad",
        "unicode",
        "viewport", "viewports",
        "viscoelastic",
        "wildcard", "wildcards",

        # General computer graphics terms
        "anaglyph",
        "bezier", "beziers",
        "bicubic",
        "bilinear",
        "blackpoint", "whitepoint",
        "blinn",
        "bokeh",
        "catadioptric",
        "centroid",
        "chrominance",
        "codec", "codecs",
        "collada",
        "compositing",
        "crossfade",
        "deinterlace",
        "dropoff",
        "dv",
        "eigenvectors",
        "equirectangular",
        "fisheye",
        "framerate",
        "gimbal",
        "grayscale",
        "icosphere",
        "inpaint",
        "lightmap",
        "lossless", "lossy",
        "matcap",
        "midtones",
        "mipmap", "mipmaps", "mip",
        "ngon", "ngons",
        "ntsc",
        "nurb", "nurbs",
        "perlin",
        "phong",
        "radiosity",
        "raytrace", "raytracing", "raytraced",
        "renderfarm",
        "shader", "shaders",
        "specular", "specularity",
        "spillmap",
        "sobel",
        "tonemap",
        "toon",
        "timecode",
        "voronoi",
        "voxel", "voxels",
        "wireframe",
        "zmask",
        "ztransp",

        # Blender terms
        "audaspace",
        "bbone",
        "breakdowner",
        "bspline",
        "bweight",
        "colorband",
        "datablock", "datablocks",
        "despeckle",
        "dopesheet",
        "dupliface", "duplifaces",
        "dupliframe", "dupliframes",
        "dupliobject", "dupliob",
        "dupligroup",
        "duplivert",
        "editbone",
        "editmode",
        "fcurve", "fcurves",
        "fluidsim",
        "frameserver",
        "enum",
        "keyframe", "keyframes", "keyframing", "keyframed",
        "metaball", "metaballs",
        "metaelement", "metaelements",
        "metastrip", "metastrips",
        "movieclip",
        "mpoly",
        "mtex",
        "nabla",
        "navmesh",
        "outliner",
        "paintmap", "paintmaps",
        "polygroup", "polygroups",
        "poselib",
        "pushpull",
        "pyconstraint", "pyconstraints",
        "shapekey", "shapekeys",
        "shrinkfatten",
        "shrinkwrap",
        "softbody",
        "stucci",
        "sunsky",
        "subsurf",
        "tessface", "tessfaces",
        "texface",
        "timeline", "timelines",
        "tosphere",
        "uilist",
        "vcol", "vcols",
        "vgroup", "vgroups",
        "vinterlace",
        "wetmap", "wetmaps",
        "wpaint",
        "uvwarp",

        # Algorithm names
        "beckmann",
        "catmull",
        "catrom",
        "chebychev",
        "courant",
        "kutta",
        "lennard",
        "minkowski",
        "minnaert",
        "musgrave",
        "nayar",
        "netravali",
        "oren",
        "prewitt",
        "runge",
        "verlet",
        "worley",

        # Acronyms
        "aa", "msaa",
        "ao",
        "api",
        "asc", "cdl",
        "ascii",
        "atrac",
        "bsdf",
        "bw",
        "ccd",
        "cmd",
        "cpus",
        "ctrl",
        "cw", "ccw",
        "dev",
        "djv",
        "dpi",
        "dvar",
        "dx",
        "eo",
        "fh",
        "fov",
        "fft",
        "futura",
        "gfx",
        "gl",
        "glsl",
        "gpl",
        "gpu", "gpus",
        "hc",
        "hdc",
        "hdr",
        "hh", "mm", "ss", "ff",  # hh:mm:ss:ff timecode
        "hsv", "hsva",
        "id",
        "ior",
        "itu",
        "lhs",
        "lmb", "mmb", "rmb",
        "mocap",
        "msgid", "msgids",
        "mux",
        "ndof",
        "ppc",
        "precisa",
        "px",
        "qmc",
        "rgb", "rgba",
        "rhs",
        "rv",
        "sdl",
        "sl",
        "smpte",
        "svn",
        "ui",
        "unix",
        "vbo", "vbos",
        "ycc", "ycca",
        "yuv", "yuva",

        # Blender acronyms
        "bge",
        "bli",
        "bpy",
        "bvh",
        "dbvt",
        "dop",  # BLI K-Dop BVH
        "ik",
        "nla",
        "py",
        "qbvh",
        "rna",
        "rvo",
        "simd",
        "sph",
        "svbvh",

        # Files types/formats
        "avi",
        "attrac",
        "autocad",
        "autodesk",
        "bmp",
        "btx",
        "cineon",
        "dpx",
        "dxf",
        "eps",
        "exr",
        "fbx",
        "ffmpeg",
        "flac",
        "gzip",
        "ico",
        "jpg", "jpeg",
        "json",
        "matroska",
        "mdd",
        "mkv",
        "mpeg", "mjpeg",
        "mtl",
        "ogg",
        "openjpeg",
        "osl",
        "oso",
        "piz",
        "png",
        "po",
        "quicktime",
        "rle",
        "sgi",
        "stl",
        "svg",
        "targa", "tga",
        "tiff",
        "theora",
        "vorbis",
        "wav",
        "xiph",
        "xml",
        "xna",
        "xvid",
    }

    # A word may start at the beginning of the text, after whitespace, '*' or
    # a quote character, or right after a letter followed by '/' or '-'.
    _valid_before = "(?<=[\\s*'\"`])|(?<=[a-zA-Z][/-])|(?<=^)"
    # A word may end at the end of the text, before whitespace, a quote or
    # punctuation, or before a '/' or '-' that leads (optionally through
    # whitespace) to a letter.
    _valid_after = "(?=[\\s'\"`.!?,;:])|(?=[/-]\\s*[a-zA-Z])|(?=$)"
    # The word itself: capitals optionally followed by lower case letters,
    # or only capitals, or only lower case letters (may match empty).
    _valid_words = "(?:{})(?:(?:[A-Z]+[a-z]*)|[A-Z]*|[a-z]*)(?:{})".format(_valid_before, _valid_after)
    _split_words = re.compile(_valid_words).findall

    @classmethod
    def split_words(cls, text):
        """
        Return the list of words found in text, in order of appearance.

        The underlying pattern can match the empty string, those empty
        matches are filtered out here.
        """
        return [w for w in cls._split_words(text) if w]

    @staticmethod
    def _load_cache(path):
        """
        Return the set of entries pickled in the file at path.

        An empty or truncated file yields an empty set instead of raising,
        so that a freshly created (empty) cache file can be used as is.
        """
        # NOTE(review): unpickling can execute arbitrary code, the cache file
        # must come from a trusted location.
        with open(path, 'rb') as f:
            try:
                return set(pickle.load(f))
            except (EOFError, pickle.UnpicklingError):
                return set()

    def __init__(self, settings, lang="en_US"):
        """
        settings: object exposing a SPELL_CACHE attribute (path of the cache
                  file, or a false value to disable the persistent cache).
        lang: language tag handed over to enchant.Dict.
        """
        self.settings = settings
        self.dict_spelling = enchant.Dict(lang)
        self.cache = set(self.uimsgs)

        cache = self.settings.SPELL_CACHE
        if cache and os.path.exists(cache):
            self.cache |= self._load_cache(cache)

    def __del__(self):
        """
        Write the cache back to settings.SPELL_CACHE, only if that file
        already exists.
        """
        # __init__ may have failed before setting those attributes, in that
        # case there is nothing valid to save (and we must not raise here).
        settings = getattr(self, "settings", None)
        words = getattr(self, "cache", None)
        if settings is None or words is None:
            return

        cache = getattr(settings, "SPELL_CACHE", None)
        if cache and os.path.exists(cache):
            with open(cache, 'wb') as f:
                pickle.dump(words, f)

    def check(self, txt):
        """
        Spell-check txt.

        Return a list of (word, suggestions) tuples, one per occurrence of a
        word rejected by the dictionary (empty list when everything is fine).
        Accepted words (lower-cased) and fully valid texts are added to the
        cache.
        """
        ret = []

        # Whole text already known as valid.
        if txt in self.cache:
            return ret

        for w in self.split_words(txt):
            w_lower = w.lower()
            if w_lower in self.cache:
                continue
            if not self.dict_spelling.check(w):
                ret.append((w, self.dict_spelling.suggest(w)))
            else:
                self.cache.add(w_lower)

        if not ret:
            self.cache.add(txt)

        return ret