Fix more UI i18n issues (reported by Leon Cheung).
[blender.git] / release / scripts / modules / bl_i18n_utils / spell_check_utils.py
1 # ##### BEGIN GPL LICENSE BLOCK #####
2 #
3 #  This program is free software; you can redistribute it and/or
4 #  modify it under the terms of the GNU General Public License
5 #  as published by the Free Software Foundation; either version 2
6 #  of the License, or (at your option) any later version.
7 #
8 #  This program is distributed in the hope that it will be useful,
9 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
10 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 #  GNU General Public License for more details.
12 #
13 #  You should have received a copy of the GNU General Public License
14 #  along with this program; if not, write to the Free Software Foundation,
15 #  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 #
17 # ##### END GPL LICENSE BLOCK #####
18
19 # <pep8 compliant>
20
21 import enchant
22 import os
23 import pickle
24 import re
25
26
class SpellChecker:
    """
    A basic spell checker.

    Words are validated against an enchant dictionary; a set of known-good
    lower-case words (``uimsgs`` plus everything accepted so far) is kept in
    ``self.cache`` to avoid re-checking.  When ``settings.SPELL_CACHE`` names
    an existing file, that cache is loaded from it on construction and
    written back to it when the checker is destroyed.
    """

    # These must be all lower case for comparisons
    uimsgs = {
        # OK words
        "aren",  # aren't
        "betweens",  # yuck! in-betweens!
        "boolean", "booleans",
        "couldn",  # couldn't
        "decrement",
        "derivate",
        "doesn",  # doesn't
        "fader",
        "hasn",  # hasn't
        "hoc",  # ad-hoc
        "indices",
        "iridas",
        "isn",  # isn't
        "iterable",
        "kyrgyz",
        "latin",
        "merchantability",
        "mplayer",
        "vertices",

        # Merged words
        "addon", "addons",
        "antialiasing",
        "arcsine", "arccosine", "arctangent",
        "autoclip",
        "autocomplete",
        "autoname",
        "autosave",
        "autoscale",
        "autosmooth",
        "autosplit",
        "backface", "backfacing",
        "backimage",
        "backscattered",
        "bandnoise",
        "bindcode",
        "bitrate",
        "blendfile",
        "blendin",
        "bonesize",
        "boundbox",
        "boxpack",
        "buffersize",
        "builtin", "builtins",
        "bytecode",
        "chunksize",
        "customdata",
        "dataset", "datasets",
        "de",
        "defocus",
        "denoise",
        "despill", "despilling",
        "editcurve",
        "editmesh",
        "filebrowser",
        "filelist",
        "filename", "filenames",
        "filepath", "filepaths",
        "forcefield", "forcefields",
        "fulldome", "fulldomes",
        "fullscreen",
        "gridline",
        "hemi",
        "inbetween",
        "inscatter", "inscattering",
        "libdata",
        "lightless",
        "localview",
        "lookup", "lookups",
        "mathutils",
        "midlevel",
        "midground",
        "mixdown",
        "multi",
        "multifractal",
        "multires", "multiresolution",
        "multisampling",
        "multitexture",
        "multiuser",
        "namespace",
        "keyconfig",
        "playhead",
        "polyline",
        "popup", "popups",
        "pre",
        "precache", "precaching",
        "precalculate",
        "prefetch",
        "premultiply", "premultiplied",
        "prepass",
        "prepend",
        "preprocess", "preprocessing",
        "preseek",
        "raytree",
        "readonly",
        "realtime",
        "rekey",
        "remesh",
        "reprojection",
        "resize",
        "restpose",
        "retarget", "retargets", "retargeting", "retargeted",
        "ringnoise",
        "rolloff",
        "screencast", "screenshot", "screenshots",
        "selfcollision",
        "shadowbuffer", "shadowbuffers",
        "singletexture",
        "spellcheck", "spellchecking",
        "startup",
        "stateful",
        "starfield",
        "subflare", "subflares",
        "subframe", "subframes",
        "subclass", "subclasses", "subclassing",
        "subdirectory", "subdirectories", "subdir", "subdirs",
        "submodule", "submodules",
        "subpath",
        "subsize",
        "substep", "substeps",
        "targetless",
        "textbox", "textboxes",
        "tilemode",
        "timestamp", "timestamps",
        "timestep", "timesteps",
        "todo",
        "un",
        "unbake",
        "uncomment",
        "undeformed",
        "undistort", "undistortion",
        "ungroup",
        "unhide",
        "unindent",
        "unkeyed",
        "unpremultiply",
        "unprojected",
        "unreacted",
        "unregister",
        "unselected",
        "unsubdivided", "unsubdivide",
        "unshadowed",
        "unspill",
        "unstitchable",
        "vectorscope",
        "whitespace", "whitespaces",
        "worldspace",
        "workflow",

        # Neologisms, slangs
        "affectable",
        "automagic", "automagically",
        "blobby",
        "blockiness", "blocky",
        "collider", "colliders",
        "deformer", "deformers",
        "determinator",
        "editability",
        "keyer",
        "lacunarity",
        "numerics",
        "occluder",
        "passepartout",
        "perspectively",
        "pixelate",
        "polygonization",
        "selectability",
        "slurph",
        "stitchable",
        "symmetrize",
        "trackability",
        "transmissivity",
        "rasterized", "rasterization", "rasterizer",
        "renderer", "renderable", "renderability",

        # Abbreviations
        "aero",
        "amb",
        "anim",
        "bool",
        "calc",
        "config", "configs",
        "const",
        "coord", "coords",
        "degr",
        "dof",
        "dupli", "duplis",
        "eg",
        "esc",
        "expr",
        "fac",
        "fra",
        "frs",
        "grless",
        "http",
        "init",
        "kbit", "kb",
        "lang", "langs",
        "lclick", "rclick",
        "lensdist",
        "loc", "rot", "pos",
        "lorem",
        "luma",
        "mem",
        "multicam",
        "num",
        "ok",
        "orco",
        "ortho",
        "persp",
        "pref", "prefs",
        "prev",
        "param",
        "premul",
        "quad", "quads",
        "quat", "quats",
        "recalc", "recalcs",
        "refl",
        "sce",
        "sel",
        "spec",
        "struct", "structs",
        "sys",
        "tex",
        "tri", "tris",
        "uv", "uvs", "uvw", "uw", "uvmap",
        "ve",
        "vec",
        "vel",  # velocity!
        "vert", "verts",
        "vis",
        "xyz", "xzy", "yxz", "yzx", "zxy", "zyx",
        "xy", "xz", "yx", "yz", "zx", "zy",

        # General computer/science terms
        "boid", "boids",
        "equisolid",
        "euler", "eulers",
        "fribidi",
        "gettext",
        "hashable",
        "intrinsics",
        "isosurface",
        "jitter", "jittering", "jittered",
        "keymap", "keymaps",
        "lambertian",
        "laplacian",
        "metadata",
        "msgfmt",
        "nand", "xnor",
        "normals",
        "numpad",
        "octree",
        "opengl",
        "pulldown", "pulldowns",
        "quantized",
        "samplerate",
        "scrollback",
        "scrollbar",
        "scroller",
        "searchable",
        "spacebar",
        "tooltip", "tooltips",
        "trackpad",
        "unicode",
        "viewport", "viewports",
        "viscoelastic",
        "wildcard", "wildcards",

        # General computer graphics terms
        "anaglyph",
        "bezier", "beziers",
        "bicubic",
        "bilinear",
        "blackpoint", "whitepoint",
        "blinn",
        "bokeh",
        "catadioptric",
        "centroid",
        "chrominance",
        "codec", "codecs",
        "collada",
        "compositing",
        "crossfade",
        "deinterlace",
        "dropoff",
        "dv",
        "eigenvectors",
        "equirectangular",
        "fisheye",
        "framerate",
        "gimbal",
        "grayscale",
        "icosphere",
        "inpaint",
        "lightmap",
        "lossless", "lossy",
        "matcap",
        "midtones",
        "mipmap", "mipmaps", "mip",
        "ngon", "ngons",
        "ntsc",
        "nurb", "nurbs",
        "perlin",
        "phong",
        "radiosity",
        "raytrace", "raytracing", "raytraced",
        "renderfarm",
        "shader", "shaders",
        "specular", "specularity",
        "spillmap",
        "sobel",
        "tonemap",
        "toon",
        "timecode",
        "voronoi",
        "voxel", "voxels",
        "wireframe",
        "zmask",
        "ztransp",

        # Blender terms
        "audaspace",
        "bbone",
        "breakdowner",
        "bspline",
        "bweight",
        "colorband",
        "datablock", "datablocks",
        "despeckle",
        "dopesheet",
        "dupliface", "duplifaces",
        "dupliframe", "dupliframes",
        "dupliobject", "dupliob",
        "dupligroup",
        "duplivert",
        "editbone",
        "editmode",
        "fcurve", "fcurves",
        "fluidsim",
        "frameserver",
        "enum",
        "keyframe", "keyframes", "keyframing", "keyframed",
        "metaball", "metaballs",
        "metaelement", "metaelements",
        "metastrip", "metastrips",
        "movieclip",
        "mpoly",
        "mtex",
        "nabla",
        "navmesh",
        "outliner",
        "paintmap", "paintmaps",
        "polygroup", "polygroups",
        "poselib",
        "pushpull",
        "pyconstraint", "pyconstraints",
        "shapekey", "shapekeys",
        "shrinkfatten",
        "shrinkwrap",
        "softbody",
        "stucci",
        "sunsky",
        "subsurf",
        "tessface", "tessfaces",
        "texface",
        "timeline", "timelines",
        "tosphere",
        "uilist",
        "vcol", "vcols",
        "vgroup", "vgroups",
        "vinterlace",
        "wetmap", "wetmaps",
        "wpaint",
        "uvwarp",

        # Algorithm names
        "beckmann",
        "catmull",
        "catrom",
        "chebychev",
        "courant",
        "kutta",
        "lennard",
        "minkowski",
        "minnaert",
        "musgrave",
        "nayar",
        "netravali",
        "oren",
        "prewitt",
        "runge",
        "verlet",
        "worley",

        # Acronyms
        "aa", "msaa",
        "ao",
        "api",
        "asc", "cdl",
        "ascii",
        "atrac",
        "bsdf",
        "bw",
        "ccd",
        "cmd",
        "cpus",
        "ctrl",
        "cw", "ccw",
        "dev",
        "djv",
        "dpi",
        "dvar",
        "dx",
        "eo",
        "fh",
        "fov",
        "fft",
        "futura",
        "gfx",
        "gl",
        "glsl",
        "gpl",
        "gpu", "gpus",
        "hc",
        "hdc",
        "hdr",
        "hh", "mm", "ss", "ff",  # hh:mm:ss:ff timecode
        "hsv", "hsva",
        "id",
        "ior",
        "itu",
        "lhs",
        "lmb", "mmb", "rmb",
        "kb",
        "mocap",
        "msgid", "msgids",
        "mux",
        "ndof",
        "ppc",
        "precisa",
        "px",
        "qmc",
        "rgb", "rgba",
        "rhs",
        "rv",
        "sdl",
        "sl",
        "smpte",
        "svn",
        "ui",
        "unix",
        "vbo", "vbos",
        "ycc", "ycca",
        "yuv", "yuva",

        # Blender acronyms
        "bge",
        "bli",
        "bpy",
        "bvh",
        "dbvt",
        "dop",  # BLI K-Dop BVH
        "ik",
        "nla",
        "py",
        "qbvh",
        "rna",
        "rvo",
        "simd",
        "sph",
        "svbvh",

        # Files types/formats
        "avi",
        "attrac",
        "autocad",
        "autodesk",
        "bmp",
        "btx",
        "cineon",
        "dpx",
        "dxf",
        "eps",
        "exr",
        "fbx",
        "ffmpeg",
        "flac",
        "gzip",
        "ico",
        "jpg", "jpeg",
        "json",
        "matroska",
        "mdd",
        "mkv",
        "mpeg", "mjpeg",
        "mtl",
        "ogg",
        "openjpeg",
        "osl",
        "oso",
        "piz",
        "png",
        "po",
        "quicktime",
        "rle",
        "sgi",
        "stl",
        "svg",
        "targa", "tga",
        "tiff",
        "theora",
        "vorbis",
        "wav",
        "xiph",
        "xml",
        "xna",
        "xvid",
    }

    # A word may start at the beginning of the text, after whitespace, '*' or
    # a quote character, or right after a "<letter>/" or "<letter>-" pair.
    _valid_before = "(?<=[\\s*'\"`])|(?<=[a-zA-Z][/-])|(?<=^)"
    # A word may end at the end of the text, before whitespace, a quote or
    # common punctuation, or before a '/' or '-' that leads to another letter.
    _valid_after = "(?=[\\s'\"`.!?,;:])|(?=[/-]\\s*[a-zA-Z])|(?=$)"
    # The word itself is "Capitalized"/"UPPERlower", all upper or all lower
    # case; this pattern can also match the empty string (see split_words()).
    _valid_words = "(?:{})(?:(?:[A-Z]+[a-z]*)|[A-Z]*|[a-z]*)(?:{})".format(_valid_before, _valid_after)
    _split_words = re.compile(_valid_words).findall

    @classmethod
    def split_words(cls, text):
        """
        Return the list of words found in text, in order of appearance.

        Only runs of ASCII letters delimited as described by _valid_before
        and _valid_after are returned; the empty matches the pattern also
        produces are dropped.
        """
        return [w for w in cls._split_words(text) if w]

    def __init__(self, settings, lang="en_US"):
        """
        settings: object exposing a SPELL_CACHE attribute (path of the
                  pickled cache file, or a false value for no cache file).
        lang: language tag passed to enchant.Dict().
        """
        self.settings = settings
        self.dict_spelling = enchant.Dict(lang)
        self.cache = set(self.uimsgs)

        cache = self.settings.SPELL_CACHE
        if cache and os.path.exists(cache):
            # NOTE: pickle can execute arbitrary code while loading, the
            # cache file must only ever come from a trusted location.
            try:
                with open(cache, 'rb') as f:
                    self.cache |= set(pickle.load(f))
            except (EOFError, pickle.UnpicklingError):
                # Empty or truncated cache file (e.g. left behind by an
                # interrupted write): just start from the built-in words,
                # the file gets rewritten when this checker is destroyed.
                pass

    def __del__(self):
        """
        Write the cache back to settings.SPELL_CACHE, if that file exists.
        """
        # __del__ also runs on instances whose __init__ raised half-way
        # (e.g. enchant.Dict() failing): in that case there is nothing to
        # save, and the existing cache file must not be touched.
        settings = getattr(self, "settings", None)
        words = getattr(self, "cache", None)
        if settings is None or words is None:
            return

        cache = settings.SPELL_CACHE
        if cache and os.path.exists(cache):
            # Serialize before opening: 'wb' truncates the file, so a
            # failure while pickling must not leave it empty.
            data = pickle.dumps(words)
            with open(cache, 'wb') as f:
                f.write(data)

    def check(self, txt):
        """
        Spell-check txt.

        Return a list of (word, suggestions) tuples, one for each word of
        txt rejected by the dictionary (suggestions being what the
        dictionary's suggest() returns for that word). The list is empty
        when nothing wrong was found.
        """
        ret = []

        # Whole messages found valid previously are cached as well.
        if txt in self.cache:
            return ret

        for w in self.split_words(txt):
            w_lower = w.lower()
            if w_lower in self.cache:
                continue
            # The dictionary gets the word with its original case.
            if not self.dict_spelling.check(w):
                ret.append((w, self.dict_spelling.suggest(w)))
            else:
                self.cache.add(w_lower)

        if not ret:
            self.cache.add(txt)

        return ret