python/utf8 compatibility fixes. (as discussed on the mailing list)
authorCampbell Barton <ideasman42@gmail.com>
Sat, 28 Aug 2010 12:34:22 +0000 (12:34 +0000)
committerCampbell Barton <ideasman42@gmail.com>
Sat, 28 Aug 2010 12:34:22 +0000 (12:34 +0000)
- user input gets non utf8 chars stripped for all text input other than file paths.

- python has the same limitations: it will raise an error on non utf8 strings, except for paths, which use unicode escape literals so it's possible to deal with saving to these file paths from python.

- new string functions
  BLI_utf8_invalid_byte(str, len) returns the byte index of the first invalid utf8 byte, or -1 on success.
  BLI_utf8_invalid_strip(str, len) strips non utf-8 chars.

source/blender/blenlib/BLI_string.h
source/blender/blenlib/intern/string.c
source/blender/editors/include/UI_interface.h
source/blender/editors/interface/interface_handlers.c
source/blender/editors/space_file/file_draw.c
source/blender/makesrna/intern/rna_render.c
source/blender/makesrna/intern/rna_sequencer.c
source/blender/makesrna/intern/rna_space.c
source/blender/python/intern/bpy_rna.c

index 39123a438df5cd28e1be6dd4a6cbe216c2d161b8..ccb10190816b7710a4db80fe83cb22817b1c6ee9 100644 (file)
@@ -132,6 +132,9 @@ size_t BLI_strnlen(const char *str, size_t maxlen);
 
 void BLI_timestr(double _time, char *str); /* time var is global */
 
+/* Validate the first `length` bytes of `str` as utf-8.
+ * Per the commit description: returns the byte index of the first invalid byte, or -1 when valid. */
+int BLI_utf8_invalid_byte(const char *str, int length);
+/* Remove invalid utf-8 bytes from `str` in place (`length` is in bytes),
+ * returns the number of bytes that were stripped. */
+int BLI_utf8_invalid_strip(char *str, int length);
+
 #ifdef __cplusplus
 }
 #endif
index c344d8c0711d87d4fe91a529b1530390e7079a72..76193ba9a136a314c94ee4309ec838d8b23b4fa1 100644 (file)
@@ -348,3 +348,114 @@ size_t BLI_strnlen(const char *str, size_t maxlen)
        const char *end = memchr(str, '\0', maxlen);
        return end ? (size_t) (end - str) : maxlen;
 }
+
+/* from libswish3, originally called u8_isvalid(),
+ * modified to return the index of the bad character (byte index not utf).
+ * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */
+
+/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
+
+   length is in bytes, since without knowing whether the string is valid
+   it's hard to know how many characters there are! */
+
+/* Number of continuation bytes that follow a lead byte, indexed by the lead byte value:
+ * 0x00-0xBF -> 0, 0xC0-0xDF -> 1, 0xE0-0xEF -> 2, 0xF0-0xF7 -> 3, 0xF8-0xFB -> 4, 0xFC-0xFF -> 5.
+ * Each row below covers 32 consecutive byte values. */
+static const char trailingBytesForUTF8[256] = {
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+       2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/* Find the first malformed utf-8 sequence within the first `length` bytes of `str`.
+ *
+ * Returns the byte index (not the character index) of the offending byte,
+ * or -1 when all `length` bytes are valid.
+ * The reported byte is either a stray byte that cannot start a sequence,
+ * or the lead byte of a sequence that is truncated, overlong or has a bad
+ * continuation byte, so the index always lies inside the checked range.
+ *
+ * 5 and 6 byte forms (lead bytes 0xF8-0xFD) are still accepted,
+ * lead bytes 0xFE and 0xFF are rejected. */
+int BLI_utf8_invalid_byte(const char *str, int length)
+{
+    const unsigned char *p, *pend = (const unsigned char*)str + length;
+    const unsigned char *seq = (const unsigned char*)str; /* first byte of the sequence being checked */
+    unsigned char c;
+    int ab;
+
+    for (p = (const unsigned char*)str; p < pend; p++) {
+        c = *p;
+        if (c < 128)
+            continue;
+
+        /* remember where this sequence starts, errors are reported at this offset */
+        seq = p;
+
+        /* a continuation byte (10xx xxxx) can not start a sequence */
+        if ((c & 0xc0) != 0xc0)
+            goto utf8_error;
+        ab = trailingBytesForUTF8[c];
+
+        /* all continuation bytes must lie before pend, never read past the given length */
+        if ((pend - p) - 1 < ab)
+            goto utf8_error;
+
+        p++;
+        /* Check top bits in the second byte */
+        if ((*p & 0xc0) != 0x80)
+            goto utf8_error;
+
+        /* Check for overlong sequences for each different length */
+        switch (ab) {
+            /* Check for xx00 000x */
+        case 1:
+            if ((c & 0x3e) == 0) goto utf8_error;
+            continue;   /* We know there aren't any more bytes to check */
+
+            /* Check for 1110 0000, xx0x xxxx */
+        case 2:
+            if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error;
+            break;
+
+            /* Check for 1111 0000, xx00 xxxx */
+        case 3:
+            if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error;
+            break;
+
+            /* Check for 1111 1000, xx00 0xxx */
+        case 4:
+            if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error;
+            break;
+
+            /* Check for leading 0xfe or 0xff,
+               and then for 1111 1100, xx00 00xx */
+        case 5:
+            if (c == 0xfe || c == 0xff ||
+                (c == 0xfc && (*p & 0x3c) == 0)) goto utf8_error;
+            break;
+        }
+
+        /* Check for valid bytes after the 2nd, if any; all must start 10 */
+        while (--ab > 0) {
+            p++;
+            if ((*p & 0xc0) != 0x80) goto utf8_error;
+        }
+    }
+
+    return -1;
+
+utf8_error:
+
+    /* offset of the stray byte or of the lead byte of the broken sequence,
+     * never -1 so it can not be confused with the "valid" result */
+    return (int)(seq - (const unsigned char*)str);
+}
+
+int BLI_utf8_invalid_strip(char *str, int length)
+{
+       int bad_char, tot= 0;
+
+       while((bad_char= BLI_utf8_invalid_byte(str, length)) != -1) {
+               str += bad_char;
+               length -= bad_char;
+
+               if(length == 0) {
+                       /* last character bad, strip it */
+                       *str= '\0';
+                       tot++;
+                       break;
+               }
+               else {
+                       /* strip, keep looking */
+                       memmove(str, str + 1, length);
+                       tot++;
+               }
+       }
+
+       return tot;
+}
+
index 9601cdc3f21d9dd77da5c9b1fc5458d0117a2975..40985a918d4ec95b962af1a7ad3500cdd710b3d8 100644 (file)
@@ -135,7 +135,7 @@ typedef struct uiLayout uiLayout;
 #define UI_MAKE_RIGHT  8192
 
        /* button align flag, for drawing groups together */
-#define UI_BUT_ALIGN           (15<<14)
+#define UI_BUT_ALIGN           (UI_BUT_ALIGN_TOP|UI_BUT_ALIGN_LEFT|UI_BUT_ALIGN_RIGHT|UI_BUT_ALIGN_DOWN)
 #define UI_BUT_ALIGN_TOP       (1<<14)
 #define UI_BUT_ALIGN_LEFT      (1<<15)
 #define UI_BUT_ALIGN_RIGHT     (1<<16)
@@ -151,9 +151,10 @@ typedef struct uiLayout uiLayout;
 #define UI_BUT_UNDO                    (1<<25)
 #define UI_BUT_IMMEDIATE       (1<<26)
 #define UI_BUT_NO_TOOLTIP      (1<<27)
+#define UI_BUT_NO_UTF8         (1<<28)
 
-#define UI_BUT_VEC_SIZE_LOCK (1<<28) /* used to flag if color hsv-circle should keep luminance */
-#define UI_BUT_COLOR_CUBIC     (1<<29) /* cubic saturation for the color wheel */
+#define UI_BUT_VEC_SIZE_LOCK (1<<29) /* used to flag if color hsv-circle should keep luminance */
+#define UI_BUT_COLOR_CUBIC     (1<<30) /* cubic saturation for the color wheel */
 
 #define UI_PANEL_WIDTH                 340
 #define UI_COMPACT_PANEL_WIDTH 160
index ac06f635975b302a98d674226513295ca7bff5c4..484c78cd8319843777f143a73b1cc43983c734cc 100644 (file)
@@ -242,6 +242,20 @@ static int ui_is_a_warp_but(uiBut *but)
        return FALSE;
 }
 
+/* Return TRUE when the text of this button has to be valid utf-8.
+ *
+ * file selectors are exempt from utf-8 checks:
+ * - RNA string properties with a file path, directory path or file name subtype.
+ * - buttons flagged with UI_BUT_NO_UTF8. */
+static int ui_is_utf8_but(uiBut *but)
+{
+       if (but->rnaprop) {
+               const int subtype= RNA_property_subtype(but->rnaprop);
+
+               /* paths are exempt, returning TRUE here would get them stripped by the caller */
+               if(ELEM3(subtype, PROP_FILEPATH, PROP_DIRPATH, PROP_FILENAME)) {
+                       return FALSE;
+               }
+       }
+
+       return !(but->flag & UI_BUT_NO_UTF8);
+}
+
 /* ********************** button apply/revert ************************/
 
 static ListBase UIAfterFuncs = {NULL, NULL};
@@ -1572,6 +1586,15 @@ static void ui_textedit_begin(bContext *C, uiBut *but, uiHandleButtonData *data)
 static void ui_textedit_end(bContext *C, uiBut *but, uiHandleButtonData *data)
 {
        if(but) {
+               if(ui_is_utf8_but(but)) {
+                       int strip= BLI_utf8_invalid_strip(but->editstr, strlen(but->editstr));
+                       /* not a file?, strip non utf-8 chars */
+                       if(strip) {
+                               /* wont happen often so isnt that annoying to keep it here for a while */
+                               printf("invalid utf8 - stripped chars %d\n", strip);
+                       }
+               }
+               
                if(data->searchbox) {
                        if(data->cancel==0)
                                ui_searchbox_apply(but, data->searchbox);
index 3368ce636e1245b4e2cb893de8309692780c6190..0867acbfb60d92a4e252eaf9a033bae013fc1082 100644 (file)
@@ -177,11 +177,14 @@ void file_draw_buttons(const bContext *C, ARegion *ar)
                                 params->dir, 0.0, (float)FILE_MAX-1, 0, 0, 
                                 "File path.");
                uiButSetCompleteFunc(but, autocomplete_directory, NULL);
+               uiButSetFlag(but, UI_BUT_NO_UTF8);
+
                but = uiDefBut(block, TEX, B_FS_FILENAME, "",
                                 min_x, line2_y, line2_w-chan_offs, btn_h,
                                 params->file, 0.0, (float)FILE_MAXFILE-1, 0, 0, 
                                 "File name.");
                uiButSetCompleteFunc(but, autocomplete_file, NULL);
+               uiButSetFlag(but, UI_BUT_NO_UTF8);
        }
        
        /* Filename number increment / decrement buttons. */
index e4a89ed3121fd1a2b203e5d5fee25cf609fccfa1..e6b86ae8766ca5f4ef5f1b4a652841a47131a19e 100644 (file)
@@ -291,8 +291,8 @@ static void rna_def_render_engine(BlenderRNA *brna)
 static void rna_def_render_result(BlenderRNA *brna)
 {
        StructRNA *srna;
-       PropertyRNA *prop;
        FunctionRNA *func;
+       PropertyRNA *parm;
        
        srna= RNA_def_struct(brna, "RenderResult", NULL);
        RNA_def_struct_ui_text(srna, "Render Result", "Result of rendering, including all layers and passes");
@@ -300,22 +300,22 @@ static void rna_def_render_result(BlenderRNA *brna)
        func= RNA_def_function(srna, "load_from_file", "RE_result_load_from_file");
        RNA_def_function_ui_description(func, "Copies the pixels of this render result from an image file.");
        RNA_def_function_flag(func, FUNC_USE_REPORTS);
-       prop= RNA_def_string(func, "filename", "", 0, "Filename", "Filename to load into this render tile, must be no smaller then the render result");
-       RNA_def_property_flag(prop, PROP_REQUIRED);
+       parm= RNA_def_string_file_name(func, "filename", "", FILE_MAX, "File Name", "Filename to load into this render tile, must be no smaller then the render result");
+       RNA_def_property_flag(parm, PROP_REQUIRED);
 
        RNA_define_verify_sdna(0);
 
-       prop= RNA_def_property(srna, "resolution_x", PROP_INT, PROP_NONE);
-       RNA_def_property_int_sdna(prop, NULL, "rectx");
-       RNA_def_property_clear_flag(prop, PROP_EDITABLE);
+       parm= RNA_def_property(srna, "resolution_x", PROP_INT, PROP_NONE);
+       RNA_def_property_int_sdna(parm, NULL, "rectx");
+       RNA_def_property_clear_flag(parm, PROP_EDITABLE);
 
-       prop= RNA_def_property(srna, "resolution_y", PROP_INT, PROP_NONE);
-       RNA_def_property_int_sdna(prop, NULL, "recty");
-       RNA_def_property_clear_flag(prop, PROP_EDITABLE);
+       parm= RNA_def_property(srna, "resolution_y", PROP_INT, PROP_NONE);
+       RNA_def_property_int_sdna(parm, NULL, "recty");
+       RNA_def_property_clear_flag(parm, PROP_EDITABLE);
 
-       prop= RNA_def_property(srna, "layers", PROP_COLLECTION, PROP_NONE);
-       RNA_def_property_struct_type(prop, "RenderLayer");
-       RNA_def_property_collection_funcs(prop, "rna_RenderResult_layers_begin", "rna_iterator_listbase_next", "rna_iterator_listbase_end", "rna_iterator_listbase_get", 0, 0, 0);
+       parm= RNA_def_property(srna, "layers", PROP_COLLECTION, PROP_NONE);
+       RNA_def_property_struct_type(parm, "RenderLayer");
+       RNA_def_property_collection_funcs(parm, "rna_RenderResult_layers_begin", "rna_iterator_listbase_next", "rna_iterator_listbase_end", "rna_iterator_listbase_get", 0, 0, 0);
 
        RNA_define_verify_sdna(1);
 }
index d3207f14e24d0cbc176d451ee2026c833b970a82..3593efaa74347dfcc7c91252270f17c9fb73608a 100644 (file)
@@ -1336,7 +1336,7 @@ static void rna_def_plugin(BlenderRNA *brna)
        RNA_def_struct_ui_text(srna, "Plugin Sequence", "Sequence strip applying an effect, loaded from an external plugin");
        RNA_def_struct_sdna_from(srna, "PluginSeq", "plugin");
 
-       prop= RNA_def_property(srna, "filename", PROP_STRING, PROP_FILEPATH);
+       prop= RNA_def_property(srna, "filename", PROP_STRING, PROP_FILENAME);
        RNA_def_property_string_sdna(prop, NULL, "name");
        RNA_def_property_clear_flag(prop, PROP_EDITABLE);
        RNA_def_property_ui_text(prop, "Filename", "");
index 214ef891724518cf98fd2c837fe66471911001d2..780cc31edea335df40fbbc2df9351f99b9dd2ba9 100644 (file)
@@ -2047,12 +2047,12 @@ static void rna_def_fileselect_params(BlenderRNA *brna)
        RNA_def_property_ui_text(prop, "Title", "Title for the file browser");
        RNA_def_property_clear_flag(prop, PROP_EDITABLE);
 
-       prop= RNA_def_property(srna, "directory", PROP_STRING, PROP_NONE);
+       prop= RNA_def_property(srna, "directory", PROP_STRING, PROP_DIRPATH);
        RNA_def_property_string_sdna(prop, NULL, "dir");
        RNA_def_property_ui_text(prop, "Directory", "Directory displayed in the file browser");
        RNA_def_property_update(prop, NC_SPACE|ND_SPACE_FILE_PARAMS, NULL);
 
-       prop= RNA_def_property(srna, "filename", PROP_STRING, PROP_NONE);
+       prop= RNA_def_property(srna, "filename", PROP_STRING, PROP_FILENAME);
        RNA_def_property_string_sdna(prop, NULL, "file");
        RNA_def_property_ui_text(prop, "File Name", "Active file in the file browser");
        RNA_def_property_update(prop, NC_SPACE|ND_SPACE_FILE_PARAMS, NULL);
index 3bce1f83d6e9344dfade741bd6520bd347565ee1..2c16d4f2b5691e377e76b842a289f28a45b3346d 100644 (file)
@@ -49,6 +49,7 @@
 #include "ED_keyframing.h"
 
 #define USE_MATHUTILS
+#define USE_STRING_COERCE
 
 #ifdef USE_MATHUTILS
 #include "../generic/mathutils.h" /* so we can have mathutils callbacks */
@@ -192,6 +193,62 @@ Mathutils_Callback mathutils_rna_matrix_cb = {
        NULL
 };
 
+#ifdef USE_STRING_COERCE
+/* Convert a python string to a C byte string.
+ *
+ * First tries the plain conversion, when that fails the error is cleared and the
+ * string is encoded as utf-8 with the "surrogateescape" error handler instead.
+ * In that case the new bytes object owning the returned buffer is stored in
+ * `*coerce` (which must be NULL on entry), nothing is stored otherwise.
+ * Returns NULL when both conversions fail. */
+static const char *py_safe_unicode_to_byte(PyObject *py_str, PyObject **coerce)
+{
+       PyObject *as_unicode;
+       PyObject *as_bytes;
+       char *direct= _PyUnicode_AsString(py_str);
+
+       /* 99% of the time this is enough but we better support non unicode
+        * chars since blender doesnt limit this */
+       if(direct != NULL)
+               return direct;
+
+       /* mostly copied from fileio.c's, fileio_init */
+       PyErr_Clear();
+
+       /* coerce into unicode */
+       as_unicode= PyUnicode_FromObject(py_str);
+       if(as_unicode == NULL)
+               return NULL;
+
+       as_bytes= PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(as_unicode), PyUnicode_GET_SIZE(as_unicode), "surrogateescape");
+       Py_DECREF(as_unicode);
+
+       if(as_bytes == NULL)
+               return NULL;
+
+       /* this seems wrong but it works fine */
+       if(!PyBytes_Check(as_bytes)) {
+               Py_DECREF(as_bytes);
+               return NULL;
+       }
+
+       /* hand the bytes object over through `coerce`, it owns the buffer returned below */
+       *coerce= as_bytes;
+
+       return PyBytes_AS_STRING(as_bytes);
+}
+
+/* Convert a C byte string to a python string.
+ * When the plain conversion fails the error is cleared and the bytes are decoded
+ * as utf-8 with the "surrogateescape" error handler instead.
+ * Returns the result of whichever conversion ran last, NULL when that one failed. */
+static PyObject *py_safe_byte_to_unicode(char *str)
+{
+       /* 99% of the time this is enough but we better support non unicode
+        * chars since blender doesnt limit this */
+       PyObject *uni= PyUnicode_FromString(str);
+
+       if(uni == NULL) {
+               PyErr_Clear();
+               uni= PyUnicode_DecodeUTF8(str, strlen(str), "surrogateescape");
+       }
+
+       return uni;
+}
+#endif
+
 /* same as RNA_enum_value_from_id but raises an exception  */
 int pyrna_enum_value_from_id(EnumPropertyItem *item, const char *identifier, int *value, const char *error_prefix)
 {
@@ -768,9 +825,20 @@ PyObject * pyrna_prop_to_py(PointerRNA *ptr, PropertyRNA *prop)
                break;
        case PROP_STRING:
        {
+               int subtype= RNA_property_subtype(prop);
                char *buf;
                buf = RNA_property_string_get_alloc(ptr, prop, NULL, -1);
-               ret = PyUnicode_FromString( buf );
+#ifdef USE_STRING_COERCE
+               /* only file paths get special treatment, they may contain non utf-8 chars */
+               if(ELEM3(subtype, PROP_FILEPATH, PROP_DIRPATH, PROP_FILENAME)) {
+                       ret= py_safe_byte_to_unicode(buf);
+               }
+               else {
+                       ret= PyUnicode_FromString(buf);
+               }
+#else
+               ret= PyUnicode_FromString(buf);
+#endif
                MEM_freeN(buf);
                break;
        }
@@ -971,16 +1039,31 @@ int pyrna_py_to_prop(PointerRNA *ptr, PropertyRNA *prop, ParameterList *parms, v
                }
                case PROP_STRING:
                {
-                       char *param = _PyUnicode_AsString(value);
+                       const char *param;
+#ifdef USE_STRING_COERCE
+                       PyObject *value_coerce= NULL;
+                       int subtype= RNA_property_subtype(prop);
+                       if(ELEM3(subtype, PROP_FILEPATH, PROP_DIRPATH, PROP_FILENAME)) {
+                               param= py_safe_unicode_to_byte(value, &value_coerce);
+                       }
+                       else {
+                               param= _PyUnicode_AsString(value);
+                       }
+#else
+                       param= _PyUnicode_AsString(value);
+#endif
 
                        if (param==NULL) {
                                PyErr_Format(PyExc_TypeError, "%.200s %.200s.%.200s expected a string type", error_prefix, RNA_struct_identifier(ptr->type), RNA_property_identifier(prop));
                                return -1;
                        }
                        else {
-                               if(data)        *((char**)data)= param;
+                               if(data)        *((char**)data)= param; /*XXX, this assignes a pointer, wouldnt it be better to copy??? */
                                else            RNA_property_string_set(ptr, prop, param);
                        }
+#ifdef USE_STRING_COERCE
+                       Py_XDECREF(value_coerce);
+#endif
                        break;
                }
                case PROP_ENUM:
@@ -1307,7 +1390,7 @@ static PyObject *pyrna_prop_array_subscript_int(BPy_PropertyRNA *self, int keynu
        return NULL;
 }
 
-static PyObject *pyrna_prop_collection_subscript_str(BPy_PropertyRNA *self, char *keyname)
+static PyObject *pyrna_prop_collection_subscript_str(BPy_PropertyRNA *self, const char *keyname)
 {
        PointerRNA newptr;
        if(RNA_property_collection_lookup_string(&self->ptr, self->prop, keyname, &newptr))
@@ -3372,10 +3455,30 @@ PyObject *pyrna_param_to_py(PointerRNA *ptr, ParameterList *parms, PropertyRNA *
                        break;
                case PROP_STRING:
                {
-                       if(flag & PROP_THICK_WRAP)
-                               ret = PyUnicode_FromString( (char*)data );
+                       char *data_ch;
+                       PyObject *value_coerce= NULL;
+                       int subtype= RNA_property_subtype(prop);
+
+                       if (flag & PROP_THICK_WRAP)
+                               data_ch= (char *)data;
                        else
-                               ret = PyUnicode_FromString( *(char**)data );
+                               data_ch= *(char **)data;
+
+#ifdef USE_STRING_COERCE
+                       if(ELEM3(subtype, PROP_FILEPATH, PROP_DIRPATH, PROP_FILENAME)) {
+                               ret= py_safe_byte_to_unicode(data_ch);
+                       }
+                       else {
+                               ret= PyUnicode_FromString(data_ch);
+                       }
+#else
+                       ret = PyUnicode_FromString(data_ch);
+#endif
+
+#ifdef USE_STRING_COERCE
+                       Py_XDECREF(value_coerce);
+#endif
+
                        break;
                }
                case PROP_ENUM: