2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version 2
5 * of the License, or (at your option) any later version.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software Foundation,
14 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * The Original Code is Copyright (C) 2017 by Blender Foundation.
17 * All rights reserved.
21 * Based on C++ version by Sergey Sharybin <sergey.vfx@gmail.com>.
22 * Based on Python script msgfmt.py from Python source code tree, which was written by
23 * Martin v. Löwis <loewis@informatik.hu-berlin.de>
25 * Generate binary message catalog from textual translation description.
27 * This program converts a textual Uniforum-style message catalog (.po file)
28 * into a binary GNU catalog (.mo file).
29 * This is essentially the same function as the GNU msgfmt program,
30 * however, it is a simpler implementation.
32 * Usage: msgfmt input.po output.mo
38 #include "BLI_dynstr.h"
39 #include "BLI_fileops.h"
40 #include "BLI_ghash.h"
41 #include "BLI_linklist.h"
42 #include "BLI_memarena.h"
43 #include "BLI_utildefines.h"
45 #include "MEM_guardedalloc.h"
47 /* Stub necessary because some BLI files include winstuff.h, which uses the global G a bit... */
49 typedef struct Global {
56 /* We cannot use the NUL char until the very last step, it would be a nightmare for our C
57 * string processing... Use one of the UTF-8 invalid bytes (as per our BLI string_utf8.c) as a
 * placeholder instead; msg_to_bytes() turns it back into a real NUL when serializing. */
/* String form, appended to the message buffers as a separator. */
58 #define NULLSEP_STR "\xff"
/* Char form, compared against while copying strings to the output. */
59 #define NULLSEP_CHR '\xff'
/* One catalog entry under construction. The parser fills its `ctxt`, `id` and `str` dynamic
 * strings, and `is_fuzzy` marks entries that add() must skip. */
68 typedef struct Message {
/* Strip leading and trailing whitespace (space, tab, CR, LF) from `str`, in place.
 *
 * Leading whitespace is skipped by advancing the returned pointer, trailing whitespace is cut
 * off by writing a new terminator. The buffer is only written to when there actually is
 * trailing whitespace to remove, so an empty string is left untouched.
 *
 * `str` must be a writable, NUL-terminated string.
 * Returns a pointer to the first non-whitespace char inside `str` (which is the terminator when
 * `str` is empty or made of whitespace only). */
static char *trim(char *str)
{
  static const char whitespace[] = " \t\r\n";

  /* Skip leading whitespace, this stops on the terminator at the latest. */
  str += strspn(str, whitespace);

  /* Walk back from the end of what is left. Measuring the remaining length here, instead of
   * combining the original length with the number of skipped chars, keeps every index and
   * pointer inside the buffer even when the whole string was whitespace. */
  size_t len = strlen(str);
  while (len > 0 && strchr(whitespace, str[len - 1]) != NULL) {
    len--;
  }

  if (str[len] != '\0') {
    str[len] = '\0';
  }

  return str;
}
/* Process the backslash escapes of `str` in place and return the resulting string.
 *
 * The loop walks the buffer with two cursors that advance together.
 * NOTE(review): `next` looks like the read position and `curr` like the write position of an
 * in-place compaction - confirm against the bodies of the escape branches. */
99 static char *unescape(char *str)
102 for (curr = next = str; next[0] != '\0'; curr++, next++) {
/* A backslash starts an escape sequence. */
103 if (next[0] == '\\') {
106 /* Get rid of trailing escape char... */
122 /* Get rid of useless escape char. */
/* Not an escape, and the two cursors are no longer at the same position. */
127 else if (curr != next) {
/* `str` starts with a double quote and the char right before `curr` is one too. The second test
 * is only evaluated when the first char is a quote, so it never reads before a non-empty
 * buffer. */
133 if (str[0] == '"' && *(curr - 1) == '"') {
/* qsort() callback for arrays of C strings: both arguments point to a `char *` element, and
 * the elements are ordered by strcmp() of the strings they refer to. */
static int qsort_str_cmp(const void *a, const void *b)
{
  const char *const *lhs = a;
  const char *const *rhs = b;

  return strcmp(*lhs, *rhs);
}
/* Gather the keys of `messages` into a newly allocated array and sort it in strcmp() order.
 *
 * The array stores the key pointers held by the hash itself, not copies of the strings.
 * NOTE(review): room is reserved for exactly `num_keys` pointers, so this assumes `num_keys`
 * matches the number of entries in `messages` (generate() passes BLI_ghash_len()). */
145 static char **get_keys_sorted(GHash *messages, const uint32_t num_keys)
149 char **keys = MEM_mallocN(sizeof(*keys) * num_keys, __func__);
/* Store the key of every entry the iterator visits. */
152 GHASH_ITER (iter, messages) {
153 *k = BLI_ghashIterator_getKey(&iter);
/* The hash iteration order is not the order wanted in the output, so sort by string content. */
157 qsort(keys, num_keys, sizeof(*keys), qsort_str_cmp);
162 BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes)
165 for (i = 0; i < sizeof(value); i++) {
166 bytes[i] = (char)((value >> ((int)i * 8)) & 0xff);
171 BLI_INLINE size_t msg_to_bytes(char *msg, char *bytes, uint32_t size)
173 /* Note that we also perform replacing of our NULLSEP placeholder by real NULL char... */
175 for (i = 0; i < size; i++, msg++, bytes++) {
176 *bytes = (*msg == NULLSEP_CHR) ? '\0' : *msg;
/* Where the key and the value of one catalog entry are stored. Offsets are relative to the
 * start of the key (resp. value) string area, lengths do not include the terminator. */
typedef struct Offset {
  uint32_t key_offset;
  uint32_t key_len;
  uint32_t val_offset;
  uint32_t val_len;
} Offset;
/* Serialize `messages` (key string to translation string) into the image of a binary catalog.
 *
 * Layout, in file order: a header of 7 32-bit integers, the key index table, the value index
 * table, all key strings, then all value strings. Entries follow the strcmp() order of the keys.
 * Integers are written through uint32_to_bytes(), strings through msg_to_bytes().
 *
 * `*r_output_size` receives the size in bytes of the generated buffer. */
185 /* Return the generated binary output. */
186 static char *generate(GHash *messages, size_t *r_output_size)
188 const uint32_t num_keys = BLI_ghash_len(messages);
190 /* Get list of sorted keys. */
191 char **keys = get_keys_sorted(messages, num_keys);
192 char **vals = MEM_mallocN(sizeof(*vals) * num_keys, __func__);
/* Running sizes of the key and value string areas, terminators included. */
193 uint32_t tot_keys_len = 0;
194 uint32_t tot_vals_len = 0;
196 Offset *offsets = MEM_mallocN(sizeof(*offsets) * num_keys, __func__);
/* First pass: look up every value and record the length of each string together with its
 * offset inside its own string area.
 * NOTE(review): `i` is a signed int compared against the unsigned `num_keys`. */
198 for (int i = 0; i < num_keys; i++) {
199 Offset *off = &offsets[i];
201 vals[i] = BLI_ghash_lookup(messages, keys[i]);
203 /* For each string, we need size and file offset.
204 * Each string is NULL terminated; the NULL does not count into the size. */
205 off->key_offset = tot_keys_len;
206 off->key_len = (uint32_t)strlen(keys[i]);
207 tot_keys_len += off->key_len + 1;
209 off->val_offset = tot_vals_len;
210 off->val_len = (uint32_t)strlen(vals[i]);
211 tot_vals_len += off->val_len + 1;
214 /* The header is 7 32-bit unsigned integers.
215 * Then comes the keys index table, then the values index table. */
216 const uint32_t idx_keystart = 7 * 4;
/* Each index entry is two 32-bit integers, hence 8 bytes per key. */
217 const uint32_t idx_valstart = idx_keystart + 8 * num_keys;
218 /* We don't use hash tables, so the keys start right after the index tables. */
219 const uint32_t keystart = idx_valstart + 8 * num_keys;
220 /* and the values start after the keys */
221 const uint32_t valstart = keystart + tot_keys_len;
223 /* Final buffer representing the binary MO file. */
/* NOTE(review): all sizes are accumulated in uint32_t and nothing visible here guards against
 * the total wrapping around. */
224 *r_output_size = valstart + tot_vals_len;
225 char *output = MEM_mallocN(*r_output_size, __func__);
/* One write cursor per area of the file. */
227 char *ik = output + idx_keystart;
228 char *iv = output + idx_valstart;
229 char *k = output + keystart;
230 char *v = output + valstart;
/* Header: `h` advances by the number of bytes written by each call. */
232 h += uint32_to_bytes(0x950412de, h); /* Magic */
233 h += uint32_to_bytes(0x0, h); /* Version */
234 h += uint32_to_bytes(num_keys, h); /* Number of entries */
235 h += uint32_to_bytes(idx_keystart, h); /* Start of key index */
236 h += uint32_to_bytes(idx_valstart, h); /* Start of value index */
237 h += uint32_to_bytes(0, h); /* Size of hash table */
238 h += uint32_to_bytes(0, h); /* Offset of hash table */
/* Second pass: write the index entries and the string data of every pair. */
242 for (int i = 0; i < num_keys; i++) {
243 Offset *off = &offsets[i];
245 /* The index table first has the list of keys, then the list of values.
246 * Each entry has first the size of the string, then the file offset. */
247 ik += uint32_to_bytes(off->key_len, ik);
/* Absolute file offset: offset inside the string area plus the start of that area. */
248 ik += uint32_to_bytes(off->key_offset + keystart, ik);
249 iv += uint32_to_bytes(off->val_len, iv);
250 iv += uint32_to_bytes(off->val_offset + valstart, iv);
/* Length + 1 so that the terminator gets copied as well. */
252 k += msg_to_bytes(keys[i], k, off->key_len + 1);
253 v += msg_to_bytes(vals[i], v, off->val_len + 1);
/* Sanity checks: each cursor must have stopped exactly where the next area starts. */
256 BLI_assert(ik == output + idx_valstart);
257 BLI_assert(iv == output + keystart);
258 BLI_assert(k == output + valstart);
267 /* Add a non-fuzzy translation to the dictionary. */
268 static void add(GHash *messages, MemArena *memarena, const Message *msg)
270 const size_t msgctxt_len = (size_t)BLI_dynstr_get_len(msg->ctxt);
271 const size_t msgid_len = (size_t)BLI_dynstr_get_len(msg->id);
272 const size_t msgstr_len = (size_t)BLI_dynstr_get_len(msg->str);
273 const size_t msgkey_len = msgid_len + ((msgctxt_len == 0) ? 0 : msgctxt_len + 1);
275 if (!msg->is_fuzzy && msgstr_len != 0) {
276 char *msgkey = BLI_memarena_alloc(memarena, sizeof(*msgkey) * (msgkey_len + 1));
277 char *msgstr = BLI_memarena_alloc(memarena, sizeof(*msgstr) * (msgstr_len + 1));
279 if (msgctxt_len != 0) {
280 BLI_dynstr_get_cstring_ex(msg->ctxt, msgkey);
281 msgkey[msgctxt_len] = '\x04'; /* Context/msgid separator */
282 BLI_dynstr_get_cstring_ex(msg->id, &msgkey[msgctxt_len + 1]);
285 BLI_dynstr_get_cstring_ex(msg->id, msgkey);
288 BLI_dynstr_get_cstring_ex(msg->str, msgstr);
290 BLI_ghash_insert(messages, msgkey, msgstr);
294 static void clear(Message *msg)
296 BLI_dynstr_clear(msg->ctxt);
297 BLI_dynstr_clear(msg->id);
298 BLI_dynstr_clear(msg->str);
299 msg->is_fuzzy = false;
/* Convert the textual catalog `input_file_name` into a binary catalog written to
 * `output_file_name`.
 *
 * The input is read as a list of lines and run through a small state machine: `section` tells
 * which of msgctxt, msgid or msgstr the current line belongs to, `msg` accumulates the entry
 * being read, and add() is called each time a msgstr section comes to an end. The collected
 * entries are then serialized by generate() and written to the output file.
 *
 * The returned value is what main() hands back as the exit status of the program. */
302 static int make(const char *input_file_name, const char *output_file_name)
/* Maps key strings to translation strings, both allocated from `msgs_memarena` by add(). */
304 GHash *messages = BLI_ghash_new(BLI_ghashutil_strhash_p_murmur, BLI_ghashutil_strcmp, __func__);
305 MemArena *msgs_memarena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
/* Keywords looked for at the very start of a line, and their lengths. */
307 const char *msgctxt_kw = "msgctxt";
308 const char *msgid_kw = "msgid";
309 const char *msgid_plural_kw = "msgid_plural";
310 const char *msgstr_kw = "msgstr";
311 const size_t msgctxt_len = strlen(msgctxt_kw);
312 const size_t msgid_len = strlen(msgid_kw);
313 const size_t msgid_plural_len = strlen(msgid_plural_kw);
314 const size_t msgstr_len = strlen(msgstr_kw);
316 /* Note: For now, we assume file encoding is always utf-8. */
318 eSectionType section = SECTION_NONE;
319 bool is_plural = false;
322 .ctxt = BLI_dynstr_new_memarena(),
323 .id = BLI_dynstr_new_memarena(),
324 .str = BLI_dynstr_new_memarena(),
/* NOTE(review): the list is used without any check. Presumably it is NULL when the input cannot
 * be read, in which case the loop below does not run and nothing is reported - confirm. */
328 LinkNode *input_file_lines = BLI_file_read_as_lines(input_file_name);
329 LinkNode *ifl = input_file_lines;
331 /* Parse the catalog. */
/* `lno` is the 1-based line number used in the diagnostics below. */
332 for (int lno = 1; ifl; ifl = ifl->next, lno++) {
334 const bool is_comment = (l[0] == '#');
335 /* If we get a comment line after a msgstr, this is a new entry. */
337 if (section == SECTION_STR) {
338 add(messages, msgs_memarena, &msg);
340 section = SECTION_NONE;
342 /* Record a fuzzy mark. */
/* A comment whose second char is a comma and which contains the word fuzzy anywhere. */
343 if (l[1] == ',' && strstr(l, "fuzzy") != NULL) {
/* strstr() returning the line itself means that the line starts with the keyword. */
349 if (strstr(l, msgctxt_kw) == l) {
350 if (section == SECTION_STR) {
351 /* New message, output previous section. */
352 add(messages, msgs_memarena, &msg);
354 if (!ELEM(section, SECTION_NONE, SECTION_STR)) {
355 printf("msgctxt not at start of new message on %s:%d\n", input_file_name, lno);
358 section = SECTION_CTX;
/* Has to be tested before plain msgid, which is a prefix of msgid_plural. */
362 else if (strstr(l, msgid_plural_kw) == l) {
363 /* This is a message with plural forms. */
364 if (section != SECTION_ID) {
365 printf("msgid_plural not preceded by msgid on %s:%d\n", input_file_name, lno);
/* Skip the keyword itself. */
368 l = l + msgid_plural_len;
369 BLI_dynstr_append(msg.id, NULLSEP_STR); /* separator of singular and plural */
372 else if (strstr(l, msgid_kw) == l) {
373 if (section == SECTION_STR) {
374 add(messages, msgs_memarena, &msg);
376 if (section != SECTION_CTX) {
379 section = SECTION_ID;
383 else if (strstr(l, msgstr_kw) == l) {
385 // Now we are in a msgstr section
386 section = SECTION_STR;
389 printf("plural without msgid_plural on %s:%d\n", input_file_name, lno);
/* Move to the closing bracket of the plural index. */
392 if ((l = strchr(l, ']')) == NULL) {
393 printf("Syntax error on %s:%d\n", input_file_name, lno);
/* Not the first plural form of this entry. */
396 if (BLI_dynstr_get_len(msg.str) != 0) {
397 BLI_dynstr_append(msg.str, NULLSEP_STR); /* Separator of the various plural forms. */
402 printf("indexed msgstr required for plural on %s:%d\n", input_file_name, lno);
407 /* Skip empty lines. */
410 if (section == SECTION_STR) {
411 add(messages, msgs_memarena, &msg);
414 section = SECTION_NONE;
/* Append the text of this line to the buffer of the section being read. */
418 if (section == SECTION_CTX) {
419 BLI_dynstr_append(msg.ctxt, l);
421 else if (section == SECTION_ID) {
422 BLI_dynstr_append(msg.id, l);
424 else if (section == SECTION_STR) {
425 BLI_dynstr_append(msg.str, l);
428 printf("Syntax error on %s:%d\n", input_file_name, lno);
/* Flush the entry that was still being read when the input ended. */
433 if (section == SECTION_STR) {
434 add(messages, msgs_memarena, &msg);
437 BLI_dynstr_free(msg.ctxt);
438 BLI_dynstr_free(msg.id);
439 BLI_dynstr_free(msg.str);
440 BLI_file_free_lines(input_file_lines);
444 char *output = generate(messages, &output_size);
/* NOTE(review): `fp` is handed to fwrite() without a NULL check and the result of fwrite() is
 * ignored, so a failure to open or to write the output file goes unnoticed here. */
446 FILE *fp = BLI_fopen(output_file_name, "wb");
447 fwrite(output, 1, output_size, fp);
/* No per-entry free callbacks: add() allocates keys and values from the memory arena, which is
 * freed as a whole right after. */
451 BLI_ghash_free(messages, NULL, NULL);
452 BLI_memarena_free(msgs_memarena);
/* Program entry point: takes the path of the input .po file and the path of the output .mo file
 * as its two arguments, and returns the result of make() as exit status. */
457 int main(int argc, char **argv)
/* Usage message, naming the program as it was invoked. */
460 printf("Usage: %s <input.po> <output.mo>\n", argv[0]);
463 const char *input_file = argv[1];
464 const char *output_file = argv[2];
466 return make(input_file, output_file);