X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fmultilex.c;h=420bfcb8b1c88b7a684328313f85055601df6af2;hb=32da62601457ba994c6b71d71470ae066fc3969b;hp=75563117c55f8f51f6bbd151491f5e31a40cf1af;hpb=7cbefaec45b82eb0449465da19d38a89f2ff2cf3;p=gedcom-parse.git diff --git a/gedcom/multilex.c b/gedcom/multilex.c index 7556311..420bfcb 100644 --- a/gedcom/multilex.c +++ b/gedcom/multilex.c @@ -33,7 +33,7 @@ lex_func lf; #define NEW_MODEL_FILE "new.ged" -int lexer_init(ENCODING enc, FILE* f) +int lexer_init(Encoding enc, FILE* f) { if (enc == ONE_BYTE) { lf = &gedcom_1byte_lex; @@ -68,6 +68,12 @@ int gedcom_lex() return (*lf)(); } +void rewind_file(FILE* f) +{ + if (fseek(f, 0, 0) != 0) + gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); +} + int determine_encoding(FILE* f) { char first[2]; @@ -76,86 +82,70 @@ int determine_encoding(FILE* f) read = fread(first, 1, 2, f); if (read != 2) { gedcom_warning(_("Error reading from input file: %s"), strerror(errno)); + rewind_file(f); return ONE_BYTE; } else if ((first[0] == '0') && (first[1] == ' ')) { gedcom_debug_print(_("One-byte encoding")); - if (fseek(f, 0, 0) != 0) - gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); + rewind_file(f); return ONE_BYTE; } - else if ((first[0] == '\0') && (first[1] == '0')) - { + else if ((first[0] == '\0') && (first[1] == '0')) { gedcom_debug_print(_("Two-byte encoding, high-low")); - if (fseek(f, 0, 0) != 0) - gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); + rewind_file(f); return TWO_BYTE_HILO; } - else if ((first[0] == '\xFE') && (first[1] == '\xFF')) - { + else if ((first[0] == '\xFE') && (first[1] == '\xFF')) { gedcom_debug_print(_("Two-byte encoding, high-low, with BOM")); return TWO_BYTE_HILO; } - else if ((first[0] == '0') && (first[1] == '\0')) - { + else if ((first[0] == '0') && (first[1] == '\0')) { gedcom_debug_print(_("Two-byte encoding, low-high")); - if (fseek(f, 0, 0) != 0) - gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); + rewind_file(f); return TWO_BYTE_LOHI; } - else if ((first[0] == '\xFF') && (first[1] == '\xFE')) - { + else if ((first[0] == '\xFF') && (first[1] == '\xFE')) { gedcom_debug_print(_("Two-byte encoding, low-high, with BOM")); return TWO_BYTE_LOHI; } + else if ((first[0] == '\xEF') && (first[1] == '\xBB')) { + read = fread(first, 1, 1, f); + if (read != 1) { + gedcom_warning(_("Error reading from input file: %s"), strerror(errno)); + rewind_file(f); + } + else if (first[0] == '\xBF') { + gedcom_debug_print(_("UTF-8 encoding, with BOM")); + } + return ONE_BYTE; + } else { gedcom_warning(_("Unknown encoding, falling back to one-byte")); - if (fseek(f, 0, 0) != 0) - gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); + rewind_file(f); return ONE_BYTE; } } int init_called = 0; -void gedcom_init() +int gedcom_init() { init_called = 1; update_gconv_search_path(); + init_encodings(); + if (!setlocale(LC_ALL, "") + || ! bindtextdomain(PACKAGE, LOCALEDIR) + || ! bind_textdomain_codeset(PACKAGE, INTERNAL_ENCODING)) + return 1; + else + return 0; } int gedcom_parse_file(const char* file_name) { - ENCODING enc; + Encoding enc; int result = 1; FILE* file; - char *locale, *save_locale, *save_textdom; - - locale = setlocale(LC_ALL, NULL); - if (! locale) { - gedcom_error(_("Could not retrieve locale information")); - return result; - } - - save_locale = strdup(locale); - if (! save_locale) { - MEMORY_ERROR; - return result; - } - - save_textdom = textdomain(NULL); - if (!save_textdom) { - gedcom_error(_("Could not retrieve locale domain: %s"), strerror(errno)); - return result; - } - - if (! setlocale(LC_ALL, "") - || ! bindtextdomain(PACKAGE, LOCALEDIR) - || ! bind_textdomain_codeset(PACKAGE, INTERNAL_ENCODING) - || ! textdomain(PACKAGE)) { - gedcom_error(_("Could not set locale: %s"), strerror(errno)); - return result; - } if (!init_called) { gedcom_error(_("Internal error: GEDCOM parser not initialized")); @@ -167,7 +157,7 @@ int gedcom_parse_file(const char* file_name) file_name, strerror(errno)); } else { - init_encodings(); + line_no = 1; enc = determine_encoding(file); if (lexer_init(enc, file)) { @@ -183,12 +173,6 @@ int gedcom_parse_file(const char* file_name) } } - if (! textdomain(save_textdom) - || ! setlocale(LC_ALL, save_locale)) { - gedcom_error(_("Could not restore locale: %s"), strerror(errno)); - return result; - } - free(save_locale); return result; } @@ -228,6 +212,6 @@ int gedcom_check_version(int major, int minor, int patch) return 0; else if (patch <= GEDCOM_PARSE_VERSION_PATCH) return 1; - else { + else return 0; }