X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fmultilex.c;h=721702e2541184339e9bbb7079a861ff2e6d1f8d;hb=03b5e1e83ef68195cf25ca2741a6ad9bdf6f027c;hp=e8d514cb67296e2777548eeb0ebf80c1ed733ade;hpb=015d4f6782128a884085c6d6958e4509aeb96655;p=gedcom-parse.git diff --git a/gedcom/multilex.c b/gedcom/multilex.c index e8d514c..721702e 100644 --- a/gedcom/multilex.c +++ b/gedcom/multilex.c @@ -68,50 +68,68 @@ int gedcom_lex() return (*lf)(); } +void rewind_file(FILE* f) +{ + if (fseek(f, 0, 0) != 0) + gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); +} + int determine_encoding(FILE* f) { char first[2]; int read; + set_encoding_bom(WITHOUT_BOM); read = fread(first, 1, 2, f); if (read != 2) { gedcom_warning(_("Error reading from input file: %s"), strerror(errno)); + rewind_file(f); return ONE_BYTE; } else if ((first[0] == '0') && (first[1] == ' ')) { - gedcom_debug_print(_("One-byte encoding")); - if (fseek(f, 0, 0) != 0) - gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); + gedcom_debug_print("One-byte encoding"); + rewind_file(f); return ONE_BYTE; } - else if ((first[0] == '\0') && (first[1] == '0')) - { - gedcom_debug_print(_("Two-byte encoding, high-low")); - if (fseek(f, 0, 0) != 0) - gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); + else if ((first[0] == '\0') && (first[1] == '0')) { + gedcom_debug_print("Two-byte encoding, high-low"); + rewind_file(f); return TWO_BYTE_HILO; } - else if ((first[0] == '\xFE') && (first[1] == '\xFF')) - { - gedcom_debug_print(_("Two-byte encoding, high-low, with BOM")); + else if ((first[0] == '\xFE') && (first[1] == '\xFF')) { + gedcom_debug_print("Two-byte encoding, high-low, with BOM"); + set_encoding_bom(WITH_BOM); return TWO_BYTE_HILO; } - else if ((first[0] == '0') && (first[1] == '\0')) - { - gedcom_debug_print(_("Two-byte encoding, low-high")); - if (fseek(f, 0, 0) != 0) - gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); + else if ((first[0] == '0') && (first[1] == '\0')) { + gedcom_debug_print("Two-byte encoding, low-high"); + rewind_file(f); return TWO_BYTE_LOHI; } - else if ((first[0] == '\xFF') && (first[1] == '\xFE')) - { - gedcom_debug_print(_("Two-byte encoding, low-high, with BOM")); + else if ((first[0] == '\xFF') && (first[1] == '\xFE')) { + gedcom_debug_print("Two-byte encoding, low-high, with BOM"); + set_encoding_bom(WITH_BOM); return TWO_BYTE_LOHI; } + else if ((first[0] == '\xEF') && (first[1] == '\xBB')) { + read = fread(first, 1, 1, f); + if (read != 1) { + gedcom_warning(_("Error reading from input file: %s"), strerror(errno)); + rewind_file(f); + } + else if (first[0] == '\xBF') { + set_encoding_bom(WITH_BOM); + gedcom_debug_print("UTF-8 encoding, with BOM"); + } + else { + gedcom_warning(_("Unknown encoding, falling back to one-byte")); + rewind_file(f); + } + return ONE_BYTE; + } else { gedcom_warning(_("Unknown encoding, falling back to one-byte")); - if (fseek(f, 0, 0) != 0) - gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); + rewind_file(f); return ONE_BYTE; } }