X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fmultilex.c;h=e4128174d5e6393ea497a8b6faa9f0be079a44c2;hb=ea81accae13a4617cc46f5256dad50e2732cc206;hp=18992760f5883a230d9e0c6671bf21903874b86e;hpb=3068ed1f64a96d42cdde19cfebf1468ec9169e62;p=gedcom-parse.git diff --git a/gedcom/multilex.c b/gedcom/multilex.c index 1899276..e412817 100644 --- a/gedcom/multilex.c +++ b/gedcom/multilex.c @@ -24,31 +24,34 @@ #include "gedcom_internal.h" #include "multilex.h" #include "encoding.h" +#include "encoding_state.h" #include "xref.h" -int line_no; +int line_no = 0; typedef int (*lex_func)(void); lex_func lf; -int lexer_init(ENCODING enc, FILE* f) +#define NEW_MODEL_FILE "new.ged" + +int lexer_init(Encoding enc, FILE* f) { if (enc == ONE_BYTE) { - gedcom_1byte_in = f; - lf = &gedcom_1byte_lex; - set_encoding_width(enc); + lf = &gedcom_1byte_lex; + gedcom_1byte_myinit(f); + set_read_encoding_width(enc); return open_conv_to_internal("ASCII"); } else if (enc == TWO_BYTE_HILO) { - gedcom_hilo_in = f; - lf = &gedcom_hilo_lex; - set_encoding_width(enc); + lf = &gedcom_hilo_lex; + gedcom_hilo_myinit(f); + set_read_encoding_width(enc); return open_conv_to_internal("UNICODE"); } else if (enc == TWO_BYTE_LOHI) { - gedcom_lohi_in = f; - lf = &gedcom_lohi_lex; - set_encoding_width(enc); + lf = &gedcom_lohi_lex; + gedcom_lohi_myinit(f); + set_read_encoding_width(enc); return open_conv_to_internal("UNICODE"); } else { @@ -66,79 +69,158 @@ int gedcom_lex() return (*lf)(); } +void rewind_file(FILE* f) +{ + if (fseek(f, 0, 0) != 0) + gedcom_warning(_("Error positioning input file: %s"), strerror(errno)); +} + int determine_encoding(FILE* f) { char first[2]; + int read; - fread(first, 1, 2, f); - if ((first[0] == '0') && (first[1] == ' ')) { - gedcom_debug_print(_("One-byte encoding")); - fseek(f, 0, 0); + set_read_encoding_bom(WITHOUT_BOM); + read = fread(first, 1, 2, f); + if (read != 2) { + gedcom_warning(_("Error reading from input file: %s"), strerror(errno)); + rewind_file(f); return ONE_BYTE; } - else if ((first[0] == '\0') && (first[1] == '0')) - { - gedcom_debug_print(_("Two-byte encoding, high-low")); - fseek(f, 0, 0); + else if ((first[0] == '0') && (first[1] == ' ')) { + gedcom_debug_print("One-byte encoding"); + rewind_file(f); + return ONE_BYTE; + } + else if ((first[0] == '\0') && (first[1] == '0')) { + gedcom_debug_print("Two-byte encoding, high-low"); + rewind_file(f); return TWO_BYTE_HILO; } - else if ((first[0] == '\xFE') && (first[1] == '\xFF')) - { - gedcom_debug_print(_("Two-byte encoding, high-low, with BOM")); + else if ((first[0] == '\xFE') && (first[1] == '\xFF')) { + gedcom_debug_print("Two-byte encoding, high-low, with BOM"); + set_read_encoding_bom(WITH_BOM); return TWO_BYTE_HILO; } - else if ((first[0] == '0') && (first[1] == '\0')) - { - gedcom_debug_print(_("Two-byte encoding, low-high")); - fseek(f, 0, 0); + else if ((first[0] == '0') && (first[1] == '\0')) { + gedcom_debug_print("Two-byte encoding, low-high"); + rewind_file(f); return TWO_BYTE_LOHI; } - else if ((first[0] == '\xFF') && (first[1] == '\xFE')) - { - gedcom_debug_print(_("Two-byte encoding, low-high, with BOM")); + else if ((first[0] == '\xFF') && (first[1] == '\xFE')) { + gedcom_debug_print("Two-byte encoding, low-high, with BOM"); + set_read_encoding_bom(WITH_BOM); return TWO_BYTE_LOHI; } + else if ((first[0] == '\xEF') && (first[1] == '\xBB')) { + read = fread(first, 1, 1, f); + if (read != 1) { + gedcom_warning(_("Error reading from input file: %s"), strerror(errno)); + rewind_file(f); + } + else if (first[0] == '\xBF') { + set_read_encoding_bom(WITH_BOM); + gedcom_debug_print("UTF-8 encoding, with BOM"); + } + else { + gedcom_warning(_("Unknown encoding, falling back to one-byte")); + rewind_file(f); + } + return ONE_BYTE; + } else { gedcom_warning(_("Unknown encoding, falling back to one-byte")); - fseek(f, 0, 0); + rewind_file(f); return ONE_BYTE; } } -int gedcom_parse_file(char* file_name) +int init_called = 0; + +int gedcom_init() { - ENCODING enc; + init_called = 1; + update_gconv_search_path(); + init_encodings(); + if (!setlocale(LC_ALL, "") + || ! bindtextdomain(PACKAGE, LOCALEDIR) + || ! bind_textdomain_codeset(PACKAGE, INTERNAL_ENCODING)) + return 1; + else + return 0; +} + +int gedcom_parse_file(const char* file_name) +{ + Encoding enc; int result = 1; FILE* file; - - char *save_textdom = textdomain(NULL); - setlocale(LC_ALL, ""); /* In fact only necessary if main program doesn't - do this */ - bindtextdomain(PACKAGE, LOCALEDIR); - bind_textdomain_codeset(PACKAGE, INTERNAL_ENCODING); - textdomain(PACKAGE); - - line_no = 1; - file = fopen(file_name, "r"); - if (!file) { - gedcom_error(_("Could not open file '%s'"), file_name); + + if (!init_called) { + gedcom_error(_("Internal error: GEDCOM parser not initialized")); } else { - init_encodings(); - enc = determine_encoding(file); - - if (lexer_init(enc, file)) { + file = fopen(file_name, "r"); + if (!file) { + gedcom_error(_("Could not open file '%s': %s"), + file_name, strerror(errno)); + } + else { line_no = 1; - make_xref_table(); - result = gedcom_parse(); - if (result == 0) - result = check_xref_table(); + enc = determine_encoding(file); + + if (lexer_init(enc, file)) { + line_no = 0; + make_xref_table(); + result = gedcom_parse(); + line_no = 0; + if (result == 0) + result = check_xref_table(); + } + lexer_close(); + fclose(file); } - lexer_close(); - fclose(file); } - textdomain(save_textdom); return result; } +int gedcom_new_model() +{ + int result = 1; + FILE* file; + + file = fopen(NEW_MODEL_FILE, "r"); + if (file) { + fclose(file); + result = gedcom_parse_file(NEW_MODEL_FILE); + } + else { + char* filename = (char*) malloc(strlen(PKGDATADIR) + strlen(NEW_MODEL_FILE) + + 2); + if (!filename) + MEMORY_ERROR; + else { + sprintf(filename, "%s/%s", PKGDATADIR, NEW_MODEL_FILE); + result = gedcom_parse_file(filename); + free(filename); + } + } + return result; +} + +int gedcom_check_version(int major, int minor, int patch) +{ + if (major < GEDCOM_PARSE_VERSION_MAJOR) + return 1; + else if (major > GEDCOM_PARSE_VERSION_MAJOR) + return 0; + else if (minor < GEDCOM_PARSE_VERSION_MINOR) + return 1; + else if (minor > GEDCOM_PARSE_VERSION_MINOR) + return 0; + else if (patch <= GEDCOM_PARSE_VERSION_PATCH) + return 1; + else + return 0; +}