1 /* The lexer multiplexer for Gedcom.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 #include "gedcom_internal.h"
27 #include "encoding_state.h"
/* Pointer to a lexer function that takes no arguments and returns an int. */
32 typedef int (*lex_func)(void);
/* Name of the GEDCOM file that gedcom_new_model() parses; it is tried as-is
   and also joined behind PKGDATADIR. */
35 #define NEW_MODEL_FILE "new.ged"
/* Select and initialise the lexer that matches the input encoding.
 *
 * enc: encoding value, compared against ONE_BYTE, TWO_BYTE_HILO and
 *      TWO_BYTE_LOHI.
 * f:   input stream handed to the chosen lexer's *_myinit() routine.
 *
 * For each recognised encoding the code stores the lexer entry point in lf,
 * initialises that lexer with f, records the encoding through
 * set_read_encoding_width(), and returns whatever open_conv_to_internal()
 * returns for the charset name:
 *   ONE_BYTE      -> gedcom_1byte_lex, "ASCII"
 *   TWO_BYTE_HILO -> gedcom_hilo_lex,  "UNICODE"
 *   TWO_BYTE_LOHI -> gedcom_lohi_lex,  "UNICODE"
 *
 * gedcom_parse_file() uses the return value as the condition for parsing,
 * so a non-zero result appears to mean success -- TODO confirm against
 * open_conv_to_internal().
 * NOTE(review): the handling of any other enc value is not visible in this
 * excerpt.
 */
37 int lexer_init(Encoding enc, FILE* f)
/* One byte per character: input is converted from "ASCII". */
39 if (enc == ONE_BYTE) {
40 lf = &gedcom_1byte_lex;
41 gedcom_1byte_myinit(f);
42 set_read_encoding_width(enc);
43 return open_conv_to_internal("ASCII");
/* Two bytes per character, high byte first. */
45 else if (enc == TWO_BYTE_HILO) {
46 lf = &gedcom_hilo_lex;
47 gedcom_hilo_myinit(f);
48 set_read_encoding_width(enc);
49 return open_conv_to_internal("UNICODE");
/* Two bytes per character, low byte first; same charset name as above, only
   the lexer differs. */
51 else if (enc == TWO_BYTE_LOHI) {
52 lf = &gedcom_lohi_lex;
53 gedcom_lohi_myinit(f);
54 set_read_encoding_width(enc);
55 return open_conv_to_internal("UNICODE");
/* NOTE(review): the enclosing function header is not visible in this excerpt.
   Presumably this undoes the open_conv_to_internal() call made in
   lexer_init() -- confirm against the conversion module. */
64 close_conv_to_internal();
/* Seek the stream f to offset 0 and warn if the seek fails.
 *
 * Nothing is returned; a failing fseek() is only reported through
 * gedcom_warning() with the strerror(errno) text.
 * NOTE(review): the third fseek() argument is the literal 0; that equals
 * SEEK_SET on common C libraries, but the named constant would state the
 * intent -- confirm before changing.
 */
72 void rewind_file(FILE* f)
74 if (fseek(f, 0, 0) != 0)
75 gedcom_warning(_("Error positioning input file: %s"), strerror(errno));
/* Guess the encoding of the input stream f from its leading bytes.
 *
 * Reads two bytes into first[] and matches them against the patterns below.
 * The byte-order-mark branches additionally call
 * set_read_encoding_bom(WITH_BOM).
 *
 *   '0'  ' '     one-byte encoding
 *   \0   '0'     two-byte, high-low
 *   FE   FF      two-byte, high-low, with BOM  (returns TWO_BYTE_HILO)
 *   '0'  \0      two-byte, low-high            (returns TWO_BYTE_LOHI)
 *   FF   FE      two-byte, low-high, with BOM  (returns TWO_BYTE_LOHI)
 *   EF   BB      a third byte is read; BF means UTF-8 with BOM
 *
 * Presumably '0' is tested because a GEDCOM file starts with a level-0
 * line -- TODO confirm against the format specification.
 * NOTE(review): the declarations of first and read, several guards, and the
 * return statements of most branches are not visible in this excerpt, so the
 * returned value is documented only where a return line is visible.
 */
78 int determine_encoding(FILE* f)
/* Default to "no BOM"; the BOM branches below override it. */
83 set_read_encoding_bom(WITHOUT_BOM);
/* Fetch the first two bytes of the stream. */
84 read = fread(first, 1, 2, f);
/* NOTE(review): the guarding condition is not visible. If it fires on a short
   read at end-of-file, errno may not have been set by fread(), so the
   strerror() text could be unrelated -- verify. */
86 gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
/* Character '0' followed by a space, one byte each. */
90 else if ((first[0] == '0') && (first[1] == ' ')) {
91 gedcom_debug_print("One-byte encoding");
/* Zero byte then '0': the character '0' stored high byte first. */
95 else if ((first[0] == '\0') && (first[1] == '0')) {
96 gedcom_debug_print("Two-byte encoding, high-low");
/* Bytes FE FF. */
100 else if ((first[0] == '\xFE') && (first[1] == '\xFF')) {
101 gedcom_debug_print("Two-byte encoding, high-low, with BOM");
102 set_read_encoding_bom(WITH_BOM);
103 return TWO_BYTE_HILO;
/* '0' then zero byte: the character '0' stored low byte first. */
105 else if ((first[0] == '0') && (first[1] == '\0')) {
106 gedcom_debug_print("Two-byte encoding, low-high");
108 return TWO_BYTE_LOHI;
/* Bytes FF FE. */
110 else if ((first[0] == '\xFF') && (first[1] == '\xFE')) {
111 gedcom_debug_print("Two-byte encoding, low-high, with BOM");
112 set_read_encoding_bom(WITH_BOM);
113 return TWO_BYTE_LOHI;
/* Bytes EF BB: read one more byte into first[0] to look for BF. */
115 else if ((first[0] == '\xEF') && (first[1] == '\xBB')) {
116 read = fread(first, 1, 1, f);
/* Warning for the third-byte read; its guard is not visible here. */
118 gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
/* Third byte BF completes the EF BB BF sequence. */
121 else if (first[0] == '\xBF') {
122 set_read_encoding_bom(WITH_BOM);
123 gedcom_debug_print("UTF-8 encoding, with BOM");
/* Two fallback warnings with identical text. Presumably one belongs to the
   EF BB branch and one to the outer chain -- the surrounding else lines are
   not visible. */
126 gedcom_warning(_("Unknown encoding, falling back to one-byte"));
132 gedcom_warning(_("Unknown encoding, falling back to one-byte"));
/* NOTE(review): the enclosing function header and the body of the if below
   are not visible in this excerpt; presumably this is the library
   initialisation routine -- confirm. */
143 update_gconv_search_path();
/* Locale and message-catalogue setup: take the locale from the environment,
   bind the PACKAGE text domain to LOCALEDIR, and request translated messages
   in INTERNAL_ENCODING. The condition is true as soon as any one of the
   three calls returns a null/zero value. */
145 if (!setlocale(LC_ALL, "")
146 || ! bindtextdomain(PACKAGE, LOCALEDIR)
147 || ! bind_textdomain_codeset(PACKAGE, INTERNAL_ENCODING))
/* Parse the GEDCOM file named by file_name.
 *
 * Visible steps: open the file for reading, detect its encoding with
 * determine_encoding(), initialise the matching lexer with lexer_init(), run
 * gedcom_parse(), and take a result from check_xref_table().
 *
 * file_name: path passed unchanged to fopen().
 * NOTE(review): the declarations, the guards around the error reports, the
 * cleanup and the final return are not visible in this excerpt, so the
 * meaning of the returned int cannot be stated from here.
 */
153 int gedcom_parse_file(const char* file_name)
/* Error text for an uninitialised parser; the check that triggers it is not
   visible here. */
160 gedcom_error(_("Internal error: GEDCOM parser not initialized"));
/* NOTE(review): mode "r" is text mode. determine_encoding() also accepts
   two-byte input, which may need "rb" on platforms that translate line
   endings -- verify. */
163 file = fopen(file_name, "r");
/* Open-failure report, including the system error text for errno. */
165 gedcom_error(_("Could not open file '%s': %s"),
166 file_name, strerror(errno));
/* Inspect the leading bytes of the file to pick an encoding. */
170 enc = determine_encoding(file);
/* Continue only when the lexer set-up returns non-zero. */
172 if (lexer_init(enc, file)) {
175 result = gedcom_parse();
/* result is overwritten with the cross-reference check's return value; any
   guard on this line is not visible. */
178 result = check_xref_table();
/* Parse the NEW_MODEL_FILE GEDCOM file.
 *
 * Two locations are visible: the bare name NEW_MODEL_FILE, probed with
 * fopen() and then passed to gedcom_parse_file(), and the path
 * "PKGDATADIR/NEW_MODEL_FILE", built in a heap buffer.
 * NOTE(review): the conditions choosing between the two, the rest of the
 * malloc() size expression, any NULL check, and the fclose()/free() calls
 * are not visible in this excerpt -- presumably the second path is the
 * fallback when the first fopen() fails; confirm.
 */
188 int gedcom_new_model()
/* Probe for the file under its bare name. */
193 file = fopen(NEW_MODEL_FILE, "r");
196 result = gedcom_parse_file(NEW_MODEL_FILE);
/* Buffer for the joined path; the size expression continues on a line that
   is not visible here. */
199 char* filename = (char*) malloc(strlen(PKGDATADIR) + strlen(NEW_MODEL_FILE)
/* Join directory and file name with a '/' separator. */
204 sprintf(filename, "%s/%s", PKGDATADIR, NEW_MODEL_FILE);
205 result = gedcom_parse_file(filename);
/* Compare a (major, minor, patch) version triple with the library's
 * GEDCOM_PARSE_VERSION_MAJOR / _MINOR / _PATCH constants.
 *
 * The comparison goes from most to least significant component: major is
 * tested first, minor only when major is equal, patch only when both are
 * equal.
 * NOTE(review): the return statement of every branch is not visible in this
 * excerpt, so which outcome means "compatible" cannot be stated from here.
 */
212 int gedcom_check_version(int major, int minor, int patch)
214 if (major < GEDCOM_PARSE_VERSION_MAJOR)
216 else if (major > GEDCOM_PARSE_VERSION_MAJOR)
218 else if (minor < GEDCOM_PARSE_VERSION_MINOR)
220 else if (minor > GEDCOM_PARSE_VERSION_MINOR)
/* Note the <= : an equal patch level takes the same branch as a lower one. */
222 else if (patch <= GEDCOM_PARSE_VERSION_PATCH)