1 /* The lexer multiplexer for Gedcom.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 #include "gedcom_internal.h"
27 #include "encoding_state.h"
/* Pointer to a lexer entry point: takes no arguments and returns an int.
   NOTE(review): presumably the type of the `lf` variable that lexer_init()
   assigns; its declaration is not visible in this listing. */
32 typedef int (*lex_func)(void);
/* File name of the model file that gedcom_new_model() opens and parses. */
35 #define NEW_MODEL_FILE "new.ged"
/* Selects the lexer that matches the encoding `enc` and hands it the stream.

   For each encoding handled below, the same four steps run in order:
   the lexer entry point is stored in `lf`, that lexer's own init routine
   receives `f`, the encoding is passed to set_read_encoding_width(), and the
   value returned by open_conv_to_internal() is passed back to the caller.

   NOTE(review): this listing omits some original lines (braces and whatever
   follows the last branch), so the handling of any other `enc` value is not
   visible here. */
37 int lexer_init(Encoding enc, FILE* f)
/* One-byte input: the conversion is opened with the name "ASCII". */
39 if (enc == ONE_BYTE) {
40 lf = &gedcom_1byte_lex;
41 gedcom_1byte_myinit(f);
42 set_read_encoding_width(enc);
43 return open_conv_to_internal("ASCII");
/* Two-byte input, high byte first: conversion opened with "UNICODE". */
45 else if (enc == TWO_BYTE_HILO) {
46 lf = &gedcom_hilo_lex;
47 gedcom_hilo_myinit(f);
48 set_read_encoding_width(enc);
49 return open_conv_to_internal("UNICODE");
/* Two-byte input, low byte first: same conversion name, different lexer. */
51 else if (enc == TWO_BYTE_LOHI) {
52 lf = &gedcom_lohi_lex;
53 gedcom_lohi_myinit(f);
54 set_read_encoding_width(enc);
55 return open_conv_to_internal("UNICODE");
64 close_conv_to_internal();
/* Moves the read position of `f` back to the start of the file.
 *
 * If repositioning fails, the failure is reported through gedcom_warning()
 * together with the system error text.  Nothing is returned, so the caller
 * is not told about the failure.
 */
void rewind_file(FILE* f)
{
  /* SEEK_SET is the portable way to say "relative to the start of the
     file"; the C standard does not promise that its value is 0. */
  if (fseek(f, 0L, SEEK_SET) != 0) {
    gedcom_warning(_("Error positioning input file: %s"), strerror(errno));
  }
}
/* Looks at the first bytes of `f` to decide which encoding the lexer should
   assume, and records through set_read_encoding_bom() whether a byte-order
   mark was seen.

   NOTE(review): this listing omits several original lines (the local
   declarations, the conditions guarding the read-error warnings, and some of
   the return statements), so only the visible branches are described. */
78 int determine_encoding(FILE* f)
/* Start from "no byte-order mark"; the BOM branches below override this. */
83 set_read_encoding_bom(WITHOUT_BOM);
/* Fetch the first two bytes of the stream. */
84 read = fread(first, 1, 2, f);
/* Warning for a failed read (the guarding condition is not visible here). */
86 gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
/* '0' followed by a space, one byte each. */
90 else if ((first[0] == '0') && (first[1] == ' ')) {
91 gedcom_debug_print("One-byte encoding");
/* A NUL byte followed by '0': the character '0' stored in two bytes with the
   high byte first. */
95 else if ((first[0] == '\0') && (first[1] == '0')) {
96 gedcom_debug_print("Two-byte encoding, high-low");
/* FE FF: byte-order mark of a two-byte stream with the high byte first. */
100 else if ((first[0] == '\xFE') && (first[1] == '\xFF')) {
101 gedcom_debug_print("Two-byte encoding, high-low, with BOM");
102 set_read_encoding_bom(WITH_BOM);
103 return TWO_BYTE_HILO;
/* '0' followed by a NUL byte: the character '0' stored in two bytes with the
   low byte first. */
105 else if ((first[0] == '0') && (first[1] == '\0')) {
106 gedcom_debug_print("Two-byte encoding, low-high");
108 return TWO_BYTE_LOHI;
/* FF FE: byte-order mark of a two-byte stream with the low byte first. */
110 else if ((first[0] == '\xFF') && (first[1] == '\xFE')) {
111 gedcom_debug_print("Two-byte encoding, low-high, with BOM");
112 set_read_encoding_bom(WITH_BOM);
113 return TWO_BYTE_LOHI;
/* EF BB are the first two bytes of the UTF-8 byte-order mark; one more byte
   is read (into first[0], overwriting it) to check for the third byte, BF. */
115 else if ((first[0] == '\xEF') && (first[1] == '\xBB')) {
116 read = fread(first, 1, 1, f);
/* Warning for a failed read of the third byte (condition not visible). */
118 gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
/* BF completes the UTF-8 byte-order mark. */
121 else if (first[0] == '\xBF') {
122 set_read_encoding_bom(WITH_BOM);
123 gedcom_debug_print("UTF-8 encoding, with BOM");
/* EF BB was not followed by BF: warn that one-byte handling is used. */
126 gedcom_warning(_("Unknown encoding, falling back to one-byte"));
/* None of the byte patterns above matched: same fallback warning. */
132 gedcom_warning(_("Unknown encoding, falling back to one-byte"));
140 /** This function initializes the Gedcom parser library and must be called
141 before any other function in this library.
143 The function also initializes locale handling by calling
144 <tt> setlocale(LC_ALL, "") </tt>, in case the application does not do this
145 itself (it does no harm if the application does the same).
147 \attention This function should be called as early as possible. The
149 requirement is that it should come before the first call to \c iconv_open (part of the
150 generic character set conversion feature) in the program, either by your
151 program itself, or indirectly by the library calls it makes.
152 \attention In practice,
153 it should, for example, come before any call to a GTK function, because GTK
154 uses \c iconv_open in its initialization.
156 \retval 0 in case of success
157 \retval nonzero in case of failure (e.g. failure to set locale)
162 update_gconv_search_path();
164 if (!setlocale(LC_ALL, "")
165 || ! bindtextdomain(PACKAGE, LOCALEDIR)
166 || ! bind_textdomain_codeset(PACKAGE, INTERNAL_ENCODING))
172 /** This function parses the given file. By itself, it doesn't provide any
173 other information than the parse result.
175 The function also empties the cross-reference table before parsing, and
176 checks the validity of the
177 cross-references if the parse was successful.
178 The following conditions can occur in the cross-reference table:
179 - An xref was defined, but not used (warning)
180 - An xref was used, but not defined (error)
181 - An xref was used as a different type than the defined type (error)
183 \param file_name The name of the Gedcom file to parse
185 \retval 0 if the parse was successful and no errors were found in the
186 cross-reference table
187 \retval nonzero on errors, which can include:
188 - \ref gedcom_init() was not called
189 - The given file was not found
190 - The parse of the given file failed
191 - There were errors found in the cross-reference table
/* NOTE(review): this listing omits some original lines of this function
   (local declarations, guard conditions, cleanup), so the comments below
   describe only the visible statements. */
194 int gedcom_parse_file(const char* file_name)
/* Error reported when the library has not been initialized (the condition
   that guards it is not visible here). */
201 gedcom_error(_("Internal error: GEDCOM parser not initialized"));
/* Open the input for reading.
   NOTE(review): the stream is opened in text mode ("r") although the
   two-byte encodings are binary data; on platforms that translate line
   endings "rb" may be required - confirm. */
204 file = fopen(file_name, "r");
/* Error reported for a failed open, with the system error text. */
206 gedcom_error(_("Could not open file '%s': %s"),
207 file_name, strerror(errno));
/* Inspect the first bytes of the file to pick the encoding. */
211 enc = determine_encoding(file);
/* Set up the lexer for that encoding; a nonzero return lets the parse run. */
213 if (lexer_init(enc, file)) {
216 result = gedcom_parse();
/* Validate the cross-reference table; the result replaces the parse result
   (the condition under which this runs is not visible here). */
219 result = check_xref_table();
229 /** This function starts a new model. It does this by parsing the \c new.ged
230 file in the data directory of the library (\c $PREFIX/share/gedcom-parse).
231 This can be used to start from an empty model, and to build up the model
232 by adding new records yourself.
235 \retval nonzero on errors (mainly the errors from
236 \ref gedcom_parse_file()).
/* NOTE(review): this listing omits some original lines of this function
   (local declarations, conditions, cleanup), so the comments below describe
   only the visible statements. */
239 int gedcom_new_model()
/* Try to open NEW_MODEL_FILE by its bare name; a relative name like this is
   resolved against the current working directory. */
244 file = fopen(NEW_MODEL_FILE, "r");
/* Parse the file under that bare name. */
247 result = gedcom_parse_file(NEW_MODEL_FILE);
/* Otherwise build the path "PKGDATADIR/NEW_MODEL_FILE" in a heap buffer.
   NOTE(review): the rest of the size expression and any check of the malloc
   result are not visible here; the buffer must hold both strings, the '/'
   separator and the terminating NUL - confirm. */
250 char* filename = (char*) malloc(strlen(PKGDATADIR) + strlen(NEW_MODEL_FILE)
255 sprintf(filename, "%s/%s", PKGDATADIR, NEW_MODEL_FILE);
256 result = gedcom_parse_file(filename);
263 int gedcom_check_version(int major, int minor, int patch)
265 if (major < GEDCOM_PARSE_VERSION_MAJOR)
267 else if (major > GEDCOM_PARSE_VERSION_MAJOR)
269 else if (minor < GEDCOM_PARSE_VERSION_MINOR)
271 else if (minor > GEDCOM_PARSE_VERSION_MINOR)
273 else if (patch <= GEDCOM_PARSE_VERSION_PATCH)