1 /* The lexer multiplexer for Gedcom.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */
24 #include "gedcom_internal.h"
/* Pointer to a lexer entry point: a function that takes no arguments
   and returns int.  lexer_init() stores the address of one of the
   per-encoding lexers (gedcom_1byte_lex, gedcom_hilo_lex,
   gedcom_lohi_lex) in `lf`.
   NOTE(review): the declaration of `lf` is not visible in this
   excerpt; presumably it has this type -- confirm. */
31 typedef int (*lex_func)(void);
/* Select the lexer that matches the detected encoding and open the
   character conversion for it.

   enc: one of ONE_BYTE, TWO_BYTE_HILO or TWO_BYTE_LOHI.
   f:   the input stream.  It is not referenced on any line visible
        here (NOTE(review): presumably it is handed to the selected
        lexer on one of the elided lines -- confirm).

   For each of the three encodings tested below, the function returns
   whatever open_conv_to_internal() returns.
   NOTE(review): this listing skips lines (braces and whatever follows
   the last test), so the value returned for any other `enc` cannot be
   read from here. */
34 int lexer_init(ENCODING enc, FILE* f)
/* One-byte input: use the one-byte lexer and convert from "ASCII". */
36 if (enc == ONE_BYTE) {
38 lf = &gedcom_1byte_lex;
/* The encoding is also passed on to set_encoding_width(). */
39 set_encoding_width(enc);
40 return open_conv_to_internal("ASCII");
/* Two-byte input, high byte first: convert from "UNICODE". */
42 else if (enc == TWO_BYTE_HILO) {
44 lf = &gedcom_hilo_lex;
45 set_encoding_width(enc);
46 return open_conv_to_internal("UNICODE");
/* Two-byte input, low byte first: same conversion name, different lexer. */
48 else if (enc == TWO_BYTE_LOHI) {
50 lf = &gedcom_lohi_lex;
51 set_encoding_width(enc);
52 return open_conv_to_internal("UNICODE");
/* NOTE(review): the header of the function that contains this call is
   not visible in this excerpt.  Judging by the name, the call releases
   what open_conv_to_internal() set up in lexer_init() -- confirm. */
61 close_conv_to_internal();
/* Classify the encoding of the input stream from its first two bytes.

   Byte patterns recognised by the tests below:
     '0'  ' '     one-byte encoding
     NUL  '0'     two-byte encoding, high byte first
     0xFE 0xFF    two-byte encoding, high byte first, with BOM
     '0'  NUL     two-byte encoding, low byte first
     0xFF 0xFE    two-byte encoding, low byte first, with BOM
   For anything else a warning is issued that says the code falls back
   to one-byte.

   f: stream to read from; two bytes are consumed by the fread() below.

   NOTE(review): the declaration of `first` and every return statement
   are on lines not visible in this excerpt, so the exact return values
   cannot be documented from here.  gedcom_parse_file() passes the
   result to lexer_init() as its `enc` argument. */
69 int determine_encoding(FILE* f)
/* NOTE(review): the return value of fread() is ignored.  If fewer than
   two bytes are read, the comparisons below examine bytes of `first`
   that fread() did not store. */
73 fread(first, 1, 2, f);
/* A plain "0 " start: every character occupies one byte. */
74 if ((first[0] == '0') && (first[1] == ' ')) {
75 gedcom_debug_print(_("One-byte encoding"));
/* A zero byte before the '0': two bytes per character, high byte first. */
79 else if ((first[0] == '\0') && (first[1] == '0'))
81 gedcom_debug_print(_("Two-byte encoding, high-low"));
/* Bytes FE FF: byte order mark for the high-low order. */
85 else if ((first[0] == '\xFE') && (first[1] == '\xFF'))
87 gedcom_debug_print(_("Two-byte encoding, high-low, with BOM"));
/* A zero byte after the '0': two bytes per character, low byte first. */
90 else if ((first[0] == '0') && (first[1] == '\0'))
92 gedcom_debug_print(_("Two-byte encoding, low-high"));
/* Bytes FF FE: byte order mark for the low-high order. */
96 else if ((first[0] == '\xFF') && (first[1] == '\xFE'))
98 gedcom_debug_print(_("Two-byte encoding, low-high, with BOM"));
/* None of the patterns matched. */
102 gedcom_warning(_("Unknown encoding, falling back to one-byte"));
/* File-local flag, initially 0.
   NOTE(review): the lines that set and test it are not visible in this
   excerpt.  Presumably it records that the library's init routine has
   run and is what gedcom_parse_file() checks before reporting
   "GEDCOM parser not initialized" -- confirm. */
108 static int init_called = 0;
/* NOTE(review): the header of the function that contains this call is
   not visible in this excerpt -- presumably the init routine mentioned
   above. */
113 update_gconv_search_path();
/* Parse the GEDCOM file named by file_name.

   Steps visible in this excerpt, in order:
     1. save the current locale and message domain, then switch to the
        environment's locale and bind this package's message catalogue;
     2. open the file and detect its encoding with determine_encoding();
     3. initialise the lexer with lexer_init();
     4. run gedcom_parse() and check_xref_table();
     5. restore the saved message domain and locale.

   file_name: path of the file to open.

   NOTE(review): local declarations, braces, the conditions guarding
   the error messages and the return statement are on lines not visible
   here, so the return value cannot be documented from this excerpt. */
116 int gedcom_parse_file(char* file_name)
/* setlocale(LC_ALL, NULL) only queries the current locale.  The result
   is copied because a later setlocale() call may overwrite the
   returned string.
   NOTE(review): the strdup() result is not checked for NULL, and no
   matching free() is visible in this excerpt -- confirm it is released. */
122 char *save_locale = strdup(setlocale(LC_ALL, NULL));
/* textdomain(NULL) returns the current message domain without changing it. */
123 char *save_textdom = textdomain(NULL);
/* Switch to the locale specified by the environment. */
124 setlocale(LC_ALL, "");
/* Point gettext at this package's catalogue and request messages in
   INTERNAL_ENCODING. */
125 bindtextdomain(PACKAGE, LOCALEDIR);
126 bind_textdomain_codeset(PACKAGE, INTERNAL_ENCODING);
/* NOTE(review): the condition guarding this error is on an elided
   line; presumably it tests init_called -- confirm. */
130 gedcom_error(_("Internal error: GEDCOM parser not initialized"));
/* NOTE(review): the file is opened in text mode ("r"), although
   determine_encoding() may classify it as two-byte data.  On platforms
   that distinguish text and binary streams, "rb" may be required --
   confirm. */
134 file = fopen(file_name, "r");
/* NOTE(review): presumably reported when fopen() returned NULL; the
   test itself is not visible. */
136 gedcom_error(_("Could not open file '%s'"), file_name);
140 enc = determine_encoding(file);
/* The branch below is taken when lexer_init() returns non-zero. */
142 if (lexer_init(enc, file)) {
145 result = gedcom_parse();
/* NOTE(review): unless an elided line guards this assignment, it
   replaces the value just stored from gedcom_parse(). */
147 result = check_xref_table();
/* Restore the message domain and locale saved at the start. */
154 textdomain(save_textdom);
155 setlocale(LC_ALL, save_locale);