2 Copyright (C) 2001, 2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 #include "gedcom_internal.h"
30 #include "gedcom.tabgen.h"
33 static size_t encoding_width; /* factor multiplied into the MAXGEDC*LEN limits when checking the length of raw yytext */
34 static int current_level = -1; /* level number of the most recently seen line; -1 is the initial and reset value */
35 static int level_diff=MAXGEDCLEVEL; /* new level minus previous level; starts at, and is reset to, MAXGEDCLEVEL */
36 static size_t line_len = 0; /* accumulated length of the current line; (size_t)-1 means the too-long error was already reported */
38 static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1]; /* receives the converted text of pointer tokens */
39 static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1]; /* receives the converted text of tag tokens */
40 static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1]; /* receives the converted text of the other tokens (level digits, delimiters, escapes, value characters) */
49 void message_handler(Gedcom_msg_type type, char *msg)
51 fprintf(stderr, "(%d) %s\n", type, msg);
54 int test_loop(ENCODING enc, const char* code)
58 set_encoding_width(enc);
59 gedcom_set_message_handler(message_handler);
60 res = open_conv_to_internal(code);
62 gedcom_error("Unable to open conversion context: %s",
69 case BADTOKEN: printf("BADTOKEN "); break;
70 case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
71 case CLOSE: printf("CLOSE "); break;
72 case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
73 case DELIM: printf("DELIM "); break;
74 case ANYCHAR: printf("%s ", gedcom_lval.string); break;
75 case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
76 case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag.string); break;
77 default: printf("TAG(%s) ", gedcom_lval.tag.string); break;
82 close_conv_to_internal();
86 #endif /* of #ifdef LEXER_TEST */
88 /* These are defined as functions here, because xgettext has trouble
89 extracting the strings out of long pre-processor defines */
91 static void error_line_too_long()
93 gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN);
96 static void error_level_leading_zero()
98 gedcom_error (_("Level number with leading zero not allowed"));
101 static void error_level_out_of_range()
103 gedcom_error (_("Level number out of range [0..%d]"), MAXGEDCLEVEL);
106 static void error_level_too_high(int level_diff)
108 gedcom_error (_("GEDCOM level number is %d higher than previous"),
112 static void error_tag_too_long(const char *tag)
114 gedcom_error(_("Tag '%s' too long, max %d characters allowed"),
118 static void error_invalid_character(const char *str, char ch)
120 gedcom_error(_("Invalid character for encoding: '%s' (0x%02x)"), str, ch);
123 static void error_pointer_too_long(const char *ptr)
125 gedcom_error(_("Pointer '%s' too long, max %d characters allowed"),
129 static void error_at_character()
131 gedcom_error(_("'@' character should be written as '@@' in values"));
134 static void error_unexpected_character(const char* str, char ch)
136 gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
139 #else /* of #ifndef IN_LEX */
141 #define TO_INTERNAL(STR,OUTBUF) \
142 to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF)) /* passes yyleng as the input length; OUTBUF must be an array (not a pointer) so that sizeof yields its full capacity */
144 #define INIT_LINE_LEN \
147 #define CHECK_LINE_LEN \
148 { if (line_len != (size_t)-1) { \
149 line_len += strlen(yytext); \
150 if (line_len > MAXGEDCLINELEN * encoding_width) { \
151 error_line_too_long(); \
152 line_len = (size_t)-1; \
158 #define MKTAGACTION(THETAG) \
160 gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \
161 gedcom_lval.tag.value = TAG_##THETAG; \
163 return TAG_##THETAG; \
166 /* The GEDCOM level number is converted into a sequence of opening
167 and closing brackets. Simply put, the following GEDCOM fragment:
180 { SOUR genes (1 higher: no closing brackets)
181 { VERS 1.6 (1 higher: no closing brackets)
182 } { NAME Genes (same level: 1 closing bracket)
183 } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets)
197 But because this means that one token is converted into a series
198 of tokens, there is some initial code following immediately here
199 that returns "pending" tokens. */
201 #define ACTION_BEFORE_REGEXPS \
202 { if (level_diff < 1) { \
206 else if (level_diff == 1) { \
208 gedcom_lval.number = current_level; \
212 /* out of brackets... */ \
217 #define ACTION_INITIAL_WHITESPACE \
219 /* ignore initial whitespace further */ \
223 #define ACTION_0_DIGITS \
224 { error_level_leading_zero(); \
229 #define ACTION_DIGITS \
230 { int level = atoi(TO_INTERNAL(yytext, str_buf)); \
232 if ((level < 0) || (level > MAXGEDCLEVEL)) { \
233 error_level_out_of_range(); \
236 level_diff = level - current_level; \
238 current_level = level; \
239 if (level_diff < 1) { \
243 else if (level_diff == 1) { \
245 gedcom_lval.number = current_level; \
249 /* should never happen (error to GEDCOM spec) */ \
250 error_level_too_high(level_diff); \
256 #define ACTION_ALPHANUM \
257 { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \
258 error_tag_too_long(yytext); \
262 gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \
263 gedcom_lval.tag.value = USERTAG; \
269 #define ACTION_DELIM \
271 gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
279 tmp = TO_INTERNAL(yytext, str_buf); \
281 /* Something went wrong during conversion... */ \
282 error_invalid_character(yytext, yytext[0]); \
286 gedcom_lval.string = tmp; \
287 /* Due to character conversions, it is possible that the current \
288 character will be combined with the next, and so now we don't have a \
290 In principle, this is only applicable to the 1byte case (e.g. ANSEL),\
291 but it doesn't harm the unicode case. \
293 if (strlen(gedcom_lval.string) > 0) \
299 #define ACTION_ESCAPE \
301 gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
306 #define ACTION_POINTER \
308 if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) { \
309 error_pointer_too_long(yytext); \
312 gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf); \
317 /* Due to the conversion of level numbers into brackets, the
318 terminator is not important, so no token is returned here.
319 Although not strictly according to the GEDCOM spec, we'll ignore
320 whitespace just before the terminator.
323 #define ACTION_TERMINATOR \
331 /* Eventually we have to return 1 closing bracket (for the trailer).
332 We can detect whether we have sent the closing bracket using the
333 level_diff (at eof, first it is 2, then we increment it ourselves)
337 { if (level_diff == 2) { \
342 char* ptr; int size; \
343 /* Reset our state */ \
344 current_level = -1; \
345 level_diff = MAXGEDCLEVEL; \
346 /* ... then terminate lex */ \
348 /* Silence a compiler warning from the lex-generated code */ \
349 /* yyterminate does return(), so program will never come here */ \
350 yy_flex_realloc(ptr, size); \
354 #define ACTION_NORMAL_AT \
357 char *yycopy = strdup(yytext); \
359 for (i = 0; i < 2; i++) \
360 for (j = yyleng - 1; j >= 0; --j) \
369 error_at_character(); \
374 #define ACTION_UNEXPECTED \
375 { error_unexpected_character(yytext, yytext[0]); \