2 Copyright (C) 2001, 2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 #include "gedcom_internal.h"
30 #include "gedcom.tab.h"
/* Lexer state and conversion buffers shared by the action macros below. */

/* Factor applied to the MAXGEDC*LEN limits when the raw yytext length is
   checked (see CHECK_LINE_LEN, ACTION_ALPHANUM and ACTION_POINTER). */
34 static size_t encoding_width;
/* Level number of the last GEDCOM line handled by ACTION_DIGITS; starts at
   -1 and is put back to -1 when the lexer state is reset at end of file. */
35 static int current_level = -1;
/* New level minus the previous level, as computed in ACTION_DIGITS; the
   initial/reset value is MAXGEDCLEVEL. */
36 static int level_diff=MAXGEDCLEVEL;
/* Accumulated strlen(yytext) for the current line.  CHECK_LINE_LEN sets it
   to (size_t)-1 after reporting "Line too long" and skips the check while
   it has that value. */
37 static size_t line_len = 0;
/* Output buffers passed to TO_INTERNAL: ptr_buf for pointers, tag_buf for
   tags, str_buf for the other strings.  Each holds the maximum length times
   UTF_FACTOR, plus one byte (presumably for the terminating NUL). */
39 static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];
40 static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];
41 static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
/* Message callback used by the lexer test program (it is registered in
   test_loop via gedcom_set_message_handler).  Writes the numeric message
   type in parentheses, followed by the message text and a newline, to
   stderr. */
49 void message_handler(Gedcom_msg_type type, char *msg)
51 fprintf(stderr, "(%d) %s\n", type, msg);
/* Driver for the lexer test program.  Sets the encoding width for ENC,
   installs message_handler as the message callback and opens the conversion
   context for the character set named by CODE; a failure to open it is
   reported through gedcom_error.  Token values are printed to stdout in a
   readable form: BADTOKEN, OPEN (with its level number), CLOSE, ESCAPE,
   DELIM, ANYCHAR, POINTER and USERTAG by name, any other value as
   TAG(<tag string>).  The conversion context is closed again afterwards. */
54 int test_loop(ENCODING enc, char* code)
58 set_encoding_width(enc);
59 gedcom_set_message_handler(message_handler);
60 res = open_conv_to_internal(code);
62 gedcom_error("Unable to open conversion context: %s",
69 case BADTOKEN: printf("BADTOKEN "); break;
70 case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
71 case CLOSE: printf("CLOSE "); break;
72 case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
73 case DELIM: printf("DELIM "); break;
74 case ANYCHAR: printf("%s ", gedcom_lval.string); break;
75 case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
76 case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag.string); break;
77 default: printf("TAG(%s) ", gedcom_lval.tag.string); break;
82 close_conv_to_internal();
86 #endif /* of #ifdef LEXER_TEST */
88 #else /* of #ifndef IN_LEX */
/* Pass STR through to_internal(), writing the result into OUTBUF.
   The input length is always the current token length (yyleng), whatever
   STR is, and the output size is taken with sizeof, so OUTBUF must be a
   real array (ptr_buf, tag_buf or str_buf) and not a pointer. */
90 #define TO_INTERNAL(STR,OUTBUF) \
91 to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))
93 #define INIT_LINE_LEN \
96 #define CHECK_LINE_LEN \
97 { if (line_len != (size_t)-1) { \
98 line_len += strlen(yytext); \
99 if (line_len > MAXGEDCLINELEN * encoding_width) { \
100 gedcom_error(_("Line too long, max %d characters allowed"), \
102 line_len = (size_t)-1; \
108 #define MKTAGACTION(THETAG) \
110 gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \
111 gedcom_lval.tag.value = TAG_##THETAG; \
113 return TAG_##THETAG; \
116 /* The GEDCOM level number is converted into a sequence of opening
117 and closing brackets. Simply put, the following GEDCOM fragment:
130 { SOUR genes (1 higher: no closing brackets)
131 { VERS 1.6 (1 higher: no closing brackets)
132 } { NAME Genes (same level: 1 closing bracket)
133 } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets)
147 But because this means that one token is converted into a series
148 of tokens, there is some initial code following immediately here
149 that returns "pending" tokens. */
151 #define ACTION_BEFORE_REGEXPS \
152 { if (level_diff < 1) { \
156 else if (level_diff == 1) { \
158 gedcom_lval.number = current_level; \
162 /* out of brackets... */ \
167 #define ACTION_INITIAL_WHITESPACE \
169 /* ignore initial whitespace further */ \
173 #define ACTION_0_DIGITS \
174 { gedcom_error (_("Level number with leading zero not allowed")); \
179 #define ACTION_DIGITS \
180 { int level = atoi(TO_INTERNAL(yytext, str_buf)); \
182 if ((level < 0) || (level > MAXGEDCLEVEL)) { \
183 gedcom_error (_("Level number out of range [0..%d]"), \
187 level_diff = level - current_level; \
189 current_level = level; \
190 if (level_diff < 1) { \
194 else if (level_diff == 1) { \
196 gedcom_lval.number = current_level; \
200 /* should never happen (error to GEDCOM spec) */ \
201 gedcom_error (_("GEDCOM level number is %d higher than previous"), \
208 #define ACTION_ALPHANUM \
209 { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \
210 gedcom_error(_("Tag '%s' too long, max %d characters allowed"), \
211 yytext, MAXGEDCTAGLEN); \
215 gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \
216 gedcom_lval.tag.value = USERTAG; \
222 #define ACTION_DELIM \
224 gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
231 gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
232 /* Due to character conversions, it is possible that the current \
233 character will be combined with the next, and so now we don't have a \
235 In principle, this is only applicable to the 1byte case (e.g. ANSEL), \
236 but it doesn't harm the unicode case. \
238 if (strlen(gedcom_lval.string) > 0) \
243 #define ACTION_ESCAPE \
245 gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
250 #define ACTION_POINTER \
252 if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) { \
253 gedcom_error(_("Pointer '%s' too long, max %d characters allowed"), \
254 yytext, MAXGEDCPTRLEN); \
257 gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf); \
262 /* Due to the conversion of level numbers into brackets, the
263 terminator is not important, so no token is returned here.
264 Although not strictly according to the GEDCOM spec, we'll ignore
265 whitespace just before the terminator.
268 #define ACTION_TERMINATOR \
276 /* Eventually we have to return 1 closing bracket (for the trailer).
277 We can detect whether we have sent the closing bracket using the
278 level_diff (at eof, first it is 2, then we increment it ourselves)
282 { if (level_diff == 2) { \
287 /* Reset our state */ \
288 current_level = -1; \
289 level_diff = MAXGEDCLEVEL; \
290 /* ... then terminate lex */ \
296 #define ACTION_UNEXPECTED \
297 { gedcom_error(_("Unexpected character: '%s' (0x%02x)"), \
298 yytext, yytext[0]); \