1 /* This program is free software; you can redistribute it and/or modify *
2 * it under the terms of the GNU General Public License as published by *
3 * the Free Software Foundation; either version 2 of the License, or *
4 * (at your option) any later version. *
6 (C) 2001 by The Genes Development Team
7 Original author: Peter Verthez (Peter.Verthez@advalvas.be)
15 #include "gedcom.tab.h"
/* Width factor for the current input encoding: the maximum GEDCOM
   lengths (expressed in characters) are multiplied by it before being
   compared with the byte length of the raw token text.
   NOTE(review): presumably assigned by set_encoding_width() -- confirm,
   the setter is not visible here. */
22 static size_t encoding_width;
/* Level number of the most recently read GEDCOM line; stays at -1
   until a level number has been read. */
23 static int current_level = -1;
/* Difference between the newly read level number and the previous one;
   the lexer actions test it to decide which bracket token to return. */
24 static int level_diff=MAXGEDCLEVEL;
/* Running length of the current line, accumulated from strlen(yytext).
   Set to (size_t)-1 once the "Line too long" error has been reported,
   which switches off further length checks. */
25 static size_t line_len = 0;
/* Output buffers handed to TO_INTERNAL: ptr_buf for pointers, tag_buf
   for tags, str_buf for every other string.  Each is sized as the
   maximum length times UTF_FACTOR plus one extra byte (presumably for
   the terminating NUL). */
27 static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];
28 static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];
29 static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
37 int test_loop(ENCODING enc, char* code)
41 set_encoding_width(enc);
42 res = open_conv_to_internal(code);
44 gedcom_error("Unable to open conversion context: %s",
51 case BADTOKEN: printf("BADTOKEN "); break;
52 case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
53 case CLOSE: printf("CLOSE "); break;
54 case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
55 case DELIM: printf("DELIM "); break;
56 case ANYCHAR: printf("%s ", gedcom_lval.string); break;
57 case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
58 case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
59 default: printf("TAG(%s) ", gedcom_lval.string); break;
64 close_conv_to_internal();
68 #endif /* of #ifdef LEXER_TEST */
70 #else /* of #ifndef IN_LEX */
/* Calls to_internal() on STR, passing yyleng as its length and OUTBUF
   (with sizeof(OUTBUF) as its capacity) as the output buffer; the macro
   evaluates to whatever to_internal() returns, which the lexer actions
   use as a string.  OUTBUF must be an actual array, not a pointer,
   otherwise sizeof does not yield the buffer capacity. */
72 #define TO_INTERNAL(STR,OUTBUF) \
73 to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))
75 #define INIT_LINE_LEN \
78 #define CHECK_LINE_LEN \
79 { if (line_len != (size_t)-1) { \
80 line_len += strlen(yytext); \
81 if (line_len > MAXGEDCLINELEN * encoding_width) { \
82 gedcom_error("Line too long, max %d characters", \
84 line_len = (size_t)-1; \
90 #define MKTAGACTION(THETAG) \
92 gedcom_lval.string = TO_INTERNAL(yytext, tag_buf); \
94 return TAG_##THETAG; \
97 /* The GEDCOM level number is converted into a sequence of opening
98 and closing brackets. Simply put, the following GEDCOM fragment:
111 { SOUR genes (1 higher: no closing brackets)
112 { VERS 1.6 (1 higher: no closing brackets)
113 } { NAME Genes (same level: 1 closing bracket)
114 } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets)
128 Because this turns a single level number into a series of tokens,
129 the code that immediately follows this comment returns any
130 "pending" tokens. */
132 #define ACTION_BEFORE_REGEXPS \
133 { if (level_diff < 1) { \
137 else if (level_diff == 1) { \
139 gedcom_lval.number = current_level; \
143 /* out of brackets... */ \
148 #define ACTION_INITIAL_WHITESPACE \
150 /* ignore initial whitespace further */ \
154 #define ACTION_0_DIGITS \
155 { gedcom_error ("Level number with leading zero"); \
160 #define ACTION_DIGITS \
161 { int level = atoi(TO_INTERNAL(yytext, str_buf)); \
163 if ((level < 0) || (level > MAXGEDCLEVEL)) { \
164 gedcom_error ("Level number out of range [0..%d]", \
168 level_diff = level - current_level; \
170 current_level = level; \
171 if (level_diff < 1) { \
175 else if (level_diff == 1) { \
177 gedcom_lval.number = current_level; \
181 /* should never happen (violates the GEDCOM spec) */ \
182 gedcom_error ("GEDCOM level number is %d higher than " \
190 #define ACTION_ALPHANUM \
191 { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \
192 gedcom_error("Tag '%s' too long, max %d characters", \
193 yytext, MAXGEDCTAGLEN); \
197 gedcom_lval.string = TO_INTERNAL(yytext, tag_buf); \
203 #define ACTION_DELIM \
205 gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
212 gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
213 /* Due to character conversions, it is possible that the current \
214 character will be combined with the next, and so now we don't have a \
216 In principle, this is only applicable to the 1byte case (e.g. ANSEL), \
217 but it doesn't harm the unicode case. \
219 if (strlen(gedcom_lval.string) > 0) \
224 #define ACTION_ESCAPE \
226 gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
231 #define ACTION_POINTER \
233 if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) { \
234 gedcom_error("Pointer '%s' too long, max %d characters", \
235 yytext, MAXGEDCPTRLEN); \
238 gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf); \
243 /* Due to the conversion of level numbers into brackets, the
244 terminator is not important, so no token is returned here.
245 Although not strictly according to the GEDCOM spec, we'll ignore
246 whitespace just before the terminator.
249 #define ACTION_TERMINATOR \
257 /* Eventually we have to return 1 closing bracket (for the trailer).
258 We can detect whether we have sent the closing bracket using the
259 level_diff (at eof, first it is 2, then we increment it ourselves)
263 { if (level_diff == 2) { \
273 #define ACTION_UNEXPECTED \
274 { gedcom_error("Unexpected character: '%s' (0x%02x)", \
275 yytext, yytext[0]); \