Get rid of compiler warning from lex generated code.
[gedcom-parse.git] / gedcom / gedcom_lex_common.c
1 /* Common lexer code.
2    Copyright (C) 2001, 2002 The Genes Development Team
3    This file is part of the Gedcom parser library.
4    Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
5
6    The Gedcom parser library is free software; you can redistribute it
7    and/or modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The Gedcom parser library is distributed in the hope that it will be
12    useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the Gedcom parser library; if not, write to the
18    Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20
21 /* $Id$ */
22 /* $Name$ */
23
24 #ifndef IN_LEX
25
26 #include "gedcom_internal.h"
27 #include "multilex.h"
28 #include "encoding.h"
29 #include "gedcom.h"
30 #include "gedcom.tab.h"
31
32 #define YY_NO_UNPUT
33
34 static size_t encoding_width;
35 static int current_level = -1;
36 static int level_diff=MAXGEDCLEVEL;
37 static size_t line_len = 0;
38
39 static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];
40 static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];
41 static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
42
43 #ifdef LEXER_TEST 
44 YYSTYPE gedcom_lval;
45 int line_no = 1;
46
47 int gedcom_lex();
48
49 void message_handler(Gedcom_msg_type type, char *msg)
50 {
51   fprintf(stderr, "(%d) %s\n", type, msg);
52 }
53
54 int test_loop(ENCODING enc, char* code)
55 {
56   int tok, res;
57   init_encodings();
58   set_encoding_width(enc);
59   gedcom_set_message_handler(message_handler);
60   res = open_conv_to_internal(code);
61   if (!res) {
62     gedcom_error("Unable to open conversion context: %s",
63                  strerror(errno));
64     return 1;
65   }
66   tok = gedcom_lex();
67   while (tok) {
68     switch(tok) {
69       case BADTOKEN: printf("BADTOKEN "); break;
70       case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
71       case CLOSE: printf("CLOSE "); break;
72       case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
73       case DELIM: printf("DELIM "); break;
74       case ANYCHAR: printf("%s ", gedcom_lval.string); break;
75       case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
76       case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag.string); break;
77       default: printf("TAG(%s) ", gedcom_lval.tag.string); break;
78     }
79     tok = gedcom_lex();
80   }
81   printf("\n");
82   close_conv_to_internal();
83   return 0;  
84 }
85  
86 #endif /* of #ifdef LEXER_TEST */
87
88 #else  /* of #ifndef IN_LEX */
89
/* Pass the match STR (yyleng bytes long) to to_internal(), together with the
   output buffer OUTBUF and its capacity.  OUTBUF must be an array rather
   than a pointer, because the capacity is taken with sizeof.  Expands to the
   to_internal() call, so it yields whatever that function returns. */
#define TO_INTERNAL(STR,OUTBUF) \
  to_internal((STR), yyleng, (OUTBUF), sizeof (OUTBUF))
92
/* Restart the length accounting for a new input line.  This also clears the
   (size_t)-1 "already reported" marker left behind by CHECK_LINE_LEN. */
#define INIT_LINE_LEN \
  { line_len = 0; }
95
/* Add the length of the current match to the running length of the line and
   make the enclosing function return BADTOKEN, after reporting an error,
   once the line exceeds MAXGEDCLINELEN * encoding_width.

   The error is reported only once per line: after reporting, line_len is
   set to (size_t)-1, which disables the check until line_len is reset.

   The match length is taken from yyleng instead of strlen(yytext): the limit
   is scaled by encoding_width, and a match in a multi-byte encoding can
   contain NUL bytes, which would make strlen() stop early and undercount. */
#define CHECK_LINE_LEN                                                        \
  { if (line_len != (size_t)-1) {                                             \
      line_len += (size_t)yyleng;                                             \
      if (line_len > MAXGEDCLINELEN * encoding_width) {                       \
        gedcom_error(_("Line too long, max %d characters allowed"),           \
                     MAXGEDCLINELEN);                                         \
        line_len = (size_t)-1;                                                \
        return BADTOKEN;                                                      \
      }                                                                       \
    }                                                                         \
  }
107
/* Lexer action for the predefined tag THETAG: after the line length check,
   store the converted tag text and the token value TAG_<THETAG> in
   gedcom_lval.tag, switch to the NORMAL start condition and return
   TAG_<THETAG>. */
#define MKTAGACTION(THETAG)                                                  \
  {                                                                          \
    CHECK_LINE_LEN;                                                          \
    gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf);                   \
    gedcom_lval.tag.value  = TAG_##THETAG;                                   \
    BEGIN(NORMAL);                                                           \
    return TAG_##THETAG;                                                     \
  }
115
116 /* The GEDCOM level number is converted into a sequence of opening
117    and closing brackets.  Simply put, the following GEDCOM fragment:
118    
119    0 HEAD
120    1 SOUR genes
121    2 VERS 1.6
122    2 NAME Genes
123    1 DATE 07 OCT 2001
124    ...
125    0 TRLR
126    
127    is converted into:
128    
129    { HEAD                     (initial)  
130    { SOUR genes               (1 higher: no closing brackets)
131    { VERS 1.6                 (1 higher: no closing brackets)
132    } { NAME Genes             (same level: 1 closing bracket)
133    } } { DATE 07 OCT 2001     (1 lower: 2 closing brackets)
134    ...
135    } { TRLR }
136    
137    or more clearly:
138    
139    { HEAD
140      { SOUR genes
141        { VERS 1.6 }
142        { NAME Genes } }
143      { DATE 07 OCT 2001
144      ... }
145    { TRLR }
146
147    But because this means that one token is converted into a series
148    of tokens, there is some initial code following immediately here
149    that returns "pending" tokens. */
150
/* Hand out a pending bracket token, if there is one, before any pattern is
   matched: one OPEN (carrying the current level) when level_diff is exactly
   1, otherwise one CLOSE per call while level_diff is below 1.  With
   level_diff above 1 nothing is pending and control falls through. */
#define ACTION_BEFORE_REGEXPS                                                 \
   {                                                                          \
     if (level_diff == 1) {                                                   \
       level_diff++;                                                          \
       gedcom_lval.number = current_level;                                    \
       return OPEN;                                                           \
     }                                                                        \
     if (level_diff < 1) {                                                    \
       level_diff++;                                                          \
       return CLOSE;                                                          \
     }                                                                        \
     /* level_diff > 1: out of brackets, go on with normal matching */        \
   }
165
166
/* Whitespace at the start of a line: it counts towards the line length but
   produces no token. */
#define ACTION_INITIAL_WHITESPACE                                             \
  {                                                                           \
    /* only the length check; the whitespace itself is dropped */             \
    CHECK_LINE_LEN;                                                           \
  }
171
172
/* A level number written with a leading zero: report the error and return
   BADTOKEN. */
#define ACTION_0_DIGITS                                                       \
   {                                                                          \
     gedcom_error (_("Level number with leading zero not allowed"));          \
     return BADTOKEN;                                                         \
   }
177
178
/* A level number at the start of a line.

   The number is converted with strtol() rather than atoi(): atoi() has
   undefined behaviour when the value does not fit, whereas strtol() clamps
   the result, so an over-long run of digits is reliably caught by the range
   check below.  The value is only narrowed to int after that check.

   After validation, level_diff becomes the difference with the previous
   level, the start condition changes to EXPECT_TAG, and the first bracket
   token for this line is returned: CLOSE when the level is the same or lower
   (further brackets are handed out later through level_diff), OPEN when it
   is exactly one higher.  A jump of more than one level is reported and
   yields BADTOKEN. */
#define ACTION_DIGITS                                                         \
   { long parsed_level = strtol(TO_INTERNAL(yytext, str_buf), NULL, 10);      \
     int level;                                                               \
     CHECK_LINE_LEN;                                                          \
     if ((parsed_level < 0) || (parsed_level > MAXGEDCLEVEL)) {               \
       gedcom_error (_("Level number out of range [0..%d]"),                  \
                     MAXGEDCLEVEL);                                           \
       return BADTOKEN;                                                       \
     }                                                                        \
     level = (int)parsed_level;                                               \
     level_diff = level - current_level;                                      \
     BEGIN(EXPECT_TAG);                                                       \
     current_level = level;                                                   \
     if (level_diff < 1) {                                                    \
       level_diff++;                                                          \
       return CLOSE;                                                          \
     }                                                                        \
     else if (level_diff == 1) {                                              \
       level_diff++;                                                          \
       gedcom_lval.number = current_level;                                    \
       return OPEN;                                                           \
     }                                                                        \
     else {                                                                   \
       /* should never happen (error to GEDCOM spec) */                       \
       gedcom_error (_("GEDCOM level number is %d higher than previous"),     \
                     level_diff);                                             \
       return BADTOKEN;                                                       \
     }                                                                        \
   }
206
207
/* An alphanumeric word in tag position that is not a predefined tag: it is
   returned as USERTAG, with the converted text in gedcom_lval.tag.string.

   A tag longer than MAXGEDCTAGLEN * encoding_width is rejected with
   BADTOKEN.  The length is taken from yyleng instead of strlen(yytext): the
   limit is scaled by encoding_width, and a match in a multi-byte encoding
   can contain NUL bytes, which would make strlen() undercount.

   NOTE(review): the error message still prints the raw yytext with %s, which
   presumably is not readable for multi-byte input -- confirm. */
#define ACTION_ALPHANUM                                                       \
   { if ((size_t)yyleng > MAXGEDCTAGLEN * encoding_width) {                   \
       gedcom_error(_("Tag '%s' too long, max %d characters allowed"),        \
                    yytext, MAXGEDCTAGLEN);                                   \
       return BADTOKEN;                                                       \
     }                                                                        \
     CHECK_LINE_LEN;                                                          \
     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf);                   \
     gedcom_lval.tag.value  = USERTAG;                                        \
     BEGIN(NORMAL);                                                           \
     return USERTAG;                                                          \
   }
220
221
/* A delimiter: after the line length check, store its converted text in
   gedcom_lval.string and return DELIM. */
#define ACTION_DELIM                                                          \
  {                                                                           \
    CHECK_LINE_LEN;                                                           \
    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
    return DELIM;                                                             \
  }
227
228
/* Any other character: after the line length check, convert it and return
   ANYCHAR, unless the conversion has not produced any output yet. */
#define ACTION_ANY                                                            \
  {                                                                           \
    CHECK_LINE_LEN;                                                           \
    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
    /* The conversion may hold on to the current character in order to       \
       combine it with the next one; the result is then still empty and no   \
       token is returned for now.  In principle this only concerns the       \
       1-byte case (e.g. ANSEL), but it is harmless for unicode input.       \
    */                                                                        \
    if (gedcom_lval.string[0] != '\0')                                        \
      return ANYCHAR;                                                         \
  }
241
242
/* An escape sequence: after the line length check, store its converted text
   in gedcom_lval.string and return ESCAPE. */
#define ACTION_ESCAPE                                                         \
  {                                                                           \
    CHECK_LINE_LEN;                                                           \
    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
    return ESCAPE;                                                            \
  }
248
249
/* A pointer: after the line length check, store its converted text (in
   ptr_buf) in gedcom_lval.string and return POINTER.

   A pointer longer than MAXGEDCPTRLEN * encoding_width is rejected with
   BADTOKEN.  The length is taken from yyleng instead of strlen(yytext): the
   limit is scaled by encoding_width, and a match in a multi-byte encoding
   can contain NUL bytes, which would make strlen() undercount.

   NOTE(review): the error message still prints the raw yytext with %s, which
   presumably is not readable for multi-byte input -- confirm. */
#define ACTION_POINTER                                                        \
  { CHECK_LINE_LEN;                                                           \
    if ((size_t)yyleng > MAXGEDCPTRLEN * encoding_width) {                    \
      gedcom_error(_("Pointer '%s' too long, max %d characters allowed"),     \
                   yytext, MAXGEDCPTRLEN);                                    \
      return BADTOKEN;                                                        \
    }                                                                         \
    gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf);                        \
    return POINTER;                                                           \
  }
260
261
262 /* Due to the conversion of level numbers into brackets, the
263    terminator is not important, so no token is returned here.
264    Although not strictly according to the GEDCOM spec, we'll ignore
265    whitespace just before the terminator.
266 */
267
/* End of a line: account for the terminator in the line length, then reset
   the length counter, advance line_no and go back to the INITIAL start
   condition.  No token is returned unless the line turns out to be too long.

   The length check is written out here instead of using CHECK_LINE_LEN,
   because that macro returns as soon as the limit is exceeded.  When the
   limit is first exceeded on the terminator itself, that early return would
   skip the end-of-line bookkeeping: line_len would keep its (size_t)-1
   marker, line_no would not be advanced and the start condition would not be
   reset, so the next line would be lexed as a continuation of this one.
   Here the error is reported before line_no is advanced, the bookkeeping is
   always done, and only then is BADTOKEN returned. */
#define ACTION_TERMINATOR                                                     \
  { int line_too_long = 0;                                                    \
    if (line_len != (size_t)-1) {                                             \
      line_len += (size_t)yyleng;                                             \
      if (line_len > MAXGEDCLINELEN * encoding_width) {                       \
        gedcom_error(_("Line too long, max %d characters allowed"),           \
                     MAXGEDCLINELEN);                                         \
        line_too_long = 1;                                                    \
      }                                                                       \
    }                                                                         \
    INIT_LINE_LEN;                                                            \
    line_no++;                                                                \
    BEGIN(INITIAL);                                                           \
    if (line_too_long)                                                        \
      return BADTOKEN;                                                        \
  }
274
275
276 /* Eventually we have to return 1 closing bracket (for the trailer).
277    We can detect whether we have sent the closing bracket using the
278    level_diff (at eof, first it is 2, then we increment it ourselves)
279 */
280
/* End of input: the first time (level_diff == 2) return the CLOSE token
   that is still owed; after that, reset the level bookkeeping and terminate
   the scanner.

   The generated scanner defines yy_flex_realloc() as a static function that
   may otherwise go unused, which makes the compiler warn.  Naming the
   function in a void expression counts as a use without calling it, so no
   dummy arguments are needed and nothing happens at run time. */
#define ACTION_EOF                                                            \
  { if (level_diff == 2) {                                                    \
      level_diff++;                                                           \
      return CLOSE;                                                           \
    }                                                                         \
    else {                                                                    \
      /* Silence the "defined but not used" warning (no run-time effect) */   \
      (void) yy_flex_realloc;                                                 \
      /* Reset our state */                                                   \
      current_level = -1;                                                     \
      level_diff = MAXGEDCLEVEL;                                              \
      /* ... then terminate lex */                                            \
      yyterminate();                                                          \
    }                                                                         \
  }
298
299
/* A character that no other rule accepts: report it, both as text and as a
   hexadecimal byte value, and return BADTOKEN.

   The byte is converted through unsigned char before it is passed to %02x.
   Where plain char is signed, a byte of 0x80 or above would otherwise be
   sign-extended by the default argument promotion and show up as ffffffXX;
   %x also expects an unsigned int argument. */
#define ACTION_UNEXPECTED                                                     \
  { gedcom_error(_("Unexpected character: '%s' (0x%02x)"),                    \
                 yytext, (unsigned int)(unsigned char)yytext[0]);             \
    return BADTOKEN;                                                          \
  }
305
306 #endif /* IN_LEX */