Use of gedcom_lex_common.c.
[gedcom-parse.git] / gedcom_lex_common.c
1 /*  This program is free software; you can redistribute it and/or modify  *
2  *  it under the terms of the GNU General Public License as published by  *
3  *  the Free Software Foundation; either version 2 of the License, or     *
4  *  (at your option) any later version.                                   *
5
6  (C) 2001 by The Genes Development Team
7  Original author: Peter Verthez (Peter.Verthez@advalvas.be)
8 */
9
10 /* $Id$ */
11 /* $Name$ */
12
13 char string_buf[MAXGEDCLINELEN+1];
14  
15 #define TO_INTERNAL(str) to_internal(str, yyleng) 
16
/* Action body for the tag THE_TAG: store the converted match text in
   gedcom_lval.tag, switch to the NORMAL start condition and return the
   token TAG_<THE_TAG>. */
#define MKTAGACTION(the_tag)                                                  \
  {                                                                           \
    gedcom_lval.tag = TO_INTERNAL(yytext);                                    \
    BEGIN(NORMAL);                                                            \
    return TAG_##the_tag;                                                     \
  }
21
22
23 /* The GEDCOM level number is converted into a sequence of opening
24    and closing brackets.  Simply put, the following GEDCOM fragment:
25    
26    0 HEAD
27    1 SOUR genes
28    2 VERS 1.6
29    2 NAME Genes
30    1 DATE 07 OCT 2001
31    ...
32    0 TRLR
33    
34    is converted into:
35    
36    { HEAD                     (initial)  
37    { SOUR genes               (1 higher: no closing brackets)
38    { VERS 1.6                 (1 higher: no closing brackets)
39    } { NAME Genes             (same level: 1 closing bracket)
40    } } { DATE 07 OCT 2001     (1 lower: 2 closing brackets)
41    ...
42    } { TRLR }
43    
44    or more clearly:
45    
46    { HEAD
47      { SOUR genes
48        { VERS 1.6 }
49        { NAME Genes } }
50      { DATE 07 OCT 2001
51      ... }
52    { TRLR }
53
54    But because this means that one token is converted into a series
55    of tokens, there is some initial code following immediately here
56    that returns "pending" tokens. */
57
/* Emits the bracket tokens that are still pending, one per invocation:
     - level_diff below 1:  a CLOSE is owed; count it and return it.
     - level_diff equal 1:  the OPEN is owed; record the current level in
                            gedcom_lval.level and return it.
     - level_diff above 1:  nothing is pending, control falls through. */
#define ACTION_BEFORE_REGEXPS                                                 \
  {                                                                           \
    if (level_diff < 1) {                                                     \
      level_diff += 1;                                                        \
      return CLOSE;                                                           \
    }                                                                         \
    if (level_diff == 1) {                                                    \
      level_diff += 1;                                                        \
      gedcom_lval.level = current_level;                                      \
      return OPEN;                                                            \
    }                                                                         \
    /* level_diff > 1: out of brackets... */                                  \
  }
72
73
/* Action for a level number written with a leading zero: report the error
   through gedcom_error() and return BADTOKEN. */
#define ACTION_0_DIGITS                                                       \
  {                                                                           \
    gedcom_error("Level number with leading zero");                           \
    return BADTOKEN;                                                          \
  }
78
79
/* Action for a level number.

   The matched digits are parsed and checked against [0..MAXGEDCLEVEL];
   out-of-range values are reported and yield BADTOKEN.  Otherwise
   level_diff is set to the difference with the previous level, the lexer
   switches to the EXPECT_TAG start condition, current_level is updated and
   the first bracket token is returned:
     - level_diff below 1:  CLOSE (further pending brackets are returned by
                            ACTION_BEFORE_REGEXPS on the following calls);
     - level_diff equal 1:  OPEN, with gedcom_lval.level set;
     - level_diff above 1:  the level jumped by more than one, which is
                            reported as an error and yields BADTOKEN.

   strtol() is used instead of atoi(): atoi() has undefined behaviour when
   the value does not fit in an int, whereas strtol() saturates at
   LONG_MAX, so an over-long digit string is rejected by the range check. */
#define ACTION_DIGITS                                                         \
  {                                                                           \
    long parsed_level = strtol(TO_INTERNAL(yytext), NULL, 10);                \
    int level;                                                                \
    if ((parsed_level < 0) || (parsed_level > MAXGEDCLEVEL)) {                \
      gedcom_error ("Level number out of range [0..%d]",                      \
                    MAXGEDCLEVEL);                                            \
      return BADTOKEN;                                                        \
    }                                                                         \
    /* In range, so the narrowing conversion is value-preserving. */          \
    level = (int) parsed_level;                                               \
    level_diff = level - current_level;                                       \
    BEGIN(EXPECT_TAG);                                                        \
    current_level = level;                                                    \
    if (level_diff < 1) {                                                     \
      level_diff++;                                                           \
      return CLOSE;                                                           \
    }                                                                         \
    else if (level_diff == 1) {                                               \
      level_diff++;                                                           \
      gedcom_lval.level = current_level;                                      \
      return OPEN;                                                            \
    }                                                                         \
    else {                                                                    \
      /* should never happen (error to GEDCOM spec) */                        \
      gedcom_error ("GEDCOM level number is %d higher than "                  \
                    "previous",                                               \
                    level_diff);                                              \
      return BADTOKEN;                                                        \
    }                                                                         \
  }
107
108
/* Action for an alphanumeric tag that is not one of the known tags.

   A match longer than MAXGEDCTAGLEN is reported and yields BADTOKEN.
   Otherwise the text is copied into string_buf, converted with
   TO_INTERNAL and stored in gedcom_lval.tag; the lexer then switches to
   the NORMAL start condition and USERTAG is returned.

   The error message has a %s and a %d conversion, so the offending tag and
   the limit must be passed to gedcom_error(); calling it without them is
   undefined behaviour. */
#define ACTION_ALPHANUM                                                       \
  {                                                                           \
    if (strlen(yytext) > MAXGEDCTAGLEN) {                                     \
      gedcom_error("Tag '%s' too long, max %d chars",                         \
                   yytext, (int) (MAXGEDCTAGLEN));                            \
      return BADTOKEN;                                                        \
    }                                                                         \
    /* The length check above guarantees the terminating NUL is copied. */    \
    strncpy(string_buf, yytext, MAXGEDCTAGLEN+1);                             \
    gedcom_lval.tag = TO_INTERNAL(string_buf);                                \
    BEGIN(NORMAL);                                                            \
    return USERTAG;                                                           \
  }
119
120
/* Action for a delimiter: store the converted match text in
   gedcom_lval.string and return DELIM. */
#define ACTION_DELIM                                                          \
  {                                                                           \
    gedcom_lval.string = TO_INTERNAL(yytext);                                 \
    return DELIM;                                                             \
  }
125
126
/* Action for any other character: store the converted match text in
   gedcom_lval.string and return ANYCHAR, unless the conversion produced an
   empty string, in which case no token is returned. */
#define ACTION_ANY                                                            \
  {                                                                           \
    gedcom_lval.string = TO_INTERNAL(yytext);                                 \
    /* Character conversion may merge the current character with the next    \
       one, in which case nothing has been produced yet.  In principle this  \
       only concerns the 1-byte case (e.g. ANSEL), but the check is          \
       harmless for the unicode case.                                        \
    */                                                                        \
    if (gedcom_lval.string[0] != '\0') {                                      \
      return ANYCHAR;                                                         \
    }                                                                         \
  }
138
139
/* Action for an escape sequence: store the converted match text in
   gedcom_lval.string and return ESCAPE. */
#define ACTION_ESCAPE                                                         \
  {                                                                           \
    gedcom_lval.string = TO_INTERNAL(yytext);                                 \
    return ESCAPE;                                                            \
  }
144
145
/* Action for a pointer: store the converted match text in
   gedcom_lval.pointer and return POINTER. */
#define ACTION_POINTER                                                        \
  {                                                                           \
    gedcom_lval.pointer = TO_INTERNAL(yytext);                                \
    return POINTER;                                                           \
  }
150
151
152 /* Due to the conversion of level numbers into brackets, the
153    terminator is not important, so no token is returned here.
154    Although not strictly according to the GEDCOM spec, we'll ignore
155    whitespace just before the terminator.
156 */
157
/* Action for a line terminator: count the line and return to the INITIAL
   start condition.  No token is returned. */
#define ACTION_TERMINATOR                                                     \
  {                                                                           \
    line_no += 1;                                                             \
    BEGIN(INITIAL);                                                           \
  }
162
163
164 /* Eventually we have to return 1 closing bracket (for the trailer).
165    We can detect whether we have sent the closing bracket using the
166    level_diff (at eof, first it is 2, then we increment it ourselves)
167 */
168
/* Action at end of input: while level_diff is still 2 the final CLOSE has
   not been sent, so bump level_diff and return it; on any other value the
   scanner is terminated. */
#define ACTION_EOF                                                            \
  {                                                                           \
    if (level_diff != 2) {                                                    \
      yyterminate();                                                          \
    }                                                                         \
    else {                                                                    \
      level_diff += 1;                                                        \
      return CLOSE;                                                           \
    }                                                                         \
  }
178
179
/* Action for a character no other rule accepts: report the match text and
   the value of its first byte through gedcom_error() and return BADTOKEN.

   The byte is converted through unsigned char before being printed with
   %02x: where plain char is signed, a byte >= 0x80 would otherwise be
   sign-extended and shown as e.g. 0xffffffe9 instead of 0xe9. */
#define ACTION_UNEXPECTED                                                     \
  {                                                                           \
    gedcom_error("Unexpected character: '%s' (0x%02x)",                       \
                 yytext, (unsigned int) (unsigned char) yytext[0]);           \
    return BADTOKEN;                                                          \
  }