4 /* In low-high order, a space is encoded as 0x20 0x00 */
5 /* i.e. this is utf-16-le */
8 #include "gedcom.tab.h"
/* otherchar: a low byte from the listed ranges followed by a 0x00 high byte,
   or any low byte followed by a non-zero high byte. */
23 otherchar [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]\x00|[\x00-\xFF][\x01-\xFF]
/* terminator: CR, LF, CR LF or LF CR, each character stored low byte first. */
24 terminator \x0D\x00|\x0A\x00|\x0D\x00\x0A\x00|\x0A\x00\x0D\x00
/* Composite classes: any_but_delim is any_char without delim, and non_at is
   any_char without literal_at. */
26 any_char {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
27 any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
28 non_at {alpha}|{digit}|{otherchar}|{delim}|{hash}
29 alphanum {alpha}|{digit}
30 gen_delim {delim}|{tab}
/* escape: "@#", then one or more any_char, then a closing "@". */
32 escape @\x00#\x00{any_char}+@\x00
/* pointer: "@", one alphanum, one or more non_at, then a closing "@". */
33 pointer @\x00{alphanum}{non_at}+@\x00
/* Level number of the most recently scanned line; starts at -1 and is
   overwritten by the level-number rule. */
36 static int current_level=-1;
/* New level minus previous level, as computed by the level-number rule;
   starts at MAXGEDCLEVEL. */
37 static int level_diff=MAXGEDCLEVEL;
47 /* The GEDCOM level number is converted into a sequence of opening
48 and closing brackets. Simply put, the following GEDCOM fragment:
61 { SOUR genes (1 higher: no closing brackets)
62 { VERS 1.6 (1 higher: no closing brackets)
63 } { NAME Genes (same level: 1 closing bracket)
64 } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets)
78 But because this means that one token is converted into a series
79 of tokens, there is some initial code following immediately here
80 that returns "pending" tokens. */
83 char string_buf[MAXGEDCLINELEN+1];
89 else if (level_diff == 1) {
94 /* out of brackets... */
/* Pass str to to_internal() together with the byte length of the current
   token (yyleng), so this is only meaningful inside a scanner action.
   NOTE(review): presumably converts UTF-16LE to the internal encoding and
   returns a char * -- confirm against to_internal()'s declaration. */
97 #define TO_INTERNAL(str) to_internal(str, yyleng)
99 #define MKTAGACTION(tag) \
100 { gedcom_lval.string = TO_INTERNAL(yytext); \
106 <INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
108 <INITIAL>[0]\x00{digit}+ { /* low-high order: the '0' byte comes first, then its 0x00 high byte; listed before the {digit}+ rule so it wins on equal-length matches */ gedcom_error ("Level number with leading zero");
112 <INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
113 if ((level < 0) || (level > MAXGEDCLEVEL)) {
114 gedcom_error ("Level number out of range [0..%d]",
118 level_diff = level - current_level;
120 current_level = level;
121 if (level_diff < 1) {
125 else if (level_diff == 1) {
130 /* should never happen (it violates the GEDCOM spec) */
131 gedcom_error ("GEDCOM level number is %d higher than "
138 <EXPECT_TAG>A\x00B\x00B\x00R\x00 MKTAGACTION(ABBR) /* Known tags, one rule each: every ASCII character of the tag is followed by its 0x00 high byte (low-high order) */
139 <EXPECT_TAG>A\x00D\x00D\x00R\x00 MKTAGACTION(ADDR)
140 <EXPECT_TAG>A\x00D\x00R\x001\x00 MKTAGACTION(ADR1)
141 <EXPECT_TAG>A\x00D\x00R\x002\x00 MKTAGACTION(ADR2)
142 <EXPECT_TAG>A\x00D\x00O\x00P\x00 MKTAGACTION(ADOP)
143 <EXPECT_TAG>A\x00F\x00N\x00 MKTAGACTION(AFN)
144 <EXPECT_TAG>A\x00G\x00E\x00 MKTAGACTION(AGE)
145 <EXPECT_TAG>A\x00G\x00N\x00C\x00 MKTAGACTION(AGNC)
146 <EXPECT_TAG>A\x00L\x00I\x00A\x00 MKTAGACTION(ALIA)
147 <EXPECT_TAG>A\x00N\x00C\x00E\x00 MKTAGACTION(ANCE)
148 <EXPECT_TAG>A\x00N\x00C\x00I\x00 MKTAGACTION(ANCI)
149 <EXPECT_TAG>A\x00N\x00U\x00L\x00 MKTAGACTION(ANUL)
150 <EXPECT_TAG>A\x00S\x00S\x00O\x00 MKTAGACTION(ASSO)
151 <EXPECT_TAG>A\x00U\x00T\x00H\x00 MKTAGACTION(AUTH)
152 <EXPECT_TAG>B\x00A\x00P\x00L\x00 MKTAGACTION(BAPL)
153 <EXPECT_TAG>B\x00A\x00P\x00M\x00 MKTAGACTION(BAPM)
154 <EXPECT_TAG>B\x00A\x00R\x00M\x00 MKTAGACTION(BARM)
155 <EXPECT_TAG>B\x00A\x00S\x00M\x00 MKTAGACTION(BASM)
156 <EXPECT_TAG>B\x00I\x00R\x00T\x00 MKTAGACTION(BIRT)
157 <EXPECT_TAG>B\x00L\x00E\x00S\x00 MKTAGACTION(BLES)
158 <EXPECT_TAG>B\x00L\x00O\x00B\x00 MKTAGACTION(BLOB)
159 <EXPECT_TAG>B\x00U\x00R\x00I\x00 MKTAGACTION(BURI)
160 <EXPECT_TAG>C\x00A\x00L\x00N\x00 MKTAGACTION(CALN)
161 <EXPECT_TAG>C\x00A\x00S\x00T\x00 MKTAGACTION(CAST)
162 <EXPECT_TAG>C\x00A\x00U\x00S\x00 MKTAGACTION(CAUS)
163 <EXPECT_TAG>C\x00E\x00N\x00S\x00 MKTAGACTION(CENS)
164 <EXPECT_TAG>C\x00H\x00A\x00N\x00 MKTAGACTION(CHAN)
165 <EXPECT_TAG>C\x00H\x00A\x00R\x00 MKTAGACTION(CHAR)
166 <EXPECT_TAG>C\x00H\x00I\x00L\x00 MKTAGACTION(CHIL)
167 <EXPECT_TAG>C\x00H\x00R\x00 MKTAGACTION(CHR)
168 <EXPECT_TAG>C\x00H\x00R\x00A\x00 MKTAGACTION(CHRA)
169 <EXPECT_TAG>C\x00I\x00T\x00Y\x00 MKTAGACTION(CITY)
170 <EXPECT_TAG>C\x00O\x00N\x00C\x00 MKTAGACTION(CONC)
171 <EXPECT_TAG>C\x00O\x00N\x00F\x00 MKTAGACTION(CONF)
172 <EXPECT_TAG>C\x00O\x00N\x00L\x00 MKTAGACTION(CONL)
173 <EXPECT_TAG>C\x00O\x00N\x00T\x00 MKTAGACTION(CONT)
174 <EXPECT_TAG>C\x00O\x00P\x00R\x00 MKTAGACTION(COPR)
175 <EXPECT_TAG>C\x00O\x00R\x00P\x00 MKTAGACTION(CORP)
176 <EXPECT_TAG>C\x00R\x00E\x00M\x00 MKTAGACTION(CREM)
177 <EXPECT_TAG>C\x00T\x00R\x00Y\x00 MKTAGACTION(CTRY)
178 <EXPECT_TAG>D\x00A\x00T\x00A\x00 MKTAGACTION(DATA)
179 <EXPECT_TAG>D\x00A\x00T\x00E\x00 MKTAGACTION(DATE)
180 <EXPECT_TAG>D\x00E\x00A\x00T\x00 MKTAGACTION(DEAT)
181 <EXPECT_TAG>D\x00E\x00S\x00C\x00 MKTAGACTION(DESC)
182 <EXPECT_TAG>D\x00E\x00S\x00I\x00 MKTAGACTION(DESI)
183 <EXPECT_TAG>D\x00E\x00S\x00T\x00 MKTAGACTION(DEST)
184 <EXPECT_TAG>D\x00I\x00V\x00 MKTAGACTION(DIV)
185 <EXPECT_TAG>D\x00I\x00V\x00F\x00 MKTAGACTION(DIVF)
186 <EXPECT_TAG>D\x00S\x00C\x00R\x00 MKTAGACTION(DSCR)
187 <EXPECT_TAG>E\x00D\x00U\x00C\x00 MKTAGACTION(EDUC)
188 <EXPECT_TAG>E\x00M\x00I\x00G\x00 MKTAGACTION(EMIG)
189 <EXPECT_TAG>E\x00N\x00D\x00L\x00 MKTAGACTION(ENDL)
190 <EXPECT_TAG>E\x00N\x00G\x00A\x00 MKTAGACTION(ENGA)
191 <EXPECT_TAG>E\x00V\x00E\x00N\x00 MKTAGACTION(EVEN)
192 <EXPECT_TAG>F\x00A\x00M\x00 MKTAGACTION(FAM)
193 <EXPECT_TAG>F\x00A\x00M\x00C\x00 MKTAGACTION(FAMC)
194 <EXPECT_TAG>F\x00A\x00M\x00F\x00 MKTAGACTION(FAMF)
195 <EXPECT_TAG>F\x00A\x00M\x00S\x00 MKTAGACTION(FAMS)
196 <EXPECT_TAG>F\x00C\x00O\x00M\x00 MKTAGACTION(FCOM)
197 <EXPECT_TAG>F\x00I\x00L\x00E\x00 MKTAGACTION(FILE)
198 <EXPECT_TAG>F\x00O\x00R\x00M\x00 MKTAGACTION(FORM)
199 <EXPECT_TAG>G\x00E\x00D\x00C\x00 MKTAGACTION(GEDC)
200 <EXPECT_TAG>G\x00I\x00V\x00N\x00 MKTAGACTION(GIVN)
201 <EXPECT_TAG>G\x00R\x00A\x00D\x00 MKTAGACTION(GRAD)
202 <EXPECT_TAG>H\x00E\x00A\x00D\x00 MKTAGACTION(HEAD)
203 <EXPECT_TAG>H\x00U\x00S\x00B\x00 MKTAGACTION(HUSB)
204 <EXPECT_TAG>I\x00D\x00N\x00O\x00 MKTAGACTION(IDNO)
205 <EXPECT_TAG>I\x00M\x00M\x00I\x00 MKTAGACTION(IMMI)
206 <EXPECT_TAG>I\x00N\x00D\x00I\x00 MKTAGACTION(INDI)
207 <EXPECT_TAG>L\x00A\x00N\x00G\x00 MKTAGACTION(LANG)
208 <EXPECT_TAG>L\x00E\x00G\x00A\x00 MKTAGACTION(LEGA)
209 <EXPECT_TAG>M\x00A\x00R\x00B\x00 MKTAGACTION(MARB)
210 <EXPECT_TAG>M\x00A\x00R\x00C\x00 MKTAGACTION(MARC)
211 <EXPECT_TAG>M\x00A\x00R\x00L\x00 MKTAGACTION(MARL)
212 <EXPECT_TAG>M\x00A\x00R\x00R\x00 MKTAGACTION(MARR)
213 <EXPECT_TAG>M\x00A\x00R\x00S\x00 MKTAGACTION(MARS)
214 <EXPECT_TAG>M\x00E\x00D\x00I\x00 MKTAGACTION(MEDI)
215 <EXPECT_TAG>N\x00A\x00M\x00E\x00 MKTAGACTION(NAME)
216 <EXPECT_TAG>N\x00A\x00T\x00I\x00 MKTAGACTION(NATI)
217 <EXPECT_TAG>N\x00A\x00T\x00U\x00 MKTAGACTION(NATU)
218 <EXPECT_TAG>N\x00C\x00H\x00I\x00 MKTAGACTION(NCHI)
219 <EXPECT_TAG>N\x00I\x00C\x00K\x00 MKTAGACTION(NICK)
220 <EXPECT_TAG>N\x00M\x00R\x00 MKTAGACTION(NMR)
221 <EXPECT_TAG>N\x00O\x00T\x00E\x00 MKTAGACTION(NOTE)
222 <EXPECT_TAG>N\x00P\x00F\x00X\x00 MKTAGACTION(NPFX)
223 <EXPECT_TAG>N\x00S\x00F\x00X\x00 MKTAGACTION(NSFX)
224 <EXPECT_TAG>O\x00B\x00J\x00E\x00 MKTAGACTION(OBJE)
225 <EXPECT_TAG>O\x00C\x00C\x00U\x00 MKTAGACTION(OCCU)
226 <EXPECT_TAG>O\x00R\x00D\x00I\x00 MKTAGACTION(ORDI)
227 <EXPECT_TAG>O\x00R\x00D\x00N\x00 MKTAGACTION(ORDN)
228 <EXPECT_TAG>P\x00A\x00G\x00E\x00 MKTAGACTION(PAGE)
229 <EXPECT_TAG>P\x00E\x00D\x00I\x00 MKTAGACTION(PEDI)
230 <EXPECT_TAG>P\x00H\x00O\x00N\x00 MKTAGACTION(PHON)
231 <EXPECT_TAG>P\x00L\x00A\x00C\x00 MKTAGACTION(PLAC)
232 <EXPECT_TAG>P\x00O\x00S\x00T\x00 MKTAGACTION(POST)
233 <EXPECT_TAG>P\x00R\x00O\x00B\x00 MKTAGACTION(PROB)
234 <EXPECT_TAG>P\x00R\x00O\x00P\x00 MKTAGACTION(PROP)
235 <EXPECT_TAG>P\x00U\x00B\x00L\x00 MKTAGACTION(PUBL)
236 <EXPECT_TAG>Q\x00U\x00A\x00Y\x00 MKTAGACTION(QUAY)
237 <EXPECT_TAG>R\x00E\x00F\x00N\x00 MKTAGACTION(REFN)
238 <EXPECT_TAG>R\x00E\x00L\x00A\x00 MKTAGACTION(RELA)
239 <EXPECT_TAG>R\x00E\x00L\x00I\x00 MKTAGACTION(RELI)
240 <EXPECT_TAG>R\x00E\x00P\x00O\x00 MKTAGACTION(REPO)
241 <EXPECT_TAG>R\x00E\x00S\x00I\x00 MKTAGACTION(RESI)
242 <EXPECT_TAG>R\x00E\x00S\x00N\x00 MKTAGACTION(RESN)
243 <EXPECT_TAG>R\x00E\x00T\x00I\x00 MKTAGACTION(RETI)
244 <EXPECT_TAG>R\x00F\x00N\x00 MKTAGACTION(RFN)
245 <EXPECT_TAG>R\x00I\x00N\x00 MKTAGACTION(RIN)
246 <EXPECT_TAG>R\x00O\x00L\x00E\x00 MKTAGACTION(ROLE)
247 <EXPECT_TAG>S\x00E\x00X\x00 MKTAGACTION(SEX)
248 <EXPECT_TAG>S\x00L\x00G\x00C\x00 MKTAGACTION(SLGC)
249 <EXPECT_TAG>S\x00L\x00G\x00S\x00 MKTAGACTION(SLGS)
250 <EXPECT_TAG>S\x00O\x00U\x00R\x00 MKTAGACTION(SOUR)
251 <EXPECT_TAG>S\x00P\x00F\x00X\x00 MKTAGACTION(SPFX)
252 <EXPECT_TAG>S\x00S\x00N\x00 MKTAGACTION(SSN)
253 <EXPECT_TAG>S\x00T\x00A\x00E\x00 MKTAGACTION(STAE)
254 <EXPECT_TAG>S\x00T\x00A\x00T\x00 MKTAGACTION(STAT)
255 <EXPECT_TAG>S\x00U\x00B\x00M\x00 MKTAGACTION(SUBM)
256 <EXPECT_TAG>S\x00U\x00B\x00N\x00 MKTAGACTION(SUBN)
257 <EXPECT_TAG>S\x00U\x00R\x00N\x00 MKTAGACTION(SURN)
258 <EXPECT_TAG>T\x00E\x00M\x00P\x00 MKTAGACTION(TEMP)
259 <EXPECT_TAG>T\x00E\x00X\x00T\x00 MKTAGACTION(TEXT)
260 <EXPECT_TAG>T\x00I\x00M\x00E\x00 MKTAGACTION(TIME)
261 <EXPECT_TAG>T\x00I\x00T\x00L\x00 MKTAGACTION(TITL)
262 <EXPECT_TAG>T\x00R\x00L\x00R\x00 MKTAGACTION(TRLR)
263 <EXPECT_TAG>T\x00Y\x00P\x00E\x00 MKTAGACTION(TYPE)
264 <EXPECT_TAG>V\x00E\x00R\x00S\x00 MKTAGACTION(VERS)
265 <EXPECT_TAG>W\x00I\x00F\x00E\x00 MKTAGACTION(WIFE)
266 <EXPECT_TAG>W\x00I\x00L\x00L\x00 MKTAGACTION(WILL) /* keep all of these above the {alphanum}+ rule: on equal-length matches flex picks the rule listed first */
268 <EXPECT_TAG>{alphanum}+ { /* Any other tag. yytext is UTF-16LE (two bytes per character, every ASCII character carries a 0x00 byte), so its length comes from yyleng; strlen() would stop after the first byte. */ if (yyleng / 2 > MAXGEDCTAGLEN) {
269 gedcom_error("Tag '%s' too long, max %d chars", TO_INTERNAL(yytext), (int) MAXGEDCTAGLEN); /* supply the values the format string asks for; the tag is converted first so %s sees a NUL-terminated string */
272 /* No strncpy into string_buf here: a string copy of UTF-16LE text ends at the first 0x00 byte, leaving nothing useful to convert. */
273 gedcom_lval.string = TO_INTERNAL(yytext); /* convert the matched bytes directly, like the other rules do */
278 {delim} { gedcom_lval.string = TO_INTERNAL(yytext);
282 {any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
286 {escape}/{non_at} { gedcom_lval.string = TO_INTERNAL(yytext);
290 {pointer} { gedcom_lval.string = TO_INTERNAL(yytext);
294 /* Due to the conversion of level numbers into brackets, the
295 terminator is not important, so no token is returned here.
296 Although not strictly according to the GEDCOM spec, we'll ignore
297 whitespace just before the terminator.
300 {gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); }
302 /* At end of input we still have to return one closing bracket (for the
303 trailer). We can tell whether that bracket has already been sent from
304 level_diff (at EOF it is first 2, then we increment it ourselves) */
306 <<EOF>> { if (level_diff == 2) {
315 . { gedcom_error("Unexpected character: '%s' (0x%02x)",
332 int res = open_conv_to_internal("UTF16LE");
334 gedcom_error("Unable to open conversion context: %s",
338 tok = gedcom_lohi_lex();
341 case BADTOKEN: printf("BADTOKEN "); break;
342 case OPEN: printf("OPEN "); break;
343 case CLOSE: printf("CLOSE "); break;
344 case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
345 case DELIM: printf("DELIM "); break;
346 case ANYCHAR: printf("%s ", gedcom_lval.string); break;
347 case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
348 case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
349 default: printf("TAG(%s) ", gedcom_lval.string); break;
351 tok = gedcom_lohi_lex();
354 close_conv_to_internal();