1 /* This program is free software; you can redistribute it and/or modify *
2 * it under the terms of the GNU General Public License as published by *
3 * the Free Software Foundation; either version 2 of the License, or *
4 * (at your option) any later version. *
6 (C) 2001 by The Genes Development Team
7 Original author: Peter Verthez (Peter.Verthez@advalvas.be)
14 #include "gedcom.tab.h"
31 otherchar [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFE]
32 terminator \x0D|\x0A|\x0D\x0A|\x0A\x0D
34 any_char {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
35 any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
36 non_at {alpha}|{digit}|{otherchar}|{delim}|{hash}
37 alphanum {alpha}|{digit}
38 gen_delim {delim}|{tab}
41 pointer @{alphanum}{non_at}+@
44 static int current_level=-1;
45 static int level_diff=MAXGEDCLEVEL;
56 /* The GEDCOM level number is converted into a sequence of opening
57 and closing brackets. Simply put, the following GEDCOM fragment:
70 { SOUR genes (1 higher: no closing brackets)
71 { VERS 1.6 (1 higher: no closing brackets)
72 } { NAME Genes (same level: 1 closing bracket)
73 } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets)
87 But because this means that one token is converted into a series
88 of tokens, there is some initial code following immediately here
89 that returns "pending" tokens. */
92 char string_buf[MAXGEDCLINELEN+1];
98 else if (level_diff == 1) {
103 /* out of brackets... */
106 #define TO_INTERNAL(str) to_internal(str, yyleng) /* convert str via to_internal(); the length passed is always yyleng, whichever buffer str names */
108 #define MKTAGACTION(tag) \
109 { gedcom_lval.string = TO_INTERNAL(yytext); \
115 <INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
117 <INITIAL>0{digit}+ { gedcom_error ("Level number with leading zero");
121 <INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
122 if ((level < 0) || (level > MAXGEDCLEVEL)) {
123 gedcom_error ("Level number out of range [0..%d]",
127 level_diff = level - current_level;
129 current_level = level;
130 if (level_diff < 1) {
134 else if (level_diff == 1) {
139 /* should never happen (violates the GEDCOM spec) */
140 gedcom_error ("GEDCOM level number is %d higher than "
147 <EXPECT_TAG>ABBR MKTAGACTION(ABBR)
148 <EXPECT_TAG>ADDR MKTAGACTION(ADDR)
149 <EXPECT_TAG>ADR1 MKTAGACTION(ADR1)
150 <EXPECT_TAG>ADR2 MKTAGACTION(ADR2)
151 <EXPECT_TAG>ADOP MKTAGACTION(ADOP)
152 <EXPECT_TAG>AFN MKTAGACTION(AFN)
153 <EXPECT_TAG>AGE MKTAGACTION(AGE)
154 <EXPECT_TAG>AGNC MKTAGACTION(AGNC)
155 <EXPECT_TAG>ALIA MKTAGACTION(ALIA)
156 <EXPECT_TAG>ANCE MKTAGACTION(ANCE)
157 <EXPECT_TAG>ANCI MKTAGACTION(ANCI)
158 <EXPECT_TAG>ANUL MKTAGACTION(ANUL)
159 <EXPECT_TAG>ASSO MKTAGACTION(ASSO)
160 <EXPECT_TAG>AUTH MKTAGACTION(AUTH)
161 <EXPECT_TAG>BAPL MKTAGACTION(BAPL)
162 <EXPECT_TAG>BAPM MKTAGACTION(BAPM)
163 <EXPECT_TAG>BARM MKTAGACTION(BARM)
164 <EXPECT_TAG>BASM MKTAGACTION(BASM)
165 <EXPECT_TAG>BIRT MKTAGACTION(BIRT)
166 <EXPECT_TAG>BLES MKTAGACTION(BLES)
167 <EXPECT_TAG>BLOB MKTAGACTION(BLOB)
168 <EXPECT_TAG>BURI MKTAGACTION(BURI)
169 <EXPECT_TAG>CALN MKTAGACTION(CALN)
170 <EXPECT_TAG>CAST MKTAGACTION(CAST)
171 <EXPECT_TAG>CAUS MKTAGACTION(CAUS)
172 <EXPECT_TAG>CENS MKTAGACTION(CENS)
173 <EXPECT_TAG>CHAN MKTAGACTION(CHAN)
174 <EXPECT_TAG>CHAR MKTAGACTION(CHAR)
175 <EXPECT_TAG>CHIL MKTAGACTION(CHIL)
176 <EXPECT_TAG>CHR MKTAGACTION(CHR)
177 <EXPECT_TAG>CHRA MKTAGACTION(CHRA)
178 <EXPECT_TAG>CITY MKTAGACTION(CITY)
179 <EXPECT_TAG>CONC MKTAGACTION(CONC)
180 <EXPECT_TAG>CONF MKTAGACTION(CONF)
181 <EXPECT_TAG>CONL MKTAGACTION(CONL)
182 <EXPECT_TAG>CONT MKTAGACTION(CONT)
183 <EXPECT_TAG>COPR MKTAGACTION(COPR)
184 <EXPECT_TAG>CORP MKTAGACTION(CORP)
185 <EXPECT_TAG>CREM MKTAGACTION(CREM)
186 <EXPECT_TAG>CTRY MKTAGACTION(CTRY)
187 <EXPECT_TAG>DATA MKTAGACTION(DATA)
188 <EXPECT_TAG>DATE MKTAGACTION(DATE)
189 <EXPECT_TAG>DEAT MKTAGACTION(DEAT)
190 <EXPECT_TAG>DESC MKTAGACTION(DESC)
191 <EXPECT_TAG>DESI MKTAGACTION(DESI)
192 <EXPECT_TAG>DEST MKTAGACTION(DEST)
193 <EXPECT_TAG>DIV MKTAGACTION(DIV)
194 <EXPECT_TAG>DIVF MKTAGACTION(DIVF)
195 <EXPECT_TAG>DSCR MKTAGACTION(DSCR)
196 <EXPECT_TAG>EDUC MKTAGACTION(EDUC)
197 <EXPECT_TAG>EMIG MKTAGACTION(EMIG)
198 <EXPECT_TAG>ENDL MKTAGACTION(ENDL)
199 <EXPECT_TAG>ENGA MKTAGACTION(ENGA)
200 <EXPECT_TAG>EVEN MKTAGACTION(EVEN)
201 <EXPECT_TAG>FAM MKTAGACTION(FAM)
202 <EXPECT_TAG>FAMC MKTAGACTION(FAMC)
203 <EXPECT_TAG>FAMF MKTAGACTION(FAMF)
204 <EXPECT_TAG>FAMS MKTAGACTION(FAMS)
205 <EXPECT_TAG>FCOM MKTAGACTION(FCOM)
206 <EXPECT_TAG>FILE MKTAGACTION(FILE)
207 <EXPECT_TAG>FORM MKTAGACTION(FORM)
208 <EXPECT_TAG>GEDC MKTAGACTION(GEDC)
209 <EXPECT_TAG>GIVN MKTAGACTION(GIVN)
210 <EXPECT_TAG>GRAD MKTAGACTION(GRAD)
211 <EXPECT_TAG>HEAD MKTAGACTION(HEAD)
212 <EXPECT_TAG>HUSB MKTAGACTION(HUSB)
213 <EXPECT_TAG>IDNO MKTAGACTION(IDNO)
214 <EXPECT_TAG>IMMI MKTAGACTION(IMMI)
215 <EXPECT_TAG>INDI MKTAGACTION(INDI)
216 <EXPECT_TAG>LANG MKTAGACTION(LANG)
217 <EXPECT_TAG>LEGA MKTAGACTION(LEGA)
218 <EXPECT_TAG>MARB MKTAGACTION(MARB)
219 <EXPECT_TAG>MARC MKTAGACTION(MARC)
220 <EXPECT_TAG>MARL MKTAGACTION(MARL)
221 <EXPECT_TAG>MARR MKTAGACTION(MARR)
222 <EXPECT_TAG>MARS MKTAGACTION(MARS)
223 <EXPECT_TAG>MEDI MKTAGACTION(MEDI)
224 <EXPECT_TAG>NAME MKTAGACTION(NAME)
225 <EXPECT_TAG>NATI MKTAGACTION(NATI)
226 <EXPECT_TAG>NATU MKTAGACTION(NATU)
227 <EXPECT_TAG>NCHI MKTAGACTION(NCHI)
228 <EXPECT_TAG>NICK MKTAGACTION(NICK)
229 <EXPECT_TAG>NMR MKTAGACTION(NMR)
230 <EXPECT_TAG>NOTE MKTAGACTION(NOTE)
231 <EXPECT_TAG>NPFX MKTAGACTION(NPFX)
232 <EXPECT_TAG>NSFX MKTAGACTION(NSFX)
233 <EXPECT_TAG>OBJE MKTAGACTION(OBJE)
234 <EXPECT_TAG>OCCU MKTAGACTION(OCCU)
235 <EXPECT_TAG>ORDI MKTAGACTION(ORDI)
236 <EXPECT_TAG>ORDN MKTAGACTION(ORDN)
237 <EXPECT_TAG>PAGE MKTAGACTION(PAGE)
238 <EXPECT_TAG>PEDI MKTAGACTION(PEDI)
239 <EXPECT_TAG>PHON MKTAGACTION(PHON)
240 <EXPECT_TAG>PLAC MKTAGACTION(PLAC)
241 <EXPECT_TAG>POST MKTAGACTION(POST)
242 <EXPECT_TAG>PROB MKTAGACTION(PROB)
243 <EXPECT_TAG>PROP MKTAGACTION(PROP)
244 <EXPECT_TAG>PUBL MKTAGACTION(PUBL)
245 <EXPECT_TAG>QUAY MKTAGACTION(QUAY)
246 <EXPECT_TAG>REFN MKTAGACTION(REFN)
247 <EXPECT_TAG>RELA MKTAGACTION(RELA)
248 <EXPECT_TAG>RELI MKTAGACTION(RELI)
249 <EXPECT_TAG>REPO MKTAGACTION(REPO)
250 <EXPECT_TAG>RESI MKTAGACTION(RESI)
251 <EXPECT_TAG>RESN MKTAGACTION(RESN)
252 <EXPECT_TAG>RETI MKTAGACTION(RETI)
253 <EXPECT_TAG>RFN MKTAGACTION(RFN)
254 <EXPECT_TAG>RIN MKTAGACTION(RIN)
255 <EXPECT_TAG>ROLE MKTAGACTION(ROLE)
256 <EXPECT_TAG>SEX MKTAGACTION(SEX)
257 <EXPECT_TAG>SLGC MKTAGACTION(SLGC)
258 <EXPECT_TAG>SLGS MKTAGACTION(SLGS)
259 <EXPECT_TAG>SOUR MKTAGACTION(SOUR)
260 <EXPECT_TAG>SPFX MKTAGACTION(SPFX)
261 <EXPECT_TAG>SSN MKTAGACTION(SSN)
262 <EXPECT_TAG>STAE MKTAGACTION(STAE)
263 <EXPECT_TAG>STAT MKTAGACTION(STAT)
264 <EXPECT_TAG>SUBM MKTAGACTION(SUBM)
265 <EXPECT_TAG>SUBN MKTAGACTION(SUBN)
266 <EXPECT_TAG>SURN MKTAGACTION(SURN)
267 <EXPECT_TAG>TEMP MKTAGACTION(TEMP)
268 <EXPECT_TAG>TEXT MKTAGACTION(TEXT)
269 <EXPECT_TAG>TIME MKTAGACTION(TIME)
270 <EXPECT_TAG>TITL MKTAGACTION(TITL)
271 <EXPECT_TAG>TRLR MKTAGACTION(TRLR)
272 <EXPECT_TAG>TYPE MKTAGACTION(TYPE)
273 <EXPECT_TAG>VERS MKTAGACTION(VERS)
274 <EXPECT_TAG>WIFE MKTAGACTION(WIFE)
275 <EXPECT_TAG>WILL MKTAGACTION(WILL)
277 <EXPECT_TAG>{alphanum}+ { if (strlen(yytext) > MAXGEDCTAGLEN) { /* generic tag rule: reject tags longer than MAXGEDCTAGLEN */
278 gedcom_error("Tag '%s' too long, max %d chars", yytext, MAXGEDCTAGLEN); /* pass the offending tag and the limit that the format string consumes */
281 strncpy(string_buf, yytext, MAXGEDCTAGLEN+1); /* copy the tag into the scratch buffer before conversion */
282 gedcom_lval.string = TO_INTERNAL(string_buf); /* TO_INTERNAL uses yyleng as the length of string_buf's contents */
287 {delim} { gedcom_lval.string = TO_INTERNAL(yytext);
291 {any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
292 /* Due to character conversions, it is possible
293 that the current character will be combined with
294 the next, and so now we don't have a character yet...
295 This is only applicable to the 1byte case (e.g. ANSEL).
297 if (strlen(gedcom_lval.string) > 0)
301 {escape}/{non_at} { gedcom_lval.string = TO_INTERNAL(yytext);
305 {pointer} { gedcom_lval.string = TO_INTERNAL(yytext);
309 /* Due to the conversion of level numbers into brackets, the
310 terminator is not important, so no token is returned here.
311 Although not strictly according to the GEDCOM spec, we'll ignore
312 whitespace just before the terminator.
315 {gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); }
317 /* Eventually we have to return 1 closing bracket (for the trailer).
318 We can detect whether we have sent the closing bracket using the
319 level_diff (at eof, first it is 2, then we increment it ourselves) */
321 <<EOF>> { if (level_diff == 2) {
330 . { gedcom_error("Unexpected character: '%s' (0x%02x)",
347 set_encoding_width(ONE_BYTE);
348 res = open_conv_to_internal("ASCII");
350 gedcom_error("Unable to open conversion context: %s",
354 tok = gedcom_1byte_lex();
357 case BADTOKEN: printf("BADTOKEN "); break;
358 case OPEN: printf("OPEN "); break;
359 case CLOSE: printf("CLOSE "); break;
360 case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
361 case DELIM: printf("DELIM "); break;
362 case ANYCHAR: printf("%s ", gedcom_lval.string); break;
363 case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
364 case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
365 default: printf("TAG(%s) ", gedcom_lval.string); break;
367 tok = gedcom_1byte_lex();
370 close_conv_to_internal();