1 /* This program is free software; you can redistribute it and/or modify *
2 * it under the terms of the GNU General Public License as published by *
3 * the Free Software Foundation; either version 2 of the License, or *
4 * (at your option) any later version. *
6 (C) 2001 by The Genes Development Team
7 Original author: Peter Verthez (Peter.Verthez@advalvas.be)
13 /* In low-high order, a space is encoded as 0x20 0x00 */
14 /* i.e. this is utf-16-le */
17 #include "gedcom.tab.h"
/* otherchar: one 16-bit unit, low byte first.  Either a byte from the list
   (ASCII punctuation except '#', '@' and '_', or any of 0x80-0xFF) followed
   by a 0x00 high byte, or any low byte followed by a non-zero high byte. */
34 otherchar [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]\x00|[\x00-\xFF][\x01-\xFF]
/* terminator: CR, LF, CR LF or LF CR, each byte followed by 0x00. */
35 terminator \x0D\x00|\x0A\x00|\x0D\x00\x0A\x00|\x0A\x00\x0D\x00
/* Unions of the basic classes.  {alpha}, {digit}, {delim}, {hash},
   {literal_at} and {tab} are defined elsewhere in this file. */
37 any_char {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
38 any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
39 non_at {alpha}|{digit}|{otherchar}|{delim}|{hash}
40 alphanum {alpha}|{digit}
41 gen_delim {delim}|{tab}
/* escape:  "@#", one or more any_char, then a closing "@".
   pointer: "@", one alphanum, one or more non_at, then a closing "@". */
43 escape @\x00#\x00{any_char}+@\x00
44 pointer @\x00{alphanum}{non_at}+@\x00
/* Level number of the most recently scanned GEDCOM line; -1 until the first
   level number has been read. */
47 static int current_level=-1;
/* Difference between the newest level number and the previous one (assigned
   in the level-number rule).  The scanner also tests this value to decide
   which bracket tokens still have to be returned. */
48 static int level_diff=MAXGEDCLEVEL;
58 /* The GEDCOM level number is converted into a sequence of opening
59 and closing brackets. Simply put, the following GEDCOM fragment:
72 { SOUR genes (1 higher: no closing brackets)
73 { VERS 1.6 (1 higher: no closing brackets)
74 } { NAME Genes (same level: 1 closing bracket)
75 } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets)
89 But because this means that one token is converted into a series
90 of tokens, there is some initial code following immediately here
91 that returns "pending" tokens. */
94 char string_buf[MAXGEDCLINELEN+1];
100 else if (level_diff == 1) {
102 gedcom_lval.number = current_level;
106 /* out of brackets... */
/* Pass `str` to to_internal() together with yyleng, flex's length of the
   current match, so `str` is expected to hold the current token text.
   NOTE(review): to_internal() is defined elsewhere; presumably it converts
   from the input encoding to the internal one -- confirm. */
109 #define TO_INTERNAL(str) to_internal(str, yyleng)
111 #define MKTAGACTION(tag) \
112 { gedcom_lval.string = TO_INTERNAL(yytext); \
118 <INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
120 <INITIAL>0\x00{digit}+ { /* Low byte first: '0' arrives as 0x30 0x00, so the 0x00
                             must follow the '0', as in every other pattern of
                             this scanner.  Reject level numbers written with a
                             leading zero. */
                          gedcom_error ("Level number with leading zero");
124 <INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
125 if ((level < 0) || (level > MAXGEDCLEVEL)) {
126 gedcom_error ("Level number out of range [0..%d]",
130 level_diff = level - current_level;
132 current_level = level;
133 if (level_diff < 1) {
137 else if (level_diff == 1) {
139 gedcom_lval.number = current_level;
143 /* should never happen (error to GEDCOM spec) */
144 gedcom_error ("GEDCOM level number is %d higher than "
151 <EXPECT_TAG>A\x00B\x00B\x00R\x00 MKTAGACTION(ABBR) /* known tags, spelled as UTF-16-LE: each letter is followed by a 0x00 byte */
152 <EXPECT_TAG>A\x00D\x00D\x00R\x00 MKTAGACTION(ADDR) /* MKTAGACTION begins by storing the converted tag text in gedcom_lval.string */
153 <EXPECT_TAG>A\x00D\x00R\x001\x00 MKTAGACTION(ADR1)
154 <EXPECT_TAG>A\x00D\x00R\x002\x00 MKTAGACTION(ADR2)
155 <EXPECT_TAG>A\x00D\x00O\x00P\x00 MKTAGACTION(ADOP)
156 <EXPECT_TAG>A\x00F\x00N\x00 MKTAGACTION(AFN)
157 <EXPECT_TAG>A\x00G\x00E\x00 MKTAGACTION(AGE)
158 <EXPECT_TAG>A\x00G\x00N\x00C\x00 MKTAGACTION(AGNC)
159 <EXPECT_TAG>A\x00L\x00I\x00A\x00 MKTAGACTION(ALIA)
160 <EXPECT_TAG>A\x00N\x00C\x00E\x00 MKTAGACTION(ANCE)
161 <EXPECT_TAG>A\x00N\x00C\x00I\x00 MKTAGACTION(ANCI)
162 <EXPECT_TAG>A\x00N\x00U\x00L\x00 MKTAGACTION(ANUL)
163 <EXPECT_TAG>A\x00S\x00S\x00O\x00 MKTAGACTION(ASSO)
164 <EXPECT_TAG>A\x00U\x00T\x00H\x00 MKTAGACTION(AUTH)
165 <EXPECT_TAG>B\x00A\x00P\x00L\x00 MKTAGACTION(BAPL)
166 <EXPECT_TAG>B\x00A\x00P\x00M\x00 MKTAGACTION(BAPM)
167 <EXPECT_TAG>B\x00A\x00R\x00M\x00 MKTAGACTION(BARM)
168 <EXPECT_TAG>B\x00A\x00S\x00M\x00 MKTAGACTION(BASM)
169 <EXPECT_TAG>B\x00I\x00R\x00T\x00 MKTAGACTION(BIRT)
170 <EXPECT_TAG>B\x00L\x00E\x00S\x00 MKTAGACTION(BLES)
171 <EXPECT_TAG>B\x00L\x00O\x00B\x00 MKTAGACTION(BLOB)
172 <EXPECT_TAG>B\x00U\x00R\x00I\x00 MKTAGACTION(BURI)
173 <EXPECT_TAG>C\x00A\x00L\x00N\x00 MKTAGACTION(CALN)
174 <EXPECT_TAG>C\x00A\x00S\x00T\x00 MKTAGACTION(CAST)
175 <EXPECT_TAG>C\x00A\x00U\x00S\x00 MKTAGACTION(CAUS)
176 <EXPECT_TAG>C\x00E\x00N\x00S\x00 MKTAGACTION(CENS)
177 <EXPECT_TAG>C\x00H\x00A\x00N\x00 MKTAGACTION(CHAN)
178 <EXPECT_TAG>C\x00H\x00A\x00R\x00 MKTAGACTION(CHAR)
179 <EXPECT_TAG>C\x00H\x00I\x00L\x00 MKTAGACTION(CHIL)
180 <EXPECT_TAG>C\x00H\x00R\x00 MKTAGACTION(CHR) /* prefix of CHRA: flex takes the longest match, so "CHRA" input still reaches the CHRA rule */
181 <EXPECT_TAG>C\x00H\x00R\x00A\x00 MKTAGACTION(CHRA)
182 <EXPECT_TAG>C\x00I\x00T\x00Y\x00 MKTAGACTION(CITY)
183 <EXPECT_TAG>C\x00O\x00N\x00C\x00 MKTAGACTION(CONC)
184 <EXPECT_TAG>C\x00O\x00N\x00F\x00 MKTAGACTION(CONF)
185 <EXPECT_TAG>C\x00O\x00N\x00L\x00 MKTAGACTION(CONL)
186 <EXPECT_TAG>C\x00O\x00N\x00T\x00 MKTAGACTION(CONT)
187 <EXPECT_TAG>C\x00O\x00P\x00R\x00 MKTAGACTION(COPR)
188 <EXPECT_TAG>C\x00O\x00R\x00P\x00 MKTAGACTION(CORP)
189 <EXPECT_TAG>C\x00R\x00E\x00M\x00 MKTAGACTION(CREM)
190 <EXPECT_TAG>C\x00T\x00R\x00Y\x00 MKTAGACTION(CTRY)
191 <EXPECT_TAG>D\x00A\x00T\x00A\x00 MKTAGACTION(DATA)
192 <EXPECT_TAG>D\x00A\x00T\x00E\x00 MKTAGACTION(DATE)
193 <EXPECT_TAG>D\x00E\x00A\x00T\x00 MKTAGACTION(DEAT)
194 <EXPECT_TAG>D\x00E\x00S\x00C\x00 MKTAGACTION(DESC)
195 <EXPECT_TAG>D\x00E\x00S\x00I\x00 MKTAGACTION(DESI)
196 <EXPECT_TAG>D\x00E\x00S\x00T\x00 MKTAGACTION(DEST)
197 <EXPECT_TAG>D\x00I\x00V\x00 MKTAGACTION(DIV) /* prefix of DIVF: the longer input matches the DIVF rule */
198 <EXPECT_TAG>D\x00I\x00V\x00F\x00 MKTAGACTION(DIVF)
199 <EXPECT_TAG>D\x00S\x00C\x00R\x00 MKTAGACTION(DSCR)
200 <EXPECT_TAG>E\x00D\x00U\x00C\x00 MKTAGACTION(EDUC)
201 <EXPECT_TAG>E\x00M\x00I\x00G\x00 MKTAGACTION(EMIG)
202 <EXPECT_TAG>E\x00N\x00D\x00L\x00 MKTAGACTION(ENDL)
203 <EXPECT_TAG>E\x00N\x00G\x00A\x00 MKTAGACTION(ENGA)
204 <EXPECT_TAG>E\x00V\x00E\x00N\x00 MKTAGACTION(EVEN)
205 <EXPECT_TAG>F\x00A\x00M\x00 MKTAGACTION(FAM) /* prefix of FAMC, FAMF and FAMS: the longer input matches those rules */
206 <EXPECT_TAG>F\x00A\x00M\x00C\x00 MKTAGACTION(FAMC)
207 <EXPECT_TAG>F\x00A\x00M\x00F\x00 MKTAGACTION(FAMF)
208 <EXPECT_TAG>F\x00A\x00M\x00S\x00 MKTAGACTION(FAMS)
209 <EXPECT_TAG>F\x00C\x00O\x00M\x00 MKTAGACTION(FCOM)
210 <EXPECT_TAG>F\x00I\x00L\x00E\x00 MKTAGACTION(FILE)
211 <EXPECT_TAG>F\x00O\x00R\x00M\x00 MKTAGACTION(FORM)
212 <EXPECT_TAG>G\x00E\x00D\x00C\x00 MKTAGACTION(GEDC)
213 <EXPECT_TAG>G\x00I\x00V\x00N\x00 MKTAGACTION(GIVN)
214 <EXPECT_TAG>G\x00R\x00A\x00D\x00 MKTAGACTION(GRAD)
215 <EXPECT_TAG>H\x00E\x00A\x00D\x00 MKTAGACTION(HEAD)
216 <EXPECT_TAG>H\x00U\x00S\x00B\x00 MKTAGACTION(HUSB)
217 <EXPECT_TAG>I\x00D\x00N\x00O\x00 MKTAGACTION(IDNO)
218 <EXPECT_TAG>I\x00M\x00M\x00I\x00 MKTAGACTION(IMMI)
219 <EXPECT_TAG>I\x00N\x00D\x00I\x00 MKTAGACTION(INDI)
220 <EXPECT_TAG>L\x00A\x00N\x00G\x00 MKTAGACTION(LANG)
221 <EXPECT_TAG>L\x00E\x00G\x00A\x00 MKTAGACTION(LEGA)
222 <EXPECT_TAG>M\x00A\x00R\x00B\x00 MKTAGACTION(MARB)
223 <EXPECT_TAG>M\x00A\x00R\x00C\x00 MKTAGACTION(MARC)
224 <EXPECT_TAG>M\x00A\x00R\x00L\x00 MKTAGACTION(MARL)
225 <EXPECT_TAG>M\x00A\x00R\x00R\x00 MKTAGACTION(MARR)
226 <EXPECT_TAG>M\x00A\x00R\x00S\x00 MKTAGACTION(MARS)
227 <EXPECT_TAG>M\x00E\x00D\x00I\x00 MKTAGACTION(MEDI)
228 <EXPECT_TAG>N\x00A\x00M\x00E\x00 MKTAGACTION(NAME)
229 <EXPECT_TAG>N\x00A\x00T\x00I\x00 MKTAGACTION(NATI)
230 <EXPECT_TAG>N\x00A\x00T\x00U\x00 MKTAGACTION(NATU)
231 <EXPECT_TAG>N\x00C\x00H\x00I\x00 MKTAGACTION(NCHI)
232 <EXPECT_TAG>N\x00I\x00C\x00K\x00 MKTAGACTION(NICK)
233 <EXPECT_TAG>N\x00M\x00R\x00 MKTAGACTION(NMR)
234 <EXPECT_TAG>N\x00O\x00T\x00E\x00 MKTAGACTION(NOTE)
235 <EXPECT_TAG>N\x00P\x00F\x00X\x00 MKTAGACTION(NPFX)
236 <EXPECT_TAG>N\x00S\x00F\x00X\x00 MKTAGACTION(NSFX)
237 <EXPECT_TAG>O\x00B\x00J\x00E\x00 MKTAGACTION(OBJE)
238 <EXPECT_TAG>O\x00C\x00C\x00U\x00 MKTAGACTION(OCCU)
239 <EXPECT_TAG>O\x00R\x00D\x00I\x00 MKTAGACTION(ORDI)
240 <EXPECT_TAG>O\x00R\x00D\x00N\x00 MKTAGACTION(ORDN)
241 <EXPECT_TAG>P\x00A\x00G\x00E\x00 MKTAGACTION(PAGE)
242 <EXPECT_TAG>P\x00E\x00D\x00I\x00 MKTAGACTION(PEDI)
243 <EXPECT_TAG>P\x00H\x00O\x00N\x00 MKTAGACTION(PHON)
244 <EXPECT_TAG>P\x00L\x00A\x00C\x00 MKTAGACTION(PLAC)
245 <EXPECT_TAG>P\x00O\x00S\x00T\x00 MKTAGACTION(POST)
246 <EXPECT_TAG>P\x00R\x00O\x00B\x00 MKTAGACTION(PROB)
247 <EXPECT_TAG>P\x00R\x00O\x00P\x00 MKTAGACTION(PROP)
248 <EXPECT_TAG>P\x00U\x00B\x00L\x00 MKTAGACTION(PUBL)
249 <EXPECT_TAG>Q\x00U\x00A\x00Y\x00 MKTAGACTION(QUAY)
250 <EXPECT_TAG>R\x00E\x00F\x00N\x00 MKTAGACTION(REFN)
251 <EXPECT_TAG>R\x00E\x00L\x00A\x00 MKTAGACTION(RELA)
252 <EXPECT_TAG>R\x00E\x00L\x00I\x00 MKTAGACTION(RELI)
253 <EXPECT_TAG>R\x00E\x00P\x00O\x00 MKTAGACTION(REPO)
254 <EXPECT_TAG>R\x00E\x00S\x00I\x00 MKTAGACTION(RESI)
255 <EXPECT_TAG>R\x00E\x00S\x00N\x00 MKTAGACTION(RESN)
256 <EXPECT_TAG>R\x00E\x00T\x00I\x00 MKTAGACTION(RETI)
257 <EXPECT_TAG>R\x00F\x00N\x00 MKTAGACTION(RFN)
258 <EXPECT_TAG>R\x00I\x00N\x00 MKTAGACTION(RIN)
259 <EXPECT_TAG>R\x00O\x00L\x00E\x00 MKTAGACTION(ROLE)
260 <EXPECT_TAG>S\x00E\x00X\x00 MKTAGACTION(SEX)
261 <EXPECT_TAG>S\x00L\x00G\x00C\x00 MKTAGACTION(SLGC)
262 <EXPECT_TAG>S\x00L\x00G\x00S\x00 MKTAGACTION(SLGS)
263 <EXPECT_TAG>S\x00O\x00U\x00R\x00 MKTAGACTION(SOUR)
264 <EXPECT_TAG>S\x00P\x00F\x00X\x00 MKTAGACTION(SPFX)
265 <EXPECT_TAG>S\x00S\x00N\x00 MKTAGACTION(SSN)
266 <EXPECT_TAG>S\x00T\x00A\x00E\x00 MKTAGACTION(STAE)
267 <EXPECT_TAG>S\x00T\x00A\x00T\x00 MKTAGACTION(STAT)
268 <EXPECT_TAG>S\x00U\x00B\x00M\x00 MKTAGACTION(SUBM)
269 <EXPECT_TAG>S\x00U\x00B\x00N\x00 MKTAGACTION(SUBN)
270 <EXPECT_TAG>S\x00U\x00R\x00N\x00 MKTAGACTION(SURN)
271 <EXPECT_TAG>T\x00E\x00M\x00P\x00 MKTAGACTION(TEMP)
272 <EXPECT_TAG>T\x00E\x00X\x00T\x00 MKTAGACTION(TEXT)
273 <EXPECT_TAG>T\x00I\x00M\x00E\x00 MKTAGACTION(TIME)
274 <EXPECT_TAG>T\x00I\x00T\x00L\x00 MKTAGACTION(TITL)
275 <EXPECT_TAG>T\x00R\x00L\x00R\x00 MKTAGACTION(TRLR)
276 <EXPECT_TAG>T\x00Y\x00P\x00E\x00 MKTAGACTION(TYPE)
277 <EXPECT_TAG>V\x00E\x00R\x00S\x00 MKTAGACTION(VERS)
278 <EXPECT_TAG>W\x00I\x00F\x00E\x00 MKTAGACTION(WIFE)
279 <EXPECT_TAG>W\x00I\x00L\x00L\x00 MKTAGACTION(WILL) /* last known tag; any other {alphanum}+ run is handled by the generic rule that follows */
281 <EXPECT_TAG>{alphanum}+ { /* Generic (user-defined) tag.  yyleng counts bytes and
                              every character of this UTF-16-LE input takes two,
                              so the tag length in characters is yyleng / 2;
                              strlen() would stop at the first 0x00 byte. */
                           if ((yyleng / 2) > MAXGEDCTAGLEN) {
282 /* The format has a %s and a %d, so both arguments must be supplied. */
    gedcom_error("Tag '%s' too long, max %d chars",
                 TO_INTERNAL(yytext), MAXGEDCTAGLEN);
285 /* Convert directly from yytext, as the other rules do.  Copying through
       strncpy() stops at the first 0x00 byte, which in UTF-16-LE text is the
       second byte of the tag, so only one byte of it would be converted. */
286 gedcom_lval.string = TO_INTERNAL(yytext);
291 {delim} { gedcom_lval.string = TO_INTERNAL(yytext);
295 {any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
299 {escape}/{non_at} { gedcom_lval.string = TO_INTERNAL(yytext);
303 {pointer} { gedcom_lval.string = TO_INTERNAL(yytext);
307 /* Due to the conversion of level numbers into brackets, the
308 terminator is not important, so no token is returned here.
309 Although not strictly according to the GEDCOM spec, we'll ignore
310 whitespace just before the terminator.
313 {gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); }
315 /* Eventually we have to return 1 closing bracket (for the trailer).
316 We can detect whether we have sent the closing bracket using the
317 level_diff (at eof, first it is 2, then we increment it ourselves) */
319 <<EOF>> { if (level_diff == 2) {
328 . { gedcom_error("Unexpected character: '%s' (0x%02x)",
346 set_encoding_width(TWO_BYTE_LOHI);
347 res = open_conv_to_internal("UNICODE");
349 gedcom_error("Unable to open conversion context: %s",
353 tok = gedcom_lohi_lex();
356 case BADTOKEN: printf("BADTOKEN "); break;
357 case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
358 case CLOSE: printf("CLOSE "); break;
359 case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
360 case DELIM: printf("DELIM "); break;
361 case ANYCHAR: printf("%s ", gedcom_lval.string); break;
362 case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
363 case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
364 default: printf("TAG(%s) ", gedcom_lval.string); break;
366 tok = gedcom_lohi_lex();
369 close_conv_to_internal();