4 /* In high-low order, a space is encoded as 0x00 0x20 */
5 /* i.e. this is utf-16-be */
8 #include "gedcom.tab.h"
/* All patterns below work on two-byte code units, high byte first, so an
   ASCII character is written as \x00 followed by its ASCII byte. */
/* otherchar: a low-page code unit in 0x21-0xFF other than '#', '@', '_',
   the ASCII digits, the ASCII letters and 0x7F, or any code unit whose
   high byte is non-zero. */
25 otherchar \x00[\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]|[\x01-\xFF][\x00-\xFF]
/* terminator: CR, LF, CR LF or LF CR. */
26 terminator \x00\x0D|\x00\x0A|\x00\x0D\x00\x0A|\x00\x0A\x00\x0D
/* Composite classes built from the single-character classes.  The names
   say which class is left out: any_but_delim omits delim, non_at omits
   literal_at. */
28 any_char {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
29 any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
30 non_at {alpha}|{digit}|{otherchar}|{delim}|{hash}
31 alphanum {alpha}|{digit}
32 gen_delim {delim}|{tab}
/* escape: "@#", one or more characters, then "@". */
34 escape \x00@\x00#{any_char}+\x00@
/* pointer: "@", one alphanumeric, one or more non-'@' characters, "@". */
35 pointer \x00@{alphanum}{non_at}+\x00@
38 static int current_level = -1; /* level number of the last line seen; -1 before any line */
39 static int level_diff = MAXGEDCLEVEL; /* new level minus previous level; drives the pending bracket tokens */
49 /* The GEDCOM level number is converted into a sequence of opening
50 and closing brackets. Simply put, the following GEDCOM fragment:
63 { SOUR genes (1 higher: no closing brackets)
64 { VERS 1.6 (1 higher: no closing brackets)
65 } { NAME Genes (same level: 1 closing bracket)
66 } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets)
80 But because this means that one token is converted into a series
81 of tokens, there is some initial code following immediately here
82 that returns "pending" tokens. */
85 char string_buf[MAXGEDCLINELEN + 1]; /* scratch buffer for token text prior to conversion */
91 else if (level_diff == 1) {
96 /* out of brackets... */
99 #define TO_INTERNAL(str) to_internal((str), yyleng) /* length is passed explicitly: the two-byte text contains 0x00 bytes, so it is not a C string */
101 #define MKTAGACTION(tag) \
102 { gedcom_lval.string = TO_INTERNAL(yytext); \
108 <INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
110 <INITIAL>\x00[0]{digit}+ { gedcom_error ("Level number with leading zero");
114 <INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
115 if ((level < 0) || (level > MAXGEDCLEVEL)) {
116 gedcom_error ("Level number out of range [0..%d]",
120 level_diff = level - current_level;
122 current_level = level;
123 if (level_diff < 1) {
127 else if (level_diff == 1) {
132 /* should never happen (violates the GEDCOM spec) */
133 gedcom_error ("GEDCOM level number is %d higher than "
140 <EXPECT_TAG>\x00A\x00B\x00B\x00R MKTAGACTION(ABBR) /* every tag letter is a two-byte unit: 0x00 then the ASCII byte; flex takes at most two hex digits after \x, so \x00A is 0x00 followed by 'A' (same scheme for all tag rules) */
141 <EXPECT_TAG>\x00A\x00D\x00D\x00R MKTAGACTION(ADDR)
142 <EXPECT_TAG>\x00A\x00D\x00R\x001 MKTAGACTION(ADR1)
143 <EXPECT_TAG>\x00A\x00D\x00R\x002 MKTAGACTION(ADR2)
144 <EXPECT_TAG>\x00A\x00D\x00O\x00P MKTAGACTION(ADOP)
145 <EXPECT_TAG>\x00A\x00F\x00N MKTAGACTION(AFN)
146 <EXPECT_TAG>\x00A\x00G\x00E MKTAGACTION(AGE)
147 <EXPECT_TAG>\x00A\x00G\x00N\x00C MKTAGACTION(AGNC)
148 <EXPECT_TAG>\x00A\x00L\x00I\x00A MKTAGACTION(ALIA)
149 <EXPECT_TAG>\x00A\x00N\x00C\x00E MKTAGACTION(ANCE)
150 <EXPECT_TAG>\x00A\x00N\x00C\x00I MKTAGACTION(ANCI)
151 <EXPECT_TAG>\x00A\x00N\x00U\x00L MKTAGACTION(ANUL)
152 <EXPECT_TAG>\x00A\x00S\x00S\x00O MKTAGACTION(ASSO)
153 <EXPECT_TAG>\x00A\x00U\x00T\x00H MKTAGACTION(AUTH)
154 <EXPECT_TAG>\x00B\x00A\x00P\x00L MKTAGACTION(BAPL)
155 <EXPECT_TAG>\x00B\x00A\x00P\x00M MKTAGACTION(BAPM)
156 <EXPECT_TAG>\x00B\x00A\x00R\x00M MKTAGACTION(BARM)
157 <EXPECT_TAG>\x00B\x00A\x00S\x00M MKTAGACTION(BASM)
158 <EXPECT_TAG>\x00B\x00I\x00R\x00T MKTAGACTION(BIRT)
159 <EXPECT_TAG>\x00B\x00L\x00E\x00S MKTAGACTION(BLES)
160 <EXPECT_TAG>\x00B\x00L\x00O\x00B MKTAGACTION(BLOB)
161 <EXPECT_TAG>\x00B\x00U\x00R\x00I MKTAGACTION(BURI)
162 <EXPECT_TAG>\x00C\x00A\x00L\x00N MKTAGACTION(CALN)
163 <EXPECT_TAG>\x00C\x00A\x00S\x00T MKTAGACTION(CAST)
164 <EXPECT_TAG>\x00C\x00A\x00U\x00S MKTAGACTION(CAUS)
165 <EXPECT_TAG>\x00C\x00E\x00N\x00S MKTAGACTION(CENS)
166 <EXPECT_TAG>\x00C\x00H\x00A\x00N MKTAGACTION(CHAN)
167 <EXPECT_TAG>\x00C\x00H\x00A\x00R MKTAGACTION(CHAR)
168 <EXPECT_TAG>\x00C\x00H\x00I\x00L MKTAGACTION(CHIL)
169 <EXPECT_TAG>\x00C\x00H\x00R MKTAGACTION(CHR)
170 <EXPECT_TAG>\x00C\x00H\x00R\x00A MKTAGACTION(CHRA)
171 <EXPECT_TAG>\x00C\x00I\x00T\x00Y MKTAGACTION(CITY)
172 <EXPECT_TAG>\x00C\x00O\x00N\x00C MKTAGACTION(CONC)
173 <EXPECT_TAG>\x00C\x00O\x00N\x00F MKTAGACTION(CONF)
174 <EXPECT_TAG>\x00C\x00O\x00N\x00L MKTAGACTION(CONL)
175 <EXPECT_TAG>\x00C\x00O\x00N\x00T MKTAGACTION(CONT)
176 <EXPECT_TAG>\x00C\x00O\x00P\x00R MKTAGACTION(COPR)
177 <EXPECT_TAG>\x00C\x00O\x00R\x00P MKTAGACTION(CORP)
178 <EXPECT_TAG>\x00C\x00R\x00E\x00M MKTAGACTION(CREM)
179 <EXPECT_TAG>\x00C\x00T\x00R\x00Y MKTAGACTION(CTRY)
180 <EXPECT_TAG>\x00D\x00A\x00T\x00A MKTAGACTION(DATA)
181 <EXPECT_TAG>\x00D\x00A\x00T\x00E MKTAGACTION(DATE)
182 <EXPECT_TAG>\x00D\x00E\x00A\x00T MKTAGACTION(DEAT)
183 <EXPECT_TAG>\x00D\x00E\x00S\x00C MKTAGACTION(DESC)
184 <EXPECT_TAG>\x00D\x00E\x00S\x00I MKTAGACTION(DESI)
185 <EXPECT_TAG>\x00D\x00E\x00S\x00T MKTAGACTION(DEST)
186 <EXPECT_TAG>\x00D\x00I\x00V MKTAGACTION(DIV)
187 <EXPECT_TAG>\x00D\x00I\x00V\x00F MKTAGACTION(DIVF)
188 <EXPECT_TAG>\x00D\x00S\x00C\x00R MKTAGACTION(DSCR)
189 <EXPECT_TAG>\x00E\x00D\x00U\x00C MKTAGACTION(EDUC)
190 <EXPECT_TAG>\x00E\x00M\x00I\x00G MKTAGACTION(EMIG)
191 <EXPECT_TAG>\x00E\x00N\x00D\x00L MKTAGACTION(ENDL)
192 <EXPECT_TAG>\x00E\x00N\x00G\x00A MKTAGACTION(ENGA)
193 <EXPECT_TAG>\x00E\x00V\x00E\x00N MKTAGACTION(EVEN)
194 <EXPECT_TAG>\x00F\x00A\x00M MKTAGACTION(FAM)
195 <EXPECT_TAG>\x00F\x00A\x00M\x00C MKTAGACTION(FAMC)
196 <EXPECT_TAG>\x00F\x00A\x00M\x00F MKTAGACTION(FAMF)
197 <EXPECT_TAG>\x00F\x00A\x00M\x00S MKTAGACTION(FAMS)
198 <EXPECT_TAG>\x00F\x00C\x00O\x00M MKTAGACTION(FCOM)
199 <EXPECT_TAG>\x00F\x00I\x00L\x00E MKTAGACTION(FILE)
200 <EXPECT_TAG>\x00F\x00O\x00R\x00M MKTAGACTION(FORM)
201 <EXPECT_TAG>\x00G\x00E\x00D\x00C MKTAGACTION(GEDC)
202 <EXPECT_TAG>\x00G\x00I\x00V\x00N MKTAGACTION(GIVN)
203 <EXPECT_TAG>\x00G\x00R\x00A\x00D MKTAGACTION(GRAD)
204 <EXPECT_TAG>\x00H\x00E\x00A\x00D MKTAGACTION(HEAD)
205 <EXPECT_TAG>\x00H\x00U\x00S\x00B MKTAGACTION(HUSB)
206 <EXPECT_TAG>\x00I\x00D\x00N\x00O MKTAGACTION(IDNO)
207 <EXPECT_TAG>\x00I\x00M\x00M\x00I MKTAGACTION(IMMI)
208 <EXPECT_TAG>\x00I\x00N\x00D\x00I MKTAGACTION(INDI)
209 <EXPECT_TAG>\x00L\x00A\x00N\x00G MKTAGACTION(LANG)
210 <EXPECT_TAG>\x00L\x00E\x00G\x00A MKTAGACTION(LEGA)
211 <EXPECT_TAG>\x00M\x00A\x00R\x00B MKTAGACTION(MARB)
212 <EXPECT_TAG>\x00M\x00A\x00R\x00C MKTAGACTION(MARC)
213 <EXPECT_TAG>\x00M\x00A\x00R\x00L MKTAGACTION(MARL)
214 <EXPECT_TAG>\x00M\x00A\x00R\x00R MKTAGACTION(MARR)
215 <EXPECT_TAG>\x00M\x00A\x00R\x00S MKTAGACTION(MARS)
216 <EXPECT_TAG>\x00M\x00E\x00D\x00I MKTAGACTION(MEDI)
217 <EXPECT_TAG>\x00N\x00A\x00M\x00E MKTAGACTION(NAME)
218 <EXPECT_TAG>\x00N\x00A\x00T\x00I MKTAGACTION(NATI)
219 <EXPECT_TAG>\x00N\x00A\x00T\x00U MKTAGACTION(NATU)
220 <EXPECT_TAG>\x00N\x00C\x00H\x00I MKTAGACTION(NCHI)
221 <EXPECT_TAG>\x00N\x00I\x00C\x00K MKTAGACTION(NICK)
222 <EXPECT_TAG>\x00N\x00M\x00R MKTAGACTION(NMR)
223 <EXPECT_TAG>\x00N\x00O\x00T\x00E MKTAGACTION(NOTE)
224 <EXPECT_TAG>\x00N\x00P\x00F\x00X MKTAGACTION(NPFX)
225 <EXPECT_TAG>\x00N\x00S\x00F\x00X MKTAGACTION(NSFX)
226 <EXPECT_TAG>\x00O\x00B\x00J\x00E MKTAGACTION(OBJE)
227 <EXPECT_TAG>\x00O\x00C\x00C\x00U MKTAGACTION(OCCU)
228 <EXPECT_TAG>\x00O\x00R\x00D\x00I MKTAGACTION(ORDI)
229 <EXPECT_TAG>\x00O\x00R\x00D\x00N MKTAGACTION(ORDN)
230 <EXPECT_TAG>\x00P\x00A\x00G\x00E MKTAGACTION(PAGE)
231 <EXPECT_TAG>\x00P\x00E\x00D\x00I MKTAGACTION(PEDI)
232 <EXPECT_TAG>\x00P\x00H\x00O\x00N MKTAGACTION(PHON)
233 <EXPECT_TAG>\x00P\x00L\x00A\x00C MKTAGACTION(PLAC)
234 <EXPECT_TAG>\x00P\x00O\x00S\x00T MKTAGACTION(POST)
235 <EXPECT_TAG>\x00P\x00R\x00O\x00B MKTAGACTION(PROB)
236 <EXPECT_TAG>\x00P\x00R\x00O\x00P MKTAGACTION(PROP)
237 <EXPECT_TAG>\x00P\x00U\x00B\x00L MKTAGACTION(PUBL)
238 <EXPECT_TAG>\x00Q\x00U\x00A\x00Y MKTAGACTION(QUAY)
239 <EXPECT_TAG>\x00R\x00E\x00F\x00N MKTAGACTION(REFN)
240 <EXPECT_TAG>\x00R\x00E\x00L\x00A MKTAGACTION(RELA)
241 <EXPECT_TAG>\x00R\x00E\x00L\x00I MKTAGACTION(RELI)
242 <EXPECT_TAG>\x00R\x00E\x00P\x00O MKTAGACTION(REPO)
243 <EXPECT_TAG>\x00R\x00E\x00S\x00I MKTAGACTION(RESI)
244 <EXPECT_TAG>\x00R\x00E\x00S\x00N MKTAGACTION(RESN)
245 <EXPECT_TAG>\x00R\x00E\x00T\x00I MKTAGACTION(RETI)
246 <EXPECT_TAG>\x00R\x00F\x00N MKTAGACTION(RFN)
247 <EXPECT_TAG>\x00R\x00I\x00N MKTAGACTION(RIN)
248 <EXPECT_TAG>\x00R\x00O\x00L\x00E MKTAGACTION(ROLE)
249 <EXPECT_TAG>\x00S\x00E\x00X MKTAGACTION(SEX)
250 <EXPECT_TAG>\x00S\x00L\x00G\x00C MKTAGACTION(SLGC)
251 <EXPECT_TAG>\x00S\x00L\x00G\x00S MKTAGACTION(SLGS)
252 <EXPECT_TAG>\x00S\x00O\x00U\x00R MKTAGACTION(SOUR)
253 <EXPECT_TAG>\x00S\x00P\x00F\x00X MKTAGACTION(SPFX)
254 <EXPECT_TAG>\x00S\x00S\x00N MKTAGACTION(SSN)
255 <EXPECT_TAG>\x00S\x00T\x00A\x00E MKTAGACTION(STAE)
256 <EXPECT_TAG>\x00S\x00T\x00A\x00T MKTAGACTION(STAT)
257 <EXPECT_TAG>\x00S\x00U\x00B\x00M MKTAGACTION(SUBM)
258 <EXPECT_TAG>\x00S\x00U\x00B\x00N MKTAGACTION(SUBN)
259 <EXPECT_TAG>\x00S\x00U\x00R\x00N MKTAGACTION(SURN)
260 <EXPECT_TAG>\x00T\x00E\x00M\x00P MKTAGACTION(TEMP)
261 <EXPECT_TAG>\x00T\x00E\x00X\x00T MKTAGACTION(TEXT)
262 <EXPECT_TAG>\x00T\x00I\x00M\x00E MKTAGACTION(TIME)
263 <EXPECT_TAG>\x00T\x00I\x00T\x00L MKTAGACTION(TITL)
264 <EXPECT_TAG>\x00T\x00R\x00L\x00R MKTAGACTION(TRLR)
265 <EXPECT_TAG>\x00T\x00Y\x00P\x00E MKTAGACTION(TYPE)
266 <EXPECT_TAG>\x00V\x00E\x00R\x00S MKTAGACTION(VERS)
267 <EXPECT_TAG>\x00W\x00I\x00F\x00E MKTAGACTION(WIFE)
268 <EXPECT_TAG>\x00W\x00I\x00L\x00L MKTAGACTION(WILL)
270 <EXPECT_TAG>{alphanum}+ { /* user-defined tag; yytext is two bytes per character and starts with 0x00, so its length must come from yyleng, not strlen() */ if (yyleng > 2 * MAXGEDCTAGLEN) {
271 gedcom_error("Tag '%s' too long, max %d chars", TO_INTERNAL(yytext), MAXGEDCTAGLEN);
274 memcpy(string_buf, yytext, yyleng); /* not strncpy: it would stop at the first 0x00 byte; yyleng <= 2*MAXGEDCTAGLEN here, assumed to fit in string_buf */
275 gedcom_lval.string = TO_INTERNAL(string_buf);
280 {delim} { gedcom_lval.string = TO_INTERNAL(yytext);
284 {any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
288 {escape}/{non_at} { gedcom_lval.string = TO_INTERNAL(yytext);
292 {pointer} { gedcom_lval.string = TO_INTERNAL(yytext);
296 /* Due to the conversion of level numbers into brackets, the
297 terminator is not important, so no token is returned here.
298 Although not strictly according to the GEDCOM spec, we'll ignore
299 whitespace just before the terminator.
302 {gen_delim}*{terminator} { ++line_no; BEGIN(INITIAL); /* count the finished line and go back to INITIAL for the next one */ }
304 /* At end of file we still have to return one closing bracket (for the
305 trailer). We can tell whether it has already been sent from level_diff:
306 on first reaching EOF it is 2, and we then increment it ourselves. */
308 <<EOF>> { if (level_diff == 2) {
317 . { gedcom_error("Unexpected character: '%s' (0x%02x)",
335 set_encoding_width(TWO_BYTE_HILO);
336 res = open_conv_to_internal("UNICODE");
338 gedcom_error("Unable to open conversion context: %s",
342 tok = gedcom_hilo_lex();
345 case BADTOKEN: printf("BADTOKEN "); break;
346 case OPEN: printf("OPEN "); break;
347 case CLOSE: printf("CLOSE "); break;
348 case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
349 case DELIM: printf("DELIM "); break;
350 case ANYCHAR: printf("%s ", gedcom_lval.string); break;
351 case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
352 case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
353 default: printf("TAG(%s) ", gedcom_lval.string); break;
355 tok = gedcom_hilo_lex();
358 close_conv_to_internal();