1 /* Lexer for Unicode encoding (high-low order) of Gedcom.
2 Copyright (C) 2001 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 /* In high-low order, a space is encoded as 0x00 0x20 */
25 /* i.e. this is utf-16-be */
/* Prologue for the high-low (two bytes per character) lexer.  LEX_SECTION
   selects which part of gedcom_lex_common.c the #include below pulls in. */
28 #define LEX_SECTION 1 /* include only a specific part of the following file */
/* Give the generic name yymyinit this lexer's own symbol name.
   NOTE(review): presumably gedcom_lex_common.c uses yymyinit to name its
   init routine -- confirm in that file. */
29 #define yymyinit gedcom_hilo_myinit
30 #include "gedcom_lex_common.c"
/* Two bytes per character, in line with a space being encoded as 0x00 0x20.
   NOTE(review): not referenced in the visible part of this file; presumably
   read by the code included from gedcom_lex_common.c -- confirm. */
32 static size_t encoding_width = 2;
/* Named patterns built on two-byte units with the high byte first.  The
   names alpha, digit, delim, tab, hash and literal_at used below are
   defined elsewhere in the definitions section. */
/* otherchar: either a 0x00 high byte followed by a low byte in 0x21-0x22,
   0x24-0x2F, 0x3A-0x3F, 0x5B-0x5E, 0x60, 0x7B-0x7E or 0x80-0xFF (so low
   bytes 0x00-0x20, '#', '0'-'9', '@', 'A'-'Z', '_', 'a'-'z' and 0x7F are
   left out), or any unit whose high byte is non-zero. */
45 otherchar \x00[\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]|[\x01-\xFF][\x00-\xFF]
/* terminator: CR, LF, CR LF or LF CR, each as a 0x00-prefixed byte pair. */
46 terminator \x00\x0D|\x00\x0A|\x00\x0D\x00\x0A|\x00\x0A\x00\x0D
/* Unions of the basic classes: any_but_delim is any_char without delim,
   non_at is any_char without literal_at. */
48 any_char {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
49 any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
50 non_at {alpha}|{digit}|{otherchar}|{delim}|{hash}
51 alphanum {alpha}|{digit}
52 gen_delim {delim}|{tab}
/* escape: "@#", then one or more any_char, then "@". */
54 escape \x00@\x00#{any_char}+\x00@
/* pointer: "@", one alphanum, one or more non_at, then "@". */
55 pointer \x00@{alphanum}{non_at}+\x00@
/* Pull in the second part of gedcom_lex_common.c.
   NOTE(review): LEX_SECTION is given a new value here without a visible
   #undef; this relies on the macro having been undefined after the previous
   include -- confirm in gedcom_lex_common.c. */
61 #define LEX_SECTION 2 /* include only a specific part of the following file */
62 #include "gedcom_lex_common.c"
68 <INITIAL>{gen_delim}* ACTION_INITIAL_WHITESPACE /* INITIAL state: a run of zero or more delimiter or tab units */
  /* INITIAL state: a '0' unit followed by at least one more digit.  When
     this rule and the plain digit-run rule below match the same length,
     flex picks the rule listed first, so this one wins (assuming the digit
     class also covers '0' -- its definition is elsewhere in the
     definitions section). */
70 <INITIAL>\x00[0]{digit}+ ACTION_0_DIGITS
  /* INITIAL state: any other run of one or more digits.
     NOTE(review): the ACTION_* macros are not defined in the visible part
     of this file; presumably they come from gedcom_lex_common.c. */
72 <INITIAL>{digit}+ ACTION_DIGITS
74 <EXPECT_TAG>\x00A\x00B\x00B\x00R MKTAGACTION(ABBR) /* first of the fixed tag rules, see the note below */
  /* Fixed tag rules, active only in the EXPECT_TAG start condition.  Each
     pattern spells one tag name as two-byte units, with a 0x00 high byte
     before every ASCII character, and passes the same name to MKTAGACTION.
     Tags that are a prefix of another tag (CHR/CHRA, DIV/DIVF,
     FAM/FAMC/FAMF/FAMS) are told apart by flex's longest-match rule.  These
     rules are listed before the generic alphanum rule that follows the
     list, so on an equal-length match the tag rule is the one chosen.
     NOTE(review): MKTAGACTION is not defined in the visible part of this
     file; presumably it comes from gedcom_lex_common.c. */
75 <EXPECT_TAG>\x00A\x00D\x00D\x00R MKTAGACTION(ADDR)
76 <EXPECT_TAG>\x00A\x00D\x00R\x001 MKTAGACTION(ADR1)
77 <EXPECT_TAG>\x00A\x00D\x00R\x002 MKTAGACTION(ADR2)
78 <EXPECT_TAG>\x00A\x00D\x00O\x00P MKTAGACTION(ADOP)
79 <EXPECT_TAG>\x00A\x00F\x00N MKTAGACTION(AFN)
80 <EXPECT_TAG>\x00A\x00G\x00E MKTAGACTION(AGE)
81 <EXPECT_TAG>\x00A\x00G\x00N\x00C MKTAGACTION(AGNC)
82 <EXPECT_TAG>\x00A\x00L\x00I\x00A MKTAGACTION(ALIA)
83 <EXPECT_TAG>\x00A\x00N\x00C\x00E MKTAGACTION(ANCE)
84 <EXPECT_TAG>\x00A\x00N\x00C\x00I MKTAGACTION(ANCI)
85 <EXPECT_TAG>\x00A\x00N\x00U\x00L MKTAGACTION(ANUL)
86 <EXPECT_TAG>\x00A\x00S\x00S\x00O MKTAGACTION(ASSO)
87 <EXPECT_TAG>\x00A\x00U\x00T\x00H MKTAGACTION(AUTH)
88 <EXPECT_TAG>\x00B\x00A\x00P\x00L MKTAGACTION(BAPL)
89 <EXPECT_TAG>\x00B\x00A\x00P\x00M MKTAGACTION(BAPM)
90 <EXPECT_TAG>\x00B\x00A\x00R\x00M MKTAGACTION(BARM)
91 <EXPECT_TAG>\x00B\x00A\x00S\x00M MKTAGACTION(BASM)
92 <EXPECT_TAG>\x00B\x00I\x00R\x00T MKTAGACTION(BIRT)
93 <EXPECT_TAG>\x00B\x00L\x00E\x00S MKTAGACTION(BLES)
94 <EXPECT_TAG>\x00B\x00L\x00O\x00B MKTAGACTION(BLOB)
95 <EXPECT_TAG>\x00B\x00U\x00R\x00I MKTAGACTION(BURI)
96 <EXPECT_TAG>\x00C\x00A\x00L\x00N MKTAGACTION(CALN)
97 <EXPECT_TAG>\x00C\x00A\x00S\x00T MKTAGACTION(CAST)
98 <EXPECT_TAG>\x00C\x00A\x00U\x00S MKTAGACTION(CAUS)
99 <EXPECT_TAG>\x00C\x00E\x00N\x00S MKTAGACTION(CENS)
100 <EXPECT_TAG>\x00C\x00H\x00A\x00N MKTAGACTION(CHAN)
101 <EXPECT_TAG>\x00C\x00H\x00A\x00R MKTAGACTION(CHAR)
102 <EXPECT_TAG>\x00C\x00H\x00I\x00L MKTAGACTION(CHIL)
103 <EXPECT_TAG>\x00C\x00H\x00R MKTAGACTION(CHR)
104 <EXPECT_TAG>\x00C\x00H\x00R\x00A MKTAGACTION(CHRA)
105 <EXPECT_TAG>\x00C\x00I\x00T\x00Y MKTAGACTION(CITY)
106 <EXPECT_TAG>\x00C\x00O\x00N\x00C MKTAGACTION(CONC)
107 <EXPECT_TAG>\x00C\x00O\x00N\x00F MKTAGACTION(CONF)
108 <EXPECT_TAG>\x00C\x00O\x00N\x00L MKTAGACTION(CONL)
109 <EXPECT_TAG>\x00C\x00O\x00N\x00T MKTAGACTION(CONT)
110 <EXPECT_TAG>\x00C\x00O\x00P\x00R MKTAGACTION(COPR)
111 <EXPECT_TAG>\x00C\x00O\x00R\x00P MKTAGACTION(CORP)
112 <EXPECT_TAG>\x00C\x00R\x00E\x00M MKTAGACTION(CREM)
113 <EXPECT_TAG>\x00C\x00T\x00R\x00Y MKTAGACTION(CTRY)
114 <EXPECT_TAG>\x00D\x00A\x00T\x00A MKTAGACTION(DATA)
115 <EXPECT_TAG>\x00D\x00A\x00T\x00E MKTAGACTION(DATE)
116 <EXPECT_TAG>\x00D\x00E\x00A\x00T MKTAGACTION(DEAT)
117 <EXPECT_TAG>\x00D\x00E\x00S\x00C MKTAGACTION(DESC)
118 <EXPECT_TAG>\x00D\x00E\x00S\x00I MKTAGACTION(DESI)
119 <EXPECT_TAG>\x00D\x00E\x00S\x00T MKTAGACTION(DEST)
120 <EXPECT_TAG>\x00D\x00I\x00V MKTAGACTION(DIV)
121 <EXPECT_TAG>\x00D\x00I\x00V\x00F MKTAGACTION(DIVF)
122 <EXPECT_TAG>\x00D\x00S\x00C\x00R MKTAGACTION(DSCR)
123 <EXPECT_TAG>\x00E\x00D\x00U\x00C MKTAGACTION(EDUC)
124 <EXPECT_TAG>\x00E\x00M\x00I\x00G MKTAGACTION(EMIG)
125 <EXPECT_TAG>\x00E\x00N\x00D\x00L MKTAGACTION(ENDL)
126 <EXPECT_TAG>\x00E\x00N\x00G\x00A MKTAGACTION(ENGA)
127 <EXPECT_TAG>\x00E\x00V\x00E\x00N MKTAGACTION(EVEN)
128 <EXPECT_TAG>\x00F\x00A\x00M MKTAGACTION(FAM)
129 <EXPECT_TAG>\x00F\x00A\x00M\x00C MKTAGACTION(FAMC)
130 <EXPECT_TAG>\x00F\x00A\x00M\x00F MKTAGACTION(FAMF)
131 <EXPECT_TAG>\x00F\x00A\x00M\x00S MKTAGACTION(FAMS)
132 <EXPECT_TAG>\x00F\x00C\x00O\x00M MKTAGACTION(FCOM)
133 <EXPECT_TAG>\x00F\x00I\x00L\x00E MKTAGACTION(FILE)
134 <EXPECT_TAG>\x00F\x00O\x00R\x00M MKTAGACTION(FORM)
135 <EXPECT_TAG>\x00G\x00E\x00D\x00C MKTAGACTION(GEDC)
136 <EXPECT_TAG>\x00G\x00I\x00V\x00N MKTAGACTION(GIVN)
137 <EXPECT_TAG>\x00G\x00R\x00A\x00D MKTAGACTION(GRAD)
138 <EXPECT_TAG>\x00H\x00E\x00A\x00D MKTAGACTION(HEAD)
139 <EXPECT_TAG>\x00H\x00U\x00S\x00B MKTAGACTION(HUSB)
140 <EXPECT_TAG>\x00I\x00D\x00N\x00O MKTAGACTION(IDNO)
141 <EXPECT_TAG>\x00I\x00M\x00M\x00I MKTAGACTION(IMMI)
142 <EXPECT_TAG>\x00I\x00N\x00D\x00I MKTAGACTION(INDI)
143 <EXPECT_TAG>\x00L\x00A\x00N\x00G MKTAGACTION(LANG)
144 <EXPECT_TAG>\x00L\x00E\x00G\x00A MKTAGACTION(LEGA)
145 <EXPECT_TAG>\x00M\x00A\x00R\x00B MKTAGACTION(MARB)
146 <EXPECT_TAG>\x00M\x00A\x00R\x00C MKTAGACTION(MARC)
147 <EXPECT_TAG>\x00M\x00A\x00R\x00L MKTAGACTION(MARL)
148 <EXPECT_TAG>\x00M\x00A\x00R\x00R MKTAGACTION(MARR)
149 <EXPECT_TAG>\x00M\x00A\x00R\x00S MKTAGACTION(MARS)
150 <EXPECT_TAG>\x00M\x00E\x00D\x00I MKTAGACTION(MEDI)
151 <EXPECT_TAG>\x00N\x00A\x00M\x00E MKTAGACTION(NAME)
152 <EXPECT_TAG>\x00N\x00A\x00T\x00I MKTAGACTION(NATI)
153 <EXPECT_TAG>\x00N\x00A\x00T\x00U MKTAGACTION(NATU)
154 <EXPECT_TAG>\x00N\x00C\x00H\x00I MKTAGACTION(NCHI)
155 <EXPECT_TAG>\x00N\x00I\x00C\x00K MKTAGACTION(NICK)
156 <EXPECT_TAG>\x00N\x00M\x00R MKTAGACTION(NMR)
157 <EXPECT_TAG>\x00N\x00O\x00T\x00E MKTAGACTION(NOTE)
158 <EXPECT_TAG>\x00N\x00P\x00F\x00X MKTAGACTION(NPFX)
159 <EXPECT_TAG>\x00N\x00S\x00F\x00X MKTAGACTION(NSFX)
160 <EXPECT_TAG>\x00O\x00B\x00J\x00E MKTAGACTION(OBJE)
161 <EXPECT_TAG>\x00O\x00C\x00C\x00U MKTAGACTION(OCCU)
162 <EXPECT_TAG>\x00O\x00R\x00D\x00I MKTAGACTION(ORDI)
163 <EXPECT_TAG>\x00O\x00R\x00D\x00N MKTAGACTION(ORDN)
164 <EXPECT_TAG>\x00P\x00A\x00G\x00E MKTAGACTION(PAGE)
165 <EXPECT_TAG>\x00P\x00E\x00D\x00I MKTAGACTION(PEDI)
166 <EXPECT_TAG>\x00P\x00H\x00O\x00N MKTAGACTION(PHON)
167 <EXPECT_TAG>\x00P\x00L\x00A\x00C MKTAGACTION(PLAC)
168 <EXPECT_TAG>\x00P\x00O\x00S\x00T MKTAGACTION(POST)
169 <EXPECT_TAG>\x00P\x00R\x00O\x00B MKTAGACTION(PROB)
170 <EXPECT_TAG>\x00P\x00R\x00O\x00P MKTAGACTION(PROP)
171 <EXPECT_TAG>\x00P\x00U\x00B\x00L MKTAGACTION(PUBL)
172 <EXPECT_TAG>\x00Q\x00U\x00A\x00Y MKTAGACTION(QUAY)
173 <EXPECT_TAG>\x00R\x00E\x00F\x00N MKTAGACTION(REFN)
174 <EXPECT_TAG>\x00R\x00E\x00L\x00A MKTAGACTION(RELA)
175 <EXPECT_TAG>\x00R\x00E\x00L\x00I MKTAGACTION(RELI)
176 <EXPECT_TAG>\x00R\x00E\x00P\x00O MKTAGACTION(REPO)
177 <EXPECT_TAG>\x00R\x00E\x00S\x00I MKTAGACTION(RESI)
178 <EXPECT_TAG>\x00R\x00E\x00S\x00N MKTAGACTION(RESN)
179 <EXPECT_TAG>\x00R\x00E\x00T\x00I MKTAGACTION(RETI)
180 <EXPECT_TAG>\x00R\x00F\x00N MKTAGACTION(RFN)
181 <EXPECT_TAG>\x00R\x00I\x00N MKTAGACTION(RIN)
182 <EXPECT_TAG>\x00R\x00O\x00L\x00E MKTAGACTION(ROLE)
183 <EXPECT_TAG>\x00S\x00E\x00X MKTAGACTION(SEX)
184 <EXPECT_TAG>\x00S\x00L\x00G\x00C MKTAGACTION(SLGC)
185 <EXPECT_TAG>\x00S\x00L\x00G\x00S MKTAGACTION(SLGS)
186 <EXPECT_TAG>\x00S\x00O\x00U\x00R MKTAGACTION(SOUR)
187 <EXPECT_TAG>\x00S\x00P\x00F\x00X MKTAGACTION(SPFX)
188 <EXPECT_TAG>\x00S\x00S\x00N MKTAGACTION(SSN)
189 <EXPECT_TAG>\x00S\x00T\x00A\x00E MKTAGACTION(STAE)
190 <EXPECT_TAG>\x00S\x00T\x00A\x00T MKTAGACTION(STAT)
191 <EXPECT_TAG>\x00S\x00U\x00B\x00M MKTAGACTION(SUBM)
192 <EXPECT_TAG>\x00S\x00U\x00B\x00N MKTAGACTION(SUBN)
193 <EXPECT_TAG>\x00S\x00U\x00R\x00N MKTAGACTION(SURN)
194 <EXPECT_TAG>\x00T\x00E\x00M\x00P MKTAGACTION(TEMP)
195 <EXPECT_TAG>\x00T\x00E\x00X\x00T MKTAGACTION(TEXT)
196 <EXPECT_TAG>\x00T\x00I\x00M\x00E MKTAGACTION(TIME)
197 <EXPECT_TAG>\x00T\x00I\x00T\x00L MKTAGACTION(TITL)
198 <EXPECT_TAG>\x00T\x00R\x00L\x00R MKTAGACTION(TRLR)
199 <EXPECT_TAG>\x00T\x00Y\x00P\x00E MKTAGACTION(TYPE)
200 <EXPECT_TAG>\x00V\x00E\x00R\x00S MKTAGACTION(VERS)
201 <EXPECT_TAG>\x00W\x00I\x00F\x00E MKTAGACTION(WIFE)
202 <EXPECT_TAG>\x00W\x00I\x00L\x00L MKTAGACTION(WILL)
204 <EXPECT_TAG>{alphanum}+ ACTION_ALPHANUM /* EXPECT_TAG state: a run of alphanum units not claimed by an earlier rule of the same length */
  /* The rules below carry no start-condition prefix. */
  /* One character of the any_but_delim class. */
208 {any_but_delim} ACTION_ANY
  /* An escape sequence, matched only when a non_at character follows it.
     The "/" is flex trailing context: the following character is required
     but is not part of the matched text. */
210 {escape}/{non_at} ACTION_ESCAPE
  /* A complete pointer as described by the pointer definition. */
212 {pointer} ACTION_POINTER
  /* An optional run of delimiter or tab units followed by a line terminator. */
214 {gen_delim}*{terminator} ACTION_TERMINATOR
  /* normal_at is defined elsewhere in the definitions section.
     NOTE(review): the ACTION_* macros are not defined in the visible part
     of this file; presumably they come from gedcom_lex_common.c. */
218 {normal_at} ACTION_NORMAL_AT
/* Pull in the third part of gedcom_lex_common.c.
   NOTE(review): LEX_SECTION is given a new value here without a visible
   #undef; this relies on the macro having been undefined after the previous
   include -- confirm in gedcom_lex_common.c. */
224 #define LEX_SECTION 3 /* include only a specific part of the following file */
225 #include "gedcom_lex_common.c"
/* Delegate to gedcom_hilo_lex() and hand its return value back unchanged. */
230 return gedcom_hilo_lex();
/* Call test_loop() with the TWO_BYTE_HILO selector and the name "UNICODE",
   and return its result.  NOTE(review): test_loop and TWO_BYTE_HILO are not
   defined in the visible part of this file; presumably they come from
   gedcom_lex_common.c -- confirm. */
235 return test_loop(TWO_BYTE_HILO, "UNICODE");