1 /* Lexer for Unicode encoding (low-high order) of Gedcom.
2 Copyright (C) 2001 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 /* In low-high order, a space is encoded as 0x20 0x00 */
25 /* i.e. this is UTF-16LE (little-endian UTF-16) */
28 #undef IN_LEX /* first inclusion, with IN_LEX undefined: include only a specific part of the following file */
29 #include "gedcom_lex_common.c"
/* Width of the encoding handled by this lexer: the patterns in this file
   pair every character byte with a second byte (a space is 0x20 0x00).
   NOTE(review): presumably read by the code pulled in from
   gedcom_lex_common.c -- confirm there. */
31 static size_t encoding_width = 2;
/* Any "other" character, in two forms:
     - a first byte from the listed ranges followed by a 0x00 byte; the
       ranges leave out 0x00-0x20, 0x23, 0x30-0x39, 0x40, 0x41-0x5A, 0x5F,
       0x61-0x7A and 0x7F (NOTE(review): presumably because those are
       covered by the other named classes -- confirm against their
       definitions);
     - any first byte followed by a non-zero second byte. */
43 otherchar [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]\x00|[\x00-\xFF][\x01-\xFF]
/* Line terminator: CR, LF, CR LF or LF CR, every byte followed by 0x00. */
44 terminator \x0D\x00|\x0A\x00|\x0D\x00\x0A\x00|\x0A\x00\x0D\x00
/* Composite classes.  any_but_delim is any_char without delim; non_at is
   any_char without literal_at. */
46 any_char {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
47 any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
48 non_at {alpha}|{digit}|{otherchar}|{delim}|{hash}
49 alphanum {alpha}|{digit}
50 gen_delim {delim}|{tab}
/* Escape: '@' '#', one or more any_char, then a closing '@' (each of the
   literal characters followed by 0x00). */
52 escape @\x00#\x00{any_char}+@\x00
/* Pointer: '@', one alphanum, one or more non_at, then a closing '@'. */
53 pointer @\x00{alphanum}{non_at}+@\x00
58 #define IN_LEX /* second inclusion of the same file, now with IN_LEX defined: include only a specific part of the following file */
59 #include "gedcom_lex_common.c"
65 <INITIAL>{gen_delim}* ACTION_INITIAL_WHITESPACE
67 <INITIAL>\x00[0]{digit}+ ACTION_0_DIGITS
69 <INITIAL>{digit}+ ACTION_DIGITS
  /* Fixed tags recognised in the EXPECT_TAG start condition.  Each pattern
     spells the tag with every character followed by a 0x00 byte (low-high
     order) and hands the tag name to MKTAGACTION.
     NOTE(review): sequences such as \x00B or \x001 rely on flex reading at
     most two hex digits after \x, so the letter or digit that follows is a
     separate character -- confirm for the flex version in use. */
71 <EXPECT_TAG>A\x00B\x00B\x00R\x00 MKTAGACTION(ABBR)
72 <EXPECT_TAG>A\x00D\x00D\x00R\x00 MKTAGACTION(ADDR)
73 <EXPECT_TAG>A\x00D\x00R\x001\x00 MKTAGACTION(ADR1)
74 <EXPECT_TAG>A\x00D\x00R\x002\x00 MKTAGACTION(ADR2)
75 <EXPECT_TAG>A\x00D\x00O\x00P\x00 MKTAGACTION(ADOP)
76 <EXPECT_TAG>A\x00F\x00N\x00 MKTAGACTION(AFN)
77 <EXPECT_TAG>A\x00G\x00E\x00 MKTAGACTION(AGE)
78 <EXPECT_TAG>A\x00G\x00N\x00C\x00 MKTAGACTION(AGNC)
79 <EXPECT_TAG>A\x00L\x00I\x00A\x00 MKTAGACTION(ALIA)
80 <EXPECT_TAG>A\x00N\x00C\x00E\x00 MKTAGACTION(ANCE)
81 <EXPECT_TAG>A\x00N\x00C\x00I\x00 MKTAGACTION(ANCI)
82 <EXPECT_TAG>A\x00N\x00U\x00L\x00 MKTAGACTION(ANUL)
83 <EXPECT_TAG>A\x00S\x00S\x00O\x00 MKTAGACTION(ASSO)
84 <EXPECT_TAG>A\x00U\x00T\x00H\x00 MKTAGACTION(AUTH)
85 <EXPECT_TAG>B\x00A\x00P\x00L\x00 MKTAGACTION(BAPL)
86 <EXPECT_TAG>B\x00A\x00P\x00M\x00 MKTAGACTION(BAPM)
87 <EXPECT_TAG>B\x00A\x00R\x00M\x00 MKTAGACTION(BARM)
88 <EXPECT_TAG>B\x00A\x00S\x00M\x00 MKTAGACTION(BASM)
89 <EXPECT_TAG>B\x00I\x00R\x00T\x00 MKTAGACTION(BIRT)
90 <EXPECT_TAG>B\x00L\x00E\x00S\x00 MKTAGACTION(BLES)
91 <EXPECT_TAG>B\x00L\x00O\x00B\x00 MKTAGACTION(BLOB)
92 <EXPECT_TAG>B\x00U\x00R\x00I\x00 MKTAGACTION(BURI)
93 <EXPECT_TAG>C\x00A\x00L\x00N\x00 MKTAGACTION(CALN)
94 <EXPECT_TAG>C\x00A\x00S\x00T\x00 MKTAGACTION(CAST)
95 <EXPECT_TAG>C\x00A\x00U\x00S\x00 MKTAGACTION(CAUS)
96 <EXPECT_TAG>C\x00E\x00N\x00S\x00 MKTAGACTION(CENS)
97 <EXPECT_TAG>C\x00H\x00A\x00N\x00 MKTAGACTION(CHAN)
98 <EXPECT_TAG>C\x00H\x00A\x00R\x00 MKTAGACTION(CHAR)
99 <EXPECT_TAG>C\x00H\x00I\x00L\x00 MKTAGACTION(CHIL)
100 <EXPECT_TAG>C\x00H\x00R\x00 MKTAGACTION(CHR)
101 <EXPECT_TAG>C\x00H\x00R\x00A\x00 MKTAGACTION(CHRA)
102 <EXPECT_TAG>C\x00I\x00T\x00Y\x00 MKTAGACTION(CITY)
103 <EXPECT_TAG>C\x00O\x00N\x00C\x00 MKTAGACTION(CONC)
104 <EXPECT_TAG>C\x00O\x00N\x00F\x00 MKTAGACTION(CONF)
105 <EXPECT_TAG>C\x00O\x00N\x00L\x00 MKTAGACTION(CONL)
106 <EXPECT_TAG>C\x00O\x00N\x00T\x00 MKTAGACTION(CONT)
107 <EXPECT_TAG>C\x00O\x00P\x00R\x00 MKTAGACTION(COPR)
108 <EXPECT_TAG>C\x00O\x00R\x00P\x00 MKTAGACTION(CORP)
109 <EXPECT_TAG>C\x00R\x00E\x00M\x00 MKTAGACTION(CREM)
110 <EXPECT_TAG>C\x00T\x00R\x00Y\x00 MKTAGACTION(CTRY)
111 <EXPECT_TAG>D\x00A\x00T\x00A\x00 MKTAGACTION(DATA)
112 <EXPECT_TAG>D\x00A\x00T\x00E\x00 MKTAGACTION(DATE)
113 <EXPECT_TAG>D\x00E\x00A\x00T\x00 MKTAGACTION(DEAT)
114 <EXPECT_TAG>D\x00E\x00S\x00C\x00 MKTAGACTION(DESC)
115 <EXPECT_TAG>D\x00E\x00S\x00I\x00 MKTAGACTION(DESI)
116 <EXPECT_TAG>D\x00E\x00S\x00T\x00 MKTAGACTION(DEST)
117 <EXPECT_TAG>D\x00I\x00V\x00 MKTAGACTION(DIV)
118 <EXPECT_TAG>D\x00I\x00V\x00F\x00 MKTAGACTION(DIVF)
119 <EXPECT_TAG>D\x00S\x00C\x00R\x00 MKTAGACTION(DSCR)
120 <EXPECT_TAG>E\x00D\x00U\x00C\x00 MKTAGACTION(EDUC)
121 <EXPECT_TAG>E\x00M\x00I\x00G\x00 MKTAGACTION(EMIG)
122 <EXPECT_TAG>E\x00N\x00D\x00L\x00 MKTAGACTION(ENDL)
123 <EXPECT_TAG>E\x00N\x00G\x00A\x00 MKTAGACTION(ENGA)
124 <EXPECT_TAG>E\x00V\x00E\x00N\x00 MKTAGACTION(EVEN)
125 <EXPECT_TAG>F\x00A\x00M\x00 MKTAGACTION(FAM)
126 <EXPECT_TAG>F\x00A\x00M\x00C\x00 MKTAGACTION(FAMC)
127 <EXPECT_TAG>F\x00A\x00M\x00F\x00 MKTAGACTION(FAMF)
128 <EXPECT_TAG>F\x00A\x00M\x00S\x00 MKTAGACTION(FAMS)
129 <EXPECT_TAG>F\x00C\x00O\x00M\x00 MKTAGACTION(FCOM)
130 <EXPECT_TAG>F\x00I\x00L\x00E\x00 MKTAGACTION(FILE)
131 <EXPECT_TAG>F\x00O\x00R\x00M\x00 MKTAGACTION(FORM)
132 <EXPECT_TAG>G\x00E\x00D\x00C\x00 MKTAGACTION(GEDC)
133 <EXPECT_TAG>G\x00I\x00V\x00N\x00 MKTAGACTION(GIVN)
134 <EXPECT_TAG>G\x00R\x00A\x00D\x00 MKTAGACTION(GRAD)
135 <EXPECT_TAG>H\x00E\x00A\x00D\x00 MKTAGACTION(HEAD)
136 <EXPECT_TAG>H\x00U\x00S\x00B\x00 MKTAGACTION(HUSB)
137 <EXPECT_TAG>I\x00D\x00N\x00O\x00 MKTAGACTION(IDNO)
138 <EXPECT_TAG>I\x00M\x00M\x00I\x00 MKTAGACTION(IMMI)
139 <EXPECT_TAG>I\x00N\x00D\x00I\x00 MKTAGACTION(INDI)
140 <EXPECT_TAG>L\x00A\x00N\x00G\x00 MKTAGACTION(LANG)
141 <EXPECT_TAG>L\x00E\x00G\x00A\x00 MKTAGACTION(LEGA)
142 <EXPECT_TAG>M\x00A\x00R\x00B\x00 MKTAGACTION(MARB)
143 <EXPECT_TAG>M\x00A\x00R\x00C\x00 MKTAGACTION(MARC)
144 <EXPECT_TAG>M\x00A\x00R\x00L\x00 MKTAGACTION(MARL)
145 <EXPECT_TAG>M\x00A\x00R\x00R\x00 MKTAGACTION(MARR)
146 <EXPECT_TAG>M\x00A\x00R\x00S\x00 MKTAGACTION(MARS)
147 <EXPECT_TAG>M\x00E\x00D\x00I\x00 MKTAGACTION(MEDI)
148 <EXPECT_TAG>N\x00A\x00M\x00E\x00 MKTAGACTION(NAME)
149 <EXPECT_TAG>N\x00A\x00T\x00I\x00 MKTAGACTION(NATI)
150 <EXPECT_TAG>N\x00A\x00T\x00U\x00 MKTAGACTION(NATU)
151 <EXPECT_TAG>N\x00C\x00H\x00I\x00 MKTAGACTION(NCHI)
152 <EXPECT_TAG>N\x00I\x00C\x00K\x00 MKTAGACTION(NICK)
153 <EXPECT_TAG>N\x00M\x00R\x00 MKTAGACTION(NMR)
154 <EXPECT_TAG>N\x00O\x00T\x00E\x00 MKTAGACTION(NOTE)
155 <EXPECT_TAG>N\x00P\x00F\x00X\x00 MKTAGACTION(NPFX)
156 <EXPECT_TAG>N\x00S\x00F\x00X\x00 MKTAGACTION(NSFX)
157 <EXPECT_TAG>O\x00B\x00J\x00E\x00 MKTAGACTION(OBJE)
158 <EXPECT_TAG>O\x00C\x00C\x00U\x00 MKTAGACTION(OCCU)
159 <EXPECT_TAG>O\x00R\x00D\x00I\x00 MKTAGACTION(ORDI)
160 <EXPECT_TAG>O\x00R\x00D\x00N\x00 MKTAGACTION(ORDN)
161 <EXPECT_TAG>P\x00A\x00G\x00E\x00 MKTAGACTION(PAGE)
162 <EXPECT_TAG>P\x00E\x00D\x00I\x00 MKTAGACTION(PEDI)
163 <EXPECT_TAG>P\x00H\x00O\x00N\x00 MKTAGACTION(PHON)
164 <EXPECT_TAG>P\x00L\x00A\x00C\x00 MKTAGACTION(PLAC)
165 <EXPECT_TAG>P\x00O\x00S\x00T\x00 MKTAGACTION(POST)
166 <EXPECT_TAG>P\x00R\x00O\x00B\x00 MKTAGACTION(PROB)
167 <EXPECT_TAG>P\x00R\x00O\x00P\x00 MKTAGACTION(PROP)
168 <EXPECT_TAG>P\x00U\x00B\x00L\x00 MKTAGACTION(PUBL)
169 <EXPECT_TAG>Q\x00U\x00A\x00Y\x00 MKTAGACTION(QUAY)
170 <EXPECT_TAG>R\x00E\x00F\x00N\x00 MKTAGACTION(REFN)
171 <EXPECT_TAG>R\x00E\x00L\x00A\x00 MKTAGACTION(RELA)
172 <EXPECT_TAG>R\x00E\x00L\x00I\x00 MKTAGACTION(RELI)
173 <EXPECT_TAG>R\x00E\x00P\x00O\x00 MKTAGACTION(REPO)
174 <EXPECT_TAG>R\x00E\x00S\x00I\x00 MKTAGACTION(RESI)
175 <EXPECT_TAG>R\x00E\x00S\x00N\x00 MKTAGACTION(RESN)
176 <EXPECT_TAG>R\x00E\x00T\x00I\x00 MKTAGACTION(RETI)
177 <EXPECT_TAG>R\x00F\x00N\x00 MKTAGACTION(RFN)
178 <EXPECT_TAG>R\x00I\x00N\x00 MKTAGACTION(RIN)
179 <EXPECT_TAG>R\x00O\x00L\x00E\x00 MKTAGACTION(ROLE)
180 <EXPECT_TAG>S\x00E\x00X\x00 MKTAGACTION(SEX)
181 <EXPECT_TAG>S\x00L\x00G\x00C\x00 MKTAGACTION(SLGC)
182 <EXPECT_TAG>S\x00L\x00G\x00S\x00 MKTAGACTION(SLGS)
183 <EXPECT_TAG>S\x00O\x00U\x00R\x00 MKTAGACTION(SOUR)
184 <EXPECT_TAG>S\x00P\x00F\x00X\x00 MKTAGACTION(SPFX)
185 <EXPECT_TAG>S\x00S\x00N\x00 MKTAGACTION(SSN)
186 <EXPECT_TAG>S\x00T\x00A\x00E\x00 MKTAGACTION(STAE)
187 <EXPECT_TAG>S\x00T\x00A\x00T\x00 MKTAGACTION(STAT)
188 <EXPECT_TAG>S\x00U\x00B\x00M\x00 MKTAGACTION(SUBM)
189 <EXPECT_TAG>S\x00U\x00B\x00N\x00 MKTAGACTION(SUBN)
190 <EXPECT_TAG>S\x00U\x00R\x00N\x00 MKTAGACTION(SURN)
191 <EXPECT_TAG>T\x00E\x00M\x00P\x00 MKTAGACTION(TEMP)
192 <EXPECT_TAG>T\x00E\x00X\x00T\x00 MKTAGACTION(TEXT)
193 <EXPECT_TAG>T\x00I\x00M\x00E\x00 MKTAGACTION(TIME)
194 <EXPECT_TAG>T\x00I\x00T\x00L\x00 MKTAGACTION(TITL)
195 <EXPECT_TAG>T\x00R\x00L\x00R\x00 MKTAGACTION(TRLR)
196 <EXPECT_TAG>T\x00Y\x00P\x00E\x00 MKTAGACTION(TYPE)
197 <EXPECT_TAG>V\x00E\x00R\x00S\x00 MKTAGACTION(VERS)
198 <EXPECT_TAG>W\x00I\x00F\x00E\x00 MKTAGACTION(WIFE)
199 <EXPECT_TAG>W\x00I\x00L\x00L\x00 MKTAGACTION(WILL)
  /* Any run of alphanumerics in EXPECT_TAG.  It comes after the fixed tag
     rules, so on an equal-length match the fixed tag rule is chosen. */
201 <EXPECT_TAG>{alphanum}+ ACTION_ALPHANUM
  /* The remaining rules carry no explicit start condition. */
  /* A single character that is not a delimiter. */
205 {any_but_delim} ACTION_ANY
  /* An escape, matched only when a non_at character follows it; that
     following character is trailing context and is not part of the match. */
207 {escape}/{non_at} ACTION_ESCAPE
  /* A pointer as defined by the pointer pattern. */
209 {pointer} ACTION_POINTER
  /* Zero or more delimiters/tabs followed by a line terminator. */
211 {gen_delim}*{terminator} ACTION_TERMINATOR
227 return gedcom_lohi_lex();
232 return test_loop(TWO_BYTE_LOHI, "UNICODE");