Copied from old documentation. Removed all Gedcom_val details.
[gedcom-parse.git] / gedcom / gedcom_1byte.lex
1 /* Lexer for 1-byte encoding of Gedcom.
2    Copyright (C) 2001 The Genes Development Team
3    This file is part of the Gedcom parser library.
4    Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
5
6    The Gedcom parser library is free software; you can redistribute it
7    and/or modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The Gedcom parser library is distributed in the hope that it will be
12    useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the Gedcom parser library; if not, write to the
18    Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20
21 /* $Id$ */
22 /* $Name$ */
23
24 %{
/* Definitions-section prologue: flex copies this text verbatim into the
   generated scanner.  The shared file gedcom_lex_common.c is included with
   LEX_SECTION set to 1 so that only its first part is pulled in here, and
   yymyinit is renamed to gedcom_1byte_myinit for this scanner. */
25 #define LEX_SECTION 1  /* include only a specific part of the following file */
26 #define yymyinit gedcom_1byte_myinit
27 #include "gedcom_lex_common.c"
28
/* This scanner handles 1-byte encodings, hence a width of 1.
   NOTE(review): presumably read by the shared code in gedcom_lex_common.c
   -- confirm there, it is not referenced in this file. */
29 static size_t encoding_width = 1;
30 %}
31
/* Start conditions.  Both are declared with %s (inclusive), so rules that
   carry no start-condition prefix remain active while they are selected. */
32 %s NORMAL
33 %s EXPECT_TAG
34
/* Basic character classes.  Note that alpha also accepts the underscore. */
35 alpha        [A-Za-z_]
36 digit        [0-9]
37 delim        " "
38 tab          [\t]
39 hash         #
/* A doubled at-sign is distinguished from a single one. */
40 literal_at   @@
41 normal_at    @
/* Remaining byte values 0x21-0xFE that are not covered by the classes above:
   letters, digits, underscore, the hash and the at-sign are left out, as are
   0x7F and 0xFF. */
42 otherchar    [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFE]
/* Line terminator: CR, LF, CR LF or LF CR. */
43 terminator   \x0D|\x0A|\x0D\x0A|\x0A\x0D
44
/* Composite classes.  any_char accepts a doubled at-sign but not a single
   one; any_but_delim is any_char without the space; non_at is any_char
   without the doubled at-sign. */
45 any_char     {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
46 any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
47 non_at       {alpha}|{digit}|{otherchar}|{delim}|{hash}
48 alphanum     {alpha}|{digit}
49 gen_delim    {delim}|{tab}
50
/* escape:  an at-sign and a hash, one or more any_char, a closing at-sign.
   pointer: an at-sign, one alphanum, any number of non_at, a closing
            at-sign. */
51 escape       @#{any_char}+@
52 pointer      @{alphanum}{non_at}*@
53
54 %%
53
54 %%
55
56 %{
/* Rules-section prologue.  Flex places code that appears before the first
   rule at the top of the generated scanning routine, so it runs on every
   entry to the scanner.  Part 2 of gedcom_lex_common.c is included here;
   ACTION_BEFORE_REGEXPS is not defined in this file.
   NOTE(review): presumably it comes from that include -- confirm. */
57 #undef LEX_SECTION
58 #define LEX_SECTION 2  /* include only a specific part of the following file */
59 #include "gedcom_lex_common.c"
60
61 ACTION_BEFORE_REGEXPS
62   
63 %}
64
  /* Rules that apply only in the INITIAL start condition: a run of spaces
     and tabs, and a run of digits.  The two digit rules can match the same
     text; flex breaks ties between equally long matches in favour of the
     rule listed first, so a number of two or more digits with a leading
     zero selects ACTION_0_DIGITS and every other digit run selects
     ACTION_DIGITS.  The ACTION_ macros are defined outside this file. */
65 <INITIAL>{gen_delim}* ACTION_INITIAL_WHITESPACE
66
67 <INITIAL>0{digit}+    ACTION_0_DIGITS
68
69 <INITIAL>{digit}+     ACTION_DIGITS
70
71 <EXPECT_TAG>ABBR  MKTAGACTION(ABBR)
72 <EXPECT_TAG>ADDR  MKTAGACTION(ADDR)
73 <EXPECT_TAG>ADR1  MKTAGACTION(ADR1)
74 <EXPECT_TAG>ADR2  MKTAGACTION(ADR2)
75 <EXPECT_TAG>ADOP  MKTAGACTION(ADOP)
76 <EXPECT_TAG>AFN   MKTAGACTION(AFN)
77 <EXPECT_TAG>AGE   MKTAGACTION(AGE)
78 <EXPECT_TAG>AGNC  MKTAGACTION(AGNC)
79 <EXPECT_TAG>ALIA  MKTAGACTION(ALIA)
80 <EXPECT_TAG>ANCE  MKTAGACTION(ANCE)
81 <EXPECT_TAG>ANCI  MKTAGACTION(ANCI)
82 <EXPECT_TAG>ANUL  MKTAGACTION(ANUL)
83 <EXPECT_TAG>ASSO  MKTAGACTION(ASSO)
84 <EXPECT_TAG>AUTH  MKTAGACTION(AUTH)
85 <EXPECT_TAG>BAPL  MKTAGACTION(BAPL)
86 <EXPECT_TAG>BAPM  MKTAGACTION(BAPM)
87 <EXPECT_TAG>BARM  MKTAGACTION(BARM)
88 <EXPECT_TAG>BASM  MKTAGACTION(BASM)
89 <EXPECT_TAG>BIRT  MKTAGACTION(BIRT)
90 <EXPECT_TAG>BLES  MKTAGACTION(BLES)
91 <EXPECT_TAG>BLOB  MKTAGACTION(BLOB)
92 <EXPECT_TAG>BURI  MKTAGACTION(BURI)
93 <EXPECT_TAG>CALN  MKTAGACTION(CALN)
94 <EXPECT_TAG>CAST  MKTAGACTION(CAST)
95 <EXPECT_TAG>CAUS  MKTAGACTION(CAUS)
96 <EXPECT_TAG>CENS  MKTAGACTION(CENS)
97 <EXPECT_TAG>CHAN  MKTAGACTION(CHAN)
98 <EXPECT_TAG>CHAR  MKTAGACTION(CHAR)
99 <EXPECT_TAG>CHIL  MKTAGACTION(CHIL)
100 <EXPECT_TAG>CHR   MKTAGACTION(CHR)
101 <EXPECT_TAG>CHRA  MKTAGACTION(CHRA)
102 <EXPECT_TAG>CITY  MKTAGACTION(CITY)
103 <EXPECT_TAG>CONC  MKTAGACTION(CONC)
104 <EXPECT_TAG>CONF  MKTAGACTION(CONF)
105 <EXPECT_TAG>CONL  MKTAGACTION(CONL)
106 <EXPECT_TAG>CONT  MKTAGACTION(CONT)
107 <EXPECT_TAG>COPR  MKTAGACTION(COPR)
108 <EXPECT_TAG>CORP  MKTAGACTION(CORP)
109 <EXPECT_TAG>CREM  MKTAGACTION(CREM)
110 <EXPECT_TAG>CTRY  MKTAGACTION(CTRY)
111 <EXPECT_TAG>DATA  MKTAGACTION(DATA)
112 <EXPECT_TAG>DATE  MKTAGACTION(DATE)
113 <EXPECT_TAG>DEAT  MKTAGACTION(DEAT)
114 <EXPECT_TAG>DESC  MKTAGACTION(DESC)
115 <EXPECT_TAG>DESI  MKTAGACTION(DESI)
116 <EXPECT_TAG>DEST  MKTAGACTION(DEST)
117 <EXPECT_TAG>DIV   MKTAGACTION(DIV)
118 <EXPECT_TAG>DIVF  MKTAGACTION(DIVF)
119 <EXPECT_TAG>DSCR  MKTAGACTION(DSCR)
120 <EXPECT_TAG>EDUC  MKTAGACTION(EDUC)
121 <EXPECT_TAG>EMIG  MKTAGACTION(EMIG)
122 <EXPECT_TAG>ENDL  MKTAGACTION(ENDL)
123 <EXPECT_TAG>ENGA  MKTAGACTION(ENGA)
124 <EXPECT_TAG>EVEN  MKTAGACTION(EVEN)
125 <EXPECT_TAG>FAM   MKTAGACTION(FAM)
126 <EXPECT_TAG>FAMC  MKTAGACTION(FAMC)
127 <EXPECT_TAG>FAMF  MKTAGACTION(FAMF)
128 <EXPECT_TAG>FAMS  MKTAGACTION(FAMS)
129 <EXPECT_TAG>FCOM  MKTAGACTION(FCOM)
130 <EXPECT_TAG>FILE  MKTAGACTION(FILE)
131 <EXPECT_TAG>FORM  MKTAGACTION(FORM)
132 <EXPECT_TAG>GEDC  MKTAGACTION(GEDC)
133 <EXPECT_TAG>GIVN  MKTAGACTION(GIVN)
134 <EXPECT_TAG>GRAD  MKTAGACTION(GRAD)
135 <EXPECT_TAG>HEAD  MKTAGACTION(HEAD)
136 <EXPECT_TAG>HUSB  MKTAGACTION(HUSB)
137 <EXPECT_TAG>IDNO  MKTAGACTION(IDNO)
138 <EXPECT_TAG>IMMI  MKTAGACTION(IMMI)
139 <EXPECT_TAG>INDI  MKTAGACTION(INDI)
140 <EXPECT_TAG>LANG  MKTAGACTION(LANG)
141 <EXPECT_TAG>LEGA  MKTAGACTION(LEGA)
142 <EXPECT_TAG>MARB  MKTAGACTION(MARB)
143 <EXPECT_TAG>MARC  MKTAGACTION(MARC)
144 <EXPECT_TAG>MARL  MKTAGACTION(MARL)
145 <EXPECT_TAG>MARR  MKTAGACTION(MARR)
146 <EXPECT_TAG>MARS  MKTAGACTION(MARS)
147 <EXPECT_TAG>MEDI  MKTAGACTION(MEDI)
148 <EXPECT_TAG>NAME  MKTAGACTION(NAME)
149 <EXPECT_TAG>NATI  MKTAGACTION(NATI)
150 <EXPECT_TAG>NATU  MKTAGACTION(NATU)
151 <EXPECT_TAG>NCHI  MKTAGACTION(NCHI)
152 <EXPECT_TAG>NICK  MKTAGACTION(NICK)
153 <EXPECT_TAG>NMR   MKTAGACTION(NMR)
154 <EXPECT_TAG>NOTE  MKTAGACTION(NOTE)
155 <EXPECT_TAG>NPFX  MKTAGACTION(NPFX)
156 <EXPECT_TAG>NSFX  MKTAGACTION(NSFX)
157 <EXPECT_TAG>OBJE  MKTAGACTION(OBJE)
158 <EXPECT_TAG>OCCU  MKTAGACTION(OCCU)
159 <EXPECT_TAG>ORDI  MKTAGACTION(ORDI)
160 <EXPECT_TAG>ORDN  MKTAGACTION(ORDN)
161 <EXPECT_TAG>PAGE  MKTAGACTION(PAGE)
162 <EXPECT_TAG>PEDI  MKTAGACTION(PEDI)
163 <EXPECT_TAG>PHON  MKTAGACTION(PHON)
164 <EXPECT_TAG>PLAC  MKTAGACTION(PLAC)
165 <EXPECT_TAG>POST  MKTAGACTION(POST)
166 <EXPECT_TAG>PROB  MKTAGACTION(PROB)
167 <EXPECT_TAG>PROP  MKTAGACTION(PROP)
168 <EXPECT_TAG>PUBL  MKTAGACTION(PUBL)
169 <EXPECT_TAG>QUAY  MKTAGACTION(QUAY)
170 <EXPECT_TAG>REFN  MKTAGACTION(REFN)
171 <EXPECT_TAG>RELA  MKTAGACTION(RELA)
172 <EXPECT_TAG>RELI  MKTAGACTION(RELI)
173 <EXPECT_TAG>REPO  MKTAGACTION(REPO)
174 <EXPECT_TAG>RESI  MKTAGACTION(RESI)
175 <EXPECT_TAG>RESN  MKTAGACTION(RESN)
176 <EXPECT_TAG>RETI  MKTAGACTION(RETI)
177 <EXPECT_TAG>RFN   MKTAGACTION(RFN)
178 <EXPECT_TAG>RIN   MKTAGACTION(RIN)
179 <EXPECT_TAG>ROLE  MKTAGACTION(ROLE)
180 <EXPECT_TAG>SEX   MKTAGACTION(SEX)
181 <EXPECT_TAG>SLGC  MKTAGACTION(SLGC)
182 <EXPECT_TAG>SLGS  MKTAGACTION(SLGS)
183 <EXPECT_TAG>SOUR  MKTAGACTION(SOUR)
184 <EXPECT_TAG>SPFX  MKTAGACTION(SPFX)
185 <EXPECT_TAG>SSN   MKTAGACTION(SSN)
186 <EXPECT_TAG>STAE  MKTAGACTION(STAE)
187 <EXPECT_TAG>STAT  MKTAGACTION(STAT)
188 <EXPECT_TAG>SUBM  MKTAGACTION(SUBM)
189 <EXPECT_TAG>SUBN  MKTAGACTION(SUBN)
190 <EXPECT_TAG>SURN  MKTAGACTION(SURN)
191 <EXPECT_TAG>TEMP  MKTAGACTION(TEMP)
192 <EXPECT_TAG>TEXT  MKTAGACTION(TEXT)
193 <EXPECT_TAG>TIME  MKTAGACTION(TIME)
194 <EXPECT_TAG>TITL  MKTAGACTION(TITL)
195 <EXPECT_TAG>TRLR  MKTAGACTION(TRLR)
196 <EXPECT_TAG>TYPE  MKTAGACTION(TYPE)
197 <EXPECT_TAG>VERS  MKTAGACTION(VERS)
198 <EXPECT_TAG>WIFE  MKTAGACTION(WIFE)
199 <EXPECT_TAG>WILL  MKTAGACTION(WILL)
200      
  /* The rules above each match one literal tag while the scanner is in the
     EXPECT_TAG start condition and hand the tag name to MKTAGACTION (a
     macro defined outside this file).  The rule below is the fallback for
     any other run of letters, digits and underscores in that start
     condition.  Flex picks the longest match and, between equally long
     matches, the rule listed first: a literal tag therefore wins only when
     the alphanumeric run is exactly that tag, and a longer run that merely
     starts with a tag name falls through to ACTION_ALPHANUM. */
201 <EXPECT_TAG>{alphanum}+   ACTION_ALPHANUM
202
203 {delim}                   ACTION_DELIM
204
  /* The rule above and all rules below carry no start-condition prefix.
     Because NORMAL and EXPECT_TAG are inclusive (%s) start conditions,
     these rules are active in INITIAL, NORMAL and EXPECT_TAG alike.  The
     ACTION_ macros are defined outside this file. */
205 {any_but_delim}           ACTION_ANY
206
  /* Trailing context: an escape sequence is recognised only when it is
     directly followed by a non_at character, which is not consumed. */
207 {escape}/{non_at}         ACTION_ESCAPE
208
209 {pointer}                 ACTION_POINTER
210
  /* A line terminator, together with any spaces or tabs just before it. */
211 {gen_delim}*{terminator}  ACTION_TERMINATOR
212
213 <<EOF>>                   ACTION_EOF
214
  /* A single at-sign that did not form part of an escape, a pointer or a
     doubled at-sign (those matches are longer and therefore preferred). */
215 {normal_at}               ACTION_NORMAL_AT
216
217 {tab}                     ACTION_TAB
218
  /* Catch-all for any other single character except a newline; being the
     last rule, it only fires when no earlier rule matches. */
219 .                         ACTION_UNEXPECTED
220
221 %%
/* User-code section: pull in the third part of the shared file
   gedcom_lex_common.c, selected by LEX_SECTION being 3. */
222 #undef LEX_SECTION
223 #define LEX_SECTION 3  /* include only a specific part of the following file */
224 #include "gedcom_lex_common.c"
225
/* Check whether the string STR is scanned as exactly one token of the kind
   CHECK_TOKEN when the scanner starts in the start condition selected by
   STATE.

   STR         NUL-terminated string to scan (handed to yy_scan_string).
   STATE       STATE_NORMAL, STATE_INITIAL or STATE_EXPECT_TAG; for any other
               value no BEGIN is performed and the current start condition
               is kept.
   CHECK_TOKEN token code the first yylex() call is expected to return.

   Returns 0 when the first token equals CHECK_TOKEN and is either 0 itself
   or followed by a yylex() result of 0; returns 1 in every other case.

   Side effects: the scanner's current input buffer is deleted before STR is
   scanned, and the start condition chosen here is not restored afterwards.
   NOTE(review): callers presumably must not use this while a file is being
   scanned with this lexer -- confirm against the call sites. */
226 int gedcom_check_token(const char* str, ParseState state, int check_token)
227 {
228   int result = 0;
229   int token;
230   YY_BUFFER_STATE buffer;
231
  /* Replace whatever buffer is current by an in-memory buffer over STR. */
232   yy_delete_buffer(YY_CURRENT_BUFFER);
233   buffer = yy_scan_string(str);
234
  /* INIT_LINE_LEN is a macro defined outside this file.
     NOTE(review): presumably resets the line-length bookkeeping of the
     shared lexer code -- confirm in gedcom_lex_common.c. */
235   INIT_LINE_LEN;
236   if (state == STATE_NORMAL)
237     BEGIN(NORMAL);
238   else if (state == STATE_INITIAL)
239     BEGIN(INITIAL);
240   else if (state == STATE_EXPECT_TAG)
241     BEGIN(EXPECT_TAG);
242
243   /* Input is UTF-8 coming from the application, so bypass iconv */
244   dummy_conv = 1;
245   token = yylex();
246   if (token != check_token)
247     result = 1;
248   
  /* If a token was returned at all, the next call has to return 0 as well,
     otherwise STR held more than a single token. */
249   if (token != 0) {
250     token = yylex();
251     if (token != 0)
252       result = 1;
253   }
254   dummy_conv = 0;
255   
  /* Release the temporary string buffer again. */
256   yy_delete_buffer(buffer);
257   return result;
258 }
259
/* Stand-alone test driver, compiled only when LEXER_TEST is defined. */
260 #ifdef LEXER_TEST
/* Generic scanner entry point for the test build: forwards to this file's
   scanner function, gedcom_1byte_lex(), and returns its result. */
261 int gedcom_lex()
262 {
263   return gedcom_1byte_lex();
264 }
265
/* Test program entry point: the exit status is whatever test_loop() returns
   for the ONE_BYTE / "ASCII" combination.  test_loop() is defined outside
   this file.
   NOTE(review): presumably in gedcom_lex_common.c -- confirm. */
266 int main()
267 {
268   return test_loop(ONE_BYTE, "ASCII");
269 }
270 #endif