/* Lexer for Unicode encoding (low-high order) of Gedcom.
   Copyright (C) 2001 The Genes Development Team
   This file is part of the Gedcom parser library.
   Contributed by Peter Verthez, 2001.

   The Gedcom parser library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The Gedcom parser library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the Gedcom parser library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* $Id$ */
/* $Name$ */

/* In low-high order, a space is encoded as 0x20 0x00, i.e. this is
   UTF-16-LE: every character is two bytes, least significant byte first.
   All patterns below are therefore written as "<low byte><high byte>". */

%{
#define LEX_SECTION 1  /* include only a specific part of the following file */
#define yymyinit gedcom_lohi_myinit
#include "gedcom_lex_common.c"

/* Number of bytes per character in this encoding. */
static size_t encoding_width = 2;
%}

%s NORMAL
%s EXPECT_TAG

/* Single characters from the ASCII range: low byte is the ASCII code,
   high byte is always 0x00. */
alpha         [A-Za-z_]\x00
digit         [0-9]\x00
delim         \x20\x00
tab           [\t]\x00
hash          #\x00
literal_at    @\x00@\x00
normal_at     @\x00
/* Either a remaining character with a zero high byte, or any code unit
   whose high byte is non-zero. */
otherchar     [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]\x00|[\x00-\xFF][\x01-\xFF]
/* CR, LF, CR LF or LF CR. */
terminator    \x0D\x00|\x0A\x00|\x0D\x00\x0A\x00|\x0A\x00\x0D\x00

any_char      {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
non_at        {alpha}|{digit}|{otherchar}|{delim}|{hash}
alphanum      {alpha}|{digit}
gen_delim     {delim}|{tab}

escape        @\x00#\x00{any_char}+@\x00
pointer       @\x00{alphanum}{non_at}*@\x00

%%

%{
#undef LEX_SECTION
#define LEX_SECTION 2  /* include only a specific part of the following file */
#include "gedcom_lex_common.c"

/* Layout of the rules below (the ACTION_* and MKTAGACTION macros come from
   gedcom_lex_common.c, which is not part of this file):

     INITIAL     start of a line: optional leading whitespace, then the
                 level number.
     EXPECT_TAG  the tag following the level number: one rule per known
                 tag, plus a generic alphanumeric fallback.
     NORMAL      the remainder of the line: value characters, escapes,
                 pointers and the line terminator.

   NOTE(review): the state transitions themselves happen inside the action
   macros -- confirm against gedcom_lex_common.c.

   The leading-zero level rule must be spelled "0 \x00" (low byte first).
   It is listed before the plain {digit}+ rule so that, for equal match
   lengths, it takes precedence.

   {delim} and the catch-all "." carry no start condition and thus apply
   in every state. */

ACTION_BEFORE_REGEXPS
%}

<INITIAL>{gen_delim}* ACTION_INITIAL_WHITESPACE

<INITIAL>0\x00{digit}+ ACTION_0_DIGITS

<INITIAL>{digit}+ ACTION_DIGITS

<EXPECT_TAG>A\x00B\x00B\x00R\x00 MKTAGACTION(ABBR)
<EXPECT_TAG>A\x00D\x00D\x00R\x00 MKTAGACTION(ADDR)
<EXPECT_TAG>A\x00D\x00R\x001\x00 MKTAGACTION(ADR1)
<EXPECT_TAG>A\x00D\x00R\x002\x00 MKTAGACTION(ADR2)
<EXPECT_TAG>A\x00D\x00O\x00P\x00 MKTAGACTION(ADOP)
<EXPECT_TAG>A\x00F\x00N\x00 MKTAGACTION(AFN)
<EXPECT_TAG>A\x00G\x00E\x00 MKTAGACTION(AGE)
<EXPECT_TAG>A\x00G\x00N\x00C\x00 MKTAGACTION(AGNC)
<EXPECT_TAG>A\x00L\x00I\x00A\x00 MKTAGACTION(ALIA)
<EXPECT_TAG>A\x00N\x00C\x00E\x00 MKTAGACTION(ANCE)
<EXPECT_TAG>A\x00N\x00C\x00I\x00 MKTAGACTION(ANCI)
<EXPECT_TAG>A\x00N\x00U\x00L\x00 MKTAGACTION(ANUL)
<EXPECT_TAG>A\x00S\x00S\x00O\x00 MKTAGACTION(ASSO)
<EXPECT_TAG>A\x00U\x00T\x00H\x00 MKTAGACTION(AUTH)
<EXPECT_TAG>B\x00A\x00P\x00L\x00 MKTAGACTION(BAPL)
<EXPECT_TAG>B\x00A\x00P\x00M\x00 MKTAGACTION(BAPM)
<EXPECT_TAG>B\x00A\x00R\x00M\x00 MKTAGACTION(BARM)
<EXPECT_TAG>B\x00A\x00S\x00M\x00 MKTAGACTION(BASM)
<EXPECT_TAG>B\x00I\x00R\x00T\x00 MKTAGACTION(BIRT)
<EXPECT_TAG>B\x00L\x00E\x00S\x00 MKTAGACTION(BLES)
<EXPECT_TAG>B\x00L\x00O\x00B\x00 MKTAGACTION(BLOB)
<EXPECT_TAG>B\x00U\x00R\x00I\x00 MKTAGACTION(BURI)
<EXPECT_TAG>C\x00A\x00L\x00N\x00 MKTAGACTION(CALN)
<EXPECT_TAG>C\x00A\x00S\x00T\x00 MKTAGACTION(CAST)
<EXPECT_TAG>C\x00A\x00U\x00S\x00 MKTAGACTION(CAUS)
<EXPECT_TAG>C\x00E\x00N\x00S\x00 MKTAGACTION(CENS)
<EXPECT_TAG>C\x00H\x00A\x00N\x00 MKTAGACTION(CHAN)
<EXPECT_TAG>C\x00H\x00A\x00R\x00 MKTAGACTION(CHAR)
<EXPECT_TAG>C\x00H\x00I\x00L\x00 MKTAGACTION(CHIL)
<EXPECT_TAG>C\x00H\x00R\x00 MKTAGACTION(CHR)
<EXPECT_TAG>C\x00H\x00R\x00A\x00 MKTAGACTION(CHRA)
<EXPECT_TAG>C\x00I\x00T\x00Y\x00 MKTAGACTION(CITY)
<EXPECT_TAG>C\x00O\x00N\x00C\x00 MKTAGACTION(CONC)
<EXPECT_TAG>C\x00O\x00N\x00F\x00 MKTAGACTION(CONF)
<EXPECT_TAG>C\x00O\x00N\x00L\x00 MKTAGACTION(CONL)
<EXPECT_TAG>C\x00O\x00N\x00T\x00 MKTAGACTION(CONT)
<EXPECT_TAG>C\x00O\x00P\x00R\x00 MKTAGACTION(COPR)
<EXPECT_TAG>C\x00O\x00R\x00P\x00 MKTAGACTION(CORP)
<EXPECT_TAG>C\x00R\x00E\x00M\x00 MKTAGACTION(CREM)
<EXPECT_TAG>C\x00T\x00R\x00Y\x00 MKTAGACTION(CTRY)
<EXPECT_TAG>D\x00A\x00T\x00A\x00 MKTAGACTION(DATA)
<EXPECT_TAG>D\x00A\x00T\x00E\x00 MKTAGACTION(DATE)
<EXPECT_TAG>D\x00E\x00A\x00T\x00 MKTAGACTION(DEAT)
<EXPECT_TAG>D\x00E\x00S\x00C\x00 MKTAGACTION(DESC)
<EXPECT_TAG>D\x00E\x00S\x00I\x00 MKTAGACTION(DESI)
<EXPECT_TAG>D\x00E\x00S\x00T\x00 MKTAGACTION(DEST)
<EXPECT_TAG>D\x00I\x00V\x00 MKTAGACTION(DIV)
<EXPECT_TAG>D\x00I\x00V\x00F\x00 MKTAGACTION(DIVF)
<EXPECT_TAG>D\x00S\x00C\x00R\x00 MKTAGACTION(DSCR)
<EXPECT_TAG>E\x00D\x00U\x00C\x00 MKTAGACTION(EDUC)
<EXPECT_TAG>E\x00M\x00I\x00G\x00 MKTAGACTION(EMIG)
<EXPECT_TAG>E\x00N\x00D\x00L\x00 MKTAGACTION(ENDL)
<EXPECT_TAG>E\x00N\x00G\x00A\x00 MKTAGACTION(ENGA)
<EXPECT_TAG>E\x00V\x00E\x00N\x00 MKTAGACTION(EVEN)
<EXPECT_TAG>F\x00A\x00M\x00 MKTAGACTION(FAM)
<EXPECT_TAG>F\x00A\x00M\x00C\x00 MKTAGACTION(FAMC)
<EXPECT_TAG>F\x00A\x00M\x00F\x00 MKTAGACTION(FAMF)
<EXPECT_TAG>F\x00A\x00M\x00S\x00 MKTAGACTION(FAMS)
<EXPECT_TAG>F\x00C\x00O\x00M\x00 MKTAGACTION(FCOM)
<EXPECT_TAG>F\x00I\x00L\x00E\x00 MKTAGACTION(FILE)
<EXPECT_TAG>F\x00O\x00R\x00M\x00 MKTAGACTION(FORM)
<EXPECT_TAG>G\x00E\x00D\x00C\x00 MKTAGACTION(GEDC)
<EXPECT_TAG>G\x00I\x00V\x00N\x00 MKTAGACTION(GIVN)
<EXPECT_TAG>G\x00R\x00A\x00D\x00 MKTAGACTION(GRAD)
<EXPECT_TAG>H\x00E\x00A\x00D\x00 MKTAGACTION(HEAD)
<EXPECT_TAG>H\x00U\x00S\x00B\x00 MKTAGACTION(HUSB)
<EXPECT_TAG>I\x00D\x00N\x00O\x00 MKTAGACTION(IDNO)
<EXPECT_TAG>I\x00M\x00M\x00I\x00 MKTAGACTION(IMMI)
<EXPECT_TAG>I\x00N\x00D\x00I\x00 MKTAGACTION(INDI)
<EXPECT_TAG>L\x00A\x00N\x00G\x00 MKTAGACTION(LANG)
<EXPECT_TAG>L\x00E\x00G\x00A\x00 MKTAGACTION(LEGA)
<EXPECT_TAG>M\x00A\x00R\x00B\x00 MKTAGACTION(MARB)
<EXPECT_TAG>M\x00A\x00R\x00C\x00 MKTAGACTION(MARC)
<EXPECT_TAG>M\x00A\x00R\x00L\x00 MKTAGACTION(MARL)
<EXPECT_TAG>M\x00A\x00R\x00R\x00 MKTAGACTION(MARR)
<EXPECT_TAG>M\x00A\x00R\x00S\x00 MKTAGACTION(MARS)
<EXPECT_TAG>M\x00E\x00D\x00I\x00 MKTAGACTION(MEDI)
<EXPECT_TAG>N\x00A\x00M\x00E\x00 MKTAGACTION(NAME)
<EXPECT_TAG>N\x00A\x00T\x00I\x00 MKTAGACTION(NATI)
<EXPECT_TAG>N\x00A\x00T\x00U\x00 MKTAGACTION(NATU)
<EXPECT_TAG>N\x00C\x00H\x00I\x00 MKTAGACTION(NCHI)
<EXPECT_TAG>N\x00I\x00C\x00K\x00 MKTAGACTION(NICK)
<EXPECT_TAG>N\x00M\x00R\x00 MKTAGACTION(NMR)
<EXPECT_TAG>N\x00O\x00T\x00E\x00 MKTAGACTION(NOTE)
<EXPECT_TAG>N\x00P\x00F\x00X\x00 MKTAGACTION(NPFX)
<EXPECT_TAG>N\x00S\x00F\x00X\x00 MKTAGACTION(NSFX)
<EXPECT_TAG>O\x00B\x00J\x00E\x00 MKTAGACTION(OBJE)
<EXPECT_TAG>O\x00C\x00C\x00U\x00 MKTAGACTION(OCCU)
<EXPECT_TAG>O\x00R\x00D\x00I\x00 MKTAGACTION(ORDI)
<EXPECT_TAG>O\x00R\x00D\x00N\x00 MKTAGACTION(ORDN)
<EXPECT_TAG>P\x00A\x00G\x00E\x00 MKTAGACTION(PAGE)
<EXPECT_TAG>P\x00E\x00D\x00I\x00 MKTAGACTION(PEDI)
<EXPECT_TAG>P\x00H\x00O\x00N\x00 MKTAGACTION(PHON)
<EXPECT_TAG>P\x00L\x00A\x00C\x00 MKTAGACTION(PLAC)
<EXPECT_TAG>P\x00O\x00S\x00T\x00 MKTAGACTION(POST)
<EXPECT_TAG>P\x00R\x00O\x00B\x00 MKTAGACTION(PROB)
<EXPECT_TAG>P\x00R\x00O\x00P\x00 MKTAGACTION(PROP)
<EXPECT_TAG>P\x00U\x00B\x00L\x00 MKTAGACTION(PUBL)
<EXPECT_TAG>Q\x00U\x00A\x00Y\x00 MKTAGACTION(QUAY)
<EXPECT_TAG>R\x00E\x00F\x00N\x00 MKTAGACTION(REFN)
<EXPECT_TAG>R\x00E\x00L\x00A\x00 MKTAGACTION(RELA)
<EXPECT_TAG>R\x00E\x00L\x00I\x00 MKTAGACTION(RELI)
<EXPECT_TAG>R\x00E\x00P\x00O\x00 MKTAGACTION(REPO)
<EXPECT_TAG>R\x00E\x00S\x00I\x00 MKTAGACTION(RESI)
<EXPECT_TAG>R\x00E\x00S\x00N\x00 MKTAGACTION(RESN)
<EXPECT_TAG>R\x00E\x00T\x00I\x00 MKTAGACTION(RETI)
<EXPECT_TAG>R\x00F\x00N\x00 MKTAGACTION(RFN)
<EXPECT_TAG>R\x00I\x00N\x00 MKTAGACTION(RIN)
<EXPECT_TAG>R\x00O\x00L\x00E\x00 MKTAGACTION(ROLE)
<EXPECT_TAG>S\x00E\x00X\x00 MKTAGACTION(SEX)
<EXPECT_TAG>S\x00L\x00G\x00C\x00 MKTAGACTION(SLGC)
<EXPECT_TAG>S\x00L\x00G\x00S\x00 MKTAGACTION(SLGS)
<EXPECT_TAG>S\x00O\x00U\x00R\x00 MKTAGACTION(SOUR)
<EXPECT_TAG>S\x00P\x00F\x00X\x00 MKTAGACTION(SPFX)
<EXPECT_TAG>S\x00S\x00N\x00 MKTAGACTION(SSN)
<EXPECT_TAG>S\x00T\x00A\x00E\x00 MKTAGACTION(STAE)
<EXPECT_TAG>S\x00T\x00A\x00T\x00 MKTAGACTION(STAT)
<EXPECT_TAG>S\x00U\x00B\x00M\x00 MKTAGACTION(SUBM)
<EXPECT_TAG>S\x00U\x00B\x00N\x00 MKTAGACTION(SUBN)
<EXPECT_TAG>S\x00U\x00R\x00N\x00 MKTAGACTION(SURN)
<EXPECT_TAG>T\x00E\x00M\x00P\x00 MKTAGACTION(TEMP)
<EXPECT_TAG>T\x00E\x00X\x00T\x00 MKTAGACTION(TEXT)
<EXPECT_TAG>T\x00I\x00M\x00E\x00 MKTAGACTION(TIME)
<EXPECT_TAG>T\x00I\x00T\x00L\x00 MKTAGACTION(TITL)
<EXPECT_TAG>T\x00R\x00L\x00R\x00 MKTAGACTION(TRLR)
<EXPECT_TAG>T\x00Y\x00P\x00E\x00 MKTAGACTION(TYPE)
<EXPECT_TAG>V\x00E\x00R\x00S\x00 MKTAGACTION(VERS)
<EXPECT_TAG>W\x00I\x00F\x00E\x00 MKTAGACTION(WIFE)
<EXPECT_TAG>W\x00I\x00L\x00L\x00 MKTAGACTION(WILL)

<EXPECT_TAG>{alphanum}+ ACTION_ALPHANUM

{delim} ACTION_DELIM

<NORMAL>{any_but_delim} ACTION_ANY

<NORMAL>{escape}/{non_at} ACTION_ESCAPE

<NORMAL>{pointer} ACTION_POINTER

<NORMAL>{gen_delim}*{terminator} ACTION_TERMINATOR

<<EOF>> ACTION_EOF

<NORMAL>{normal_at} ACTION_NORMAL_AT

<NORMAL>{tab} ACTION_TAB

. ACTION_UNEXPECTED

%%

#undef LEX_SECTION
#define LEX_SECTION 3  /* include only a specific part of the following file */
#include "gedcom_lex_common.c"

#ifdef LEXER_TEST
/* Stand-alone test driver, only built when LEXER_TEST is defined. */

/* Entry point forwarding to the scanner generated from this file. */
int gedcom_lex()
{
  return gedcom_lohi_lex();
}

/* Runs the test loop (declared outside this file) on this encoding. */
int main()
{
  return test_loop(TWO_BYTE_LOHI, "UNICODE");
}
#endif