/* This program is free software; you can redistribute it and/or modify  *
 * it under the terms of the GNU General Public License as published by  *
 * the Free Software Foundation; either version 2 of the License, or     *
 * (at your option) any later version.                                   *

 (C) 2001 by The Genes Development Team
 Original author: Peter Verthez (Peter.Verthez@advalvas.be)
*/

/* $Id$ */
/* $Name$ */

/* In low-high order, a space is encoded as 0x20 0x00 */
/* i.e. this is utf-16-le */

/* Every pattern in this scanner therefore spells a character as its
   low byte followed by \x00; a two-byte unit is the smallest thing any
   rule consumes. */

%{
#include "gedcom.tab.h"
#include "gedcom.h"
#include "multilex.h"
#include "encoding.h"

#define YY_NO_UNPUT
%}

/* NOTE(review): NORMAL and EXPECT_TAG are declared as start conditions,
   but none of the rules in this file is qualified with a start-condition
   prefix.  Presumably the prefixes were lost at some point -- confirm
   against the sibling lexers before relying on the rule ordering. */
%s NORMAL
%s EXPECT_TAG

/* Single-unit character classes (low byte, then a zero high byte). */
alpha        [A-Za-z_]\x00
digit        [0-9]\x00
delim        \x20\x00
tab          [\t]\x00
hash         #\x00
/* A doubled at-sign. */
literal_at   @\x00@\x00
/* Either a unit with a zero high byte whose low byte is not a letter,
   digit, underscore, space, hash, at-sign, control character or 0x7F,
   or any unit whose high byte is non-zero. */
otherchar    [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]\x00|[\x00-\xFF][\x01-\xFF]
/* CR, LF, CR LF or LF CR. */
terminator   \x0D\x00|\x0A\x00|\x0D\x00\x0A\x00|\x0A\x00\x0D\x00

/* Composite classes built from the ones above. */
any_char      {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
non_at        {alpha}|{digit}|{otherchar}|{delim}|{hash}
alphanum      {alpha}|{digit}
gen_delim     {delim}|{tab}

/* "@#" followed by one or more characters and a closing "@". */
escape       @\x00#\x00{any_char}+@\x00
/* "@", one alphanumeric, one or more non-at characters, then "@". */
pointer      @\x00{alphanum}{non_at}+@\x00

%{
/* Scanner-wide state; not referenced directly in this file, so
   presumably consumed by the ACTION_* macros -- confirm in
   gedcom_lex_common.c. */
static int current_level=-1;
static int level_diff=MAXGEDCLEVEL;

#ifdef LEXER_TEST
/* Stand-ins for the symbols the parser normally provides, so the
   scanner can be built on its own with the test driver. */
YYSTYPE gedcom_lval;
int line_no = 1;
#endif
%}

%%

%{
/* The ACTION_* and MKTAGACTION macros used by the rules below are not
   defined in this file; presumably they come from this include. */
#include "gedcom_lex_common.c"

ACTION_BEFORE_REGEXPS

%}

{gen_delim}* /* ignore leading whitespace (also tabs) */

  /* A number starting with the digit 0 and followed by more digits.
     The zero is written low byte first, like every other pattern
     here; with the bytes in the opposite order this rule could never
     match at a unit boundary and such numbers would be taken by the
     plain digit rule below. */
[0]\x00{digit}+ ACTION_0_DIGITS

{digit}+ ACTION_DIGITS

  /* Known tags: the ASCII letters of the tag, each followed by \x00,
     handed to MKTAGACTION with the tag name. */
A\x00B\x00B\x00R\x00 MKTAGACTION(ABBR)
A\x00D\x00D\x00R\x00 MKTAGACTION(ADDR)
A\x00D\x00R\x001\x00 MKTAGACTION(ADR1)
A\x00D\x00R\x002\x00 MKTAGACTION(ADR2)
A\x00D\x00O\x00P\x00 MKTAGACTION(ADOP)
A\x00F\x00N\x00 MKTAGACTION(AFN)
A\x00G\x00E\x00 MKTAGACTION(AGE)
A\x00G\x00N\x00C\x00 MKTAGACTION(AGNC)
A\x00L\x00I\x00A\x00 MKTAGACTION(ALIA)
A\x00N\x00C\x00E\x00 MKTAGACTION(ANCE)
A\x00N\x00C\x00I\x00 MKTAGACTION(ANCI)
A\x00N\x00U\x00L\x00 MKTAGACTION(ANUL)
A\x00S\x00S\x00O\x00 MKTAGACTION(ASSO)
A\x00U\x00T\x00H\x00 MKTAGACTION(AUTH)
B\x00A\x00P\x00L\x00 MKTAGACTION(BAPL)
B\x00A\x00P\x00M\x00 MKTAGACTION(BAPM)
B\x00A\x00R\x00M\x00 MKTAGACTION(BARM)
B\x00A\x00S\x00M\x00 MKTAGACTION(BASM)
B\x00I\x00R\x00T\x00 MKTAGACTION(BIRT)
B\x00L\x00E\x00S\x00 MKTAGACTION(BLES)
B\x00L\x00O\x00B\x00 MKTAGACTION(BLOB)
B\x00U\x00R\x00I\x00 MKTAGACTION(BURI)
C\x00A\x00L\x00N\x00 MKTAGACTION(CALN)
C\x00A\x00S\x00T\x00 MKTAGACTION(CAST)
C\x00A\x00U\x00S\x00 MKTAGACTION(CAUS)
C\x00E\x00N\x00S\x00 MKTAGACTION(CENS)
C\x00H\x00A\x00N\x00 MKTAGACTION(CHAN)
C\x00H\x00A\x00R\x00 MKTAGACTION(CHAR)
C\x00H\x00I\x00L\x00 MKTAGACTION(CHIL)
C\x00H\x00R\x00 MKTAGACTION(CHR)
C\x00H\x00R\x00A\x00 MKTAGACTION(CHRA)
C\x00I\x00T\x00Y\x00 MKTAGACTION(CITY)
C\x00O\x00N\x00C\x00 MKTAGACTION(CONC)
C\x00O\x00N\x00F\x00 MKTAGACTION(CONF)
C\x00O\x00N\x00L\x00 MKTAGACTION(CONL)
C\x00O\x00N\x00T\x00 MKTAGACTION(CONT)
C\x00O\x00P\x00R\x00 MKTAGACTION(COPR)
C\x00O\x00R\x00P\x00 MKTAGACTION(CORP)
C\x00R\x00E\x00M\x00 MKTAGACTION(CREM)
C\x00T\x00R\x00Y\x00 MKTAGACTION(CTRY)
D\x00A\x00T\x00A\x00 MKTAGACTION(DATA)
D\x00A\x00T\x00E\x00 MKTAGACTION(DATE)
D\x00E\x00A\x00T\x00 MKTAGACTION(DEAT)
D\x00E\x00S\x00C\x00 MKTAGACTION(DESC)
D\x00E\x00S\x00I\x00 MKTAGACTION(DESI)
D\x00E\x00S\x00T\x00 MKTAGACTION(DEST)
D\x00I\x00V\x00 MKTAGACTION(DIV)
D\x00I\x00V\x00F\x00 MKTAGACTION(DIVF)
D\x00S\x00C\x00R\x00 MKTAGACTION(DSCR)
E\x00D\x00U\x00C\x00 MKTAGACTION(EDUC)
E\x00M\x00I\x00G\x00 MKTAGACTION(EMIG)
E\x00N\x00D\x00L\x00 MKTAGACTION(ENDL)
E\x00N\x00G\x00A\x00 MKTAGACTION(ENGA)
E\x00V\x00E\x00N\x00 MKTAGACTION(EVEN)
F\x00A\x00M\x00 MKTAGACTION(FAM)
F\x00A\x00M\x00C\x00 MKTAGACTION(FAMC)
F\x00A\x00M\x00F\x00 MKTAGACTION(FAMF)
F\x00A\x00M\x00S\x00 MKTAGACTION(FAMS)
F\x00C\x00O\x00M\x00 MKTAGACTION(FCOM)
F\x00I\x00L\x00E\x00 MKTAGACTION(FILE)
F\x00O\x00R\x00M\x00 MKTAGACTION(FORM)
G\x00E\x00D\x00C\x00 MKTAGACTION(GEDC)
G\x00I\x00V\x00N\x00 MKTAGACTION(GIVN)
G\x00R\x00A\x00D\x00 MKTAGACTION(GRAD)
H\x00E\x00A\x00D\x00 MKTAGACTION(HEAD)
H\x00U\x00S\x00B\x00 MKTAGACTION(HUSB)
  /* Known-tag rules, HUSB through STAT.  Each pattern is the tag's
     ASCII letters with a \x00 byte after every letter (low byte
     first), and each action passes the tag name to MKTAGACTION, which
     is not defined in this file.  Keep the rules one per line: the
     pattern ends at the first blank and the action is the rest of the
     line. */
I\x00D\x00N\x00O\x00 MKTAGACTION(IDNO)
I\x00M\x00M\x00I\x00 MKTAGACTION(IMMI)
I\x00N\x00D\x00I\x00 MKTAGACTION(INDI)
L\x00A\x00N\x00G\x00 MKTAGACTION(LANG)
L\x00E\x00G\x00A\x00 MKTAGACTION(LEGA)
M\x00A\x00R\x00B\x00 MKTAGACTION(MARB)
M\x00A\x00R\x00C\x00 MKTAGACTION(MARC)
M\x00A\x00R\x00L\x00 MKTAGACTION(MARL)
M\x00A\x00R\x00R\x00 MKTAGACTION(MARR)
M\x00A\x00R\x00S\x00 MKTAGACTION(MARS)
M\x00E\x00D\x00I\x00 MKTAGACTION(MEDI)
N\x00A\x00M\x00E\x00 MKTAGACTION(NAME)
N\x00A\x00T\x00I\x00 MKTAGACTION(NATI)
N\x00A\x00T\x00U\x00 MKTAGACTION(NATU)
N\x00C\x00H\x00I\x00 MKTAGACTION(NCHI)
N\x00I\x00C\x00K\x00 MKTAGACTION(NICK)
N\x00M\x00R\x00 MKTAGACTION(NMR)
N\x00O\x00T\x00E\x00 MKTAGACTION(NOTE)
N\x00P\x00F\x00X\x00 MKTAGACTION(NPFX)
N\x00S\x00F\x00X\x00 MKTAGACTION(NSFX)
O\x00B\x00J\x00E\x00 MKTAGACTION(OBJE)
O\x00C\x00C\x00U\x00 MKTAGACTION(OCCU)
O\x00R\x00D\x00I\x00 MKTAGACTION(ORDI)
O\x00R\x00D\x00N\x00 MKTAGACTION(ORDN)
P\x00A\x00G\x00E\x00 MKTAGACTION(PAGE)
P\x00E\x00D\x00I\x00 MKTAGACTION(PEDI)
P\x00H\x00O\x00N\x00 MKTAGACTION(PHON)
P\x00L\x00A\x00C\x00 MKTAGACTION(PLAC)
P\x00O\x00S\x00T\x00 MKTAGACTION(POST)
P\x00R\x00O\x00B\x00 MKTAGACTION(PROB)
P\x00R\x00O\x00P\x00 MKTAGACTION(PROP)
P\x00U\x00B\x00L\x00 MKTAGACTION(PUBL)
Q\x00U\x00A\x00Y\x00 MKTAGACTION(QUAY)
R\x00E\x00F\x00N\x00 MKTAGACTION(REFN)
R\x00E\x00L\x00A\x00 MKTAGACTION(RELA)
R\x00E\x00L\x00I\x00 MKTAGACTION(RELI)
R\x00E\x00P\x00O\x00 MKTAGACTION(REPO)
R\x00E\x00S\x00I\x00 MKTAGACTION(RESI)
R\x00E\x00S\x00N\x00 MKTAGACTION(RESN)
R\x00E\x00T\x00I\x00 MKTAGACTION(RETI)
R\x00F\x00N\x00 MKTAGACTION(RFN)
R\x00I\x00N\x00 MKTAGACTION(RIN)
R\x00O\x00L\x00E\x00 MKTAGACTION(ROLE)
S\x00E\x00X\x00 MKTAGACTION(SEX)
S\x00L\x00G\x00C\x00 MKTAGACTION(SLGC)
S\x00L\x00G\x00S\x00 MKTAGACTION(SLGS)
S\x00O\x00U\x00R\x00 MKTAGACTION(SOUR)
S\x00P\x00F\x00X\x00 MKTAGACTION(SPFX)
S\x00S\x00N\x00 MKTAGACTION(SSN)
S\x00T\x00A\x00E\x00 MKTAGACTION(STAE)
S\x00T\x00A\x00T\x00 MKTAGACTION(STAT)
S\x00U\x00B\x00M\x00 MKTAGACTION(SUBM)
  /* Last part of the known-tag rules (SUBM through WILL): the tag's
     ASCII letters, each followed by \x00, passed to MKTAGACTION. */
S\x00U\x00B\x00N\x00 MKTAGACTION(SUBN)
S\x00U\x00R\x00N\x00 MKTAGACTION(SURN)
T\x00E\x00M\x00P\x00 MKTAGACTION(TEMP)
T\x00E\x00X\x00T\x00 MKTAGACTION(TEXT)
T\x00I\x00M\x00E\x00 MKTAGACTION(TIME)
T\x00I\x00T\x00L\x00 MKTAGACTION(TITL)
T\x00R\x00L\x00R\x00 MKTAGACTION(TRLR)
T\x00Y\x00P\x00E\x00 MKTAGACTION(TYPE)
V\x00E\x00R\x00S\x00 MKTAGACTION(VERS)
W\x00I\x00F\x00E\x00 MKTAGACTION(WIFE)
W\x00I\x00L\x00L\x00 MKTAGACTION(WILL)

  /* Any other run of letters, digits and underscores.  It is listed
     after the known tags so that, for matches of equal length, the
     specific tag rule above is chosen. */
{alphanum}+ ACTION_ALPHANUM

  /* Generic tokens: a single space, any other single character, an
     escape sequence (only when a non-at character follows it), and a
     pointer. */
{delim} ACTION_DELIM

{any_but_delim} ACTION_ANY

{escape}/{non_at} ACTION_ESCAPE

{pointer} ACTION_POINTER

  /* End of line, with optional trailing spaces or tabs in front. */
{gen_delim}*{terminator} ACTION_TERMINATOR

  /* End of input.  This has to be the special end-of-file pattern; an
     empty pair of angle brackets is not a valid pattern. */
<<EOF>> ACTION_EOF

  /* Catch-all for a byte that no rule above accepts. */
. ACTION_UNEXPECTED

%%

/* Called by the scanner when it reaches end of input.  Returning 1
   reports that there is no further input to switch to. */
int yywrap()
{
  return 1;
}

#ifdef LEXER_TEST
/* Stand-alone test driver, built only when LEXER_TEST is defined.
   It selects the two-byte low-high encoding width, opens a conversion
   context for "UNICODE", then calls gedcom_lohi_lex() until it
   returns 0, printing one word per token followed by a final newline.
   Returns 1 if the conversion context cannot be opened, 0 otherwise.
   NOTE(review): gedcom_lohi_lex is presumably this scanner's entry
   point under a build-supplied name prefix -- confirm in the build
   rules. */
int main()
{
  int tok, res;

  init_encodings();
  set_encoding_width(TWO_BYTE_LOHI);
  res = open_conv_to_internal("UNICODE");
  if (!res) {
    gedcom_error("Unable to open conversion context: %s",
                 strerror(errno));
    return 1;
  }

  tok = gedcom_lohi_lex();
  while (tok) {
    switch(tok) {
      case BADTOKEN:
        printf("BADTOKEN ");
        break;
      case OPEN:
        printf("OPEN(%d) ", gedcom_lval.level);
        break;
      case CLOSE:
        printf("CLOSE ");
        break;
      case ESCAPE:
        printf("ESCAPE(%s) ", gedcom_lval.string);
        break;
      case DELIM:
        printf("DELIM ");
        break;
      case ANYCHAR:
        printf("%s ", gedcom_lval.string);
        break;
      case POINTER:
        printf("POINTER(%s) ", gedcom_lval.pointer);
        break;
      case USERTAG:
        printf("USERTAG(%s) ", gedcom_lval.tag);
        break;
      default:
        /* Every remaining token value is printed as a known tag. */
        printf("TAG(%s) ", gedcom_lval.tag);
        break;
    }
    tok = gedcom_lohi_lex();
  }
  printf("\n");

  close_conv_to_internal();
  return 0;
}
#endif