/* Lexer for 1-byte encoding of Gedcom.
   Copyright (C) 2001 The Genes Development Team
   This file is part of the Gedcom parser library.
   Contributed by Peter Verthez, 2001.

   The Gedcom parser library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The Gedcom parser library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the Gedcom parser library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* $Id$ */
/* $Name$ */

%{
/* gedcom_lex_common.c is included three times, once per lexer section;
   LEX_SECTION selects which part of it is compiled in each time. */
#define LEX_SECTION 1  /* include only a specific part of the following file */
#define yymyinit gedcom_1byte_myinit
#include "gedcom_lex_common.c"

/* One byte per character in this encoding.  Not referenced in this file;
   presumably consumed by the code in gedcom_lex_common.c -- TODO confirm. */
static size_t encoding_width = 1;
%}

/* Inclusive start conditions used in addition to the built-in INITIAL;
   gedcom_check_token() below selects between the three with BEGIN(). */
%s NORMAL
%s EXPECT_TAG

/* Basic character classes.  'otherchar' is every byte in 0x21-0x7E that is
   not a letter, digit, '_', '#' or '@', plus the high bytes 0x80-0xFE.
   'terminator' accepts CR, LF, CR LF and LF CR. */
alpha        [A-Za-z_]
digit        [0-9]
delim        " "
tab          [\t]
hash         #
literal_at   @@
normal_at    @
otherchar    [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFE]
terminator   \x0D|\x0A|\x0D\x0A|\x0A\x0D

/* Composite classes built from the ones above. */
any_char     {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
non_at       {alpha}|{digit}|{otherchar}|{delim}|{hash}
alphanum     {alpha}|{digit}
gen_delim    {delim}|{tab}

/* '@#...@' escape sequence and '@X...@' pointer (cross-reference). */
escape       @#{any_char}+@
pointer      @{alphanum}{non_at}*@

%%

%{
/* Code in this block is placed at the top of the generated scanner routine.
   The ACTION_* and MKTAGACTION() macros used by the rules below are not
   defined in this file; they are expected to come from
   gedcom_lex_common.c.

   NOTE(review): the start-condition prefixes (<INITIAL>, <EXPECT_TAG>,
   <NORMAL>) and the <<EOF>> pattern below were restored.  The copy under
   review had every angle-bracket construct stripped (the end-of-file rule
   read "<>"), which left the declared start conditions unused and several
   rules unmatchable.  The assignment of prefixes to rules follows the
   action names and rule order -- confirm against the upstream file. */
#undef LEX_SECTION
#define LEX_SECTION 2  /* include only a specific part of the following file */
#include "gedcom_lex_common.c"

ACTION_BEFORE_REGEXPS

%}

<INITIAL>{gen_delim}* ACTION_INITIAL_WHITESPACE

<INITIAL>0{digit}+ ACTION_0_DIGITS

<INITIAL>{digit}+ ACTION_DIGITS

<EXPECT_TAG>ABBR  MKTAGACTION(ABBR)
<EXPECT_TAG>ADDR  MKTAGACTION(ADDR)
<EXPECT_TAG>ADR1  MKTAGACTION(ADR1)
<EXPECT_TAG>ADR2  MKTAGACTION(ADR2)
<EXPECT_TAG>ADOP  MKTAGACTION(ADOP)
<EXPECT_TAG>AFN   MKTAGACTION(AFN)
<EXPECT_TAG>AGE   MKTAGACTION(AGE)
<EXPECT_TAG>AGNC  MKTAGACTION(AGNC)
<EXPECT_TAG>ALIA  MKTAGACTION(ALIA)
<EXPECT_TAG>ANCE  MKTAGACTION(ANCE)
<EXPECT_TAG>ANCI  MKTAGACTION(ANCI)
<EXPECT_TAG>ANUL  MKTAGACTION(ANUL)
<EXPECT_TAG>ASSO  MKTAGACTION(ASSO)
<EXPECT_TAG>AUTH  MKTAGACTION(AUTH)
<EXPECT_TAG>BAPL  MKTAGACTION(BAPL)
<EXPECT_TAG>BAPM  MKTAGACTION(BAPM)
<EXPECT_TAG>BARM  MKTAGACTION(BARM)
<EXPECT_TAG>BASM  MKTAGACTION(BASM)
<EXPECT_TAG>BIRT  MKTAGACTION(BIRT)
<EXPECT_TAG>BLES  MKTAGACTION(BLES)
<EXPECT_TAG>BLOB  MKTAGACTION(BLOB)
<EXPECT_TAG>BURI  MKTAGACTION(BURI)
<EXPECT_TAG>CALN  MKTAGACTION(CALN)
<EXPECT_TAG>CAST  MKTAGACTION(CAST)
<EXPECT_TAG>CAUS  MKTAGACTION(CAUS)
<EXPECT_TAG>CENS  MKTAGACTION(CENS)
<EXPECT_TAG>CHAN  MKTAGACTION(CHAN)
<EXPECT_TAG>CHAR  MKTAGACTION(CHAR)
<EXPECT_TAG>CHIL  MKTAGACTION(CHIL)
<EXPECT_TAG>CHR   MKTAGACTION(CHR)
<EXPECT_TAG>CHRA  MKTAGACTION(CHRA)
<EXPECT_TAG>CITY  MKTAGACTION(CITY)
<EXPECT_TAG>CONC  MKTAGACTION(CONC)
<EXPECT_TAG>CONF  MKTAGACTION(CONF)
<EXPECT_TAG>CONL  MKTAGACTION(CONL)
<EXPECT_TAG>CONT  MKTAGACTION(CONT)
<EXPECT_TAG>COPR  MKTAGACTION(COPR)
<EXPECT_TAG>CORP  MKTAGACTION(CORP)
<EXPECT_TAG>CREM  MKTAGACTION(CREM)
<EXPECT_TAG>CTRY  MKTAGACTION(CTRY)
<EXPECT_TAG>DATA  MKTAGACTION(DATA)
<EXPECT_TAG>DATE  MKTAGACTION(DATE)
<EXPECT_TAG>DEAT  MKTAGACTION(DEAT)
<EXPECT_TAG>DESC  MKTAGACTION(DESC)
<EXPECT_TAG>DESI  MKTAGACTION(DESI)
<EXPECT_TAG>DEST  MKTAGACTION(DEST)
<EXPECT_TAG>DIV   MKTAGACTION(DIV)
<EXPECT_TAG>DIVF  MKTAGACTION(DIVF)
<EXPECT_TAG>DSCR  MKTAGACTION(DSCR)
<EXPECT_TAG>EDUC  MKTAGACTION(EDUC)
<EXPECT_TAG>EMIG  MKTAGACTION(EMIG)
<EXPECT_TAG>ENDL  MKTAGACTION(ENDL)
<EXPECT_TAG>ENGA  MKTAGACTION(ENGA)
<EXPECT_TAG>EVEN  MKTAGACTION(EVEN)
<EXPECT_TAG>FAM   MKTAGACTION(FAM)
<EXPECT_TAG>FAMC  MKTAGACTION(FAMC)
<EXPECT_TAG>FAMF  MKTAGACTION(FAMF)
<EXPECT_TAG>FAMS  MKTAGACTION(FAMS)
<EXPECT_TAG>FCOM  MKTAGACTION(FCOM)
<EXPECT_TAG>FILE  MKTAGACTION(FILE)
<EXPECT_TAG>FORM  MKTAGACTION(FORM)
<EXPECT_TAG>GEDC  MKTAGACTION(GEDC)
<EXPECT_TAG>GIVN  MKTAGACTION(GIVN)
<EXPECT_TAG>GRAD  MKTAGACTION(GRAD)
<EXPECT_TAG>HEAD  MKTAGACTION(HEAD)
<EXPECT_TAG>HUSB  MKTAGACTION(HUSB)
<EXPECT_TAG>IDNO  MKTAGACTION(IDNO)
<EXPECT_TAG>IMMI  MKTAGACTION(IMMI)
<EXPECT_TAG>INDI  MKTAGACTION(INDI)
<EXPECT_TAG>LANG  MKTAGACTION(LANG)
<EXPECT_TAG>LEGA  MKTAGACTION(LEGA)
<EXPECT_TAG>MARB  MKTAGACTION(MARB)
<EXPECT_TAG>MARC  MKTAGACTION(MARC)
<EXPECT_TAG>MARL  MKTAGACTION(MARL)
<EXPECT_TAG>MARR  MKTAGACTION(MARR)
<EXPECT_TAG>MARS  MKTAGACTION(MARS)
<EXPECT_TAG>MEDI  MKTAGACTION(MEDI)
<EXPECT_TAG>NAME  MKTAGACTION(NAME)
<EXPECT_TAG>NATI  MKTAGACTION(NATI)
<EXPECT_TAG>NATU  MKTAGACTION(NATU)
<EXPECT_TAG>NCHI  MKTAGACTION(NCHI)
<EXPECT_TAG>NICK  MKTAGACTION(NICK)
<EXPECT_TAG>NMR   MKTAGACTION(NMR)
<EXPECT_TAG>NOTE  MKTAGACTION(NOTE)
<EXPECT_TAG>NPFX  MKTAGACTION(NPFX)
<EXPECT_TAG>NSFX  MKTAGACTION(NSFX)
<EXPECT_TAG>OBJE  MKTAGACTION(OBJE)
<EXPECT_TAG>OCCU  MKTAGACTION(OCCU)
<EXPECT_TAG>ORDI  MKTAGACTION(ORDI)
<EXPECT_TAG>ORDN  MKTAGACTION(ORDN)
<EXPECT_TAG>PAGE  MKTAGACTION(PAGE)
<EXPECT_TAG>PEDI  MKTAGACTION(PEDI)
<EXPECT_TAG>PHON  MKTAGACTION(PHON)
<EXPECT_TAG>PLAC  MKTAGACTION(PLAC)
<EXPECT_TAG>POST  MKTAGACTION(POST)
<EXPECT_TAG>PROB  MKTAGACTION(PROB)
<EXPECT_TAG>PROP  MKTAGACTION(PROP)
<EXPECT_TAG>PUBL  MKTAGACTION(PUBL)
<EXPECT_TAG>QUAY  MKTAGACTION(QUAY)
<EXPECT_TAG>REFN  MKTAGACTION(REFN)
<EXPECT_TAG>RELA  MKTAGACTION(RELA)
<EXPECT_TAG>RELI  MKTAGACTION(RELI)
<EXPECT_TAG>REPO  MKTAGACTION(REPO)
<EXPECT_TAG>RESI  MKTAGACTION(RESI)
<EXPECT_TAG>RESN  MKTAGACTION(RESN)
<EXPECT_TAG>RETI  MKTAGACTION(RETI)
<EXPECT_TAG>RFN   MKTAGACTION(RFN)
<EXPECT_TAG>RIN   MKTAGACTION(RIN)
<EXPECT_TAG>ROLE  MKTAGACTION(ROLE)
<EXPECT_TAG>SEX   MKTAGACTION(SEX)
<EXPECT_TAG>SLGC  MKTAGACTION(SLGC)
<EXPECT_TAG>SLGS  MKTAGACTION(SLGS)
<EXPECT_TAG>SOUR  MKTAGACTION(SOUR)
<EXPECT_TAG>SPFX  MKTAGACTION(SPFX)
<EXPECT_TAG>SSN   MKTAGACTION(SSN)
<EXPECT_TAG>STAE  MKTAGACTION(STAE)
<EXPECT_TAG>STAT  MKTAGACTION(STAT)
<EXPECT_TAG>SUBM  MKTAGACTION(SUBM)
<EXPECT_TAG>SUBN  MKTAGACTION(SUBN)
<EXPECT_TAG>SURN  MKTAGACTION(SURN)
<EXPECT_TAG>TEMP  MKTAGACTION(TEMP)
<EXPECT_TAG>TEXT  MKTAGACTION(TEXT)
<EXPECT_TAG>TIME  MKTAGACTION(TIME)
<EXPECT_TAG>TITL  MKTAGACTION(TITL)
<EXPECT_TAG>TRLR  MKTAGACTION(TRLR)
<EXPECT_TAG>TYPE  MKTAGACTION(TYPE)
<EXPECT_TAG>VERS  MKTAGACTION(VERS)
<EXPECT_TAG>WIFE  MKTAGACTION(WIFE)
<EXPECT_TAG>WILL  MKTAGACTION(WILL)

<EXPECT_TAG>{alphanum}+ ACTION_ALPHANUM

<NORMAL>{delim} ACTION_DELIM

<NORMAL>{any_but_delim} ACTION_ANY

<NORMAL>{escape}/{non_at} ACTION_ESCAPE

<NORMAL>{pointer} ACTION_POINTER

{gen_delim}*{terminator} ACTION_TERMINATOR

<<EOF>> ACTION_EOF

<NORMAL>{normal_at} ACTION_NORMAL_AT

{tab} ACTION_TAB

. ACTION_UNEXPECTED

%%

#undef LEX_SECTION
#define LEX_SECTION 3  /* include only a specific part of the following file */
#include "gedcom_lex_common.c"

/* Check whether a string lexes to exactly one given token.

   str          NUL-terminated string to scan (must not be NULL).
   state        lexer state to start in; STATE_NORMAL, STATE_INITIAL and
                STATE_EXPECT_TAG select the matching start condition, any
                other value leaves the current start condition untouched.
   check_token  token value the first yylex() call is expected to return.

   Returns 0 when the first token equals check_token and, if that token is
   non-zero, the following yylex() call returns 0; returns 1 otherwise.

   Side effects: the scanner's current buffer is deleted before scanning,
   and the temporary buffer created for str is deleted before returning, so
   any scan in progress on this lexer is discarded.  */
int gedcom_check_token(const char* str, ParseState state, int check_token)
{
  int result = 0;
  int token;
  YY_BUFFER_STATE buffer;

  /* Replace whatever the scanner was reading with an in-memory copy of str */
  yy_delete_buffer(YY_CURRENT_BUFFER);
  buffer = yy_scan_string(str);

  INIT_LINE_LEN;
  if (state == STATE_NORMAL) {
    BEGIN(NORMAL);
  }
  else if (state == STATE_INITIAL) {
    BEGIN(INITIAL);
  }
  else if (state == STATE_EXPECT_TAG) {
    BEGIN(EXPECT_TAG);
  }

  /* Input is UTF-8 coming from the application, so bypass iconv */
  dummy_conv = 1;

  token = yylex();
  if (token != check_token) {
    result = 1;
  }

  /* A non-zero first token must be the only one: the next call has to
     return 0, otherwise str contained trailing input. */
  if (token != 0) {
    token = yylex();
    if (token != 0) {
      result = 1;
    }
  }

  dummy_conv = 0;

  yy_delete_buffer(buffer);
  return result;
}

#ifdef LEXER_TEST
/* Stand-alone test driver: exposes this lexer under the generic name
   gedcom_lex() and runs test_loop() (defined outside this file). */
int gedcom_lex(void)
{
  return gedcom_1byte_lex();
}

int main(void)
{
  return test_loop(ONE_BYTE, "ASCII");
}
#endif