From 8093e53a57e174b019f07760f5bf815271ceee9b Mon Sep 17 00:00:00 2001 From: Peter Verthez Date: Sun, 9 Dec 2001 09:50:23 +0000 Subject: [PATCH] Moved to gedcom subdirectory. --- encoding.c => gedcom/encoding.c | 0 encoding.h => gedcom/encoding.h | 0 gedcom/gedcom.y | 2667 +++++++++++++++++++++++++++++++ gedcom/gedcom_1byte.lex | 220 +++ gedcom/gedcom_hilo.lex | 223 +++ gedcom/gedcom_internal.h | 40 + gedcom/gedcom_lex_common.c | 284 ++++ gedcom/gedcom_lohi.lex | 223 +++ gedcom/interface.c | 80 + gedcom/interface.h | 28 + gedcom/message.c | 124 ++ gedcom/multilex.c | 118 ++ gedcom/multilex.h | 25 + 13 files changed, 4032 insertions(+) rename encoding.c => gedcom/encoding.c (100%) rename encoding.h => gedcom/encoding.h (100%) create mode 100644 gedcom/gedcom.y create mode 100644 gedcom/gedcom_1byte.lex create mode 100644 gedcom/gedcom_hilo.lex create mode 100644 gedcom/gedcom_internal.h create mode 100644 gedcom/gedcom_lex_common.c create mode 100644 gedcom/gedcom_lohi.lex create mode 100644 gedcom/interface.c create mode 100644 gedcom/interface.h create mode 100644 gedcom/message.c create mode 100644 gedcom/multilex.c create mode 100644 gedcom/multilex.h diff --git a/encoding.c b/gedcom/encoding.c similarity index 100% rename from encoding.c rename to gedcom/encoding.c diff --git a/encoding.h b/gedcom/encoding.h similarity index 100% rename from encoding.h rename to gedcom/encoding.h diff --git a/gedcom/gedcom.y b/gedcom/gedcom.y new file mode 100644 index 0000000..d957ca0 --- /dev/null +++ b/gedcom/gedcom.y @@ -0,0 +1,2667 @@ +/* This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + + (C) 2001 by The Genes Development Team + Original author: Peter Verthez (Peter.Verthez@advalvas.be) +*/ + +/* $Id$ */ +/* $Name$ */ + +/* WARNING: THIS PARSER RELIES HEAVILY ON SOME FEATURES OF BISON. + DON'T TRY TO USE IT WITH YACC, IT WON'T WORK... +*/ + +/* Design of the parser: + --------------------- + In general, a GEDCOM file contains records, each consisting of a line + (which we'll call a section), hierarchically containing other lines + (subsections of the section). + + This means that in general we have: + + A 'record' is a 'section' (sect) containing 'subsections' (subs) + Each 'subsection' (sub) is again a specific 'section' (sect) + + In parser notation, this means: + + record : sect + + sect : subs + + subs : | subs sub + + sub : sect_a | sect_b | ... + + This pattern is repeated throughout the parser for the different types of + sections. + + + Cardinality of the subsections: + ------------------------------- + Note that in the above, the order of the subsections is of no importance. + Indeed, this is the case in the GEDCOM grammar. However, this also makes + it difficult to check whether there are not too many subsections of a + specific type, or whether a mandatory subsection is indeed there. + + Suppose there is a section A that can contain 0 or 1 B section and + 2 C sections. + + This can be expressed in parser notation as follows: + + A : CC | BCC | CBC | CCB + + So, cardinality is indeed expressable. However, as the number of subsection + types and the limits grow bigger (and even theoretically limitless), listing + all possible permutations becomes quickly unfeasible. 
+
+   Much simpler is to say:
+
+     A    : subs
+     subs : | subs sub
+     sub  : B | C
+
+   and then check the cardinality in the semantic actions, which is the
+   solution chosen in the parser below, using the following macros:
+
+    - OPEN(PARENT)
+       Make a new context for the PARENT tag to count child tags in.
+       (In the prologue below this macro is defined under the name
+       START(PARENTTAG, PARENTCTXT); OPEN itself is a token.)
+
+    - OCCUR2(CHILD, MIN, MAX)
+       Express that the CHILD tag should occur at least MIN times and
+       at most MAX times within its parent
+
+       What this actually does is the following.  It increments the counter
+       for that tag and then checks whether the maximum is exceeded.  If so,
+       then a parser error is produced.  The minimum is not actually checked
+       by this macro, but it makes the statements more declarative.
+
+    - OCCUR1(CHILD, MIN)
+       Express that the CHILD tag should occur at least MIN times within
+       its parent (no upper limit)
+
+       Actually, this only increments the counter for the tag, but it looks
+       very much like the previous macro.
+
+       If the minimum is 0, it is not necessary to express this constraint.
+
+    - CHECKn(CHILD1, ..., CHILDn)
+       This closes the context for the parent tag and checks whether the
+       given CHILD tags did effectively occur within the parent (i.e.
+       these are the tags that were mandatory).
+
+       Since the MIN values above are always 0 or 1 in GEDCOM, this is
+       sufficient.  All sub-tags that declare a minimum of 1 in the OCCUR
+       macros should be listed in this macro here.
+
+       The macros CHECK0 to CHECK4 are defined like this (the first one
+       has no arguments and is only used to close the parent context; note
+       that this is necessary for correct functioning).
+
+   Example of usage:
+
+     Only sections that have subsections need to use these macros.
This can + be done like this (the OPEN and CHECK macros are used as mid-rule + actions around the subsections): + + head_sect : OPEN DELIM TAG_HEAD + { OPEN(HEAD) } + head_subs + { CHECK1(SOUR) } + CLOSE { } + + head_subs : + | head_subs head_sub + ; + + head_sub : head_sour_sect { OCCUR2(SOUR, 1, 1) } + | head_dest_sect { OCCUR2(DEST, 0, 1) } + | head_date_sect { OCCUR2(DATE, 0, 1) } + ; +*/ + +/* General notes: + + - The syntax analysis doesn't handle the contents of the line values; + this is done in the semantic analysis. + + */ + +%{ +#include "gedcom_internal.h" +#include "multilex.h" +#include "encoding.h" +#include "interface.h" + +int count_level = 0; +int fail = 0; +int compat_enabled = 1; +int gedcom_high_level_debug = 0; +int compatibility = 0; +Gedcom_err_mech error_mechanism = IMMED_FAIL; + +char line_item_buf[MAXGEDCLINELEN * UTF_FACTOR + 1]; +char *line_item_buf_ptr; + +enum _COMPAT { + C_FTREE = 0x01 +}; + +/* These are defined at the bottom of the file */ +void push_countarray(); +void set_parenttag(char* tag); +char* get_parenttag(); +void set_parentctxt(Gedcom_ctxt ctxt); +Gedcom_ctxt get_parentctxt(); +void pop_countarray(); +int count_tag(int tag); +int check_occurrence(int tag); +void set_compatibility(char* program); +int compat_mode(int flags); + +#define CLEAR_BUFFER(BUF) \ + memset(BUF, 0, sizeof(BUF)); + +#define HANDLE_ERROR \ + { if (error_mechanism == IMMED_FAIL) { \ + YYABORT; \ + } \ + else if (error_mechanism == DEFER_FAIL) { \ + yyerrok; fail = 1; \ + } \ + else if (error_mechanism == IGNORE_ERRORS) { \ + yyerrok; \ + } \ + } +#define START(PARENTTAG,PARENTCTXT) \ + { ++count_level; \ + set_parenttag(#PARENTTAG); \ + set_parentctxt(PARENTCTXT); \ + push_countarray(); \ + } +#define PARENT \ + get_parentctxt() +#define CHK(TAG) \ + { if (!check_occurrence(TAG_##TAG)) { \ + char* parenttag = get_parenttag(); \ + gedcom_error("The tag '%s' is mandatory within '%s', but missing", \ + #TAG, parenttag); \ + HANDLE_ERROR; \ + } \ + } 
+#define POP \ + { pop_countarray(); \ + --count_level; \ + } +#define CHECK0 POP; +#define CHECK1(TAG1) { CHK(TAG1); POP; } +#define CHECK2(TAG1,TAG2) \ + { CHK(TAG1); CHK(TAG2); POP; } +#define CHECK3(TAG1,TAG2,TAG3) \ + { CHK(TAG1); CHK(TAG2); CHK(TAG3); POP; } +#define CHECK4(TAG1,TAG2,TAG3,TAG4) \ + { CHK(TAG1); CHK(TAG2); CHK(TAG3); CHK(TAG4); POP; } +#define OCCUR1(CHILDTAG, MIN) { count_tag(TAG_##CHILDTAG); } +#define OCCUR2(CHILDTAG, MIN, MAX) \ + { int num = count_tag(TAG_##CHILDTAG); \ + if (num > MAX) { \ + char* parenttag = get_parenttag(); \ + gedcom_error("The tag '%s' can maximally occur %d " \ + "time(s) within '%s'", \ + #CHILDTAG, MAX, parenttag); \ + HANDLE_ERROR; \ + } \ + } +#define INVALID_TAG(CHILDTAG) \ + { char* parenttag = get_parenttag(); \ + gedcom_error("The tag '%s' is not a valid tag within '%s'", \ + CHILDTAG, parenttag); \ + HANDLE_ERROR; \ + } +#define INVALID_TOP_TAG(CHILDTAG) \ + { gedcom_error("The tag '%s' is not a valid top-level tag", \ + CHILDTAG); \ + HANDLE_ERROR; \ + } + +%} + +%union { + int number; + char *string; + Gedcom_ctxt ctxt; +} + +%token_table +%expect 300 + +%token BADTOKEN +%token OPEN +%token CLOSE +%token ESCAPE +%token DELIM +%token ANYCHAR +%token POINTER +%token USERTAG +%token TAG_ABBR +%token TAG_ADDR +%token TAG_ADR1 +%token TAG_ADR2 +%token TAG_ADOP +%token TAG_AFN +%token TAG_AGE +%token TAG_AGNC +%token TAG_ALIA +%token TAG_ANCE +%token TAG_ANCI +%token TAG_ANUL +%token TAG_ASSO +%token TAG_AUTH +%token TAG_BAPL +%token TAG_BAPM +%token TAG_BARM +%token TAG_BASM +%token TAG_BIRT +%token TAG_BLES +%token TAG_BLOB +%token TAG_BURI +%token TAG_CALN +%token TAG_CAST +%token TAG_CAUS +%token TAG_CENS +%token TAG_CHAN +%token TAG_CHAR +%token TAG_CHIL +%token TAG_CHR +%token TAG_CHRA +%token TAG_CITY +%token TAG_CONC +%token TAG_CONF +%token TAG_CONL +%token TAG_CONT +%token TAG_COPR +%token TAG_CORP +%token TAG_CREM +%token TAG_CTRY +%token TAG_DATA +%token TAG_DATE +%token TAG_DEAT +%token TAG_DESC 
+%token TAG_DESI +%token TAG_DEST +%token TAG_DIV +%token TAG_DIVF +%token TAG_DSCR +%token TAG_EDUC +%token TAG_EMIG +%token TAG_ENDL +%token TAG_ENGA +%token TAG_EVEN +%token TAG_FAM +%token TAG_FAMC +%token TAG_FAMF +%token TAG_FAMS +%token TAG_FCOM +%token TAG_FILE +%token TAG_FORM +%token TAG_GEDC +%token TAG_GIVN +%token TAG_GRAD +%token TAG_HEAD +%token TAG_HUSB +%token TAG_IDNO +%token TAG_IMMI +%token TAG_INDI +%token TAG_LANG +%token TAG_LEGA +%token TAG_MARB +%token TAG_MARC +%token TAG_MARL +%token TAG_MARR +%token TAG_MARS +%token TAG_MEDI +%token TAG_NAME +%token TAG_NATI +%token TAG_NATU +%token TAG_NCHI +%token TAG_NICK +%token TAG_NMR +%token TAG_NOTE +%token TAG_NPFX +%token TAG_NSFX +%token TAG_OBJE +%token TAG_OCCU +%token TAG_ORDI +%token TAG_ORDN +%token TAG_PAGE +%token TAG_PEDI +%token TAG_PHON +%token TAG_PLAC +%token TAG_POST +%token TAG_PROB +%token TAG_PROP +%token TAG_PUBL +%token TAG_QUAY +%token TAG_REFN +%token TAG_RELA +%token TAG_RELI +%token TAG_REPO +%token TAG_RESI +%token TAG_RESN +%token TAG_RETI +%token TAG_RFN +%token TAG_RIN +%token TAG_ROLE +%token TAG_SEX +%token TAG_SLGC +%token TAG_SLGS +%token TAG_SOUR +%token TAG_SPFX +%token TAG_SSN +%token TAG_STAE +%token TAG_STAT +%token TAG_SUBM +%token TAG_SUBN +%token TAG_SURN +%token TAG_TEMP +%token TAG_TEXT +%token TAG_TIME +%token TAG_TITL +%token TAG_TRLR +%token TAG_TYPE +%token TAG_VERS +%token TAG_WIFE +%token TAG_WILL + +%type anystdtag +%type anytoptag +%type line_item +%type line_value +%type mand_line_item +%type mand_pointer +%type note_line_item +%type anychar +%type opt_xref +%type opt_value +%type head_sect + +%% + +file : head_sect records trlr_sect + { if (fail == 1) YYABORT; } + ; + +records : /* empty */ + | records record + ; + +record : fam_rec + | indiv_rec + | multim_rec + | note_rec + | repos_rec + | source_rec + | submis_rec + | submit_rec + | no_std_rec + ; + +/*********************************************************************/ +/**** Header ****/ 
+/*********************************************************************/ +head_sect : OPEN DELIM TAG_HEAD + { $$ = start_record(REC_HEAD, $1, NULL, $3); + START(HEAD, $$) } + head_subs + { if (compat_mode(C_FTREE)) + CHECK3(SOUR, GEDC, CHAR) + else + CHECK4(SOUR, SUBM, GEDC, CHAR) + } + CLOSE + { end_record(REC_HEAD, $4); } + ; + +head_subs : /* empty */ + | head_subs head_sub + ; + +head_sub : head_sour_sect { OCCUR2(SOUR, 1, 1) } + | head_dest_sect { OCCUR2(DEST, 0, 1) } + | head_date_sect { OCCUR2(DATE, 0, 1) } + | head_subm_sect { OCCUR2(SUBM, 1, 1) } + | head_subn_sect { OCCUR2(SUBN, 0, 1) } + | head_file_sect { OCCUR2(FILE, 0, 1) } + | head_copr_sect { OCCUR2(COPR, 0, 1) } + | head_gedc_sect { OCCUR2(GEDC, 1, 1) } + | head_char_sect { OCCUR2(CHAR, 1, 1) } + | head_lang_sect { OCCUR2(LANG, 0, 1) } + | head_plac_sect { OCCUR2(PLAC, 0, 1) } + | head_note_sect { OCCUR2(NOTE, 0, 1) } + | no_std_sub + ; + +/* HEAD.SOUR */ +head_sour_sect : OPEN DELIM TAG_SOUR mand_line_item + { set_compatibility($4); + $$ = start_element(ELT_HEAD_SOUR, PARENT, + $1, $3, $4, $4); + START(SOUR, $$) + } + head_sour_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SOUR, PARENT, $5, NULL); } + ; + +head_sour_subs : /* empty */ + | head_sour_subs head_sour_sub + ; + +head_sour_sub : head_sour_vers_sect { OCCUR2(VERS, 0, 1) } + | head_sour_name_sect { OCCUR2(NAME, 0, 1) } + | head_sour_corp_sect { OCCUR2(CORP, 0, 1) } + | head_sour_data_sect { OCCUR2(DATA, 0, 1) } + | no_std_sub + ; + +head_sour_vers_sect : OPEN DELIM TAG_VERS mand_line_item + { $$ = start_element(ELT_HEAD_SOUR_VERS, PARENT, + $1, $3, $4, $4); + START(VERS, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SOUR_VERS, + PARENT, $5, NULL); + } + ; +head_sour_name_sect : OPEN DELIM TAG_NAME mand_line_item + { $$ = start_element(ELT_HEAD_SOUR_NAME, PARENT, + $1, $3, $4, $4); + START(NAME, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SOUR_NAME, + PARENT, $5, NULL); + } + ; 
+head_sour_corp_sect : OPEN DELIM TAG_CORP mand_line_item + { $$ = start_element(ELT_HEAD_SOUR_CORP, PARENT, + $1, $3, $4, $4); + START(CORP, $$) + } + head_sour_corp_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SOUR_CORP, + PARENT, $5, NULL); + } + ; + +head_sour_corp_subs : /* empty */ + | head_sour_corp_subs head_sour_corp_sub + ; + +head_sour_corp_sub : addr_struc_sub /* 0:1 */ + | no_std_sub + ; + +head_sour_data_sect : OPEN DELIM TAG_DATA mand_line_item + { $$ = start_element(ELT_HEAD_SOUR_DATA, PARENT, + $1, $3, $4, $4); + START(DATA, $$) + } + head_sour_data_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SOUR_DATA, + PARENT, $5, NULL); + } + ; + +head_sour_data_subs : /* empty */ + | head_sour_data_subs head_sour_data_sub + ; + +head_sour_data_sub : head_sour_data_date_sect { OCCUR2(DATE, 0, 1) } + | head_sour_data_copr_sect { OCCUR2(COPR, 0, 1) } + | no_std_sub + ; + +head_sour_data_date_sect : OPEN DELIM TAG_DATE mand_line_item + { $$ = start_element(ELT_HEAD_SOUR_DATA_DATE, + PARENT, $1, $3, $4, $4); + START(DATE, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SOUR_DATA_DATE, + PARENT, $5, NULL); + } + ; +head_sour_data_copr_sect : OPEN DELIM TAG_COPR mand_line_item + { $$ = start_element(ELT_HEAD_SOUR_DATA_COPR, + PARENT, $1, $3, $4, $4); + START(COPR, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SOUR_DATA_COPR, + PARENT, $5, NULL); + } + ; + +/* HEAD.DEST */ +head_dest_sect : OPEN DELIM TAG_DEST mand_line_item + { $$ = start_element(ELT_HEAD_DEST, + PARENT, $1, $3, $4, $4); + START(DEST, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_DEST, + PARENT, $5, NULL); + } + ; + +/* HEAD.DATE */ +head_date_sect : OPEN DELIM TAG_DATE mand_line_item + { $$ = start_element(ELT_HEAD_DATE, + PARENT, $1, $3, $4, $4); + START(DATE, $$) + } + head_date_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_DATE, + PARENT, $5, NULL); + } + ; + +head_date_subs : /* empty */ + | head_date_subs 
head_date_sub + ; + +head_date_sub : head_date_time_sect { OCCUR2(TIME, 0, 1) } + | no_std_sub + ; + +head_date_time_sect : OPEN DELIM TAG_TIME mand_line_item + { $$ = start_element(ELT_HEAD_DATE_TIME, + PARENT, $1, $3, $4, $4); + START(TIME, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_DATE_TIME, + PARENT, $5, NULL); + } + ; + +/* HEAD.SUBM */ +head_subm_sect : OPEN DELIM TAG_SUBM mand_pointer + { $$ = start_element(ELT_HEAD_SUBM, + PARENT, $1, $3, $4, $4); + START(SUBM, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SUBM, + PARENT, $5, NULL); + } + ; +/* HEAD.SUBN */ +head_subn_sect : OPEN DELIM TAG_SUBN mand_pointer + { $$ = start_element(ELT_HEAD_SUBN, + PARENT, $1, $3, $4, $4); + START(SUBN, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_SUBN, + PARENT, $5, NULL); + } + ; +/* HEAD.FILE */ +head_file_sect : OPEN DELIM TAG_FILE mand_line_item + { $$ = start_element(ELT_HEAD_FILE, + PARENT, $1, $3, $4, $4); + START(FILE, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_FILE, PARENT, $5, NULL); + } + ; +/* HEAD.COPR */ +head_copr_sect : OPEN DELIM TAG_COPR mand_line_item + { $$ = start_element(ELT_HEAD_COPR, + PARENT, $1, $3, $4, $4); + START(COPR, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_COPR, PARENT, $5, NULL); + } + ; +/* HEAD.GEDC */ +head_gedc_sect : OPEN DELIM TAG_GEDC + { $$ = start_element(ELT_HEAD_GEDC, + PARENT, $1, $3, NULL, NULL); + START(GEDC, $$) + } + head_gedc_subs + { CHECK2(VERS, FORM) } + CLOSE + { end_element(ELT_HEAD_GEDC, PARENT, $4, NULL); + } + ; + +head_gedc_subs : /* empty */ + | head_gedc_subs head_gedc_sub + ; + +head_gedc_sub : head_gedc_vers_sect { OCCUR2(VERS, 1, 1) } + | head_gedc_form_sect { OCCUR2(FORM, 1, 1) } + | no_std_sub + ; +head_gedc_vers_sect : OPEN DELIM TAG_VERS mand_line_item + { $$ = start_element(ELT_HEAD_GEDC_VERS, + PARENT, $1, $3, $4, $4); + START(VERS, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { 
end_element(ELT_HEAD_GEDC_VERS, + PARENT, $5, NULL); + } + ; +head_gedc_form_sect : OPEN DELIM TAG_FORM mand_line_item + { $$ = start_element(ELT_HEAD_GEDC_FORM, + PARENT, $1, $3, $4, $4); + START(FORM, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_GEDC_FORM, + PARENT, $5, NULL); + } + ; + +/* HEAD.CHAR */ +head_char_sect : OPEN DELIM TAG_CHAR mand_line_item + { if (open_conv_to_internal($4) == 0) YYERROR; + $$ = start_element(ELT_HEAD_CHAR, + PARENT, $1, $3, $4, $4); + START(CHAR, $$) + } + head_char_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_CHAR, PARENT, $5, NULL); + } + ; + +head_char_subs : /* empty */ + | head_char_subs head_char_sub + ; + +head_char_sub : head_char_vers_sect { OCCUR2(VERS, 0, 1) } + | no_std_sub + ; +head_char_vers_sect : OPEN DELIM TAG_VERS mand_line_item + { $$ = start_element(ELT_HEAD_CHAR_VERS, + PARENT, $1, $3, $4, $4); + START(VERS, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_CHAR_VERS, + PARENT, $5, NULL); + } + ; + +/* HEAD.LANG */ +head_lang_sect : OPEN DELIM TAG_LANG mand_line_item + { $$ = start_element(ELT_HEAD_LANG, + PARENT, $1, $3, $4, $4); + START(LANG, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_LANG, PARENT, $5, NULL); + } + ; +/* HEAD.PLAC */ +head_plac_sect : OPEN DELIM TAG_PLAC + { $$ = start_element(ELT_HEAD_PLAC, + PARENT, $1, $3, NULL, NULL); + START(PLAC, $$) + } + head_plac_subs + { CHECK1(FORM) } + CLOSE + { end_element(ELT_HEAD_PLAC, PARENT, $4, NULL); + } + ; + +head_plac_subs : /* empty */ + | head_plac_subs head_plac_sub + ; + +head_plac_sub : head_plac_form_sect { OCCUR2(FORM, 1, 1) } + | no_std_sub + ; +head_plac_form_sect : OPEN DELIM TAG_FORM mand_line_item + { $$ = start_element(ELT_HEAD_PLAC_FORM, + PARENT, $1, $3, $4, $4); + START(FORM, $$) + } + no_std_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_PLAC_FORM, + PARENT, $5, NULL); + } + ; + +/* HEAD.NOTE */ +head_note_sect : OPEN DELIM TAG_NOTE mand_line_item + { $$ = 
start_element(ELT_HEAD_NOTE, + PARENT, $1, $3, $4, $4); + START(NOTE, $$) + } + head_note_subs + { CHECK0 } + CLOSE + { end_element(ELT_HEAD_NOTE, PARENT, $5, NULL); + } + ; + +head_note_subs : /* empty */ + | head_note_subs head_note_sub + ; + +head_note_sub : continuation_sub /* 0:M */ + | no_std_sub + ; + +/*********************************************************************/ +/**** Trailer ****/ +/*********************************************************************/ +/* Don't need callbacks here, there is no information... */ +trlr_sect : OPEN DELIM TAG_TRLR CLOSE { } + ; + +/*********************************************************************/ +/**** Family record ****/ +/*********************************************************************/ +fam_rec : OPEN DELIM POINTER DELIM TAG_FAM + { $$ = start_record(REC_FAM, $1, $3, $5); + START(FAM, $$) } + fam_subs + { CHECK0 } + CLOSE + { end_record(REC_FAM, $6); } + ; + +fam_subs : /* empty */ + | fam_subs fam_sub + ; + +fam_sub : fam_event_struc_sub /* 0:M */ + | fam_husb_sect { OCCUR2(HUSB, 0, 1) } + | fam_wife_sect { OCCUR2(WIFE, 0, 1) } + | fam_chil_sect /* 0:M */ + | fam_nchi_sect { OCCUR2(NCHI, 0, 1) } + | fam_subm_sect /* 0:M */ + | lds_spouse_seal_sub /* 0:M */ + | source_cit_sub /* 0:M */ + | multim_link_sub /* 0:M */ + | note_struc_sub /* 0:M */ + | ident_struc_sub /* 0:1 */ + | change_date_sub /* 0:1 */ + | no_std_sub + ; + +/* FAM.HUSB */ +fam_husb_sect : OPEN DELIM TAG_HUSB mand_pointer + { START(HUSB, NULL) } no_std_subs { CHECK0 } CLOSE + { } + ; + +/* FAM.WIFE */ +fam_wife_sect : OPEN DELIM TAG_WIFE mand_pointer + { START(WIFE, NULL) } no_std_subs { CHECK0 } CLOSE + { } + ; + +/* FAM.CHIL */ +fam_chil_sect : OPEN DELIM TAG_CHIL mand_pointer + { START(CHIL, NULL) } no_std_subs { CHECK0 } CLOSE + { } + ; + +/* FAM.NCHI */ +fam_nchi_sect : OPEN DELIM TAG_NCHI mand_line_item + { START(NCHI, NULL) } no_std_subs { CHECK0 } CLOSE + { } + ; + +/* FAM.SUBM */ +fam_subm_sect : OPEN DELIM TAG_SUBM 
mand_pointer + { START(SUBM, NULL) } no_std_subs { CHECK0 } CLOSE + { } + ; + +/*********************************************************************/ +/**** Individual record ****/ +/*********************************************************************/ +indiv_rec : OPEN DELIM POINTER DELIM TAG_INDI + { $$ = start_record(REC_INDI, $1, $3, $5); + START(INDI, $$) } + indi_subs + { CHECK0 } + CLOSE + { end_record(REC_INDI, $6); } + ; + +indi_subs : /* empty */ + | indi_subs indi_sub + ; + +indi_sub : indi_resn_sect { OCCUR2(RESN, 0, 1) } + | pers_name_struc_sub /* 0:M */ + | indi_sex_sect { OCCUR2(SEX, 0, 1) } + | indiv_even_struc_sub /* 0:M */ + | indiv_attr_struc_sub /* 0:M */ + | lds_indiv_ord_sub /* 0:M */ + | chi_fam_link_sub /* 0:M */ + | spou_fam_link_sub /* 0:M */ + | indi_subm_sect /* 0:M */ + | assoc_struc_sub /* 0:M */ + | indi_alia_sect /* 0:M */ + | indi_anci_sect /* 0:M */ + | indi_desi_sect /* 0:M */ + | source_cit_sub /* 0:M */ + | multim_link_sub /* 0:M */ + | note_struc_sub /* 0:M */ + | indi_rfn_sect { OCCUR2(RFN, 0, 1) } + | indi_afn_sect /* 0:M */ + | ident_struc_sub /* 0:1 */ + | change_date_sub /* 0:1 */ + | ftree_addr_sect { if (!compat_mode(C_FTREE)) + INVALID_TAG("ADDR"); + } + | no_std_sub + ; + +/* INDI.RESN */ +indi_resn_sect : OPEN DELIM TAG_RESN mand_line_item + { START(RESN, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* INDI.SEX */ +indi_sex_sect : OPEN DELIM TAG_SEX mand_line_item + { START(SEX, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* INDI.SUBM */ +indi_subm_sect : OPEN DELIM TAG_SUBM mand_pointer + { START(SUBM, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* INDI.ALIA */ +indi_alia_sect : OPEN DELIM TAG_ALIA mand_pointer + { START(ALIA, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* INDI.ANCI */ +indi_anci_sect : OPEN DELIM TAG_ANCI mand_pointer + { START(ANCI, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* INDI.DESI */ +indi_desi_sect : OPEN DELIM TAG_DESI mand_pointer + { START(DESI, NULL) } no_std_subs 
{ CHECK0 } CLOSE { } + ; + +/* INDI.RFN */ +indi_rfn_sect : OPEN DELIM TAG_RFN mand_line_item + { START(RFN, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* INDI.AFN */ +indi_afn_sect : OPEN DELIM TAG_AFN mand_line_item + { START(AFN, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* INDI.ADDR (Only for 'ftree' compatibility) */ +ftree_addr_sect : OPEN DELIM TAG_ADDR opt_line_item + { START(ADDR, NULL) } no_std_subs { CHECK0 } CLOSE { } + +/*********************************************************************/ +/**** Multimedia record ****/ +/*********************************************************************/ +multim_rec : OPEN DELIM POINTER DELIM TAG_OBJE + { $$ = start_record(REC_OBJE, $1, $3, $5); + START(OBJE, $$) } + obje_subs + { CHECK2(FORM, BLOB) } + CLOSE + { end_record(REC_OBJE, $6); } + ; + +obje_subs : /* empty */ + | obje_subs obje_sub + ; + +obje_sub : obje_form_sect { OCCUR2(FORM, 1, 1) } + | obje_titl_sect { OCCUR2(TITL, 0, 1) } + | note_struc_sub /* 0:M */ + | obje_blob_sect { OCCUR2(BLOB, 1, 1) } + | obje_obje_sect { OCCUR2(OBJE, 0, 1) } + | ident_struc_sub /* 0:1 */ + | change_date_sub /* 0:1 */ + | no_std_sub + ; + +/* OBJE.FORM */ +obje_form_sect : OPEN DELIM TAG_FORM mand_line_item + { START(FORM, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* OBJE.TITL */ +obje_titl_sect : OPEN DELIM TAG_TITL mand_line_item + { START(TITL, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* OBJE.BLOB */ +obje_blob_sect : OPEN DELIM TAG_BLOB + { START(BLOB, NULL) } + obje_blob_subs + { CHECK1(CONT) } + CLOSE { } + ; + +obje_blob_subs : /* empty */ + | obje_blob_subs obje_blob_sub + ; + +obje_blob_sub : obje_blob_cont_sect { OCCUR1(CONT, 1) } + | no_std_sub + ; + +obje_blob_cont_sect : OPEN DELIM TAG_CONT mand_line_item + { START(CONT, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* OBJE.OBJE */ +obje_obje_sect : OPEN DELIM TAG_OBJE mand_pointer + { START(OBJE, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + 
+/*********************************************************************/
+/**** Note record                                                 ****/
+/*********************************************************************/
+/* Unlike the other record rules, a NOTE record carries a line value
+   (note_line_item) directly after the tag.  Symbol positions are thus:
+     $1 OPEN  $2 DELIM  $3 POINTER  $4 DELIM  $5 TAG_NOTE
+     $6 note_line_item  $7 (mid-rule action: context from start_record)
+   so the context handed to end_record() is $7, not $6 as in the
+   five-symbol record rules. */
+note_rec    : OPEN DELIM POINTER DELIM TAG_NOTE note_line_item
+              { $$ = start_record(REC_NOTE, $1, $3, $5);
+                START(NOTE, $$) }
+              note_subs
+              { CHECK0 }
+              CLOSE
+              { end_record(REC_NOTE, $7); }
+            ;
+
+/* Value of a NOTE record line.  An absent value is an error unless the
+   'ftree' compatibility mode is active.
+   NOTE(review): in that compatibility case the empty alternative leaves
+   $$ unset; note_rec does not read $6 at present, but confirm before
+   anything starts using it. */
+note_line_item : /* empty */
+                   { if (!compat_mode(C_FTREE)) {
+                       gedcom_error("Missing value"); YYERROR;
+                     }
+                   }
+               | DELIM line_item
+                   { gedcom_debug_print("==Val: %s==\n", $2);
+                     $$ = $2; }
+               ;
+
+note_subs   : /* empty */
+            | note_subs note_sub
+            ;
+
+note_sub    : continuation_sub  /* 0:M */
+            | source_cit_sub  /* 0:M */
+            | ident_struc_sub  /* 0:1 */
+            | change_date_sub  /* 0:1 */
+            | no_std_sub
+            ;
+
+/*********************************************************************/
+/**** Repository record                                           ****/
+/*********************************************************************/
+/* $6 is the mid-rule action value, i.e. the context returned by
+   start_record(), which is passed back to end_record(). */
+repos_rec   : OPEN DELIM POINTER DELIM TAG_REPO
+              { $$ = start_record(REC_REPO, $1, $3, $5);
+                START(REPO, $$) }
+              repo_subs
+              { CHECK0 }
+              CLOSE
+              { end_record(REC_REPO, $6); }
+            ;
+
+repo_subs   : /* empty */
+            | repo_subs repo_sub
+            ;
+
+repo_sub    : repo_name_sect  { OCCUR2(NAME, 0, 1) }
+            | addr_struc_sub  /* 0:1 */
+            | note_struc_sub  /* 0:M */
+            | ident_struc_sub  /* 0:1 */
+            | change_date_sub  /* 0:1 */
+            | no_std_sub
+            ;
+
+/* REPO.NAME */
+repo_name_sect : OPEN DELIM TAG_NAME mand_line_item
+                 { START(NAME, NULL) } no_std_subs { CHECK0 } CLOSE {}
+               ;
+
+/*********************************************************************/
+/**** Source record                                               ****/
+/*********************************************************************/
+/* $6 is the mid-rule action value, i.e. the context returned by
+   start_record(), which is passed back to end_record(). */
+source_rec  : OPEN DELIM POINTER DELIM TAG_SOUR
+              { $$ = start_record(REC_SOUR, $1, $3, $5);
+                START(SOUR, $$) }
+              sour_subs
+              { CHECK0 }
+              CLOSE
+              { end_record(REC_SOUR, $6); }
+            ;
+
+sour_subs   : /* empty */
+            | sour_subs sour_sub
+            ;
+
+sour_sub    : sour_data_sect  { OCCUR2(DATA, 0, 1) }
+            | sour_auth_sect  { OCCUR2(AUTH, 0, 1) }
+            | sour_titl_sect  { OCCUR2(TITL, 0, 1) }
+            | sour_abbr_sect  { OCCUR2(ABBR, 0, 1) }
+            | sour_publ_sect  { OCCUR2(PUBL, 0, 1) }
+            | sour_text_sect  { OCCUR2(TEXT, 0, 1) }
+            | source_repos_cit_sub  /* 0:1 */
+            | multim_link_sub  /* 0:M */
+            | note_struc_sub  /* 0:M */
+            | ident_struc_sub  /* 0:1 */
+            | change_date_sub  /* 0:1 */
+            | no_std_sub
+            ;
+
+/* SOUR.DATA */
+sour_data_sect : OPEN DELIM TAG_DATA
+                 { START(DATA, NULL) }
+                 sour_data_subs
+                 { CHECK0 }
+                 CLOSE { }
+               ;
+
+sour_data_subs : /* empty */
+               | sour_data_subs sour_data_sub
+               ;
+
+sour_data_sub : sour_data_even_sect  /* 0:M */
+              | sour_data_agnc_sect  { OCCUR2(AGNC, 0, 1) }
+              | note_struc_sub  /* 0:M */
+              | no_std_sub
+              ;
+
+sour_data_even_sect : OPEN DELIM TAG_EVEN mand_line_item
+                      { START(EVEN, NULL) }
+                      sour_data_even_subs
+                      { CHECK0 }
+                      CLOSE { }
+                    ;
+
+sour_data_even_subs : /* empty */
+                    | sour_data_even_subs sour_data_even_sub
+                    ;
+
+sour_data_even_sub : sour_data_even_date_sect { OCCUR2(DATE, 0, 1) }
+                   | sour_data_even_plac_sect { OCCUR2(PLAC, 0, 1) }
+                   | no_std_sub
+                   ;
+
+sour_data_even_date_sect : OPEN DELIM TAG_DATE mand_line_item
+                           { START(DATE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                         ;
+
+sour_data_even_plac_sect : OPEN DELIM TAG_PLAC mand_line_item
+                           { START(PLAC, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                         ;
+
+sour_data_agnc_sect : OPEN DELIM TAG_AGNC mand_line_item
+                      { START(AGNC, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                    ;
+
+/* SOUR.AUTH */
+sour_auth_sect : OPEN DELIM TAG_AUTH mand_line_item
+                 { START(AUTH, NULL) }
+                 sour_auth_subs
+                 { CHECK0 }
+                 CLOSE { }
+               ;
+
+sour_auth_subs : /* empty */
+               | sour_auth_subs sour_auth_sub
+               ;
+
+sour_auth_sub : continuation_sub  /* 0:M */
+              | no_std_sub
+              ;
+
+/* SOUR.TITL */
+sour_titl_sect : OPEN DELIM TAG_TITL mand_line_item
+                 { START(TITL, NULL) }
+                 sour_titl_subs
+                 { CHECK0 }
+                 CLOSE { }
+               ;
+
+sour_titl_subs : /* empty */
+               | sour_titl_subs sour_titl_sub
+               ;
+
+sour_titl_sub : continuation_sub  /* 0:M */
+              | no_std_sub
+              ;
+
+/* SOUR.ABBR */
+sour_abbr_sect : OPEN DELIM TAG_ABBR mand_line_item + { START(ABBR, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SOUR.PUBL */ +sour_publ_sect : OPEN DELIM TAG_PUBL mand_line_item + { START(PUBL, NULL) } + sour_publ_subs + { CHECK0 } + CLOSE { } + ; + +sour_publ_subs : /* empty */ + | sour_publ_subs sour_publ_sub + ; + +sour_publ_sub : continuation_sub /* 0:M */ + | no_std_sub + ; + +/* SOUR.TEXT */ +sour_text_sect : OPEN DELIM TAG_TEXT mand_line_item + { START(TEXT, NULL) } + sour_text_subs + { CHECK0 } + CLOSE { } + ; + +sour_text_subs : /* empty */ + | sour_text_subs sour_text_sub + ; + +sour_text_sub : continuation_sub /* 0:M */ + | no_std_sub + ; + +/*********************************************************************/ +/**** Submission record ****/ +/*********************************************************************/ +submis_rec : OPEN DELIM POINTER DELIM TAG_SUBN + { $$ = start_record(REC_SUBN, $1, $3, $5); + START(SUBN, $$) } + subn_subs + { CHECK0 } + CLOSE + { end_record(REC_SUBN, $6); } + ; + +subn_subs : /* empty */ + | subn_subs subn_sub + ; + +subn_sub : subn_subm_sect { OCCUR2(SUBM, 0, 1) } + | subn_famf_sect { OCCUR2(FAMF, 0, 1) } + | subn_temp_sect { OCCUR2(TEMP, 0, 1) } + | subn_ance_sect { OCCUR2(ANCE, 0, 1) } + | subn_desc_sect { OCCUR2(DESC, 0, 1) } + | subn_ordi_sect { OCCUR2(ORDI, 0, 1) } + | subn_rin_sect { OCCUR2(RIN, 0, 1) } + | no_std_sub + ; + +/* SUBN.SUBM */ +subn_subm_sect : OPEN DELIM TAG_SUBM mand_pointer + { START(SUBM, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBN.FAMF */ +subn_famf_sect : OPEN DELIM TAG_FAMF mand_line_item + { START(FAMF, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBN.TEMP */ +subn_temp_sect : OPEN DELIM TAG_TEMP mand_line_item + { START(TEMP, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBN.ANCE */ +subn_ance_sect : OPEN DELIM TAG_ANCE mand_line_item + { START(ANCE, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBN.DESC */ +subn_desc_sect : OPEN DELIM TAG_DESC 
mand_line_item + { START(DESC, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBN.ORDI */ +subn_ordi_sect : OPEN DELIM TAG_ORDI mand_line_item + { START(ORDI, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBN.RIN */ +subn_rin_sect : OPEN DELIM TAG_RIN mand_line_item + { START(RIN, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/*********************************************************************/ +/**** Submitter record ****/ +/*********************************************************************/ +submit_rec : OPEN DELIM POINTER DELIM TAG_SUBM + { $$ = start_record(REC_SUBM, $1, $3, $5); + START(SUBM, $$) } + subm_subs + { CHECK1(NAME) } + CLOSE + { end_record(REC_SUBM, $6); } + ; + +subm_subs : /* empty */ + | subm_subs subm_sub + ; + +subm_sub : subm_name_sect { OCCUR2(NAME, 0, 1) } + | addr_struc_sub /* 0:1 */ + | multim_link_sub /* 0:M */ + | subm_lang_sect { OCCUR2(LANG, 0, 3) } + | subm_rfn_sect { OCCUR2(RFN, 0, 1) } + | subm_rin_sect { OCCUR2(RIN, 0, 1) } + | change_date_sub /* 0:1 */ + | no_std_sub + ; + +/* SUBM.NAME */ +subm_name_sect : OPEN DELIM TAG_NAME mand_line_item + { START(NAME, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBM.LANG */ +subm_lang_sect : OPEN DELIM TAG_LANG mand_line_item + { START(LANG, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBM.RFN */ +subm_rfn_sect : OPEN DELIM TAG_RFN mand_line_item + { START(RFN, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/* SUBM.RIN */ +subm_rin_sect : OPEN DELIM TAG_RIN mand_line_item + { START(RIN, NULL) } no_std_subs { CHECK0 } CLOSE { } + ; + +/*********************************************************************/ +/**** Substructures ****/ +/*********************************************************************/ + +/* ADDRESS STRUCTURE */ +addr_struc_sub : addr_sect { OCCUR2(ADDR, 0, 1) } + | phon_sect { OCCUR2(PHON, 0, 3) } + ; + +addr_sect : OPEN DELIM TAG_ADDR mand_line_item + { START(ADDR, NULL) } + addr_subs + { CHECK0 } + CLOSE { } + ; + +addr_subs : /* empty */ 
+          | addr_subs addr_sub
+          ;
+
+addr_sub : addr_cont_sect /* 0:M */
+         | addr_adr1_sect { OCCUR2(ADR1, 0, 1) }
+         | addr_adr2_sect { OCCUR2(ADR2, 0, 1) }
+         | addr_city_sect { OCCUR2(CITY, 0, 1) }
+         | addr_stae_sect { OCCUR2(STAE, 0, 1) }
+         | addr_post_sect { OCCUR2(POST, 0, 1) }
+         | addr_ctry_sect { OCCUR2(CTRY, 0, 1) }
+         | no_std_sub
+         ;
+
+/* Leaf sections of ADDR: each takes a mandatory value and accepts only
+   non-standard subsections. */
+addr_cont_sect : OPEN DELIM TAG_CONT mand_line_item
+                 { START(CONT, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+addr_adr1_sect : OPEN DELIM TAG_ADR1 mand_line_item
+                 { START(ADR1, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+addr_adr2_sect : OPEN DELIM TAG_ADR2 mand_line_item
+                 { START(ADR2, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+addr_city_sect : OPEN DELIM TAG_CITY mand_line_item
+                 { START(CITY, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+addr_stae_sect : OPEN DELIM TAG_STAE mand_line_item
+                 { START(STAE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+addr_post_sect : OPEN DELIM TAG_POST mand_line_item
+                 { START(POST, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+addr_ctry_sect : OPEN DELIM TAG_CTRY mand_line_item
+                 { START(CTRY, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+
+phon_sect : OPEN DELIM TAG_PHON mand_line_item
+            { START(PHON, NULL) } no_std_subs { CHECK0 } CLOSE { }
+          ;
+
+/* ASSOCIATION STRUCTURE */
+assoc_struc_sub : asso_sect /* 0:M */
+                ;
+
+asso_sect : OPEN DELIM TAG_ASSO mand_pointer
+            { START(ASSO, NULL) }
+            asso_subs
+            { CHECK2(TYPE,RELA) }
+            CLOSE { }
+          ;
+
+/* The subsection list has to be left-recursive like every other "*_subs"
+   rule in this grammar.  Listing the alternatives directly under asso_subs
+   allowed at most ONE subsection per ASSO section, so the two subsections
+   named in CHECK2(TYPE,RELA) could never both be present. */
+asso_subs : /* empty */
+          | asso_subs asso_sub
+          ;
+
+asso_sub : asso_type_sect { OCCUR2(TYPE, 1, 1) }
+         | asso_rela_sect { OCCUR2(RELA, 1, 1) }
+         | note_struc_sub
+         | source_cit_sub
+         | no_std_sub
+         ;
+
+asso_type_sect : OPEN DELIM TAG_TYPE mand_line_item
+                 { START(TYPE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+
+asso_rela_sect : OPEN DELIM TAG_RELA mand_line_item
+                 { START(RELA, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+
+/* CHANGE DATE */
+change_date_sub : change_date_chan_sect { OCCUR2(CHAN, 0, 1) }
+                ;
+
+change_date_chan_sect : OPEN DELIM TAG_CHAN
+                        { START(CHAN,
NULL) }
+                        change_date_chan_subs
+                        { CHECK1(DATE) }
+                        CLOSE { }
+                      ;
+
+change_date_chan_subs : /* empty */
+                      | change_date_chan_subs change_date_chan_sub
+                      ;
+
+change_date_chan_sub : change_date_date_sect { OCCUR2(DATE, 1, 1) }
+                     | note_struc_sub
+                     | no_std_sub
+                     ;
+
+/* CHAN.DATE: a date value that may itself carry one TIME subsection. */
+change_date_date_sect : OPEN DELIM TAG_DATE mand_line_item
+                        { START(DATE, NULL) }
+                        change_date_date_subs
+                        { CHECK0 }
+                        CLOSE { }
+                      ;
+
+change_date_date_subs : /* empty */
+                      | change_date_date_subs change_date_date_sub
+                      ;
+
+change_date_date_sub : change_date_date_time_sect { OCCUR2(TIME, 0, 1) }
+                     | no_std_sub
+                     ;
+
+change_date_date_time_sect : OPEN DELIM TAG_TIME mand_line_item
+                             { START(TIME, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                           ;
+
+/* CHILD TO FAMILY LINK */
+chi_fam_link_sub : famc_sect /* 0:M */
+                 ;
+
+/* FAMC takes a mandatory pointer (mand_pointer) rather than a text value. */
+famc_sect : OPEN DELIM TAG_FAMC mand_pointer
+            { START(FAMC, NULL) }
+            famc_subs
+            { CHECK0 }
+            CLOSE { }
+          ;
+
+famc_subs : /* empty */
+          | famc_subs famc_sub
+          ;
+
+famc_sub : famc_pedi_sect /* 0:M */
+         | note_struc_sub
+         | no_std_sub
+         ;
+
+famc_pedi_sect : OPEN DELIM TAG_PEDI mand_line_item
+                 { START(PEDI, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+
+/* CONTINUATION SUBSECTIONS */
+continuation_sub : cont_sect /* 0:M */
+                 | conc_sect /* 0:M */
+                 ;
+
+cont_sect : OPEN DELIM TAG_CONT mand_line_item
+            { START(CONT, NULL) } no_std_subs { CHECK0 } CLOSE { }
+          ;
+
+conc_sect : OPEN DELIM TAG_CONC mand_line_item
+            { START(CONC, NULL) } no_std_subs { CHECK0 } CLOSE { }
+          ;
+
+/* EVENT DETAIL */
+/* Shared list of subsections allowed inside any event or attribute section.
+   It has no no_std_sub alternative of its own: every rule in view that uses
+   event_detail_sub lists no_std_sub next to it. */
+event_detail_sub : event_detail_type_sect { OCCUR2(TYPE, 0, 1) }
+                 | event_detail_date_sect { OCCUR2(DATE, 0, 1) }
+                 | place_struc_sub
+                 | addr_struc_sub
+                 | event_detail_age_sect { OCCUR2(AGE, 0, 1) }
+                 | event_detail_agnc_sect { OCCUR2(AGNC, 0, 1) }
+                 | event_detail_caus_sect { OCCUR2(CAUS, 0, 1) }
+                 | source_cit_sub
+                 | multim_link_sub
+                 | note_struc_sub
+                 ;
+
+event_detail_type_sect : OPEN DELIM TAG_TYPE mand_line_item
+                         { START(TYPE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                       ;
+event_detail_date_sect : OPEN DELIM TAG_DATE mand_line_item
+                         { START(DATE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                       ;
+event_detail_age_sect : OPEN DELIM TAG_AGE mand_line_item
+                        { START(AGE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                      ;
+event_detail_agnc_sect : OPEN DELIM TAG_AGNC mand_line_item
+                         { START(AGNC, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                       ;
+event_detail_caus_sect : OPEN DELIM TAG_CAUS mand_line_item
+                         { START(CAUS, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                       ;
+
+/* FAMILY EVENT STRUCTURE */
+fam_event_struc_sub : fam_event_sect
+                    | fam_gen_even_sect /* 0:M */
+                    ;
+
+/* All specific family events share one rule; START is issued by the
+   fam_event_tag alternative that matched, i.e. before the optional value
+   is parsed. */
+fam_event_sect : OPEN DELIM fam_event_tag opt_value fam_event_subs
+                 { CHECK0 }
+                 CLOSE { }
+               ;
+
+fam_event_tag : TAG_ANUL { START(ANUL, NULL) }
+              | TAG_CENS { START(CENS, NULL) }
+              | TAG_DIV { START(DIV, NULL) }
+              | TAG_DIVF { START(DIVF, NULL) }
+              | TAG_ENGA { START(ENGA, NULL) }
+              | TAG_MARR { START(MARR, NULL) }
+              | TAG_MARB { START(MARB, NULL) }
+              | TAG_MARC { START(MARC, NULL) }
+              | TAG_MARL { START(MARL, NULL) }
+              | TAG_MARS { START(MARS, NULL) }
+              ;
+
+fam_event_subs : /* empty */
+               | fam_event_subs fam_event_sub
+               ;
+
+fam_event_sub : event_detail_sub
+              | fam_even_husb_sect { OCCUR2(HUSB, 0, 1) }
+              | fam_even_wife_sect { OCCUR2(WIFE, 0, 1) }
+              | no_std_sub
+              ;
+
+fam_even_husb_sect : OPEN DELIM TAG_HUSB
+                     { START(HUSB, NULL) }
+                     fam_even_husb_subs
+                     { CHECK1(AGE) }
+                     CLOSE { }
+                   ;
+
+fam_even_husb_subs : /* empty */
+                   | fam_even_husb_subs fam_even_husb_sub
+                   ;
+
+fam_even_husb_sub : fam_even_husb_age_sect { OCCUR2(AGE, 1, 1) }
+                  | no_std_sub
+                  ;
+
+fam_even_husb_age_sect : OPEN DELIM TAG_AGE mand_line_item
+                         { START(AGE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                       ;
+
+/* The WIFE section has the same content as the HUSB section, so it reuses
+   fam_even_husb_subs, but it must be opened under its own tag: the original
+   START(HUSB, NULL) here was a copy-paste slip that opened every WIFE
+   section under the HUSB tag. */
+fam_even_wife_sect : OPEN DELIM TAG_WIFE
+                     { START(WIFE, NULL) }
+                     fam_even_husb_subs
+                     { CHECK1(AGE) }
+                     CLOSE { }
+                   ;
+
+/* Generic family event (EVEN): no value, same subsections as the specific
+   family events. */
+fam_gen_even_sect : OPEN DELIM TAG_EVEN
+                    { START(EVEN, NULL) }
+                    fam_gen_even_subs
+                    { CHECK0 }
+                    CLOSE { }
+                  ;
+
+fam_gen_even_subs : /* empty */
+                  | fam_gen_even_subs fam_gen_even_sub
+                  ;
+
+fam_gen_even_sub : event_detail_sub
+                 | fam_even_husb_sect { OCCUR2(HUSB, 0, 1) }
+                 | fam_even_wife_sect { OCCUR2(WIFE, 0, 1) }
+                 | no_std_sub
+                 ;
+
+/* IDENTIFICATION STRUCTURE */
+ident_struc_sub : ident_refn_sect /* 0:M */
+                | ident_rin_sect { OCCUR2(RIN, 0, 1) }
+                ;
+
+ident_refn_sect : OPEN DELIM TAG_REFN mand_line_item
+                  { START(REFN, NULL) }
+                  ident_refn_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+
+ident_refn_subs : /* empty */
+                | ident_refn_subs ident_refn_sub
+                ;
+
+ident_refn_sub : ident_refn_type_sect { OCCUR2(TYPE, 0, 1) }
+               | no_std_sub
+               ;
+
+ident_refn_type_sect : OPEN DELIM TAG_TYPE mand_line_item
+                       { START(TYPE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                     ;
+
+ident_rin_sect : OPEN DELIM TAG_RIN mand_line_item
+                 { START(RIN, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+
+/* INDIVIDUAL ATTRIBUTE STRUCTURE */
+/* One alternative per attribute tag.  All attribute sections below share the
+   subsection list indiv_attr_event_subs. */
+indiv_attr_struc_sub : indiv_cast_sect /* 0:M */
+                     | indiv_dscr_sect /* 0:M */
+                     | indiv_educ_sect /* 0:M */
+                     | indiv_idno_sect /* 0:M */
+                     | indiv_nati_sect /* 0:M */
+                     | indiv_nchi_sect /* 0:M */
+                     | indiv_nmr_sect /* 0:M */
+                     | indiv_occu_sect /* 0:M */
+                     | indiv_prop_sect /* 0:M */
+                     | indiv_reli_sect /* 0:M */
+                     | indiv_resi_sect /* 0:M */
+                     | indiv_ssn_sect /* 0:M */
+                     | indiv_titl_sect /* 0:M */
+                     ;
+
+indiv_cast_sect : OPEN DELIM TAG_CAST mand_line_item
+                  { START(CAST, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_dscr_sect : OPEN DELIM TAG_DSCR mand_line_item
+                  { START(DSCR, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_educ_sect : OPEN DELIM TAG_EDUC mand_line_item
+                  { START(EDUC, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_idno_sect : OPEN DELIM TAG_IDNO mand_line_item
+                  { START(IDNO, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_nati_sect : OPEN DELIM TAG_NATI mand_line_item
+                  { START(NATI, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_nchi_sect : OPEN DELIM TAG_NCHI mand_line_item
+                  { START(NCHI, NULL) }
+
indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_nmr_sect : OPEN DELIM TAG_NMR mand_line_item
+                 { START(NMR, NULL) }
+                 indiv_attr_event_subs
+                 { CHECK0 }
+                 CLOSE { }
+               ;
+indiv_occu_sect : OPEN DELIM TAG_OCCU mand_line_item
+                  { START(OCCU, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_prop_sect : OPEN DELIM TAG_PROP mand_line_item
+                  { START(PROP, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_reli_sect : OPEN DELIM TAG_RELI mand_line_item
+                  { START(RELI, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+/* RESI is the only attribute section in this group that takes no value. */
+indiv_resi_sect : OPEN DELIM TAG_RESI
+                  { START(RESI, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+indiv_ssn_sect : OPEN DELIM TAG_SSN mand_line_item
+                 { START(SSN, NULL) }
+                 indiv_attr_event_subs
+                 { CHECK0 }
+                 CLOSE { }
+               ;
+indiv_titl_sect : OPEN DELIM TAG_TITL mand_line_item
+                  { START(TITL, NULL) }
+                  indiv_attr_event_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+
+indiv_attr_event_subs : /* empty */
+                      | indiv_attr_event_subs indiv_attr_event_sub
+                      ;
+
+indiv_attr_event_sub : event_detail_sub
+                     | no_std_sub
+                     ;
+
+/* INDIVIDUAL EVENT STRUCTURE */
+/* Individual events come in four shapes: BIRT/CHR (may carry a FAMC
+   subsection), the generic tag list, ADOP (FAMC with a nested ADOP), and the
+   free-form EVEN. */
+indiv_even_struc_sub : indiv_birt_sect
+                     | indiv_gen_sect
+                     | indiv_adop_sect /* 0:M */
+                     | indiv_even_sect /* 0:M */
+                     ;
+
+indiv_birt_sect : OPEN DELIM indiv_birt_tag opt_value indiv_birt_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+
+indiv_birt_tag : TAG_BIRT { START(BIRT, NULL) }
+               | TAG_CHR { START(CHR, NULL) }
+               ;
+
+indiv_birt_subs : /* empty */
+                | indiv_birt_subs indiv_birt_sub
+                ;
+
+indiv_birt_sub : event_detail_sub
+               | indiv_birt_famc_sect { OCCUR2(FAMC,0, 1) }
+               | no_std_sub
+               ;
+
+indiv_birt_famc_sect : OPEN DELIM TAG_FAMC mand_pointer
+                       { START(FAMC, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                     ;
+
+indiv_gen_sect : OPEN DELIM indiv_gen_tag opt_value indiv_gen_subs
+                 { CHECK0 }
+                 CLOSE { }
+               ;
+
+indiv_gen_tag : TAG_DEAT { START(DEAT, NULL) }
+              | TAG_BURI { START(BURI, NULL) }
+              | TAG_CREM { START(CREM, NULL) }
+              | TAG_BAPM { START(BAPM, NULL) }
+
| TAG_BARM { START(BARM, NULL) }
+              | TAG_BASM { START(BASM, NULL) }
+              | TAG_BLES { START(BLES, NULL) }
+              | TAG_CHRA { START(CHRA, NULL) }
+              | TAG_CONF { START(CONF, NULL) }
+              | TAG_FCOM { START(FCOM, NULL) }
+              | TAG_ORDN { START(ORDN, NULL) }
+              | TAG_NATU { START(NATU, NULL) }
+              | TAG_EMIG { START(EMIG, NULL) }
+              | TAG_IMMI { START(IMMI, NULL) }
+              | TAG_CENS { START(CENS, NULL) }
+              | TAG_PROB { START(PROB, NULL) }
+              | TAG_WILL { START(WILL, NULL) }
+              | TAG_GRAD { START(GRAD, NULL) }
+              | TAG_RETI { START(RETI, NULL) }
+              ;
+
+indiv_gen_subs : /* empty */
+               | indiv_gen_subs indiv_gen_sub
+               ;
+
+indiv_gen_sub : event_detail_sub
+              | no_std_sub
+              ;
+
+/* ADOP: unlike the tag-list rules above, START is issued after the optional
+   value has been parsed. */
+indiv_adop_sect : OPEN DELIM TAG_ADOP opt_value
+                  { START(ADOP, NULL) }
+                  indiv_adop_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+
+indiv_adop_subs : /* empty */
+                | indiv_adop_subs indiv_adop_sub
+                ;
+
+indiv_adop_sub : event_detail_sub
+               | indiv_adop_famc_sect { OCCUR2(FAMC,0, 1) }
+               | no_std_sub
+               ;
+
+/* ADOP.FAMC: a family pointer that may carry one nested ADOP subsection. */
+indiv_adop_famc_sect : OPEN DELIM TAG_FAMC mand_pointer
+                       { START(FAMC, NULL) }
+                       indiv_adop_famc_subs
+                       { CHECK0 }
+                       CLOSE { }
+                     ;
+
+indiv_adop_famc_subs : /* empty */
+                     | indiv_adop_famc_subs indiv_adop_famc_sub
+                     ;
+
+indiv_adop_famc_sub : indiv_adop_famc_adop_sect { OCCUR2(ADOP,0, 1) }
+                    | no_std_sub
+                    ;
+
+indiv_adop_famc_adop_sect : OPEN DELIM TAG_ADOP mand_line_item
+                            { START(ADOP, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                          ;
+
+/* Free-form individual event: no value, and it reuses the subsection list of
+   the generic events (indiv_gen_subs). */
+indiv_even_sect : OPEN DELIM TAG_EVEN
+                  { START(EVEN, NULL) }
+                  indiv_gen_subs
+                  { CHECK0 }
+                  CLOSE { }
+                ;
+
+/* LDS INDIVIDUAL ORDINANCE */
+lds_indiv_ord_sub : lio_bapl_sect /* 0:M */
+                  | lio_slgc_sect /* 0:M */
+                  ;
+
+/* BAPL, CONL and ENDL share one rule; the section line carries no value. */
+lio_bapl_sect : OPEN DELIM lio_bapl_tag lio_bapl_subs
+                { CHECK0 }
+                CLOSE { }
+              ;
+
+lio_bapl_tag : TAG_BAPL { START(BAPL, NULL) }
+             | TAG_CONL { START(CONL, NULL) }
+             | TAG_ENDL { START(ENDL, NULL) }
+             ;
+
+lio_bapl_subs : /* empty */
+              | lio_bapl_subs lio_bapl_sub
+              ;
+
+lio_bapl_sub : lio_bapl_stat_sect { OCCUR2(STAT, 0, 1) }
+             | lio_bapl_date_sect { OCCUR2(DATE, 0, 1) }
+             |
lio_bapl_temp_sect { OCCUR2(TEMP, 0, 1) }
+             | lio_bapl_plac_sect { OCCUR2(PLAC, 0, 1) }
+             | source_cit_sub
+             | note_struc_sub
+             | no_std_sub
+             ;
+
+lio_bapl_stat_sect : OPEN DELIM TAG_STAT mand_line_item
+                     { START(STAT, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+lio_bapl_date_sect : OPEN DELIM TAG_DATE mand_line_item
+                     { START(DATE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+lio_bapl_temp_sect : OPEN DELIM TAG_TEMP mand_line_item
+                     { START(TEMP, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+lio_bapl_plac_sect : OPEN DELIM TAG_PLAC mand_line_item
+                     { START(PLAC, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+
+/* SLGC accepts everything a BAPL-type ordinance accepts (lio_bapl_sub) plus
+   a FAMC pointer, which is declared with cardinality 1:1 and named in
+   CHECK1(FAMC). */
+lio_slgc_sect : OPEN DELIM TAG_SLGC
+                { START(SLGC, NULL) }
+                lio_slgc_subs
+                { CHECK1(FAMC) }
+                CLOSE { }
+              ;
+
+lio_slgc_subs : /* empty */
+              | lio_slgc_subs lio_slgc_sub
+              ;
+
+lio_slgc_sub : lio_bapl_sub
+             | lio_slgc_famc_sect { OCCUR2(FAMC, 1, 1) }
+             ;
+
+lio_slgc_famc_sect : OPEN DELIM TAG_FAMC mand_pointer
+                     { START(FAMC, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+
+/* LDS SPOUSE SEALING */
+lds_spouse_seal_sub : lss_slgs_sect
+                    ;
+
+lss_slgs_sect : OPEN DELIM TAG_SLGS
+                { START(SLGS, NULL) }
+                lss_slgs_subs
+                { CHECK0 }
+                CLOSE { }
+              ;
+
+lss_slgs_subs : /* empty */
+              | lss_slgs_subs lss_slgs_sub
+              ;
+
+/* Same subsection set as lio_bapl_sub, but with its own leaf rules. */
+lss_slgs_sub : lss_slgs_stat_sect { OCCUR2(STAT, 0, 1) }
+             | lss_slgs_date_sect { OCCUR2(DATE, 0, 1) }
+             | lss_slgs_temp_sect { OCCUR2(TEMP, 0, 1) }
+             | lss_slgs_plac_sect { OCCUR2(PLAC, 0, 1) }
+             | source_cit_sub
+             | note_struc_sub
+             | no_std_sub
+             ;
+
+lss_slgs_stat_sect : OPEN DELIM TAG_STAT mand_line_item
+                     { START(STAT, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+lss_slgs_date_sect : OPEN DELIM TAG_DATE mand_line_item
+                     { START(DATE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+lss_slgs_temp_sect : OPEN DELIM TAG_TEMP mand_line_item
+                     { START(TEMP, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+lss_slgs_plac_sect : OPEN DELIM TAG_PLAC mand_line_item
+                     { START(PLAC, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                   ;
+
+/* MULTIMEDIA LINK */
+/* A multimedia link is either a pointer to an OBJE record (link form) or an
+   embedded OBJE section without a value (embedded form).  Both start with
+   OPEN DELIM TAG_OBJE; only the link form continues with DELIM POINTER. */
+multim_link_sub : multim_obje_link_sect
+                | multim_obje_emb_sect
+                ;
+
+multim_obje_link_sect : OPEN DELIM TAG_OBJE DELIM POINTER
+                        { START(OBJE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                      ;
+
+multim_obje_emb_sect : OPEN DELIM TAG_OBJE
+                       { START(OBJE, NULL) }
+                       multim_obje_emb_subs
+                       { CHECK2(FORM,FILE) }
+                       CLOSE { }
+                     ;
+
+multim_obje_emb_subs : /* empty */
+                     | multim_obje_emb_subs multim_obje_emb_sub
+                     ;
+
+multim_obje_emb_sub : multim_obje_form_sect { OCCUR2(FORM, 1, 1) }
+                    | multim_obje_titl_sect { OCCUR2(TITL, 0, 1) }
+                    | multim_obje_file_sect { OCCUR2(FILE, 1, 1) }
+                    | note_struc_sub
+                    | no_std_sub
+                    ;
+
+multim_obje_form_sect : OPEN DELIM TAG_FORM mand_line_item
+                        { START(FORM, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                      ;
+multim_obje_titl_sect : OPEN DELIM TAG_TITL mand_line_item
+                        { START(TITL, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                      ;
+multim_obje_file_sect : OPEN DELIM TAG_FILE mand_line_item
+                        { START(FILE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                      ;
+
+/* NOTE STRUCTURE */
+/* As with OBJE, a note is either a pointer to a NOTE record or embedded
+   text.  Only the embedded form accepts continuation (CONT/CONC)
+   subsections. */
+note_struc_sub : note_struc_link_sect /* 0:M */
+               | note_struc_emb_sect /* 0:M */
+               ;
+
+note_struc_link_sect : OPEN DELIM TAG_NOTE DELIM POINTER
+                       { START(NOTE, NULL) }
+                       note_struc_link_subs
+                       { CHECK0 }
+                       CLOSE { }
+                     ;
+
+note_struc_link_subs : /* empty */
+                     | note_struc_link_subs note_struc_link_sub
+                     ;
+
+note_struc_link_sub : source_cit_sub
+                    | no_std_sub
+                    ;
+
+note_struc_emb_sect : OPEN DELIM TAG_NOTE opt_line_item
+                      { START(NOTE, NULL) }
+                      note_struc_emb_subs
+                      { CHECK0 }
+                      CLOSE { }
+                    ;
+
+note_struc_emb_subs : /* empty */
+                    | note_struc_emb_subs note_struc_emb_sub
+                    ;
+
+note_struc_emb_sub : continuation_sub
+                   | source_cit_sub
+                   | no_std_sub
+                   ;
+
+/* PERSONAL NAME STRUCTURE */
+pers_name_struc_sub : pers_name_sect /* 0:M */
+                    ;
+
+pers_name_sect : OPEN DELIM TAG_NAME mand_line_item
+                 { START(NAME, NULL) }
+                 pers_name_subs
+                 { CHECK0 }
+                 CLOSE { }
+               ;
+
+pers_name_subs : /* empty */
+               | pers_name_subs pers_name_sub
+               ;
+
+pers_name_sub : pers_name_npfx_sect {
OCCUR2(NPFX, 0, 1) }
+              | pers_name_givn_sect { OCCUR2(GIVN, 0, 1) }
+              | pers_name_nick_sect { OCCUR2(NICK, 0, 1) }
+              | pers_name_spfx_sect { OCCUR2(SPFX, 0, 1) }
+              | pers_name_surn_sect { OCCUR2(SURN, 0, 1) }
+              | pers_name_nsfx_sect { OCCUR2(NSFX, 0, 1) }
+              | source_cit_sub
+              | note_struc_sub
+              | no_std_sub
+              ;
+
+/* Name-part leaf sections: each takes a mandatory value and accepts only
+   non-standard subsections. */
+pers_name_npfx_sect : OPEN DELIM TAG_NPFX mand_line_item
+                      { START(NPFX, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                    ;
+pers_name_givn_sect : OPEN DELIM TAG_GIVN mand_line_item
+                      { START(GIVN, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                    ;
+pers_name_nick_sect : OPEN DELIM TAG_NICK mand_line_item
+                      { START(NICK, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                    ;
+pers_name_spfx_sect : OPEN DELIM TAG_SPFX mand_line_item
+                      { START(SPFX, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                    ;
+pers_name_surn_sect : OPEN DELIM TAG_SURN mand_line_item
+                      { START(SURN, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                    ;
+pers_name_nsfx_sect : OPEN DELIM TAG_NSFX mand_line_item
+                      { START(NSFX, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                    ;
+
+/* PLACE STRUCTURE */
+place_struc_sub : place_struc_plac_sect /* 0:M */
+                ;
+
+place_struc_plac_sect : OPEN DELIM TAG_PLAC mand_line_item
+                        { START(PLAC, NULL) }
+                        place_struc_plac_subs
+                        { CHECK0 }
+                        CLOSE { }
+                      ;
+
+place_struc_plac_subs : /* empty */
+                      | place_struc_plac_subs place_struc_plac_sub
+                      ;
+
+place_struc_plac_sub : place_plac_form_sect { OCCUR2(FORM, 0, 1) }
+                     | source_cit_sub
+                     | note_struc_sub
+                     | no_std_sub
+                     ;
+
+place_plac_form_sect : OPEN DELIM TAG_FORM mand_line_item
+                       { START(FORM, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                     ;
+
+/* SOURCE_CITATION */
+/* A source citation is either a pointer to a SOUR record (link form, with
+   PAGE/EVEN/DATA/QUAY details) or an embedded description (text value with
+   continuation and TEXT subsections). */
+source_cit_sub : source_cit_link_sect /* 0:M */
+               | source_cit_emb_sect /* 0:M */
+               ;
+
+source_cit_link_sect : OPEN DELIM TAG_SOUR DELIM POINTER
+                       { START(SOUR, NULL) }
+                       source_cit_link_subs
+                       { CHECK0 }
+                       CLOSE { }
+                     ;
+
+source_cit_link_subs : /* empty */
+                     | source_cit_link_subs source_cit_link_sub
+                     ;
+
+source_cit_link_sub : source_cit_page_sect { OCCUR2(PAGE, 0, 1) }
+                    |
source_cit_even_sect { OCCUR2(EVEN, 0, 1) }
+                    | source_cit_data_sect { OCCUR2(DATA, 0, 1) }
+                    | source_cit_quay_sect { OCCUR2(QUAY, 0, 1) }
+                    | multim_link_sub
+                    | note_struc_sub
+                    | no_std_sub
+                    ;
+
+source_cit_page_sect : OPEN DELIM TAG_PAGE mand_line_item
+                       { START(PAGE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                     ;
+
+/* SOUR.EVEN: the cited event type, optionally qualified by a ROLE. */
+source_cit_even_sect : OPEN DELIM TAG_EVEN mand_line_item
+                       { START(EVEN, NULL) }
+                       source_cit_even_subs
+                       { CHECK0 }
+                       CLOSE { }
+                     ;
+
+source_cit_even_subs : /* empty */
+                     | source_cit_even_subs source_cit_even_sub
+                     ;
+
+source_cit_even_sub : source_cit_even_role_sect { OCCUR2(ROLE, 0, 1) }
+                    | no_std_sub
+                    ;
+
+source_cit_even_role_sect : OPEN DELIM TAG_ROLE mand_line_item
+                            { START(ROLE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                          ;
+
+/* SOUR.DATA: a valueless container for a DATE and any number of TEXT
+   sections. */
+source_cit_data_sect : OPEN DELIM TAG_DATA
+                       { START(DATA, NULL) }
+                       source_cit_data_subs
+                       { CHECK0 }
+                       CLOSE { }
+                     ;
+
+source_cit_data_subs : /* empty */
+                     | source_cit_data_subs source_cit_data_sub
+                     ;
+
+source_cit_data_sub : source_cit_data_date_sect { OCCUR2(DATE, 0, 1) }
+                    | source_cit_text_sect /* 0:M */
+                    | no_std_sub
+                    ;
+
+source_cit_data_date_sect : OPEN DELIM TAG_DATE mand_line_item
+                            { START(DATE, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                          ;
+
+/* TEXT is shared by the link form (under DATA) and the embedded form. */
+source_cit_text_sect : OPEN DELIM TAG_TEXT mand_line_item
+                       { START(TEXT, NULL) }
+                       source_cit_text_subs
+                       { CHECK0 }
+                       CLOSE { }
+                     ;
+
+source_cit_text_subs : /* empty */
+                     | source_cit_text_subs source_cit_text_sub
+                     ;
+
+source_cit_text_sub : continuation_sub
+                    | no_std_sub
+                    ;
+
+source_cit_quay_sect : OPEN DELIM TAG_QUAY mand_line_item
+                       { START(QUAY, NULL) } no_std_subs { CHECK0 } CLOSE { }
+                     ;
+
+source_cit_emb_sect : OPEN DELIM TAG_SOUR mand_line_item
+                      { START(SOUR, NULL) }
+                      source_cit_emb_subs
+                      { CHECK0 }
+                      CLOSE { }
+                    ;
+
+source_cit_emb_subs : /* empty */
+                    | source_cit_emb_subs source_cit_emb_sub
+                    ;
+
+source_cit_emb_sub : continuation_sub
+                   | source_cit_text_sect /* 0:M */
+                   | note_struc_sub
+                   | no_std_sub
+                   ;
+
+/* SOURCE REPOSITORY
CITATION */
+source_repos_cit_sub : source_repos_repo_sect { OCCUR2(REPO, 0, 1) }
+                     ;
+
+source_repos_repo_sect : OPEN DELIM TAG_REPO mand_pointer
+                         { START(REPO, NULL) }
+                         source_repos_repo_subs
+                         { CHECK0 }
+                         CLOSE { }
+                       ;
+
+source_repos_repo_subs : /* empty */
+                       | source_repos_repo_subs source_repos_repo_sub
+                       ;
+
+source_repos_repo_sub : note_struc_sub
+                      | caln_sect /* 0:M */
+                      | no_std_sub
+                      ;
+
+caln_sect : OPEN DELIM TAG_CALN mand_line_item
+            { START(CALN, NULL) }
+            caln_subs
+            { CHECK0 }
+            CLOSE { }
+          ;
+
+caln_subs : /* empty */
+          | caln_subs caln_sub
+          ;
+
+caln_sub : caln_medi_sect { OCCUR2(MEDI, 0, 1) }
+         | no_std_sub
+         ;
+
+caln_medi_sect : OPEN DELIM TAG_MEDI mand_line_item
+                 { START(MEDI, NULL) } no_std_subs { CHECK0 } CLOSE { }
+               ;
+
+/* SPOUSE TO FAMILY LINK */
+spou_fam_link_sub : spou_fam_fams_sect /* 0:M */
+                  ;
+
+spou_fam_fams_sect : OPEN DELIM TAG_FAMS mand_pointer
+                     { START(FAMS, NULL) }
+                     spou_fam_fams_subs
+                     { CHECK0 }
+                     CLOSE { }
+                   ;
+
+spou_fam_fams_subs : /* empty */
+                   | spou_fam_fams_subs spou_fam_fams_sub
+                   ;
+
+spou_fam_fams_sub : note_struc_sub
+                  | no_std_sub
+                  ;
+
+/*********************************************************************/
+/**** General ****/
+/*********************************************************************/
+
+no_std_subs : /* empty */
+            | no_std_subs no_std_sub
+            ;
+
+/* Anything that is not a standard subsection of the enclosing section: a
+   user-defined section, a standard tag used in the wrong place (gen_sect),
+   or a syntax error.  In the error case the parser skips the nested
+   error_subs up to the matching CLOSE before running HANDLE_ERROR. */
+no_std_sub : user_sect /* 0:M */
+           | gen_sect
+           | error error_subs CLOSE { HANDLE_ERROR }
+           ;
+
+/* Record-level counterpart of no_std_sub. */
+no_std_rec : user_rec /* 0:M */
+           | gen_rec
+           | error error_subs CLOSE { HANDLE_ERROR }
+           ;
+
+/* User-defined record.  The first action rejects any USERTAG that does not
+   start with an underscore.  The second action is the seventh component of
+   the rule, so the record context it stores in $$ is what end_record
+   receives as $7. */
+user_rec : OPEN DELIM opt_xref USERTAG
+           { if ($4[0] != '_') {
+               gedcom_error("Undefined tag (and not a valid user tag): %s",
+                            $4);
+               YYERROR;
+             }
+           }
+           opt_value
+           { $$ = start_record(REC_USER, $1, $3, $4);
+             START($4, $$)
+           }
+           user_sects
+           { CHECK0 }
+           CLOSE
+           { end_record(REC_USER, $7); }
+         ;
+user_sect : OPEN DELIM opt_xref USERTAG
+            { if ($4[0] != '_') {
+                gedcom_error("Undefined tag (and not a valid user tag): %s",
+                             $4);
+                YYERROR;
+              }
+            }
+
opt_value
+            { $$ = start_element(ELT_USER, PARENT, $1, $4, $6, $6);
+              START($4, $$);
+            }
+            user_sects
+            { CHECK0 }
+            CLOSE
+            { end_element(ELT_USER, PARENT, $7, NULL);
+            }
+          ;
+
+user_sects : /* empty */ { }
+           | user_sects user_sect { }
+           ;
+
+/* Optional cross-reference in front of a tag: NULL when absent. */
+opt_xref : /* empty */ { $$ = NULL; }
+         | POINTER DELIM { $$ = $1; }
+         ;
+
+/* Optional value after a tag: NULL when absent. */
+opt_value : /* empty */ { $$ = NULL; }
+          | DELIM line_value { $$ = $2; }
+          ;
+
+line_value : POINTER { $$ = $1; }
+           | line_item { $$ = $1; }
+           ;
+
+/* Mandatory variants: the empty alternative reports the missing value and
+   aborts the enclosing rule with YYERROR. */
+mand_pointer : /* empty */ { gedcom_error("Missing pointer"); YYERROR; }
+             | DELIM POINTER { gedcom_debug_print("==Ptr: %s==\n", $2);
+                               $$ = $2; }
+             ;
+
+mand_line_item : /* empty */ { gedcom_error("Missing value"); YYERROR; }
+               | DELIM line_item { gedcom_debug_print("==Val: %s==\n", $2);
+                                   $$ = $2; }
+               ;
+
+opt_line_item : /* empty */ { }
+              | DELIM line_item { }
+              ;
+
+/* A line item is assembled token by token into line_item_buf, with
+   line_item_buf_ptr pointing at the next free position.  The first token
+   clears the buffer; later tokens append.  The strncmp() over 3 bytes
+   includes the terminating NUL, so it matches only a token that is exactly
+   "@@", which is stored as a single '@'.
+   NOTE(review): nothing in these actions bounds line_item_buf_ptr against
+   the end of line_item_buf -- confirm that the lexer limits the line
+   length. */
+line_item : anychar { size_t i;
+                      CLEAR_BUFFER(line_item_buf);
+                      line_item_buf_ptr = line_item_buf;
+                      /* The following also takes care of '@@' */
+                      if (!strncmp($1, "@@", 3))
+                        *line_item_buf_ptr++ = '@';
+                      else
+                        for (i=0; i < strlen($1); i++)
+                          *line_item_buf_ptr++ = $1[i];
+                      $$ = line_item_buf;
+                    }
+          | ESCAPE { CLEAR_BUFFER(line_item_buf);
+                     line_item_buf_ptr = line_item_buf;
+                     /* For now, ignore escapes */
+                     $$ = line_item_buf;
+                   }
+          | line_item anychar
+            { size_t i;
+              /* The following also takes care of '@@' */
+              if (!strncmp($2, "@@", 3))
+                *line_item_buf_ptr++ = '@';
+              else
+                for (i=0; i < strlen($2); i++)
+                  *line_item_buf_ptr++ = $2[i];
+              $$ = line_item_buf;
+            }
+          | line_item ESCAPE
+            { /* For now, ignore escapes */
+              $$ = line_item_buf;
+            }
+          ;
+
+anychar : ANYCHAR { }
+        | DELIM { }
+        ;
+
+/* Sections nested inside a section that is being skipped after an error:
+   any tag is accepted and nothing is reported for them. */
+error_subs : /* empty */
+           | error_subs error_sect
+           ;
+
+/* Terminating ';' added: this was the only rule in this part of the grammar
+   without one. */
+error_sect : OPEN DELIM opt_xref anytag opt_value error_subs CLOSE { }
+           ;
+
+/* A standard tag that is not valid in the current context. */
+gen_sect : OPEN DELIM opt_xref anystdtag
+           { INVALID_TAG($4); }
+           opt_value opt_sects CLOSE
+           { }
+         ;
+
+gen_rec : gen_rec_top
+        | gen_rec_norm
+        ;
+
+gen_rec_norm : OPEN DELIM opt_xref anystdtag
+               {
INVALID_TOP_TAG($4) }
+               opt_value opt_sects CLOSE
+               { }
+             ;
+
+/* A top-level record tag that is not preceded by a cross-reference. */
+gen_rec_top : OPEN DELIM anytoptag
+              { gedcom_error("Missing cross-reference"); YYERROR; }
+              opt_value opt_sects CLOSE
+              { }
+            ;
+
+opt_sects : /* empty */ { }
+          | opt_sects gen_sect { }
+          ;
+
+anytag : USERTAG { }
+       | anystdtag { }
+       ;
+
+/* Tags that start a record. */
+anytoptag : TAG_FAM
+          | TAG_INDI
+          | TAG_OBJE
+          | TAG_NOTE
+          | TAG_REPO
+          | TAG_SOUR
+          | TAG_SUBN
+          | TAG_SUBM
+          ;
+
+/* Every standard tag token.
+   NOTE(review): the scanner tables in this patch also have rules for FAMF
+   and NATU, and TAG_NATU is used by indiv_gen_tag, but neither appears in
+   this list -- confirm whether that is intentional. */
+anystdtag : TAG_ABBR
+          | TAG_ADDR
+          | TAG_ADR1
+          | TAG_ADR2 { }
+          | TAG_ADOP { }
+          | TAG_AFN { }
+          | TAG_AGE { }
+          | TAG_AGNC { }
+          | TAG_ALIA { }
+          | TAG_ANCE { }
+          | TAG_ANCI { }
+          | TAG_ANUL { }
+          | TAG_ASSO { }
+          | TAG_AUTH { }
+          | TAG_BAPL { }
+          | TAG_BAPM { }
+          | TAG_BARM { }
+          | TAG_BASM { }
+          | TAG_BIRT { }
+          | TAG_BLES { }
+          | TAG_BLOB { }
+          | TAG_BURI { }
+          | TAG_CALN { }
+          | TAG_CAST { }
+          | TAG_CAUS { }
+          | TAG_CENS { }
+          | TAG_CHAN { }
+          | TAG_CHAR { }
+          | TAG_CHIL { }
+          | TAG_CHR { }
+          | TAG_CHRA { }
+          | TAG_CITY { }
+          | TAG_CONC { }
+          | TAG_CONF { }
+          | TAG_CONL { }
+          | TAG_CONT { }
+          | TAG_COPR { }
+          | TAG_CORP { }
+          | TAG_CREM { }
+          | TAG_CTRY { }
+          | TAG_DATA { }
+          | TAG_DATE { }
+          | TAG_DEAT { }
+          | TAG_DESC { }
+          | TAG_DESI { }
+          | TAG_DEST { }
+          | TAG_DIV { }
+          | TAG_DIVF { }
+          | TAG_DSCR { }
+          | TAG_EDUC { }
+          | TAG_EMIG { }
+          | TAG_ENDL { }
+          | TAG_ENGA { }
+          | TAG_EVEN { }
+          | TAG_FAM { }
+          | TAG_FAMC { }
+          | TAG_FAMS { }
+          | TAG_FCOM { }
+          | TAG_FILE { }
+          | TAG_FORM { }
+          | TAG_GEDC { }
+          | TAG_GIVN { }
+          | TAG_GRAD { }
+          | TAG_HEAD { }
+          | TAG_HUSB { }
+          | TAG_IDNO { }
+          | TAG_IMMI { }
+          | TAG_INDI { }
+          | TAG_LANG { }
+          | TAG_LEGA { }
+          | TAG_MARB { }
+          | TAG_MARC { }
+          | TAG_MARL { }
+          | TAG_MARR { }
+          | TAG_MARS { }
+          | TAG_MEDI { }
+          | TAG_NAME { }
+          | TAG_NATI { }
+          | TAG_NCHI { }
+          | TAG_NICK { }
+          | TAG_NMR { }
+          | TAG_NOTE { }
+          | TAG_NPFX { }
+          | TAG_NSFX { }
+          | TAG_OBJE { }
+          | TAG_OCCU { }
+          | TAG_ORDI { }
+          | TAG_ORDN { }
+          | TAG_PAGE { }
+          | TAG_PEDI { }
+          | TAG_PHON { }
+          | TAG_PLAC { }
+          | TAG_POST { }
+          |
TAG_PROB { } + | TAG_PROP { } + | TAG_PUBL { } + | TAG_QUAY { } + | TAG_REFN { } + | TAG_RELA { } + | TAG_RELI { } + | TAG_REPO { } + | TAG_RESI { } + | TAG_RESN { } + | TAG_RETI { } + | TAG_RFN { } + | TAG_RIN { } + | TAG_ROLE { } + | TAG_SEX { } + | TAG_SLGC { } + | TAG_SLGS { } + | TAG_SOUR { } + | TAG_SPFX { } + | TAG_SSN { } + | TAG_STAE { } + | TAG_STAT { } + | TAG_SUBM { } + | TAG_SUBN { } + | TAG_SURN { } + | TAG_TEMP { } + | TAG_TEXT { } + | TAG_TIME { } + | TAG_TITL { } + | TAG_TRLR { } + | TAG_TYPE { } + | TAG_VERS { } + | TAG_WIFE { } + | TAG_WILL { } + +%% + +/* Functions that handle the counting of subtags */ + +int* count_arrays[MAXGEDCLEVEL+1]; +char tag_stack[MAXGEDCLEVEL+1][MAXSTDTAGLEN+1]; +Gedcom_ctxt ctxt_stack[MAXGEDCLEVEL+1]; + +void push_countarray() +{ + int *count = NULL; + if (count_level > MAXGEDCLEVEL) { + gedcom_error("Internal error: count array overflow"); + exit(1); + } + else { + count = (int *)calloc(YYNTOKENS, sizeof(int)); + if (count == NULL) { + gedcom_error("Internal error: count array calloc error"); + exit(1); + } + else { + count_arrays[count_level] = count; + } + } +} + +void set_parenttag(char* tag) +{ + strncpy(tag_stack[count_level], tag, MAXSTDTAGLEN+1); +} + +void set_parentctxt(Gedcom_ctxt ctxt) +{ + ctxt_stack[count_level] = ctxt; +} + +char* get_parenttag() +{ + return tag_stack[count_level]; +} + +Gedcom_ctxt get_parentctxt() +{ + return ctxt_stack[count_level]; +} + +int count_tag(int tag) +{ + int *count = count_arrays[count_level]; + return ++count[tag - GEDCOMTAGOFFSET]; +} + +int check_occurrence(int tag) +{ + int *count = count_arrays[count_level]; + return (count[tag - GEDCOMTAGOFFSET] > 0); +} + +void pop_countarray() +{ + int *count; + if (count_level < 0) { + gedcom_error("Internal error: count array underflow"); + exit(1); + } + else { + count = count_arrays[count_level]; + free(count); + count_arrays[count_level] = NULL; + } +} + +/* Enabling debug mode */ +/* level 0: no debugging */ +/* level 1: 
only internal */ +/* level 2: also bison */ +FILE* trace_output; + +void gedcom_set_debug_level(int level, FILE* f) +{ + if (f != NULL) + trace_output = f; + else + trace_output = stderr; + if (level > 0) { + gedcom_high_level_debug = 1; + } + if (level > 1) { +#if YYDEBUG != 0 + gedcom_debug = 1; +#endif + } +} + +int gedcom_debug_print(char* s, ...) +{ + int res; + if (gedcom_high_level_debug) { + va_list ap; + va_start(ap, s); + res = vfprintf(trace_output, s, ap); + va_end(ap); + } + return(res); +} + +/* Setting the error mechanism */ +void gedcom_set_error_handling(Gedcom_err_mech mechanism) +{ + error_mechanism = mechanism; +} + +/* Compatibility handling */ + +void gedcom_set_compat_handling(int enable_compat) +{ + compat_enabled = enable_compat; +} + +void set_compatibility(char* program) +{ + if (compat_enabled) { + gedcom_debug_print("==== Program: %s\n", program); + if (! strncmp(program, "ftree", 6)) { + gedcom_warning("Enabling compatibility with 'ftree'"); + compatibility = C_FTREE; + } + else { + compatibility = 0; + } + } +} + +int compat_mode(int compat_flags) +{ + return (compat_flags & compatibility); +} + diff --git a/gedcom/gedcom_1byte.lex b/gedcom/gedcom_1byte.lex new file mode 100644 index 0000000..a4a5659 --- /dev/null +++ b/gedcom/gedcom_1byte.lex @@ -0,0 +1,220 @@ +/* This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
*
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+/* Lexer for input in which every character is a single byte.  All rule
+   actions are macros from gedcom_lex_common.c, which is included twice:
+   once for its declarations, once (inside the rules) for the macros. */
+
+%{
+#undef IN_LEX     /* include only a specific part of the following file */
+#include "gedcom_lex_common.c"
+
+static size_t encoding_width = 1;
+%}
+
+%s NORMAL
+%s EXPECT_TAG
+
+alpha        [A-Za-z_]
+digit        [0-9]
+delim        " "
+tab          [\t]
+hash         #
+literal_at   @@
+otherchar    [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFE]
+terminator   \x0D|\x0A|\x0D\x0A|\x0A\x0D
+
+any_char      {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
+any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
+non_at        {alpha}|{digit}|{otherchar}|{delim}|{hash}
+alphanum      {alpha}|{digit}
+gen_delim     {delim}|{tab}
+
+escape       @#{any_char}+@
+pointer      @{alphanum}{non_at}+@
+
+%%
+
+%{
+#define IN_LEX    /* include only a specific part of the following file */
+#include "gedcom_lex_common.c"
+
+ACTION_BEFORE_REGEXPS
+
+%}
+
+<INITIAL>{gen_delim}*    ACTION_INITIAL_WHITESPACE
+
+<INITIAL>0{digit}+       ACTION_0_DIGITS
+
+<INITIAL>{digit}+        ACTION_DIGITS
+
+<EXPECT_TAG>ABBR   MKTAGACTION(ABBR)
+<EXPECT_TAG>ADDR   MKTAGACTION(ADDR)
+<EXPECT_TAG>ADR1   MKTAGACTION(ADR1)
+<EXPECT_TAG>ADR2   MKTAGACTION(ADR2)
+<EXPECT_TAG>ADOP   MKTAGACTION(ADOP)
+<EXPECT_TAG>AFN    MKTAGACTION(AFN)
+<EXPECT_TAG>AGE    MKTAGACTION(AGE)
+<EXPECT_TAG>AGNC   MKTAGACTION(AGNC)
+<EXPECT_TAG>ALIA   MKTAGACTION(ALIA)
+<EXPECT_TAG>ANCE   MKTAGACTION(ANCE)
+<EXPECT_TAG>ANCI   MKTAGACTION(ANCI)
+<EXPECT_TAG>ANUL   MKTAGACTION(ANUL)
+<EXPECT_TAG>ASSO   MKTAGACTION(ASSO)
+<EXPECT_TAG>AUTH   MKTAGACTION(AUTH)
+<EXPECT_TAG>BAPL   MKTAGACTION(BAPL)
+<EXPECT_TAG>BAPM   MKTAGACTION(BAPM)
+<EXPECT_TAG>BARM   MKTAGACTION(BARM)
+<EXPECT_TAG>BASM   MKTAGACTION(BASM)
+<EXPECT_TAG>BIRT   MKTAGACTION(BIRT)
+<EXPECT_TAG>BLES   MKTAGACTION(BLES)
+<EXPECT_TAG>BLOB   MKTAGACTION(BLOB)
+<EXPECT_TAG>BURI   MKTAGACTION(BURI)
+<EXPECT_TAG>CALN   MKTAGACTION(CALN)
+<EXPECT_TAG>CAST   MKTAGACTION(CAST)
+<EXPECT_TAG>CAUS   MKTAGACTION(CAUS)
+<EXPECT_TAG>CENS   MKTAGACTION(CENS)
+<EXPECT_TAG>CHAN   MKTAGACTION(CHAN)
+<EXPECT_TAG>CHAR   MKTAGACTION(CHAR)
+<EXPECT_TAG>CHIL   MKTAGACTION(CHIL)
+<EXPECT_TAG>CHR    MKTAGACTION(CHR)
+<EXPECT_TAG>CHRA   MKTAGACTION(CHRA)
+<EXPECT_TAG>CITY   MKTAGACTION(CITY)
+<EXPECT_TAG>CONC   MKTAGACTION(CONC)
+<EXPECT_TAG>CONF   MKTAGACTION(CONF)
+<EXPECT_TAG>CONL   MKTAGACTION(CONL)
+<EXPECT_TAG>CONT   MKTAGACTION(CONT)
+<EXPECT_TAG>COPR   MKTAGACTION(COPR)
+<EXPECT_TAG>CORP   MKTAGACTION(CORP)
+<EXPECT_TAG>CREM   MKTAGACTION(CREM)
+<EXPECT_TAG>CTRY   MKTAGACTION(CTRY)
+<EXPECT_TAG>DATA   MKTAGACTION(DATA)
+<EXPECT_TAG>DATE   MKTAGACTION(DATE)
+<EXPECT_TAG>DEAT   MKTAGACTION(DEAT)
+<EXPECT_TAG>DESC   MKTAGACTION(DESC)
+<EXPECT_TAG>DESI   MKTAGACTION(DESI)
+<EXPECT_TAG>DEST   MKTAGACTION(DEST)
+<EXPECT_TAG>DIV    MKTAGACTION(DIV)
+<EXPECT_TAG>DIVF   MKTAGACTION(DIVF)
+<EXPECT_TAG>DSCR   MKTAGACTION(DSCR)
+<EXPECT_TAG>EDUC   MKTAGACTION(EDUC)
+<EXPECT_TAG>EMIG   MKTAGACTION(EMIG)
+<EXPECT_TAG>ENDL   MKTAGACTION(ENDL)
+<EXPECT_TAG>ENGA   MKTAGACTION(ENGA)
+<EXPECT_TAG>EVEN   MKTAGACTION(EVEN)
+<EXPECT_TAG>FAM    MKTAGACTION(FAM)
+<EXPECT_TAG>FAMC   MKTAGACTION(FAMC)
+<EXPECT_TAG>FAMF   MKTAGACTION(FAMF)
+<EXPECT_TAG>FAMS   MKTAGACTION(FAMS)
+<EXPECT_TAG>FCOM   MKTAGACTION(FCOM)
+<EXPECT_TAG>FILE   MKTAGACTION(FILE)
+<EXPECT_TAG>FORM   MKTAGACTION(FORM)
+<EXPECT_TAG>GEDC   MKTAGACTION(GEDC)
+<EXPECT_TAG>GIVN   MKTAGACTION(GIVN)
+<EXPECT_TAG>GRAD   MKTAGACTION(GRAD)
+<EXPECT_TAG>HEAD   MKTAGACTION(HEAD)
+<EXPECT_TAG>HUSB   MKTAGACTION(HUSB)
+<EXPECT_TAG>IDNO   MKTAGACTION(IDNO)
+<EXPECT_TAG>IMMI   MKTAGACTION(IMMI)
+<EXPECT_TAG>INDI   MKTAGACTION(INDI)
+<EXPECT_TAG>LANG   MKTAGACTION(LANG)
+<EXPECT_TAG>LEGA   MKTAGACTION(LEGA)
+<EXPECT_TAG>MARB   MKTAGACTION(MARB)
+<EXPECT_TAG>MARC   MKTAGACTION(MARC)
+<EXPECT_TAG>MARL   MKTAGACTION(MARL)
+<EXPECT_TAG>MARR   MKTAGACTION(MARR)
+<EXPECT_TAG>MARS   MKTAGACTION(MARS)
+<EXPECT_TAG>MEDI   MKTAGACTION(MEDI)
+<EXPECT_TAG>NAME   MKTAGACTION(NAME)
+<EXPECT_TAG>NATI   MKTAGACTION(NATI)
+<EXPECT_TAG>NATU   MKTAGACTION(NATU)
+<EXPECT_TAG>NCHI   MKTAGACTION(NCHI)
+<EXPECT_TAG>NICK   MKTAGACTION(NICK)
+<EXPECT_TAG>NMR    MKTAGACTION(NMR)
+<EXPECT_TAG>NOTE   MKTAGACTION(NOTE)
+<EXPECT_TAG>NPFX   MKTAGACTION(NPFX)
+<EXPECT_TAG>NSFX   MKTAGACTION(NSFX)
+<EXPECT_TAG>OBJE   MKTAGACTION(OBJE)
+<EXPECT_TAG>OCCU   MKTAGACTION(OCCU)
+<EXPECT_TAG>ORDI   MKTAGACTION(ORDI)
+<EXPECT_TAG>ORDN   MKTAGACTION(ORDN)
+<EXPECT_TAG>PAGE   MKTAGACTION(PAGE)
+<EXPECT_TAG>PEDI   MKTAGACTION(PEDI)
+<EXPECT_TAG>PHON   MKTAGACTION(PHON)
+<EXPECT_TAG>PLAC   MKTAGACTION(PLAC)
+<EXPECT_TAG>POST   MKTAGACTION(POST)
+<EXPECT_TAG>PROB   MKTAGACTION(PROB)
+<EXPECT_TAG>PROP   MKTAGACTION(PROP)
+<EXPECT_TAG>PUBL   MKTAGACTION(PUBL)
+<EXPECT_TAG>QUAY   MKTAGACTION(QUAY)
+<EXPECT_TAG>REFN   MKTAGACTION(REFN)
+<EXPECT_TAG>RELA   MKTAGACTION(RELA)
+<EXPECT_TAG>RELI   MKTAGACTION(RELI)
+<EXPECT_TAG>REPO   MKTAGACTION(REPO)
+<EXPECT_TAG>RESI   MKTAGACTION(RESI)
+<EXPECT_TAG>RESN   MKTAGACTION(RESN)
+<EXPECT_TAG>RETI   MKTAGACTION(RETI)
+<EXPECT_TAG>RFN    MKTAGACTION(RFN)
+<EXPECT_TAG>RIN    MKTAGACTION(RIN)
+<EXPECT_TAG>ROLE   MKTAGACTION(ROLE)
+<EXPECT_TAG>SEX    MKTAGACTION(SEX)
+<EXPECT_TAG>SLGC   MKTAGACTION(SLGC)
+<EXPECT_TAG>SLGS   MKTAGACTION(SLGS)
+<EXPECT_TAG>SOUR   MKTAGACTION(SOUR)
+<EXPECT_TAG>SPFX   MKTAGACTION(SPFX)
+<EXPECT_TAG>SSN    MKTAGACTION(SSN)
+<EXPECT_TAG>STAE   MKTAGACTION(STAE)
+<EXPECT_TAG>STAT   MKTAGACTION(STAT)
+<EXPECT_TAG>SUBM   MKTAGACTION(SUBM)
+<EXPECT_TAG>SUBN   MKTAGACTION(SUBN)
+<EXPECT_TAG>SURN   MKTAGACTION(SURN)
+<EXPECT_TAG>TEMP   MKTAGACTION(TEMP)
+<EXPECT_TAG>TEXT   MKTAGACTION(TEXT)
+<EXPECT_TAG>TIME   MKTAGACTION(TIME)
+<EXPECT_TAG>TITL   MKTAGACTION(TITL)
+<EXPECT_TAG>TRLR   MKTAGACTION(TRLR)
+<EXPECT_TAG>TYPE   MKTAGACTION(TYPE)
+<EXPECT_TAG>VERS   MKTAGACTION(VERS)
+<EXPECT_TAG>WIFE   MKTAGACTION(WIFE)
+<EXPECT_TAG>WILL   MKTAGACTION(WILL)
+
+<EXPECT_TAG>{alphanum}+  ACTION_ALPHANUM
+
+<NORMAL,EXPECT_TAG>{delim}    ACTION_DELIM
+
+<NORMAL>{any_but_delim}  ACTION_ANY
+
+<NORMAL>{escape}/{non_at}     ACTION_ESCAPE
+
+<NORMAL>{pointer}        ACTION_POINTER
+
+{gen_delim}*{terminator}      ACTION_TERMINATOR
+
+<<EOF>>                  ACTION_EOF
+
+.                        ACTION_UNEXPECTED
+
+%%
+
+/* No further input files: tell the scanner to stop at end of file. */
+int yywrap()
+{
+  return 1;
+}
+
+#ifdef LEXER_TEST
+/* Stand-alone test driver: route gedcom_lex() to this scanner and run the
+   shared token-printing loop from gedcom_lex_common.c. */
+int gedcom_lex()
+{
+  return gedcom_1byte_lex();
+}
+
+int main()
+{
+  return test_loop(ONE_BYTE, "ASCII");
+}
+#endif
diff --git a/gedcom/gedcom_hilo.lex b/gedcom/gedcom_hilo.lex
new file mode 100644
index 0000000..5c674a7
--- /dev/null
+++ b/gedcom/gedcom_hilo.lex
@@ -0,0 +1,223 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.                                  *
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+/* In high-low order, a space is encoded as 0x00 0x20 */
+/* i.e.
this is utf-16-be */
+
+/* Lexer for two-byte input with the high byte first.  Every pattern is the
+   one-byte pattern with a \x00 byte in front of each ASCII character.  The
+   rule actions are the shared macros from gedcom_lex_common.c. */
+
+%{
+#undef IN_LEX     /* include only a specific part of the following file */
+#include "gedcom_lex_common.c"
+
+static size_t encoding_width = 2;
+%}
+
+%s NORMAL
+%s EXPECT_TAG
+
+alpha        \x00[A-Za-z_]
+digit        \x00[0-9]
+delim        \x00\x20
+tab          \x00[\t]
+hash         \x00#
+literal_at   \x00@\x00@
+otherchar    \x00[\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]|[\x01-\xFF][\x00-\xFF]
+terminator   \x00\x0D|\x00\x0A|\x00\x0D\x00\x0A|\x00\x0A\x00\x0D
+
+any_char      {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
+any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
+non_at        {alpha}|{digit}|{otherchar}|{delim}|{hash}
+alphanum      {alpha}|{digit}
+gen_delim     {delim}|{tab}
+
+escape       \x00@\x00#{any_char}+\x00@
+pointer      \x00@{alphanum}{non_at}+\x00@
+
+%%
+
+%{
+#define IN_LEX    /* include only a specific part of the following file */
+#include "gedcom_lex_common.c"
+
+ACTION_BEFORE_REGEXPS
+
+%}
+
+<INITIAL>{gen_delim}*       ACTION_INITIAL_WHITESPACE
+
+<INITIAL>\x00[0]{digit}+    ACTION_0_DIGITS
+
+<INITIAL>{digit}+           ACTION_DIGITS
+
+<EXPECT_TAG>\x00A\x00B\x00B\x00R  MKTAGACTION(ABBR)
+<EXPECT_TAG>\x00A\x00D\x00D\x00R  MKTAGACTION(ADDR)
+<EXPECT_TAG>\x00A\x00D\x00R\x001  MKTAGACTION(ADR1)
+<EXPECT_TAG>\x00A\x00D\x00R\x002  MKTAGACTION(ADR2)
+<EXPECT_TAG>\x00A\x00D\x00O\x00P  MKTAGACTION(ADOP)
+<EXPECT_TAG>\x00A\x00F\x00N       MKTAGACTION(AFN)
+<EXPECT_TAG>\x00A\x00G\x00E       MKTAGACTION(AGE)
+<EXPECT_TAG>\x00A\x00G\x00N\x00C  MKTAGACTION(AGNC)
+<EXPECT_TAG>\x00A\x00L\x00I\x00A  MKTAGACTION(ALIA)
+<EXPECT_TAG>\x00A\x00N\x00C\x00E  MKTAGACTION(ANCE)
+<EXPECT_TAG>\x00A\x00N\x00C\x00I  MKTAGACTION(ANCI)
+<EXPECT_TAG>\x00A\x00N\x00U\x00L  MKTAGACTION(ANUL)
+<EXPECT_TAG>\x00A\x00S\x00S\x00O  MKTAGACTION(ASSO)
+<EXPECT_TAG>\x00A\x00U\x00T\x00H  MKTAGACTION(AUTH)
+<EXPECT_TAG>\x00B\x00A\x00P\x00L  MKTAGACTION(BAPL)
+<EXPECT_TAG>\x00B\x00A\x00P\x00M  MKTAGACTION(BAPM)
+<EXPECT_TAG>\x00B\x00A\x00R\x00M  MKTAGACTION(BARM)
+<EXPECT_TAG>\x00B\x00A\x00S\x00M  MKTAGACTION(BASM)
+<EXPECT_TAG>\x00B\x00I\x00R\x00T  MKTAGACTION(BIRT)
+<EXPECT_TAG>\x00B\x00L\x00E\x00S  MKTAGACTION(BLES)
+<EXPECT_TAG>\x00B\x00L\x00O\x00B  MKTAGACTION(BLOB)
+<EXPECT_TAG>\x00B\x00U\x00R\x00I  MKTAGACTION(BURI)
+<EXPECT_TAG>\x00C\x00A\x00L\x00N  MKTAGACTION(CALN)
+<EXPECT_TAG>\x00C\x00A\x00S\x00T  MKTAGACTION(CAST)
+<EXPECT_TAG>\x00C\x00A\x00U\x00S  MKTAGACTION(CAUS)
+<EXPECT_TAG>\x00C\x00E\x00N\x00S  MKTAGACTION(CENS)
+<EXPECT_TAG>\x00C\x00H\x00A\x00N  MKTAGACTION(CHAN)
+<EXPECT_TAG>\x00C\x00H\x00A\x00R  MKTAGACTION(CHAR)
+<EXPECT_TAG>\x00C\x00H\x00I\x00L  MKTAGACTION(CHIL)
+<EXPECT_TAG>\x00C\x00H\x00R       MKTAGACTION(CHR)
+<EXPECT_TAG>\x00C\x00H\x00R\x00A  MKTAGACTION(CHRA)
+<EXPECT_TAG>\x00C\x00I\x00T\x00Y  MKTAGACTION(CITY)
+<EXPECT_TAG>\x00C\x00O\x00N\x00C  MKTAGACTION(CONC)
+<EXPECT_TAG>\x00C\x00O\x00N\x00F  MKTAGACTION(CONF)
+<EXPECT_TAG>\x00C\x00O\x00N\x00L  MKTAGACTION(CONL)
+<EXPECT_TAG>\x00C\x00O\x00N\x00T  MKTAGACTION(CONT)
+<EXPECT_TAG>\x00C\x00O\x00P\x00R  MKTAGACTION(COPR)
+<EXPECT_TAG>\x00C\x00O\x00R\x00P  MKTAGACTION(CORP)
+<EXPECT_TAG>\x00C\x00R\x00E\x00M  MKTAGACTION(CREM)
+<EXPECT_TAG>\x00C\x00T\x00R\x00Y  MKTAGACTION(CTRY)
+<EXPECT_TAG>\x00D\x00A\x00T\x00A  MKTAGACTION(DATA)
+<EXPECT_TAG>\x00D\x00A\x00T\x00E  MKTAGACTION(DATE)
+<EXPECT_TAG>\x00D\x00E\x00A\x00T  MKTAGACTION(DEAT)
+<EXPECT_TAG>\x00D\x00E\x00S\x00C  MKTAGACTION(DESC)
+<EXPECT_TAG>\x00D\x00E\x00S\x00I  MKTAGACTION(DESI)
+<EXPECT_TAG>\x00D\x00E\x00S\x00T  MKTAGACTION(DEST)
+<EXPECT_TAG>\x00D\x00I\x00V       MKTAGACTION(DIV)
+<EXPECT_TAG>\x00D\x00I\x00V\x00F  MKTAGACTION(DIVF)
+<EXPECT_TAG>\x00D\x00S\x00C\x00R  MKTAGACTION(DSCR)
+<EXPECT_TAG>\x00E\x00D\x00U\x00C  MKTAGACTION(EDUC)
+<EXPECT_TAG>\x00E\x00M\x00I\x00G  MKTAGACTION(EMIG)
+<EXPECT_TAG>\x00E\x00N\x00D\x00L  MKTAGACTION(ENDL)
+<EXPECT_TAG>\x00E\x00N\x00G\x00A  MKTAGACTION(ENGA)
+<EXPECT_TAG>\x00E\x00V\x00E\x00N  MKTAGACTION(EVEN)
+<EXPECT_TAG>\x00F\x00A\x00M       MKTAGACTION(FAM)
+<EXPECT_TAG>\x00F\x00A\x00M\x00C  MKTAGACTION(FAMC)
+<EXPECT_TAG>\x00F\x00A\x00M\x00F  MKTAGACTION(FAMF)
+<EXPECT_TAG>\x00F\x00A\x00M\x00S  MKTAGACTION(FAMS)
+<EXPECT_TAG>\x00F\x00C\x00O\x00M  MKTAGACTION(FCOM)
+<EXPECT_TAG>\x00F\x00I\x00L\x00E  MKTAGACTION(FILE)
+<EXPECT_TAG>\x00F\x00O\x00R\x00M  MKTAGACTION(FORM)
+<EXPECT_TAG>\x00G\x00E\x00D\x00C  MKTAGACTION(GEDC)
+<EXPECT_TAG>\x00G\x00I\x00V\x00N  MKTAGACTION(GIVN)
+<EXPECT_TAG>\x00G\x00R\x00A\x00D  MKTAGACTION(GRAD)
+<EXPECT_TAG>\x00H\x00E\x00A\x00D  MKTAGACTION(HEAD)
+<EXPECT_TAG>\x00H\x00U\x00S\x00B  MKTAGACTION(HUSB)
+<EXPECT_TAG>\x00I\x00D\x00N\x00O  MKTAGACTION(IDNO)
+<EXPECT_TAG>\x00I\x00M\x00M\x00I  MKTAGACTION(IMMI)
+<EXPECT_TAG>\x00I\x00N\x00D\x00I  MKTAGACTION(INDI)
+<EXPECT_TAG>\x00L\x00A\x00N\x00G  MKTAGACTION(LANG)
+<EXPECT_TAG>\x00L\x00E\x00G\x00A  MKTAGACTION(LEGA)
+<EXPECT_TAG>\x00M\x00A\x00R\x00B  MKTAGACTION(MARB)
+<EXPECT_TAG>\x00M\x00A\x00R\x00C  MKTAGACTION(MARC)
+<EXPECT_TAG>\x00M\x00A\x00R\x00L  MKTAGACTION(MARL)
+<EXPECT_TAG>\x00M\x00A\x00R\x00R  MKTAGACTION(MARR)
+<EXPECT_TAG>\x00M\x00A\x00R\x00S  MKTAGACTION(MARS)
+<EXPECT_TAG>\x00M\x00E\x00D\x00I  MKTAGACTION(MEDI)
+<EXPECT_TAG>\x00N\x00A\x00M\x00E  MKTAGACTION(NAME)
+<EXPECT_TAG>\x00N\x00A\x00T\x00I  MKTAGACTION(NATI)
+<EXPECT_TAG>\x00N\x00A\x00T\x00U  MKTAGACTION(NATU)
+<EXPECT_TAG>\x00N\x00C\x00H\x00I  MKTAGACTION(NCHI)
+<EXPECT_TAG>\x00N\x00I\x00C\x00K  MKTAGACTION(NICK)
+<EXPECT_TAG>\x00N\x00M\x00R       MKTAGACTION(NMR)
+<EXPECT_TAG>\x00N\x00O\x00T\x00E  MKTAGACTION(NOTE)
+<EXPECT_TAG>\x00N\x00P\x00F\x00X  MKTAGACTION(NPFX)
+<EXPECT_TAG>\x00N\x00S\x00F\x00X  MKTAGACTION(NSFX)
+<EXPECT_TAG>\x00O\x00B\x00J\x00E  MKTAGACTION(OBJE)
+<EXPECT_TAG>\x00O\x00C\x00C\x00U  MKTAGACTION(OCCU)
+<EXPECT_TAG>\x00O\x00R\x00D\x00I  MKTAGACTION(ORDI)
+<EXPECT_TAG>\x00O\x00R\x00D\x00N  MKTAGACTION(ORDN)
+<EXPECT_TAG>\x00P\x00A\x00G\x00E  MKTAGACTION(PAGE)
+<EXPECT_TAG>\x00P\x00E\x00D\x00I  MKTAGACTION(PEDI)
+<EXPECT_TAG>\x00P\x00H\x00O\x00N  MKTAGACTION(PHON)
+<EXPECT_TAG>\x00P\x00L\x00A\x00C  MKTAGACTION(PLAC)
+<EXPECT_TAG>\x00P\x00O\x00S\x00T  MKTAGACTION(POST)
+<EXPECT_TAG>\x00P\x00R\x00O\x00B  MKTAGACTION(PROB)
+<EXPECT_TAG>\x00P\x00R\x00O\x00P  MKTAGACTION(PROP)
+<EXPECT_TAG>\x00P\x00U\x00B\x00L  MKTAGACTION(PUBL)
+<EXPECT_TAG>\x00Q\x00U\x00A\x00Y  MKTAGACTION(QUAY)
+<EXPECT_TAG>\x00R\x00E\x00F\x00N  MKTAGACTION(REFN)
+<EXPECT_TAG>\x00R\x00E\x00L\x00A  MKTAGACTION(RELA)
+<EXPECT_TAG>\x00R\x00E\x00L\x00I  MKTAGACTION(RELI)
+<EXPECT_TAG>\x00R\x00E\x00P\x00O  MKTAGACTION(REPO)
+<EXPECT_TAG>\x00R\x00E\x00S\x00I  MKTAGACTION(RESI)
+<EXPECT_TAG>\x00R\x00E\x00S\x00N  MKTAGACTION(RESN)
+<EXPECT_TAG>\x00R\x00E\x00T\x00I  MKTAGACTION(RETI)
+<EXPECT_TAG>\x00R\x00F\x00N       MKTAGACTION(RFN)
+<EXPECT_TAG>\x00R\x00I\x00N       MKTAGACTION(RIN)
+<EXPECT_TAG>\x00R\x00O\x00L\x00E  MKTAGACTION(ROLE)
+<EXPECT_TAG>\x00S\x00E\x00X       MKTAGACTION(SEX)
+<EXPECT_TAG>\x00S\x00L\x00G\x00C  MKTAGACTION(SLGC)
+<EXPECT_TAG>\x00S\x00L\x00G\x00S  MKTAGACTION(SLGS)
+<EXPECT_TAG>\x00S\x00O\x00U\x00R  MKTAGACTION(SOUR)
+<EXPECT_TAG>\x00S\x00P\x00F\x00X  MKTAGACTION(SPFX)
+<EXPECT_TAG>\x00S\x00S\x00N       MKTAGACTION(SSN)
+<EXPECT_TAG>\x00S\x00T\x00A\x00E  MKTAGACTION(STAE)
+<EXPECT_TAG>\x00S\x00T\x00A\x00T  MKTAGACTION(STAT)
+<EXPECT_TAG>\x00S\x00U\x00B\x00M  MKTAGACTION(SUBM)
+<EXPECT_TAG>\x00S\x00U\x00B\x00N  MKTAGACTION(SUBN)
+<EXPECT_TAG>\x00S\x00U\x00R\x00N  MKTAGACTION(SURN)
+<EXPECT_TAG>\x00T\x00E\x00M\x00P  MKTAGACTION(TEMP)
+<EXPECT_TAG>\x00T\x00E\x00X\x00T  MKTAGACTION(TEXT)
+<EXPECT_TAG>\x00T\x00I\x00M\x00E  MKTAGACTION(TIME)
+<EXPECT_TAG>\x00T\x00I\x00T\x00L  MKTAGACTION(TITL)
+<EXPECT_TAG>\x00T\x00R\x00L\x00R  MKTAGACTION(TRLR)
+<EXPECT_TAG>\x00T\x00Y\x00P\x00E  MKTAGACTION(TYPE)
+<EXPECT_TAG>\x00V\x00E\x00R\x00S  MKTAGACTION(VERS)
+<EXPECT_TAG>\x00W\x00I\x00F\x00E  MKTAGACTION(WIFE)
+<EXPECT_TAG>\x00W\x00I\x00L\x00L  MKTAGACTION(WILL)
+
+<EXPECT_TAG>{alphanum}+     ACTION_ALPHANUM
+
+<NORMAL,EXPECT_TAG>{delim}  ACTION_DELIM
+
+<NORMAL>{any_but_delim}     ACTION_ANY
+
+<NORMAL>{escape}/{non_at}   ACTION_ESCAPE
+
+<NORMAL>{pointer}           ACTION_POINTER
+
+{gen_delim}*{terminator}    ACTION_TERMINATOR
+
+<<EOF>>                     ACTION_EOF
+
+.                           ACTION_UNEXPECTED
+
+%%
+
+/* No further input files: tell the scanner to stop at end of file. */
+int yywrap()
+{
+  return 1;
+}
+
+#ifdef LEXER_TEST
+/* Stand-alone test driver: route gedcom_lex() to this scanner and run the
+   shared token-printing loop from gedcom_lex_common.c. */
+int gedcom_lex()
+{
+  return gedcom_hilo_lex();
+}
+
+int main()
+{
+  return test_loop(TWO_BYTE_HILO, "UNICODE");
+}
+#endif
diff --git a/gedcom/gedcom_internal.h b/gedcom/gedcom_internal.h
new file mode 100644
index 0000000..1250340
--- /dev/null
+++ b/gedcom/gedcom_internal.h
@@ -0,0 +1,40 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.                                  *
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+#ifndef __GEDCOM_H
+#define __GEDCOM_H
+/* NOTE(review): the header names were missing from this copy of the patch.
+   The five below are reconstructed from what the sources use (malloc,
+   printf/vsnprintf, va_list, strlen/strerror, errno) -- confirm against
+   the repository. */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#ifdef DMALLOC
+#include <dmalloc.h>
+#endif
+
+/* Limits checked by the lexer actions in gedcom_lex_common.c. */
+#define MAXGEDCLEVEL 99       /* highest accepted level number          */
+#define MAXGEDCLINELEN 255    /* maximum line length, in characters     */
+#define MAXGEDCTAGLEN 31      /* maximum tag length, in characters      */
+#define MAXSTDTAGLEN 4
+#define MAXGEDCPTRLEN 22      /* maximum pointer length, in characters  */
+#define GEDCOMTAGOFFSET 257
+
+/* Message reporting (implemented in message.c). */
+int        gedcom_error(char* s, ...);
+int        gedcom_warning(char* s, ...);
+int        gedcom_message(char* s, ...);
+int        gedcom_debug_print(char* s, ...);
+
+int        gedcom_parse();
+int        gedcom_lex();
+
+extern int line_no;
+#endif /* __GEDCOM_H */
diff --git a/gedcom/gedcom_lex_common.c b/gedcom/gedcom_lex_common.c
new file mode 100644
index 0000000..5d734c4
--- /dev/null
+++ b/gedcom/gedcom_lex_common.c
@@ -0,0 +1,284 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software
Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.                                  *
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+/* This file is included twice by each lexer specification:
+     - with IN_LEX undefined: shared includes, static state and (under
+       LEXER_TEST) a token-printing test driver;
+     - with IN_LEX defined: the action macros used by the lexer rules.
+   Token lengths are always taken from yyleng, never strlen(yytext): in the
+   two-byte encodings yytext contains NUL bytes. */
+
+#ifndef IN_LEX
+
+#include "gedcom_internal.h"
+#include "multilex.h"
+#include "encoding.h"
+#include "gedcom.h"
+#include "gedcom.tab.h"
+
+#define YY_NO_UNPUT
+
+static size_t encoding_width;
+static int current_level = -1;
+static int level_diff=MAXGEDCLEVEL;
+static size_t line_len = 0;
+
+static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];
+static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];
+static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
+
+#ifdef LEXER_TEST
+YYSTYPE gedcom_lval;
+int line_no = 1;
+
+int gedcom_lex();
+
+/* Test driver: open the conversion from CODE to the internal encoding,
+   then print every token returned by gedcom_lex() until it returns 0.
+   Returns 0 on success, 1 when the conversion context cannot be opened. */
+int test_loop(ENCODING enc, char* code)
+{
+  int tok, res;
+  init_encodings();
+  set_encoding_width(enc);
+  res = open_conv_to_internal(code);
+  if (!res) {
+    gedcom_error("Unable to open conversion context: %s",
+                 strerror(errno));
+    return 1;
+  }
+  tok = gedcom_lex();
+  while (tok) {
+    switch(tok) {
+      case BADTOKEN: printf("BADTOKEN "); break;
+      case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
+      case CLOSE: printf("CLOSE "); break;
+      case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
+      case DELIM: printf("DELIM "); break;
+      case ANYCHAR: printf("%s ", gedcom_lval.string); break;
+      case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
+      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
+      default: printf("TAG(%s) ", gedcom_lval.string); break;
+    }
+    tok = gedcom_lex();
+  }
+  printf("\n");
+  close_conv_to_internal();
+  return 0;
+}
+
+#endif /* of #ifdef LEXER_TEST */
+
+#else  /* of #ifndef IN_LEX */
+
+#define TO_INTERNAL(STR,OUTBUF) \
+  to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))
+
+#define INIT_LINE_LEN \
+  line_len = 0;
+
+/* Add the current token to the running line length (in bytes) and fail
+   once the line exceeds MAXGEDCLINELEN characters.  line_len is set to
+   (size_t)-1 after the first error so it is reported once per line. */
+#define CHECK_LINE_LEN                                                         \
+  { if (line_len != (size_t)-1) {                                              \
+      line_len += (size_t)yyleng;                                              \
+      if (line_len > MAXGEDCLINELEN * encoding_width) {                        \
+        gedcom_error("Line too long, max %d characters",                       \
+                     MAXGEDCLINELEN);                                          \
+        line_len = (size_t)-1;                                                 \
+        return BADTOKEN;                                                       \
+      }                                                                        \
+    }                                                                          \
+  }
+
+#define MKTAGACTION(THETAG)                                                    \
+  { CHECK_LINE_LEN;                                                            \
+    gedcom_lval.string = TO_INTERNAL(yytext, tag_buf);                         \
+    BEGIN(NORMAL);                                                             \
+    return TAG_##THETAG;                                                       \
+  }
+
+/* The GEDCOM level number is converted into a sequence of opening
+   and closing brackets.  Simply put, the following GEDCOM fragment:
+
+   0 HEAD
+   1 SOUR genes
+   2 VERS 1.6
+   2 NAME Genes
+   1 DATE 07 OCT 2001
+   ...
+   0 TRLR
+
+   is converted into:
+
+   { HEAD                     (initial)
+   { SOUR genes               (1 higher: no closing brackets)
+   { VERS 1.6                 (1 higher: no closing brackets)
+   } { NAME Genes             (same level: 1 closing bracket)
+   } } { DATE 07 OCT 2001     (1 lower: 2 closing brackets)
+   ...
+   } { TRLR }
+
+   or more clearly:
+
+   { HEAD
+     { SOUR genes
+       { VERS 1.6 }
+       { NAME Genes } }
+     { DATE 07 OCT 2001
+   ... }
+   { TRLR }
+
+   But because this means that one token is converted into a series
+   of tokens, there is some initial code following immediately here
+   that returns "pending" tokens. */
+
+#define ACTION_BEFORE_REGEXPS                                                  \
+   { if (level_diff < 1) {                                                     \
+       level_diff++;                                                           \
+       return CLOSE;                                                           \
+     }                                                                         \
+     else if (level_diff == 1) {                                               \
+       level_diff++;                                                           \
+       gedcom_lval.number = current_level;                                     \
+       return OPEN;                                                            \
+     }                                                                         \
+     else {                                                                    \
+       /* out of brackets... */                                                \
+     }                                                                         \
+   }
+
+
+#define ACTION_INITIAL_WHITESPACE                                              \
+  { CHECK_LINE_LEN;                                                            \
+    /* ignore initial whitespace further */                                    \
+  }
+
+
+#define ACTION_0_DIGITS                                                        \
+   { gedcom_error ("Level number with leading zero");                          \
+     return BADTOKEN;                                                          \
+   }
+
+
+#define ACTION_DIGITS                                                          \
+   { int level = atoi(TO_INTERNAL(yytext, str_buf));                           \
+     CHECK_LINE_LEN;                                                           \
+     if ((level < 0) || (level > MAXGEDCLEVEL)) {                              \
+       gedcom_error ("Level number out of range [0..%d]",                      \
+                     MAXGEDCLEVEL);                                            \
+       return BADTOKEN;                                                        \
+     }                                                                         \
+     level_diff = level - current_level;                                       \
+     BEGIN(EXPECT_TAG);                                                        \
+     current_level = level;                                                    \
+     if (level_diff < 1) {                                                     \
+       level_diff++;                                                           \
+       return CLOSE;                                                           \
+     }                                                                         \
+     else if (level_diff == 1) {                                               \
+       level_diff++;                                                           \
+       gedcom_lval.number = current_level;                                     \
+       return OPEN;                                                            \
+     }                                                                         \
+     else {                                                                    \
+       /* should never happen (error to GEDCOM spec) */                        \
+       gedcom_error ("GEDCOM level number is %d higher than "                  \
+                     "previous",                                               \
+                     level_diff);                                              \
+       return BADTOKEN;                                                        \
+     }                                                                         \
+   }
+
+
+#define ACTION_ALPHANUM                                                        \
+   { if ((size_t)yyleng > MAXGEDCTAGLEN * encoding_width) {                    \
+       gedcom_error("Tag '%s' too long, max %d characters",                    \
+                    yytext, MAXGEDCTAGLEN);                                    \
+       return BADTOKEN;                                                        \
+     }                                                                         \
+     CHECK_LINE_LEN;                                                           \
+     gedcom_lval.string = TO_INTERNAL(yytext, tag_buf);                        \
+     BEGIN(NORMAL);                                                            \
+     return USERTAG;                                                           \
+   }
+
+
+#define ACTION_DELIM                                                           \
+  { CHECK_LINE_LEN;                                                            \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                         \
+    return DELIM;                                                              \
+  }
+
+
+#define ACTION_ANY                                                             \
+  { CHECK_LINE_LEN;                                                            \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                         \
+    /* Due to character conversions, it is possible that the current          \
+       character will be combined with the next, and so now we don't have a   \
+       character yet...                                                        \
+       In principle, this is only applicable to the 1byte case (e.g. ANSEL),  \
+       but it doesn't harm the unicode case.                                   \
+    */                                                                         \
+    if (strlen(gedcom_lval.string) > 0)                                        \
+      return ANYCHAR;                                                          \
+  }
+
+
+#define ACTION_ESCAPE                                                          \
+  { CHECK_LINE_LEN;                                                            \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                         \
+    return ESCAPE;                                                             \
+  }
+
+
+#define ACTION_POINTER                                                         \
+  { CHECK_LINE_LEN;                                                            \
+    if ((size_t)yyleng > MAXGEDCPTRLEN * encoding_width) {                     \
+      gedcom_error("Pointer '%s' too long, max %d characters",                 \
+                   yytext, MAXGEDCPTRLEN);                                     \
+      return BADTOKEN;                                                         \
+    }                                                                          \
+    gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf);                         \
+    return POINTER;                                                            \
+  }
+
+
+/* Due to the conversion of level numbers into brackets, the
+   terminator is not important, so no token is returned here.
+   Although not strictly according to the GEDCOM spec, we'll ignore
+   whitespace just before the terminator.
+*/
+
+#define ACTION_TERMINATOR                                                      \
+  { CHECK_LINE_LEN;                                                            \
+    INIT_LINE_LEN;                                                             \
+    line_no++;                                                                 \
+    BEGIN(INITIAL);                                                            \
+  }
+
+
+/* Eventually we have to return 1 closing bracket (for the trailer).
+   We can detect whether we have sent the closing bracket using the
+   level_diff (at eof, first it is 2, then we increment it ourselves)
+*/
+
+#define ACTION_EOF                                                             \
+  { if (level_diff == 2) {                                                     \
+      level_diff++;                                                            \
+      return CLOSE;                                                            \
+    }                                                                          \
+    else {                                                                     \
+      /* Reset our state */                                                    \
+      current_level = -1;                                                      \
+      level_diff = MAXGEDCLEVEL;                                               \
+      /* ... then terminate lex */                                             \
+      yyterminate();                                                           \
+    }                                                                          \
+  }
+
+
+/* The byte is cast to unsigned so values above 0x7F are not sign-extended
+   when printed with %02x. */
+#define ACTION_UNEXPECTED                                                      \
+  { gedcom_error("Unexpected character: '%s' (0x%02x)",                        \
+                 yytext, (unsigned int)(unsigned char)yytext[0]);              \
+    return BADTOKEN;                                                           \
+  }
+
+#endif /* IN_LEX */
diff --git a/gedcom/gedcom_lohi.lex b/gedcom/gedcom_lohi.lex
new file mode 100644
index 0000000..badd835
--- /dev/null
+++ b/gedcom/gedcom_lohi.lex
@@ -0,0 +1,223 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.
*
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+/* In low-high order, a space is encoded as 0x20 0x00 */
+/* i.e. this is utf-16-le */
+
+/* Every pattern is the one-byte pattern with a \x00 byte AFTER each ASCII
+   character.  The rule actions are the shared macros from
+   gedcom_lex_common.c. */
+
+%{
+#undef IN_LEX     /* include only a specific part of the following file */
+#include "gedcom_lex_common.c"
+
+static size_t encoding_width = 2;
+%}
+
+%s NORMAL
+%s EXPECT_TAG
+
+alpha        [A-Za-z_]\x00
+digit        [0-9]\x00
+delim        \x20\x00
+tab          [\t]\x00
+hash         #\x00
+literal_at   @\x00@\x00
+otherchar    [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]\x00|[\x00-\xFF][\x01-\xFF]
+terminator   \x0D\x00|\x0A\x00|\x0D\x00\x0A\x00|\x0A\x00\x0D\x00
+
+any_char      {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
+any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
+non_at        {alpha}|{digit}|{otherchar}|{delim}|{hash}
+alphanum      {alpha}|{digit}
+gen_delim     {delim}|{tab}
+
+escape       @\x00#\x00{any_char}+@\x00
+pointer      @\x00{alphanum}{non_at}+@\x00
+
+%%
+
+%{
+#define IN_LEX    /* include only a specific part of the following file */
+#include "gedcom_lex_common.c"
+
+ACTION_BEFORE_REGEXPS
+
+%}
+
+<INITIAL>{gen_delim}*       ACTION_INITIAL_WHITESPACE
+
+<INITIAL>[0]\x00{digit}+    ACTION_0_DIGITS
+
+<INITIAL>{digit}+           ACTION_DIGITS
+
+<EXPECT_TAG>A\x00B\x00B\x00R\x00  MKTAGACTION(ABBR)
+<EXPECT_TAG>A\x00D\x00D\x00R\x00  MKTAGACTION(ADDR)
+<EXPECT_TAG>A\x00D\x00R\x001\x00  MKTAGACTION(ADR1)
+<EXPECT_TAG>A\x00D\x00R\x002\x00  MKTAGACTION(ADR2)
+<EXPECT_TAG>A\x00D\x00O\x00P\x00  MKTAGACTION(ADOP)
+<EXPECT_TAG>A\x00F\x00N\x00       MKTAGACTION(AFN)
+<EXPECT_TAG>A\x00G\x00E\x00       MKTAGACTION(AGE)
+<EXPECT_TAG>A\x00G\x00N\x00C\x00  MKTAGACTION(AGNC)
+<EXPECT_TAG>A\x00L\x00I\x00A\x00  MKTAGACTION(ALIA)
+<EXPECT_TAG>A\x00N\x00C\x00E\x00  MKTAGACTION(ANCE)
+<EXPECT_TAG>A\x00N\x00C\x00I\x00  MKTAGACTION(ANCI)
+<EXPECT_TAG>A\x00N\x00U\x00L\x00  MKTAGACTION(ANUL)
+<EXPECT_TAG>A\x00S\x00S\x00O\x00  MKTAGACTION(ASSO)
+<EXPECT_TAG>A\x00U\x00T\x00H\x00  MKTAGACTION(AUTH)
+<EXPECT_TAG>B\x00A\x00P\x00L\x00  MKTAGACTION(BAPL)
+<EXPECT_TAG>B\x00A\x00P\x00M\x00  MKTAGACTION(BAPM)
+<EXPECT_TAG>B\x00A\x00R\x00M\x00  MKTAGACTION(BARM)
+<EXPECT_TAG>B\x00A\x00S\x00M\x00  MKTAGACTION(BASM)
+<EXPECT_TAG>B\x00I\x00R\x00T\x00  MKTAGACTION(BIRT)
+<EXPECT_TAG>B\x00L\x00E\x00S\x00  MKTAGACTION(BLES)
+<EXPECT_TAG>B\x00L\x00O\x00B\x00  MKTAGACTION(BLOB)
+<EXPECT_TAG>B\x00U\x00R\x00I\x00  MKTAGACTION(BURI)
+<EXPECT_TAG>C\x00A\x00L\x00N\x00  MKTAGACTION(CALN)
+<EXPECT_TAG>C\x00A\x00S\x00T\x00  MKTAGACTION(CAST)
+<EXPECT_TAG>C\x00A\x00U\x00S\x00  MKTAGACTION(CAUS)
+<EXPECT_TAG>C\x00E\x00N\x00S\x00  MKTAGACTION(CENS)
+<EXPECT_TAG>C\x00H\x00A\x00N\x00  MKTAGACTION(CHAN)
+<EXPECT_TAG>C\x00H\x00A\x00R\x00  MKTAGACTION(CHAR)
+<EXPECT_TAG>C\x00H\x00I\x00L\x00  MKTAGACTION(CHIL)
+<EXPECT_TAG>C\x00H\x00R\x00       MKTAGACTION(CHR)
+<EXPECT_TAG>C\x00H\x00R\x00A\x00  MKTAGACTION(CHRA)
+<EXPECT_TAG>C\x00I\x00T\x00Y\x00  MKTAGACTION(CITY)
+<EXPECT_TAG>C\x00O\x00N\x00C\x00  MKTAGACTION(CONC)
+<EXPECT_TAG>C\x00O\x00N\x00F\x00  MKTAGACTION(CONF)
+<EXPECT_TAG>C\x00O\x00N\x00L\x00  MKTAGACTION(CONL)
+<EXPECT_TAG>C\x00O\x00N\x00T\x00  MKTAGACTION(CONT)
+<EXPECT_TAG>C\x00O\x00P\x00R\x00  MKTAGACTION(COPR)
+<EXPECT_TAG>C\x00O\x00R\x00P\x00  MKTAGACTION(CORP)
+<EXPECT_TAG>C\x00R\x00E\x00M\x00  MKTAGACTION(CREM)
+<EXPECT_TAG>C\x00T\x00R\x00Y\x00  MKTAGACTION(CTRY)
+<EXPECT_TAG>D\x00A\x00T\x00A\x00  MKTAGACTION(DATA)
+<EXPECT_TAG>D\x00A\x00T\x00E\x00  MKTAGACTION(DATE)
+<EXPECT_TAG>D\x00E\x00A\x00T\x00  MKTAGACTION(DEAT)
+<EXPECT_TAG>D\x00E\x00S\x00C\x00  MKTAGACTION(DESC)
+<EXPECT_TAG>D\x00E\x00S\x00I\x00  MKTAGACTION(DESI)
+<EXPECT_TAG>D\x00E\x00S\x00T\x00  MKTAGACTION(DEST)
+<EXPECT_TAG>D\x00I\x00V\x00       MKTAGACTION(DIV)
+<EXPECT_TAG>D\x00I\x00V\x00F\x00  MKTAGACTION(DIVF)
+<EXPECT_TAG>D\x00S\x00C\x00R\x00  MKTAGACTION(DSCR)
+<EXPECT_TAG>E\x00D\x00U\x00C\x00  MKTAGACTION(EDUC)
+<EXPECT_TAG>E\x00M\x00I\x00G\x00  MKTAGACTION(EMIG)
+<EXPECT_TAG>E\x00N\x00D\x00L\x00  MKTAGACTION(ENDL)
+<EXPECT_TAG>E\x00N\x00G\x00A\x00  MKTAGACTION(ENGA)
+<EXPECT_TAG>E\x00V\x00E\x00N\x00  MKTAGACTION(EVEN)
+<EXPECT_TAG>F\x00A\x00M\x00       MKTAGACTION(FAM)
+<EXPECT_TAG>F\x00A\x00M\x00C\x00  MKTAGACTION(FAMC)
+<EXPECT_TAG>F\x00A\x00M\x00F\x00  MKTAGACTION(FAMF)
+<EXPECT_TAG>F\x00A\x00M\x00S\x00  MKTAGACTION(FAMS)
+<EXPECT_TAG>F\x00C\x00O\x00M\x00  MKTAGACTION(FCOM)
+<EXPECT_TAG>F\x00I\x00L\x00E\x00  MKTAGACTION(FILE)
+<EXPECT_TAG>F\x00O\x00R\x00M\x00  MKTAGACTION(FORM)
+<EXPECT_TAG>G\x00E\x00D\x00C\x00  MKTAGACTION(GEDC)
+<EXPECT_TAG>G\x00I\x00V\x00N\x00  MKTAGACTION(GIVN)
+<EXPECT_TAG>G\x00R\x00A\x00D\x00  MKTAGACTION(GRAD)
+<EXPECT_TAG>H\x00E\x00A\x00D\x00  MKTAGACTION(HEAD)
+<EXPECT_TAG>H\x00U\x00S\x00B\x00  MKTAGACTION(HUSB)
+<EXPECT_TAG>I\x00D\x00N\x00O\x00  MKTAGACTION(IDNO)
+<EXPECT_TAG>I\x00M\x00M\x00I\x00  MKTAGACTION(IMMI)
+<EXPECT_TAG>I\x00N\x00D\x00I\x00  MKTAGACTION(INDI)
+<EXPECT_TAG>L\x00A\x00N\x00G\x00  MKTAGACTION(LANG)
+<EXPECT_TAG>L\x00E\x00G\x00A\x00  MKTAGACTION(LEGA)
+<EXPECT_TAG>M\x00A\x00R\x00B\x00  MKTAGACTION(MARB)
+<EXPECT_TAG>M\x00A\x00R\x00C\x00  MKTAGACTION(MARC)
+<EXPECT_TAG>M\x00A\x00R\x00L\x00  MKTAGACTION(MARL)
+<EXPECT_TAG>M\x00A\x00R\x00R\x00  MKTAGACTION(MARR)
+<EXPECT_TAG>M\x00A\x00R\x00S\x00  MKTAGACTION(MARS)
+<EXPECT_TAG>M\x00E\x00D\x00I\x00  MKTAGACTION(MEDI)
+<EXPECT_TAG>N\x00A\x00M\x00E\x00  MKTAGACTION(NAME)
+<EXPECT_TAG>N\x00A\x00T\x00I\x00  MKTAGACTION(NATI)
+<EXPECT_TAG>N\x00A\x00T\x00U\x00  MKTAGACTION(NATU)
+<EXPECT_TAG>N\x00C\x00H\x00I\x00  MKTAGACTION(NCHI)
+<EXPECT_TAG>N\x00I\x00C\x00K\x00  MKTAGACTION(NICK)
+<EXPECT_TAG>N\x00M\x00R\x00       MKTAGACTION(NMR)
+<EXPECT_TAG>N\x00O\x00T\x00E\x00  MKTAGACTION(NOTE)
+<EXPECT_TAG>N\x00P\x00F\x00X\x00  MKTAGACTION(NPFX)
+<EXPECT_TAG>N\x00S\x00F\x00X\x00  MKTAGACTION(NSFX)
+<EXPECT_TAG>O\x00B\x00J\x00E\x00  MKTAGACTION(OBJE)
+<EXPECT_TAG>O\x00C\x00C\x00U\x00  MKTAGACTION(OCCU)
+<EXPECT_TAG>O\x00R\x00D\x00I\x00  MKTAGACTION(ORDI)
+<EXPECT_TAG>O\x00R\x00D\x00N\x00  MKTAGACTION(ORDN)
+<EXPECT_TAG>P\x00A\x00G\x00E\x00  MKTAGACTION(PAGE)
+<EXPECT_TAG>P\x00E\x00D\x00I\x00  MKTAGACTION(PEDI)
+<EXPECT_TAG>P\x00H\x00O\x00N\x00  MKTAGACTION(PHON)
+<EXPECT_TAG>P\x00L\x00A\x00C\x00  MKTAGACTION(PLAC)
+<EXPECT_TAG>P\x00O\x00S\x00T\x00  MKTAGACTION(POST)
+<EXPECT_TAG>P\x00R\x00O\x00B\x00  MKTAGACTION(PROB)
+<EXPECT_TAG>P\x00R\x00O\x00P\x00  MKTAGACTION(PROP)
+<EXPECT_TAG>P\x00U\x00B\x00L\x00  MKTAGACTION(PUBL)
+<EXPECT_TAG>Q\x00U\x00A\x00Y\x00  MKTAGACTION(QUAY)
+<EXPECT_TAG>R\x00E\x00F\x00N\x00  MKTAGACTION(REFN)
+<EXPECT_TAG>R\x00E\x00L\x00A\x00  MKTAGACTION(RELA)
+<EXPECT_TAG>R\x00E\x00L\x00I\x00  MKTAGACTION(RELI)
+<EXPECT_TAG>R\x00E\x00P\x00O\x00  MKTAGACTION(REPO)
+<EXPECT_TAG>R\x00E\x00S\x00I\x00  MKTAGACTION(RESI)
+<EXPECT_TAG>R\x00E\x00S\x00N\x00  MKTAGACTION(RESN)
+<EXPECT_TAG>R\x00E\x00T\x00I\x00  MKTAGACTION(RETI)
+<EXPECT_TAG>R\x00F\x00N\x00       MKTAGACTION(RFN)
+<EXPECT_TAG>R\x00I\x00N\x00       MKTAGACTION(RIN)
+<EXPECT_TAG>R\x00O\x00L\x00E\x00  MKTAGACTION(ROLE)
+<EXPECT_TAG>S\x00E\x00X\x00       MKTAGACTION(SEX)
+<EXPECT_TAG>S\x00L\x00G\x00C\x00  MKTAGACTION(SLGC)
+<EXPECT_TAG>S\x00L\x00G\x00S\x00  MKTAGACTION(SLGS)
+<EXPECT_TAG>S\x00O\x00U\x00R\x00  MKTAGACTION(SOUR)
+<EXPECT_TAG>S\x00P\x00F\x00X\x00  MKTAGACTION(SPFX)
+<EXPECT_TAG>S\x00S\x00N\x00       MKTAGACTION(SSN)
+<EXPECT_TAG>S\x00T\x00A\x00E\x00  MKTAGACTION(STAE)
+<EXPECT_TAG>S\x00T\x00A\x00T\x00  MKTAGACTION(STAT)
+<EXPECT_TAG>S\x00U\x00B\x00M\x00  MKTAGACTION(SUBM)
+<EXPECT_TAG>S\x00U\x00B\x00N\x00  MKTAGACTION(SUBN)
+<EXPECT_TAG>S\x00U\x00R\x00N\x00  MKTAGACTION(SURN)
+<EXPECT_TAG>T\x00E\x00M\x00P\x00  MKTAGACTION(TEMP)
+<EXPECT_TAG>T\x00E\x00X\x00T\x00  MKTAGACTION(TEXT)
+<EXPECT_TAG>T\x00I\x00M\x00E\x00  MKTAGACTION(TIME)
+<EXPECT_TAG>T\x00I\x00T\x00L\x00  MKTAGACTION(TITL)
+<EXPECT_TAG>T\x00R\x00L\x00R\x00  MKTAGACTION(TRLR)
+<EXPECT_TAG>T\x00Y\x00P\x00E\x00  MKTAGACTION(TYPE)
+<EXPECT_TAG>V\x00E\x00R\x00S\x00  MKTAGACTION(VERS)
+<EXPECT_TAG>W\x00I\x00F\x00E\x00  MKTAGACTION(WIFE)
+<EXPECT_TAG>W\x00I\x00L\x00L\x00  MKTAGACTION(WILL)
+
+<EXPECT_TAG>{alphanum}+     ACTION_ALPHANUM
+
+<NORMAL,EXPECT_TAG>{delim}  ACTION_DELIM
+
+<NORMAL>{any_but_delim}     ACTION_ANY
+
+<NORMAL>{escape}/{non_at}   ACTION_ESCAPE
+
+<NORMAL>{pointer}           ACTION_POINTER
+
+{gen_delim}*{terminator}    ACTION_TERMINATOR
+
+<<EOF>>                     ACTION_EOF
+
+.                           ACTION_UNEXPECTED
+
+%%
+
+/* No further input files: tell the scanner to stop at end of file. */
+int yywrap()
+{
+  return 1;
+}
+
+#ifdef LEXER_TEST
+/* Stand-alone test driver: route gedcom_lex() to this scanner and run the
+   shared token-printing loop from gedcom_lex_common.c. */
+int gedcom_lex()
+{
+  return gedcom_lohi_lex();
+}
+
+int main()
+{
+  return test_loop(TWO_BYTE_LOHI, "UNICODE");
+}
+#endif
diff --git a/gedcom/interface.c b/gedcom/interface.c
new file mode 100644
index 0000000..c4db84b
--- /dev/null
+++ b/gedcom/interface.c
@@ -0,0 +1,80 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.
*
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+#include "gedcom_internal.h"
+#include "interface.h"
+
+/* Callback tables, indexed by record / element type.  An entry that is
+   NULL means nobody subscribed to that type. */
+static Gedcom_rec_start_cb record_start_callback [LAST_REC] = { NULL };
+static Gedcom_rec_end_cb   record_end_callback   [LAST_REC] = { NULL };
+static Gedcom_elt_start_cb element_start_callback[LAST_ELT] = { NULL };
+static Gedcom_elt_end_cb   element_end_callback  [LAST_ELT] = { NULL };
+static Gedcom_def_cb       default_cb                       = NULL;
+
+/* Install FUNC as the handler for elements without a start callback. */
+void gedcom_set_default_callback(Gedcom_def_cb func)
+{
+  default_cb = func;
+}
+
+/* Register the start and end callbacks for record type REC. */
+void gedcom_subscribe_to_record(Gedcom_rec rec,
+                                Gedcom_rec_start_cb cb_start,
+                                Gedcom_rec_end_cb cb_end)
+{
+  record_end_callback[rec]   = cb_end;
+  record_start_callback[rec] = cb_start;
+}
+
+/* Register the start and end callbacks for element type ELT. */
+void gedcom_subscribe_to_element(Gedcom_elt elt,
+                                 Gedcom_elt_start_cb cb_start,
+                                 Gedcom_elt_end_cb cb_end)
+{
+  element_end_callback[elt]   = cb_end;
+  element_start_callback[elt] = cb_start;
+}
+
+/* Run the start callback of record type REC.  Returns its result, or NULL
+   when no callback is registered. */
+Gedcom_ctxt start_record(Gedcom_rec rec,
+                         int level, char *xref, char *tag)
+{
+  Gedcom_rec_start_cb handler = record_start_callback[rec];
+
+  if (handler == NULL)
+    return NULL;
+  return handler(level, xref, tag);
+}
+
+/* Run the end callback of record type REC, if one is registered. */
+void end_record(Gedcom_rec rec, Gedcom_ctxt self)
+{
+  Gedcom_rec_end_cb handler = record_end_callback[rec];
+
+  if (handler == NULL)
+    return;
+  handler(self);
+}
+
+/* Run the start callback of element type ELT and return its result.
+   Without a specific callback the default callback (if any) is run and
+   the parent context is returned unchanged. */
+Gedcom_ctxt start_element(Gedcom_elt elt, Gedcom_ctxt parent,
+                          int level, char *tag, char *raw_value,
+                          Gedcom_val parsed_value)
+{
+  Gedcom_elt_start_cb handler = element_start_callback[elt];
+
+  if (handler != NULL)
+    return handler(parent, level, tag, raw_value, parsed_value);
+
+  if (default_cb != NULL)
+    default_cb(parent, level, tag, raw_value);
+  return parent;
+}
+
+/* Run the end callback of element type ELT, if one is registered. */
+void end_element(Gedcom_elt elt, Gedcom_ctxt parent, Gedcom_ctxt self,
+                 Gedcom_val parsed_value)
+{
+  Gedcom_elt_end_cb handler = element_end_callback[elt];
+
+  if (handler == NULL)
+    return;
+  handler(parent, self, parsed_value);
+}
+
diff --git
a/gedcom/interface.h b/gedcom/interface.h
new file mode 100644
index 0000000..db1a048
--- /dev/null
+++ b/gedcom/interface.h
@@ -0,0 +1,28 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.                                  *
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+#ifndef __INTERFACE_H
+#define __INTERFACE_H
+
+#include "gedcom.h"
+
+/* Dispatch functions implemented in interface.c.  Each one looks up the
+   callback subscribed for the given record or element type and calls it
+   when one is registered. */
+
+/* start_record returns the callback's result, or NULL when no callback is
+   registered for REC. */
+Gedcom_ctxt start_record(Gedcom_rec rec, int level, char *xref, char *tag);
+void        end_record(Gedcom_rec rec, Gedcom_ctxt self);
+
+/* start_element returns the callback's result; when no callback is
+   registered for ELT it calls the default callback (if set) and returns
+   PARENT. */
+Gedcom_ctxt start_element(Gedcom_elt elt, Gedcom_ctxt parent,
+                          int level, char *tag, char *raw_value,
+                          Gedcom_val parsed_value);
+void        end_element(Gedcom_elt elt, Gedcom_ctxt parent, Gedcom_ctxt self,
+                        Gedcom_val parsed_value);
+
+
+#endif /* __INTERFACE_H */
diff --git a/gedcom/message.c b/gedcom/message.c
new file mode 100644
index 0000000..6d44530
--- /dev/null
+++ b/gedcom/message.c
@@ -0,0 +1,124 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.
*
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+#include "gedcom_internal.h"
+#include "gedcom.h"
+
+/* Messages are assembled in one growable, NUL-terminated buffer and then
+   handed to the registered message handler. */
+#define INITIAL_BUF_SIZE 256
+char *mess_buffer = NULL;
+size_t bufsize;
+
+Gedcom_msg_handler msg_handler = NULL;
+
+/* Register FUNC as the receiver of all messages, warnings and errors. */
+void gedcom_set_message_handler(Gedcom_msg_handler func)
+{
+  msg_handler = func;
+}
+
+/* Empty the message buffer without releasing it. */
+void reset_mess_buffer()
+{
+  if (mess_buffer != NULL)
+    mess_buffer[0] = '\0';
+}
+
+/* Allocate the message buffer on first use.  On allocation failure
+   mess_buffer stays NULL; callers must check for that. */
+void init_mess_buffer()
+{
+  if (mess_buffer == NULL) {
+    mess_buffer = (char *)malloc(INITIAL_BUF_SIZE);
+    if (mess_buffer != NULL) {
+      mess_buffer[0] = '\0';
+      bufsize = INITIAL_BUF_SIZE;
+    }
+  }
+}
+
+/* Append the printf-style formatted text (format S, arguments AP) to the
+   message buffer, growing the buffer as needed.
+   Returns the number of characters appended, or -1 when memory could not
+   be allocated (the buffer then keeps its previous contents). */
+int safe_buf_vappend(char *s, va_list ap)
+{
+  int res;
+  size_t len;
+
+  init_mess_buffer();
+  if (mess_buffer == NULL)
+    return -1;
+
+  len = strlen(mess_buffer);
+  while (1) {
+    size_t rest_size = bufsize - len;
+    va_list ap_copy;
+
+    /* A va_list may be traversed only once, so every retry needs its
+       own copy. */
+    va_copy(ap_copy, ap);
+    res = vsnprintf(mess_buffer + len, rest_size, s, ap_copy);
+    va_end(ap_copy);
+
+    if (res > -1 && (size_t)res < rest_size) {
+      break;
+    }
+    else {
+      /* Output was truncated: double the buffer and try again.  Keep the
+         old pointer until realloc has succeeded. */
+      char *grown = realloc(mess_buffer, bufsize * 2);
+      if (grown == NULL) {
+        mess_buffer[len] = '\0';    /* discard the partial output */
+        return -1;
+      }
+      mess_buffer = grown;
+      bufsize *= 2;
+    }
+  }
+  return res;
+}
+
+/* Variadic front end for safe_buf_vappend(). */
+int safe_buf_append(char *s, ...)
+{
+  int res;
+  va_list ap;
+
+  va_start(ap, s);
+  res = safe_buf_vappend(s, ap);
+  va_end(ap);
+
+  return res;
+}
+
+/* Format an informational message and pass it to the message handler. */
+int gedcom_message(char* s, ...)
+{
+  int res;
+  va_list ap;
+
+  va_start(ap, s);
+  reset_mess_buffer();
+  res = safe_buf_vappend(s, ap);
+  va_end(ap);
+  safe_buf_append("\n");
+  if (msg_handler && mess_buffer)
+    (*msg_handler)(MESSAGE, mess_buffer);
+  return res;
+}
+
+/* Format a warning, prefixed with the current line number, and pass it to
+   the message handler. */
+int gedcom_warning(char* s, ...)
+{
+  int res;
+  va_list ap;
+
+  reset_mess_buffer();
+  safe_buf_append("Warning on line %d: ", line_no);
+  va_start(ap, s);
+  res = safe_buf_vappend(s, ap);
+  va_end(ap);
+  safe_buf_append("\n");
+  if (msg_handler && mess_buffer)
+    (*msg_handler)(WARNING, mess_buffer);
+
+  return res;
+}
+
+/* Format an error, prefixed with the current line number, and pass it to
+   the message handler. */
+int gedcom_error(char* s, ...)
+{
+  int res;
+  va_list ap;
+
+  reset_mess_buffer();
+  safe_buf_append("Error on line %d: ", line_no);
+  va_start(ap, s);
+  res = safe_buf_vappend(s, ap);
+  va_end(ap);
+  safe_buf_append("\n");
+  if (msg_handler && mess_buffer)
+    (*msg_handler)(ERROR, mess_buffer);
+
+  return res;
+}
diff --git a/gedcom/multilex.c b/gedcom/multilex.c
new file mode 100644
index 0000000..a6ea0fa
--- /dev/null
+++ b/gedcom/multilex.c
@@ -0,0 +1,118 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.                                  *
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+#include "gedcom_internal.h"
+#include "multilex.h"
+#include "encoding.h"
+
+int line_no;
+
+/* The scanner selected by lexer_init(); gedcom_lex() forwards to it. */
+typedef int (*lex_func)(void);
+lex_func lf;
+
+/* Select the scanner matching ENC, make it read from F and open the
+   conversion to the internal encoding.
+   Returns the result of open_conv_to_internal(), or 0 for an unknown
+   encoding. */
+int lexer_init(ENCODING enc, FILE* f)
+{
+  if (enc == ONE_BYTE) {
+    gedcom_1byte_in = f;
+    lf = &gedcom_1byte_lex;
+    set_encoding_width(enc);
+    return open_conv_to_internal("ASCII");
+  }
+  else if (enc == TWO_BYTE_HILO) {
+    gedcom_hilo_in = f;
+    lf = &gedcom_hilo_lex;
+    set_encoding_width(enc);
+    return open_conv_to_internal("UNICODE");
+  }
+  else if (enc == TWO_BYTE_LOHI) {
+    gedcom_lohi_in = f;
+    lf = &gedcom_lohi_lex;
+    set_encoding_width(enc);
+    return open_conv_to_internal("UNICODE");
+  }
+  else {
+    return 0;
+  }
+}
+
+void lexer_close()
+{
+  close_conv_to_internal();
+}
+
+int gedcom_lex()
+{
+  return (*lf)();
+}
+
+/* Guess the encoding of F from its first two bytes: either the start of
+   the first line ("0 " in one-byte or two-byte form) or a byte order mark.
+   The stream is rewound unless a byte order mark was consumed.  Files
+   shorter than two bytes and unrecognised prefixes fall back to one-byte
+   encoding. */
+int determine_encoding(FILE* f)
+{
+  unsigned char first[2] = { 0, 0 };
+  size_t nread = fread(first, 1, sizeof first, f);
+
+  if (nread < sizeof first) {
+    gedcom_message("Unknown encoding, falling back to one-byte");
+    fseek(f, 0, SEEK_SET);
+    return ONE_BYTE;
+  }
+  else if ((first[0] == '0') && (first[1] == ' ')) {
+    gedcom_message("One-byte encoding");
+    fseek(f, 0, SEEK_SET);
+    return ONE_BYTE;
+  }
+  else if ((first[0] == 0x00) && (first[1] == '0'))
+  {
+    gedcom_message("Two-byte encoding, high-low");
+    fseek(f, 0, SEEK_SET);
+    return TWO_BYTE_HILO;
+  }
+  else if ((first[0] == 0xFE) && (first[1] == 0xFF))
+  {
+    gedcom_message("Two-byte encoding, high-low, with BOM");
+    return TWO_BYTE_HILO;
+  }
+  else if ((first[0] == '0') && (first[1] == 0x00))
+  {
+    gedcom_message("Two-byte encoding, low-high");
+    fseek(f, 0, SEEK_SET);
+    return TWO_BYTE_LOHI;
+  }
+  else if ((first[0] == 0xFF) && (first[1] == 0xFE))
+  {
+    gedcom_message("Two-byte encoding, low-high, with BOM");
+    return TWO_BYTE_LOHI;
+  }
+  else {
+    gedcom_message("Unknown encoding, falling back to one-byte");
+    fseek(f, 0, SEEK_SET);
+    return ONE_BYTE;
+  }
+}
+
+/* Parse the GEDCOM file FILE_NAME.
+   Returns the result of gedcom_parse(), or 1 when the file cannot be
+   opened or the lexer cannot be initialised. */
+int gedcom_parse_file(char* file_name)
+{
+  ENCODING enc;
+  int result = 1;
+  /* Binary mode: the input may be two-byte text, which must reach the
+     scanner untranslated. */
+  FILE* file = fopen (file_name, "rb");
+  line_no = 1;
+  if (!file) {
+    /* gedcom_error() appends the newline itself. */
+    gedcom_error("Could not open file '%s'", file_name);
+    return 1;
+  }
+
+  init_encodings();
+  enc = determine_encoding(file);
+
+  if (lexer_init(enc, file)) {
+    result = gedcom_parse();
+  }
+  lexer_close();
+  fclose(file);
+
+  return result;
+}
+
diff --git a/gedcom/multilex.h b/gedcom/multilex.h
new file mode 100644
index 0000000..6a16393
--- /dev/null
+++ b/gedcom/multilex.h
@@ -0,0 +1,25 @@
+/* This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or    *
+ * (at your option) any later version.                                  *
+
+   (C) 2001 by The Genes Development Team
+   Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+#ifndef __MULTILEX_H
+#define __MULTILEX_H
+/* NOTE(review): the header name was missing from this copy of the patch;
+   <stdio.h> is assumed because FILE is used below -- confirm. */
+#include <stdio.h>
+
+/* Entry point and input stream of each generated scanner. */
+int        gedcom_1byte_lex();
+extern FILE *gedcom_1byte_in;
+
+int        gedcom_hilo_lex();
+extern FILE *gedcom_hilo_in;
+
+int        gedcom_lohi_lex();
+extern FILE *gedcom_lohi_in;
+#endif /* __MULTILEX_H */
-- 
2.30.2