From: Peter Verthez Date: Sat, 7 Dec 2002 14:40:50 +0000 (+0000) Subject: Pass complete concatenated text in end callback. X-Git-Url: https://git.dlugolecki.net.pl/?a=commitdiff_plain;h=505301d49e81bcd0e1827026cd4672b8722ac349;p=gedcom-parse.git Pass complete concatenated text in end callback. --- diff --git a/gedcom/compat.c b/gedcom/compat.c index 3b72548..507de09 100644 --- a/gedcom/compat.c +++ b/gedcom/compat.c @@ -126,7 +126,7 @@ void compat_generate_submitter() end_element(ELT_SUBM_NAME, self1, self2, NULL); /* close "0 SUBM" */ - end_record(REC_SUBM, self1); + end_record(REC_SUBM, self1, NULL); } void compat_generate_gedcom(Gedcom_ctxt parent) diff --git a/gedcom/gedcom.y b/gedcom/gedcom.y index 56f10c2..2f2e0a4 100644 --- a/gedcom/gedcom.y +++ b/gedcom/gedcom.y @@ -149,6 +149,7 @@ #include "age.h" #include "xref.h" #include "compat.h" +#include "buffer.h" int count_level = 0; int fail = 0; @@ -160,6 +161,9 @@ Gedcom_val_struct val2; char line_item_buf[MAXGEDCLINELEN * UTF_FACTOR + 1]; char *line_item_buf_ptr; +void cleanup_concat_buffer(); +struct safe_buffer concat_buffer = { NULL, 0, cleanup_concat_buffer }; + /* These are defined at the bottom of the file */ void push_countarray(int level); void set_parenttag(const char* tag); @@ -456,7 +460,7 @@ head_sect : OPEN DELIM TAG_HEAD CHECK4(SOUR, SUBM, GEDC, CHAR) } CLOSE - { end_record(REC_HEAD, $4); + { end_record(REC_HEAD, $4, NULL); if (compat_mode(C_FTREE | C_LIFELINES)) compat_generate_submitter(); } @@ -870,12 +874,16 @@ head_note_sect : OPEN DELIM TAG_NOTE mand_line_item { $$ = start_element(ELT_HEAD_NOTE, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $4); START(NOTE, $1, $$) } head_note_subs { CHECK0 } CLOSE - { end_element(ELT_HEAD_NOTE, PARENT, $5, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_HEAD_NOTE, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -909,7 +917,7 @@ fam_rec : OPEN DELIM POINTER DELIM TAG_FAM fam_subs { CHECK0 } CLOSE - { end_record(REC_FAM, $6); } + { end_record(REC_FAM, $6, NULL); } ; fam_subs : /* empty */ @@ -1027,7 +1035,7 @@ indiv_rec : OPEN DELIM POINTER DELIM TAG_INDI indi_subs { CHECK0 } CLOSE - { end_record(REC_INDI, $6); } + { end_record(REC_INDI, $6, NULL); } ; indi_subs : /* empty */ @@ -1247,7 +1255,7 @@ multim_rec : OPEN DELIM POINTER DELIM TAG_OBJE obje_subs { CHECK2(FORM, BLOB) } CLOSE - { end_record(REC_OBJE, $6); } + { end_record(REC_OBJE, $6, NULL); } ; obje_subs : /* empty */ @@ -1297,12 +1305,16 @@ obje_blob_sect : OPEN DELIM TAG_BLOB { $$ = start_element(ELT_OBJE_BLOB, PARENT, $1, $3, NULL, GEDCOM_MAKE_NULL(val1)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, ""); START(BLOB, $1, $$) } obje_blob_subs { CHECK1(CONT) } CLOSE - { end_element(ELT_OBJE_BLOB, PARENT, $4, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_OBJE_BLOB, PARENT, $4, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -1318,6 +1330,7 @@ obje_blob_cont_sect : OPEN DELIM TAG_CONT mand_line_item { $$ = start_element(ELT_OBJE_BLOB_CONT, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + safe_buf_append(&concat_buffer, $4); START(CONT, $1, $$) } no_std_subs @@ -1355,11 +1368,15 @@ note_rec : OPEN DELIM POINTER DELIM TAG_NOTE note_line_item $$ = start_record(REC_NOTE, $1, GEDCOM_MAKE_XREF_PTR(val1, xr), $5, $6, GEDCOM_MAKE_STRING(val2, $6)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $6); START(NOTE, $1, $$) } note_subs { CHECK0 } CLOSE - { end_record(REC_NOTE, $7); } + { char* complete = get_buf_string(&concat_buffer); + end_record(REC_NOTE, $7, + GEDCOM_MAKE_STRING(val1, complete)); } ; note_line_item : /* empty */ @@ -1400,7 +1417,7 @@ repos_rec : OPEN DELIM POINTER DELIM TAG_REPO repo_subs { CHECK0 } CLOSE - { end_record(REC_REPO, $6); } + { end_record(REC_REPO, $6, NULL); } ; repo_subs : /* empty */ @@ -1443,7 +1460,7 @@ source_rec : OPEN DELIM POINTER DELIM TAG_SOUR sour_subs { CHECK0 } CLOSE - { end_record(REC_SOUR, $6); } + { end_record(REC_SOUR, $6, NULL); } ; sour_subs : /* empty */ @@ -1561,12 +1578,16 @@ sour_auth_sect : OPEN DELIM TAG_AUTH mand_line_item { $$ = start_element(ELT_SOUR_AUTH, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $4); START(AUTH, $1, $$) } sour_auth_subs { CHECK0 } CLOSE - { end_element(ELT_SOUR_AUTH, PARENT, $5, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_SOUR_AUTH, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -1583,12 +1604,16 @@ sour_titl_sect : OPEN DELIM TAG_TITL mand_line_item { $$ = start_element(ELT_SOUR_TITL, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $4); START(TITL, $1, $$) } sour_titl_subs { CHECK0 } CLOSE - { end_element(ELT_SOUR_TITL, PARENT, $5, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_SOUR_TITL, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -1619,12 +1644,16 @@ sour_publ_sect : OPEN DELIM TAG_PUBL mand_line_item { $$ = start_element(ELT_SOUR_PUBL, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $4); START(PUBL, $1, $$) } sour_publ_subs { CHECK0 } CLOSE - { end_element(ELT_SOUR_PUBL, PARENT, $5, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_SOUR_PUBL, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -1641,12 +1670,16 @@ sour_text_sect : OPEN DELIM TAG_TEXT mand_line_item { $$ = start_element(ELT_SOUR_TEXT, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $4); START(TEXT, $1, $$) } sour_text_subs { CHECK0 } CLOSE - { end_element(ELT_SOUR_TEXT, PARENT, $5, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_SOUR_TEXT, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -1672,7 +1705,7 @@ submis_rec : OPEN DELIM POINTER DELIM TAG_SUBN subn_subs { CHECK0 } CLOSE - { end_record(REC_SUBN, $6); } + { end_record(REC_SUBN, $6, NULL); } ; subn_subs : /* empty */ @@ -1804,7 +1837,7 @@ submit_rec : OPEN DELIM POINTER DELIM TAG_SUBM subm_subs { CHECK1(NAME) } CLOSE - { end_record(REC_SUBM, $6); } + { end_record(REC_SUBM, $6, NULL); } ; subm_subs : /* empty */ @@ -1890,12 +1923,16 @@ addr_sect : OPEN DELIM TAG_ADDR mand_line_item { $$ = start_element(ELT_SUB_ADDR, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $4); START(ADDR, $1, $$) } addr_subs { CHECK0 } CLOSE - { end_element(ELT_SUB_ADDR, PARENT, $5, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_SUB_ADDR, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -1917,6 +1954,8 @@ addr_cont_sect : OPEN DELIM TAG_CONT mand_line_item { $$ = start_element(ELT_SUB_ADDR_CONT, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + safe_buf_append(&concat_buffer, "\n"); + safe_buf_append(&concat_buffer, $4); START(CONT, $1, $$) } no_std_subs @@ -2179,6 +2218,8 @@ cont_sect : OPEN DELIM TAG_CONT mand_line_item { $$ = start_element(ELT_SUB_CONT, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + safe_buf_append(&concat_buffer, "\n"); + safe_buf_append(&concat_buffer, $4); START(CONT, $1, $$) } no_std_subs @@ -2192,6 +2233,7 @@ conc_sect : OPEN DELIM TAG_CONC mand_line_item { $$ = start_element(ELT_SUB_CONC, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + safe_buf_append(&concat_buffer, $4); START(CONC, $1, $$) } no_std_subs @@ -3000,7 +3042,7 @@ note_struc_link_sect : OPEN DELIM TAG_NOTE DELIM POINTER = start_element(ELT_SUB_NOTE, PARENT, $1, $3, $5, GEDCOM_MAKE_XREF_PTR(val1, xr)); - START(NOTE, $1, $$) + START(NOTE, $1, $$) } note_struc_link_subs { CHECK0 } @@ -3022,12 +3064,17 @@ note_struc_emb_sect : OPEN DELIM TAG_NOTE opt_line_item = start_element(ELT_SUB_NOTE, PARENT, $1, $3, $4, GEDCOM_MAKE_NULL_OR_STRING(val1, $4)); + reset_buffer(&concat_buffer); + if ($4) + safe_buf_append(&concat_buffer, $4); START(NOTE, $1, $$) } note_struc_emb_subs { CHECK0 } CLOSE - { end_element(ELT_SUB_NOTE, PARENT, $5, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_SUB_NOTE, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -3327,13 +3374,16 @@ source_cit_text_sect : OPEN DELIM TAG_TEXT mand_line_item = start_element(ELT_SUB_SOUR_TEXT, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); - START(TEXT, $1, $$) + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $4); + START(TEXT, $1, $$) } source_cit_text_subs { CHECK0 } CLOSE - { end_element(ELT_SUB_SOUR_TEXT, PARENT, $5, - NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_SUB_SOUR_TEXT, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -3364,12 +3414,16 @@ source_cit_emb_sect : OPEN DELIM TAG_SOUR mand_line_item { $$ = start_element(ELT_SUB_SOUR, PARENT, $1, $3, $4, GEDCOM_MAKE_STRING(val1, $4)); + reset_buffer(&concat_buffer); + safe_buf_append(&concat_buffer, $4); START(SOUR, $1, $$) } source_cit_emb_subs { CHECK0 } CLOSE - { end_element(ELT_SUB_SOUR, PARENT, $5, NULL); + { char* complete = get_buf_string(&concat_buffer); + end_element(ELT_SUB_SOUR, PARENT, $5, + GEDCOM_MAKE_STRING(val1, complete)); } ; @@ -3518,7 +3572,7 @@ user_rec : OPEN DELIM opt_xref USERTAG user_sects { CHECK0 } CLOSE - { end_record(REC_USER, $7); } + { end_record(REC_USER, $7, NULL); } ; user_sect : OPEN DELIM opt_xref USERTAG { if ($4.string[0] != '_') { @@ -3889,6 +3943,11 @@ void clean_up() } } +void cleanup_concat_buffer() +{ + cleanup_buffer(&concat_buffer); +} + /* Enabling debug mode */ /* level 0: no debugging */ /* level 1: only internal */ diff --git a/gedcom/interface.c b/gedcom/interface.c index d22f8e6..ac79bed 100644 --- a/gedcom/interface.c +++ b/gedcom/interface.c @@ -73,11 +73,11 @@ Gedcom_ctxt start_record(Gedcom_rec rec, return NULL; } -void end_record(Gedcom_rec rec, Gedcom_ctxt self) +void end_record(Gedcom_rec rec, Gedcom_ctxt self, Gedcom_val parsed_value) { Gedcom_rec_end_cb cb = record_end_callback[rec]; if (cb != NULL) - (*cb)(rec, self); + (*cb)(rec, self, parsed_value); } Gedcom_ctxt start_element(Gedcom_elt elt, Gedcom_ctxt parent, diff --git a/gedcom/interface.h b/gedcom/interface.h index 3d4422f..d766d94 100644 --- a/gedcom/interface.h +++ b/gedcom/interface.h @@ -30,7 +30,8 @@ Gedcom_ctxt start_record(Gedcom_rec rec, int level, Gedcom_val xref, struct tag_struct tag, char *raw_value, Gedcom_val parsed_value); -void end_record(Gedcom_rec rec, Gedcom_ctxt self); +void end_record(Gedcom_rec rec, Gedcom_ctxt self, + Gedcom_val parsed_value); Gedcom_ctxt start_element(Gedcom_elt elt, Gedcom_ctxt parent, int level, struct tag_struct tag, char *raw_value, diff --git a/gom/gom.c b/gom/gom.c index ed9d5e5..2a8aa20 100644 --- a/gom/gom.c +++ b/gom/gom.c @@ -185,7 +185,8 @@ void gom_default_callback (Gedcom_elt elt UNUSED, Gedcom_ctxt parent UNUSED, level, tag, raw_value); } -void def_rec_end(Gedcom_rec rec UNUSED, Gedcom_ctxt self) +void def_rec_end(Gedcom_rec rec UNUSED, Gedcom_ctxt self, + Gedcom_val parsed_value UNUSED) { Gom_ctxt ctxt = (Gom_ctxt)self; destroy_gom_ctxt(ctxt); diff --git a/gom/gom_internal.h b/gom/gom_internal.h index 2038c26..829b13d 100644 --- a/gom/gom_internal.h +++ b/gom/gom_internal.h @@ -100,7 +100,7 @@ void gom_mem_error(const char *filename, int line); #define MEMORY_ERROR gom_mem_error(__FILE__, __LINE__) -void def_rec_end(Gedcom_rec rec, Gedcom_ctxt self); +void def_rec_end(Gedcom_rec rec, Gedcom_ctxt self, Gedcom_val parsed_value); void def_elt_end(Gedcom_elt elt, Gedcom_ctxt parent, Gedcom_ctxt self, Gedcom_val parsed_value); void set_xref_type(struct xref_value *xr, const char* str); diff --git a/include/gedcom.h.in b/include/gedcom.h.in index c2c0bf1..424c516 100644 --- a/include/gedcom.h.in +++ b/include/gedcom.h.in @@ -480,7 +480,7 @@ typedef Gedcom_ctxt char *raw_value, int tag_value, Gedcom_val parsed_value); typedef void (*Gedcom_rec_end_cb) - (Gedcom_rec rec, Gedcom_ctxt self); + (Gedcom_rec rec, Gedcom_ctxt self, Gedcom_val parsed_value); typedef Gedcom_ctxt (*Gedcom_elt_start_cb) diff --git a/t/output/allged.ref b/t/output/allged.ref index 8966e32..03423ec 100644 --- a/t/output/allged.ref +++ b/t/output/allged.ref @@ -35,7 +35,7 @@ Source context 1001 in parent 1 == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) LINEAGE-LINKED (ctxt is 1, conversion failures: 0) == 1 LANG (334) language (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) A general note about this file: (ctxt is 1, conversion failures: 0) +Note: A general note about this file: (ctxt is 1, parent is 1) == 2 CONT (300) It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1: (ctxt is 1, conversion failures: 0) == 2 CONT (300) His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given), (ctxt is 1, conversion failures: 0) == 2 CONT (300) adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7). (ctxt is 1, conversion failures: 0) @@ -52,6 +52,23 @@ Source context 1001 in parent 1 == 2 CONT (300) This @ (commercial at) character may only appear ONCE! (ctxt is 1, conversion failures: 0) == 2 CONT (300) Note continued here. The word TE (ctxt is 1, conversion failures: 0) == 2 CONC (297) ST should not be broken! (ctxt is 1, conversion failures: 0) +Complete note: +A general note about this file: +It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1: +His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given), +adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7). +In PERSON1, FAMILY1, SUBMITTER, SUBMISSION and SOURCE1 as many datafields as possible are used. +All other individuals/families contain no data. Note, that many data tags can appear more than once +(in this transmission this is demonstrated with tags: NAME, OCCU, PLACE and NOTE. Seek the word 'another'. +The data transmitted here do not make sence. Just the HEAD.DATE tag contains the date of the creation +of this file and will change in future Versions! +This file is created by H. Eichmann: h.eichmann@gmx.de. Feel free to copy and use it for any +non-commercial purpose. For the creation the GEDCOM standard Release 5.5 (2 JAN 1996) has been used. +Copyright: The church of Jesus Christ of latter-day saints, gedcom@gedcom.org +Download it (the GEDCOM 5.5 specs) from: ftp.gedcom.com/pub/genealogy/gedcom. +Some Specials: This line is very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very long but not too long (255 caharcters is the limit). +This @ (commercial at) character may only appear ONCE! +Note continued here. The word TEST should not be broken! == 1 _MYOWNTAG (264) This is a non-standard tag. Not recommended but allowed (ctxt is 1, conversion failures: 0) Header end, context is 1 Submitter, xref is @SUBMITTER@ @@ -2307,7 +2324,7 @@ Source context 1001 in parent 1 == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) LINEAGE-LINKED (ctxt is 1, conversion failures: 0) == 1 LANG (334) language (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) A general note about this file: (ctxt is 1, conversion failures: 0) +Note: A general note about this file: (ctxt is 1, parent is 1) == 2 CONT (300) It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1: (ctxt is 1, conversion failures: 0) == 2 CONT (300) His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given), (ctxt is 1, conversion failures: 0) == 2 CONT (300) adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7). (ctxt is 1, conversion failures: 0) @@ -2324,6 +2341,23 @@ Source context 1001 in parent 1 == 2 CONT (300) This @ (commercial at) character may only appear ONCE! (ctxt is 1, conversion failures: 0) == 2 CONT (300) Note continued here. The word TE (ctxt is 1, conversion failures: 0) == 2 CONC (297) ST should not be broken! (ctxt is 1, conversion failures: 0) +Complete note: +A general note about this file: +It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1: +His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given), +adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7). +In PERSON1, FAMILY1, SUBMITTER, SUBMISSION and SOURCE1 as many datafields as possible are used. +All other individuals/families contain no data. Note, that many data tags can appear more than once +(in this transmission this is demonstrated with tags: NAME, OCCU, PLACE and NOTE. Seek the word 'another'. +The data transmitted here do not make sence. Just the HEAD.DATE tag contains the date of the creation +of this file and will change in future Versions! +This file is created by H. Eichmann: h.eichmann@gmx.de. Feel free to copy and use it for any +non-commercial purpose. For the creation the GEDCOM standard Release 5.5 (2 JAN 1996) has been used. +Copyright: The church of Jesus Christ of latter-day saints, gedcom@gedcom.org +Download it (the GEDCOM 5.5 specs) from: ftp.gedcom.com/pub/genealogy/gedcom. +Some Specials: This line is very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very long but not too long (255 caharcters is the limit). +This @ (commercial at) character may only appear ONCE! +Note continued here. The word TEST should not be broken! == 1 _MYOWNTAG (264) This is a non-standard tag. Not recommended but allowed (ctxt is 1, conversion failures: 0) Header end, context is 1 Submitter, xref is @SUBMITTER@ diff --git a/t/output/ansel.ref b/t/output/ansel.ref index e9ed53d..8add325 100644 --- a/t/output/ansel.ref +++ b/t/output/ansel.ref @@ -7,7 +7,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) This GEDCOM transmission contains a charcter set test. It consists (ctxt is 1, conversion failures: 0) +Note: This GEDCOM transmission contains a charcter set test. It consists (ctxt is 1, parent is 1) == 2 CONT (300) of a single family (two parents, many children). The parents are empty (ctxt is 1, conversion failures: 0) == 2 CONT (300) in the ANSEL version of the transmission. The children contain the (ctxt is 1, conversion failures: 0) == 2 CONT (300) combined letters and the special charcters (value > 128). (ctxt is 1, conversion failures: 0) @@ -35,6 +35,35 @@ Source context 1001 in parent 1 == 2 CONT (300) (see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip) (ctxt is 1, conversion failures: 0) == 2 CONT (300) and on an updated ANSEL description in: (ctxt is 1, conversion failures: 0) == 2 CONT (300) http://www.gendex.com/gedcom55/55gcappd.htm (ctxt is 1, conversion failures: 0) +Complete note: +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are empty +in the ANSEL version of the transmission. The children contain the +combined letters and the special charcters (value > 128). +The NAME tag of each 'person' is the name of the characters tested +within the person. The BIRT.PLAC and DEAT.PLAC tags contain the +test-strings. +The first children contain special characters. Here the test string +is 'character name (test character), ...' where 'character name' +is the name of the character (like 'british pound') and +'test character' is a single byte representing this character +in ANSEL. +The last children contain combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +in ANSEL and UNICODE. The test strings contain the whole latin +alphabet combined with this non-spacing character: captial letters +in the BIRT.PLAC tag and small letters in the DEAT.PLAC tag. +Example: One 'person' is named 'circle above'. The BIRT.PLAC +tag contains all 26 capital letters with a small ring on top. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +Many of the combined characters generated here do not even have +a UNICDOE code point! +This file based mainly on the GEDCOM 5.5 specification +(see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip) +and on an updated ANSEL description in: +http://www.gendex.com/gedcom55/55gcappd.htm == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 @@ -328,7 +357,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) This GEDCOM transmission contains a charcter set test. It consists (ctxt is 1, conversion failures: 0) +Note: This GEDCOM transmission contains a charcter set test. It consists (ctxt is 1, parent is 1) == 2 CONT (300) of a single family (two parents, many children). The parents are empty (ctxt is 1, conversion failures: 0) == 2 CONT (300) in the ANSEL version of the transmission. The children contain the (ctxt is 1, conversion failures: 0) == 2 CONT (300) combined letters and the special charcters (value > 128). (ctxt is 1, conversion failures: 0) @@ -356,6 +385,35 @@ Source context 1001 in parent 1 == 2 CONT (300) (see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip) (ctxt is 1, conversion failures: 0) == 2 CONT (300) and on an updated ANSEL description in: (ctxt is 1, conversion failures: 0) == 2 CONT (300) http://www.gendex.com/gedcom55/55gcappd.htm (ctxt is 1, conversion failures: 0) +Complete note: +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are empty +in the ANSEL version of the transmission. The children contain the +combined letters and the special charcters (value > 128). +The NAME tag of each 'person' is the name of the characters tested +within the person. The BIRT.PLAC and DEAT.PLAC tags contain the +test-strings. +The first children contain special characters. Here the test string +is 'character name (test character), ...' where 'character name' +is the name of the character (like 'british pound') and +'test character' is a single byte representing this character +in ANSEL. +The last children contain combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +in ANSEL and UNICODE. The test strings contain the whole latin +alphabet combined with this non-spacing character: captial letters +in the BIRT.PLAC tag and small letters in the DEAT.PLAC tag. +Example: One 'person' is named 'circle above'. The BIRT.PLAC +tag contains all 26 capital letters with a small ring on top. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +Many of the combined characters generated here do not even have +a UNICDOE code point! +This file based mainly on the GEDCOM 5.5 specification +(see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip) +and on an updated ANSEL description in: +http://www.gendex.com/gedcom55/55gcappd.htm == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 diff --git a/t/output/uhlbomcl.ref b/t/output/uhlbomcl.ref index de6595c..5684679 100644 --- a/t/output/uhlbomcl.ref +++ b/t/output/uhlbomcl.ref @@ -7,7 +7,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Hi-Lo order (non-Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return + line feed. (ctxt is 1, conversion failures: 0) @@ -41,6 +41,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Hi-Lo order (non-Intel) +The transmission does start with a byte order mark (BOM) +Each line is terminated using carriage return + line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 @@ -342,7 +377,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Hi-Lo order (non-Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return + line feed. (ctxt is 1, conversion failures: 0) @@ -376,6 +411,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Hi-Lo order (non-Intel) +The transmission does start with a byte order mark (BOM) +Each line is terminated using carriage return + line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 diff --git a/t/output/uhlcl.ref b/t/output/uhlcl.ref index dea1eca..5b081c8 100644 --- a/t/output/uhlcl.ref +++ b/t/output/uhlcl.ref @@ -7,7 +7,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Hi-Lo order (non-Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return + line feed. (ctxt is 1, conversion failures: 0) @@ -41,6 +41,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Hi-Lo order (non-Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using carriage return + line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 @@ -342,7 +377,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Hi-Lo order (non-Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return + line feed. (ctxt is 1, conversion failures: 0) @@ -376,6 +411,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Hi-Lo order (non-Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using carriage return + line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 diff --git a/t/output/ulhbomcl.ref b/t/output/ulhbomcl.ref index 19e8f4b..1e6daa3 100644 --- a/t/output/ulhbomcl.ref +++ b/t/output/ulhbomcl.ref @@ -7,7 +7,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return + line feed. (ctxt is 1, conversion failures: 0) @@ -41,6 +41,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does start with a byte order mark (BOM) +Each line is terminated using carriage return + line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 @@ -342,7 +377,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return + line feed. (ctxt is 1, conversion failures: 0) @@ -376,6 +411,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does start with a byte order mark (BOM) +Each line is terminated using carriage return + line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 diff --git a/t/output/ulhc.ref b/t/output/ulhc.ref index a451cb4..d3f16c0 100644 --- a/t/output/ulhc.ref +++ b/t/output/ulhc.ref @@ -7,7 +7,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return. (ctxt is 1, conversion failures: 0) @@ -41,6 +41,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using carriage return. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 @@ -342,7 +377,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return. (ctxt is 1, conversion failures: 0) @@ -376,6 +411,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using carriage return. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 diff --git a/t/output/ulhcl.ref b/t/output/ulhcl.ref index c60ae26..bf93ff5 100644 --- a/t/output/ulhcl.ref +++ b/t/output/ulhcl.ref @@ -7,7 +7,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return + line feed. (ctxt is 1, conversion failures: 0) @@ -41,6 +41,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using carriage return + line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 @@ -342,7 +377,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using carriage return + line feed. (ctxt is 1, conversion failures: 0) @@ -376,6 +411,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using carriage return + line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 diff --git a/t/output/ulhl.ref b/t/output/ulhl.ref index a1a0379..8a91387 100644 --- a/t/output/ulhl.ref +++ b/t/output/ulhl.ref @@ -7,7 +7,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using line feed. (ctxt is 1, conversion failures: 0) @@ -41,6 +41,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 @@ -342,7 +377,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using line feed. (ctxt is 1, conversion failures: 0) @@ -376,6 +411,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using line feed. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 diff --git a/t/output/ulhlc.ref b/t/output/ulhlc.ref index 29a8743..f87e90b 100644 --- a/t/output/ulhlc.ref +++ b/t/output/ulhlc.ref @@ -7,7 +7,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using line feed + carriage return. (ctxt is 1, conversion failures: 0) @@ -41,6 +41,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using line feed + carriage return. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 @@ -342,7 +377,7 @@ Source context 1001 in parent 1 == 1 GEDC (326) (null) (ctxt is 1, conversion failures: 0) == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) Lineage-Linked (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) UNICODE transmission test. (ctxt is 1, conversion failures: 0) +Note: UNICODE transmission test. (ctxt is 1, parent is 1) == 2 CONT (300) Each UNICODE character is stored in Lo-Hi order (Intel) (ctxt is 1, conversion failures: 0) == 2 CONT (300) The transmission does NOT start with a byte order mark (BOM) (ctxt is 1, conversion failures: 0) == 2 CONT (300) Each line is terminated using line feed + carriage return. (ctxt is 1, conversion failures: 0) @@ -376,6 +411,41 @@ Source context 1001 in parent 1 == 2 CONT (300) www.unicode.org delivered the connection from the code point names (ctxt is 1, conversion failures: 0) == 2 CONT (300) to the actual values. Note, that much more UNICODE characters are (ctxt is 1, conversion failures: 0) == 2 CONT (300) possible (like the chinese alphabet). (ctxt is 1, conversion failures: 0) +Complete note: +UNICODE transmission test. +Each UNICODE character is stored in Lo-Hi order (Intel) +The transmission does NOT start with a byte order mark (BOM) +Each line is terminated using line feed + carriage return. +This GEDCOM transmission contains a charcter set test. It consists +of a single family (two parents, many children). The parents are used +to test the cyrillic and greek letters. In both 'persons' the +BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +small letters of alphabet. +The children contain some combined letters and special charcters. +The NAME tag of each 'person' is the name of the characters tested +within the person. +The first children contain some special characters. Here the strings +given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +where 'character name'is the name of the character (like 'british pound') +and 'test character' is a single byte representing this character +in ANSEL. +The last children contain some combined characters. The name tag gives +the name of the non-spacing character tested within the 'person'. +Within the name the hex-values of the non-spacing character is given +UNICODE. The DEAT.PLAC tag contains all latin characters which are +combined with the non-spacing character tested here and which have +a UNICODE code point. The BIRT.PLAC tag contain the same letters +without the non-spacing part. +Example: One 'person' is named 'ring above'. The BIRT.PLAC +tag contains all latin letters which have a UNICODE code point if +combined with a ring above. The DEAT.PLAC tag contain the same +charcters combined with this ring. +Note: Not all charcters can be displayed on all computers. +This strongly depends on the installed fonts and codepages. +This file based on the following source: +www.unicode.org delivered the connection from the code point names +to the actual values. Note, that much more UNICODE characters are +possible (like the chinese alphabet). == 1 SUBM (382) @SUBMITTER@ (ctxt is 1, conversion failures: 0) == 1 DATE (306) 20 JAN 1998 (ctxt is 1, conversion failures: 0) Header end, context is 1 diff --git a/t/output/unknown_ignore_errors.ref b/t/output/unknown_ignore_errors.ref index 9f29819..a85bcc8 100644 --- a/t/output/unknown_ignore_errors.ref +++ b/t/output/unknown_ignore_errors.ref @@ -36,7 +36,7 @@ Source context 1001 in parent 1 == 2 VERS (391) 5.5 (ctxt is 1, conversion failures: 0) == 2 FORM (325) LINEAGE-LINKED (ctxt is 1, conversion failures: 0) == 1 LANG (334) language (ctxt is 1, conversion failures: 0) -== 1 NOTE (348) A general note about this file: (ctxt is 1, conversion failures: 0) +Note: A general note about this file: (ctxt is 1, parent is 1) == 2 CONT (300) It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1: (ctxt is 1, conversion failures: 0) == 2 CONT (300) His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given), (ctxt is 1, conversion failures: 0) == 2 CONT (300) adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7). (ctxt is 1, conversion failures: 0) @@ -53,6 +53,23 @@ Source context 1001 in parent 1 == 2 CONT (300) This @ (commercial at) character may only appear ONCE! (ctxt is 1, conversion failures: 0) == 2 CONT (300) Note continued here. The word TE (ctxt is 1, conversion failures: 0) == 2 CONC (297) ST should not be broken! (ctxt is 1, conversion failures: 0) +Complete note: +A general note about this file: +It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1: +His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given), +adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7). +In PERSON1, FAMILY1, SUBMITTER, SUBMISSION and SOURCE1 as many datafields as possible are used. +All other individuals/families contain no data. Note, that many data tags can appear more than once +(in this transmission this is demonstrated with tags: NAME, OCCU, PLACE and NOTE. Seek the word 'another'. +The data transmitted here do not make sence. Just the HEAD.DATE tag contains the date of the creation +of this file and will change in future Versions! +This file is created by H. Eichmann: h.eichmann@gmx.de. Feel free to copy and use it for any +non-commercial purpose. For the creation the GEDCOM standard Release 5.5 (2 JAN 1996) has been used. +Copyright: The church of Jesus Christ of latter-day saints, gedcom@gedcom.org +Download it (the GEDCOM 5.5 specs) from: ftp.gedcom.com/pub/genealogy/gedcom. +Some Specials: This line is very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very long but not too long (255 caharcters is the limit). +This @ (commercial at) character may only appear ONCE! +Note continued here. The word TEST should not be broken! == 1 _MYOWNTAG (264) This is a non-standard tag. Not recommended but allowed (ctxt is 1, conversion failures: 0) Header end, context is 1 Submitter, xref is @SUBMITTER@ diff --git a/t/src/standalone.c b/t/src/standalone.c index 7868cf2..9c22284 100644 --- a/t/src/standalone.c +++ b/t/src/standalone.c @@ -62,7 +62,7 @@ Gedcom_ctxt header_start(Gedcom_rec rec, int level, Gedcom_val xref, char *tag, return (Gedcom_ctxt)1; } -void header_end(Gedcom_rec rec, Gedcom_ctxt self) +void header_end(Gedcom_rec rec, Gedcom_ctxt self, Gedcom_val parsed_value) { output(1, "Header end, context is %ld\n", void_ptr_to_int(self)); } @@ -102,7 +102,7 @@ Gedcom_ctxt note_start(Gedcom_rec rec, int level, Gedcom_val xref, char *tag, return (Gedcom_ctxt)int_to_void_ptr(tag_value); } -void family_end(Gedcom_rec rec, Gedcom_ctxt self) +void family_end(Gedcom_rec rec, Gedcom_ctxt self, Gedcom_val parsed_value) { output(1, "Family end, xref is %s\n", family_xreftags[void_ptr_to_int(self)]); @@ -134,6 +134,23 @@ void source_end(Gedcom_elt elt, Gedcom_ctxt parent, Gedcom_ctxt self, void_ptr_to_int(self), void_ptr_to_int(parent)); } +Gedcom_ctxt head_note_start(Gedcom_elt elt, Gedcom_ctxt parent, int level, + char *tag, char* raw_value, + int tag_value, Gedcom_val parsed_value) +{ + Gedcom_ctxt self = (Gedcom_ctxt)(void_ptr_to_int(parent)); + output(1, "Note: %s (ctxt is %ld, parent is %ld)\n", + GEDCOM_STRING(parsed_value), void_ptr_to_int(self), + void_ptr_to_int(parent)); + return self; +} + +void head_note_end(Gedcom_elt elt, Gedcom_ctxt parent, Gedcom_ctxt self, + Gedcom_val parsed_value) +{ + output(1, "Complete note:\n%s\n", GEDCOM_STRING(parsed_value)); +} + Gedcom_ctxt date_start(Gedcom_elt elt, Gedcom_ctxt parent, int level, char *tag, char* raw_value, int tag_value, Gedcom_val parsed_value) @@ -207,6 +224,7 @@ void subscribe_callbacks() gedcom_subscribe_to_record(REC_SUBM, submit_start, NULL); gedcom_subscribe_to_record(REC_USER, rec_start, NULL); gedcom_subscribe_to_element(ELT_HEAD_SOUR, source_start, source_end); + gedcom_subscribe_to_element(ELT_HEAD_NOTE, head_note_start, head_note_end); gedcom_subscribe_to_element(ELT_SOUR_DATA_EVEN_DATE, date_start, NULL); gedcom_subscribe_to_element(ELT_SUB_EVT_DATE, date_start, NULL);