X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fwrite.c;h=f04697e6fb03ed4e56fcdaf2b6010937483daa45;hb=HEAD;hp=e338b948c30108142a02cebe1e6c30b1e9c848ef;hpb=177218adadc7aa92a1c14e6b8b33080dc77d6e4e;p=gedcom-parse.git diff --git a/gedcom/write.c b/gedcom/write.c index e338b94..f04697e 100644 --- a/gedcom/write.c +++ b/gedcom/write.c @@ -24,6 +24,7 @@ #include "gedcom_internal.h" #include "gedcom.h" #include "encoding.h" +#include "encoding_state.h" #include "tag_data.h" #include "buffer.h" #include "utf8tools.h" @@ -34,11 +35,6 @@ #define MAXWRITELEN MAXGEDCLINELEN -const char* encoding = "ASCII"; -int write_encoding_details = ONE_BYTE; -/* SYS_NEWLINE is defined in config.h */ -const char* write_terminator = SYS_NEWLINE; - struct Gedcom_write_struct { int filedesc; convert_t conv; @@ -48,19 +44,6 @@ struct Gedcom_write_struct { int ctxt_level; }; -const char* default_encoding[] = { - /* ONE_BYTE */ "ASCII", - /* TWO_BYTE_HILO */ "UCS-2BE", - /* TWO_BYTE_LOHI */ "UCS-2LE" -}; - -const char* terminator[] = { - /* END_CR */ "\x0D", - /* END_LF */ "\x0A", - /* END_CR_LF */ "\x0D\x0A", - /* END_LF_CR */ "\x0A\x0D" -}; - void cleanup_write_buffer(); struct safe_buffer write_buffer = { NULL, 0, NULL, 0, cleanup_write_buffer }; @@ -79,7 +62,8 @@ void cleanup_convert_at_buffer() } int write_simple(Gedcom_write_hndl hndl, - int level, char* xref, char* tag, char* value) + int level, const char* xref, const char* tag, + const char* value) { int res; @@ -119,13 +103,23 @@ int write_simple(Gedcom_write_hndl hndl, return 0; } +int write_encoding_value(Gedcom_write_hndl hndl, + int level, const char* xref, const char* tag, + const char* value) +{ + if (strcmp(value, write_encoding.charset)) + gedcom_warning(_("Forcing HEAD.CHAR value to '%s'"), + write_encoding.charset); + return write_simple(hndl, level, xref, tag, write_encoding.charset); +} + int supports_continuation(int elt_or_rec, int which_continuation) { return tag_data[elt_or_rec].options & which_continuation; } int write_long(Gedcom_write_hndl hndl, int elt_or_rec, - int level, char* xref, char* tag, char* value) + int level, const char* xref, const char* tag, const char* value) { int prefix_len, value_len = 0, term_len; char* nl_pos = NULL; @@ -140,11 +134,11 @@ int write_long(Gedcom_write_hndl hndl, int elt_or_rec, if (!nl_pos && prefix_len + value_len + term_len <= MAXWRITELEN) write_simple(hndl, level, xref, tag, value); else { - char* value_ptr = value; + const char* value_ptr = value; int cont_supported = supports_continuation(elt_or_rec, OPT_CONT); int cont_as_conc = supports_continuation(elt_or_rec, OPT_CONT_AS_CONC); if (nl_pos && !cont_supported) { - gedcom_error (_("The tag %s doesn't support newlines\n"), tag); + gedcom_error (_("The tag %s doesn't support newlines"), tag); return 1; } else { @@ -160,8 +154,13 @@ int write_long(Gedcom_write_hndl hndl, int elt_or_rec, if (prefix_len + line_len + term_len > MAXWRITELEN) { line_len = MAXWRITELEN - prefix_len - term_len; - if (!cont_as_conc) + if (!cont_as_conc) { cont_tag = "CONC"; + while (value_ptr[line_len] == ' ' + || value_ptr[line_len-1] == ' ') { + line_len--; + } + } } memset(value_part, 0, sizeof(value_part)); @@ -171,7 +170,7 @@ int write_long(Gedcom_write_hndl hndl, int elt_or_rec, if (line_len < value_len) { value_ptr = value_ptr + line_len; value_len = value_len - line_len; - while (*value_ptr == '\n') { + if (*value_ptr == '\n') { value_ptr++; value_len--; } @@ -190,43 +189,13 @@ int write_long(Gedcom_write_hndl hndl, int elt_or_rec, return 0; } -int gedcom_write_set_encoding(const char* charset, - Encoding width, Enc_bom bom) -{ - char* new_encoding = NULL; - if (!strcmp(charset, "UNICODE")) { - if (width == ONE_BYTE) { - gedcom_error(_("Unicode cannot be encoded into one byte")); - return 1; - } - else { - new_encoding = get_encoding(charset, width); - if (new_encoding) { - encoding = new_encoding; - write_encoding_details = width | bom; - } - else - return 1; - } - } - else { - new_encoding = get_encoding(charset, ONE_BYTE); - if (new_encoding) { - encoding = new_encoding; - write_encoding_details = ONE_BYTE; - } - else - return 1; - } - return 0; -} +/** The basic function for opening a GEDCOM file for writing. -int gedcom_write_set_line_terminator(Enc_line_end end) -{ - write_terminator = terminator[end]; - return 0; -} + \param filename The name of the file to write + \return A write handle, which needs to be used in the writing functions, + or \c NULL in case of errors. + */ Gedcom_write_hndl gedcom_write_open(const char *filename) { Gedcom_write_hndl hndl; @@ -236,11 +205,13 @@ Gedcom_write_hndl gedcom_write_open(const char *filename) if (!hndl) MEMORY_ERROR; else { + init_write_encoding(); + init_write_terminator(); hndl->total_conv_fails = 0; - hndl->conv = initialize_utf8_conversion(encoding, 0); + hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0); if (!hndl->conv) { gedcom_error(_("Could not open encoding '%s' for writing: %s"), - encoding, strerror(errno)); + write_encoding.encoding, strerror(errno)); free(hndl); hndl = NULL; } @@ -254,15 +225,17 @@ Gedcom_write_hndl gedcom_write_open(const char *filename) hndl = NULL; } else { - hndl->term = write_terminator; + hndl->term = write_encoding.terminator; hndl->ctxt_level = -1; - if (write_encoding_details & WITH_BOM) { - if (write_encoding_details & TWO_BYTE_HILO) + if (write_encoding.bom == WITH_BOM) { + if (write_encoding.width == TWO_BYTE_HILO) write(hndl->filedesc, "\xFE\xFF", 2); - else if (write_encoding_details & TWO_BYTE_LOHI) + else if (write_encoding.width == TWO_BYTE_LOHI) write(hndl->filedesc, "\xFF\xFE", 2); + else if (!strcmp(write_encoding.encoding, "UTF-8")) + write(hndl->filedesc, "\xEF\xBB\xBF", 3); else - gedcom_warning(_("Byte order mark configured, but no Unicode")); + gedcom_warning(_("Byte order mark configured, but not relevant")); } } } @@ -271,6 +244,16 @@ Gedcom_write_hndl gedcom_write_open(const char *filename) return hndl; } +/** The basic function for closing a GEDCOM file for writing. + + \param hndl The write handle as returned by gedcom_write_open(). + \param total_conv_fails If you pass an actual integer pointer for this, + the function will write in it the total number of conversion failures; + you can pass \c NULL if you're not interested + + \retval 0 in case of success + \retval >0 in case of failure. + */ int gedcom_write_close(Gedcom_write_hndl hndl, int* total_conv_fails) { int result = 0; @@ -298,7 +281,7 @@ char* get_tag_string(int elt_or_rec, int tag) } } else { - gedcom_error(_("The element or record type '%s' requires a specific tag" + gedcom_error(_("The element or record type '%s' requires a specific tag " "for writing"), tag_data[elt_or_rec].elt_name); return NULL; @@ -361,7 +344,7 @@ char* convert_at(const char* input) int _gedcom_write_val(Gedcom_write_hndl hndl, int rec_or_elt, int tag, int parent_rec_or_elt, - char* xrefstr, char* val) + const char* xrefstr, const char* val) { int result = 1; int level = 0; @@ -370,7 +353,9 @@ int _gedcom_write_val(Gedcom_write_hndl hndl, tag_str = get_tag_string(rec_or_elt, tag); level = get_level(hndl, rec_or_elt, parent_rec_or_elt); if (tag_str && (level != -1)) { - if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC|OPT_CONT_AS_CONC)) + if (rec_or_elt == ELT_HEAD_CHAR) + result = write_encoding_value(hndl, level, xrefstr, tag_str, val); + else if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC)) result = write_long(hndl, rec_or_elt, level, xrefstr, tag_str, val); else result = write_simple(hndl, level, xrefstr, tag_str, val); @@ -379,19 +364,59 @@ int _gedcom_write_val(Gedcom_write_hndl hndl, return result; } +/** Function for writing lines corresponding to standard records (i.e. on + level 0). + + \param hndl The write handle that was returned by gedcom_write_open(). + \param rec One of the identifiers given in the first column in + this table (except REC_USER). + \param xrefstr The cross-reference key of the record (something like + \c "@FAM01@". + \param val The value of the record line, which should be \c NULL for some + record types, according to + this table. + + \retval 0 on success + \retval >0 on failure +*/ int gedcom_write_record_str(Gedcom_write_hndl hndl, - Gedcom_rec rec, int tag, - char* xrefstr, char* val) + Gedcom_rec rec, const char* xrefstr, + const char* val) { int result = 1; if (check_type(rec, (val ? GV_CHAR_PTR : GV_NULL))) - result = _gedcom_write_val(hndl, rec, tag, -1, xrefstr, convert_at(val)); + result = _gedcom_write_val(hndl, rec, 0, -1, xrefstr, convert_at(val)); return result; } +/** Function for writing lines corresponding to standard elements (i.e. on + level bigger than 0), with a string as value. + + \param hndl The write handle that was returned by gedcom_write_open(). + \param elt One of the identifiers given in the first column in + this table + (except ELT_USER). + \param tag Some of the \c elt identifiers can actually stand for different + tags. For this reason, the \c tag has to be passed for some of them. This + parsed tag is the same as was returned by the callback functions, and is + an identifier of the form TAG_name. This parameter + is needed whenever the second column in + this table shows several + possible tags (this is e.g. the case for \c ELT_SUB_FAM_EVT). Otherwise, + you can pass 0. + \param parent_rec_or_elt The corresponding \c rec or \c elt identifier of + the logically enclosing statement: this will determine the level number + written on the line, as the level number of the parent + 1. + \param val The value of the element line, which should be \c NULL for some + element types, according to + this table. + + \retval 0 on success + \retval >0 on failure +*/ int gedcom_write_element_str(Gedcom_write_hndl hndl, Gedcom_elt elt, int tag, int parent_rec_or_elt, - char* val) + const char* val) { int result = 1; if (check_type(elt, (val ? GV_CHAR_PTR : GV_NULL))) @@ -400,19 +425,14 @@ int gedcom_write_element_str(Gedcom_write_hndl hndl, return result; } -int gedcom_write_record_xref(Gedcom_write_hndl hndl, - Gedcom_rec rec, int tag, - char* xrefstr, struct xref_value* val) -{ - int result = 1; - if (check_type(rec, (val ? GV_XREF_PTR : GV_NULL))) - result = _gedcom_write_val(hndl, rec, tag, -1, xrefstr, val->string); - return result; -} +/** Function for writing lines corresponding to standard elements (i.e. on + level bigger than 0), with a cross-reference as value. + See gedcom_write_element_str() for details. +*/ int gedcom_write_element_xref(Gedcom_write_hndl hndl, Gedcom_elt elt, int tag, int parent_rec_or_elt, - struct xref_value* val) + const struct xref_value* val) { int result = 1; if (check_type(elt, (val ? GV_XREF_PTR : GV_NULL))) @@ -421,9 +441,14 @@ int gedcom_write_element_xref(Gedcom_write_hndl hndl, return result; } +/** Function for writing lines corresponding to standard elements (i.e. on + level bigger than 0), with a date as value. + + See gedcom_write_element_str() for details. +*/ int gedcom_write_element_date(Gedcom_write_hndl hndl, Gedcom_elt elt, int tag, int parent_rec_or_elt, - struct date_value* val) + const struct date_value* val) { int result = 1; if (check_type(elt, (val ? GV_DATE_VALUE : GV_NULL))) @@ -432,9 +457,14 @@ int gedcom_write_element_date(Gedcom_write_hndl hndl, return result; } +/** Function for writing lines corresponding to standard elements (i.e. on + level bigger than 0), with an age as value. + + See gedcom_write_element_str() for details. +*/ int gedcom_write_element_age(Gedcom_write_hndl hndl, Gedcom_elt elt, int tag, int parent_rec_or_elt, - struct age_value* val) + const struct age_value* val) { int result = 1; if (check_type(elt, (val ? GV_AGE_VALUE : GV_NULL))) @@ -443,8 +473,25 @@ int gedcom_write_element_age(Gedcom_write_hndl hndl, return result; } -int gedcom_write_user_str(Gedcom_write_hndl hndl, int level, char* tag, - char* xrefstr, char* value) +/** Function for writing lines corresponding to user-defined records and + elements, with a string as value. + + In the case of user-defined tags, the + level and tag string are passed verbatim (not controlled by the library). + This allows to write any extra data that doesn't use a standard tag, but + is only allowed for tags starting with an underscore. + + \param hndl The write handle that was returned by gedcom_write_open(). + \param level The integer level of the GEDCOM line + \param tag The tag, as a literal string + \param xrefstr An optional cross-reference of the record or element. + \param value The value of the record or element line. + + \retval 0 on success + \retval >0 on failure +*/ +int gedcom_write_user_str(Gedcom_write_hndl hndl, int level, const char* tag, + const char* xrefstr, const char* value) { int result = 1; if (tag && tag[0] == '_') @@ -452,8 +499,13 @@ int gedcom_write_user_str(Gedcom_write_hndl hndl, int level, char* tag, return result; } -int gedcom_write_user_xref(Gedcom_write_hndl hndl, int level, char* tag, - char* xrefstr, struct xref_value* val) +/** Function for writing lines corresponding to user-defined records and + elements, with a cross-reference as value. + + See gedcom_write_user_str() for details. +*/ +int gedcom_write_user_xref(Gedcom_write_hndl hndl, int level, const char* tag, + const char* xrefstr, const struct xref_value* val) { int result = 1; if (tag && tag[0] == '_')