X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fwrite.c;h=6e186b7b8c612d444d9895cfa7622df67288a36a;hb=d2dbc2834cae06a5de0dec22c0e7a530b7dd1589;hp=e82fd62bb8167a6ae2a0275ba31c25785b5df639;hpb=8d7d7a57b31433fad9bcfb2bb21cfa5cd1090679;p=gedcom-parse.git diff --git a/gedcom/write.c b/gedcom/write.c index e82fd62..6e186b7 100644 --- a/gedcom/write.c +++ b/gedcom/write.c @@ -24,18 +24,16 @@ #include "gedcom_internal.h" #include "gedcom.h" #include "encoding.h" +#include "encoding_state.h" #include "tag_data.h" #include "buffer.h" -#include "utf8.h" +#include "utf8tools.h" #include #include #include #include -const char* encoding = "ASCII"; -int write_encoding_details = ONE_BYTE; -/* SYS_NEWLINE is defined in config.h */ -const char* write_terminator = SYS_NEWLINE; +#define MAXWRITELEN MAXGEDCLINELEN struct Gedcom_write_struct { int filedesc; @@ -46,30 +44,26 @@ struct Gedcom_write_struct { int ctxt_level; }; -const char* default_encoding[] = { - /* ONE_BYTE */ "ASCII", - /* TWO_BYTE_HILO */ "UCS-2BE", - /* TWO_BYTE_LOHI */ "UCS-2LE" -}; - -const char* terminator[] = { - /* END_CR */ "\x0D", - /* END_LF */ "\x0A", - /* END_CR_LF */ "\x0D\x0A", - /* END_LF_CR */ "\x0A\x0D" -}; - void cleanup_write_buffer(); - struct safe_buffer write_buffer = { NULL, 0, NULL, 0, cleanup_write_buffer }; +void cleanup_convert_at_buffer(); +struct safe_buffer convert_at_buffer = { NULL, 0, NULL, 0, + cleanup_convert_at_buffer }; + void cleanup_write_buffer() { cleanup_buffer(&write_buffer); } +void cleanup_convert_at_buffer() +{ + cleanup_buffer(&convert_at_buffer); +} + int write_simple(Gedcom_write_hndl hndl, - int level, char* xref, char* tag, char* value) + int level, const char* xref, const char* tag, + const char* value) { int res; @@ -87,51 +81,111 @@ int write_simple(Gedcom_write_hndl hndl, res += safe_buf_append(&write_buffer, " %s", value); res += safe_buf_append(&write_buffer, hndl->term); - converted = convert_from_utf8(hndl->conv, get_buf_string(&write_buffer), - &conv_fails, &outlen); - - if (converted && (conv_fails == 0)) - write(hndl->filedesc, converted, outlen); + if (utf8_strlen(get_buf_string(&write_buffer)) > MAXGEDCLINELEN) { + gedcom_error(_("Line too long")); + } else { - hndl->total_conv_fails += conv_fails; - gedcom_error - (_("Error converting output string: %s (%d conversion failures)"), - strerror(errno), conv_fails); + converted = convert_from_utf8(hndl->conv, get_buf_string(&write_buffer), + &conv_fails, &outlen); + + if (converted && (conv_fails == 0)) { + line_no++; + write(hndl->filedesc, converted, outlen); + } + else { + hndl->total_conv_fails += conv_fails; + gedcom_error + (_("Error converting output string: %s (%d conversion failures)"), + strerror(errno), conv_fails); + } } } return 0; } -int gedcom_write_set_encoding(const char* charset, - Encoding width, Enc_bom bom) +int write_encoding_value(Gedcom_write_hndl hndl, + int level, const char* xref, const char* tag, + const char* value) +{ + if (strcmp(value, write_encoding.charset)) + gedcom_warning(_("Forcing HEAD.CHAR value to '%s'"), + write_encoding.charset); + return write_simple(hndl, level, xref, tag, write_encoding.charset); +} + +int supports_continuation(int elt_or_rec, int which_continuation) +{ + return tag_data[elt_or_rec].options & which_continuation; +} + +int write_long(Gedcom_write_hndl hndl, int elt_or_rec, + int level, const char* xref, const char* tag, const char* value) { - char* new_encoding = NULL; - if (!strcmp(charset, "UNICODE")) { - if (width == ONE_BYTE) { - gedcom_error(_("Unicode cannot be encoded into one byte")); + int prefix_len, value_len = 0, term_len; + char* nl_pos = NULL; + if (value) nl_pos = strchr(value, '\n'); + + prefix_len = utf8_strlen(tag) + 3; /* for e.g. "0 INDI " */ + if (level > 9) prefix_len++; + if (xref) prefix_len += utf8_strlen(xref) + 1; + if (value) value_len = utf8_strlen(value); + term_len = strlen(hndl->term); + + if (!nl_pos && prefix_len + value_len + term_len <= MAXWRITELEN) + write_simple(hndl, level, xref, tag, value); + else { + const char* value_ptr = value; + int cont_supported = supports_continuation(elt_or_rec, OPT_CONT); + int cont_as_conc = supports_continuation(elt_or_rec, OPT_CONT_AS_CONC); + if (nl_pos && !cont_supported) { + gedcom_error (_("The tag %s doesn't support newlines"), tag); return 1; } else { - new_encoding = get_encoding(charset, width); - if (new_encoding) { - encoding = new_encoding; - write_encoding_details = width | bom; + char value_part[MAXWRITELEN]; + int cont_prefix_len, write_level = level; + cont_prefix_len = utf8_strlen("CONT") + 3; + if (level + 1 > 9) cont_prefix_len++; + + while (value_ptr) { + char* cont_tag = "CONT"; + int line_len = (nl_pos && cont_supported + ? nl_pos - value_ptr : value_len); + + if (prefix_len + line_len + term_len > MAXWRITELEN) { + line_len = MAXWRITELEN - prefix_len - term_len; + if (!cont_as_conc) { + cont_tag = "CONC"; + while (value_ptr[line_len] == ' ' + || value_ptr[line_len-1] == ' ') { + line_len--; + } + } + } + + memset(value_part, 0, sizeof(value_part)); + strncpy(value_part, value_ptr, line_len); + write_simple(hndl, write_level, xref, tag, value_part); + + if (line_len < value_len) { + value_ptr = value_ptr + line_len; + value_len = value_len - line_len; + if (*value_ptr == '\n') { + value_ptr++; + value_len--; + } + prefix_len = cont_prefix_len; + write_level = level + 1; + xref = NULL; + tag = cont_tag; + nl_pos = strchr(value_ptr, '\n'); + } + else + value_ptr = NULL; } } } - else { - new_encoding = get_encoding(charset, ONE_BYTE); - if (new_encoding) { - encoding = new_encoding; - write_encoding_details = ONE_BYTE; - } - } - return 0; -} - -int gedcom_write_set_line_terminator(Enc_line_end end) -{ - write_terminator = terminator[end]; + return 0; } @@ -144,11 +198,13 @@ Gedcom_write_hndl gedcom_write_open(const char *filename) if (!hndl) MEMORY_ERROR; else { + init_write_encoding(); + init_write_terminator(); hndl->total_conv_fails = 0; - hndl->conv = initialize_utf8_conversion(encoding, 0); + hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0); if (!hndl->conv) { gedcom_error(_("Could not open encoding '%s' for writing: %s"), - encoding, strerror(errno)); + write_encoding.encoding, strerror(errno)); free(hndl); hndl = NULL; } @@ -162,15 +218,17 @@ Gedcom_write_hndl gedcom_write_open(const char *filename) hndl = NULL; } else { - hndl->term = write_terminator; + hndl->term = write_encoding.terminator; hndl->ctxt_level = -1; - if (write_encoding_details & WITH_BOM) { - if (write_encoding_details & TWO_BYTE_HILO) + if (write_encoding.bom == WITH_BOM) { + if (write_encoding.width == TWO_BYTE_HILO) write(hndl->filedesc, "\xFE\xFF", 2); - else if (write_encoding_details & TWO_BYTE_LOHI) + else if (write_encoding.width == TWO_BYTE_LOHI) write(hndl->filedesc, "\xFF\xFE", 2); + else if (!strcmp(write_encoding.encoding, "UTF-8")) + write(hndl->filedesc, "\xEF\xBB\xBF", 3); else - gedcom_warning(_("Byte order mark configured, but no Unicode")); + gedcom_warning(_("Byte order mark configured, but not relevant")); } } } @@ -192,16 +250,21 @@ int gedcom_write_close(Gedcom_write_hndl hndl, int* total_conv_fails) return result; } -char* get_tag_string(int elt_or_rec, char* tag) +char* get_tag_string(int elt_or_rec, int tag) { - char* result = tag_data[elt_or_rec].tag_name; + int tagnum = tag_data[elt_or_rec].tag; + if (!tagnum) tagnum = tag; - if (result) - return result; - else if (tag) - return tag; + if (tagnum) { + if (tagnum >= TAG_NUM_START && tagnum <= TAG_NUM_END) + return tag_name[tagnum - TAG_NUM_START]; + else { + gedcom_error(_("Not a valid tag: %d"), tagnum); + return NULL; + } + } else { - gedcom_error(_("The element or record type '%s' requires a specific tag" + gedcom_error(_("The element or record type '%s' requires a specific tag " "for writing"), tag_data[elt_or_rec].elt_name); return NULL; @@ -241,40 +304,117 @@ int get_level(Gedcom_write_hndl hndl, int elt_or_rec, int parent) return hndl->ctxt_level; } -int gedcom_write_record_str(Gedcom_write_hndl hndl, - Gedcom_rec rec, char* tag, - struct xref_value* xref, char* val) +char* convert_at(const char* input) +{ + if (input) { + const char* ptr = input; + reset_buffer(&convert_at_buffer); + while (*ptr) { + if (*ptr == '@') { + SAFE_BUF_ADDCHAR(&convert_at_buffer, '@'); + SAFE_BUF_ADDCHAR(&convert_at_buffer, '@'); + } + else { + SAFE_BUF_ADDCHAR(&convert_at_buffer, *ptr); + } + ptr++; + } + return get_buf_string(&convert_at_buffer); + } + else + return NULL; +} + +int _gedcom_write_val(Gedcom_write_hndl hndl, + int rec_or_elt, int tag, int parent_rec_or_elt, + const char* xrefstr, const char* val) { int result = 1; int level = 0; char* tag_str = NULL; - char* xref_str = NULL; - tag_str = get_tag_string(rec, tag); - level = get_level(hndl, rec, -1); - if (tag_str && check_type(rec, (val ? GV_CHAR_PTR : GV_NULL))) { - if (xref) - xref_str = xref->string; - result = write_simple(hndl, level, xref_str, tag_str, val); + tag_str = get_tag_string(rec_or_elt, tag); + level = get_level(hndl, rec_or_elt, parent_rec_or_elt); + if (tag_str && (level != -1)) { + if (rec_or_elt == ELT_HEAD_CHAR) + result = write_encoding_value(hndl, level, xrefstr, tag_str, val); + else if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC)) + result = write_long(hndl, rec_or_elt, level, xrefstr, tag_str, val); + else + result = write_simple(hndl, level, xrefstr, tag_str, val); } return result; } +int gedcom_write_record_str(Gedcom_write_hndl hndl, + Gedcom_rec rec, const char* xrefstr, + const char* val) +{ + int result = 1; + if (check_type(rec, (val ? GV_CHAR_PTR : GV_NULL))) + result = _gedcom_write_val(hndl, rec, 0, -1, xrefstr, convert_at(val)); + return result; +} + int gedcom_write_element_str(Gedcom_write_hndl hndl, - Gedcom_elt elt, char* tag, int parent_rec_or_elt, - char* val) + Gedcom_elt elt, int tag, int parent_rec_or_elt, + const char* val) { int result = 1; - int level = -1; - char* tag_str = NULL; + if (check_type(elt, (val ? GV_CHAR_PTR : GV_NULL))) + result = _gedcom_write_val(hndl, elt, tag, parent_rec_or_elt, NULL, + convert_at(val)); + return result; +} - tag_str = get_tag_string(elt, tag); - level = get_level(hndl, elt, parent_rec_or_elt); - if (tag_str && (level != -1) - && check_type(elt, (val ? GV_CHAR_PTR : GV_NULL))) { - result = write_simple(hndl, level, NULL, tag_str, val); - } +int gedcom_write_element_xref(Gedcom_write_hndl hndl, + Gedcom_elt elt, int tag, int parent_rec_or_elt, + const struct xref_value* val) +{ + int result = 1; + if (check_type(elt, (val ? GV_XREF_PTR : GV_NULL))) + result = _gedcom_write_val(hndl, elt, tag, parent_rec_or_elt, NULL, + val->string); + return result; +} + +int gedcom_write_element_date(Gedcom_write_hndl hndl, + Gedcom_elt elt, int tag, int parent_rec_or_elt, + const struct date_value* val) +{ + int result = 1; + if (check_type(elt, (val ? GV_DATE_VALUE : GV_NULL))) + result = _gedcom_write_val(hndl, elt, tag, parent_rec_or_elt, NULL, + gedcom_date_to_string(val)); + return result; +} + +int gedcom_write_element_age(Gedcom_write_hndl hndl, + Gedcom_elt elt, int tag, int parent_rec_or_elt, + const struct age_value* val) +{ + int result = 1; + if (check_type(elt, (val ? GV_AGE_VALUE : GV_NULL))) + result = _gedcom_write_val(hndl, elt, tag, parent_rec_or_elt, NULL, + gedcom_age_to_string(val)); + return result; +} + +int gedcom_write_user_str(Gedcom_write_hndl hndl, int level, const char* tag, + const char* xrefstr, const char* value) +{ + int result = 1; + if (tag && tag[0] == '_') + result = write_simple(hndl, level, xrefstr, tag, convert_at(value)); + return result; +} +int gedcom_write_user_xref(Gedcom_write_hndl hndl, int level, const char* tag, + const char* xrefstr, const struct xref_value* val) +{ + int result = 1; + if (tag && tag[0] == '_') + result = write_simple(hndl, level, xrefstr, tag, val->string); return result; }