#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "gedcom.h"
#include "gedcom.tabgen.h"
#include "compat.h"
static size_t encoding_width;
static int current_level = -1;
-static int level_diff=MAXGEDCLEVEL;
+static int level_diff = MAXGEDCLEVEL; /* while < 1, ACTION_BEFORE_REGEXPS increments it and returns a pending CLOSE */
static size_t line_len = 0;
+static int tab_space = 0; /* counter of spaces still pending for a tab being expanded (armed in ACTION_TAB) */
+static int current_tag = -1; /* token value of the most recently scanned tag (assigned in MKTAGACTION) */
static struct conv_buffer* ptr_buffer = NULL;
static struct conv_buffer* tag_buffer = NULL;
/* These are defined as functions here, because xgettext has trouble
extracting the strings out of long pre-processor defined */
-static void error_line_too_long(const char *line)
+static void error_line_too_long() /* Reports a line longer than MAXGEDCLINELEN; takes no argument because the offending text is not echoed. */
{
- gedcom_error(_("Line too long, max %d characters allowed: %s"),
- MAXGEDCLINELEN, line);
+ gedcom_error(_("Line too long, max %d characters allowed"),
+ MAXGEDCLINELEN); /* only the limit is formatted into the message */
}
static void error_level_leading_zero()
gedcom_error(_("'@' character should be written as '@@' in values"));
}
+static void error_tab_character() /* Reports a tab found in a value; ACTION_TAB calls this when C_TAB_CHARACTER compat mode is off. */
+{
+ gedcom_error(_("Tab character is not allowed in values"));
+}
+
static void error_unexpected_character(const char* str, char ch)
{
gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
#define CHECK_LINE_LEN \
{ if (line_len != (size_t)-1) { \
line_len += strlen(yytext); \
- if (line_len > MAXGEDCLINELEN * encoding_width) { \
- error_line_too_long(yytext); \
+ if (line_len > MAXGEDCLINELEN * encoding_width /* encoding_width is presumably bytes per character -- TODO confirm */ \
+ && ! compat_long_line(current_level, current_tag)) { /* presumably a compat-mode exemption for this level/tag -- confirm in compat.h */ \
+ error_line_too_long(); /* reported once: line_len is then set to (size_t)-1, which the outer test skips */ \
line_len = (size_t)-1; \
return BADTOKEN; \
} \
} \
}
+#define GENERATE_TAB_SPACE /* Returns one space as a DELIM token and counts it against tab_space. */ \
+ { gedcom_lval.string = " "; \
+ tab_space--; /* NOTE(review): ACTION_BEFORE_REGEXPS already post-decrements tab_space in its condition before expanding this macro, so each pending space costs two counts and a tab appears to yield 5 spaces, not 8 -- confirm intent */ \
+ return DELIM; \
+ }
+
#define MKTAGACTION(THETAG) \
{ CHECK_LINE_LEN; \
gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer); \
- gedcom_lval.tag.value = TAG_##THETAG; \
+ current_tag = TAG_##THETAG; /* remembered at file scope; CHECK_LINE_LEN passes it to compat_long_line() */ \
+ gedcom_lval.tag.value = current_tag; \
BEGIN(NORMAL); \
line_no++; \
- return TAG_##THETAG; \
+ return current_tag; /* same value as the tag token assigned above */ \
}
/* The GEDCOM level number is converted into a sequence of opening
But because this means that one token is converted into a series
of tokens, there is some initial code following immediately here
- that returns "pending" tokens. */
+ that returns "pending" tokens.
+
+ Also, for compatibility, tabs are converted into spaces; this is
+ also handled here. */
#define ACTION_BEFORE_REGEXPS \
- { if (level_diff < 1) { \
+ { if (compat_mode(C_TAB_CHARACTER) && tab_space-- > 0) { \
+ GENERATE_TAB_SPACE; \
+ } \
+ else if (level_diff < 1) { \
level_diff++; \
return CLOSE; \
} \
{ CHECK_LINE_LEN; \
INIT_LINE_LEN; \
if (line_no == 1) \
- set_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \
+ set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \
BEGIN(INITIAL); \
}
} \
}
+#define ACTION_TAB /* Handles a tab: expanded to spaces in C_TAB_CHARACTER compat mode, otherwise reported as an error. */ \
+ { if (compat_mode(C_TAB_CHARACTER)) { \
+ tab_space = 8; /* arm the pending-space counter */ \
+ GENERATE_TAB_SPACE; /* returns the first space (DELIM) immediately */ \
+ } \
+ else { \
+ error_tab_character(); \
+ return BADTOKEN; \
+ } \
+ }
+
#define ACTION_UNEXPECTED \
{ error_unexpected_character(yytext, yytext[0]); \
return BADTOKEN; \