#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "gedcom.h"
#include "gedcom.tabgen.h"
#include "compat.h"
-#include "utf8.h"
static size_t encoding_width; /* multiplied with MAXGEDCLINELEN in the line length check */
static int current_level = -1; /* passed to compat_long_line() by CHECK_LINE_LEN */
-static int level_diff=MAXGEDCLEVEL;
+static int level_diff = MAXGEDCLEVEL; /* while < 1, ACTION_BEFORE_REGEXPS returns a pending CLOSE */
static size_t line_len = 0; /* accumulated strlen(yytext); (size_t)-1 once "line too long" was reported */
+static int tab_space = 0; /* spaces still to be emitted for an expanded tab */
+static int current_tag = -1; /* tag token value most recently set by MKTAGACTION */
static struct conv_buffer* ptr_buffer = NULL;
static struct conv_buffer* tag_buffer = NULL; /* conversion buffer used by MKTAGACTION for tag text */
#ifdef LEXER_TEST
YYSTYPE gedcom_lval;
int line_no = 1;
-int compat_at = 0;
int gedcom_lex();
/* Report through gedcom_error() that the line exceeds the maximum of
   MAXGEDCLINELEN characters. */
static void error_line_too_long()
{
- gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN);
+ gedcom_error(_("Line too long, max %d characters allowed"),
+ MAXGEDCLINELEN);
}
static void error_level_leading_zero()
gedcom_error(_("'@' character should be written as '@@' in values"));
}
+static void error_tab_character() /* reports via gedcom_error() that tabs are not allowed in values */
+{
+ gedcom_error(_("Tab character is not allowed in values"));
+}
+
static void error_unexpected_character(const char* str, char ch)
{
gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
#define CHECK_LINE_LEN \
{ if (line_len != (size_t)-1) { \
line_len += strlen(yytext); \
- if (line_len > MAXGEDCLINELEN * encoding_width) { \
+ if (line_len > MAXGEDCLINELEN * encoding_width \
+ && ! compat_long_line(current_level, current_tag)) { \
error_line_too_long(); \
line_len = (size_t)-1; \
return BADTOKEN; \
} \
}
+/* Consume one unit of the pending tab expansion counter and hand a single
+   blank back to the caller as a DELIM token. */
+#define GENERATE_TAB_SPACE \
+ { tab_space -= 1; \
+ gedcom_lval.string = " "; \
+ return DELIM; \
+ }
+
/* Lexer action for a recognised tag THETAG: applies the line length check,
   stores the converted tag text and the TAG_<THETAG> token value in
   gedcom_lval.tag, remembers the token in current_tag, switches to the
   NORMAL start condition, bumps line_no and returns the token.
   NOTE(review): CHECK_LINE_LEN runs before current_tag is updated, so the
   length check for the tag token itself still sees the previous tag --
   confirm this is intended. */
#define MKTAGACTION(THETAG) \
{ CHECK_LINE_LEN; \
gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer); \
- gedcom_lval.tag.value = TAG_##THETAG; \
+ current_tag = TAG_##THETAG; \
+ gedcom_lval.tag.value = current_tag; \
BEGIN(NORMAL); \
line_no++; \
- return TAG_##THETAG; \
+ return current_tag; \
}
/* The GEDCOM level number is converted into a sequence of opening
But because this means that one token is converted into a series
of tokens, there is some initial code following immediately here
- that returns "pending" tokens. */
+ that returns "pending" tokens.
+
+ Also, for compatibility tabs are converted into spaces, which is
+ also handled here */
#define ACTION_BEFORE_REGEXPS \
- { if (level_diff < 1) { \
+ /* Pending tab spaces are drained first. The counter is only tested \
+ here: GENERATE_TAB_SPACE performs the single decrement per emitted \
+ space, so a tab expands to exactly the number of spaces set by \
+ ACTION_TAB and the counter rests at 0 instead of running negative. */ \
+ { if (compat_mode(C_TAB_CHARACTER) && tab_space > 0) { \
+ GENERATE_TAB_SPACE; \
+ } \
+ else if (level_diff < 1) { \
level_diff++; \
return CLOSE; \
} \
/* Lexer action for a line terminator: applies the line length check,
   invokes INIT_LINE_LEN (defined elsewhere), hands the converted terminator
   text to set_read_encoding_terminator() when line_no is 1, and switches
   back to the INITIAL start condition. */
#define ACTION_TERMINATOR \
{ CHECK_LINE_LEN; \
INIT_LINE_LEN; \
+ if (line_no == 1) \
+ set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \
BEGIN(INITIAL); \
}
}
#define ACTION_NORMAL_AT \
- { if (compat_at) { \
+ { if (compat_mode(C_NO_DOUBLE_AT)) { \
int i, j; \
char *yycopy = strdup(yytext); \
if (yycopy) { \
} \
}
+/* Lexer action for a tab character.  Outside C_TAB_CHARACTER compatibility
+   mode the tab is reported as an error and BADTOKEN is returned; otherwise
+   the expansion counter is armed with 8 and the first space is emitted
+   right away through GENERATE_TAB_SPACE. */
+#define ACTION_TAB \
+ { if (! compat_mode(C_TAB_CHARACTER)) { \
+ error_tab_character(); \
+ return BADTOKEN; \
+ } \
+ tab_space = 8; \
+ GENERATE_TAB_SPACE; \
+ }
+
#define ACTION_UNEXPECTED \
{ error_unexpected_character(yytext, yytext[0]); \
return BADTOKEN; \