#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "gedcom.h"
#include "gedcom.tabgen.h"
#include "compat.h"
-#include "utf8.h"
static size_t encoding_width; /* scales MAXGEDCLINELEN in the line-length check */
static int current_level = -1;
static int level_diff=MAXGEDCLEVEL; /* while < 1, ACTION_BEFORE_REGEXPS increments it and returns CLOSE */
static size_t line_len = 0; /* running total of strlen(yytext); set to (size_t)-1 once "line too long" was reported */
+static int tab_space = 0; /* countdown of space DELIM tokens still pending for an expanded tab */
static struct conv_buffer* ptr_buffer = NULL;
static struct conv_buffer* tag_buffer = NULL; /* buffer handed to TO_INTERNAL when converting tag text */
#ifdef LEXER_TEST
YYSTYPE gedcom_lval;
int line_no = 1;
-int compat_at = 0;
int gedcom_lex();
/* These are defined as functions here, because xgettext has trouble
extracting the strings out of long pre-processor defines */
-static void error_line_too_long()
+static void error_line_too_long(const char *line) /* Report a line exceeding MAXGEDCLINELEN; `line` is the text echoed at the end of the message. */
{
- gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN);
+ gedcom_error(_("Line too long, max %d characters allowed: %s"),
+ MAXGEDCLINELEN, line);
}
static void error_level_leading_zero()
gedcom_error(_("'@' character should be written as '@@' in values"));
}
+static void error_tab_character() /* Report a tab character found in a value; ACTION_TAB calls this when C_TAB_CHARACTER compatibility is off. */
+{
+ gedcom_error(_("Tab character is not allowed in values"));
+}
+
static void error_unexpected_character(const char* str, char ch)
{
gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
{ if (line_len != (size_t)-1) { \
line_len += strlen(yytext); \
if (line_len > MAXGEDCLINELEN * encoding_width) { \
- error_line_too_long(); \
+ error_line_too_long(yytext); \
line_len = (size_t)-1; \
return BADTOKEN; \
} \
} \
}
+#define GENERATE_TAB_SPACE /* Return one space as a DELIM token and count it
+   off tab_space. Expands to a `return`, so it is only usable inside the
+   function body of the lexer.
+   NOTE(review): ACTION_BEFORE_REGEXPS already post-decrements tab_space in
+   its condition before expanding this macro, so each pending space costs
+   two counts there -- confirm that this is intended. */ \
+ { gedcom_lval.string = " "; \
+ tab_space--; \
+ return DELIM; \
+ }
+
#define MKTAGACTION(THETAG) \
{ CHECK_LINE_LEN; \
gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer); \
But because this means that one token is converted into a series
of tokens, there is some initial code following immediately here
- that returns "pending" tokens. */
+ that returns "pending" tokens.
+
+ Also, for compatibility, tabs are converted into spaces; the
+ pending space tokens are returned here as well. */
#define ACTION_BEFORE_REGEXPS \
- { if (level_diff < 1) { \
+ { if (compat_mode(C_TAB_CHARACTER) && tab_space-- > 0) { \
+ GENERATE_TAB_SPACE; \
+ } \
+ else if (level_diff < 1) { \
level_diff++; \
return CLOSE; \
} \
#define ACTION_TERMINATOR /* Action run on a line terminator: check the line length, re-initialise it, and switch back to the INITIAL start condition. */ \
{ CHECK_LINE_LEN; \
INIT_LINE_LEN; \
+ if (line_no == 1) /* first line only: hand the converted terminator text to set_read_encoding_terminator */ \
+ set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \
BEGIN(INITIAL); \
}
}
#define ACTION_NORMAL_AT \
- { if (compat_at) { \
+ { if (compat_mode(C_NO_DOUBLE_AT)) { \
int i, j; \
char *yycopy = strdup(yytext); \
if (yycopy) { \
} \
}
+#define ACTION_TAB /* Action for a tab character. With C_TAB_CHARACTER
+   compatibility on, set tab_space to 8 and return the first space through
+   GENERATE_TAB_SPACE; otherwise report the tab and return BADTOKEN. */ \
+ { if (compat_mode(C_TAB_CHARACTER)) { \
+ tab_space = 8; \
+ GENERATE_TAB_SPACE; \
+ } \
+ else { \
+ error_tab_character(); \
+ return BADTOKEN; \
+ } \
+ }
+
#define ACTION_UNEXPECTED \
{ error_unexpected_character(yytext, yytext[0]); \
return BADTOKEN; \